Diffstat (limited to 'pkg/sentry')
-rw-r--r--  pkg/sentry/fsimpl/devpts/devpts.go  7
-rw-r--r--  pkg/sentry/fsimpl/devpts/devpts_state_autogen.go  24
-rw-r--r--  pkg/sentry/fsimpl/devpts/root_inode_refs.go  118
-rw-r--r--  pkg/sentry/fsimpl/fuse/fuse_state_autogen.go  24
-rw-r--r--  pkg/sentry/fsimpl/fuse/fusefs.go  7
-rw-r--r--  pkg/sentry/fsimpl/fuse/inode_refs.go  118
-rw-r--r--  pkg/sentry/fsimpl/host/connected_endpoint_refs.go  10
-rw-r--r--  pkg/sentry/fsimpl/host/inode_refs.go  10
-rw-r--r--  pkg/sentry/fsimpl/kernfs/dentry_refs.go  118
-rw-r--r--  pkg/sentry/fsimpl/kernfs/inode_impl_util.go  27
-rw-r--r--  pkg/sentry/fsimpl/kernfs/kernfs.go  24
-rw-r--r--  pkg/sentry/fsimpl/kernfs/kernfs_state_autogen.go  79
-rw-r--r--  pkg/sentry/fsimpl/kernfs/static_directory_refs.go  118
-rw-r--r--  pkg/sentry/fsimpl/proc/fd_dir_inode_refs.go  118
-rw-r--r--  pkg/sentry/fsimpl/proc/fd_info_dir_inode_refs.go  118
-rw-r--r--  pkg/sentry/fsimpl/proc/proc_state_autogen.go  306
-rw-r--r--  pkg/sentry/fsimpl/proc/subtasks.go  7
-rw-r--r--  pkg/sentry/fsimpl/proc/subtasks_inode_refs.go  118
-rw-r--r--  pkg/sentry/fsimpl/proc/task.go  8
-rw-r--r--  pkg/sentry/fsimpl/proc/task_fds.go  16
-rw-r--r--  pkg/sentry/fsimpl/proc/task_inode_refs.go  118
-rw-r--r--  pkg/sentry/fsimpl/proc/task_net.go  6
-rw-r--r--  pkg/sentry/fsimpl/proc/tasks.go  7
-rw-r--r--  pkg/sentry/fsimpl/proc/tasks_inode_refs.go  118
-rw-r--r--  pkg/sentry/fsimpl/sys/dir_refs.go  118
-rw-r--r--  pkg/sentry/fsimpl/sys/sys.go  9
-rw-r--r--  pkg/sentry/fsimpl/sys/sys_state_autogen.go  31
-rw-r--r--  pkg/sentry/fsimpl/tmpfs/inode_refs.go  10
-rw-r--r--  pkg/sentry/kernel/fd_table.go  21
-rw-r--r--  pkg/sentry/kernel/fd_table_refs.go  118
-rw-r--r--  pkg/sentry/kernel/fd_table_unsafe.go  2
-rw-r--r--  pkg/sentry/kernel/fs_context.go  89
-rw-r--r--  pkg/sentry/kernel/fs_context_refs.go  118
-rw-r--r--  pkg/sentry/kernel/kernel_state_autogen.go  114
-rw-r--r--  pkg/sentry/kernel/process_group_refs.go  118
-rw-r--r--  pkg/sentry/kernel/session_refs.go  118
-rw-r--r--  pkg/sentry/kernel/sessions.go  29
-rw-r--r--  pkg/sentry/kernel/shm/shm.go  19
-rw-r--r--  pkg/sentry/kernel/shm/shm_refs.go  118
-rw-r--r--  pkg/sentry/kernel/shm/shm_state_autogen.go  30
-rw-r--r--  pkg/sentry/mm/aio_context.go  7
-rw-r--r--  pkg/sentry/mm/aio_mappable_refs.go  118
-rw-r--r--  pkg/sentry/mm/mm_state_autogen.go  60
-rw-r--r--  pkg/sentry/mm/special_mappable.go  7
-rw-r--r--  pkg/sentry/mm/special_mappable_refs.go  118
-rw-r--r--  pkg/sentry/platform/ring0/defs_impl_arm64.go  2
-rw-r--r--  pkg/sentry/socket/unix/socket_refs.go  10
-rw-r--r--  pkg/sentry/socket/unix/transport/connectioned.go  8
-rw-r--r--  pkg/sentry/socket/unix/transport/connectionless.go  2
-rw-r--r--  pkg/sentry/socket/unix/transport/queue.go  13
-rw-r--r--  pkg/sentry/socket/unix/transport/queue_refs.go  118
-rw-r--r--  pkg/sentry/socket/unix/transport/transport_state_autogen.go  30
-rw-r--r--  pkg/sentry/vfs/file_description.go  39
-rw-r--r--  pkg/sentry/vfs/file_description_refs.go  118
-rw-r--r--  pkg/sentry/vfs/filesystem.go  37
-rw-r--r--  pkg/sentry/vfs/filesystem_refs.go  118
-rw-r--r--  pkg/sentry/vfs/mount.go  21
-rw-r--r--  pkg/sentry/vfs/mount_namespace_refs.go  118
-rw-r--r--  pkg/sentry/vfs/vfs_state_autogen.go  92
59 files changed, 3365 insertions, 357 deletions
diff --git a/pkg/sentry/fsimpl/devpts/devpts.go b/pkg/sentry/fsimpl/devpts/devpts.go
index 3f3a099bd..0eaff9087 100644
--- a/pkg/sentry/fsimpl/devpts/devpts.go
+++ b/pkg/sentry/fsimpl/devpts/devpts.go
@@ -83,6 +83,7 @@ func (fstype FilesystemType) newFilesystem(vfsObj *vfs.VirtualFilesystem, creds
}
root.InodeAttrs.Init(creds, linux.UNNAMED_MAJOR, devMinor, 1, linux.ModeDirectory|0555)
root.OrderedChildren.Init(kernfs.OrderedChildrenOptions{})
+ root.EnableLeakCheck()
root.dentry.Init(root)
// Construct the pts master inode and dentry. Linux always uses inode
@@ -110,6 +111,7 @@ func (fs *filesystem) Release(ctx context.Context) {
// rootInode is the root directory inode for the devpts mounts.
type rootInode struct {
+ rootInodeRefs
kernfs.AlwaysValid
kernfs.InodeAttrs
kernfs.InodeDirectoryNoNewChildren
@@ -233,3 +235,8 @@ func (i *rootInode) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback,
}
return offset, nil
}
+
+// DecRef implements kernfs.Inode.
+func (i *rootInode) DecRef(context.Context) {
+ i.rootInodeRefs.DecRef(i.Destroy)
+}
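
This file shows the wiring pattern that every converted type in this commit follows: embed the generated Refs type, call EnableLeakCheck() at construction, and implement kernfs.Inode's DecRef by delegating to the generated counter with the type's Destroy as the destructor (for rootInode, the Destroy passed here is promoted from the embedded kernfs.OrderedChildren, whose Destroy method is added later in this diff). A minimal sketch of the same pattern, with hypothetical names (widget/widgetRefs) and a simplified stand-in for the generated counter:

    package widgetfs

    import (
    	"sync/atomic"

    	"gvisor.dev/gvisor/pkg/context"
    	"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
    )

    // widgetRefs stands in for a generated counter; its zero value holds one
    // reference (the stored count is real references minus one).
    type widgetRefs struct{ refCount int64 }

    func (r *widgetRefs) EnableLeakCheck() { /* finalizer registration, as in the generated files */ }

    // DecRef decrements the count and runs destroy when the last reference drops.
    func (r *widgetRefs) DecRef(destroy func()) {
    	if v := atomic.AddInt64(&r.refCount, -1); v == -1 && destroy != nil {
    		destroy()
    	}
    }

    // widget mirrors rootInode's shape: counter first, then kernfs mixins.
    type widget struct {
    	widgetRefs
    	kernfs.OrderedChildren
    }

    func newWidget() *widget {
    	w := &widget{}
    	w.OrderedChildren.Init(kernfs.OrderedChildrenOptions{})
    	w.EnableLeakCheck() // construction hands out the initial reference
    	return w
    }

    // DecRef implements kernfs.Inode; Destroy is promoted from OrderedChildren.
    func (w *widget) DecRef(context.Context) {
    	w.widgetRefs.DecRef(w.Destroy)
    }
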
diff --git a/pkg/sentry/fsimpl/devpts/devpts_state_autogen.go b/pkg/sentry/fsimpl/devpts/devpts_state_autogen.go
index 5942f7bac..adc184d1b 100644
--- a/pkg/sentry/fsimpl/devpts/devpts_state_autogen.go
+++ b/pkg/sentry/fsimpl/devpts/devpts_state_autogen.go
@@ -120,6 +120,29 @@ func (x *queue) StateLoad(m state.Source) {
m.Load(4, &x.transformer)
}
+func (x *rootInodeRefs) StateTypeName() string {
+ return "pkg/sentry/fsimpl/devpts.rootInodeRefs"
+}
+
+func (x *rootInodeRefs) StateFields() []string {
+ return []string{
+ "refCount",
+ }
+}
+
+func (x *rootInodeRefs) beforeSave() {}
+
+func (x *rootInodeRefs) StateSave(m state.Sink) {
+ x.beforeSave()
+ m.Save(0, &x.refCount)
+}
+
+func (x *rootInodeRefs) afterLoad() {}
+
+func (x *rootInodeRefs) StateLoad(m state.Source) {
+ m.Load(0, &x.refCount)
+}
+
func (x *Terminal) StateTypeName() string {
return "pkg/sentry/fsimpl/devpts.Terminal"
}
@@ -157,5 +180,6 @@ func init() {
state.Register((*outputQueueTransformer)(nil))
state.Register((*inputQueueTransformer)(nil))
state.Register((*queue)(nil))
+ state.Register((*rootInodeRefs)(nil))
state.Register((*Terminal)(nil))
}
diff --git a/pkg/sentry/fsimpl/devpts/root_inode_refs.go b/pkg/sentry/fsimpl/devpts/root_inode_refs.go
new file mode 100644
index 000000000..051801202
--- /dev/null
+++ b/pkg/sentry/fsimpl/devpts/root_inode_refs.go
@@ -0,0 +1,118 @@
+package devpts
+
+import (
+ "fmt"
+ "runtime"
+ "sync/atomic"
+
+ "gvisor.dev/gvisor/pkg/log"
+ refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
+)
+
+// ownerType is used to customize logging. Note that we use a pointer to T so
+// that we do not copy the entire object when passed as a format parameter.
+var rootInodeownerType *rootInode
+
+// Refs implements refs.RefCounter. It keeps a reference count using atomic
+// operations and calls the destructor when the count reaches zero.
+//
+// Note that the number of references is actually refCount + 1 so that a default
+// zero-value Refs object contains one reference.
+//
+// TODO(gvisor.dev/issue/1486): Store stack traces when leak check is enabled in
+// a map with 16-bit hashes, and store the hash in the top 16 bits of refCount.
+// This will allow us to add stack trace information to the leak messages
+// without growing the size of Refs.
+//
+// +stateify savable
+type rootInodeRefs struct {
+ // refCount is composed of two fields:
+ //
+ // [32-bit speculative references]:[32-bit real references]
+ //
+ // Speculative references are used for TryIncRef, to avoid a CompareAndSwap
+ // loop. See IncRef, DecRef and TryIncRef for details of how these fields are
+ // used.
+ refCount int64
+}
+
+func (r *rootInodeRefs) finalize() {
+ var note string
+ switch refs_vfs1.GetLeakMode() {
+ case refs_vfs1.NoLeakChecking:
+ return
+ case refs_vfs1.UninitializedLeakChecking:
+ note = "(Leak checker uninitialized): "
+ }
+ if n := r.ReadRefs(); n != 0 {
+ log.Warningf("%sRefs %p owned by %T garbage collected with ref count of %d (want 0)", note, r, rootInodeownerType, n)
+ }
+}
+
+// EnableLeakCheck checks for reference leaks when Refs gets garbage collected.
+func (r *rootInodeRefs) EnableLeakCheck() {
+ if refs_vfs1.GetLeakMode() != refs_vfs1.NoLeakChecking {
+ runtime.SetFinalizer(r, (*rootInodeRefs).finalize)
+ }
+}
+
+// ReadRefs returns the current number of references. The returned count is
+// inherently racy and is unsafe to use without external synchronization.
+func (r *rootInodeRefs) ReadRefs() int64 {
+
+ return atomic.LoadInt64(&r.refCount) + 1
+}
+
+// IncRef implements refs.RefCounter.IncRef.
+//
+//go:nosplit
+func (r *rootInodeRefs) IncRef() {
+ if v := atomic.AddInt64(&r.refCount, 1); v <= 0 {
+ panic(fmt.Sprintf("Incrementing non-positive ref count %p owned by %T", r, rootInodeownerType))
+ }
+}
+
+// TryIncRef implements refs.RefCounter.TryIncRef.
+//
+// To do this safely without a loop, a speculative reference is first acquired
+// on the object. This allows multiple concurrent TryIncRef calls to distinguish
+// other TryIncRef calls from genuine references held.
+//
+//go:nosplit
+func (r *rootInodeRefs) TryIncRef() bool {
+ const speculativeRef = 1 << 32
+ v := atomic.AddInt64(&r.refCount, speculativeRef)
+ if int32(v) < 0 {
+
+ atomic.AddInt64(&r.refCount, -speculativeRef)
+ return false
+ }
+
+ atomic.AddInt64(&r.refCount, -speculativeRef+1)
+ return true
+}
+
+// DecRef implements refs.RefCounter.DecRef.
+//
+// Note that speculative references are counted here. Since they were added
+// prior to real references reaching zero, they will successfully convert to
+// real references. In other words, we see speculative references only in the
+// following case:
+//
+// A: TryIncRef [speculative increase => sees non-negative references]
+// B: DecRef [real decrease]
+// A: TryIncRef [transform speculative to real]
+//
+//go:nosplit
+func (r *rootInodeRefs) DecRef(destroy func()) {
+ switch v := atomic.AddInt64(&r.refCount, -1); {
+ case v < -1:
+ panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %T", r, rootInodeownerType))
+
+ case v == -1:
+
+ if destroy != nil {
+ destroy()
+ }
+ }
+}
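
The generated file above is one instantiation of a single refs template, repeated once per converted type in this commit. A runnable, standalone sketch of its counting scheme: the stored value is (real references - 1), so the zero value already holds one reference, and TryIncRef parks a speculative reference in the upper 32 bits so it needs no compare-and-swap loop. The names below are illustrative, not part of the commit:

    package main

    import (
    	"fmt"
    	"sync/atomic"
    )

    type refs struct{ refCount int64 }

    func (r *refs) incRef() {
    	if v := atomic.AddInt64(&r.refCount, 1); v <= 0 {
    		panic("incrementing non-positive ref count")
    	}
    }

    func (r *refs) tryIncRef() bool {
    	const speculativeRef = 1 << 32
    	v := atomic.AddInt64(&r.refCount, speculativeRef)
    	if int32(v) < 0 { // low 32 bits negative => count already hit zero
    		atomic.AddInt64(&r.refCount, -speculativeRef)
    		return false
    	}
    	atomic.AddInt64(&r.refCount, -speculativeRef+1) // speculative -> real
    	return true
    }

    func (r *refs) decRef(destroy func()) {
    	switch v := atomic.AddInt64(&r.refCount, -1); {
    	case v < -1:
    		panic("decrementing non-positive ref count")
    	case v == -1:
    		if destroy != nil {
    			destroy()
    		}
    	}
    }

    func main() {
    	r := &refs{}               // zero value: one reference
    	r.incRef()                 // two references
    	fmt.Println(r.tryIncRef()) // true: three references
    	r.decRef(nil)
    	r.decRef(nil)
    	r.decRef(func() { fmt.Println("destroyed") }) // last ref: destroy fires
    	fmt.Println(r.tryIncRef()) // false: the count already reached zero
    }

Note how the final tryIncRef fails cleanly: after the count goes to -1, adding the speculative bit leaves the low 32 bits negative, so the attempt is rolled back. This is the property that lets callers race TryIncRef against a final DecRef without a lock.
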
diff --git a/pkg/sentry/fsimpl/fuse/fuse_state_autogen.go b/pkg/sentry/fsimpl/fuse/fuse_state_autogen.go
index e4ce04322..f72fe342e 100644
--- a/pkg/sentry/fsimpl/fuse/fuse_state_autogen.go
+++ b/pkg/sentry/fsimpl/fuse/fuse_state_autogen.go
@@ -99,6 +99,29 @@ func (x *futureResponse) StateLoad(m state.Source) {
m.Load(3, &x.data)
}
+func (x *inodeRefs) StateTypeName() string {
+ return "pkg/sentry/fsimpl/fuse.inodeRefs"
+}
+
+func (x *inodeRefs) StateFields() []string {
+ return []string{
+ "refCount",
+ }
+}
+
+func (x *inodeRefs) beforeSave() {}
+
+func (x *inodeRefs) StateSave(m state.Sink) {
+ x.beforeSave()
+ m.Save(0, &x.refCount)
+}
+
+func (x *inodeRefs) afterLoad() {}
+
+func (x *inodeRefs) StateLoad(m state.Source) {
+ m.Load(0, &x.refCount)
+}
+
func (x *requestList) StateTypeName() string {
return "pkg/sentry/fsimpl/fuse.requestList"
}
@@ -155,6 +178,7 @@ func init() {
state.Register((*Request)(nil))
state.Register((*Response)(nil))
state.Register((*futureResponse)(nil))
+ state.Register((*inodeRefs)(nil))
state.Register((*requestList)(nil))
state.Register((*requestEntry)(nil))
}
diff --git a/pkg/sentry/fsimpl/fuse/fusefs.go b/pkg/sentry/fsimpl/fuse/fusefs.go
index 44021ee4b..9717c0e15 100644
--- a/pkg/sentry/fsimpl/fuse/fusefs.go
+++ b/pkg/sentry/fsimpl/fuse/fusefs.go
@@ -198,6 +198,7 @@ func (fs *filesystem) Release(ctx context.Context) {
// inode implements kernfs.Inode.
type inode struct {
+ inodeRefs
kernfs.InodeAttrs
kernfs.InodeNoDynamicLookup
kernfs.InodeNotSymlink
@@ -213,6 +214,7 @@ func (fs *filesystem) newInode(creds *auth.Credentials, mode linux.FileMode) *ke
i := &inode{}
i.InodeAttrs.Init(creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.ModeDirectory|0755)
i.OrderedChildren.Init(kernfs.OrderedChildrenOptions{})
+ i.EnableLeakCheck()
i.dentry.Init(i)
return &i.dentry
@@ -324,3 +326,8 @@ func (i *inode) Stat(ctx context.Context, fs *vfs.Filesystem, opts vfs.StatOptio
return statFromFUSEAttr(out.Attr, opts.Mask, fusefs.devMinor), nil
}
+
+// DecRef implements kernfs.Inode.
+func (i *inode) DecRef(context.Context) {
+ i.inodeRefs.DecRef(i.Destroy)
+}
diff --git a/pkg/sentry/fsimpl/fuse/inode_refs.go b/pkg/sentry/fsimpl/fuse/inode_refs.go
new file mode 100644
index 000000000..6b9456e1d
--- /dev/null
+++ b/pkg/sentry/fsimpl/fuse/inode_refs.go
@@ -0,0 +1,118 @@
+package fuse
+
+import (
+ "fmt"
+ "runtime"
+ "sync/atomic"
+
+ "gvisor.dev/gvisor/pkg/log"
+ refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
+)
+
+// ownerType is used to customize logging. Note that we use a pointer to T so
+// that we do not copy the entire object when passed as a format parameter.
+var inodeownerType *inode
+
+// Refs implements refs.RefCounter. It keeps a reference count using atomic
+// operations and calls the destructor when the count reaches zero.
+//
+// Note that the number of references is actually refCount + 1 so that a default
+// zero-value Refs object contains one reference.
+//
+// TODO(gvisor.dev/issue/1486): Store stack traces when leak check is enabled in
+// a map with 16-bit hashes, and store the hash in the top 16 bits of refCount.
+// This will allow us to add stack trace information to the leak messages
+// without growing the size of Refs.
+//
+// +stateify savable
+type inodeRefs struct {
+ // refCount is composed of two fields:
+ //
+ // [32-bit speculative references]:[32-bit real references]
+ //
+ // Speculative references are used for TryIncRef, to avoid a CompareAndSwap
+ // loop. See IncRef, DecRef and TryIncRef for details of how these fields are
+ // used.
+ refCount int64
+}
+
+func (r *inodeRefs) finalize() {
+ var note string
+ switch refs_vfs1.GetLeakMode() {
+ case refs_vfs1.NoLeakChecking:
+ return
+ case refs_vfs1.UninitializedLeakChecking:
+ note = "(Leak checker uninitialized): "
+ }
+ if n := r.ReadRefs(); n != 0 {
+ log.Warningf("%sRefs %p owned by %T garbage collected with ref count of %d (want 0)", note, r, inodeownerType, n)
+ }
+}
+
+// EnableLeakCheck checks for reference leaks when Refs gets garbage collected.
+func (r *inodeRefs) EnableLeakCheck() {
+ if refs_vfs1.GetLeakMode() != refs_vfs1.NoLeakChecking {
+ runtime.SetFinalizer(r, (*inodeRefs).finalize)
+ }
+}
+
+// ReadRefs returns the current number of references. The returned count is
+// inherently racy and is unsafe to use without external synchronization.
+func (r *inodeRefs) ReadRefs() int64 {
+
+ return atomic.LoadInt64(&r.refCount) + 1
+}
+
+// IncRef implements refs.RefCounter.IncRef.
+//
+//go:nosplit
+func (r *inodeRefs) IncRef() {
+ if v := atomic.AddInt64(&r.refCount, 1); v <= 0 {
+ panic(fmt.Sprintf("Incrementing non-positive ref count %p owned by %T", r, inodeownerType))
+ }
+}
+
+// TryIncRef implements refs.RefCounter.TryIncRef.
+//
+// To do this safely without a loop, a speculative reference is first acquired
+// on the object. This allows multiple concurrent TryIncRef calls to distinguish
+// other TryIncRef calls from genuine references held.
+//
+//go:nosplit
+func (r *inodeRefs) TryIncRef() bool {
+ const speculativeRef = 1 << 32
+ v := atomic.AddInt64(&r.refCount, speculativeRef)
+ if int32(v) < 0 {
+
+ atomic.AddInt64(&r.refCount, -speculativeRef)
+ return false
+ }
+
+ atomic.AddInt64(&r.refCount, -speculativeRef+1)
+ return true
+}
+
+// DecRef implements refs.RefCounter.DecRef.
+//
+// Note that speculative references are counted here. Since they were added
+// prior to real references reaching zero, they will successfully convert to
+// real references. In other words, we see speculative references only in the
+// following case:
+//
+// A: TryIncRef [speculative increase => sees non-negative references]
+// B: DecRef [real decrease]
+// A: TryIncRef [transform speculative to real]
+//
+//go:nosplit
+func (r *inodeRefs) DecRef(destroy func()) {
+ switch v := atomic.AddInt64(&r.refCount, -1); {
+ case v < -1:
+ panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %T", r, inodeownerType))
+
+ case v == -1:
+
+ if destroy != nil {
+ destroy()
+ }
+ }
+}
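
The EnableLeakCheck mechanism in these generated files rests on runtime.SetFinalizer: if a counted object becomes garbage while its count is nonzero, the finalizer logs a leak warning instead of silently dropping the references. A standalone sketch of that idea using only the standard library (pkg/refs and pkg/log replaced with stdlib equivalents; finalizers run asynchronously, so the demo may need a GC cycle or two):

    package main

    import (
    	"fmt"
    	"runtime"
    	"sync/atomic"
    	"time"
    )

    type counted struct {
    	refCount int64 // real references minus one, as in the generated types
    }

    func (c *counted) enableLeakCheck() {
    	runtime.SetFinalizer(c, func(c *counted) {
    		if n := atomic.LoadInt64(&c.refCount) + 1; n != 0 {
    			fmt.Printf("leak: object finalized with %d references (want 0)\n", n)
    		}
    	})
    }

    func main() {
    	c := &counted{} // holds one reference that is never released
    	c.enableLeakCheck()
    	c = nil // drop the only pointer without a DecRef
    	runtime.GC()
    	time.Sleep(10 * time.Millisecond) // finalizers run on a background goroutine
    }
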
diff --git a/pkg/sentry/fsimpl/host/connected_endpoint_refs.go b/pkg/sentry/fsimpl/host/connected_endpoint_refs.go
index 3b7bf599e..babb3f664 100644
--- a/pkg/sentry/fsimpl/host/connected_endpoint_refs.go
+++ b/pkg/sentry/fsimpl/host/connected_endpoint_refs.go
@@ -1,6 +1,7 @@
package host
import (
+ "fmt"
"runtime"
"sync/atomic"
@@ -18,6 +19,11 @@ var ConnectedEndpointownerType *ConnectedEndpoint
// Note that the number of references is actually refCount + 1 so that a default
// zero-value Refs object contains one reference.
//
+// TODO(gvisor.dev/issue/1486): Store stack traces when leak check is enabled in
+// a map with 16-bit hashes, and store the hash in the top 16 bits of refCount.
+// This will allow us to add stack trace information to the leak messages
+// without growing the size of Refs.
+//
// +stateify savable
type ConnectedEndpointRefs struct {
// refCount is composed of two fields:
@@ -62,7 +68,7 @@ func (r *ConnectedEndpointRefs) ReadRefs() int64 {
//go:nosplit
func (r *ConnectedEndpointRefs) IncRef() {
if v := atomic.AddInt64(&r.refCount, 1); v <= 0 {
- panic("Incrementing non-positive ref count")
+ panic(fmt.Sprintf("Incrementing non-positive ref count %p owned by %T", r, ConnectedEndpointownerType))
}
}
@@ -101,7 +107,7 @@ func (r *ConnectedEndpointRefs) TryIncRef() bool {
func (r *ConnectedEndpointRefs) DecRef(destroy func()) {
switch v := atomic.AddInt64(&r.refCount, -1); {
case v < -1:
- panic("Decrementing non-positive ref count")
+ panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %T", r, ConnectedEndpointownerType))
case v == -1:
diff --git a/pkg/sentry/fsimpl/host/inode_refs.go b/pkg/sentry/fsimpl/host/inode_refs.go
index 55c0fb3a9..17f90ce4a 100644
--- a/pkg/sentry/fsimpl/host/inode_refs.go
+++ b/pkg/sentry/fsimpl/host/inode_refs.go
@@ -1,6 +1,7 @@
package host
import (
+ "fmt"
"runtime"
"sync/atomic"
@@ -18,6 +19,11 @@ var inodeownerType *inode
// Note that the number of references is actually refCount + 1 so that a default
// zero-value Refs object contains one reference.
//
+// TODO(gvisor.dev/issue/1486): Store stack traces when leak check is enabled in
+// a map with 16-bit hashes, and store the hash in the top 16 bits of refCount.
+// This will allow us to add stack trace information to the leak messages
+// without growing the size of Refs.
+//
// +stateify savable
type inodeRefs struct {
// refCount is composed of two fields:
@@ -62,7 +68,7 @@ func (r *inodeRefs) ReadRefs() int64 {
//go:nosplit
func (r *inodeRefs) IncRef() {
if v := atomic.AddInt64(&r.refCount, 1); v <= 0 {
- panic("Incrementing non-positive ref count")
+ panic(fmt.Sprintf("Incrementing non-positive ref count %p owned by %T", r, inodeownerType))
}
}
@@ -101,7 +107,7 @@ func (r *inodeRefs) TryIncRef() bool {
func (r *inodeRefs) DecRef(destroy func()) {
switch v := atomic.AddInt64(&r.refCount, -1); {
case v < -1:
- panic("Decrementing non-positive ref count")
+ panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %T", r, inodeownerType))
case v == -1:
diff --git a/pkg/sentry/fsimpl/kernfs/dentry_refs.go b/pkg/sentry/fsimpl/kernfs/dentry_refs.go
new file mode 100644
index 000000000..79863b3bc
--- /dev/null
+++ b/pkg/sentry/fsimpl/kernfs/dentry_refs.go
@@ -0,0 +1,118 @@
+package kernfs
+
+import (
+ "fmt"
+ "runtime"
+ "sync/atomic"
+
+ "gvisor.dev/gvisor/pkg/log"
+ refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
+)
+
+// ownerType is used to customize logging. Note that we use a pointer to T so
+// that we do not copy the entire object when passed as a format parameter.
+var DentryownerType *Dentry
+
+// Refs implements refs.RefCounter. It keeps a reference count using atomic
+// operations and calls the destructor when the count reaches zero.
+//
+// Note that the number of references is actually refCount + 1 so that a default
+// zero-value Refs object contains one reference.
+//
+// TODO(gvisor.dev/issue/1486): Store stack traces when leak check is enabled in
+// a map with 16-bit hashes, and store the hash in the top 16 bits of refCount.
+// This will allow us to add stack trace information to the leak messages
+// without growing the size of Refs.
+//
+// +stateify savable
+type DentryRefs struct {
+ // refCount is composed of two fields:
+ //
+ // [32-bit speculative references]:[32-bit real references]
+ //
+ // Speculative references are used for TryIncRef, to avoid a CompareAndSwap
+ // loop. See IncRef, DecRef and TryIncRef for details of how these fields are
+ // used.
+ refCount int64
+}
+
+func (r *DentryRefs) finalize() {
+ var note string
+ switch refs_vfs1.GetLeakMode() {
+ case refs_vfs1.NoLeakChecking:
+ return
+ case refs_vfs1.UninitializedLeakChecking:
+ note = "(Leak checker uninitialized): "
+ }
+ if n := r.ReadRefs(); n != 0 {
+ log.Warningf("%sRefs %p owned by %T garbage collected with ref count of %d (want 0)", note, r, DentryownerType, n)
+ }
+}
+
+// EnableLeakCheck checks for reference leaks when Refs gets garbage collected.
+func (r *DentryRefs) EnableLeakCheck() {
+ if refs_vfs1.GetLeakMode() != refs_vfs1.NoLeakChecking {
+ runtime.SetFinalizer(r, (*DentryRefs).finalize)
+ }
+}
+
+// ReadRefs returns the current number of references. The returned count is
+// inherently racy and is unsafe to use without external synchronization.
+func (r *DentryRefs) ReadRefs() int64 {
+
+ return atomic.LoadInt64(&r.refCount) + 1
+}
+
+// IncRef implements refs.RefCounter.IncRef.
+//
+//go:nosplit
+func (r *DentryRefs) IncRef() {
+ if v := atomic.AddInt64(&r.refCount, 1); v <= 0 {
+ panic(fmt.Sprintf("Incrementing non-positive ref count %p owned by %T", r, DentryownerType))
+ }
+}
+
+// TryIncRef implements refs.RefCounter.TryIncRef.
+//
+// To do this safely without a loop, a speculative reference is first acquired
+// on the object. This allows multiple concurrent TryIncRef calls to distinguish
+// other TryIncRef calls from genuine references held.
+//
+//go:nosplit
+func (r *DentryRefs) TryIncRef() bool {
+ const speculativeRef = 1 << 32
+ v := atomic.AddInt64(&r.refCount, speculativeRef)
+ if int32(v) < 0 {
+
+ atomic.AddInt64(&r.refCount, -speculativeRef)
+ return false
+ }
+
+ atomic.AddInt64(&r.refCount, -speculativeRef+1)
+ return true
+}
+
+// DecRef implements refs.RefCounter.DecRef.
+//
+// Note that speculative references are counted here. Since they were added
+// prior to real references reaching zero, they will successfully convert to
+// real references. In other words, we see speculative references only in the
+// following case:
+//
+// A: TryIncRef [speculative increase => sees non-negative references]
+// B: DecRef [real decrease]
+// A: TryIncRef [transform speculative to real]
+//
+//go:nosplit
+func (r *DentryRefs) DecRef(destroy func()) {
+ switch v := atomic.AddInt64(&r.refCount, -1); {
+ case v < -1:
+ panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %T", r, DentryownerType))
+
+ case v == -1:
+
+ if destroy != nil {
+ destroy()
+ }
+ }
+}
diff --git a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
index 885856868..f442a5606 100644
--- a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
+++ b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
@@ -20,7 +20,6 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
- "gvisor.dev/gvisor/pkg/refs"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
@@ -344,8 +343,6 @@ type OrderedChildrenOptions struct {
//
// Must be initialized with Init before first use.
type OrderedChildren struct {
- refs.AtomicRefCount
-
// Can children be modified by user syscalls? If set to false, interface
// methods that would modify the children return EPERM. Immutable.
writable bool
@@ -361,14 +358,14 @@ func (o *OrderedChildren) Init(opts OrderedChildrenOptions) {
o.set = make(map[string]*slot)
}
-// DecRef implements Inode.DecRef.
-func (o *OrderedChildren) DecRef(ctx context.Context) {
- o.AtomicRefCount.DecRefWithDestructor(ctx, func(context.Context) {
- o.mu.Lock()
- defer o.mu.Unlock()
- o.order.Reset()
- o.set = nil
- })
+// Destroy clears the children stored in o. It should be called by structs
+// embedding OrderedChildren upon destruction, i.e. when their reference count
+// reaches zero.
+func (o *OrderedChildren) Destroy() {
+ o.mu.Lock()
+ defer o.mu.Unlock()
+ o.order.Reset()
+ o.set = nil
}
// Populate inserts children into this OrderedChildren, and d's dentry
@@ -549,6 +546,7 @@ func (InodeSymlink) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.D
//
// +stateify savable
type StaticDirectory struct {
+ StaticDirectoryRefs
InodeNotSymlink
InodeDirectoryNoNewChildren
InodeAttrs
@@ -594,11 +592,16 @@ func (s *StaticDirectory) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd
return fd.VFSFileDescription(), nil
}
-// SetStat implements Inode.SetStat not allowing inode attributes to be changed.
+// SetStat implements kernfs.Inode.SetStat not allowing inode attributes to be changed.
func (*StaticDirectory) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error {
return syserror.EPERM
}
+// DecRef implements kernfs.Inode.
+func (s *StaticDirectory) DecRef(context.Context) {
+ s.StaticDirectoryRefs.DecRef(s.Destroy)
+}
+
// AlwaysValid partially implements kernfs.inodeDynamicLookup.
type AlwaysValid struct{}
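
With OrderedChildren no longer reference-counted itself, teardown ordering is now the embedder's responsibility. The types in this commit simply pass the promoted Destroy straight to the generated DecRef, but an embedder with extra state could shadow Destroy and chain, as in this hypothetical sketch (dirInode and releaseWatches are invented names):

    // Hypothetical embedder with extra teardown: it shadows the promoted
    // Destroy and chains to OrderedChildren.Destroy to drop the child map last.
    func (d *dirInode) Destroy() {
    	d.releaseWatches()          // hypothetical extra cleanup
    	d.OrderedChildren.Destroy() // reset the child list and name->slot map
    }
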
diff --git a/pkg/sentry/fsimpl/kernfs/kernfs.go b/pkg/sentry/fsimpl/kernfs/kernfs.go
index 51dbc050c..ca3685800 100644
--- a/pkg/sentry/fsimpl/kernfs/kernfs.go
+++ b/pkg/sentry/fsimpl/kernfs/kernfs.go
@@ -57,7 +57,6 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
- "gvisor.dev/gvisor/pkg/refs"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
@@ -161,9 +160,9 @@ const (
//
// Must be initialized by Init prior to first use.
type Dentry struct {
- vfsd vfs.Dentry
+ DentryRefs
- refs.AtomicRefCount
+ vfsd vfs.Dentry
// flags caches useful information about the dentry from the inode. See the
// dflags* consts above. Must be accessed by atomic ops.
@@ -194,6 +193,7 @@ func (d *Dentry) Init(inode Inode) {
if ftype == linux.ModeSymlink {
d.flags |= dflagsIsSymlink
}
+ d.EnableLeakCheck()
}
// VFSDentry returns the generic vfs dentry for this kernfs dentry.
@@ -213,16 +213,14 @@ func (d *Dentry) isSymlink() bool {
// DecRef implements vfs.DentryImpl.DecRef.
func (d *Dentry) DecRef(ctx context.Context) {
- d.AtomicRefCount.DecRefWithDestructor(ctx, d.destroy)
-}
-
-// Precondition: Dentry must be removed from VFS' dentry cache.
-func (d *Dentry) destroy(ctx context.Context) {
- d.inode.DecRef(ctx) // IncRef from Init.
- d.inode = nil
- if d.parent != nil {
- d.parent.DecRef(ctx) // IncRef from Dentry.InsertChild.
- }
+ // Before the destructor is called, Dentry must be removed from VFS' dentry cache.
+ d.DentryRefs.DecRef(func() {
+ d.inode.DecRef(ctx) // IncRef from Init.
+ d.inode = nil
+ if d.parent != nil {
+ d.parent.DecRef(ctx) // IncRef from Dentry.InsertChild.
+ }
+ })
}
// InotifyWithParent implements vfs.DentryImpl.InotifyWithParent.
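
The hunk above also shows how the context plumbing changes: AtomicRefCount.DecRefWithDestructor took a context-aware destructor, while the generated DecRef takes a bare func(), so Dentry bridges the two by capturing ctx in a closure. The same adaptation in isolation, with hypothetical names (node/nodeRefs/child):

    // Sketch: adapting ctx-aware teardown to the generated DecRef(func())
    // signature by closing over ctx.
    func (n *node) DecRef(ctx context.Context) {
    	n.nodeRefs.DecRef(func() {
    		n.child.DecRef(ctx) // the closure still sees ctx
    		n.child = nil
    	})
    }
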
diff --git a/pkg/sentry/fsimpl/kernfs/kernfs_state_autogen.go b/pkg/sentry/fsimpl/kernfs/kernfs_state_autogen.go
index f396affb0..12aaf797f 100644
--- a/pkg/sentry/fsimpl/kernfs/kernfs_state_autogen.go
+++ b/pkg/sentry/fsimpl/kernfs/kernfs_state_autogen.go
@@ -6,6 +6,29 @@ import (
"gvisor.dev/gvisor/pkg/state"
)
+func (x *DentryRefs) StateTypeName() string {
+ return "pkg/sentry/fsimpl/kernfs.DentryRefs"
+}
+
+func (x *DentryRefs) StateFields() []string {
+ return []string{
+ "refCount",
+ }
+}
+
+func (x *DentryRefs) beforeSave() {}
+
+func (x *DentryRefs) StateSave(m state.Sink) {
+ x.beforeSave()
+ m.Save(0, &x.refCount)
+}
+
+func (x *DentryRefs) afterLoad() {}
+
+func (x *DentryRefs) StateLoad(m state.Source) {
+ m.Load(0, &x.refCount)
+}
+
func (x *DynamicBytesFile) StateTypeName() string {
return "pkg/sentry/fsimpl/kernfs.DynamicBytesFile"
}
@@ -85,6 +108,7 @@ func (x *StaticDirectory) StateTypeName() string {
func (x *StaticDirectory) StateFields() []string {
return []string{
+ "StaticDirectoryRefs",
"InodeNotSymlink",
"InodeDirectoryNoNewChildren",
"InodeAttrs",
@@ -99,25 +123,27 @@ func (x *StaticDirectory) beforeSave() {}
func (x *StaticDirectory) StateSave(m state.Sink) {
x.beforeSave()
- m.Save(0, &x.InodeNotSymlink)
- m.Save(1, &x.InodeDirectoryNoNewChildren)
- m.Save(2, &x.InodeAttrs)
- m.Save(3, &x.InodeNoDynamicLookup)
- m.Save(4, &x.OrderedChildren)
- m.Save(5, &x.locks)
- m.Save(6, &x.fdOpts)
+ m.Save(0, &x.StaticDirectoryRefs)
+ m.Save(1, &x.InodeNotSymlink)
+ m.Save(2, &x.InodeDirectoryNoNewChildren)
+ m.Save(3, &x.InodeAttrs)
+ m.Save(4, &x.InodeNoDynamicLookup)
+ m.Save(5, &x.OrderedChildren)
+ m.Save(6, &x.locks)
+ m.Save(7, &x.fdOpts)
}
func (x *StaticDirectory) afterLoad() {}
func (x *StaticDirectory) StateLoad(m state.Source) {
- m.Load(0, &x.InodeNotSymlink)
- m.Load(1, &x.InodeDirectoryNoNewChildren)
- m.Load(2, &x.InodeAttrs)
- m.Load(3, &x.InodeNoDynamicLookup)
- m.Load(4, &x.OrderedChildren)
- m.Load(5, &x.locks)
- m.Load(6, &x.fdOpts)
+ m.Load(0, &x.StaticDirectoryRefs)
+ m.Load(1, &x.InodeNotSymlink)
+ m.Load(2, &x.InodeDirectoryNoNewChildren)
+ m.Load(3, &x.InodeAttrs)
+ m.Load(4, &x.InodeNoDynamicLookup)
+ m.Load(5, &x.OrderedChildren)
+ m.Load(6, &x.locks)
+ m.Load(7, &x.fdOpts)
}
func (x *slotList) StateTypeName() string {
@@ -172,10 +198,35 @@ func (x *slotEntry) StateLoad(m state.Source) {
m.Load(1, &x.prev)
}
+func (x *StaticDirectoryRefs) StateTypeName() string {
+ return "pkg/sentry/fsimpl/kernfs.StaticDirectoryRefs"
+}
+
+func (x *StaticDirectoryRefs) StateFields() []string {
+ return []string{
+ "refCount",
+ }
+}
+
+func (x *StaticDirectoryRefs) beforeSave() {}
+
+func (x *StaticDirectoryRefs) StateSave(m state.Sink) {
+ x.beforeSave()
+ m.Save(0, &x.refCount)
+}
+
+func (x *StaticDirectoryRefs) afterLoad() {}
+
+func (x *StaticDirectoryRefs) StateLoad(m state.Source) {
+ m.Load(0, &x.refCount)
+}
+
func init() {
+ state.Register((*DentryRefs)(nil))
state.Register((*DynamicBytesFile)(nil))
state.Register((*DynamicBytesFD)(nil))
state.Register((*StaticDirectory)(nil))
state.Register((*slotList)(nil))
state.Register((*slotEntry)(nil))
+ state.Register((*StaticDirectoryRefs)(nil))
}
diff --git a/pkg/sentry/fsimpl/kernfs/static_directory_refs.go b/pkg/sentry/fsimpl/kernfs/static_directory_refs.go
new file mode 100644
index 000000000..478b04bdd
--- /dev/null
+++ b/pkg/sentry/fsimpl/kernfs/static_directory_refs.go
@@ -0,0 +1,118 @@
+package kernfs
+
+import (
+ "fmt"
+ "runtime"
+ "sync/atomic"
+
+ "gvisor.dev/gvisor/pkg/log"
+ refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
+)
+
+// ownerType is used to customize logging. Note that we use a pointer to T so
+// that we do not copy the entire object when passed as a format parameter.
+var StaticDirectoryownerType *StaticDirectory
+
+// Refs implements refs.RefCounter. It keeps a reference count using atomic
+// operations and calls the destructor when the count reaches zero.
+//
+// Note that the number of references is actually refCount + 1 so that a default
+// zero-value Refs object contains one reference.
+//
+// TODO(gvisor.dev/issue/1486): Store stack traces when leak check is enabled in
+// a map with 16-bit hashes, and store the hash in the top 16 bits of refCount.
+// This will allow us to add stack trace information to the leak messages
+// without growing the size of Refs.
+//
+// +stateify savable
+type StaticDirectoryRefs struct {
+ // refCount is composed of two fields:
+ //
+ // [32-bit speculative references]:[32-bit real references]
+ //
+ // Speculative references are used for TryIncRef, to avoid a CompareAndSwap
+ // loop. See IncRef, DecRef and TryIncRef for details of how these fields are
+ // used.
+ refCount int64
+}
+
+func (r *StaticDirectoryRefs) finalize() {
+ var note string
+ switch refs_vfs1.GetLeakMode() {
+ case refs_vfs1.NoLeakChecking:
+ return
+ case refs_vfs1.UninitializedLeakChecking:
+ note = "(Leak checker uninitialized): "
+ }
+ if n := r.ReadRefs(); n != 0 {
+ log.Warningf("%sRefs %p owned by %T garbage collected with ref count of %d (want 0)", note, r, StaticDirectoryownerType, n)
+ }
+}
+
+// EnableLeakCheck checks for reference leaks when Refs gets garbage collected.
+func (r *StaticDirectoryRefs) EnableLeakCheck() {
+ if refs_vfs1.GetLeakMode() != refs_vfs1.NoLeakChecking {
+ runtime.SetFinalizer(r, (*StaticDirectoryRefs).finalize)
+ }
+}
+
+// ReadRefs returns the current number of references. The returned count is
+// inherently racy and is unsafe to use without external synchronization.
+func (r *StaticDirectoryRefs) ReadRefs() int64 {
+
+ return atomic.LoadInt64(&r.refCount) + 1
+}
+
+// IncRef implements refs.RefCounter.IncRef.
+//
+//go:nosplit
+func (r *StaticDirectoryRefs) IncRef() {
+ if v := atomic.AddInt64(&r.refCount, 1); v <= 0 {
+ panic(fmt.Sprintf("Incrementing non-positive ref count %p owned by %T", r, StaticDirectoryownerType))
+ }
+}
+
+// TryIncRef implements refs.RefCounter.TryIncRef.
+//
+// To do this safely without a loop, a speculative reference is first acquired
+// on the object. This allows multiple concurrent TryIncRef calls to distinguish
+// other TryIncRef calls from genuine references held.
+//
+//go:nosplit
+func (r *StaticDirectoryRefs) TryIncRef() bool {
+ const speculativeRef = 1 << 32
+ v := atomic.AddInt64(&r.refCount, speculativeRef)
+ if int32(v) < 0 {
+
+ atomic.AddInt64(&r.refCount, -speculativeRef)
+ return false
+ }
+
+ atomic.AddInt64(&r.refCount, -speculativeRef+1)
+ return true
+}
+
+// DecRef implements refs.RefCounter.DecRef.
+//
+// Note that speculative references are counted here. Since they were added
+// prior to real references reaching zero, they will successfully convert to
+// real references. In other words, we see speculative references only in the
+// following case:
+//
+// A: TryIncRef [speculative increase => sees non-negative references]
+// B: DecRef [real decrease]
+// A: TryIncRef [transform speculative to real]
+//
+//go:nosplit
+func (r *StaticDirectoryRefs) DecRef(destroy func()) {
+ switch v := atomic.AddInt64(&r.refCount, -1); {
+ case v < -1:
+ panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %T", r, StaticDirectoryownerType))
+
+ case v == -1:
+
+ if destroy != nil {
+ destroy()
+ }
+ }
+}
diff --git a/pkg/sentry/fsimpl/proc/fd_dir_inode_refs.go b/pkg/sentry/fsimpl/proc/fd_dir_inode_refs.go
new file mode 100644
index 000000000..9431c1506
--- /dev/null
+++ b/pkg/sentry/fsimpl/proc/fd_dir_inode_refs.go
@@ -0,0 +1,118 @@
+package proc
+
+import (
+ "fmt"
+ "runtime"
+ "sync/atomic"
+
+ "gvisor.dev/gvisor/pkg/log"
+ refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
+)
+
+// ownerType is used to customize logging. Note that we use a pointer to T so
+// that we do not copy the entire object when passed as a format parameter.
+var fdDirInodeownerType *fdDirInode
+
+// Refs implements refs.RefCounter. It keeps a reference count using atomic
+// operations and calls the destructor when the count reaches zero.
+//
+// Note that the number of references is actually refCount + 1 so that a default
+// zero-value Refs object contains one reference.
+//
+// TODO(gvisor.dev/issue/1486): Store stack traces when leak check is enabled in
+// a map with 16-bit hashes, and store the hash in the top 16 bits of refCount.
+// This will allow us to add stack trace information to the leak messages
+// without growing the size of Refs.
+//
+// +stateify savable
+type fdDirInodeRefs struct {
+ // refCount is composed of two fields:
+ //
+ // [32-bit speculative references]:[32-bit real references]
+ //
+ // Speculative references are used for TryIncRef, to avoid a CompareAndSwap
+ // loop. See IncRef, DecRef and TryIncRef for details of how these fields are
+ // used.
+ refCount int64
+}
+
+func (r *fdDirInodeRefs) finalize() {
+ var note string
+ switch refs_vfs1.GetLeakMode() {
+ case refs_vfs1.NoLeakChecking:
+ return
+ case refs_vfs1.UninitializedLeakChecking:
+ note = "(Leak checker uninitialized): "
+ }
+ if n := r.ReadRefs(); n != 0 {
+ log.Warningf("%sRefs %p owned by %T garbage collected with ref count of %d (want 0)", note, r, fdDirInodeownerType, n)
+ }
+}
+
+// EnableLeakCheck checks for reference leaks when Refs gets garbage collected.
+func (r *fdDirInodeRefs) EnableLeakCheck() {
+ if refs_vfs1.GetLeakMode() != refs_vfs1.NoLeakChecking {
+ runtime.SetFinalizer(r, (*fdDirInodeRefs).finalize)
+ }
+}
+
+// ReadRefs returns the current number of references. The returned count is
+// inherently racy and is unsafe to use without external synchronization.
+func (r *fdDirInodeRefs) ReadRefs() int64 {
+
+ return atomic.LoadInt64(&r.refCount) + 1
+}
+
+// IncRef implements refs.RefCounter.IncRef.
+//
+//go:nosplit
+func (r *fdDirInodeRefs) IncRef() {
+ if v := atomic.AddInt64(&r.refCount, 1); v <= 0 {
+ panic(fmt.Sprintf("Incrementing non-positive ref count %p owned by %T", r, fdDirInodeownerType))
+ }
+}
+
+// TryIncRef implements refs.RefCounter.TryIncRef.
+//
+// To do this safely without a loop, a speculative reference is first acquired
+// on the object. This allows multiple concurrent TryIncRef calls to distinguish
+// other TryIncRef calls from genuine references held.
+//
+//go:nosplit
+func (r *fdDirInodeRefs) TryIncRef() bool {
+ const speculativeRef = 1 << 32
+ v := atomic.AddInt64(&r.refCount, speculativeRef)
+ if int32(v) < 0 {
+
+ atomic.AddInt64(&r.refCount, -speculativeRef)
+ return false
+ }
+
+ atomic.AddInt64(&r.refCount, -speculativeRef+1)
+ return true
+}
+
+// DecRef implements refs.RefCounter.DecRef.
+//
+// Note that speculative references are counted here. Since they were added
+// prior to real references reaching zero, they will successfully convert to
+// real references. In other words, we see speculative references only in the
+// following case:
+//
+// A: TryIncRef [speculative increase => sees non-negative references]
+// B: DecRef [real decrease]
+// A: TryIncRef [transform speculative to real]
+//
+//go:nosplit
+func (r *fdDirInodeRefs) DecRef(destroy func()) {
+ switch v := atomic.AddInt64(&r.refCount, -1); {
+ case v < -1:
+ panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %T", r, fdDirInodeownerType))
+
+ case v == -1:
+
+ if destroy != nil {
+ destroy()
+ }
+ }
+}
diff --git a/pkg/sentry/fsimpl/proc/fd_info_dir_inode_refs.go b/pkg/sentry/fsimpl/proc/fd_info_dir_inode_refs.go
new file mode 100644
index 000000000..872b20eb0
--- /dev/null
+++ b/pkg/sentry/fsimpl/proc/fd_info_dir_inode_refs.go
@@ -0,0 +1,118 @@
+package proc
+
+import (
+ "fmt"
+ "runtime"
+ "sync/atomic"
+
+ "gvisor.dev/gvisor/pkg/log"
+ refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
+)
+
+// ownerType is used to customize logging. Note that we use a pointer to T so
+// that we do not copy the entire object when passed as a format parameter.
+var fdInfoDirInodeownerType *fdInfoDirInode
+
+// Refs implements refs.RefCounter. It keeps a reference count using atomic
+// operations and calls the destructor when the count reaches zero.
+//
+// Note that the number of references is actually refCount + 1 so that a default
+// zero-value Refs object contains one reference.
+//
+// TODO(gvisor.dev/issue/1486): Store stack traces when leak check is enabled in
+// a map with 16-bit hashes, and store the hash in the top 16 bits of refCount.
+// This will allow us to add stack trace information to the leak messages
+// without growing the size of Refs.
+//
+// +stateify savable
+type fdInfoDirInodeRefs struct {
+ // refCount is composed of two fields:
+ //
+ // [32-bit speculative references]:[32-bit real references]
+ //
+ // Speculative references are used for TryIncRef, to avoid a CompareAndSwap
+ // loop. See IncRef, DecRef and TryIncRef for details of how these fields are
+ // used.
+ refCount int64
+}
+
+func (r *fdInfoDirInodeRefs) finalize() {
+ var note string
+ switch refs_vfs1.GetLeakMode() {
+ case refs_vfs1.NoLeakChecking:
+ return
+ case refs_vfs1.UninitializedLeakChecking:
+ note = "(Leak checker uninitialized): "
+ }
+ if n := r.ReadRefs(); n != 0 {
+ log.Warningf("%sRefs %p owned by %T garbage collected with ref count of %d (want 0)", note, r, fdInfoDirInodeownerType, n)
+ }
+}
+
+// EnableLeakCheck checks for reference leaks when Refs gets garbage collected.
+func (r *fdInfoDirInodeRefs) EnableLeakCheck() {
+ if refs_vfs1.GetLeakMode() != refs_vfs1.NoLeakChecking {
+ runtime.SetFinalizer(r, (*fdInfoDirInodeRefs).finalize)
+ }
+}
+
+// ReadRefs returns the current number of references. The returned count is
+// inherently racy and is unsafe to use without external synchronization.
+func (r *fdInfoDirInodeRefs) ReadRefs() int64 {
+
+ return atomic.LoadInt64(&r.refCount) + 1
+}
+
+// IncRef implements refs.RefCounter.IncRef.
+//
+//go:nosplit
+func (r *fdInfoDirInodeRefs) IncRef() {
+ if v := atomic.AddInt64(&r.refCount, 1); v <= 0 {
+ panic(fmt.Sprintf("Incrementing non-positive ref count %p owned by %T", r, fdInfoDirInodeownerType))
+ }
+}
+
+// TryIncRef implements refs.RefCounter.TryIncRef.
+//
+// To do this safely without a loop, a speculative reference is first acquired
+// on the object. This allows multiple concurrent TryIncRef calls to distinguish
+// other TryIncRef calls from genuine references held.
+//
+//go:nosplit
+func (r *fdInfoDirInodeRefs) TryIncRef() bool {
+ const speculativeRef = 1 << 32
+ v := atomic.AddInt64(&r.refCount, speculativeRef)
+ if int32(v) < 0 {
+
+ atomic.AddInt64(&r.refCount, -speculativeRef)
+ return false
+ }
+
+ atomic.AddInt64(&r.refCount, -speculativeRef+1)
+ return true
+}
+
+// DecRef implements refs.RefCounter.DecRef.
+//
+// Note that speculative references are counted here. Since they were added
+// prior to real references reaching zero, they will successfully convert to
+// real references. In other words, we see speculative references only in the
+// following case:
+//
+// A: TryIncRef [speculative increase => sees non-negative references]
+// B: DecRef [real decrease]
+// A: TryIncRef [transform speculative to real]
+//
+//go:nosplit
+func (r *fdInfoDirInodeRefs) DecRef(destroy func()) {
+ switch v := atomic.AddInt64(&r.refCount, -1); {
+ case v < -1:
+ panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %T", r, fdInfoDirInodeownerType))
+
+ case v == -1:
+
+ if destroy != nil {
+ destroy()
+ }
+ }
+}
diff --git a/pkg/sentry/fsimpl/proc/proc_state_autogen.go b/pkg/sentry/fsimpl/proc/proc_state_autogen.go
index a7e43b7a7..907ef38e0 100644
--- a/pkg/sentry/fsimpl/proc/proc_state_autogen.go
+++ b/pkg/sentry/fsimpl/proc/proc_state_autogen.go
@@ -6,6 +6,52 @@ import (
"gvisor.dev/gvisor/pkg/state"
)
+func (x *fdDirInodeRefs) StateTypeName() string {
+ return "pkg/sentry/fsimpl/proc.fdDirInodeRefs"
+}
+
+func (x *fdDirInodeRefs) StateFields() []string {
+ return []string{
+ "refCount",
+ }
+}
+
+func (x *fdDirInodeRefs) beforeSave() {}
+
+func (x *fdDirInodeRefs) StateSave(m state.Sink) {
+ x.beforeSave()
+ m.Save(0, &x.refCount)
+}
+
+func (x *fdDirInodeRefs) afterLoad() {}
+
+func (x *fdDirInodeRefs) StateLoad(m state.Source) {
+ m.Load(0, &x.refCount)
+}
+
+func (x *fdInfoDirInodeRefs) StateTypeName() string {
+ return "pkg/sentry/fsimpl/proc.fdInfoDirInodeRefs"
+}
+
+func (x *fdInfoDirInodeRefs) StateFields() []string {
+ return []string{
+ "refCount",
+ }
+}
+
+func (x *fdInfoDirInodeRefs) beforeSave() {}
+
+func (x *fdInfoDirInodeRefs) StateSave(m state.Sink) {
+ x.beforeSave()
+ m.Save(0, &x.refCount)
+}
+
+func (x *fdInfoDirInodeRefs) afterLoad() {}
+
+func (x *fdInfoDirInodeRefs) StateLoad(m state.Source) {
+ m.Load(0, &x.refCount)
+}
+
func (x *FilesystemType) StateTypeName() string {
return "pkg/sentry/fsimpl/proc.FilesystemType"
}
@@ -31,6 +77,7 @@ func (x *subtasksInode) StateTypeName() string {
func (x *subtasksInode) StateFields() []string {
return []string{
+ "subtasksInodeRefs",
"InodeNotSymlink",
"InodeDirectoryNoNewChildren",
"InodeAttrs",
@@ -48,31 +95,56 @@ func (x *subtasksInode) beforeSave() {}
func (x *subtasksInode) StateSave(m state.Sink) {
x.beforeSave()
- m.Save(0, &x.InodeNotSymlink)
- m.Save(1, &x.InodeDirectoryNoNewChildren)
- m.Save(2, &x.InodeAttrs)
- m.Save(3, &x.OrderedChildren)
- m.Save(4, &x.AlwaysValid)
- m.Save(5, &x.locks)
- m.Save(6, &x.fs)
- m.Save(7, &x.task)
- m.Save(8, &x.pidns)
- m.Save(9, &x.cgroupControllers)
+ m.Save(0, &x.subtasksInodeRefs)
+ m.Save(1, &x.InodeNotSymlink)
+ m.Save(2, &x.InodeDirectoryNoNewChildren)
+ m.Save(3, &x.InodeAttrs)
+ m.Save(4, &x.OrderedChildren)
+ m.Save(5, &x.AlwaysValid)
+ m.Save(6, &x.locks)
+ m.Save(7, &x.fs)
+ m.Save(8, &x.task)
+ m.Save(9, &x.pidns)
+ m.Save(10, &x.cgroupControllers)
}
func (x *subtasksInode) afterLoad() {}
func (x *subtasksInode) StateLoad(m state.Source) {
- m.Load(0, &x.InodeNotSymlink)
- m.Load(1, &x.InodeDirectoryNoNewChildren)
- m.Load(2, &x.InodeAttrs)
- m.Load(3, &x.OrderedChildren)
- m.Load(4, &x.AlwaysValid)
- m.Load(5, &x.locks)
- m.Load(6, &x.fs)
- m.Load(7, &x.task)
- m.Load(8, &x.pidns)
- m.Load(9, &x.cgroupControllers)
+ m.Load(0, &x.subtasksInodeRefs)
+ m.Load(1, &x.InodeNotSymlink)
+ m.Load(2, &x.InodeDirectoryNoNewChildren)
+ m.Load(3, &x.InodeAttrs)
+ m.Load(4, &x.OrderedChildren)
+ m.Load(5, &x.AlwaysValid)
+ m.Load(6, &x.locks)
+ m.Load(7, &x.fs)
+ m.Load(8, &x.task)
+ m.Load(9, &x.pidns)
+ m.Load(10, &x.cgroupControllers)
+}
+
+func (x *subtasksInodeRefs) StateTypeName() string {
+ return "pkg/sentry/fsimpl/proc.subtasksInodeRefs"
+}
+
+func (x *subtasksInodeRefs) StateFields() []string {
+ return []string{
+ "refCount",
+ }
+}
+
+func (x *subtasksInodeRefs) beforeSave() {}
+
+func (x *subtasksInodeRefs) StateSave(m state.Sink) {
+ x.beforeSave()
+ m.Save(0, &x.refCount)
+}
+
+func (x *subtasksInodeRefs) afterLoad() {}
+
+func (x *subtasksInodeRefs) StateLoad(m state.Source) {
+ m.Load(0, &x.refCount)
}
func (x *taskInode) StateTypeName() string {
@@ -81,6 +153,7 @@ func (x *taskInode) StateTypeName() string {
func (x *taskInode) StateFields() []string {
return []string{
+ "taskInodeRefs",
"InodeNotSymlink",
"InodeDirectoryNoNewChildren",
"InodeNoDynamicLookup",
@@ -95,25 +168,27 @@ func (x *taskInode) beforeSave() {}
func (x *taskInode) StateSave(m state.Sink) {
x.beforeSave()
- m.Save(0, &x.InodeNotSymlink)
- m.Save(1, &x.InodeDirectoryNoNewChildren)
- m.Save(2, &x.InodeNoDynamicLookup)
- m.Save(3, &x.InodeAttrs)
- m.Save(4, &x.OrderedChildren)
- m.Save(5, &x.locks)
- m.Save(6, &x.task)
+ m.Save(0, &x.taskInodeRefs)
+ m.Save(1, &x.InodeNotSymlink)
+ m.Save(2, &x.InodeDirectoryNoNewChildren)
+ m.Save(3, &x.InodeNoDynamicLookup)
+ m.Save(4, &x.InodeAttrs)
+ m.Save(5, &x.OrderedChildren)
+ m.Save(6, &x.locks)
+ m.Save(7, &x.task)
}
func (x *taskInode) afterLoad() {}
func (x *taskInode) StateLoad(m state.Source) {
- m.Load(0, &x.InodeNotSymlink)
- m.Load(1, &x.InodeDirectoryNoNewChildren)
- m.Load(2, &x.InodeNoDynamicLookup)
- m.Load(3, &x.InodeAttrs)
- m.Load(4, &x.OrderedChildren)
- m.Load(5, &x.locks)
- m.Load(6, &x.task)
+ m.Load(0, &x.taskInodeRefs)
+ m.Load(1, &x.InodeNotSymlink)
+ m.Load(2, &x.InodeDirectoryNoNewChildren)
+ m.Load(3, &x.InodeNoDynamicLookup)
+ m.Load(4, &x.InodeAttrs)
+ m.Load(5, &x.OrderedChildren)
+ m.Load(6, &x.locks)
+ m.Load(7, &x.task)
}
func (x *fdDirInode) StateTypeName() string {
@@ -122,6 +197,7 @@ func (x *fdDirInode) StateTypeName() string {
func (x *fdDirInode) StateFields() []string {
return []string{
+ "fdDirInodeRefs",
"InodeNotSymlink",
"InodeDirectoryNoNewChildren",
"InodeAttrs",
@@ -135,23 +211,25 @@ func (x *fdDirInode) beforeSave() {}
func (x *fdDirInode) StateSave(m state.Sink) {
x.beforeSave()
- m.Save(0, &x.InodeNotSymlink)
- m.Save(1, &x.InodeDirectoryNoNewChildren)
- m.Save(2, &x.InodeAttrs)
- m.Save(3, &x.OrderedChildren)
- m.Save(4, &x.AlwaysValid)
- m.Save(5, &x.fdDir)
+ m.Save(0, &x.fdDirInodeRefs)
+ m.Save(1, &x.InodeNotSymlink)
+ m.Save(2, &x.InodeDirectoryNoNewChildren)
+ m.Save(3, &x.InodeAttrs)
+ m.Save(4, &x.OrderedChildren)
+ m.Save(5, &x.AlwaysValid)
+ m.Save(6, &x.fdDir)
}
func (x *fdDirInode) afterLoad() {}
func (x *fdDirInode) StateLoad(m state.Source) {
- m.Load(0, &x.InodeNotSymlink)
- m.Load(1, &x.InodeDirectoryNoNewChildren)
- m.Load(2, &x.InodeAttrs)
- m.Load(3, &x.OrderedChildren)
- m.Load(4, &x.AlwaysValid)
- m.Load(5, &x.fdDir)
+ m.Load(0, &x.fdDirInodeRefs)
+ m.Load(1, &x.InodeNotSymlink)
+ m.Load(2, &x.InodeDirectoryNoNewChildren)
+ m.Load(3, &x.InodeAttrs)
+ m.Load(4, &x.OrderedChildren)
+ m.Load(5, &x.AlwaysValid)
+ m.Load(6, &x.fdDir)
}
func (x *fdSymlink) StateTypeName() string {
@@ -195,6 +273,7 @@ func (x *fdInfoDirInode) StateTypeName() string {
func (x *fdInfoDirInode) StateFields() []string {
return []string{
+ "fdInfoDirInodeRefs",
"InodeNotSymlink",
"InodeDirectoryNoNewChildren",
"InodeAttrs",
@@ -208,23 +287,25 @@ func (x *fdInfoDirInode) beforeSave() {}
func (x *fdInfoDirInode) StateSave(m state.Sink) {
x.beforeSave()
- m.Save(0, &x.InodeNotSymlink)
- m.Save(1, &x.InodeDirectoryNoNewChildren)
- m.Save(2, &x.InodeAttrs)
- m.Save(3, &x.OrderedChildren)
- m.Save(4, &x.AlwaysValid)
- m.Save(5, &x.fdDir)
+ m.Save(0, &x.fdInfoDirInodeRefs)
+ m.Save(1, &x.InodeNotSymlink)
+ m.Save(2, &x.InodeDirectoryNoNewChildren)
+ m.Save(3, &x.InodeAttrs)
+ m.Save(4, &x.OrderedChildren)
+ m.Save(5, &x.AlwaysValid)
+ m.Save(6, &x.fdDir)
}
func (x *fdInfoDirInode) afterLoad() {}
func (x *fdInfoDirInode) StateLoad(m state.Source) {
- m.Load(0, &x.InodeNotSymlink)
- m.Load(1, &x.InodeDirectoryNoNewChildren)
- m.Load(2, &x.InodeAttrs)
- m.Load(3, &x.OrderedChildren)
- m.Load(4, &x.AlwaysValid)
- m.Load(5, &x.fdDir)
+ m.Load(0, &x.fdInfoDirInodeRefs)
+ m.Load(1, &x.InodeNotSymlink)
+ m.Load(2, &x.InodeDirectoryNoNewChildren)
+ m.Load(3, &x.InodeAttrs)
+ m.Load(4, &x.OrderedChildren)
+ m.Load(5, &x.AlwaysValid)
+ m.Load(6, &x.fdDir)
}
func (x *fdInfoData) StateTypeName() string {
@@ -234,7 +315,6 @@ func (x *fdInfoData) StateTypeName() string {
func (x *fdInfoData) StateFields() []string {
return []string{
"DynamicBytesFile",
- "AtomicRefCount",
"task",
"fd",
}
@@ -245,18 +325,16 @@ func (x *fdInfoData) beforeSave() {}
func (x *fdInfoData) StateSave(m state.Sink) {
x.beforeSave()
m.Save(0, &x.DynamicBytesFile)
- m.Save(1, &x.AtomicRefCount)
- m.Save(2, &x.task)
- m.Save(3, &x.fd)
+ m.Save(1, &x.task)
+ m.Save(2, &x.fd)
}
func (x *fdInfoData) afterLoad() {}
func (x *fdInfoData) StateLoad(m state.Source) {
m.Load(0, &x.DynamicBytesFile)
- m.Load(1, &x.AtomicRefCount)
- m.Load(2, &x.task)
- m.Load(3, &x.fd)
+ m.Load(1, &x.task)
+ m.Load(2, &x.fd)
}
func (x *auxvData) StateTypeName() string {
@@ -670,6 +748,29 @@ func (x *mountsData) StateLoad(m state.Source) {
m.Load(1, &x.task)
}
+func (x *taskInodeRefs) StateTypeName() string {
+ return "pkg/sentry/fsimpl/proc.taskInodeRefs"
+}
+
+func (x *taskInodeRefs) StateFields() []string {
+ return []string{
+ "refCount",
+ }
+}
+
+func (x *taskInodeRefs) beforeSave() {}
+
+func (x *taskInodeRefs) StateSave(m state.Sink) {
+ x.beforeSave()
+ m.Save(0, &x.refCount)
+}
+
+func (x *taskInodeRefs) afterLoad() {}
+
+func (x *taskInodeRefs) StateLoad(m state.Source) {
+ m.Load(0, &x.refCount)
+}
+
func (x *ifinet6) StateTypeName() string {
return "pkg/sentry/fsimpl/proc.ifinet6"
}
@@ -910,6 +1011,7 @@ func (x *tasksInode) StateTypeName() string {
func (x *tasksInode) StateFields() []string {
return []string{
+ "tasksInodeRefs",
"InodeNotSymlink",
"InodeDirectoryNoNewChildren",
"InodeAttrs",
@@ -928,33 +1030,35 @@ func (x *tasksInode) beforeSave() {}
func (x *tasksInode) StateSave(m state.Sink) {
x.beforeSave()
- m.Save(0, &x.InodeNotSymlink)
- m.Save(1, &x.InodeDirectoryNoNewChildren)
- m.Save(2, &x.InodeAttrs)
- m.Save(3, &x.OrderedChildren)
- m.Save(4, &x.AlwaysValid)
- m.Save(5, &x.locks)
- m.Save(6, &x.fs)
- m.Save(7, &x.pidns)
- m.Save(8, &x.selfSymlink)
- m.Save(9, &x.threadSelfSymlink)
- m.Save(10, &x.cgroupControllers)
+ m.Save(0, &x.tasksInodeRefs)
+ m.Save(1, &x.InodeNotSymlink)
+ m.Save(2, &x.InodeDirectoryNoNewChildren)
+ m.Save(3, &x.InodeAttrs)
+ m.Save(4, &x.OrderedChildren)
+ m.Save(5, &x.AlwaysValid)
+ m.Save(6, &x.locks)
+ m.Save(7, &x.fs)
+ m.Save(8, &x.pidns)
+ m.Save(9, &x.selfSymlink)
+ m.Save(10, &x.threadSelfSymlink)
+ m.Save(11, &x.cgroupControllers)
}
func (x *tasksInode) afterLoad() {}
func (x *tasksInode) StateLoad(m state.Source) {
- m.Load(0, &x.InodeNotSymlink)
- m.Load(1, &x.InodeDirectoryNoNewChildren)
- m.Load(2, &x.InodeAttrs)
- m.Load(3, &x.OrderedChildren)
- m.Load(4, &x.AlwaysValid)
- m.Load(5, &x.locks)
- m.Load(6, &x.fs)
- m.Load(7, &x.pidns)
- m.Load(8, &x.selfSymlink)
- m.Load(9, &x.threadSelfSymlink)
- m.Load(10, &x.cgroupControllers)
+ m.Load(0, &x.tasksInodeRefs)
+ m.Load(1, &x.InodeNotSymlink)
+ m.Load(2, &x.InodeDirectoryNoNewChildren)
+ m.Load(3, &x.InodeAttrs)
+ m.Load(4, &x.OrderedChildren)
+ m.Load(5, &x.AlwaysValid)
+ m.Load(6, &x.locks)
+ m.Load(7, &x.fs)
+ m.Load(8, &x.pidns)
+ m.Load(9, &x.selfSymlink)
+ m.Load(10, &x.threadSelfSymlink)
+ m.Load(11, &x.cgroupControllers)
}
func (x *statData) StateTypeName() string {
@@ -1095,6 +1199,29 @@ func (x *filesystemsData) StateLoad(m state.Source) {
m.Load(0, &x.DynamicBytesFile)
}
+func (x *tasksInodeRefs) StateTypeName() string {
+ return "pkg/sentry/fsimpl/proc.tasksInodeRefs"
+}
+
+func (x *tasksInodeRefs) StateFields() []string {
+ return []string{
+ "refCount",
+ }
+}
+
+func (x *tasksInodeRefs) beforeSave() {}
+
+func (x *tasksInodeRefs) StateSave(m state.Sink) {
+ x.beforeSave()
+ m.Save(0, &x.refCount)
+}
+
+func (x *tasksInodeRefs) afterLoad() {}
+
+func (x *tasksInodeRefs) StateLoad(m state.Source) {
+ m.Load(0, &x.refCount)
+}
+
func (x *mmapMinAddrData) StateTypeName() string {
return "pkg/sentry/fsimpl/proc.mmapMinAddrData"
}
@@ -1229,8 +1356,11 @@ func (x *tcpMemData) StateLoad(m state.Source) {
}
func init() {
+ state.Register((*fdDirInodeRefs)(nil))
+ state.Register((*fdInfoDirInodeRefs)(nil))
state.Register((*FilesystemType)(nil))
state.Register((*subtasksInode)(nil))
+ state.Register((*subtasksInodeRefs)(nil))
state.Register((*taskInode)(nil))
state.Register((*fdDirInode)(nil))
state.Register((*fdSymlink)(nil))
@@ -1251,6 +1381,7 @@ func init() {
state.Register((*exeSymlink)(nil))
state.Register((*mountInfoData)(nil))
state.Register((*mountsData)(nil))
+ state.Register((*taskInodeRefs)(nil))
state.Register((*ifinet6)(nil))
state.Register((*netDevData)(nil))
state.Register((*netUnixData)(nil))
@@ -1267,6 +1398,7 @@ func init() {
state.Register((*uptimeData)(nil))
state.Register((*versionData)(nil))
state.Register((*filesystemsData)(nil))
+ state.Register((*tasksInodeRefs)(nil))
state.Register((*mmapMinAddrData)(nil))
state.Register((*hostnameData)(nil))
state.Register((*tcpSackData)(nil))
diff --git a/pkg/sentry/fsimpl/proc/subtasks.go b/pkg/sentry/fsimpl/proc/subtasks.go
index f25747da3..01c0efb3a 100644
--- a/pkg/sentry/fsimpl/proc/subtasks.go
+++ b/pkg/sentry/fsimpl/proc/subtasks.go
@@ -31,6 +31,7 @@ import (
//
// +stateify savable
type subtasksInode struct {
+ subtasksInodeRefs
kernfs.InodeNotSymlink
kernfs.InodeDirectoryNoNewChildren
kernfs.InodeAttrs
@@ -57,6 +58,7 @@ func (fs *filesystem) newSubtasks(task *kernel.Task, pidns *kernel.PIDNamespace,
// Note: credentials are overridden by taskOwnedInode.
subInode.InodeAttrs.Init(task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.ModeDirectory|0555)
subInode.OrderedChildren.Init(kernfs.OrderedChildrenOptions{})
+ subInode.EnableLeakCheck()
inode := &taskOwnedInode{Inode: subInode, owner: task}
dentry := &kernfs.Dentry{}
@@ -182,3 +184,8 @@ func (i *subtasksInode) Stat(ctx context.Context, vsfs *vfs.Filesystem, opts vfs
func (*subtasksInode) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error {
return syserror.EPERM
}
+
+// DecRef implements kernfs.Inode.
+func (i *subtasksInode) DecRef(context.Context) {
+ i.subtasksInodeRefs.DecRef(i.Destroy)
+}
diff --git a/pkg/sentry/fsimpl/proc/subtasks_inode_refs.go b/pkg/sentry/fsimpl/proc/subtasks_inode_refs.go
new file mode 100644
index 000000000..c6d9b3522
--- /dev/null
+++ b/pkg/sentry/fsimpl/proc/subtasks_inode_refs.go
@@ -0,0 +1,118 @@
+package proc
+
+import (
+ "fmt"
+ "runtime"
+ "sync/atomic"
+
+ "gvisor.dev/gvisor/pkg/log"
+ refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
+)
+
+// ownerType is used to customize logging. Note that we use a pointer to T so
+// that we do not copy the entire object when passed as a format parameter.
+var subtasksInodeownerType *subtasksInode
+
+// Refs implements refs.RefCounter. It keeps a reference count using atomic
+// operations and calls the destructor when the count reaches zero.
+//
+// Note that the number of references is actually refCount + 1 so that a default
+// zero-value Refs object contains one reference.
+//
+// TODO(gvisor.dev/issue/1486): Store stack traces when leak check is enabled in
+// a map with 16-bit hashes, and store the hash in the top 16 bits of refCount.
+// This will allow us to add stack trace information to the leak messages
+// without growing the size of Refs.
+//
+// +stateify savable
+type subtasksInodeRefs struct {
+ // refCount is composed of two fields:
+ //
+ // [32-bit speculative references]:[32-bit real references]
+ //
+ // Speculative references are used for TryIncRef, to avoid a CompareAndSwap
+ // loop. See IncRef, DecRef and TryIncRef for details of how these fields are
+ // used.
+ refCount int64
+}
+
+func (r *subtasksInodeRefs) finalize() {
+ var note string
+ switch refs_vfs1.GetLeakMode() {
+ case refs_vfs1.NoLeakChecking:
+ return
+ case refs_vfs1.UninitializedLeakChecking:
+ note = "(Leak checker uninitialized): "
+ }
+ if n := r.ReadRefs(); n != 0 {
+ log.Warningf("%sRefs %p owned by %T garbage collected with ref count of %d (want 0)", note, r, subtasksInodeownerType, n)
+ }
+}
+
+// EnableLeakCheck checks for reference leaks when Refs gets garbage collected.
+func (r *subtasksInodeRefs) EnableLeakCheck() {
+ if refs_vfs1.GetLeakMode() != refs_vfs1.NoLeakChecking {
+ runtime.SetFinalizer(r, (*subtasksInodeRefs).finalize)
+ }
+}
+
+// ReadRefs returns the current number of references. The returned count is
+// inherently racy and is unsafe to use without external synchronization.
+func (r *subtasksInodeRefs) ReadRefs() int64 {
+
+ return atomic.LoadInt64(&r.refCount) + 1
+}
+
+// IncRef implements refs.RefCounter.IncRef.
+//
+//go:nosplit
+func (r *subtasksInodeRefs) IncRef() {
+ if v := atomic.AddInt64(&r.refCount, 1); v <= 0 {
+ panic(fmt.Sprintf("Incrementing non-positive ref count %p owned by %T", r, subtasksInodeownerType))
+ }
+}
+
+// TryIncRef implements refs.RefCounter.TryIncRef.
+//
+// To do this safely without a loop, a speculative reference is first acquired
+// on the object. This allows multiple concurrent TryIncRef calls to distinguish
+// other TryIncRef calls from genuine references held.
+//
+//go:nosplit
+func (r *subtasksInodeRefs) TryIncRef() bool {
+ const speculativeRef = 1 << 32
+ v := atomic.AddInt64(&r.refCount, speculativeRef)
+ if int32(v) < 0 {
+
+ atomic.AddInt64(&r.refCount, -speculativeRef)
+ return false
+ }
+
+ atomic.AddInt64(&r.refCount, -speculativeRef+1)
+ return true
+}
+
+// DecRef implements refs.RefCounter.DecRef.
+//
+// Note that speculative references are counted here. Since they were added
+// prior to real references reaching zero, they will successfully convert to
+// real references. In other words, we see speculative references only in the
+// following case:
+//
+// A: TryIncRef [speculative increase => sees non-negative references]
+// B: DecRef [real decrease]
+// A: TryIncRef [transform speculative to real]
+//
+//go:nosplit
+func (r *subtasksInodeRefs) DecRef(destroy func()) {
+ switch v := atomic.AddInt64(&r.refCount, -1); {
+ case v < -1:
+ panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %T", r, subtasksInodeownerType))
+
+ case v == -1:
+
+ if destroy != nil {
+ destroy()
+ }
+ }
+}
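The _refs.go files in this commit are all instances of one generated template, so the counting scheme is worth spelling out once. Below is a minimal, self-contained sketch of the same scheme under the layout the template documents (low 32 bits hold real references minus one, high 32 bits hold in-flight speculative references); the sketchRefs type and main function are illustrative only and not part of the gVisor tree.

package main

import (
	"fmt"
	"sync/atomic"
)

// sketchRefs mirrors the generated template: the zero value already holds
// one reference, so refCount stores "real references - 1".
type sketchRefs struct {
	refCount int64
}

// ReadRefs reports the current (inherently racy) reference count.
func (r *sketchRefs) ReadRefs() int64 {
	return atomic.LoadInt64(&r.refCount) + 1
}

// IncRef takes a reference; the object must still be live.
func (r *sketchRefs) IncRef() {
	if v := atomic.AddInt64(&r.refCount, 1); v <= 0 {
		panic("IncRef on released object")
	}
}

// TryIncRef takes a reference only if the object is still live, without a
// CompareAndSwap loop: a speculative reference in the high 32 bits marks the
// attempt, and the low 32 bits reveal whether any real reference remained.
func (r *sketchRefs) TryIncRef() bool {
	const speculativeRef = 1 << 32
	v := atomic.AddInt64(&r.refCount, speculativeRef)
	if int32(v) < 0 {
		// Low 32 bits were -1: the last real reference is gone. Back out.
		atomic.AddInt64(&r.refCount, -speculativeRef)
		return false
	}
	// Convert the speculative reference into a real one.
	atomic.AddInt64(&r.refCount, -speculativeRef+1)
	return true
}

// DecRef drops a reference, running destroy when the last one drops.
func (r *sketchRefs) DecRef(destroy func()) {
	if v := atomic.AddInt64(&r.refCount, -1); v == -1 && destroy != nil {
		destroy()
	}
}

func main() {
	var r sketchRefs           // one reference by construction
	r.IncRef()                 // two
	fmt.Println(r.TryIncRef()) // true; three
	r.DecRef(nil)              // two
	r.DecRef(nil)              // one
	r.DecRef(func() { fmt.Println("destroyed") }) // last ref: runs destructor
}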
diff --git a/pkg/sentry/fsimpl/proc/task.go b/pkg/sentry/fsimpl/proc/task.go
index 109b31b4c..66b557abd 100644
--- a/pkg/sentry/fsimpl/proc/task.go
+++ b/pkg/sentry/fsimpl/proc/task.go
@@ -32,6 +32,7 @@ import (
//
// +stateify savable
type taskInode struct {
+ taskInodeRefs
kernfs.InodeNotSymlink
kernfs.InodeDirectoryNoNewChildren
kernfs.InodeNoDynamicLookup
@@ -84,6 +85,7 @@ func (fs *filesystem) newTaskInode(task *kernel.Task, pidns *kernel.PIDNamespace
taskInode := &taskInode{task: task}
// Note: credentials are overridden by taskOwnedInode.
taskInode.InodeAttrs.Init(task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.ModeDirectory|0555)
+ taskInode.EnableLeakCheck()
inode := &taskOwnedInode{Inode: taskInode, owner: task}
dentry := &kernfs.Dentry{}
@@ -119,6 +121,11 @@ func (*taskInode) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, v
return syserror.EPERM
}
+// DecRef implements kernfs.Inode.DecRef.
+func (i *taskInode) DecRef(context.Context) {
+ i.taskInodeRefs.DecRef(i.Destroy)
+}
+
// taskOwnedInode implements kernfs.Inode and overrides inode owner with task
// effective user and group.
type taskOwnedInode struct {
@@ -147,6 +154,7 @@ func (fs *filesystem) newTaskOwnedDir(task *kernel.Task, ino uint64, perm linux.
dir.Init(task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, ino, perm, kernfs.GenericDirectoryFDOptions{
SeekEnd: kernfs.SeekEndZero,
})
+ dir.EnableLeakCheck()
inode := &taskOwnedInode{Inode: dir, owner: task}
d := &kernfs.Dentry{}
diff --git a/pkg/sentry/fsimpl/proc/task_fds.go b/pkg/sentry/fsimpl/proc/task_fds.go
index e8fcb9aa1..0527b2de8 100644
--- a/pkg/sentry/fsimpl/proc/task_fds.go
+++ b/pkg/sentry/fsimpl/proc/task_fds.go
@@ -22,7 +22,6 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
- "gvisor.dev/gvisor/pkg/refs"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
@@ -101,6 +100,7 @@ func (i *fdDir) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback, off
//
// +stateify savable
type fdDirInode struct {
+ fdDirInodeRefs
kernfs.InodeNotSymlink
kernfs.InodeDirectoryNoNewChildren
kernfs.InodeAttrs
@@ -120,6 +120,7 @@ func (fs *filesystem) newFDDirInode(task *kernel.Task) *kernfs.Dentry {
},
}
inode.InodeAttrs.Init(task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.ModeDirectory|0555)
+ inode.EnableLeakCheck()
dentry := &kernfs.Dentry{}
dentry.Init(inode)
@@ -175,6 +176,11 @@ func (i *fdDirInode) CheckPermissions(ctx context.Context, creds *auth.Credentia
return err
}
+// DecRef implements kernfs.Inode.DecRef.
+func (i *fdDirInode) DecRef(context.Context) {
+ i.fdDirInodeRefs.DecRef(i.Destroy)
+}
+
// fdSymlink is a symlink for the /proc/[pid]/fd/[fd] file.
//
// +stateify savable
@@ -227,6 +233,7 @@ func (s *fdSymlink) Getlink(ctx context.Context, mnt *vfs.Mount) (vfs.VirtualDen
//
// +stateify savable
type fdInfoDirInode struct {
+ fdInfoDirInodeRefs
kernfs.InodeNotSymlink
kernfs.InodeDirectoryNoNewChildren
kernfs.InodeAttrs
@@ -245,6 +252,7 @@ func (fs *filesystem) newFDInfoDirInode(task *kernel.Task) *kernfs.Dentry {
},
}
inode.InodeAttrs.Init(task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.ModeDirectory|0555)
+ inode.EnableLeakCheck()
dentry := &kernfs.Dentry{}
dentry.Init(inode)
@@ -282,12 +290,16 @@ func (i *fdInfoDirInode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *
return fd.VFSFileDescription(), nil
}
+// DecRef implements kernfs.Inode.DecRef.
+func (i *fdInfoDirInode) DecRef(context.Context) {
+ i.fdInfoDirInodeRefs.DecRef(i.Destroy)
+}
+
// fdInfoData implements vfs.DynamicBytesSource for /proc/[pid]/fdinfo/[fd].
//
// +stateify savable
type fdInfoData struct {
kernfs.DynamicBytesFile
- refs.AtomicRefCount
task *kernel.Task
fd int32
diff --git a/pkg/sentry/fsimpl/proc/task_inode_refs.go b/pkg/sentry/fsimpl/proc/task_inode_refs.go
new file mode 100644
index 000000000..714488450
--- /dev/null
+++ b/pkg/sentry/fsimpl/proc/task_inode_refs.go
@@ -0,0 +1,118 @@
+package proc
+
+import (
+ "fmt"
+ "runtime"
+ "sync/atomic"
+
+ "gvisor.dev/gvisor/pkg/log"
+ refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
+)
+
+// ownerType is used to customize logging. Note that we use a pointer to T so
+// that we do not copy the entire object when passed as a format parameter.
+var taskInodeownerType *taskInode
+
+// Refs implements refs.RefCounter. It keeps a reference count using atomic
+// operations and calls the destructor when the count reaches zero.
+//
+// Note that the number of references is actually refCount + 1 so that a default
+// zero-value Refs object contains one reference.
+//
+// TODO(gvisor.dev/issue/1486): Store stack traces when leak check is enabled in
+// a map with 16-bit hashes, and store the hash in the top 16 bits of refCount.
+// This will allow us to add stack trace information to the leak messages
+// without growing the size of Refs.
+//
+// +stateify savable
+type taskInodeRefs struct {
+ // refCount is composed of two fields:
+ //
+ // [32-bit speculative references]:[32-bit real references]
+ //
+ // Speculative references are used for TryIncRef, to avoid a CompareAndSwap
+ // loop. See IncRef, DecRef and TryIncRef for details of how these fields are
+ // used.
+ refCount int64
+}
+
+func (r *taskInodeRefs) finalize() {
+ var note string
+ switch refs_vfs1.GetLeakMode() {
+ case refs_vfs1.NoLeakChecking:
+ return
+ case refs_vfs1.UninitializedLeakChecking:
+ note = "(Leak checker uninitialized): "
+ }
+ if n := r.ReadRefs(); n != 0 {
+ log.Warningf("%sRefs %p owned by %T garbage collected with ref count of %d (want 0)", note, r, taskInodeownerType, n)
+ }
+}
+
+// EnableLeakCheck checks for reference leaks when Refs gets garbage collected.
+func (r *taskInodeRefs) EnableLeakCheck() {
+ if refs_vfs1.GetLeakMode() != refs_vfs1.NoLeakChecking {
+ runtime.SetFinalizer(r, (*taskInodeRefs).finalize)
+ }
+}
+
+// ReadRefs returns the current number of references. The returned count is
+// inherently racy and is unsafe to use without external synchronization.
+func (r *taskInodeRefs) ReadRefs() int64 {
+
+ return atomic.LoadInt64(&r.refCount) + 1
+}
+
+// IncRef implements refs.RefCounter.IncRef.
+//
+//go:nosplit
+func (r *taskInodeRefs) IncRef() {
+ if v := atomic.AddInt64(&r.refCount, 1); v <= 0 {
+ panic(fmt.Sprintf("Incrementing non-positive ref count %p owned by %T", r, taskInodeownerType))
+ }
+}
+
+// TryIncRef implements refs.RefCounter.TryIncRef.
+//
+// To do this safely without a loop, a speculative reference is first acquired
+// on the object. This allows multiple concurrent TryIncRef calls to distinguish
+// other TryIncRef calls from genuine references held.
+//
+//go:nosplit
+func (r *taskInodeRefs) TryIncRef() bool {
+ const speculativeRef = 1 << 32
+ v := atomic.AddInt64(&r.refCount, speculativeRef)
+ if int32(v) < 0 {
+
+ atomic.AddInt64(&r.refCount, -speculativeRef)
+ return false
+ }
+
+ atomic.AddInt64(&r.refCount, -speculativeRef+1)
+ return true
+}
+
+// DecRef implements refs.RefCounter.DecRef.
+//
+// Note that speculative references are counted here. Since they were added
+// prior to real references reaching zero, they will successfully convert to
+// real references. In other words, we see speculative references only in the
+// following case:
+//
+// A: TryIncRef [speculative increase => sees non-negative references]
+// B: DecRef [real decrease]
+// A: TryIncRef [transform speculative to real]
+//
+//go:nosplit
+func (r *taskInodeRefs) DecRef(destroy func()) {
+ switch v := atomic.AddInt64(&r.refCount, -1); {
+ case v < -1:
+ panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %T", r, taskInodeownerType))
+
+ case v == -1:
+
+ if destroy != nil {
+ destroy()
+ }
+ }
+}
diff --git a/pkg/sentry/fsimpl/proc/task_net.go b/pkg/sentry/fsimpl/proc/task_net.go
index a4c884bf9..4e69782c7 100644
--- a/pkg/sentry/fsimpl/proc/task_net.go
+++ b/pkg/sentry/fsimpl/proc/task_net.go
@@ -262,7 +262,7 @@ func (n *netUnixData) Generate(ctx context.Context, buf *bytes.Buffer) error {
// For now, we always redact this pointer.
fmt.Fprintf(buf, "%#016p: %08X %08X %08X %04X %02X %8d",
(*unix.SocketOperations)(nil), // Num, pointer to kernel socket struct.
- s.Refs()-1, // RefCount, don't count our own ref.
+ s.ReadRefs()-1, // RefCount, don't count our own ref.
0, // Protocol, always 0 for UDS.
sockFlags, // Flags.
sops.Endpoint().Type(), // Type.
@@ -430,7 +430,7 @@ func commonGenerateTCP(ctx context.Context, buf *bytes.Buffer, k *kernel.Kernel,
// Field: refcount. Don't count the ref we obtain while dereferencing
// the weakref to this socket.
- fmt.Fprintf(buf, "%d ", s.Refs()-1)
+ fmt.Fprintf(buf, "%d ", s.ReadRefs()-1)
// Field: Socket struct address. Redacted due to the same reason as
// the 'Num' field in /proc/net/unix, see netUnix.ReadSeqFileData.
@@ -589,7 +589,7 @@ func (d *netUDPData) Generate(ctx context.Context, buf *bytes.Buffer) error {
// Field: ref; reference count on the socket inode. Don't count the ref
// we obtain while dereferencing the weakref to this socket.
- fmt.Fprintf(buf, "%d ", s.Refs()-1)
+ fmt.Fprintf(buf, "%d ", s.ReadRefs()-1)
// Field: Socket struct address. Redacted due to the same reason as
// the 'Num' field in /proc/net/unix, see netUnix.ReadSeqFileData.
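One consequence of the template shows up in these /proc/net hunks: because the zero value already counts as a reference, ReadRefs returns refCount+1, and the callers above subtract one so that the reference taken while dereferencing the weakref is not reported. Reusing the hypothetical sketchRefs type from the earlier sketch, a replacement main demonstrating the convention:

func main() {
	var r sketchRefs              // the socket's own reference
	r.IncRef()                    // ref taken while dereferencing the weakref
	fmt.Println(r.ReadRefs() - 1) // prints 1: the formatter hides its own ref
	r.DecRef(nil)
}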
diff --git a/pkg/sentry/fsimpl/proc/tasks.go b/pkg/sentry/fsimpl/proc/tasks.go
index 1391992b7..863c4467e 100644
--- a/pkg/sentry/fsimpl/proc/tasks.go
+++ b/pkg/sentry/fsimpl/proc/tasks.go
@@ -37,6 +37,7 @@ const (
//
// +stateify savable
type tasksInode struct {
+ tasksInodeRefs
kernfs.InodeNotSymlink
kernfs.InodeDirectoryNoNewChildren
kernfs.InodeAttrs
@@ -84,6 +85,7 @@ func (fs *filesystem) newTasksInode(k *kernel.Kernel, pidns *kernel.PIDNamespace
cgroupControllers: cgroupControllers,
}
inode.InodeAttrs.Init(root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.ModeDirectory|0555)
+ inode.EnableLeakCheck()
dentry := &kernfs.Dentry{}
dentry.Init(inode)
@@ -226,6 +228,11 @@ func (i *tasksInode) Stat(ctx context.Context, vsfs *vfs.Filesystem, opts vfs.St
return stat, nil
}
+// DecRef implements kernfs.Inode.DecRef.
+func (i *tasksInode) DecRef(context.Context) {
+ i.tasksInodeRefs.DecRef(i.Destroy)
+}
+
// staticFileSetStat implements a special static file that allows inode
// attributes to be set. This is to support /proc files that are readonly, but
// allow attributes to be set.
diff --git a/pkg/sentry/fsimpl/proc/tasks_inode_refs.go b/pkg/sentry/fsimpl/proc/tasks_inode_refs.go
new file mode 100644
index 000000000..22d9cc488
--- /dev/null
+++ b/pkg/sentry/fsimpl/proc/tasks_inode_refs.go
@@ -0,0 +1,118 @@
+package proc
+
+import (
+ "fmt"
+ "runtime"
+ "sync/atomic"
+
+ "gvisor.dev/gvisor/pkg/log"
+ refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
+)
+
+// ownerType is used to customize logging. Note that we use a pointer to T so
+// that we do not copy the entire object when passed as a format parameter.
+var tasksInodeownerType *tasksInode
+
+// Refs implements refs.RefCounter. It keeps a reference count using atomic
+// operations and calls the destructor when the count reaches zero.
+//
+// Note that the number of references is actually refCount + 1 so that a default
+// zero-value Refs object contains one reference.
+//
+// TODO(gvisor.dev/issue/1486): Store stack traces when leak check is enabled in
+// a map with 16-bit hashes, and store the hash in the top 16 bits of refCount.
+// This will allow us to add stack trace information to the leak messages
+// without growing the size of Refs.
+//
+// +stateify savable
+type tasksInodeRefs struct {
+ // refCount is composed of two fields:
+ //
+ // [32-bit speculative references]:[32-bit real references]
+ //
+ // Speculative references are used for TryIncRef, to avoid a CompareAndSwap
+ // loop. See IncRef, DecRef and TryIncRef for details of how these fields are
+ // used.
+ refCount int64
+}
+
+func (r *tasksInodeRefs) finalize() {
+ var note string
+ switch refs_vfs1.GetLeakMode() {
+ case refs_vfs1.NoLeakChecking:
+ return
+ case refs_vfs1.UninitializedLeakChecking:
+ note = "(Leak checker uninitialized): "
+ }
+ if n := r.ReadRefs(); n != 0 {
+ log.Warningf("%sRefs %p owned by %T garbage collected with ref count of %d (want 0)", note, r, tasksInodeownerType, n)
+ }
+}
+
+// EnableLeakCheck checks for reference leaks when Refs gets garbage collected.
+func (r *tasksInodeRefs) EnableLeakCheck() {
+ if refs_vfs1.GetLeakMode() != refs_vfs1.NoLeakChecking {
+ runtime.SetFinalizer(r, (*tasksInodeRefs).finalize)
+ }
+}
+
+// ReadRefs returns the current number of references. The returned count is
+// inherently racy and is unsafe to use without external synchronization.
+func (r *tasksInodeRefs) ReadRefs() int64 {
+
+ return atomic.LoadInt64(&r.refCount) + 1
+}
+
+// IncRef implements refs.RefCounter.IncRef.
+//
+//go:nosplit
+func (r *tasksInodeRefs) IncRef() {
+ if v := atomic.AddInt64(&r.refCount, 1); v <= 0 {
+ panic(fmt.Sprintf("Incrementing non-positive ref count %p owned by %T", r, tasksInodeownerType))
+ }
+}
+
+// TryIncRef implements refs.RefCounter.TryIncRef.
+//
+// To do this safely without a loop, a speculative reference is first acquired
+// on the object. This allows multiple concurrent TryIncRef calls to distinguish
+// other TryIncRef calls from genuine references held.
+//
+//go:nosplit
+func (r *tasksInodeRefs) TryIncRef() bool {
+ const speculativeRef = 1 << 32
+ v := atomic.AddInt64(&r.refCount, speculativeRef)
+ if int32(v) < 0 {
+
+ atomic.AddInt64(&r.refCount, -speculativeRef)
+ return false
+ }
+
+ atomic.AddInt64(&r.refCount, -speculativeRef+1)
+ return true
+}
+
+// DecRef implements refs.RefCounter.DecRef.
+//
+// Note that speculative references are counted here. Since they were added
+// prior to real references reaching zero, they will successfully convert to
+// real references. In other words, we see speculative references only in the
+// following case:
+//
+// A: TryIncRef [speculative increase => sees non-negative references]
+// B: DecRef [real decrease]
+// A: TryIncRef [transform speculative to real]
+//
+//go:nosplit
+func (r *tasksInodeRefs) DecRef(destroy func()) {
+ switch v := atomic.AddInt64(&r.refCount, -1); {
+ case v < -1:
+ panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %T", r, tasksInodeownerType))
+
+ case v == -1:
+
+ if destroy != nil {
+ destroy()
+ }
+ }
+}
diff --git a/pkg/sentry/fsimpl/sys/dir_refs.go b/pkg/sentry/fsimpl/sys/dir_refs.go
new file mode 100644
index 000000000..89609b198
--- /dev/null
+++ b/pkg/sentry/fsimpl/sys/dir_refs.go
@@ -0,0 +1,118 @@
+package sys
+
+import (
+ "fmt"
+ "runtime"
+ "sync/atomic"
+
+ "gvisor.dev/gvisor/pkg/log"
+ refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
+)
+
+// ownerType is used to customize logging. Note that we use a pointer to T so
+// that we do not copy the entire object when passed as a format parameter.
+var dirownerType *dir
+
+// Refs implements refs.RefCounter. It keeps a reference count using atomic
+// operations and calls the destructor when the count reaches zero.
+//
+// Note that the number of references is actually refCount + 1 so that a default
+// zero-value Refs object contains one reference.
+//
+// TODO(gvisor.dev/issue/1486): Store stack traces when leak check is enabled in
+// a map with 16-bit hashes, and store the hash in the top 16 bits of refCount.
+// This will allow us to add stack trace information to the leak messages
+// without growing the size of Refs.
+//
+// +stateify savable
+type dirRefs struct {
+ // refCount is composed of two fields:
+ //
+ // [32-bit speculative references]:[32-bit real references]
+ //
+ // Speculative references are used for TryIncRef, to avoid a CompareAndSwap
+ // loop. See IncRef, DecRef and TryIncRef for details of how these fields are
+ // used.
+ refCount int64
+}
+
+func (r *dirRefs) finalize() {
+ var note string
+ switch refs_vfs1.GetLeakMode() {
+ case refs_vfs1.NoLeakChecking:
+ return
+ case refs_vfs1.UninitializedLeakChecking:
+ note = "(Leak checker uninitialized): "
+ }
+ if n := r.ReadRefs(); n != 0 {
+ log.Warningf("%sRefs %p owned by %T garbage collected with ref count of %d (want 0)", note, r, dirownerType, n)
+ }
+}
+
+// EnableLeakCheck checks for reference leaks when Refs gets garbage collected.
+func (r *dirRefs) EnableLeakCheck() {
+ if refs_vfs1.GetLeakMode() != refs_vfs1.NoLeakChecking {
+ runtime.SetFinalizer(r, (*dirRefs).finalize)
+ }
+}
+
+// ReadRefs returns the current number of references. The returned count is
+// inherently racy and is unsafe to use without external synchronization.
+func (r *dirRefs) ReadRefs() int64 {
+
+ return atomic.LoadInt64(&r.refCount) + 1
+}
+
+// IncRef implements refs.RefCounter.IncRef.
+//
+//go:nosplit
+func (r *dirRefs) IncRef() {
+ if v := atomic.AddInt64(&r.refCount, 1); v <= 0 {
+ panic(fmt.Sprintf("Incrementing non-positive ref count %p owned by %T", r, dirownerType))
+ }
+}
+
+// TryIncRef implements refs.RefCounter.TryIncRef.
+//
+// To do this safely without a loop, a speculative reference is first acquired
+// on the object. This allows multiple concurrent TryIncRef calls to distinguish
+// other TryIncRef calls from genuine references held.
+//
+//go:nosplit
+func (r *dirRefs) TryIncRef() bool {
+ const speculativeRef = 1 << 32
+ v := atomic.AddInt64(&r.refCount, speculativeRef)
+ if int32(v) < 0 {
+
+ atomic.AddInt64(&r.refCount, -speculativeRef)
+ return false
+ }
+
+ atomic.AddInt64(&r.refCount, -speculativeRef+1)
+ return true
+}
+
+// DecRef implements refs.RefCounter.DecRef.
+//
+// Note that speculative references are counted here. Since they were added
+// prior to real references reaching zero, they will successfully convert to
+// real references. In other words, we see speculative references only in the
+// following case:
+//
+// A: TryIncRef [speculative increase => sees non-negative references]
+// B: DecRef [real decrease]
+// A: TryIncRef [transform speculative to real]
+//
+//go:nosplit
+func (r *dirRefs) DecRef(destroy func()) {
+ switch v := atomic.AddInt64(&r.refCount, -1); {
+ case v < -1:
+ panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %T", r, dirownerType))
+
+ case v == -1:
+
+ if destroy != nil {
+ destroy()
+ }
+ }
+}
diff --git a/pkg/sentry/fsimpl/sys/sys.go b/pkg/sentry/fsimpl/sys/sys.go
index 1f042d9f7..ea30a4ec2 100644
--- a/pkg/sentry/fsimpl/sys/sys.go
+++ b/pkg/sentry/fsimpl/sys/sys.go
@@ -118,6 +118,7 @@ func (fs *filesystem) Release(ctx context.Context) {
// dir implements kernfs.Inode.
type dir struct {
+ dirRefs
kernfs.InodeAttrs
kernfs.InodeNoDynamicLookup
kernfs.InodeNotSymlink
@@ -133,6 +134,7 @@ func (fs *filesystem) newDir(creds *auth.Credentials, mode linux.FileMode, conte
d := &dir{}
d.InodeAttrs.Init(creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.ModeDirectory|0755)
d.OrderedChildren.Init(kernfs.OrderedChildrenOptions{})
+ d.EnableLeakCheck()
d.dentry.Init(d)
d.IncLinks(d.OrderedChildren.Populate(&d.dentry, contents))
@@ -140,7 +142,7 @@ func (fs *filesystem) newDir(creds *auth.Credentials, mode linux.FileMode, conte
return &d.dentry
}
-// SetStat implements Inode.SetStat not allowing inode attributes to be changed.
+// SetStat implements kernfs.Inode.SetStat, not allowing inode attributes to be changed.
func (*dir) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error {
return syserror.EPERM
}
@@ -156,6 +158,11 @@ func (d *dir) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry,
return fd.VFSFileDescription(), nil
}
+// DecRef implements kernfs.Inode.DecRef.
+func (d *dir) DecRef(context.Context) {
+ d.dirRefs.DecRef(d.Destroy)
+}
+
// cpuFile implements kernfs.Inode.
type cpuFile struct {
kernfs.DynamicBytesFile
diff --git a/pkg/sentry/fsimpl/sys/sys_state_autogen.go b/pkg/sentry/fsimpl/sys/sys_state_autogen.go
index 8866ddff9..347a46318 100644
--- a/pkg/sentry/fsimpl/sys/sys_state_autogen.go
+++ b/pkg/sentry/fsimpl/sys/sys_state_autogen.go
@@ -1,3 +1,34 @@
// automatically generated by stateify.
package sys
+
+import (
+ "gvisor.dev/gvisor/pkg/state"
+)
+
+func (x *dirRefs) StateTypeName() string {
+ return "pkg/sentry/fsimpl/sys.dirRefs"
+}
+
+func (x *dirRefs) StateFields() []string {
+ return []string{
+ "refCount",
+ }
+}
+
+func (x *dirRefs) beforeSave() {}
+
+func (x *dirRefs) StateSave(m state.Sink) {
+ x.beforeSave()
+ m.Save(0, &x.refCount)
+}
+
+func (x *dirRefs) afterLoad() {}
+
+func (x *dirRefs) StateLoad(m state.Source) {
+ m.Load(0, &x.refCount)
+}
+
+func init() {
+ state.Register((*dirRefs)(nil))
+}
diff --git a/pkg/sentry/fsimpl/tmpfs/inode_refs.go b/pkg/sentry/fsimpl/tmpfs/inode_refs.go
index 8b7ff185f..dbf0b2766 100644
--- a/pkg/sentry/fsimpl/tmpfs/inode_refs.go
+++ b/pkg/sentry/fsimpl/tmpfs/inode_refs.go
@@ -1,6 +1,7 @@
package tmpfs
import (
+ "fmt"
"runtime"
"sync/atomic"
@@ -18,6 +19,11 @@ var inodeownerType *inode
// Note that the number of references is actually refCount + 1 so that a default
// zero-value Refs object contains one reference.
//
+// TODO(gvisor.dev/issue/1486): Store stack traces when leak check is enabled in
+// a map with 16-bit hashes, and store the hash in the top 16 bits of refCount.
+// This will allow us to add stack trace information to the leak messages
+// without growing the size of Refs.
+//
// +stateify savable
type inodeRefs struct {
// refCount is composed of two fields:
@@ -62,7 +68,7 @@ func (r *inodeRefs) ReadRefs() int64 {
//go:nosplit
func (r *inodeRefs) IncRef() {
if v := atomic.AddInt64(&r.refCount, 1); v <= 0 {
- panic("Incrementing non-positive ref count")
+ panic(fmt.Sprintf("Incrementing non-positive ref count %p owned by %T", r, inodeownerType))
}
}
@@ -101,7 +107,7 @@ func (r *inodeRefs) TryIncRef() bool {
func (r *inodeRefs) DecRef(destroy func()) {
switch v := atomic.AddInt64(&r.refCount, -1); {
case v < -1:
- panic("Decrementing non-positive ref count")
+ panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %T", r, inodeownerType))
case v == -1:
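The improved panic messages in this hunk rely on the package-level ownerType variables declared at the top of each generated file: a typed nil pointer carries full type information for %T while costing nothing at runtime, which is exactly why the template passes a pointer rather than a value. A hypothetical standalone illustration of the idiom:

package main

import "fmt"

type inode struct{ data [4096]byte } // stand-in for a large struct

// A nil typed pointer formats as its type without copying the struct.
var inodeownerType *inode

func main() {
	fmt.Printf("%T\n", inodeownerType) // prints: *main.inode
}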
diff --git a/pkg/sentry/kernel/fd_table.go b/pkg/sentry/kernel/fd_table.go
index ce53af69b..5773244ac 100644
--- a/pkg/sentry/kernel/fd_table.go
+++ b/pkg/sentry/kernel/fd_table.go
@@ -23,7 +23,6 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
- "gvisor.dev/gvisor/pkg/refs"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/fs/lock"
"gvisor.dev/gvisor/pkg/sentry/limits"
@@ -78,7 +77,8 @@ type descriptor struct {
//
// +stateify savable
type FDTable struct {
- refs.AtomicRefCount
+ FDTableRefs
+
k *Kernel
// mu protects below.
@@ -176,16 +176,15 @@ func (k *Kernel) NewFDTable() *FDTable {
return f
}
-// destroy removes all of the file descriptors from the map.
-func (f *FDTable) destroy(ctx context.Context) {
- f.RemoveIf(ctx, func(*fs.File, *vfs.FileDescription, FDFlags) bool {
- return true
- })
-}
-
-// DecRef implements RefCounter.DecRef with destructor f.destroy.
+// DecRef implements RefCounter.DecRef.
+//
+// If f reaches zero references, all of its file descriptors are removed.
func (f *FDTable) DecRef(ctx context.Context) {
- f.DecRefWithDestructor(ctx, f.destroy)
+ f.FDTableRefs.DecRef(func() {
+ f.RemoveIf(ctx, func(*fs.File, *vfs.FileDescription, FDFlags) bool {
+ return true
+ })
+ })
}
// Size returns the number of file descriptor slots currently allocated.
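The shape of this change repeats across the commit: the old API took a context-accepting destructor via DecRefWithDestructor(ctx, f.destroy), while the generated counters take a plain destroy func(), so receiver state and the context are captured by a closure at the call site. A sketch of the pattern, reusing the hypothetical sketchRefs type from earlier (Table and its field are illustrative only; the real FDTable embeds FDTableRefs):

type Table struct {
	sketchRefs // stands in for the generated counter
	items map[int]string
}

// DecRef drops a reference; the closure replaces the old destructor method
// and runs exactly once, when the last reference is dropped.
func (t *Table) DecRef() {
	t.sketchRefs.DecRef(func() {
		t.items = nil // release resources here
	})
}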
diff --git a/pkg/sentry/kernel/fd_table_refs.go b/pkg/sentry/kernel/fd_table_refs.go
new file mode 100644
index 000000000..ecba138ac
--- /dev/null
+++ b/pkg/sentry/kernel/fd_table_refs.go
@@ -0,0 +1,118 @@
+package kernel
+
+import (
+ "fmt"
+ "runtime"
+ "sync/atomic"
+
+ "gvisor.dev/gvisor/pkg/log"
+ refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
+)
+
+// ownerType is used to customize logging. Note that we use a pointer to T so
+// that we do not copy the entire object when passed as a format parameter.
+var FDTableownerType *FDTable
+
+// Refs implements refs.RefCounter. It keeps a reference count using atomic
+// operations and calls the destructor when the count reaches zero.
+//
+// Note that the number of references is actually refCount + 1 so that a default
+// zero-value Refs object contains one reference.
+//
+// TODO(gvisor.dev/issue/1486): Store stack traces when leak check is enabled in
+// a map with 16-bit hashes, and store the hash in the top 16 bits of refCount.
+// This will allow us to add stack trace information to the leak messages
+// without growing the size of Refs.
+//
+// +stateify savable
+type FDTableRefs struct {
+ // refCount is composed of two fields:
+ //
+ // [32-bit speculative references]:[32-bit real references]
+ //
+ // Speculative references are used for TryIncRef, to avoid a CompareAndSwap
+ // loop. See IncRef, DecRef and TryIncRef for details of how these fields are
+ // used.
+ refCount int64
+}
+
+func (r *FDTableRefs) finalize() {
+ var note string
+ switch refs_vfs1.GetLeakMode() {
+ case refs_vfs1.NoLeakChecking:
+ return
+ case refs_vfs1.UninitializedLeakChecking:
+ note = "(Leak checker uninitialized): "
+ }
+ if n := r.ReadRefs(); n != 0 {
+ log.Warningf("%sRefs %p owned by %T garbage collected with ref count of %d (want 0)", note, r, FDTableownerType, n)
+ }
+}
+
+// EnableLeakCheck checks for reference leaks when Refs gets garbage collected.
+func (r *FDTableRefs) EnableLeakCheck() {
+ if refs_vfs1.GetLeakMode() != refs_vfs1.NoLeakChecking {
+ runtime.SetFinalizer(r, (*FDTableRefs).finalize)
+ }
+}
+
+// ReadRefs returns the current number of references. The returned count is
+// inherently racy and is unsafe to use without external synchronization.
+func (r *FDTableRefs) ReadRefs() int64 {
+
+ return atomic.LoadInt64(&r.refCount) + 1
+}
+
+// IncRef implements refs.RefCounter.IncRef.
+//
+//go:nosplit
+func (r *FDTableRefs) IncRef() {
+ if v := atomic.AddInt64(&r.refCount, 1); v <= 0 {
+ panic(fmt.Sprintf("Incrementing non-positive ref count %p owned by %T", r, FDTableownerType))
+ }
+}
+
+// TryIncRef implements refs.RefCounter.TryIncRef.
+//
+// To do this safely without a loop, a speculative reference is first acquired
+// on the object. This allows multiple concurrent TryIncRef calls to distinguish
+// other TryIncRef calls from genuine references held.
+//
+//go:nosplit
+func (r *FDTableRefs) TryIncRef() bool {
+ const speculativeRef = 1 << 32
+ v := atomic.AddInt64(&r.refCount, speculativeRef)
+ if int32(v) < 0 {
+
+ atomic.AddInt64(&r.refCount, -speculativeRef)
+ return false
+ }
+
+ atomic.AddInt64(&r.refCount, -speculativeRef+1)
+ return true
+}
+
+// DecRef implements refs.RefCounter.DecRef.
+//
+// Note that speculative references are counted here. Since they were added
+// prior to real references reaching zero, they will successfully convert to
+// real references. In other words, we see speculative references only in the
+// following case:
+//
+// A: TryIncRef [speculative increase => sees non-negative references]
+// B: DecRef [real decrease]
+// A: TryIncRef [transform speculative to real]
+//
+//go:nosplit
+func (r *FDTableRefs) DecRef(destroy func()) {
+ switch v := atomic.AddInt64(&r.refCount, -1); {
+ case v < -1:
+ panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %T", r, FDTableownerType))
+
+ case v == -1:
+
+ if destroy != nil {
+ destroy()
+ }
+ }
+}
diff --git a/pkg/sentry/kernel/fd_table_unsafe.go b/pkg/sentry/kernel/fd_table_unsafe.go
index 7fd97dc53..6b8feb107 100644
--- a/pkg/sentry/kernel/fd_table_unsafe.go
+++ b/pkg/sentry/kernel/fd_table_unsafe.go
@@ -31,6 +31,8 @@ type descriptorTable struct {
}
// init initializes the table.
+//
+// TODO(gvisor.dev/issue/1486): Enable leak check for FDTable.
func (f *FDTable) init() {
var slice []unsafe.Pointer // Empty slice.
atomic.StorePointer(&f.slice, unsafe.Pointer(&slice))
diff --git a/pkg/sentry/kernel/fs_context.go b/pkg/sentry/kernel/fs_context.go
index 8f2d36d5a..d46d1e1c1 100644
--- a/pkg/sentry/kernel/fs_context.go
+++ b/pkg/sentry/kernel/fs_context.go
@@ -18,7 +18,6 @@ import (
"fmt"
"gvisor.dev/gvisor/pkg/context"
- "gvisor.dev/gvisor/pkg/refs"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
@@ -30,7 +29,7 @@ import (
//
// +stateify savable
type FSContext struct {
- refs.AtomicRefCount
+ FSContextRefs
// mu protects below.
mu sync.Mutex `state:"nosave"`
@@ -64,7 +63,7 @@ func newFSContext(root, cwd *fs.Dirent, umask uint) *FSContext {
cwd: cwd,
umask: umask,
}
- f.EnableLeakCheck("kernel.FSContext")
+ f.EnableLeakCheck()
return &f
}
@@ -77,54 +76,56 @@ func NewFSContextVFS2(root, cwd vfs.VirtualDentry, umask uint) *FSContext {
cwdVFS2: cwd,
umask: umask,
}
- f.EnableLeakCheck("kernel.FSContext")
+ f.EnableLeakCheck()
return &f
}
-// destroy is the destructor for an FSContext.
+// DecRef implements RefCounter.DecRef.
//
-// This will call DecRef on both root and cwd Dirents. If either call to
-// DecRef returns an error, then it will be propagated. If both calls to
-// DecRef return an error, then the one from root.DecRef will be propagated.
+// When f reaches zero references, DecRef will be called on both root and cwd
+// Dirents.
//
// Note that there may still be calls to WorkingDirectory() or RootDirectory()
// (that return nil). This is because valid references may still be held via
// proc files or other mechanisms.
-func (f *FSContext) destroy(ctx context.Context) {
- // Hold f.mu so that we don't race with RootDirectory() and
- // WorkingDirectory().
- f.mu.Lock()
- defer f.mu.Unlock()
-
- if VFS2Enabled {
- f.rootVFS2.DecRef(ctx)
- f.rootVFS2 = vfs.VirtualDentry{}
- f.cwdVFS2.DecRef(ctx)
- f.cwdVFS2 = vfs.VirtualDentry{}
- } else {
- f.root.DecRef(ctx)
- f.root = nil
- f.cwd.DecRef(ctx)
- f.cwd = nil
- }
-}
-
-// DecRef implements RefCounter.DecRef with destructor f.destroy.
func (f *FSContext) DecRef(ctx context.Context) {
- f.DecRefWithDestructor(ctx, f.destroy)
+ f.FSContextRefs.DecRef(func() {
+ // Hold f.mu so that we don't race with RootDirectory() and
+ // WorkingDirectory().
+ f.mu.Lock()
+ defer f.mu.Unlock()
+
+ if VFS2Enabled {
+ f.rootVFS2.DecRef(ctx)
+ f.rootVFS2 = vfs.VirtualDentry{}
+ f.cwdVFS2.DecRef(ctx)
+ f.cwdVFS2 = vfs.VirtualDentry{}
+ } else {
+ f.root.DecRef(ctx)
+ f.root = nil
+ f.cwd.DecRef(ctx)
+ f.cwd = nil
+ }
+ })
}
// Fork forks this FSContext.
//
-// This is not a valid call after destroy.
+// This is not a valid call after f is destroyed.
func (f *FSContext) Fork() *FSContext {
f.mu.Lock()
defer f.mu.Unlock()
if VFS2Enabled {
+ if !f.cwdVFS2.Ok() {
+ panic("FSContext.Fork() called after destroy")
+ }
f.cwdVFS2.IncRef()
f.rootVFS2.IncRef()
} else {
+ if f.cwd == nil {
+ panic("FSContext.Fork() called after destroy")
+ }
f.cwd.IncRef()
f.root.IncRef()
}
@@ -140,8 +141,8 @@ func (f *FSContext) Fork() *FSContext {
// WorkingDirectory returns the current working directory.
//
-// This will return nil if called after destroy(), otherwise it will return a
-// Dirent with a reference taken.
+// This will return nil if called after f is destroyed, otherwise it will return
+// a Dirent with a reference taken.
func (f *FSContext) WorkingDirectory() *fs.Dirent {
f.mu.Lock()
defer f.mu.Unlock()
@@ -152,8 +153,8 @@ func (f *FSContext) WorkingDirectory() *fs.Dirent {
// WorkingDirectoryVFS2 returns the current working directory.
//
-// This will return nil if called after destroy(), otherwise it will return a
-// Dirent with a reference taken.
+// This will return nil if called after f is destroyed, otherwise it will return
+// a Dirent with a reference taken.
func (f *FSContext) WorkingDirectoryVFS2() vfs.VirtualDentry {
f.mu.Lock()
defer f.mu.Unlock()
@@ -165,7 +166,7 @@ func (f *FSContext) WorkingDirectoryVFS2() vfs.VirtualDentry {
// SetWorkingDirectory sets the current working directory.
// This will take an extra reference on the Dirent.
//
-// This is not a valid call after destroy.
+// This is not a valid call after f is destroyed.
func (f *FSContext) SetWorkingDirectory(ctx context.Context, d *fs.Dirent) {
if d == nil {
panic("FSContext.SetWorkingDirectory called with nil dirent")
@@ -187,11 +188,15 @@ func (f *FSContext) SetWorkingDirectory(ctx context.Context, d *fs.Dirent) {
// SetWorkingDirectoryVFS2 sets the current working directory.
// This will take an extra reference on the VirtualDentry.
//
-// This is not a valid call after destroy.
+// This is not a valid call after f is destroyed.
func (f *FSContext) SetWorkingDirectoryVFS2(ctx context.Context, d vfs.VirtualDentry) {
f.mu.Lock()
defer f.mu.Unlock()
+ if !f.cwdVFS2.Ok() {
+ panic(fmt.Sprintf("FSContext.SetWorkingDirectoryVFS2(%v)) called after destroy", d))
+ }
+
old := f.cwdVFS2
f.cwdVFS2 = d
d.IncRef()
@@ -200,8 +205,8 @@ func (f *FSContext) SetWorkingDirectoryVFS2(ctx context.Context, d vfs.VirtualDe
// RootDirectory returns the current filesystem root.
//
-// This will return nil if called after destroy(), otherwise it will return a
-// Dirent with a reference taken.
+// This will return nil if called after f is destroyed, otherwise it will return
+// a Dirent with a reference taken.
func (f *FSContext) RootDirectory() *fs.Dirent {
f.mu.Lock()
defer f.mu.Unlock()
@@ -213,8 +218,8 @@ func (f *FSContext) RootDirectory() *fs.Dirent {
// RootDirectoryVFS2 returns the current filesystem root.
//
-// This will return nil if called after destroy(), otherwise it will return a
-// Dirent with a reference taken.
+// This will return nil if called after f is destroyed, otherwise it will return
+// a Dirent with a reference taken.
func (f *FSContext) RootDirectoryVFS2() vfs.VirtualDentry {
f.mu.Lock()
defer f.mu.Unlock()
@@ -226,7 +231,7 @@ func (f *FSContext) RootDirectoryVFS2() vfs.VirtualDentry {
// SetRootDirectory sets the root directory.
// This will take an extra reference on the Dirent.
//
-// This is not a valid call after free.
+// This is not a valid call after f is destroyed.
func (f *FSContext) SetRootDirectory(ctx context.Context, d *fs.Dirent) {
if d == nil {
panic("FSContext.SetRootDirectory called with nil dirent")
@@ -247,7 +252,7 @@ func (f *FSContext) SetRootDirectory(ctx context.Context, d *fs.Dirent) {
// SetRootDirectoryVFS2 sets the root directory. It takes a reference on vd.
//
-// This is not a valid call after free.
+// This is not a valid call after f is destroyed.
func (f *FSContext) SetRootDirectoryVFS2(ctx context.Context, vd vfs.VirtualDentry) {
if !vd.Ok() {
panic("FSContext.SetRootDirectoryVFS2 called with zero-value VirtualDentry")
diff --git a/pkg/sentry/kernel/fs_context_refs.go b/pkg/sentry/kernel/fs_context_refs.go
new file mode 100644
index 000000000..fb2fde971
--- /dev/null
+++ b/pkg/sentry/kernel/fs_context_refs.go
@@ -0,0 +1,118 @@
+package kernel
+
+import (
+ "fmt"
+ "runtime"
+ "sync/atomic"
+
+ "gvisor.dev/gvisor/pkg/log"
+ refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
+)
+
+// ownerType is used to customize logging. Note that we use a pointer to T so
+// that we do not copy the entire object when passed as a format parameter.
+var FSContextownerType *FSContext
+
+// Refs implements refs.RefCounter. It keeps a reference count using atomic
+// operations and calls the destructor when the count reaches zero.
+//
+// Note that the number of references is actually refCount + 1 so that a default
+// zero-value Refs object contains one reference.
+//
+// TODO(gvisor.dev/issue/1486): Store stack traces when leak check is enabled in
+// a map with 16-bit hashes, and store the hash in the top 16 bits of refCount.
+// This will allow us to add stack trace information to the leak messages
+// without growing the size of Refs.
+//
+// +stateify savable
+type FSContextRefs struct {
+ // refCount is composed of two fields:
+ //
+ // [32-bit speculative references]:[32-bit real references]
+ //
+ // Speculative references are used for TryIncRef, to avoid a CompareAndSwap
+ // loop. See IncRef, DecRef and TryIncRef for details of how these fields are
+ // used.
+ refCount int64
+}
+
+func (r *FSContextRefs) finalize() {
+ var note string
+ switch refs_vfs1.GetLeakMode() {
+ case refs_vfs1.NoLeakChecking:
+ return
+ case refs_vfs1.UninitializedLeakChecking:
+ note = "(Leak checker uninitialized): "
+ }
+ if n := r.ReadRefs(); n != 0 {
+ log.Warningf("%sRefs %p owned by %T garbage collected with ref count of %d (want 0)", note, r, FSContextownerType, n)
+ }
+}
+
+// EnableLeakCheck checks for reference leaks when Refs gets garbage collected.
+func (r *FSContextRefs) EnableLeakCheck() {
+ if refs_vfs1.GetLeakMode() != refs_vfs1.NoLeakChecking {
+ runtime.SetFinalizer(r, (*FSContextRefs).finalize)
+ }
+}
+
+// ReadRefs returns the current number of references. The returned count is
+// inherently racy and is unsafe to use without external synchronization.
+func (r *FSContextRefs) ReadRefs() int64 {
+
+ return atomic.LoadInt64(&r.refCount) + 1
+}
+
+// IncRef implements refs.RefCounter.IncRef.
+//
+//go:nosplit
+func (r *FSContextRefs) IncRef() {
+ if v := atomic.AddInt64(&r.refCount, 1); v <= 0 {
+ panic(fmt.Sprintf("Incrementing non-positive ref count %p owned by %T", r, FSContextownerType))
+ }
+}
+
+// TryIncRef implements refs.RefCounter.TryIncRef.
+//
+// To do this safely without a loop, a speculative reference is first acquired
+// on the object. This allows multiple concurrent TryIncRef calls to distinguish
+// other TryIncRef calls from genuine references held.
+//
+//go:nosplit
+func (r *FSContextRefs) TryIncRef() bool {
+ const speculativeRef = 1 << 32
+ v := atomic.AddInt64(&r.refCount, speculativeRef)
+ if int32(v) < 0 {
+
+ atomic.AddInt64(&r.refCount, -speculativeRef)
+ return false
+ }
+
+ atomic.AddInt64(&r.refCount, -speculativeRef+1)
+ return true
+}
+
+// DecRef implements refs.RefCounter.DecRef.
+//
+// Note that speculative references are counted here. Since they were added
+// prior to real references reaching zero, they will successfully convert to
+// real references. In other words, we see speculative references only in the
+// following case:
+//
+// A: TryIncRef [speculative increase => sees non-negative references]
+// B: DecRef [real decrease]
+// A: TryIncRef [transform speculative to real]
+//
+//go:nosplit
+func (r *FSContextRefs) DecRef(destroy func()) {
+ switch v := atomic.AddInt64(&r.refCount, -1); {
+ case v < -1:
+ panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %T", r, FSContextownerType))
+
+ case v == -1:
+
+ if destroy != nil {
+ destroy()
+ }
+ }
+}
diff --git a/pkg/sentry/kernel/kernel_state_autogen.go b/pkg/sentry/kernel/kernel_state_autogen.go
index e514b07d6..d0ff135d7 100644
--- a/pkg/sentry/kernel/kernel_state_autogen.go
+++ b/pkg/sentry/kernel/kernel_state_autogen.go
@@ -122,7 +122,7 @@ func (x *FDTable) StateTypeName() string {
func (x *FDTable) StateFields() []string {
return []string{
- "AtomicRefCount",
+ "FDTableRefs",
"k",
"next",
"used",
@@ -136,7 +136,7 @@ func (x *FDTable) StateSave(m state.Sink) {
x.beforeSave()
var descriptorTable map[int32]descriptor = x.saveDescriptorTable()
m.SaveValue(4, descriptorTable)
- m.Save(0, &x.AtomicRefCount)
+ m.Save(0, &x.FDTableRefs)
m.Save(1, &x.k)
m.Save(2, &x.next)
m.Save(3, &x.used)
@@ -145,20 +145,43 @@ func (x *FDTable) StateSave(m state.Sink) {
func (x *FDTable) afterLoad() {}
func (x *FDTable) StateLoad(m state.Source) {
- m.Load(0, &x.AtomicRefCount)
+ m.Load(0, &x.FDTableRefs)
m.Load(1, &x.k)
m.Load(2, &x.next)
m.Load(3, &x.used)
m.LoadValue(4, new(map[int32]descriptor), func(y interface{}) { x.loadDescriptorTable(y.(map[int32]descriptor)) })
}
+func (x *FDTableRefs) StateTypeName() string {
+ return "pkg/sentry/kernel.FDTableRefs"
+}
+
+func (x *FDTableRefs) StateFields() []string {
+ return []string{
+ "refCount",
+ }
+}
+
+func (x *FDTableRefs) beforeSave() {}
+
+func (x *FDTableRefs) StateSave(m state.Sink) {
+ x.beforeSave()
+ m.Save(0, &x.refCount)
+}
+
+func (x *FDTableRefs) afterLoad() {}
+
+func (x *FDTableRefs) StateLoad(m state.Source) {
+ m.Load(0, &x.refCount)
+}
+
func (x *FSContext) StateTypeName() string {
return "pkg/sentry/kernel.FSContext"
}
func (x *FSContext) StateFields() []string {
return []string{
- "AtomicRefCount",
+ "FSContextRefs",
"root",
"rootVFS2",
"cwd",
@@ -171,7 +194,7 @@ func (x *FSContext) beforeSave() {}
func (x *FSContext) StateSave(m state.Sink) {
x.beforeSave()
- m.Save(0, &x.AtomicRefCount)
+ m.Save(0, &x.FSContextRefs)
m.Save(1, &x.root)
m.Save(2, &x.rootVFS2)
m.Save(3, &x.cwd)
@@ -182,7 +205,7 @@ func (x *FSContext) StateSave(m state.Sink) {
func (x *FSContext) afterLoad() {}
func (x *FSContext) StateLoad(m state.Source) {
- m.Load(0, &x.AtomicRefCount)
+ m.Load(0, &x.FSContextRefs)
m.Load(1, &x.root)
m.Load(2, &x.rootVFS2)
m.Load(3, &x.cwd)
@@ -190,6 +213,29 @@ func (x *FSContext) StateLoad(m state.Source) {
m.Load(5, &x.umask)
}
+func (x *FSContextRefs) StateTypeName() string {
+ return "pkg/sentry/kernel.FSContextRefs"
+}
+
+func (x *FSContextRefs) StateFields() []string {
+ return []string{
+ "refCount",
+ }
+}
+
+func (x *FSContextRefs) beforeSave() {}
+
+func (x *FSContextRefs) StateSave(m state.Sink) {
+ x.beforeSave()
+ m.Save(0, &x.refCount)
+}
+
+func (x *FSContextRefs) afterLoad() {}
+
+func (x *FSContextRefs) StateLoad(m state.Source) {
+ m.Load(0, &x.refCount)
+}
+
func (x *IPCNamespace) StateTypeName() string {
return "pkg/sentry/kernel.IPCNamespace"
}
@@ -643,6 +689,29 @@ func (x *processGroupEntry) StateLoad(m state.Source) {
m.Load(1, &x.prev)
}
+func (x *ProcessGroupRefs) StateTypeName() string {
+ return "pkg/sentry/kernel.ProcessGroupRefs"
+}
+
+func (x *ProcessGroupRefs) StateFields() []string {
+ return []string{
+ "refCount",
+ }
+}
+
+func (x *ProcessGroupRefs) beforeSave() {}
+
+func (x *ProcessGroupRefs) StateSave(m state.Sink) {
+ x.beforeSave()
+ m.Save(0, &x.refCount)
+}
+
+func (x *ProcessGroupRefs) afterLoad() {}
+
+func (x *ProcessGroupRefs) StateLoad(m state.Source) {
+ m.Load(0, &x.refCount)
+}
+
func (x *ptraceOptions) StateTypeName() string {
return "pkg/sentry/kernel.ptraceOptions"
}
@@ -794,13 +863,36 @@ func (x *sessionEntry) StateLoad(m state.Source) {
m.Load(1, &x.prev)
}
+func (x *SessionRefs) StateTypeName() string {
+ return "pkg/sentry/kernel.SessionRefs"
+}
+
+func (x *SessionRefs) StateFields() []string {
+ return []string{
+ "refCount",
+ }
+}
+
+func (x *SessionRefs) beforeSave() {}
+
+func (x *SessionRefs) StateSave(m state.Sink) {
+ x.beforeSave()
+ m.Save(0, &x.refCount)
+}
+
+func (x *SessionRefs) afterLoad() {}
+
+func (x *SessionRefs) StateLoad(m state.Source) {
+ m.Load(0, &x.refCount)
+}
+
func (x *Session) StateTypeName() string {
return "pkg/sentry/kernel.Session"
}
func (x *Session) StateFields() []string {
return []string{
- "refs",
+ "SessionRefs",
"leader",
"id",
"foreground",
@@ -813,7 +905,7 @@ func (x *Session) beforeSave() {}
func (x *Session) StateSave(m state.Sink) {
x.beforeSave()
- m.Save(0, &x.refs)
+ m.Save(0, &x.SessionRefs)
m.Save(1, &x.leader)
m.Save(2, &x.id)
m.Save(3, &x.foreground)
@@ -824,7 +916,7 @@ func (x *Session) StateSave(m state.Sink) {
func (x *Session) afterLoad() {}
func (x *Session) StateLoad(m state.Source) {
- m.Load(0, &x.refs)
+ m.Load(0, &x.SessionRefs)
m.Load(1, &x.leader)
m.Load(2, &x.id)
m.Load(3, &x.foreground)
@@ -2167,7 +2259,9 @@ func init() {
state.Register((*FDFlags)(nil))
state.Register((*descriptor)(nil))
state.Register((*FDTable)(nil))
+ state.Register((*FDTableRefs)(nil))
state.Register((*FSContext)(nil))
+ state.Register((*FSContextRefs)(nil))
state.Register((*IPCNamespace)(nil))
state.Register((*Kernel)(nil))
state.Register((*SocketEntry)(nil))
@@ -2180,11 +2274,13 @@ func init() {
state.Register((*IntervalTimer)(nil))
state.Register((*processGroupList)(nil))
state.Register((*processGroupEntry)(nil))
+ state.Register((*ProcessGroupRefs)(nil))
state.Register((*ptraceOptions)(nil))
state.Register((*ptraceStop)(nil))
state.Register((*OldRSeqCriticalRegion)(nil))
state.Register((*sessionList)(nil))
state.Register((*sessionEntry)(nil))
+ state.Register((*SessionRefs)(nil))
state.Register((*Session)(nil))
state.Register((*ProcessGroup)(nil))
state.Register((*SignalHandlers)(nil))
diff --git a/pkg/sentry/kernel/process_group_refs.go b/pkg/sentry/kernel/process_group_refs.go
new file mode 100644
index 000000000..4ed6e6458
--- /dev/null
+++ b/pkg/sentry/kernel/process_group_refs.go
@@ -0,0 +1,118 @@
+package kernel
+
+import (
+ "fmt"
+ "runtime"
+ "sync/atomic"
+
+ "gvisor.dev/gvisor/pkg/log"
+ refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
+)
+
+// ownerType is used to customize logging. Note that we use a pointer to T so
+// that we do not copy the entire object when passed as a format parameter.
+var ProcessGroupownerType *ProcessGroup
+
+// Refs implements refs.RefCounter. It keeps a reference count using atomic
+// operations and calls the destructor when the count reaches zero.
+//
+// Note that the number of references is actually refCount + 1 so that a default
+// zero-value Refs object contains one reference.
+//
+// TODO(gvisor.dev/issue/1486): Store stack traces when leak check is enabled in
+// a map with 16-bit hashes, and store the hash in the top 16 bits of refCount.
+// This will allow us to add stack trace information to the leak messages
+// without growing the size of Refs.
+//
+// +stateify savable
+type ProcessGroupRefs struct {
+ // refCount is composed of two fields:
+ //
+ // [32-bit speculative references]:[32-bit real references]
+ //
+ // Speculative references are used for TryIncRef, to avoid a CompareAndSwap
+ // loop. See IncRef, DecRef and TryIncRef for details of how these fields are
+ // used.
+ refCount int64
+}
+
+func (r *ProcessGroupRefs) finalize() {
+ var note string
+ switch refs_vfs1.GetLeakMode() {
+ case refs_vfs1.NoLeakChecking:
+ return
+ case refs_vfs1.UninitializedLeakChecking:
+ note = "(Leak checker uninitialized): "
+ }
+ if n := r.ReadRefs(); n != 0 {
+ log.Warningf("%sRefs %p owned by %T garbage collected with ref count of %d (want 0)", note, r, ProcessGroupownerType, n)
+ }
+}
+
+// EnableLeakCheck checks for reference leaks when Refs gets garbage collected.
+func (r *ProcessGroupRefs) EnableLeakCheck() {
+ if refs_vfs1.GetLeakMode() != refs_vfs1.NoLeakChecking {
+ runtime.SetFinalizer(r, (*ProcessGroupRefs).finalize)
+ }
+}
+
+// ReadRefs returns the current number of references. The returned count is
+// inherently racy and is unsafe to use without external synchronization.
+func (r *ProcessGroupRefs) ReadRefs() int64 {
+
+ return atomic.LoadInt64(&r.refCount) + 1
+}
+
+// IncRef implements refs.RefCounter.IncRef.
+//
+//go:nosplit
+func (r *ProcessGroupRefs) IncRef() {
+ if v := atomic.AddInt64(&r.refCount, 1); v <= 0 {
+ panic(fmt.Sprintf("Incrementing non-positive ref count %p owned by %T", r, ProcessGroupownerType))
+ }
+}
+
+// TryIncRef implements refs.RefCounter.TryIncRef.
+//
+// To do this safely without a loop, a speculative reference is first acquired
+// on the object. This allows multiple concurrent TryIncRef calls to distinguish
+// other TryIncRef calls from genuine references held.
+//
+//go:nosplit
+func (r *ProcessGroupRefs) TryIncRef() bool {
+ const speculativeRef = 1 << 32
+ v := atomic.AddInt64(&r.refCount, speculativeRef)
+ if int32(v) < 0 {
+
+ atomic.AddInt64(&r.refCount, -speculativeRef)
+ return false
+ }
+
+ atomic.AddInt64(&r.refCount, -speculativeRef+1)
+ return true
+}
+
+// DecRef implements refs.RefCounter.DecRef.
+//
+// Note that speculative references are counted here. Since they were added
+// prior to real references reaching zero, they will successfully convert to
+// real references. In other words, we see speculative references only in the
+// following case:
+//
+// A: TryIncRef [speculative increase => sees non-negative references]
+// B: DecRef [real decrease]
+// A: TryIncRef [transform speculative to real]
+//
+//go:nosplit
+func (r *ProcessGroupRefs) DecRef(destroy func()) {
+ switch v := atomic.AddInt64(&r.refCount, -1); {
+ case v < -1:
+ panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %T", r, ProcessGroupownerType))
+
+ case v == -1:
+
+ if destroy != nil {
+ destroy()
+ }
+ }
+}
diff --git a/pkg/sentry/kernel/session_refs.go b/pkg/sentry/kernel/session_refs.go
new file mode 100644
index 000000000..f2e1bb797
--- /dev/null
+++ b/pkg/sentry/kernel/session_refs.go
@@ -0,0 +1,118 @@
+package kernel
+
+import (
+ "fmt"
+ "runtime"
+ "sync/atomic"
+
+ "gvisor.dev/gvisor/pkg/log"
+ refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
+)
+
+// ownerType is used to customize logging. Note that we use a pointer to T so
+// that we do not copy the entire object when passed as a format parameter.
+var SessionownerType *Session
+
+// Refs implements refs.RefCounter. It keeps a reference count using atomic
+// operations and calls the destructor when the count reaches zero.
+//
+// Note that the number of references is actually refCount + 1 so that a default
+// zero-value Refs object contains one reference.
+//
+// TODO(gvisor.dev/issue/1486): Store stack traces when leak check is enabled in
+// a map with 16-bit hashes, and store the hash in the top 16 bits of refCount.
+// This will allow us to add stack trace information to the leak messages
+// without growing the size of Refs.
+//
+// +stateify savable
+type SessionRefs struct {
+ // refCount is composed of two fields:
+ //
+ // [32-bit speculative references]:[32-bit real references]
+ //
+ // Speculative references are used for TryIncRef, to avoid a CompareAndSwap
+ // loop. See IncRef, DecRef and TryIncRef for details of how these fields are
+ // used.
+ refCount int64
+}
+
+func (r *SessionRefs) finalize() {
+ var note string
+ switch refs_vfs1.GetLeakMode() {
+ case refs_vfs1.NoLeakChecking:
+ return
+ case refs_vfs1.UninitializedLeakChecking:
+ note = "(Leak checker uninitialized): "
+ }
+ if n := r.ReadRefs(); n != 0 {
+ log.Warningf("%sRefs %p owned by %T garbage collected with ref count of %d (want 0)", note, r, SessionownerType, n)
+ }
+}
+
+// EnableLeakCheck checks for reference leaks when Refs gets garbage collected.
+func (r *SessionRefs) EnableLeakCheck() {
+ if refs_vfs1.GetLeakMode() != refs_vfs1.NoLeakChecking {
+ runtime.SetFinalizer(r, (*SessionRefs).finalize)
+ }
+}
+
+// ReadRefs returns the current number of references. The returned count is
+// inherently racy and is unsafe to use without external synchronization.
+func (r *SessionRefs) ReadRefs() int64 {
+
+ return atomic.LoadInt64(&r.refCount) + 1
+}
+
+// IncRef implements refs.RefCounter.IncRef.
+//
+//go:nosplit
+func (r *SessionRefs) IncRef() {
+ if v := atomic.AddInt64(&r.refCount, 1); v <= 0 {
+ panic(fmt.Sprintf("Incrementing non-positive ref count %p owned by %T", r, SessionownerType))
+ }
+}
+
+// TryIncRef implements refs.RefCounter.TryIncRef.
+//
+// To do this safely without a loop, a speculative reference is first acquired
+// on the object. This allows multiple concurrent TryIncRef calls to distinguish
+// other TryIncRef calls from genuine references held.
+//
+//go:nosplit
+func (r *SessionRefs) TryIncRef() bool {
+ const speculativeRef = 1 << 32
+ v := atomic.AddInt64(&r.refCount, speculativeRef)
+ if int32(v) < 0 {
+
+ atomic.AddInt64(&r.refCount, -speculativeRef)
+ return false
+ }
+
+ atomic.AddInt64(&r.refCount, -speculativeRef+1)
+ return true
+}
+
+// DecRef implements refs.RefCounter.DecRef.
+//
+// Note that speculative references are counted here. Since they were added
+// prior to real references reaching zero, they will successfully convert to
+// real references. In other words, we see speculative references only in the
+// following case:
+//
+// A: TryIncRef [speculative increase => sees non-negative references]
+// B: DecRef [real decrease]
+// A: TryIncRef [transform speculative to real]
+//
+//go:nosplit
+func (r *SessionRefs) DecRef(destroy func()) {
+ switch v := atomic.AddInt64(&r.refCount, -1); {
+ case v < -1:
+ panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %T", r, SessionownerType))
+
+ case v == -1:
+
+ if destroy != nil {
+ destroy()
+ }
+ }
+}
diff --git a/pkg/sentry/kernel/sessions.go b/pkg/sentry/kernel/sessions.go
index 5c4c622c2..df5c8421b 100644
--- a/pkg/sentry/kernel/sessions.go
+++ b/pkg/sentry/kernel/sessions.go
@@ -16,8 +16,6 @@ package kernel
import (
"gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/context"
- "gvisor.dev/gvisor/pkg/refs"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/syserror"
)
@@ -32,7 +30,7 @@ type ProcessGroupID ThreadID
//
// +stateify savable
type Session struct {
- refs refs.AtomicRefCount
+ SessionRefs
// leader is the originator of the Session.
//
@@ -62,16 +60,11 @@ type Session struct {
sessionEntry
}
-// incRef grabs a reference.
-func (s *Session) incRef() {
- s.refs.IncRef()
-}
-
-// decRef drops a reference.
+// DecRef drops a reference.
//
// Precondition: callers must hold TaskSet.mu for writing.
-func (s *Session) decRef() {
- s.refs.DecRefWithDestructor(nil, func(context.Context) {
+func (s *Session) DecRef() {
+ s.SessionRefs.DecRef(func() {
// Remove translations from the leader.
for ns := s.leader.pidns; ns != nil; ns = ns.parent {
id := ns.sids[s]
@@ -88,7 +81,7 @@ func (s *Session) decRef() {
//
// +stateify savable
type ProcessGroup struct {
- refs refs.AtomicRefCount // not exported.
+ refs ProcessGroupRefs
// originator is the originator of the group.
//
@@ -163,7 +156,7 @@ func (pg *ProcessGroup) decRefWithParent(parentPG *ProcessGroup) {
}
alive := true
- pg.refs.DecRefWithDestructor(nil, func(context.Context) {
+ pg.refs.DecRef(func() {
alive = false // don't bother with handleOrphan.
// Remove translations from the originator.
@@ -175,7 +168,7 @@ func (pg *ProcessGroup) decRefWithParent(parentPG *ProcessGroup) {
// Remove the list of process groups.
pg.session.processGroups.Remove(pg)
- pg.session.decRef()
+ pg.session.DecRef()
})
if alive {
pg.handleOrphan()
@@ -302,7 +295,7 @@ func (tg *ThreadGroup) createSession() error {
id: SessionID(id),
leader: tg,
}
- s.refs.EnableLeakCheck("kernel.Session")
+ s.EnableLeakCheck()
// Create a new ProcessGroup, belonging to that Session.
// This also has a single reference (assigned below).
@@ -316,7 +309,7 @@ func (tg *ThreadGroup) createSession() error {
session: s,
ancestors: 0,
}
- pg.refs.EnableLeakCheck("kernel.ProcessGroup")
+ pg.refs.EnableLeakCheck()
// Tie them and return the result.
s.processGroups.PushBack(pg)
@@ -396,13 +389,13 @@ func (tg *ThreadGroup) CreateProcessGroup() error {
//
// We manually adjust the ancestors if the parent is in the same
// session.
- tg.processGroup.session.incRef()
+ tg.processGroup.session.IncRef()
pg := ProcessGroup{
id: ProcessGroupID(id),
originator: tg,
session: tg.processGroup.session,
}
- pg.refs.EnableLeakCheck("kernel.ProcessGroup")
+ pg.refs.EnableLeakCheck()
if tg.leader.parent != nil && tg.leader.parent.tg.processGroup.session == pg.session {
pg.ancestors++
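
The sessions.go hunks above show the call-site shape of the migration: the owner embeds the generated Refs type and hands its cleanup to DecRef as a closure, replacing the old DecRefWithDestructor(nil, func(context.Context)) form. A minimal, self-contained sketch of that pattern (the types below are stand-ins, not the kernel's):

    package main

    import (
        "fmt"
        "sync/atomic"
    )

    // toyRefs stands in for a generated type such as SessionRefs.
    type toyRefs struct {
        refCount int64 // zero value already holds one reference
    }

    func (r *toyRefs) IncRef() {
        atomic.AddInt64(&r.refCount, 1)
    }

    func (r *toyRefs) DecRef(destroy func()) {
        if atomic.AddInt64(&r.refCount, -1) == -1 && destroy != nil {
            destroy() // runs exactly once, on the final DecRef
        }
    }

    type session struct {
        toyRefs // embedded, like SessionRefs in Session
    }

    func main() {
        var s session // one implicit reference
        s.IncRef()    // a second holder appears
        s.DecRef(nil) // ...and goes away; no destruction yet
        s.DecRef(func() { fmt.Println("last reference dropped; cleaning up") })
    }
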
diff --git a/pkg/sentry/kernel/shm/shm.go b/pkg/sentry/kernel/shm/shm.go
index 13ec7afe0..00c03585e 100644
--- a/pkg/sentry/kernel/shm/shm.go
+++ b/pkg/sentry/kernel/shm/shm.go
@@ -39,7 +39,6 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/log"
- "gvisor.dev/gvisor/pkg/refs"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
@@ -252,7 +251,7 @@ func (r *Registry) newShm(ctx context.Context, pid int32, key Key, creator fs.Fi
creatorPID: pid,
changeTime: ktime.NowFromContext(ctx),
}
- shm.EnableLeakCheck("kernel.Shm")
+ shm.EnableLeakCheck()
// Find the next available ID.
for id := r.lastIDUsed + 1; id != r.lastIDUsed; id++ {
@@ -337,14 +336,14 @@ func (r *Registry) remove(s *Shm) {
//
// +stateify savable
type Shm struct {
- // AtomicRefCount tracks the number of references to this segment.
+ // ShmRefs tracks the number of references to this segment.
//
// A segment holds a reference to itself until it is marked for
// destruction.
//
// In addition to direct users, the MemoryManager will hold references
// via MappingIdentity.
- refs.AtomicRefCount
+ ShmRefs
mfp pgalloc.MemoryFileProvider
@@ -428,11 +427,14 @@ func (s *Shm) InodeID() uint64 {
return uint64(s.ID)
}
-// DecRef overrides refs.RefCount.DecRef with a destructor.
+// DecRef drops a reference on s.
//
// Precondition: Caller must not hold s.mu.
func (s *Shm) DecRef(ctx context.Context) {
- s.DecRefWithDestructor(ctx, s.destroy)
+ s.ShmRefs.DecRef(func() {
+ s.mfp.MemoryFile().DecRef(s.fr)
+ s.registry.remove(s)
+ })
}
// Msync implements memmap.MappingIdentity.Msync. Msync is a no-op for shm
@@ -642,11 +644,6 @@ func (s *Shm) Set(ctx context.Context, ds *linux.ShmidDS) error {
return nil
}
-func (s *Shm) destroy(context.Context) {
- s.mfp.MemoryFile().DecRef(s.fr)
- s.registry.remove(s)
-}
-
// MarkDestroyed marks a segment for destruction. The segment is actually
// destroyed once it has no references. MarkDestroyed may be called multiple
// times, and is safe to call after a segment has already been destroyed. See
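
The comment above describes Shm's self-reference lifecycle: a segment keeps one reference to itself until MarkDestroyed drops it, so destruction happens only after the last attached user also calls DecRef. A single-goroutine toy of that idea (illustrative names; the real code holds locks and lives in the registry):

    package main

    import (
        "fmt"
        "sync/atomic"
    )

    type segment struct {
        refCount  int64 // zero value: the segment's own self-reference
        destroyed bool  // the real Shm guards this with a mutex
    }

    func (s *segment) IncRef() { atomic.AddInt64(&s.refCount, 1) }

    func (s *segment) DecRef() {
        if atomic.AddInt64(&s.refCount, -1) == -1 {
            fmt.Println("segment destroyed")
        }
    }

    // MarkDestroyed drops the self-reference at most once; calling it
    // again afterwards is safe.
    func (s *segment) MarkDestroyed() {
        if !s.destroyed {
            s.destroyed = true
            s.DecRef()
        }
    }

    func main() {
        var s segment
        s.IncRef()        // a user attaches the segment
        s.MarkDestroyed() // e.g. shmctl(IPC_RMID); drops the self-reference
        s.MarkDestroyed() // no-op the second time
        s.DecRef()        // the user detaches; now "segment destroyed" prints
    }
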
diff --git a/pkg/sentry/kernel/shm/shm_refs.go b/pkg/sentry/kernel/shm/shm_refs.go
new file mode 100644
index 000000000..51e07d0b3
--- /dev/null
+++ b/pkg/sentry/kernel/shm/shm_refs.go
@@ -0,0 +1,118 @@
+package shm
+
+import (
+ "fmt"
+ "runtime"
+ "sync/atomic"
+
+ "gvisor.dev/gvisor/pkg/log"
+ refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
+)
+
+// ownerType is used to customize logging. Note that we use a pointer to T so
+// that we do not copy the entire object when passed as a format parameter.
+var ShmownerType *Shm
+
+// Refs implements refs.RefCounter. It keeps a reference count using atomic
+// operations and calls the destructor when the count reaches zero.
+//
+// Note that the number of references is actually refCount + 1 so that a default
+// zero-value Refs object contains one reference.
+//
+// TODO(gvisor.dev/issue/1486): Store stack traces when leak check is enabled in
+// a map with 16-bit hashes, and store the hash in the top 16 bits of refCount.
+// This will allow us to add stack trace information to the leak messages
+// without growing the size of Refs.
+//
+// +stateify savable
+type ShmRefs struct {
+ // refCount is composed of two fields:
+ //
+ // [32-bit speculative references]:[32-bit real references]
+ //
+ // Speculative references are used for TryIncRef, to avoid a CompareAndSwap
+ // loop. See IncRef, DecRef and TryIncRef for details of how these fields are
+ // used.
+ refCount int64
+}
+
+func (r *ShmRefs) finalize() {
+ var note string
+ switch refs_vfs1.GetLeakMode() {
+ case refs_vfs1.NoLeakChecking:
+ return
+ case refs_vfs1.UninitializedLeakChecking:
+ note = "(Leak checker uninitialized): "
+ }
+ if n := r.ReadRefs(); n != 0 {
+ log.Warningf("%sRefs %p owned by %T garbage collected with ref count of %d (want 0)", note, r, ShmownerType, n)
+ }
+}
+
+// EnableLeakCheck checks for reference leaks when Refs gets garbage collected.
+func (r *ShmRefs) EnableLeakCheck() {
+ if refs_vfs1.GetLeakMode() != refs_vfs1.NoLeakChecking {
+ runtime.SetFinalizer(r, (*ShmRefs).finalize)
+ }
+}
+
+// ReadRefs returns the current number of references. The returned count is
+// inherently racy and is unsafe to use without external synchronization.
+func (r *ShmRefs) ReadRefs() int64 {
+ // The zero value holds one reference, hence the +1 offset.
+ return atomic.LoadInt64(&r.refCount) + 1
+}
+
+// IncRef implements refs.RefCounter.IncRef.
+//
+//go:nosplit
+func (r *ShmRefs) IncRef() {
+ if v := atomic.AddInt64(&r.refCount, 1); v <= 0 {
+ panic(fmt.Sprintf("Incrementing non-positive ref count %p owned by %T", r, ShmownerType))
+ }
+}
+
+// TryIncRef implements refs.RefCounter.TryIncRef.
+//
+// To do this safely without a loop, a speculative reference is first acquired
+// on the object. This allows multiple concurrent TryIncRef calls to distinguish
+// other TryIncRef calls from genuine references held.
+//
+//go:nosplit
+func (r *ShmRefs) TryIncRef() bool {
+ const speculativeRef = 1 << 32
+ v := atomic.AddInt64(&r.refCount, speculativeRef)
+ if int32(v) < 0 {
+ // The object has already been freed; undo the speculative add.
+ atomic.AddInt64(&r.refCount, -speculativeRef)
+ return false
+ }
+ // Convert the speculative reference into a real reference.
+ atomic.AddInt64(&r.refCount, -speculativeRef+1)
+ return true
+}
+
+// DecRef implements refs.RefCounter.DecRef.
+//
+// Note that speculative references are counted here. Since they were added
+// prior to real references reaching zero, they will successfully convert to
+// real references. In other words, we see speculative references only in the
+// following case:
+//
+// A: TryIncRef [speculative increase => sees non-negative references]
+// B: DecRef [real decrease]
+// A: TryIncRef [transform speculative to real]
+//
+//go:nosplit
+func (r *ShmRefs) DecRef(destroy func()) {
+ switch v := atomic.AddInt64(&r.refCount, -1); {
+ case v < -1:
+ panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %T", r, ShmownerType))
+
+ case v == -1:
+ // The last reference has been dropped; run the destructor.
+ if destroy != nil {
+ destroy()
+ }
+ }
+}
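
EnableLeakCheck above registers a runtime finalizer on the Refs object, so a refcounted object that is garbage collected with references still outstanding produces a warning rather than vanishing silently. A self-contained sketch of the mechanism (the real version also consults refs.GetLeakMode and logs through pkg/log; finalizer timing is best-effort, so the warning below may not appear on every run):

    package main

    import (
        "fmt"
        "runtime"
        "sync/atomic"
        "time"
    )

    type leakyRefs struct {
        refCount int64
    }

    func (r *leakyRefs) readRefs() int64 {
        return atomic.LoadInt64(&r.refCount) + 1
    }

    func (r *leakyRefs) enableLeakCheck() {
        runtime.SetFinalizer(r, func(r *leakyRefs) {
            if n := r.readRefs(); n != 0 {
                fmt.Printf("refs garbage collected with ref count of %d (want 0)\n", n)
            }
        })
    }

    func main() {
        r := &leakyRefs{}
        r.enableLeakCheck()
        r = nil // drop the last pointer without ever calling DecRef
        runtime.GC()
        time.Sleep(100 * time.Millisecond) // finalizers run on a background goroutine
    }
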
diff --git a/pkg/sentry/kernel/shm/shm_state_autogen.go b/pkg/sentry/kernel/shm/shm_state_autogen.go
index c98632087..fa80353b6 100644
--- a/pkg/sentry/kernel/shm/shm_state_autogen.go
+++ b/pkg/sentry/kernel/shm/shm_state_autogen.go
@@ -47,7 +47,7 @@ func (x *Shm) StateTypeName() string {
func (x *Shm) StateFields() []string {
return []string{
- "AtomicRefCount",
+ "ShmRefs",
"mfp",
"registry",
"ID",
@@ -71,7 +71,7 @@ func (x *Shm) beforeSave() {}
func (x *Shm) StateSave(m state.Sink) {
x.beforeSave()
- m.Save(0, &x.AtomicRefCount)
+ m.Save(0, &x.ShmRefs)
m.Save(1, &x.mfp)
m.Save(2, &x.registry)
m.Save(3, &x.ID)
@@ -93,7 +93,7 @@ func (x *Shm) StateSave(m state.Sink) {
func (x *Shm) afterLoad() {}
func (x *Shm) StateLoad(m state.Source) {
- m.Load(0, &x.AtomicRefCount)
+ m.Load(0, &x.ShmRefs)
m.Load(1, &x.mfp)
m.Load(2, &x.registry)
m.Load(3, &x.ID)
@@ -112,7 +112,31 @@ func (x *Shm) StateLoad(m state.Source) {
m.Load(16, &x.pendingDestruction)
}
+func (x *ShmRefs) StateTypeName() string {
+ return "pkg/sentry/kernel/shm.ShmRefs"
+}
+
+func (x *ShmRefs) StateFields() []string {
+ return []string{
+ "refCount",
+ }
+}
+
+func (x *ShmRefs) beforeSave() {}
+
+func (x *ShmRefs) StateSave(m state.Sink) {
+ x.beforeSave()
+ m.Save(0, &x.refCount)
+}
+
+func (x *ShmRefs) afterLoad() {}
+
+func (x *ShmRefs) StateLoad(m state.Source) {
+ m.Load(0, &x.refCount)
+}
+
func init() {
state.Register((*Registry)(nil))
state.Register((*Shm)(nil))
+ state.Register((*ShmRefs)(nil))
}
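
The generated state hooks above follow the stateify contract: StateFields names one slot per saved field, StateSave writes each slot by index, and StateLoad reads them back in the same order. A rough round-trip sketch with map-based stand-ins for state.Sink and state.Source (the real interfaces are richer):

    package main

    import "fmt"

    type sink map[int]interface{}   // stand-in for state.Sink
    type source map[int]interface{} // stand-in for state.Source

    type shmRefs struct {
        refCount int64
    }

    func (x *shmRefs) stateFields() []string { return []string{"refCount"} }

    func (x *shmRefs) stateSave(m sink) {
        m[0] = x.refCount // slot 0 matches stateFields()[0]
    }

    func (x *shmRefs) stateLoad(m source) {
        x.refCount = m[0].(int64)
    }

    func main() {
        saved := sink{}
        (&shmRefs{refCount: 2}).stateSave(saved)

        var restored shmRefs
        restored.stateLoad(source(saved))
        fmt.Println(restored.refCount) // 2: the packed count survives save/restore
    }
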
diff --git a/pkg/sentry/mm/aio_context.go b/pkg/sentry/mm/aio_context.go
index 16fea53c4..7bf48cb2c 100644
--- a/pkg/sentry/mm/aio_context.go
+++ b/pkg/sentry/mm/aio_context.go
@@ -17,7 +17,6 @@ package mm
import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
- "gvisor.dev/gvisor/pkg/refs"
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sentry/pgalloc"
"gvisor.dev/gvisor/pkg/sentry/usage"
@@ -239,7 +238,7 @@ func (ctx *AIOContext) Drain() {
//
// +stateify savable
type aioMappable struct {
- refs.AtomicRefCount
+ aioMappableRefs
mfp pgalloc.MemoryFileProvider
fr memmap.FileRange
@@ -253,13 +252,13 @@ func newAIOMappable(mfp pgalloc.MemoryFileProvider) (*aioMappable, error) {
return nil, err
}
m := aioMappable{mfp: mfp, fr: fr}
- m.EnableLeakCheck("mm.aioMappable")
+ m.EnableLeakCheck()
return &m, nil
}
// DecRef implements refs.RefCounter.DecRef.
func (m *aioMappable) DecRef(ctx context.Context) {
- m.AtomicRefCount.DecRefWithDestructor(ctx, func(context.Context) {
+ m.aioMappableRefs.DecRef(func() {
m.mfp.MemoryFile().DecRef(m.fr)
})
}
diff --git a/pkg/sentry/mm/aio_mappable_refs.go b/pkg/sentry/mm/aio_mappable_refs.go
new file mode 100644
index 000000000..b99909f07
--- /dev/null
+++ b/pkg/sentry/mm/aio_mappable_refs.go
@@ -0,0 +1,118 @@
+package mm
+
+import (
+ "fmt"
+ "runtime"
+ "sync/atomic"
+
+ "gvisor.dev/gvisor/pkg/log"
+ refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
+)
+
+// ownerType is used to customize logging. Note that we use a pointer to T so
+// that we do not copy the entire object when passed as a format parameter.
+var aioMappableownerType *aioMappable
+
+// Refs implements refs.RefCounter. It keeps a reference count using atomic
+// operations and calls the destructor when the count reaches zero.
+//
+// Note that the number of references is actually refCount + 1 so that a default
+// zero-value Refs object contains one reference.
+//
+// TODO(gvisor.dev/issue/1486): Store stack traces when leak check is enabled in
+// a map with 16-bit hashes, and store the hash in the top 16 bits of refCount.
+// This will allow us to add stack trace information to the leak messages
+// without growing the size of Refs.
+//
+// +stateify savable
+type aioMappableRefs struct {
+ // refCount is composed of two fields:
+ //
+ // [32-bit speculative references]:[32-bit real references]
+ //
+ // Speculative references are used for TryIncRef, to avoid a CompareAndSwap
+ // loop. See IncRef, DecRef and TryIncRef for details of how these fields are
+ // used.
+ refCount int64
+}
+
+func (r *aioMappableRefs) finalize() {
+ var note string
+ switch refs_vfs1.GetLeakMode() {
+ case refs_vfs1.NoLeakChecking:
+ return
+ case refs_vfs1.UninitializedLeakChecking:
+ note = "(Leak checker uninitialized): "
+ }
+ if n := r.ReadRefs(); n != 0 {
+ log.Warningf("%sRefs %p owned by %T garbage collected with ref count of %d (want 0)", note, r, aioMappableownerType, n)
+ }
+}
+
+// EnableLeakCheck checks for reference leaks when Refs gets garbage collected.
+func (r *aioMappableRefs) EnableLeakCheck() {
+ if refs_vfs1.GetLeakMode() != refs_vfs1.NoLeakChecking {
+ runtime.SetFinalizer(r, (*aioMappableRefs).finalize)
+ }
+}
+
+// ReadRefs returns the current number of references. The returned count is
+// inherently racy and is unsafe to use without external synchronization.
+func (r *aioMappableRefs) ReadRefs() int64 {
+ // The zero value holds one reference, hence the +1 offset.
+ return atomic.LoadInt64(&r.refCount) + 1
+}
+
+// IncRef implements refs.RefCounter.IncRef.
+//
+//go:nosplit
+func (r *aioMappableRefs) IncRef() {
+ if v := atomic.AddInt64(&r.refCount, 1); v <= 0 {
+ panic(fmt.Sprintf("Incrementing non-positive ref count %p owned by %T", r, aioMappableownerType))
+ }
+}
+
+// TryIncRef implements refs.RefCounter.TryIncRef.
+//
+// To do this safely without a loop, a speculative reference is first acquired
+// on the object. This allows multiple concurrent TryIncRef calls to distinguish
+// other TryIncRef calls from genuine references held.
+//
+//go:nosplit
+func (r *aioMappableRefs) TryIncRef() bool {
+ const speculativeRef = 1 << 32
+ v := atomic.AddInt64(&r.refCount, speculativeRef)
+ if int32(v) < 0 {
+ // The object has already been freed; undo the speculative add.
+ atomic.AddInt64(&r.refCount, -speculativeRef)
+ return false
+ }
+ // Convert the speculative reference into a real reference.
+ atomic.AddInt64(&r.refCount, -speculativeRef+1)
+ return true
+}
+
+// DecRef implements refs.RefCounter.DecRef.
+//
+// Note that speculative references are counted here. Since they were added
+// prior to real references reaching zero, they will successfully convert to
+// real references. In other words, we see speculative references only in the
+// following case:
+//
+// A: TryIncRef [speculative increase => sees non-negative references]
+// B: DecRef [real decrease]
+// A: TryIncRef [transform speculative to real]
+//
+//go:nosplit
+func (r *aioMappableRefs) DecRef(destroy func()) {
+ switch v := atomic.AddInt64(&r.refCount, -1); {
+ case v < -1:
+ panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %T", r, aioMappableownerType))
+
+ case v == -1:
+ // The last reference has been dropped; run the destructor.
+ if destroy != nil {
+ destroy()
+ }
+ }
+}
diff --git a/pkg/sentry/mm/mm_state_autogen.go b/pkg/sentry/mm/mm_state_autogen.go
index 2f81a8240..8ab51450c 100644
--- a/pkg/sentry/mm/mm_state_autogen.go
+++ b/pkg/sentry/mm/mm_state_autogen.go
@@ -92,7 +92,7 @@ func (x *aioMappable) StateTypeName() string {
func (x *aioMappable) StateFields() []string {
return []string{
- "AtomicRefCount",
+ "aioMappableRefs",
"mfp",
"fr",
}
@@ -102,7 +102,7 @@ func (x *aioMappable) beforeSave() {}
func (x *aioMappable) StateSave(m state.Sink) {
x.beforeSave()
- m.Save(0, &x.AtomicRefCount)
+ m.Save(0, &x.aioMappableRefs)
m.Save(1, &x.mfp)
m.Save(2, &x.fr)
}
@@ -110,11 +110,34 @@ func (x *aioMappable) StateSave(m state.Sink) {
func (x *aioMappable) afterLoad() {}
func (x *aioMappable) StateLoad(m state.Source) {
- m.Load(0, &x.AtomicRefCount)
+ m.Load(0, &x.aioMappableRefs)
m.Load(1, &x.mfp)
m.Load(2, &x.fr)
}
+func (x *aioMappableRefs) StateTypeName() string {
+ return "pkg/sentry/mm.aioMappableRefs"
+}
+
+func (x *aioMappableRefs) StateFields() []string {
+ return []string{
+ "refCount",
+ }
+}
+
+func (x *aioMappableRefs) beforeSave() {}
+
+func (x *aioMappableRefs) StateSave(m state.Sink) {
+ x.beforeSave()
+ m.Save(0, &x.refCount)
+}
+
+func (x *aioMappableRefs) afterLoad() {}
+
+func (x *aioMappableRefs) StateLoad(m state.Source) {
+ m.Load(0, &x.refCount)
+}
+
func (x *fileRefcountSet) StateTypeName() string {
return "pkg/sentry/mm.fileRefcountSet"
}
@@ -565,7 +588,7 @@ func (x *SpecialMappable) StateTypeName() string {
func (x *SpecialMappable) StateFields() []string {
return []string{
- "AtomicRefCount",
+ "SpecialMappableRefs",
"mfp",
"fr",
"name",
@@ -576,7 +599,7 @@ func (x *SpecialMappable) beforeSave() {}
func (x *SpecialMappable) StateSave(m state.Sink) {
x.beforeSave()
- m.Save(0, &x.AtomicRefCount)
+ m.Save(0, &x.SpecialMappableRefs)
m.Save(1, &x.mfp)
m.Save(2, &x.fr)
m.Save(3, &x.name)
@@ -585,12 +608,35 @@ func (x *SpecialMappable) StateSave(m state.Sink) {
func (x *SpecialMappable) afterLoad() {}
func (x *SpecialMappable) StateLoad(m state.Source) {
- m.Load(0, &x.AtomicRefCount)
+ m.Load(0, &x.SpecialMappableRefs)
m.Load(1, &x.mfp)
m.Load(2, &x.fr)
m.Load(3, &x.name)
}
+func (x *SpecialMappableRefs) StateTypeName() string {
+ return "pkg/sentry/mm.SpecialMappableRefs"
+}
+
+func (x *SpecialMappableRefs) StateFields() []string {
+ return []string{
+ "refCount",
+ }
+}
+
+func (x *SpecialMappableRefs) beforeSave() {}
+
+func (x *SpecialMappableRefs) StateSave(m state.Sink) {
+ x.beforeSave()
+ m.Save(0, &x.refCount)
+}
+
+func (x *SpecialMappableRefs) afterLoad() {}
+
+func (x *SpecialMappableRefs) StateLoad(m state.Source) {
+ m.Load(0, &x.refCount)
+}
+
func (x *vmaSet) StateTypeName() string {
return "pkg/sentry/mm.vmaSet"
}
@@ -693,6 +739,7 @@ func init() {
state.Register((*ioResult)(nil))
state.Register((*AIOContext)(nil))
state.Register((*aioMappable)(nil))
+ state.Register((*aioMappableRefs)(nil))
state.Register((*fileRefcountSet)(nil))
state.Register((*fileRefcountnode)(nil))
state.Register((*fileRefcountSegmentDataSlices)(nil))
@@ -706,6 +753,7 @@ func init() {
state.Register((*pmanode)(nil))
state.Register((*pmaSegmentDataSlices)(nil))
state.Register((*SpecialMappable)(nil))
+ state.Register((*SpecialMappableRefs)(nil))
state.Register((*vmaSet)(nil))
state.Register((*vmanode)(nil))
state.Register((*vmaSegmentDataSlices)(nil))
diff --git a/pkg/sentry/mm/special_mappable.go b/pkg/sentry/mm/special_mappable.go
index 4cdb52eb6..f4c93baeb 100644
--- a/pkg/sentry/mm/special_mappable.go
+++ b/pkg/sentry/mm/special_mappable.go
@@ -16,7 +16,6 @@ package mm
import (
"gvisor.dev/gvisor/pkg/context"
- "gvisor.dev/gvisor/pkg/refs"
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sentry/pgalloc"
"gvisor.dev/gvisor/pkg/sentry/usage"
@@ -31,7 +30,7 @@ import (
//
// +stateify savable
type SpecialMappable struct {
- refs.AtomicRefCount
+ SpecialMappableRefs
mfp pgalloc.MemoryFileProvider
fr memmap.FileRange
@@ -45,13 +44,13 @@ type SpecialMappable struct {
// Preconditions: fr.Length() != 0.
func NewSpecialMappable(name string, mfp pgalloc.MemoryFileProvider, fr memmap.FileRange) *SpecialMappable {
m := SpecialMappable{mfp: mfp, fr: fr, name: name}
- m.EnableLeakCheck("mm.SpecialMappable")
+ m.EnableLeakCheck()
return &m
}
// DecRef implements refs.RefCounter.DecRef.
func (m *SpecialMappable) DecRef(ctx context.Context) {
- m.AtomicRefCount.DecRefWithDestructor(ctx, func(context.Context) {
+ m.SpecialMappableRefs.DecRef(func() {
m.mfp.MemoryFile().DecRef(m.fr)
})
}
diff --git a/pkg/sentry/mm/special_mappable_refs.go b/pkg/sentry/mm/special_mappable_refs.go
new file mode 100644
index 000000000..035bbe690
--- /dev/null
+++ b/pkg/sentry/mm/special_mappable_refs.go
@@ -0,0 +1,118 @@
+package mm
+
+import (
+ "fmt"
+ "runtime"
+ "sync/atomic"
+
+ "gvisor.dev/gvisor/pkg/log"
+ refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
+)
+
+// ownerType is used to customize logging. Note that we use a pointer to T so
+// that we do not copy the entire object when passed as a format parameter.
+var SpecialMappableownerType *SpecialMappable
+
+// Refs implements refs.RefCounter. It keeps a reference count using atomic
+// operations and calls the destructor when the count reaches zero.
+//
+// Note that the number of references is actually refCount + 1 so that a default
+// zero-value Refs object contains one reference.
+//
+// TODO(gvisor.dev/issue/1486): Store stack traces when leak check is enabled in
+// a map with 16-bit hashes, and store the hash in the top 16 bits of refCount.
+// This will allow us to add stack trace information to the leak messages
+// without growing the size of Refs.
+//
+// +stateify savable
+type SpecialMappableRefs struct {
+ // refCount is composed of two fields:
+ //
+ // [32-bit speculative references]:[32-bit real references]
+ //
+ // Speculative references are used for TryIncRef, to avoid a CompareAndSwap
+ // loop. See IncRef, DecRef and TryIncRef for details of how these fields are
+ // used.
+ refCount int64
+}
+
+func (r *SpecialMappableRefs) finalize() {
+ var note string
+ switch refs_vfs1.GetLeakMode() {
+ case refs_vfs1.NoLeakChecking:
+ return
+ case refs_vfs1.UninitializedLeakChecking:
+ note = "(Leak checker uninitialized): "
+ }
+ if n := r.ReadRefs(); n != 0 {
+ log.Warningf("%sRefs %p owned by %T garbage collected with ref count of %d (want 0)", note, r, SpecialMappableownerType, n)
+ }
+}
+
+// EnableLeakCheck checks for reference leaks when Refs gets garbage collected.
+func (r *SpecialMappableRefs) EnableLeakCheck() {
+ if refs_vfs1.GetLeakMode() != refs_vfs1.NoLeakChecking {
+ runtime.SetFinalizer(r, (*SpecialMappableRefs).finalize)
+ }
+}
+
+// ReadRefs returns the current number of references. The returned count is
+// inherently racy and is unsafe to use without external synchronization.
+func (r *SpecialMappableRefs) ReadRefs() int64 {
+ // The zero value holds one reference, hence the +1 offset.
+ return atomic.LoadInt64(&r.refCount) + 1
+}
+
+// IncRef implements refs.RefCounter.IncRef.
+//
+//go:nosplit
+func (r *SpecialMappableRefs) IncRef() {
+ if v := atomic.AddInt64(&r.refCount, 1); v <= 0 {
+ panic(fmt.Sprintf("Incrementing non-positive ref count %p owned by %T", r, SpecialMappableownerType))
+ }
+}
+
+// TryIncRef implements refs.RefCounter.TryIncRef.
+//
+// To do this safely without a loop, a speculative reference is first acquired
+// on the object. This allows multiple concurrent TryIncRef calls to distinguish
+// other TryIncRef calls from genuine references held.
+//
+//go:nosplit
+func (r *SpecialMappableRefs) TryIncRef() bool {
+ const speculativeRef = 1 << 32
+ v := atomic.AddInt64(&r.refCount, speculativeRef)
+ if int32(v) < 0 {
+ // The object has already been freed; undo the speculative add.
+ atomic.AddInt64(&r.refCount, -speculativeRef)
+ return false
+ }
+ // Convert the speculative reference into a real reference.
+ atomic.AddInt64(&r.refCount, -speculativeRef+1)
+ return true
+}
+
+// DecRef implements refs.RefCounter.DecRef.
+//
+// Note that speculative references are counted here. Since they were added
+// prior to real references reaching zero, they will successfully convert to
+// real references. In other words, we see speculative references only in the
+// following case:
+//
+// A: TryIncRef [speculative increase => sees non-negative references]
+// B: DecRef [real decrease]
+// A: TryIncRef [transform speculative to real]
+//
+//go:nosplit
+func (r *SpecialMappableRefs) DecRef(destroy func()) {
+ switch v := atomic.AddInt64(&r.refCount, -1); {
+ case v < -1:
+ panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %T", r, SpecialMappableownerType))
+
+ case v == -1:
+ // The last reference has been dropped; run the destructor.
+ if destroy != nil {
+ destroy()
+ }
+ }
+}
diff --git a/pkg/sentry/platform/ring0/defs_impl_arm64.go b/pkg/sentry/platform/ring0/defs_impl_arm64.go
index 2dac9ad14..424b66f76 100644
--- a/pkg/sentry/platform/ring0/defs_impl_arm64.go
+++ b/pkg/sentry/platform/ring0/defs_impl_arm64.go
@@ -3,11 +3,11 @@ package ring0
import (
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables"
- "reflect"
"fmt"
"gvisor.dev/gvisor/pkg/usermem"
"io"
+ "reflect"
)
// Useful bits.
diff --git a/pkg/sentry/socket/unix/socket_refs.go b/pkg/sentry/socket/unix/socket_refs.go
index 4c6ec186b..dababb85f 100644
--- a/pkg/sentry/socket/unix/socket_refs.go
+++ b/pkg/sentry/socket/unix/socket_refs.go
@@ -1,6 +1,7 @@
package unix
import (
+ "fmt"
"runtime"
"sync/atomic"
@@ -18,6 +19,11 @@ var socketOpsCommonownerType *socketOpsCommon
// Note that the number of references is actually refCount + 1 so that a default
// zero-value Refs object contains one reference.
//
+// TODO(gvisor.dev/issue/1486): Store stack traces when leak check is enabled in
+// a map with 16-bit hashes, and store the hash in the top 16 bits of refCount.
+// This will allow us to add stack trace information to the leak messages
+// without growing the size of Refs.
+//
// +stateify savable
type socketOpsCommonRefs struct {
// refCount is composed of two fields:
@@ -62,7 +68,7 @@ func (r *socketOpsCommonRefs) ReadRefs() int64 {
//go:nosplit
func (r *socketOpsCommonRefs) IncRef() {
if v := atomic.AddInt64(&r.refCount, 1); v <= 0 {
- panic("Incrementing non-positive ref count")
+ panic(fmt.Sprintf("Incrementing non-positive ref count %p owned by %T", r, socketOpsCommonownerType))
}
}
@@ -101,7 +107,7 @@ func (r *socketOpsCommonRefs) TryIncRef() bool {
func (r *socketOpsCommonRefs) DecRef(destroy func()) {
switch v := atomic.AddInt64(&r.refCount, -1); {
case v < -1:
- panic("Decrementing non-positive ref count")
+ panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %T", r, socketOpsCommonownerType))
case v == -1:
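
The improved panic messages above format a package-level typed nil (socketOpsCommonownerType) with %T. A tiny demonstration of why a nil pointer suffices: %T reports the static type without dereferencing or copying anything:

    package main

    import "fmt"

    type socketOpsCommon struct {
        // large struct body elided
    }

    // ownerType is never assigned; only its type matters.
    var ownerType *socketOpsCommon

    func main() {
        fmt.Printf("Decrementing non-positive ref count, owned by %T\n", ownerType)
        // Prints: ... owned by *main.socketOpsCommon
    }
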
diff --git a/pkg/sentry/socket/unix/transport/connectioned.go b/pkg/sentry/socket/unix/transport/connectioned.go
index c67b602f0..e3a75b519 100644
--- a/pkg/sentry/socket/unix/transport/connectioned.go
+++ b/pkg/sentry/socket/unix/transport/connectioned.go
@@ -142,9 +142,9 @@ func NewPair(ctx context.Context, stype linux.SockType, uid UniqueIDProvider) (E
}
q1 := &queue{ReaderQueue: a.Queue, WriterQueue: b.Queue, limit: initialLimit}
- q1.EnableLeakCheck("transport.queue")
+ q1.EnableLeakCheck()
q2 := &queue{ReaderQueue: b.Queue, WriterQueue: a.Queue, limit: initialLimit}
- q2.EnableLeakCheck("transport.queue")
+ q2.EnableLeakCheck()
if stype == linux.SOCK_STREAM {
a.receiver = &streamQueueReceiver{queueReceiver: queueReceiver{q1}}
@@ -300,14 +300,14 @@ func (e *connectionedEndpoint) BidirectionalConnect(ctx context.Context, ce Conn
}
readQueue := &queue{ReaderQueue: ce.WaiterQueue(), WriterQueue: ne.Queue, limit: initialLimit}
- readQueue.EnableLeakCheck("transport.queue")
+ readQueue.EnableLeakCheck()
ne.connected = &connectedEndpoint{
endpoint: ce,
writeQueue: readQueue,
}
writeQueue := &queue{ReaderQueue: ne.Queue, WriterQueue: ce.WaiterQueue(), limit: initialLimit}
- writeQueue.EnableLeakCheck("transport.queue")
+ writeQueue.EnableLeakCheck()
if e.stype == linux.SOCK_STREAM {
ne.receiver = &streamQueueReceiver{queueReceiver: queueReceiver{readQueue: writeQueue}}
} else {
diff --git a/pkg/sentry/socket/unix/transport/connectionless.go b/pkg/sentry/socket/unix/transport/connectionless.go
index 70ee8f9b8..4751b2fd8 100644
--- a/pkg/sentry/socket/unix/transport/connectionless.go
+++ b/pkg/sentry/socket/unix/transport/connectionless.go
@@ -42,7 +42,7 @@ var (
func NewConnectionless(ctx context.Context) Endpoint {
ep := &connectionlessEndpoint{baseEndpoint{Queue: &waiter.Queue{}}}
q := queue{ReaderQueue: ep.Queue, WriterQueue: &waiter.Queue{}, limit: initialLimit}
- q.EnableLeakCheck("transport.queue")
+ q.EnableLeakCheck()
ep.receiver = &queueReceiver{readQueue: &q}
return ep
}
diff --git a/pkg/sentry/socket/unix/transport/queue.go b/pkg/sentry/socket/unix/transport/queue.go
index ef6043e19..342def28f 100644
--- a/pkg/sentry/socket/unix/transport/queue.go
+++ b/pkg/sentry/socket/unix/transport/queue.go
@@ -16,7 +16,6 @@ package transport
import (
"gvisor.dev/gvisor/pkg/context"
- "gvisor.dev/gvisor/pkg/refs"
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/syserr"
"gvisor.dev/gvisor/pkg/tcpip"
@@ -28,7 +27,7 @@ import (
//
// +stateify savable
type queue struct {
- refs.AtomicRefCount
+ queueRefs
ReaderQueue *waiter.Queue
WriterQueue *waiter.Queue
@@ -68,11 +67,13 @@ func (q *queue) Reset(ctx context.Context) {
q.mu.Unlock()
}
-// DecRef implements RefCounter.DecRef with destructor q.Reset.
+// DecRef implements RefCounter.DecRef.
func (q *queue) DecRef(ctx context.Context) {
- q.DecRefWithDestructor(ctx, q.Reset)
- // We don't need to notify after resetting because no one cares about
- // this queue after all references have been dropped.
+ q.queueRefs.DecRef(func() {
+ // We don't need to notify after resetting because no one cares about
+ // this queue after all references have been dropped.
+ q.Reset(ctx)
+ })
}
// IsReadable determines if q is currently readable.
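
queue.DecRef above bridges an API gap: the old destructor received a context.Context, while the generated DecRef takes a bare func(), so the context is captured by the closure instead. A minimal sketch of that adaptation, using the standard library's context as a stand-in for gVisor's:

    package main

    import (
        "context"
        "fmt"
        "sync/atomic"
    )

    type queueRefs struct {
        refCount int64
    }

    func (r *queueRefs) DecRef(destroy func()) {
        if atomic.AddInt64(&r.refCount, -1) == -1 && destroy != nil {
            destroy()
        }
    }

    type queue struct {
        queueRefs
    }

    func (q *queue) Reset(ctx context.Context) {
        fmt.Println("queue reset; pending messages dropped")
    }

    // DecRef adapts the context-taking destructor to the func() callback.
    func (q *queue) DecRef(ctx context.Context) {
        q.queueRefs.DecRef(func() {
            q.Reset(ctx) // ctx is captured by the closure
        })
    }

    func main() {
        q := &queue{} // zero value: one reference
        q.DecRef(context.Background())
    }
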
diff --git a/pkg/sentry/socket/unix/transport/queue_refs.go b/pkg/sentry/socket/unix/transport/queue_refs.go
new file mode 100644
index 000000000..0d4e34988
--- /dev/null
+++ b/pkg/sentry/socket/unix/transport/queue_refs.go
@@ -0,0 +1,118 @@
+package transport
+
+import (
+ "fmt"
+ "runtime"
+ "sync/atomic"
+
+ "gvisor.dev/gvisor/pkg/log"
+ refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
+)
+
+// ownerType is used to customize logging. Note that we use a pointer to T so
+// that we do not copy the entire object when passed as a format parameter.
+var queueownerType *queue
+
+// Refs implements refs.RefCounter. It keeps a reference count using atomic
+// operations and calls the destructor when the count reaches zero.
+//
+// Note that the number of references is actually refCount + 1 so that a default
+// zero-value Refs object contains one reference.
+//
+// TODO(gvisor.dev/issue/1486): Store stack traces when leak check is enabled in
+// a map with 16-bit hashes, and store the hash in the top 16 bits of refCount.
+// This will allow us to add stack trace information to the leak messages
+// without growing the size of Refs.
+//
+// +stateify savable
+type queueRefs struct {
+ // refCount is composed of two fields:
+ //
+ // [32-bit speculative references]:[32-bit real references]
+ //
+ // Speculative references are used for TryIncRef, to avoid a CompareAndSwap
+ // loop. See IncRef, DecRef and TryIncRef for details of how these fields are
+ // used.
+ refCount int64
+}
+
+func (r *queueRefs) finalize() {
+ var note string
+ switch refs_vfs1.GetLeakMode() {
+ case refs_vfs1.NoLeakChecking:
+ return
+ case refs_vfs1.UninitializedLeakChecking:
+ note = "(Leak checker uninitialized): "
+ }
+ if n := r.ReadRefs(); n != 0 {
+ log.Warningf("%sRefs %p owned by %T garbage collected with ref count of %d (want 0)", note, r, queueownerType, n)
+ }
+}
+
+// EnableLeakCheck checks for reference leaks when Refs gets garbage collected.
+func (r *queueRefs) EnableLeakCheck() {
+ if refs_vfs1.GetLeakMode() != refs_vfs1.NoLeakChecking {
+ runtime.SetFinalizer(r, (*queueRefs).finalize)
+ }
+}
+
+// ReadRefs returns the current number of references. The returned count is
+// inherently racy and is unsafe to use without external synchronization.
+func (r *queueRefs) ReadRefs() int64 {
+ // The zero value holds one reference, hence the +1 offset.
+ return atomic.LoadInt64(&r.refCount) + 1
+}
+
+// IncRef implements refs.RefCounter.IncRef.
+//
+//go:nosplit
+func (r *queueRefs) IncRef() {
+ if v := atomic.AddInt64(&r.refCount, 1); v <= 0 {
+ panic(fmt.Sprintf("Incrementing non-positive ref count %p owned by %T", r, queueownerType))
+ }
+}
+
+// TryIncRef implements refs.RefCounter.TryIncRef.
+//
+// To do this safely without a loop, a speculative reference is first acquired
+// on the object. This allows multiple concurrent TryIncRef calls to distinguish
+// other TryIncRef calls from genuine references held.
+//
+//go:nosplit
+func (r *queueRefs) TryIncRef() bool {
+ const speculativeRef = 1 << 32
+ v := atomic.AddInt64(&r.refCount, speculativeRef)
+ if int32(v) < 0 {
+ // The object has already been freed; undo the speculative add.
+ atomic.AddInt64(&r.refCount, -speculativeRef)
+ return false
+ }
+ // Convert the speculative reference into a real reference.
+ atomic.AddInt64(&r.refCount, -speculativeRef+1)
+ return true
+}
+
+// DecRef implements refs.RefCounter.DecRef.
+//
+// Note that speculative references are counted here. Since they were added
+// prior to real references reaching zero, they will successfully convert to
+// real references. In other words, we see speculative references only in the
+// following case:
+//
+// A: TryIncRef [speculative increase => sees non-negative references]
+// B: DecRef [real decrease]
+// A: TryIncRef [transform speculative to real]
+//
+//go:nosplit
+func (r *queueRefs) DecRef(destroy func()) {
+ switch v := atomic.AddInt64(&r.refCount, -1); {
+ case v < -1:
+ panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %T", r, queueownerType))
+
+ case v == -1:
+ // The last reference has been dropped; run the destructor.
+ if destroy != nil {
+ destroy()
+ }
+ }
+}
diff --git a/pkg/sentry/socket/unix/transport/transport_state_autogen.go b/pkg/sentry/socket/unix/transport/transport_state_autogen.go
index 4b88ea3ae..91e632833 100644
--- a/pkg/sentry/socket/unix/transport/transport_state_autogen.go
+++ b/pkg/sentry/socket/unix/transport/transport_state_autogen.go
@@ -71,7 +71,7 @@ func (x *queue) StateTypeName() string {
func (x *queue) StateFields() []string {
return []string{
- "AtomicRefCount",
+ "queueRefs",
"ReaderQueue",
"WriterQueue",
"closed",
@@ -86,7 +86,7 @@ func (x *queue) beforeSave() {}
func (x *queue) StateSave(m state.Sink) {
x.beforeSave()
- m.Save(0, &x.AtomicRefCount)
+ m.Save(0, &x.queueRefs)
m.Save(1, &x.ReaderQueue)
m.Save(2, &x.WriterQueue)
m.Save(3, &x.closed)
@@ -99,7 +99,7 @@ func (x *queue) StateSave(m state.Sink) {
func (x *queue) afterLoad() {}
func (x *queue) StateLoad(m state.Source) {
- m.Load(0, &x.AtomicRefCount)
+ m.Load(0, &x.queueRefs)
m.Load(1, &x.ReaderQueue)
m.Load(2, &x.WriterQueue)
m.Load(3, &x.closed)
@@ -109,6 +109,29 @@ func (x *queue) StateLoad(m state.Source) {
m.Load(7, &x.dataList)
}
+func (x *queueRefs) StateTypeName() string {
+ return "pkg/sentry/socket/unix/transport.queueRefs"
+}
+
+func (x *queueRefs) StateFields() []string {
+ return []string{
+ "refCount",
+ }
+}
+
+func (x *queueRefs) beforeSave() {}
+
+func (x *queueRefs) StateSave(m state.Sink) {
+ x.beforeSave()
+ m.Save(0, &x.refCount)
+}
+
+func (x *queueRefs) afterLoad() {}
+
+func (x *queueRefs) StateLoad(m state.Source) {
+ m.Load(0, &x.refCount)
+}
+
func (x *messageList) StateTypeName() string {
return "pkg/sentry/socket/unix/transport.messageList"
}
@@ -339,6 +362,7 @@ func init() {
state.Register((*connectionedEndpoint)(nil))
state.Register((*connectionlessEndpoint)(nil))
state.Register((*queue)(nil))
+ state.Register((*queueRefs)(nil))
state.Register((*messageList)(nil))
state.Register((*messageEntry)(nil))
state.Register((*ControlMessages)(nil))
diff --git a/pkg/sentry/vfs/file_description.go b/pkg/sentry/vfs/file_description.go
index 3219a9e13..22a54fa48 100644
--- a/pkg/sentry/vfs/file_description.go
+++ b/pkg/sentry/vfs/file_description.go
@@ -38,9 +38,7 @@ import (
//
// FileDescription is analogous to Linux's struct file.
type FileDescription struct {
- // refs is the reference count. refs is accessed using atomic memory
- // operations.
- refs int64
+ FileDescriptionRefs
// flagsMu protects statusFlags and asyncHandler below.
flagsMu sync.Mutex
@@ -131,7 +129,7 @@ func (fd *FileDescription) Init(impl FileDescriptionImpl, flags uint32, mnt *Mou
}
}
- fd.refs = 1
+ fd.EnableLeakCheck()
// Remove "file creation flags" to mirror the behavior from file.f_flags in
// fs/open.c:do_dentry_open.
@@ -149,30 +147,9 @@ func (fd *FileDescription) Init(impl FileDescriptionImpl, flags uint32, mnt *Mou
return nil
}
-// IncRef increments fd's reference count.
-func (fd *FileDescription) IncRef() {
- atomic.AddInt64(&fd.refs, 1)
-}
-
-// TryIncRef increments fd's reference count and returns true. If fd's
-// reference count is already zero, TryIncRef does nothing and returns false.
-//
-// TryIncRef does not require that a reference is held on fd.
-func (fd *FileDescription) TryIncRef() bool {
- for {
- refs := atomic.LoadInt64(&fd.refs)
- if refs <= 0 {
- return false
- }
- if atomic.CompareAndSwapInt64(&fd.refs, refs, refs+1) {
- return true
- }
- }
-}
-
// DecRef decrements fd's reference count.
func (fd *FileDescription) DecRef(ctx context.Context) {
- if refs := atomic.AddInt64(&fd.refs, -1); refs == 0 {
+ fd.FileDescriptionRefs.DecRef(func() {
// Unregister fd from all epoll instances.
fd.epollMu.Lock()
epolls := fd.epolls
@@ -208,15 +185,7 @@ func (fd *FileDescription) DecRef(ctx context.Context) {
}
fd.asyncHandler = nil
fd.flagsMu.Unlock()
- } else if refs < 0 {
- panic("FileDescription.DecRef() called without holding a reference")
- }
-}
-
-// Refs returns the current number of references. The returned count
-// is inherently racy and is unsafe to use without external synchronization.
-func (fd *FileDescription) Refs() int64 {
- return atomic.LoadInt64(&fd.refs)
+ })
}
// Mount returns the mount on which fd was opened. It does not take a reference
diff --git a/pkg/sentry/vfs/file_description_refs.go b/pkg/sentry/vfs/file_description_refs.go
new file mode 100644
index 000000000..bdd7e6554
--- /dev/null
+++ b/pkg/sentry/vfs/file_description_refs.go
@@ -0,0 +1,118 @@
+package vfs
+
+import (
+ "fmt"
+ "runtime"
+ "sync/atomic"
+
+ "gvisor.dev/gvisor/pkg/log"
+ refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
+)
+
+// ownerType is used to customize logging. Note that we use a pointer to T so
+// that we do not copy the entire object when passed as a format parameter.
+var FileDescriptionownerType *FileDescription
+
+// Refs implements refs.RefCounter. It keeps a reference count using atomic
+// operations and calls the destructor when the count reaches zero.
+//
+// Note that the number of references is actually refCount + 1 so that a default
+// zero-value Refs object contains one reference.
+//
+// TODO(gvisor.dev/issue/1486): Store stack traces when leak check is enabled in
+// a map with 16-bit hashes, and store the hash in the top 16 bits of refCount.
+// This will allow us to add stack trace information to the leak messages
+// without growing the size of Refs.
+//
+// +stateify savable
+type FileDescriptionRefs struct {
+ // refCount is composed of two fields:
+ //
+ // [32-bit speculative references]:[32-bit real references]
+ //
+ // Speculative references are used for TryIncRef, to avoid a CompareAndSwap
+ // loop. See IncRef, DecRef and TryIncRef for details of how these fields are
+ // used.
+ refCount int64
+}
+
+func (r *FileDescriptionRefs) finalize() {
+ var note string
+ switch refs_vfs1.GetLeakMode() {
+ case refs_vfs1.NoLeakChecking:
+ return
+ case refs_vfs1.UninitializedLeakChecking:
+ note = "(Leak checker uninitialized): "
+ }
+ if n := r.ReadRefs(); n != 0 {
+ log.Warningf("%sRefs %p owned by %T garbage collected with ref count of %d (want 0)", note, r, FileDescriptionownerType, n)
+ }
+}
+
+// EnableLeakCheck checks for reference leaks when Refs gets garbage collected.
+func (r *FileDescriptionRefs) EnableLeakCheck() {
+ if refs_vfs1.GetLeakMode() != refs_vfs1.NoLeakChecking {
+ runtime.SetFinalizer(r, (*FileDescriptionRefs).finalize)
+ }
+}
+
+// ReadRefs returns the current number of references. The returned count is
+// inherently racy and is unsafe to use without external synchronization.
+func (r *FileDescriptionRefs) ReadRefs() int64 {
+ // The zero value holds one reference, hence the +1 offset.
+ return atomic.LoadInt64(&r.refCount) + 1
+}
+
+// IncRef implements refs.RefCounter.IncRef.
+//
+//go:nosplit
+func (r *FileDescriptionRefs) IncRef() {
+ if v := atomic.AddInt64(&r.refCount, 1); v <= 0 {
+ panic(fmt.Sprintf("Incrementing non-positive ref count %p owned by %T", r, FileDescriptionownerType))
+ }
+}
+
+// TryIncRef implements refs.RefCounter.TryIncRef.
+//
+// To do this safely without a loop, a speculative reference is first acquired
+// on the object. This allows multiple concurrent TryIncRef calls to distinguish
+// other TryIncRef calls from genuine references held.
+//
+//go:nosplit
+func (r *FileDescriptionRefs) TryIncRef() bool {
+ const speculativeRef = 1 << 32
+ v := atomic.AddInt64(&r.refCount, speculativeRef)
+ if int32(v) < 0 {
+ // The object has already been freed; undo the speculative add.
+ atomic.AddInt64(&r.refCount, -speculativeRef)
+ return false
+ }
+ // Convert the speculative reference into a real reference.
+ atomic.AddInt64(&r.refCount, -speculativeRef+1)
+ return true
+}
+
+// DecRef implements refs.RefCounter.DecRef.
+//
+// Note that speculative references are counted here. Since they were added
+// prior to real references reaching zero, they will successfully convert to
+// real references. In other words, we see speculative references only in the
+// following case:
+//
+// A: TryIncRef [speculative increase => sees non-negative references]
+// B: DecRef [real decrease]
+// A: TryIncRef [transform speculative to real]
+//
+//go:nosplit
+func (r *FileDescriptionRefs) DecRef(destroy func()) {
+ switch v := atomic.AddInt64(&r.refCount, -1); {
+ case v < -1:
+ panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %T", r, FileDescriptionownerType))
+
+ case v == -1:
+ // The last reference has been dropped; run the destructor.
+ if destroy != nil {
+ destroy()
+ }
+ }
+}
diff --git a/pkg/sentry/vfs/filesystem.go b/pkg/sentry/vfs/filesystem.go
index 2c60cfab2..46851f638 100644
--- a/pkg/sentry/vfs/filesystem.go
+++ b/pkg/sentry/vfs/filesystem.go
@@ -15,8 +15,6 @@
package vfs
import (
- "sync/atomic"
-
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/fspath"
@@ -34,9 +32,7 @@ import (
//
// +stateify savable
type Filesystem struct {
- // refs is the reference count. refs is accessed using atomic memory
- // operations.
- refs int64
+ FilesystemRefs
// vfs is the VirtualFilesystem that uses this Filesystem. vfs is
// immutable.
@@ -52,7 +48,7 @@ type Filesystem struct {
// Init must be called before first use of fs.
func (fs *Filesystem) Init(vfsObj *VirtualFilesystem, fsType FilesystemType, impl FilesystemImpl) {
- fs.refs = 1
+ fs.EnableLeakCheck()
fs.vfs = vfsObj
fs.fsType = fsType
fs.impl = impl
@@ -76,39 +72,14 @@ func (fs *Filesystem) Impl() FilesystemImpl {
return fs.impl
}
-// IncRef increments fs' reference count.
-func (fs *Filesystem) IncRef() {
- if atomic.AddInt64(&fs.refs, 1) <= 1 {
- panic("Filesystem.IncRef() called without holding a reference")
- }
-}
-
-// TryIncRef increments fs' reference count and returns true. If fs' reference
-// count is zero, TryIncRef does nothing and returns false.
-//
-// TryIncRef does not require that a reference is held on fs.
-func (fs *Filesystem) TryIncRef() bool {
- for {
- refs := atomic.LoadInt64(&fs.refs)
- if refs <= 0 {
- return false
- }
- if atomic.CompareAndSwapInt64(&fs.refs, refs, refs+1) {
- return true
- }
- }
-}
-
// DecRef decrements fs' reference count.
func (fs *Filesystem) DecRef(ctx context.Context) {
- if refs := atomic.AddInt64(&fs.refs, -1); refs == 0 {
+ fs.FilesystemRefs.DecRef(func() {
fs.vfs.filesystemsMu.Lock()
delete(fs.vfs.filesystems, fs)
fs.vfs.filesystemsMu.Unlock()
fs.impl.Release(ctx)
- } else if refs < 0 {
- panic("Filesystem.decRef() called without holding a reference")
- }
+ })
}
// FilesystemImpl contains implementation details for a Filesystem.
diff --git a/pkg/sentry/vfs/filesystem_refs.go b/pkg/sentry/vfs/filesystem_refs.go
new file mode 100644
index 000000000..38a9a986f
--- /dev/null
+++ b/pkg/sentry/vfs/filesystem_refs.go
@@ -0,0 +1,118 @@
+package vfs
+
+import (
+ "fmt"
+ "runtime"
+ "sync/atomic"
+
+ "gvisor.dev/gvisor/pkg/log"
+ refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
+)
+
+// ownerType is used to customize logging. Note that we use a pointer to T so
+// that we do not copy the entire object when passed as a format parameter.
+var FilesystemownerType *Filesystem
+
+// Refs implements refs.RefCounter. It keeps a reference count using atomic
+// operations and calls the destructor when the count reaches zero.
+//
+// Note that the number of references is actually refCount + 1 so that a default
+// zero-value Refs object contains one reference.
+//
+// TODO(gvisor.dev/issue/1486): Store stack traces when leak check is enabled in
+// a map with 16-bit hashes, and store the hash in the top 16 bits of refCount.
+// This will allow us to add stack trace information to the leak messages
+// without growing the size of Refs.
+//
+// +stateify savable
+type FilesystemRefs struct {
+ // refCount is composed of two fields:
+ //
+ // [32-bit speculative references]:[32-bit real references]
+ //
+ // Speculative references are used for TryIncRef, to avoid a CompareAndSwap
+ // loop. See IncRef, DecRef and TryIncRef for details of how these fields are
+ // used.
+ refCount int64
+}
+
+func (r *FilesystemRefs) finalize() {
+ var note string
+ switch refs_vfs1.GetLeakMode() {
+ case refs_vfs1.NoLeakChecking:
+ return
+ case refs_vfs1.UninitializedLeakChecking:
+ note = "(Leak checker uninitialized): "
+ }
+ if n := r.ReadRefs(); n != 0 {
+ log.Warningf("%sRefs %p owned by %T garbage collected with ref count of %d (want 0)", note, r, FilesystemownerType, n)
+ }
+}
+
+// EnableLeakCheck checks for reference leaks when Refs gets garbage collected.
+func (r *FilesystemRefs) EnableLeakCheck() {
+ if refs_vfs1.GetLeakMode() != refs_vfs1.NoLeakChecking {
+ runtime.SetFinalizer(r, (*FilesystemRefs).finalize)
+ }
+}
+
+// ReadRefs returns the current number of references. The returned count is
+// inherently racy and is unsafe to use without external synchronization.
+func (r *FilesystemRefs) ReadRefs() int64 {
+ // The zero value holds one reference, hence the +1 offset.
+ return atomic.LoadInt64(&r.refCount) + 1
+}
+
+// IncRef implements refs.RefCounter.IncRef.
+//
+//go:nosplit
+func (r *FilesystemRefs) IncRef() {
+ if v := atomic.AddInt64(&r.refCount, 1); v <= 0 {
+ panic(fmt.Sprintf("Incrementing non-positive ref count %p owned by %T", r, FilesystemownerType))
+ }
+}
+
+// TryIncRef implements refs.RefCounter.TryIncRef.
+//
+// To do this safely without a loop, a speculative reference is first acquired
+// on the object. This allows multiple concurrent TryIncRef calls to distinguish
+// other TryIncRef calls from genuine references held.
+//
+//go:nosplit
+func (r *FilesystemRefs) TryIncRef() bool {
+ const speculativeRef = 1 << 32
+ v := atomic.AddInt64(&r.refCount, speculativeRef)
+ if int32(v) < 0 {
+ // The object has already been freed; undo the speculative add.
+ atomic.AddInt64(&r.refCount, -speculativeRef)
+ return false
+ }
+ // Convert the speculative reference into a real reference.
+ atomic.AddInt64(&r.refCount, -speculativeRef+1)
+ return true
+}
+
+// DecRef implements refs.RefCounter.DecRef.
+//
+// Note that speculative references are counted here. Since they were added
+// prior to real references reaching zero, they will successfully convert to
+// real references. In other words, we see speculative references only in the
+// following case:
+//
+// A: TryIncRef [speculative increase => sees non-negative references]
+// B: DecRef [real decrease]
+// A: TryIncRef [transform speculative to real]
+//
+//go:nosplit
+func (r *FilesystemRefs) DecRef(destroy func()) {
+ switch v := atomic.AddInt64(&r.refCount, -1); {
+ case v < -1:
+ panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %T", r, FilesystemownerType))
+
+ case v == -1:
+ // The last reference has been dropped; run the destructor.
+ if destroy != nil {
+ destroy()
+ }
+ }
+}
diff --git a/pkg/sentry/vfs/mount.go b/pkg/sentry/vfs/mount.go
index cd5456eef..db5fb3bb1 100644
--- a/pkg/sentry/vfs/mount.go
+++ b/pkg/sentry/vfs/mount.go
@@ -128,16 +128,14 @@ func (mnt *Mount) Options() MountOptions {
//
// +stateify savable
type MountNamespace struct {
+ MountNamespaceRefs
+
// Owner is the usernamespace that owns this mount namespace.
Owner *auth.UserNamespace
// root is the MountNamespace's root mount. root is immutable.
root *Mount
- // refs is the reference count. refs is accessed using atomic memory
- // operations.
- refs int64
-
// mountpoints maps all Dentries which are mount points in this namespace
// to the number of Mounts for which they are mount points. mountpoints is
// protected by VirtualFilesystem.mountMu.
@@ -168,9 +166,9 @@ func (vfs *VirtualFilesystem) NewMountNamespace(ctx context.Context, creds *auth
}
mntns := &MountNamespace{
Owner: creds.UserNamespace,
- refs: 1,
mountpoints: make(map[*Dentry]uint32),
}
+ mntns.EnableLeakCheck()
mntns.root = newMount(vfs, fs, root, mntns, &MountOptions{})
return mntns, nil
}
@@ -509,17 +507,10 @@ func (mnt *Mount) DecRef(ctx context.Context) {
}
}
-// IncRef increments mntns' reference count.
-func (mntns *MountNamespace) IncRef() {
- if atomic.AddInt64(&mntns.refs, 1) <= 1 {
- panic("MountNamespace.IncRef() called without holding a reference")
- }
-}
-
// DecRef decrements mntns' reference count.
func (mntns *MountNamespace) DecRef(ctx context.Context) {
vfs := mntns.root.fs.VirtualFilesystem()
- if refs := atomic.AddInt64(&mntns.refs, -1); refs == 0 {
+ mntns.MountNamespaceRefs.DecRef(func() {
vfs.mountMu.Lock()
vfs.mounts.seq.BeginWrite()
vdsToDecRef, mountsToDecRef := vfs.umountRecursiveLocked(mntns.root, &umountRecursiveOptions{
@@ -533,9 +524,7 @@ func (mntns *MountNamespace) DecRef(ctx context.Context) {
for _, mnt := range mountsToDecRef {
mnt.DecRef(ctx)
}
- } else if refs < 0 {
- panic("MountNamespace.DecRef() called without holding a reference")
- }
+ })
}
// getMountAt returns the last Mount in the stack mounted at (mnt, d). It takes
diff --git a/pkg/sentry/vfs/mount_namespace_refs.go b/pkg/sentry/vfs/mount_namespace_refs.go
new file mode 100644
index 000000000..63285fb8e
--- /dev/null
+++ b/pkg/sentry/vfs/mount_namespace_refs.go
@@ -0,0 +1,118 @@
+package vfs
+
+import (
+ "fmt"
+ "runtime"
+ "sync/atomic"
+
+ "gvisor.dev/gvisor/pkg/log"
+ refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
+)
+
+// ownerType is used to customize logging. Note that we use a pointer to T so
+// that we do not copy the entire object when passed as a format parameter.
+var MountNamespaceownerType *MountNamespace
+
+// Refs implements refs.RefCounter. It keeps a reference count using atomic
+// operations and calls the destructor when the count reaches zero.
+//
+// Note that the number of references is actually refCount + 1 so that a default
+// zero-value Refs object contains one reference.
+//
+// TODO(gvisor.dev/issue/1486): Store stack traces when leak check is enabled in
+// a map with 16-bit hashes, and store the hash in the top 16 bits of refCount.
+// This will allow us to add stack trace information to the leak messages
+// without growing the size of Refs.
+//
+// +stateify savable
+type MountNamespaceRefs struct {
+ // refCount is composed of two fields:
+ //
+ // [32-bit speculative references]:[32-bit real references]
+ //
+ // Speculative references are used for TryIncRef, to avoid a CompareAndSwap
+ // loop. See IncRef, DecRef and TryIncRef for details of how these fields are
+ // used.
+ refCount int64
+}
+
+func (r *MountNamespaceRefs) finalize() {
+ var note string
+ switch refs_vfs1.GetLeakMode() {
+ case refs_vfs1.NoLeakChecking:
+ return
+ case refs_vfs1.UninitializedLeakChecking:
+ note = "(Leak checker uninitialized): "
+ }
+ if n := r.ReadRefs(); n != 0 {
+ log.Warningf("%sRefs %p owned by %T garbage collected with ref count of %d (want 0)", note, r, MountNamespaceownerType, n)
+ }
+}
+
+// EnableLeakCheck checks for reference leaks when Refs gets garbage collected.
+func (r *MountNamespaceRefs) EnableLeakCheck() {
+ if refs_vfs1.GetLeakMode() != refs_vfs1.NoLeakChecking {
+ runtime.SetFinalizer(r, (*MountNamespaceRefs).finalize)
+ }
+}
+
+// ReadRefs returns the current number of references. The returned count is
+// inherently racy and is unsafe to use without external synchronization.
+func (r *MountNamespaceRefs) ReadRefs() int64 {
+ // The zero value holds one reference, hence the +1 offset.
+ return atomic.LoadInt64(&r.refCount) + 1
+}
+
+// IncRef implements refs.RefCounter.IncRef.
+//
+//go:nosplit
+func (r *MountNamespaceRefs) IncRef() {
+ if v := atomic.AddInt64(&r.refCount, 1); v <= 0 {
+ panic(fmt.Sprintf("Incrementing non-positive ref count %p owned by %T", r, MountNamespaceownerType))
+ }
+}
+
+// TryIncRef implements refs.RefCounter.TryIncRef.
+//
+// To do this safely without a loop, a speculative reference is first acquired
+// on the object. This allows multiple concurrent TryIncRef calls to distinguish
+// other TryIncRef calls from genuine references held.
+//
+//go:nosplit
+func (r *MountNamespaceRefs) TryIncRef() bool {
+ const speculativeRef = 1 << 32
+ v := atomic.AddInt64(&r.refCount, speculativeRef)
+ if int32(v) < 0 {
+ // The object has already been freed; undo the speculative add.
+ atomic.AddInt64(&r.refCount, -speculativeRef)
+ return false
+ }
+ // Convert the speculative reference into a real reference.
+ atomic.AddInt64(&r.refCount, -speculativeRef+1)
+ return true
+}
+
+// DecRef implements refs.RefCounter.DecRef.
+//
+// Note that speculative references are counted here. Since they were added
+// prior to real references reaching zero, they will successfully convert to
+// real references. In other words, we see speculative references only in the
+// following case:
+//
+// A: TryIncRef [speculative increase => sees non-negative references]
+// B: DecRef [real decrease]
+// A: TryIncRef [transform speculative to real]
+//
+//go:nosplit
+func (r *MountNamespaceRefs) DecRef(destroy func()) {
+ switch v := atomic.AddInt64(&r.refCount, -1); {
+ case v < -1:
+ panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %T", r, MountNamespaceownerType))
+
+ case v == -1:
+ // The last reference has been dropped; run the destructor.
+ if destroy != nil {
+ destroy()
+ }
+ }
+}
diff --git a/pkg/sentry/vfs/vfs_state_autogen.go b/pkg/sentry/vfs/vfs_state_autogen.go
index 7bd988336..be649f60a 100644
--- a/pkg/sentry/vfs/vfs_state_autogen.go
+++ b/pkg/sentry/vfs/vfs_state_autogen.go
@@ -188,13 +188,36 @@ func (x *eventEntry) StateLoad(m state.Source) {
m.Load(1, &x.prev)
}
+func (x *FileDescriptionRefs) StateTypeName() string {
+ return "pkg/sentry/vfs.FileDescriptionRefs"
+}
+
+func (x *FileDescriptionRefs) StateFields() []string {
+ return []string{
+ "refCount",
+ }
+}
+
+func (x *FileDescriptionRefs) beforeSave() {}
+
+func (x *FileDescriptionRefs) StateSave(m state.Sink) {
+ x.beforeSave()
+ m.Save(0, &x.refCount)
+}
+
+func (x *FileDescriptionRefs) afterLoad() {}
+
+func (x *FileDescriptionRefs) StateLoad(m state.Source) {
+ m.Load(0, &x.refCount)
+}
+
func (x *Filesystem) StateTypeName() string {
return "pkg/sentry/vfs.Filesystem"
}
func (x *Filesystem) StateFields() []string {
return []string{
- "refs",
+ "FilesystemRefs",
"vfs",
"fsType",
"impl",
@@ -205,7 +228,7 @@ func (x *Filesystem) beforeSave() {}
func (x *Filesystem) StateSave(m state.Sink) {
x.beforeSave()
- m.Save(0, &x.refs)
+ m.Save(0, &x.FilesystemRefs)
m.Save(1, &x.vfs)
m.Save(2, &x.fsType)
m.Save(3, &x.impl)
@@ -214,12 +237,35 @@ func (x *Filesystem) StateSave(m state.Sink) {
func (x *Filesystem) afterLoad() {}
func (x *Filesystem) StateLoad(m state.Source) {
- m.Load(0, &x.refs)
+ m.Load(0, &x.FilesystemRefs)
m.Load(1, &x.vfs)
m.Load(2, &x.fsType)
m.Load(3, &x.impl)
}
+func (x *FilesystemRefs) StateTypeName() string {
+ return "pkg/sentry/vfs.FilesystemRefs"
+}
+
+func (x *FilesystemRefs) StateFields() []string {
+ return []string{
+ "refCount",
+ }
+}
+
+func (x *FilesystemRefs) beforeSave() {}
+
+func (x *FilesystemRefs) StateSave(m state.Sink) {
+ x.beforeSave()
+ m.Save(0, &x.refCount)
+}
+
+func (x *FilesystemRefs) afterLoad() {}
+
+func (x *FilesystemRefs) StateLoad(m state.Source) {
+ m.Load(0, &x.refCount)
+}
+
func (x *registeredFilesystemType) StateTypeName() string {
return "pkg/sentry/vfs.registeredFilesystemType"
}
@@ -448,9 +494,9 @@ func (x *MountNamespace) StateTypeName() string {
func (x *MountNamespace) StateFields() []string {
return []string{
+ "MountNamespaceRefs",
"Owner",
"root",
- "refs",
"mountpoints",
}
}
@@ -459,21 +505,44 @@ func (x *MountNamespace) beforeSave() {}
func (x *MountNamespace) StateSave(m state.Sink) {
x.beforeSave()
- m.Save(0, &x.Owner)
- m.Save(1, &x.root)
- m.Save(2, &x.refs)
+ m.Save(0, &x.MountNamespaceRefs)
+ m.Save(1, &x.Owner)
+ m.Save(2, &x.root)
m.Save(3, &x.mountpoints)
}
func (x *MountNamespace) afterLoad() {}
func (x *MountNamespace) StateLoad(m state.Source) {
- m.Load(0, &x.Owner)
- m.Load(1, &x.root)
- m.Load(2, &x.refs)
+ m.Load(0, &x.MountNamespaceRefs)
+ m.Load(1, &x.Owner)
+ m.Load(2, &x.root)
m.Load(3, &x.mountpoints)
}
+func (x *MountNamespaceRefs) StateTypeName() string {
+ return "pkg/sentry/vfs.MountNamespaceRefs"
+}
+
+func (x *MountNamespaceRefs) StateFields() []string {
+ return []string{
+ "refCount",
+ }
+}
+
+func (x *MountNamespaceRefs) beforeSave() {}
+
+func (x *MountNamespaceRefs) StateSave(m state.Sink) {
+ x.beforeSave()
+ m.Save(0, &x.refCount)
+}
+
+func (x *MountNamespaceRefs) afterLoad() {}
+
+func (x *MountNamespaceRefs) StateLoad(m state.Source) {
+ m.Load(0, &x.refCount)
+}
+
func (x *VirtualFilesystem) StateTypeName() string {
return "pkg/sentry/vfs.VirtualFilesystem"
}
@@ -555,7 +624,9 @@ func init() {
state.Register((*epollInterestEntry)(nil))
state.Register((*eventList)(nil))
state.Register((*eventEntry)(nil))
+ state.Register((*FileDescriptionRefs)(nil))
state.Register((*Filesystem)(nil))
+ state.Register((*FilesystemRefs)(nil))
state.Register((*registeredFilesystemType)(nil))
state.Register((*Inotify)(nil))
state.Register((*Watches)(nil))
@@ -563,6 +634,7 @@ func init() {
state.Register((*Event)(nil))
state.Register((*Mount)(nil))
state.Register((*MountNamespace)(nil))
+ state.Register((*MountNamespaceRefs)(nil))
state.Register((*VirtualFilesystem)(nil))
state.Register((*VirtualDentry)(nil))
}