summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry
diff options
context:
space:
mode:
authorgVisor bot <gvisor-bot@google.com>2020-08-20 21:16:11 +0000
committergVisor bot <gvisor-bot@google.com>2020-08-20 21:16:11 +0000
commit233e1b775fd3188da44f8cbf8449350f21ad7f91 (patch)
tree4de4664371e68acf68a0e9dfb49ae3ea274c371e /pkg/sentry
parent8c844b60d240b3b00a45620044be39e8797294f7 (diff)
parent3163aff866852e730777be4ef689b0405c6332cd (diff)
Merge release-20200810.0-75-g3163aff86 (automated)
Diffstat (limited to 'pkg/sentry')
-rw-r--r--pkg/sentry/fsimpl/host/connected_endpoint_refs.go112
-rw-r--r--pkg/sentry/fsimpl/host/host.go94
-rw-r--r--pkg/sentry/fsimpl/host/host_state_autogen.go54
-rw-r--r--pkg/sentry/fsimpl/host/inode_refs.go112
-rw-r--r--pkg/sentry/fsimpl/host/socket.go20
-rw-r--r--pkg/sentry/platform/ring0/defs_impl_arm64.go4
6 files changed, 340 insertions, 56 deletions
diff --git a/pkg/sentry/fsimpl/host/connected_endpoint_refs.go b/pkg/sentry/fsimpl/host/connected_endpoint_refs.go
new file mode 100644
index 000000000..3b7bf599e
--- /dev/null
+++ b/pkg/sentry/fsimpl/host/connected_endpoint_refs.go
@@ -0,0 +1,112 @@
+package host
+
+import (
+ "runtime"
+ "sync/atomic"
+
+ "gvisor.dev/gvisor/pkg/log"
+ refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
+)
+
+// ownerType is used to customize logging. Note that we use a pointer to T so
+// that we do not copy the entire object when passed as a format parameter.
+var ConnectedEndpointownerType *ConnectedEndpoint
+
+// Refs implements refs.RefCounter. It keeps a reference count using atomic
+// operations and calls the destructor when the count reaches zero.
+//
+// Note that the number of references is actually refCount + 1 so that a default
+// zero-value Refs object contains one reference.
+//
+// +stateify savable
+type ConnectedEndpointRefs struct {
+ // refCount is composed of two fields:
+ //
+ // [32-bit speculative references]:[32-bit real references]
+ //
+ // Speculative references are used for TryIncRef, to avoid a CompareAndSwap
+ // loop. See IncRef, DecRef and TryIncRef for details of how these fields are
+ // used.
+ refCount int64
+}
+
+func (r *ConnectedEndpointRefs) finalize() {
+ var note string
+ switch refs_vfs1.GetLeakMode() {
+ case refs_vfs1.NoLeakChecking:
+ return
+ case refs_vfs1.UninitializedLeakChecking:
+ note = "(Leak checker uninitialized): "
+ }
+ if n := r.ReadRefs(); n != 0 {
+ log.Warningf("%sRefs %p owned by %T garbage collected with ref count of %d (want 0)", note, r, ConnectedEndpointownerType, n)
+ }
+}
+
+// EnableLeakCheck checks for reference leaks when Refs gets garbage collected.
+func (r *ConnectedEndpointRefs) EnableLeakCheck() {
+ if refs_vfs1.GetLeakMode() != refs_vfs1.NoLeakChecking {
+ runtime.SetFinalizer(r, (*ConnectedEndpointRefs).finalize)
+ }
+}
+
+// ReadRefs returns the current number of references. The returned count is
+// inherently racy and is unsafe to use without external synchronization.
+func (r *ConnectedEndpointRefs) ReadRefs() int64 {
+
+ return atomic.LoadInt64(&r.refCount) + 1
+}
+
+// IncRef implements refs.RefCounter.IncRef.
+//
+//go:nosplit
+func (r *ConnectedEndpointRefs) IncRef() {
+ if v := atomic.AddInt64(&r.refCount, 1); v <= 0 {
+ panic("Incrementing non-positive ref count")
+ }
+}
+
+// TryIncRef implements refs.RefCounter.TryIncRef.
+//
+// To do this safely without a loop, a speculative reference is first acquired
+// on the object. This allows multiple concurrent TryIncRef calls to distinguish
+// other TryIncRef calls from genuine references held.
+//
+//go:nosplit
+func (r *ConnectedEndpointRefs) TryIncRef() bool {
+ const speculativeRef = 1 << 32
+ v := atomic.AddInt64(&r.refCount, speculativeRef)
+ if int32(v) < 0 {
+
+ atomic.AddInt64(&r.refCount, -speculativeRef)
+ return false
+ }
+
+ atomic.AddInt64(&r.refCount, -speculativeRef+1)
+ return true
+}
+
+// DecRef implements refs.RefCounter.DecRef.
+//
+// Note that speculative references are counted here. Since they were added
+// prior to real references reaching zero, they will successfully convert to
+// real references. In other words, we see speculative references only in the
+// following case:
+//
+// A: TryIncRef [speculative increase => sees non-negative references]
+// B: DecRef [real decrease]
+// A: TryIncRef [transform speculative to real]
+//
+//go:nosplit
+func (r *ConnectedEndpointRefs) DecRef(destroy func()) {
+ switch v := atomic.AddInt64(&r.refCount, -1); {
+ case v < -1:
+ panic("Decrementing non-positive ref count")
+
+ case v == -1:
+
+ if destroy != nil {
+ destroy()
+ }
+ }
+}
diff --git a/pkg/sentry/fsimpl/host/host.go b/pkg/sentry/fsimpl/host/host.go
index 56869f59a..2d3821f33 100644
--- a/pkg/sentry/fsimpl/host/host.go
+++ b/pkg/sentry/fsimpl/host/host.go
@@ -27,7 +27,6 @@ import (
"gvisor.dev/gvisor/pkg/fdnotifier"
"gvisor.dev/gvisor/pkg/fspath"
"gvisor.dev/gvisor/pkg/log"
- "gvisor.dev/gvisor/pkg/refs"
fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
"gvisor.dev/gvisor/pkg/sentry/hostfd"
@@ -41,6 +40,44 @@ import (
"gvisor.dev/gvisor/pkg/waiter"
)
+func newInode(fs *filesystem, hostFD int, fileType linux.FileMode, isTTY bool) (*inode, error) {
+ // Determine if hostFD is seekable. If not, this syscall will return ESPIPE
+ // (see fs/read_write.c:llseek), e.g. for pipes, sockets, and some character
+ // devices.
+ _, err := unix.Seek(hostFD, 0, linux.SEEK_CUR)
+ seekable := err != syserror.ESPIPE
+
+ i := &inode{
+ hostFD: hostFD,
+ ino: fs.NextIno(),
+ isTTY: isTTY,
+ wouldBlock: wouldBlock(uint32(fileType)),
+ seekable: seekable,
+ // NOTE(b/38213152): Technically, some obscure char devices can be memory
+ // mapped, but we only allow regular files.
+ canMap: fileType == linux.S_IFREG,
+ }
+ i.pf.inode = i
+ i.refs.EnableLeakCheck()
+
+ // Non-seekable files can't be memory mapped, assert this.
+ if !i.seekable && i.canMap {
+ panic("files that can return EWOULDBLOCK (sockets, pipes, etc.) cannot be memory mapped")
+ }
+
+ // If the hostFD would block, we must set it to non-blocking and handle
+ // blocking behavior in the sentry.
+ if i.wouldBlock {
+ if err := syscall.SetNonblock(i.hostFD, true); err != nil {
+ return nil, err
+ }
+ if err := fdnotifier.AddFD(int32(i.hostFD), &i.queue); err != nil {
+ return nil, err
+ }
+ }
+ return i, nil
+}
+
// NewFDOptions contains options to NewFD.
type NewFDOptions struct {
// If IsTTY is true, the file descriptor is a TTY.
@@ -76,44 +113,11 @@ func NewFD(ctx context.Context, mnt *vfs.Mount, hostFD int, opts *NewFDOptions)
flags = uint32(flagsInt)
}
- fileMode := linux.FileMode(s.Mode)
- fileType := fileMode.FileType()
-
- // Determine if hostFD is seekable. If not, this syscall will return ESPIPE
- // (see fs/read_write.c:llseek), e.g. for pipes, sockets, and some character
- // devices.
- _, err := unix.Seek(hostFD, 0, linux.SEEK_CUR)
- seekable := err != syserror.ESPIPE
-
- i := &inode{
- hostFD: hostFD,
- ino: fs.NextIno(),
- isTTY: opts.IsTTY,
- wouldBlock: wouldBlock(uint32(fileType)),
- seekable: seekable,
- // NOTE(b/38213152): Technically, some obscure char devices can be memory
- // mapped, but we only allow regular files.
- canMap: fileType == linux.S_IFREG,
- }
- i.pf.inode = i
-
- // Non-seekable files can't be memory mapped, assert this.
- if !i.seekable && i.canMap {
- panic("files that can return EWOULDBLOCK (sockets, pipes, etc.) cannot be memory mapped")
- }
-
- // If the hostFD would block, we must set it to non-blocking and handle
- // blocking behavior in the sentry.
- if i.wouldBlock {
- if err := syscall.SetNonblock(i.hostFD, true); err != nil {
- return nil, err
- }
- if err := fdnotifier.AddFD(int32(i.hostFD), &i.queue); err != nil {
- return nil, err
- }
- }
-
d := &kernfs.Dentry{}
+ i, err := newInode(fs, hostFD, linux.FileMode(s.Mode).FileType(), opts.IsTTY)
+ if err != nil {
+ return nil, err
+ }
d.Init(i)
// i.open will take a reference on d.
@@ -188,7 +192,7 @@ type inode struct {
locks vfs.FileLocks
// When the reference count reaches zero, the host fd is closed.
- refs.AtomicRefCount
+ refs inodeRefs
// hostFD contains the host fd that this file was originally created from,
// which must be available at time of restore.
@@ -430,9 +434,19 @@ func (i *inode) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *auth.Cre
return nil
}
+// IncRef implements kernfs.Inode.
+func (i *inode) IncRef() {
+ i.refs.IncRef()
+}
+
+// TryIncRef implements kernfs.Inode.
+func (i *inode) TryIncRef() bool {
+ return i.refs.TryIncRef()
+}
+
// DecRef implements kernfs.Inode.
func (i *inode) DecRef(ctx context.Context) {
- i.AtomicRefCount.DecRefWithDestructor(ctx, func(context.Context) {
+ i.refs.DecRef(func() {
if i.wouldBlock {
fdnotifier.RemoveFD(int32(i.hostFD))
}
diff --git a/pkg/sentry/fsimpl/host/host_state_autogen.go b/pkg/sentry/fsimpl/host/host_state_autogen.go
index 17320c417..400320c78 100644
--- a/pkg/sentry/fsimpl/host/host_state_autogen.go
+++ b/pkg/sentry/fsimpl/host/host_state_autogen.go
@@ -6,13 +6,59 @@ import (
"gvisor.dev/gvisor/pkg/state"
)
+func (x *ConnectedEndpointRefs) StateTypeName() string {
+ return "pkg/sentry/fsimpl/host.ConnectedEndpointRefs"
+}
+
+func (x *ConnectedEndpointRefs) StateFields() []string {
+ return []string{
+ "refCount",
+ }
+}
+
+func (x *ConnectedEndpointRefs) beforeSave() {}
+
+func (x *ConnectedEndpointRefs) StateSave(m state.Sink) {
+ x.beforeSave()
+ m.Save(0, &x.refCount)
+}
+
+func (x *ConnectedEndpointRefs) afterLoad() {}
+
+func (x *ConnectedEndpointRefs) StateLoad(m state.Source) {
+ m.Load(0, &x.refCount)
+}
+
+func (x *inodeRefs) StateTypeName() string {
+ return "pkg/sentry/fsimpl/host.inodeRefs"
+}
+
+func (x *inodeRefs) StateFields() []string {
+ return []string{
+ "refCount",
+ }
+}
+
+func (x *inodeRefs) beforeSave() {}
+
+func (x *inodeRefs) StateSave(m state.Sink) {
+ x.beforeSave()
+ m.Save(0, &x.refCount)
+}
+
+func (x *inodeRefs) afterLoad() {}
+
+func (x *inodeRefs) StateLoad(m state.Source) {
+ m.Load(0, &x.refCount)
+}
+
func (x *ConnectedEndpoint) StateTypeName() string {
return "pkg/sentry/fsimpl/host.ConnectedEndpoint"
}
func (x *ConnectedEndpoint) StateFields() []string {
return []string{
- "ref",
+ "ConnectedEndpointRefs",
"fd",
"addr",
"stype",
@@ -23,7 +69,7 @@ func (x *ConnectedEndpoint) beforeSave() {}
func (x *ConnectedEndpoint) StateSave(m state.Sink) {
x.beforeSave()
- m.Save(0, &x.ref)
+ m.Save(0, &x.ConnectedEndpointRefs)
m.Save(1, &x.fd)
m.Save(2, &x.addr)
m.Save(3, &x.stype)
@@ -32,12 +78,14 @@ func (x *ConnectedEndpoint) StateSave(m state.Sink) {
func (x *ConnectedEndpoint) afterLoad() {}
func (x *ConnectedEndpoint) StateLoad(m state.Source) {
- m.Load(0, &x.ref)
+ m.Load(0, &x.ConnectedEndpointRefs)
m.Load(1, &x.fd)
m.Load(2, &x.addr)
m.Load(3, &x.stype)
}
func init() {
+ state.Register((*ConnectedEndpointRefs)(nil))
+ state.Register((*inodeRefs)(nil))
state.Register((*ConnectedEndpoint)(nil))
}
diff --git a/pkg/sentry/fsimpl/host/inode_refs.go b/pkg/sentry/fsimpl/host/inode_refs.go
new file mode 100644
index 000000000..55c0fb3a9
--- /dev/null
+++ b/pkg/sentry/fsimpl/host/inode_refs.go
@@ -0,0 +1,112 @@
+package host
+
+import (
+ "runtime"
+ "sync/atomic"
+
+ "gvisor.dev/gvisor/pkg/log"
+ refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
+)
+
+// ownerType is used to customize logging. Note that we use a pointer to T so
+// that we do not copy the entire object when passed as a format parameter.
+var inodeownerType *inode
+
+// Refs implements refs.RefCounter. It keeps a reference count using atomic
+// operations and calls the destructor when the count reaches zero.
+//
+// Note that the number of references is actually refCount + 1 so that a default
+// zero-value Refs object contains one reference.
+//
+// +stateify savable
+type inodeRefs struct {
+ // refCount is composed of two fields:
+ //
+ // [32-bit speculative references]:[32-bit real references]
+ //
+ // Speculative references are used for TryIncRef, to avoid a CompareAndSwap
+ // loop. See IncRef, DecRef and TryIncRef for details of how these fields are
+ // used.
+ refCount int64
+}
+
+func (r *inodeRefs) finalize() {
+ var note string
+ switch refs_vfs1.GetLeakMode() {
+ case refs_vfs1.NoLeakChecking:
+ return
+ case refs_vfs1.UninitializedLeakChecking:
+ note = "(Leak checker uninitialized): "
+ }
+ if n := r.ReadRefs(); n != 0 {
+ log.Warningf("%sRefs %p owned by %T garbage collected with ref count of %d (want 0)", note, r, inodeownerType, n)
+ }
+}
+
+// EnableLeakCheck checks for reference leaks when Refs gets garbage collected.
+func (r *inodeRefs) EnableLeakCheck() {
+ if refs_vfs1.GetLeakMode() != refs_vfs1.NoLeakChecking {
+ runtime.SetFinalizer(r, (*inodeRefs).finalize)
+ }
+}
+
+// ReadRefs returns the current number of references. The returned count is
+// inherently racy and is unsafe to use without external synchronization.
+func (r *inodeRefs) ReadRefs() int64 {
+
+ return atomic.LoadInt64(&r.refCount) + 1
+}
+
+// IncRef implements refs.RefCounter.IncRef.
+//
+//go:nosplit
+func (r *inodeRefs) IncRef() {
+ if v := atomic.AddInt64(&r.refCount, 1); v <= 0 {
+ panic("Incrementing non-positive ref count")
+ }
+}
+
+// TryIncRef implements refs.RefCounter.TryIncRef.
+//
+// To do this safely without a loop, a speculative reference is first acquired
+// on the object. This allows multiple concurrent TryIncRef calls to distinguish
+// other TryIncRef calls from genuine references held.
+//
+//go:nosplit
+func (r *inodeRefs) TryIncRef() bool {
+ const speculativeRef = 1 << 32
+ v := atomic.AddInt64(&r.refCount, speculativeRef)
+ if int32(v) < 0 {
+
+ atomic.AddInt64(&r.refCount, -speculativeRef)
+ return false
+ }
+
+ atomic.AddInt64(&r.refCount, -speculativeRef+1)
+ return true
+}
+
+// DecRef implements refs.RefCounter.DecRef.
+//
+// Note that speculative references are counted here. Since they were added
+// prior to real references reaching zero, they will successfully convert to
+// real references. In other words, we see speculative references only in the
+// following case:
+//
+// A: TryIncRef [speculative increase => sees non-negative references]
+// B: DecRef [real decrease]
+// A: TryIncRef [transform speculative to real]
+//
+//go:nosplit
+func (r *inodeRefs) DecRef(destroy func()) {
+ switch v := atomic.AddInt64(&r.refCount, -1); {
+ case v < -1:
+ panic("Decrementing non-positive ref count")
+
+ case v == -1:
+
+ if destroy != nil {
+ destroy()
+ }
+ }
+}
diff --git a/pkg/sentry/fsimpl/host/socket.go b/pkg/sentry/fsimpl/host/socket.go
index 4979dd0a9..131145b85 100644
--- a/pkg/sentry/fsimpl/host/socket.go
+++ b/pkg/sentry/fsimpl/host/socket.go
@@ -22,7 +22,6 @@ import (
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/fdnotifier"
"gvisor.dev/gvisor/pkg/log"
- "gvisor.dev/gvisor/pkg/refs"
"gvisor.dev/gvisor/pkg/sentry/socket/control"
"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
"gvisor.dev/gvisor/pkg/sentry/uniqueid"
@@ -59,8 +58,7 @@ func newEndpoint(ctx context.Context, hostFD int, queue *waiter.Queue) (transpor
//
// +stateify savable
type ConnectedEndpoint struct {
- // ref keeps track of references to a ConnectedEndpoint.
- ref refs.AtomicRefCount
+ ConnectedEndpointRefs
// mu protects fd below.
mu sync.RWMutex `state:"nosave"`
@@ -132,9 +130,9 @@ func NewConnectedEndpoint(ctx context.Context, hostFD int, addr string, saveable
return nil, err
}
- // AtomicRefCounters start off with a single reference. We need two.
- e.ref.IncRef()
- e.ref.EnableLeakCheck("host.ConnectedEndpoint")
+ // ConnectedEndpointRefs start off with a single reference. We need two.
+ e.IncRef()
+ e.EnableLeakCheck()
return &e, nil
}
@@ -318,7 +316,7 @@ func (c *ConnectedEndpoint) destroyLocked() {
// Release implements transport.ConnectedEndpoint.Release and
// transport.Receiver.Release.
func (c *ConnectedEndpoint) Release(ctx context.Context) {
- c.ref.DecRefWithDestructor(ctx, func(context.Context) {
+ c.DecRef(func() {
c.mu.Lock()
c.destroyLocked()
c.mu.Unlock()
@@ -348,7 +346,7 @@ func (e *SCMConnectedEndpoint) Init() error {
// Release implements transport.ConnectedEndpoint.Release and
// transport.Receiver.Release.
func (e *SCMConnectedEndpoint) Release(ctx context.Context) {
- e.ref.DecRefWithDestructor(ctx, func(context.Context) {
+ e.DecRef(func() {
e.mu.Lock()
if err := syscall.Close(e.fd); err != nil {
log.Warningf("Failed to close host fd %d: %v", err)
@@ -378,8 +376,8 @@ func NewSCMEndpoint(ctx context.Context, hostFD int, queue *waiter.Queue, addr s
return nil, err
}
- // AtomicRefCounters start off with a single reference. We need two.
- e.ref.IncRef()
- e.ref.EnableLeakCheck("host.SCMConnectedEndpoint")
+ // ConnectedEndpointRefs start off with a single reference. We need two.
+ e.IncRef()
+ e.EnableLeakCheck()
return &e, nil
}
diff --git a/pkg/sentry/platform/ring0/defs_impl_arm64.go b/pkg/sentry/platform/ring0/defs_impl_arm64.go
index eda1e1484..2dac9ad14 100644
--- a/pkg/sentry/platform/ring0/defs_impl_arm64.go
+++ b/pkg/sentry/platform/ring0/defs_impl_arm64.go
@@ -1,13 +1,13 @@
package ring0
import (
- "fmt"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables"
- "io"
"reflect"
+ "fmt"
"gvisor.dev/gvisor/pkg/usermem"
+ "io"
)
// Useful bits.