diff options
Diffstat (limited to 'pkg/sentry/socket/unix')
-rw-r--r-- | pkg/sentry/socket/unix/socket_refs.go | 111 | ||||
-rw-r--r-- | pkg/sentry/socket/unix/unix.go | 22 | ||||
-rw-r--r-- | pkg/sentry/socket/unix/unix_state_autogen.go | 36 | ||||
-rw-r--r-- | pkg/sentry/socket/unix/unix_vfs2.go | 6 |
4 files changed, 166 insertions, 9 deletions
diff --git a/pkg/sentry/socket/unix/socket_refs.go b/pkg/sentry/socket/unix/socket_refs.go new file mode 100644 index 000000000..8eb0c9327 --- /dev/null +++ b/pkg/sentry/socket/unix/socket_refs.go @@ -0,0 +1,111 @@ +package unix + +import ( + "gvisor.dev/gvisor/pkg/log" + refs_vfs1 "gvisor.dev/gvisor/pkg/refs" + "runtime" + "sync/atomic" +) + +// ownerType is used to customize logging. Note that we use a pointer to T so +// that we do not copy the entire object when passed as a format parameter. +var socketOpsCommonownerType *socketOpsCommon + +// Refs implements refs.RefCounter. It keeps a reference count using atomic +// operations and calls the destructor when the count reaches zero. +// +// Note that the number of references is actually refCount + 1 so that a default +// zero-value Refs object contains one reference. +// +// +stateify savable +type socketOpsCommonRefs struct { + // refCount is composed of two fields: + // + // [32-bit speculative references]:[32-bit real references] + // + // Speculative references are used for TryIncRef, to avoid a CompareAndSwap + // loop. See IncRef, DecRef and TryIncRef for details of how these fields are + // used. + refCount int64 +} + +func (r *socketOpsCommonRefs) finalize() { + var note string + switch refs_vfs1.GetLeakMode() { + case refs_vfs1.NoLeakChecking: + return + case refs_vfs1.UninitializedLeakChecking: + note = "(Leak checker uninitialized): " + } + if n := r.ReadRefs(); n != 0 { + log.Warningf("%sRefs %p owned by %T garbage collected with ref count of %d (want 0)", note, r, socketOpsCommonownerType, n) + } +} + +// EnableLeakCheck checks for reference leaks when Refs gets garbage collected. +func (r *socketOpsCommonRefs) EnableLeakCheck() { + if refs_vfs1.GetLeakMode() != refs_vfs1.NoLeakChecking { + runtime.SetFinalizer(r, (*socketOpsCommonRefs).finalize) + } +} + +// ReadRefs returns the current number of references. The returned count is +// inherently racy and is unsafe to use without external synchronization. +func (r *socketOpsCommonRefs) ReadRefs() int64 { + + return atomic.LoadInt64(&r.refCount) + 1 +} + +// IncRef implements refs.RefCounter.IncRef. +// +//go:nosplit +func (r *socketOpsCommonRefs) IncRef() { + if v := atomic.AddInt64(&r.refCount, 1); v <= 0 { + panic("Incrementing non-positive ref count") + } +} + +// TryIncRef implements refs.RefCounter.TryIncRef. +// +// To do this safely without a loop, a speculative reference is first acquired +// on the object. This allows multiple concurrent TryIncRef calls to distinguish +// other TryIncRef calls from genuine references held. +// +//go:nosplit +func (r *socketOpsCommonRefs) TryIncRef() bool { + const speculativeRef = 1 << 32 + v := atomic.AddInt64(&r.refCount, speculativeRef) + if int32(v) < 0 { + + atomic.AddInt64(&r.refCount, -speculativeRef) + return false + } + + atomic.AddInt64(&r.refCount, -speculativeRef+1) + return true +} + +// DecRef implements refs.RefCounter.DecRef. +// +// Note that speculative references are counted here. Since they were added +// prior to real references reaching zero, they will successfully convert to +// real references. In other words, we see speculative references only in the +// following case: +// +// A: TryIncRef [speculative increase => sees non-negative references] +// B: DecRef [real decrease] +// A: TryIncRef [transform speculative to real] +// +//go:nosplit +func (r *socketOpsCommonRefs) DecRef(destroy func()) { + switch v := atomic.AddInt64(&r.refCount, -1); { + case v < -1: + panic("Decrementing non-positive ref count") + + case v == -1: + + if destroy != nil { + destroy() + } + } +} diff --git a/pkg/sentry/socket/unix/unix.go b/pkg/sentry/socket/unix/unix.go index 2b8454edb..b7e8e4325 100644 --- a/pkg/sentry/socket/unix/unix.go +++ b/pkg/sentry/socket/unix/unix.go @@ -24,7 +24,6 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fspath" - "gvisor.dev/gvisor/pkg/refs" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" @@ -80,7 +79,7 @@ func NewWithDirent(ctx context.Context, d *fs.Dirent, ep transport.Endpoint, sty stype: stype, }, } - s.EnableLeakCheck("unix.SocketOperations") + s.EnableLeakCheck() return fs.NewFile(ctx, d, flags, &s) } @@ -89,17 +88,26 @@ func NewWithDirent(ctx context.Context, d *fs.Dirent, ep transport.Endpoint, sty // // +stateify savable type socketOpsCommon struct { - refs.AtomicRefCount + socketOpsCommonRefs socket.SendReceiveTimeout ep transport.Endpoint stype linux.SockType + + // abstractName and abstractNamespace indicate the name and namespace of the + // socket if it is bound to an abstract socket namespace. Once the socket is + // bound, they cannot be modified. + abstractName string + abstractNamespace *kernel.AbstractSocketNamespace } // DecRef implements RefCounter.DecRef. func (s *socketOpsCommon) DecRef(ctx context.Context) { - s.DecRefWithDestructor(ctx, func(context.Context) { + s.socketOpsCommonRefs.DecRef(func() { s.ep.Close(ctx) + if s.abstractNamespace != nil { + s.abstractNamespace.Remove(s.abstractName, s) + } }) } @@ -284,10 +292,14 @@ func (s *SocketOperations) Bind(t *kernel.Task, sockaddr []byte) *syserr.Error { if t.IsNetworkNamespaced() { return syserr.ErrInvalidEndpointState } - if err := t.AbstractSockets().Bind(t, p[1:], bep, s); err != nil { + asn := t.AbstractSockets() + name := p[1:] + if err := asn.Bind(t, name, bep, s); err != nil { // syserr.ErrPortInUse corresponds to EADDRINUSE. return syserr.ErrPortInUse } + s.abstractName = name + s.abstractNamespace = asn } else { // The parent and name. var d *fs.Dirent diff --git a/pkg/sentry/socket/unix/unix_state_autogen.go b/pkg/sentry/socket/unix/unix_state_autogen.go index 4a3bbc11b..6966529c6 100644 --- a/pkg/sentry/socket/unix/unix_state_autogen.go +++ b/pkg/sentry/socket/unix/unix_state_autogen.go @@ -6,6 +6,29 @@ import ( "gvisor.dev/gvisor/pkg/state" ) +func (x *socketOpsCommonRefs) StateTypeName() string { + return "pkg/sentry/socket/unix.socketOpsCommonRefs" +} + +func (x *socketOpsCommonRefs) StateFields() []string { + return []string{ + "refCount", + } +} + +func (x *socketOpsCommonRefs) beforeSave() {} + +func (x *socketOpsCommonRefs) StateSave(m state.Sink) { + x.beforeSave() + m.Save(0, &x.refCount) +} + +func (x *socketOpsCommonRefs) afterLoad() {} + +func (x *socketOpsCommonRefs) StateLoad(m state.Source) { + m.Load(0, &x.refCount) +} + func (x *SocketOperations) StateTypeName() string { return "pkg/sentry/socket/unix.SocketOperations" } @@ -35,10 +58,12 @@ func (x *socketOpsCommon) StateTypeName() string { func (x *socketOpsCommon) StateFields() []string { return []string{ - "AtomicRefCount", + "socketOpsCommonRefs", "SendReceiveTimeout", "ep", "stype", + "abstractName", + "abstractNamespace", } } @@ -46,22 +71,27 @@ func (x *socketOpsCommon) beforeSave() {} func (x *socketOpsCommon) StateSave(m state.Sink) { x.beforeSave() - m.Save(0, &x.AtomicRefCount) + m.Save(0, &x.socketOpsCommonRefs) m.Save(1, &x.SendReceiveTimeout) m.Save(2, &x.ep) m.Save(3, &x.stype) + m.Save(4, &x.abstractName) + m.Save(5, &x.abstractNamespace) } func (x *socketOpsCommon) afterLoad() {} func (x *socketOpsCommon) StateLoad(m state.Source) { - m.Load(0, &x.AtomicRefCount) + m.Load(0, &x.socketOpsCommonRefs) m.Load(1, &x.SendReceiveTimeout) m.Load(2, &x.ep) m.Load(3, &x.stype) + m.Load(4, &x.abstractName) + m.Load(5, &x.abstractNamespace) } func init() { + state.Register((*socketOpsCommonRefs)(nil)) state.Register((*SocketOperations)(nil)) state.Register((*socketOpsCommon)(nil)) } diff --git a/pkg/sentry/socket/unix/unix_vfs2.go b/pkg/sentry/socket/unix/unix_vfs2.go index dfa25241a..d066ef8ab 100644 --- a/pkg/sentry/socket/unix/unix_vfs2.go +++ b/pkg/sentry/socket/unix/unix_vfs2.go @@ -183,10 +183,14 @@ func (s *SocketVFS2) Bind(t *kernel.Task, sockaddr []byte) *syserr.Error { if t.IsNetworkNamespaced() { return syserr.ErrInvalidEndpointState } - if err := t.AbstractSockets().Bind(t, p[1:], bep, s); err != nil { + asn := t.AbstractSockets() + name := p[1:] + if err := asn.Bind(t, name, bep, s); err != nil { // syserr.ErrPortInUse corresponds to EADDRINUSE. return syserr.ErrPortInUse } + s.abstractName = name + s.abstractNamespace = asn } else { path := fspath.Parse(p) root := t.FSContext().RootDirectoryVFS2() |