diff options
author | Tiwei Bie <tiwei.btw@antgroup.com> | 2020-09-24 14:16:12 +0800 |
---|---|---|
committer | Tiwei Bie <tiwei.btw@antgroup.com> | 2020-09-24 14:16:12 +0800 |
commit | 71f8cab91b2005c9e3ab904e3a2cba99cb031230 (patch) | |
tree | 6e89cb64281c7a027355322a44e96792308436a1 /pkg/sentry | |
parent | 332e1716fc93e3f2ffe6961d3c296503d2079bc8 (diff) |
Fix socket record leak in VFS2
VFS2 socket record is not removed from the system-wide
socket table when the socket is released, which will lead
to a memory leak. This patch fixes this issue.
Fixes: #3874
Signed-off-by: Tiwei Bie <tiwei.btw@antgroup.com>
Diffstat (limited to 'pkg/sentry')
-rw-r--r-- | pkg/sentry/kernel/BUILD | 4 | ||||
-rw-r--r-- | pkg/sentry/kernel/kernel.go | 63 | ||||
-rw-r--r-- | pkg/sentry/socket/hostinet/socket_vfs2.go | 7 | ||||
-rw-r--r-- | pkg/sentry/socket/netlink/socket_vfs2.go | 7 | ||||
-rw-r--r-- | pkg/sentry/socket/netstack/netstack_vfs2.go | 7 | ||||
-rw-r--r-- | pkg/sentry/socket/unix/BUILD | 16 | ||||
-rw-r--r-- | pkg/sentry/socket/unix/unix.go | 36 | ||||
-rw-r--r-- | pkg/sentry/socket/unix/unix_vfs2.go | 20 |
8 files changed, 124 insertions, 36 deletions
diff --git a/pkg/sentry/kernel/BUILD b/pkg/sentry/kernel/BUILD index a868ee7fe..083071b5e 100644 --- a/pkg/sentry/kernel/BUILD +++ b/pkg/sentry/kernel/BUILD @@ -69,8 +69,8 @@ go_template_instance( prefix = "socket", template = "//pkg/ilist:generic_list", types = { - "Element": "*SocketRecord", - "Linker": "*SocketRecord", + "Element": "*SocketRecordVFS1", + "Linker": "*SocketRecordVFS1", }, ) diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go index ebd2ec3df..d9c62ff91 100644 --- a/pkg/sentry/kernel/kernel.go +++ b/pkg/sentry/kernel/kernel.go @@ -220,10 +220,15 @@ type Kernel struct { // danglingEndpoints is used to save / restore tcpip.DanglingEndpoints. danglingEndpoints struct{} `state:".([]tcpip.Endpoint)"` - // sockets is the list of all network sockets the system. Protected by - // extMu. + // sockets is the list of all network sockets in the system. + // Protected by extMu. + // TODO(gvisor.dev/issue/1624): Only used by VFS1. sockets socketList + // socketsVFS2 records all network sockets in the system. Protected by + // extMu. + socketsVFS2 map[*vfs.FileDescription]*SocketRecord + // nextSocketRecord is the next entry number to use in sockets. Protected // by extMu. nextSocketRecord uint64 @@ -414,6 +419,8 @@ func (k *Kernel) Init(args InitKernelArgs) error { return fmt.Errorf("failed to create sockfs mount: %v", err) } k.socketMount = socketMount + + k.socketsVFS2 = make(map[*vfs.FileDescription]*SocketRecord) } return nil @@ -1509,20 +1516,27 @@ func (k *Kernel) SupervisorContext() context.Context { } } -// SocketRecord represents a socket recorded in Kernel.sockets. It implements -// refs.WeakRefUser for sockets stored in the socket table. +// SocketRecord represents a socket recorded in Kernel.socketsVFS2. // // +stateify savable type SocketRecord struct { - socketEntry k *Kernel - Sock *refs.WeakRef - SockVFS2 *vfs.FileDescription - ID uint64 // Socket table entry number. + Sock *refs.WeakRef // TODO(gvisor.dev/issue/1624): Only used by VFS1. + SockVFS2 *vfs.FileDescription // Only used by VFS2. + ID uint64 // Socket table entry number. +} + +// SocketRecordVFS1 represents a socket recorded in Kernel.sockets. It implements +// refs.WeakRefUser for sockets stored in the socket table. +// +// +stateify savable +type SocketRecordVFS1 struct { + socketEntry + SocketRecord } // WeakRefGone implements refs.WeakRefUser.WeakRefGone. -func (s *SocketRecord) WeakRefGone(context.Context) { +func (s *SocketRecordVFS1) WeakRefGone(context.Context) { s.k.extMu.Lock() s.k.sockets.Remove(s) s.k.extMu.Unlock() @@ -1535,7 +1549,12 @@ func (k *Kernel) RecordSocket(sock *fs.File) { k.extMu.Lock() id := k.nextSocketRecord k.nextSocketRecord++ - s := &SocketRecord{k: k, ID: id} + s := &SocketRecordVFS1{ + SocketRecord: SocketRecord{ + k: k, + ID: id, + }, + } s.Sock = refs.NewWeakRef(sock, s) k.sockets.PushBack(s) k.extMu.Unlock() @@ -1547,9 +1566,12 @@ func (k *Kernel) RecordSocket(sock *fs.File) { // Precondition: Caller must hold a reference to sock. // // Note that the socket table will not hold a reference on the -// vfs.FileDescription, because we do not support weak refs on VFS2 files. +// vfs.FileDescription. func (k *Kernel) RecordSocketVFS2(sock *vfs.FileDescription) { k.extMu.Lock() + if _, ok := k.socketsVFS2[sock]; ok { + panic(fmt.Sprintf("Socket %p added twice", sock)) + } id := k.nextSocketRecord k.nextSocketRecord++ s := &SocketRecord{ @@ -1557,7 +1579,14 @@ func (k *Kernel) RecordSocketVFS2(sock *vfs.FileDescription) { ID: id, SockVFS2: sock, } - k.sockets.PushBack(s) + k.socketsVFS2[sock] = s + k.extMu.Unlock() +} + +// DeleteSocketVFS2 removes a VFS2 socket from the system-wide socket table. +func (k *Kernel) DeleteSocketVFS2(sock *vfs.FileDescription) { + k.extMu.Lock() + delete(k.socketsVFS2, sock) k.extMu.Unlock() } @@ -1568,8 +1597,14 @@ func (k *Kernel) RecordSocketVFS2(sock *vfs.FileDescription) { func (k *Kernel) ListSockets() []*SocketRecord { k.extMu.Lock() var socks []*SocketRecord - for s := k.sockets.Front(); s != nil; s = s.Next() { - socks = append(socks, s) + if VFS2Enabled { + for _, s := range k.socketsVFS2 { + socks = append(socks, s) + } + } else { + for s := k.sockets.Front(); s != nil; s = s.Next() { + socks = append(socks, &s.SocketRecord) + } } k.extMu.Unlock() return socks diff --git a/pkg/sentry/socket/hostinet/socket_vfs2.go b/pkg/sentry/socket/hostinet/socket_vfs2.go index 97bc6027f..e5acbac50 100644 --- a/pkg/sentry/socket/hostinet/socket_vfs2.go +++ b/pkg/sentry/socket/hostinet/socket_vfs2.go @@ -77,6 +77,13 @@ func newVFS2Socket(t *kernel.Task, family int, stype linux.SockType, protocol in return vfsfd, nil } +// Release implements vfs.FileDescriptionImpl.Release. +func (s *socketVFS2) Release(ctx context.Context) { + t := kernel.TaskFromContext(ctx) + t.Kernel().DeleteSocketVFS2(&s.vfsfd) + s.socketOpsCommon.Release(ctx) +} + // Readiness implements waiter.Waitable.Readiness. func (s *socketVFS2) Readiness(mask waiter.EventMask) waiter.EventMask { return s.socketOpsCommon.Readiness(mask) diff --git a/pkg/sentry/socket/netlink/socket_vfs2.go b/pkg/sentry/socket/netlink/socket_vfs2.go index a38d25da9..c83b23242 100644 --- a/pkg/sentry/socket/netlink/socket_vfs2.go +++ b/pkg/sentry/socket/netlink/socket_vfs2.go @@ -82,6 +82,13 @@ func NewVFS2(t *kernel.Task, skType linux.SockType, protocol Protocol) (*SocketV return fd, nil } +// Release implements vfs.FileDescriptionImpl.Release. +func (s *SocketVFS2) Release(ctx context.Context) { + t := kernel.TaskFromContext(ctx) + t.Kernel().DeleteSocketVFS2(&s.vfsfd) + s.socketOpsCommon.Release(ctx) +} + // Readiness implements waiter.Waitable.Readiness. func (s *SocketVFS2) Readiness(mask waiter.EventMask) waiter.EventMask { return s.socketOpsCommon.Readiness(mask) diff --git a/pkg/sentry/socket/netstack/netstack_vfs2.go b/pkg/sentry/socket/netstack/netstack_vfs2.go index c0212ad76..4c6791fff 100644 --- a/pkg/sentry/socket/netstack/netstack_vfs2.go +++ b/pkg/sentry/socket/netstack/netstack_vfs2.go @@ -79,6 +79,13 @@ func NewVFS2(t *kernel.Task, family int, skType linux.SockType, protocol int, qu return vfsfd, nil } +// Release implements vfs.FileDescriptionImpl.Release. +func (s *SocketVFS2) Release(ctx context.Context) { + t := kernel.TaskFromContext(ctx) + t.Kernel().DeleteSocketVFS2(&s.vfsfd) + s.socketOpsCommon.Release(ctx) +} + // Readiness implements waiter.Waitable.Readiness. func (s *SocketVFS2) Readiness(mask waiter.EventMask) waiter.EventMask { return s.socketOpsCommon.Readiness(mask) diff --git a/pkg/sentry/socket/unix/BUILD b/pkg/sentry/socket/unix/BUILD index a89583dad..cc7408698 100644 --- a/pkg/sentry/socket/unix/BUILD +++ b/pkg/sentry/socket/unix/BUILD @@ -7,10 +7,21 @@ go_template_instance( name = "socket_refs", out = "socket_refs.go", package = "unix", - prefix = "socketOpsCommon", + prefix = "socketOperations", template = "//pkg/refs_vfs2:refs_template", types = { - "T": "socketOpsCommon", + "T": "SocketOperations", + }, +) + +go_template_instance( + name = "socket_vfs2_refs", + out = "socket_vfs2_refs.go", + package = "unix", + prefix = "socketVFS2", + template = "//pkg/refs_vfs2:refs_template", + types = { + "T": "SocketVFS2", }, ) @@ -20,6 +31,7 @@ go_library( "device.go", "io.go", "socket_refs.go", + "socket_vfs2_refs.go", "unix.go", "unix_vfs2.go", ], diff --git a/pkg/sentry/socket/unix/unix.go b/pkg/sentry/socket/unix/unix.go index 917055cea..f80011ce4 100644 --- a/pkg/sentry/socket/unix/unix.go +++ b/pkg/sentry/socket/unix/unix.go @@ -55,6 +55,7 @@ type SocketOperations struct { fsutil.FileNoopFlush `state:"nosave"` fsutil.FileUseInodeUnstableAttr `state:"nosave"` + socketOperationsRefs socketOpsCommon } @@ -84,11 +85,27 @@ func NewWithDirent(ctx context.Context, d *fs.Dirent, ep transport.Endpoint, sty return fs.NewFile(ctx, d, flags, &s) } +// DecRef implements RefCounter.DecRef. +func (s *SocketOperations) DecRef(ctx context.Context) { + s.socketOperationsRefs.DecRef(func() { + s.ep.Close(ctx) + if s.abstractNamespace != nil { + s.abstractNamespace.Remove(s.abstractName, s) + } + }) +} + +// Release implemements fs.FileOperations.Release. +func (s *SocketOperations) Release(ctx context.Context) { + // Release only decrements a reference on s because s may be referenced in + // the abstract socket namespace. + s.DecRef(ctx) +} + // socketOpsCommon contains the socket operations common to VFS1 and VFS2. // // +stateify savable type socketOpsCommon struct { - socketOpsCommonRefs socket.SendReceiveTimeout ep transport.Endpoint @@ -101,23 +118,6 @@ type socketOpsCommon struct { abstractNamespace *kernel.AbstractSocketNamespace } -// DecRef implements RefCounter.DecRef. -func (s *socketOpsCommon) DecRef(ctx context.Context) { - s.socketOpsCommonRefs.DecRef(func() { - s.ep.Close(ctx) - if s.abstractNamespace != nil { - s.abstractNamespace.Remove(s.abstractName, s) - } - }) -} - -// Release implemements fs.FileOperations.Release. -func (s *socketOpsCommon) Release(ctx context.Context) { - // Release only decrements a reference on s because s may be referenced in - // the abstract socket namespace. - s.DecRef(ctx) -} - func (s *socketOpsCommon) isPacket() bool { switch s.stype { case linux.SOCK_DGRAM, linux.SOCK_SEQPACKET: diff --git a/pkg/sentry/socket/unix/unix_vfs2.go b/pkg/sentry/socket/unix/unix_vfs2.go index 3688f22d2..59f6fe44d 100644 --- a/pkg/sentry/socket/unix/unix_vfs2.go +++ b/pkg/sentry/socket/unix/unix_vfs2.go @@ -43,6 +43,7 @@ type SocketVFS2 struct { vfs.DentryMetadataFileDescriptionImpl vfs.LockFD + socketVFS2Refs socketOpsCommon } @@ -88,6 +89,25 @@ func NewFileDescription(ep transport.Endpoint, stype linux.SockType, flags uint3 return vfsfd, nil } +// DecRef implements RefCounter.DecRef. +func (s *SocketVFS2) DecRef(ctx context.Context) { + s.socketVFS2Refs.DecRef(func() { + t := kernel.TaskFromContext(ctx) + t.Kernel().DeleteSocketVFS2(&s.vfsfd) + s.ep.Close(ctx) + if s.abstractNamespace != nil { + s.abstractNamespace.Remove(s.abstractName, s) + } + }) +} + +// Release implements vfs.FileDescriptionImpl.Release. +func (s *SocketVFS2) Release(ctx context.Context) { + // Release only decrements a reference on s because s may be referenced in + // the abstract socket namespace. + s.DecRef(ctx) +} + // GetSockOpt implements the linux syscall getsockopt(2) for sockets backed by // a transport.Endpoint. func (s *SocketVFS2) GetSockOpt(t *kernel.Task, level, name int, outPtr usermem.Addr, outLen int) (marshal.Marshallable, *syserr.Error) { |