diff options
author | Mithun Iyer <iyerm@google.com> | 2021-06-01 23:34:58 -0700 |
---|---|---|
committer | gVisor bot <gvisor-bot@google.com> | 2021-06-01 23:37:48 -0700 |
commit | 9357b4f7eb51b78311cb7d6478d5ac3bfcf79948 (patch) | |
tree | 0c8e40228dd220b58c240551e611979fdad00097 | |
parent | 12f4118437584c4a0e4738b9dde3e1885cef3de8 (diff) |
Ensure full shutdown of endpoint on notifyClose
Address a race with non-blocking connect and socket close, causing the
FIN (because of socket close) to not be sent out, even after completing
the handshake.
The race occurs with this sequence:
(1) endpoint Connect starts handshake, sending out SYN
(2) handshake complete() releases endpoint lock, waiting on sleeper.Fetch()
(3) endpoint Close acquires endpoint lock, does not enqueue FIN (as the
endpoint is not yet connected) and asserts notifyClose
(4) SYNACK from peer gets enqueued asserting newSegmentWaker
(5) handshake complete() re-aqcuires lock, first processes newSegmentWaker
event, transitions to ESTABLISHED and proceeds to protocolMainLoop()
(6) protocolMainLoop() exits while processing notifyClose
When the execution follows the above sequence, no FIN is sent to the peer.
This causes the listener side to have a half-open connection sitting in
the accept queue.
Fix this by ensuring that the protocolMainLoop() performs clean shutdown
when the endpoint state is still ESTABLISHED.
This would not be a bug, if during handshake complete(), sleeper.Fetch()
prioritized notificationWaker over newSegmentWaker. In that case, the
handshake would not have completed in (5) above.
Fixes #6067
PiperOrigin-RevId: 376994395
-rw-r--r-- | pkg/tcpip/transport/tcp/connect.go | 14 | ||||
-rw-r--r-- | test/syscalls/linux/socket_inet_loopback.cc | 72 |
2 files changed, 83 insertions, 3 deletions
diff --git a/pkg/tcpip/transport/tcp/connect.go b/pkg/tcpip/transport/tcp/connect.go index 570e5081c..9958547d3 100644 --- a/pkg/tcpip/transport/tcp/connect.go +++ b/pkg/tcpip/transport/tcp/connect.go @@ -1474,11 +1474,19 @@ func (e *endpoint) protocolMainLoop(handshake bool, wakerInitDone chan<- struct{ return &tcpip.ErrConnectionReset{} } - if n¬ifyClose != 0 && closeTimer == nil { - if e.EndpointState() == StateFinWait2 && e.closed { + if n¬ifyClose != 0 && e.closed { + switch e.EndpointState() { + case StateEstablished: + // Perform full shutdown if the endpoint is still + // established. This can occur when notifyClose + // was asserted just before becoming established. + e.shutdownLocked(tcpip.ShutdownWrite | tcpip.ShutdownRead) + case StateFinWait2: // The socket has been closed and we are in FIN_WAIT2 // so start the FIN_WAIT2 timer. - closeTimer = e.stack.Clock().AfterFunc(e.tcpLingerTimeout, closeWaker.Assert) + if closeTimer == nil { + closeTimer = e.stack.Clock().AfterFunc(e.tcpLingerTimeout, closeWaker.Assert) + } } } diff --git a/test/syscalls/linux/socket_inet_loopback.cc b/test/syscalls/linux/socket_inet_loopback.cc index 2fc160cdd..a3bdada86 100644 --- a/test/syscalls/linux/socket_inet_loopback.cc +++ b/test/syscalls/linux/socket_inet_loopback.cc @@ -692,6 +692,78 @@ TEST_P(SocketInetLoopbackTest, TCPListenShutdownConnectingRead) { }); } +// Test close of a non-blocking connecting socket. +TEST_P(SocketInetLoopbackTest, TCPNonBlockingConnectClose) { + TestParam const& param = GetParam(); + TestAddress const& listener = param.listener; + TestAddress const& connector = param.connector; + + // Create the listening socket. + FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE( + Socket(listener.family(), SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP)); + sockaddr_storage listen_addr = listener.addr; + ASSERT_THAT( + bind(listen_fd.get(), AsSockAddr(&listen_addr), listener.addr_len), + SyscallSucceeds()); + ASSERT_THAT(listen(listen_fd.get(), 0), SyscallSucceeds()); + + // Get the port bound by the listening socket. + socklen_t addrlen = listener.addr_len; + ASSERT_THAT(getsockname(listen_fd.get(), AsSockAddr(&listen_addr), &addrlen), + SyscallSucceeds()); + ASSERT_EQ(addrlen, listener.addr_len); + uint16_t const port = + ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr)); + + sockaddr_storage conn_addr = connector.addr; + ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port)); + + // Try many iterations to catch a race with socket close and handshake + // completion. + for (int i = 0; i < 1000; ++i) { + FileDescriptor client = ASSERT_NO_ERRNO_AND_VALUE( + Socket(connector.family(), SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP)); + ASSERT_THAT( + connect(client.get(), AsSockAddr(&conn_addr), connector.addr_len), + SyscallFailsWithErrno(EINPROGRESS)); + ASSERT_THAT(close(client.release()), SyscallSucceeds()); + + // Accept any connections and check if they were closed from the peer. Not + // all client connects would result in an acceptable connection as the + // client handshake might never complete if the socket close was processed + // sooner than the non-blocking connect OR the accept queue is full. We are + // only interested in the case where we do have an acceptable completed + // connection. The accept is non-blocking here, which means that at the time + // of listener close (after the loop ends), we could still have a completed + // connection (from connect of any previous iteration) in the accept queue. + // The listener close would clean up the accept queue. + int accepted_fd; + ASSERT_THAT(accepted_fd = accept(listen_fd.get(), nullptr, nullptr), + AnyOf(SyscallSucceeds(), SyscallFailsWithErrno(EWOULDBLOCK))); + if (accepted_fd < 0) { + continue; + } + FileDescriptor accepted(accepted_fd); + struct pollfd pfd = { + .fd = accepted.get(), + .events = POLLIN | POLLRDHUP, + }; + // Use a large timeout to accomodate for retransmitted FINs. + constexpr int kTimeout = 30000; + int n = poll(&pfd, 1, kTimeout); + ASSERT_GE(n, 0) << strerror(errno); + ASSERT_EQ(n, 1); + + if (IsRunningOnGvisor() && GvisorPlatform() != Platform::kFuchsia) { + // TODO(gvisor.dev/issue/6015): Notify POLLRDHUP on incoming FIN. + ASSERT_EQ(pfd.revents, POLLIN); + } else { + ASSERT_EQ(pfd.revents, POLLIN | POLLRDHUP); + } + ASSERT_THAT(close(accepted.release()), SyscallSucceeds()); + } +} + // TODO(b/157236388): Remove once bug is fixed. Test fails w/ // random save as established connections which can't be delivered to the accept // queue because the queue is full are not correctly delivered after restore |