diff options
-rw-r--r-- | pkg/tcpip/transport/tcp/accept.go | 33 | ||||
-rw-r--r-- | pkg/tcpip/transport/tcp/connect.go | 31 | ||||
-rw-r--r-- | pkg/tcpip/transport/tcp/tcp_test.go | 267 | ||||
-rw-r--r-- | test/syscalls/linux/socket_inet_loopback.cc | 63 |
4 files changed, 219 insertions, 175 deletions
diff --git a/pkg/tcpip/transport/tcp/accept.go b/pkg/tcpip/transport/tcp/accept.go index a32e20b06..d05259c0a 100644 --- a/pkg/tcpip/transport/tcp/accept.go +++ b/pkg/tcpip/transport/tcp/accept.go @@ -251,7 +251,7 @@ func (l *listenContext) createEndpointAndPerformHandshake(s *segment, opts *head // Perform the 3-way handshake. h := newHandshake(ep, l.rcvWnd) - h.resetToSynRcvd(cookie, irs, opts, l.listenEP) + h.resetToSynRcvd(cookie, irs, opts) if err := h.execute(); err != nil { ep.stack.Stats().TCP.FailedConnectionAttempts.Increment() ep.Close() @@ -294,7 +294,6 @@ func (e *endpoint) handleSynSegment(ctx *listenContext, s *segment, opts *header defer decSynRcvdCount() defer e.decSynRcvdCount() defer s.decRef() - n, err := ctx.createEndpointAndPerformHandshake(s, opts) if err != nil { e.stack.Stats().TCP.FailedConnectionAttempts.Increment() @@ -306,7 +305,7 @@ func (e *endpoint) handleSynSegment(ctx *listenContext, s *segment, opts *header func (e *endpoint) incSynRcvdCount() bool { e.mu.Lock() - if l, c := len(e.acceptedChan), cap(e.acceptedChan); l == c && e.synRcvdCount >= c { + if e.synRcvdCount >= cap(e.acceptedChan) { e.mu.Unlock() return false } @@ -321,6 +320,16 @@ func (e *endpoint) decSynRcvdCount() { e.mu.Unlock() } +func (e *endpoint) acceptQueueIsFull() bool { + e.mu.Lock() + if l, c := len(e.acceptedChan)+e.synRcvdCount, cap(e.acceptedChan); l >= c { + e.mu.Unlock() + return true + } + e.mu.Unlock() + return false +} + // handleListenSegment is called when a listening endpoint receives a segment // and needs to handle it. func (e *endpoint) handleListenSegment(ctx *listenContext, s *segment) { @@ -328,17 +337,27 @@ func (e *endpoint) handleListenSegment(ctx *listenContext, s *segment) { case header.TCPFlagSyn: opts := parseSynSegmentOptions(s) if incSynRcvdCount() { - // Drop the SYN if the listen endpoint's accept queue is - // overflowing. - if e.incSynRcvdCount() { + // Only handle the syn if the following conditions hold + // - accept queue is not full. + // - number of connections in synRcvd state is less than the + // backlog. + if !e.acceptQueueIsFull() && e.incSynRcvdCount() { s.incRef() go e.handleSynSegment(ctx, s, &opts) // S/R-SAFE: synRcvdCount is the barrier. return } + decSynRcvdCount() e.stack.Stats().TCP.ListenOverflowSynDrop.Increment() e.stack.Stats().DroppedPackets.Increment() return } else { + // If cookies are in use but the endpoint accept queue + // is full then drop the syn. + if e.acceptQueueIsFull() { + e.stack.Stats().TCP.ListenOverflowSynDrop.Increment() + e.stack.Stats().DroppedPackets.Increment() + return + } cookie := ctx.createCookie(s.id, s.sequenceNumber, encodeMSS(opts.MSS)) // Send SYN with window scaling because we currently // dont't encode this information in the cookie. @@ -356,7 +375,7 @@ func (e *endpoint) handleListenSegment(ctx *listenContext, s *segment) { } case header.TCPFlagAck: - if len(e.acceptedChan) == cap(e.acceptedChan) { + if e.acceptQueueIsFull() { // Silently drop the ack as the application can't accept // the connection at this point. The ack will be // retransmitted by the sender anyway and we can diff --git a/pkg/tcpip/transport/tcp/connect.go b/pkg/tcpip/transport/tcp/connect.go index 0ad7bfb38..dd671f7ce 100644 --- a/pkg/tcpip/transport/tcp/connect.go +++ b/pkg/tcpip/transport/tcp/connect.go @@ -60,12 +60,11 @@ const ( // handshake holds the state used during a TCP 3-way handshake. type handshake struct { - ep *endpoint - listenEP *endpoint // only non nil when doing passive connects. - state handshakeState - active bool - flags uint8 - ackNum seqnum.Value + ep *endpoint + state handshakeState + active bool + flags uint8 + ackNum seqnum.Value // iss is the initial send sequence number, as defined in RFC 793. iss seqnum.Value @@ -142,7 +141,7 @@ func (h *handshake) effectiveRcvWndScale() uint8 { // resetToSynRcvd resets the state of the handshake object to the SYN-RCVD // state. -func (h *handshake) resetToSynRcvd(iss seqnum.Value, irs seqnum.Value, opts *header.TCPSynOptions, listenEP *endpoint) { +func (h *handshake) resetToSynRcvd(iss seqnum.Value, irs seqnum.Value, opts *header.TCPSynOptions) { h.active = false h.state = handshakeSynRcvd h.flags = header.TCPFlagSyn | header.TCPFlagAck @@ -150,7 +149,6 @@ func (h *handshake) resetToSynRcvd(iss seqnum.Value, irs seqnum.Value, opts *hea h.ackNum = irs + 1 h.mss = opts.MSS h.sndWndScale = opts.WS - h.listenEP = listenEP h.ep.mu.Lock() h.ep.state = StateSynRecv h.ep.mu.Unlock() @@ -287,23 +285,6 @@ func (h *handshake) synRcvdState(s *segment) *tcpip.Error { // We have previously received (and acknowledged) the peer's SYN. If the // peer acknowledges our SYN, the handshake is completed. if s.flagIsSet(header.TCPFlagAck) { - // listenContext is also used by a tcp.Forwarder and in that - // context we do not have a listening endpoint to check the - // backlog. So skip this check if listenEP is nil. - if h.listenEP != nil { - h.listenEP.mu.Lock() - if len(h.listenEP.acceptedChan) == cap(h.listenEP.acceptedChan) { - h.listenEP.mu.Unlock() - // If there is no space in the accept queue to accept - // this endpoint then silently drop this ACK. The peer - // will anyway resend the ack and we can complete the - // connection the next time it's retransmitted. - h.ep.stack.Stats().TCP.ListenOverflowAckDrop.Increment() - h.ep.stack.Stats().DroppedPackets.Increment() - return nil - } - h.listenEP.mu.Unlock() - } // If the timestamp option is negotiated and the segment does // not carry a timestamp option then the segment must be dropped // as per https://tools.ietf.org/html/rfc7323#section-3.2. diff --git a/pkg/tcpip/transport/tcp/tcp_test.go b/pkg/tcpip/transport/tcp/tcp_test.go index 56b490aaa..779ca8b76 100644 --- a/pkg/tcpip/transport/tcp/tcp_test.go +++ b/pkg/tcpip/transport/tcp/tcp_test.go @@ -3405,7 +3405,7 @@ func executeHandshake(t *testing.T, c *context.Context, srcPort uint16, synCooki RcvWnd: 30000, }) - // Receive the SYN-ACK reply. + // Receive the SYN-ACK reply.w b := c.GetPacket() tcp := header.TCP(header.IPv4(b).Payload()) iss = seqnum.Value(tcp.SequenceNumber()) @@ -3469,12 +3469,18 @@ func TestListenBacklogFull(t *testing.T) { time.Sleep(50 * time.Millisecond) - // Now execute one more handshake. This should not be completed and - // delivered on an Accept() call as the backlog is full at this point. - irs, iss := executeHandshake(t, c, context.TestPort+uint16(listenBacklog), false /* synCookieInUse */) + // Now execute send one more SYN. The stack should not respond as the backlog + // is full at this point. + c.SendPacket(nil, &context.Headers{ + SrcPort: context.TestPort + 2, + DstPort: context.StackPort, + Flags: header.TCPFlagSyn, + SeqNum: seqnum.Value(789), + RcvWnd: 30000, + }) + c.CheckNoPacketTimeout("unexpected packet received", 50*time.Millisecond) - time.Sleep(50 * time.Millisecond) - // Try to accept the connection. + // Try to accept the connections in the backlog. we, ch := waiter.NewChannelEntry(nil) c.WQ.EventRegister(&we, waiter.EventIn) defer c.WQ.EventUnregister(&we) @@ -3506,16 +3512,8 @@ func TestListenBacklogFull(t *testing.T) { } } - // Now craft the ACK again and verify that the connection is now ready - // to be accepted. - c.SendPacket(nil, &context.Headers{ - SrcPort: context.TestPort + uint16(listenBacklog), - DstPort: context.StackPort, - Flags: header.TCPFlagAck, - SeqNum: irs + 1, - AckNum: iss + 1, - RcvWnd: 30000, - }) + // Now a new handshake must succeed. + executeHandshake(t, c, context.TestPort+2, false /*synCookieInUse */) newEP, _, err := c.EP.Accept() if err == tcpip.ErrWouldBlock { @@ -3531,6 +3529,7 @@ func TestListenBacklogFull(t *testing.T) { t.Fatalf("Timed out waiting for accept") } } + // Now verify that the TCP socket is usable and in a connected state. data := "Don't panic" newEP.Write(tcpip.SlicePayload(buffer.NewViewFromBytes([]byte(data))), tcpip.WriteOptions{}) @@ -3541,13 +3540,7 @@ func TestListenBacklogFull(t *testing.T) { } } -func TestListenBacklogFullSynCookieInUse(t *testing.T) { - saved := tcp.SynRcvdCountThreshold - defer func() { - tcp.SynRcvdCountThreshold = saved - }() - tcp.SynRcvdCountThreshold = 1 - +func TestListenSynRcvdQueueFull(t *testing.T) { c := context.New(t, defaultMTU) defer c.Cleanup() @@ -3566,48 +3559,72 @@ func TestListenBacklogFullSynCookieInUse(t *testing.T) { // Test acceptance. // Start listening. listenBacklog := 1 - portOffset := uint16(0) if err := c.EP.Listen(listenBacklog); err != nil { t.Fatalf("Listen failed: %v", err) } - executeHandshake(t, c, context.TestPort+portOffset, false) - portOffset++ - // Wait for this to be delivered to the accept queue. - time.Sleep(50 * time.Millisecond) + // Send two SYN's the first one should get a SYN-ACK, the + // second one should not get any response and is dropped as + // the synRcvd count will be equal to backlog. + irs := seqnum.Value(789) + c.SendPacket(nil, &context.Headers{ + SrcPort: context.TestPort, + DstPort: context.StackPort, + Flags: header.TCPFlagSyn, + SeqNum: seqnum.Value(789), + RcvWnd: 30000, + }) - nonCookieIRS, nonCookieISS := executeHandshake(t, c, context.TestPort+portOffset, false) + // Receive the SYN-ACK reply. + b := c.GetPacket() + tcp := header.TCP(header.IPv4(b).Payload()) + iss := seqnum.Value(tcp.SequenceNumber()) + tcpCheckers := []checker.TransportChecker{ + checker.SrcPort(context.StackPort), + checker.DstPort(context.TestPort), + checker.TCPFlags(header.TCPFlagAck | header.TCPFlagSyn), + checker.AckNum(uint32(irs) + 1), + } + checker.IPv4(t, b, checker.TCP(tcpCheckers...)) - // Since the backlog is full at this point this connection will not - // transition out of handshake and ignore the ACK. - // - // At this point there should be 1 completed connection in the backlog - // and one incomplete one pending for a final ACK and hence not ready to be - // delivered to the endpoint. + // Now execute send one more SYN. The stack should not respond as the backlog + // is full at this point. // - // Now execute one more handshake. This should not be completed and - // delivered on an Accept() call as the backlog is full at this point - // and there is already 1 pending endpoint. - // - // This one should use a SYN cookie as the synRcvdCount is equal to the - // SynRcvdCountThreshold. - time.Sleep(50 * time.Millisecond) - portOffset++ - irs, iss := executeHandshake(t, c, context.TestPort+portOffset, true) + // NOTE: we did not complete the handshake for the previous one so the + // accept backlog should be empty and there should be one connection in + // synRcvd state. + c.SendPacket(nil, &context.Headers{ + SrcPort: context.TestPort + 1, + DstPort: context.StackPort, + Flags: header.TCPFlagSyn, + SeqNum: seqnum.Value(889), + RcvWnd: 30000, + }) + c.CheckNoPacketTimeout("unexpected packet received", 50*time.Millisecond) - time.Sleep(50 * time.Millisecond) + // Now complete the previous connection and verify that there is a connection + // to accept. + // Send ACK. + c.SendPacket(nil, &context.Headers{ + SrcPort: context.TestPort, + DstPort: context.StackPort, + Flags: header.TCPFlagAck, + SeqNum: irs + 1, + AckNum: iss + 1, + RcvWnd: 30000, + }) - // Verify that there is only one acceptable connection at this point. + // Try to accept the connections in the backlog. we, ch := waiter.NewChannelEntry(nil) c.WQ.EventRegister(&we, waiter.EventIn) defer c.WQ.EventUnregister(&we) - _, _, err = c.EP.Accept() + newEP, _, err := c.EP.Accept() if err == tcpip.ErrWouldBlock { // Wait for connection to be established. select { case <-ch: - _, _, err = c.EP.Accept() + newEP, _, err = c.EP.Accept() if err != nil { t.Fatalf("Accept failed: %v", err) } @@ -3617,27 +3634,68 @@ func TestListenBacklogFullSynCookieInUse(t *testing.T) { } } - // Now verify that there are no more connections that can be accepted. - _, _, err = c.EP.Accept() - if err != tcpip.ErrWouldBlock { - select { - case <-ch: - t.Fatalf("unexpected endpoint delivered on Accept: %+v", c.EP) - case <-time.After(1 * time.Second): - } + // Now verify that the TCP socket is usable and in a connected state. + data := "Don't panic" + newEP.Write(tcpip.SlicePayload(buffer.NewViewFromBytes([]byte(data))), tcpip.WriteOptions{}) + pkt := c.GetPacket() + tcp = header.TCP(header.IPv4(pkt).Payload()) + if string(tcp.Payload()) != data { + t.Fatalf("Unexpected data: got %v, want %v", string(tcp.Payload()), data) } +} + +func TestListenBacklogFullSynCookieInUse(t *testing.T) { + saved := tcp.SynRcvdCountThreshold + defer func() { + tcp.SynRcvdCountThreshold = saved + }() + tcp.SynRcvdCountThreshold = 1 + + c := context.New(t, defaultMTU) + defer c.Cleanup() - // Now send an ACK for the half completed connection + // Create TCP endpoint. + var err *tcpip.Error + c.EP, err = c.Stack().NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &c.WQ) + if err != nil { + t.Fatalf("NewEndpoint failed: %v", err) + } + + // Bind to wildcard. + if err := c.EP.Bind(tcpip.FullAddress{Port: context.StackPort}); err != nil { + t.Fatalf("Bind failed: %v", err) + } + + // Test acceptance. + // Start listening. + listenBacklog := 1 + portOffset := uint16(0) + if err := c.EP.Listen(listenBacklog); err != nil { + t.Fatalf("Listen failed: %v", err) + } + + executeHandshake(t, c, context.TestPort+portOffset, false) + portOffset++ + // Wait for this to be delivered to the accept queue. + time.Sleep(50 * time.Millisecond) + + // Send a SYN request. + irs := seqnum.Value(789) c.SendPacket(nil, &context.Headers{ - SrcPort: context.TestPort + portOffset - 1, + SrcPort: context.TestPort, DstPort: context.StackPort, - Flags: header.TCPFlagAck, - SeqNum: nonCookieIRS + 1, - AckNum: nonCookieISS + 1, + Flags: header.TCPFlagSyn, + SeqNum: irs, RcvWnd: 30000, }) + // The Syn should be dropped as the endpoint's backlog is full. + c.CheckNoPacketTimeout("unexpected packet received", 50*time.Millisecond) + + // Verify that there is only one acceptable connection at this point. + we, ch := waiter.NewChannelEntry(nil) + c.WQ.EventRegister(&we, waiter.EventIn) + defer c.WQ.EventUnregister(&we) - // Verify that the connection is now delivered to the backlog. _, _, err = c.EP.Accept() if err == tcpip.ErrWouldBlock { // Wait for connection to be established. @@ -3653,41 +3711,15 @@ func TestListenBacklogFullSynCookieInUse(t *testing.T) { } } - // Finally send an ACK for the connection that used a cookie and verify that - // it's also completed and delivered. - c.SendPacket(nil, &context.Headers{ - SrcPort: context.TestPort + portOffset, - DstPort: context.StackPort, - Flags: header.TCPFlagAck, - SeqNum: irs, - AckNum: iss, - RcvWnd: 30000, - }) - - time.Sleep(50 * time.Millisecond) - newEP, _, err := c.EP.Accept() - if err == tcpip.ErrWouldBlock { - // Wait for connection to be established. + // Now verify that there are no more connections that can be accepted. + _, _, err = c.EP.Accept() + if err != tcpip.ErrWouldBlock { select { case <-ch: - newEP, _, err = c.EP.Accept() - if err != nil { - t.Fatalf("Accept failed: %v", err) - } - + t.Fatalf("unexpected endpoint delivered on Accept: %+v", c.EP) case <-time.After(1 * time.Second): - t.Fatalf("Timed out waiting for accept") } } - - // Now verify that the TCP socket is usable and in a connected state. - data := "Don't panic" - newEP.Write(tcpip.SlicePayload(buffer.NewViewFromBytes([]byte(data))), tcpip.WriteOptions{}) - b := c.GetPacket() - tcp := header.TCP(header.IPv4(b).Payload()) - if string(tcp.Payload()) != data { - t.Fatalf("Unexpected data: got %v, want %v", string(tcp.Payload()), data) - } } func TestPassiveConnectionAttemptIncrement(t *testing.T) { @@ -3761,18 +3793,12 @@ func TestPassiveFailedConnectionAttemptIncrement(t *testing.T) { } srcPort := uint16(context.TestPort) - // Now attempt 3 handshakes, the first two will fill up the accept and the SYN-RCVD - // queue for the endpoint. + // Now attempt a handshakes it will fill up the accept backlog. executeHandshake(t, c, srcPort, false) // Give time for the final ACK to be processed as otherwise the next handshake could // get accepted before the previous one based on goroutine scheduling. time.Sleep(50 * time.Millisecond) - irs, iss := executeHandshake(t, c, srcPort+1, false) - - // Wait for a short while for the accepted connection to be delivered to - // the channel before trying to send the 3rd SYN. - time.Sleep(40 * time.Millisecond) want := stats.TCP.ListenOverflowSynDrop.Value() + 1 @@ -3810,49 +3836,6 @@ func TestPassiveFailedConnectionAttemptIncrement(t *testing.T) { t.Fatalf("Timed out waiting for accept") } } - - // Now complete the next connection in SYN-RCVD state as it should - // have dropped the final ACK to the handshake due to accept queue - // being full. - c.SendPacket(nil, &context.Headers{ - SrcPort: srcPort + 1, - DstPort: context.StackPort, - Flags: header.TCPFlagAck, - SeqNum: irs + 1, - AckNum: iss + 1, - RcvWnd: 30000, - }) - - // Now check that there is one more acceptable connections. - _, _, err = c.EP.Accept() - if err == tcpip.ErrWouldBlock { - // Wait for connection to be established. - select { - case <-ch: - _, _, err = c.EP.Accept() - if err != nil { - t.Fatalf("Accept failed: %v", err) - } - - case <-time.After(1 * time.Second): - t.Fatalf("Timed out waiting for accept") - } - } - - // Try and accept a 3rd one this should fail. - _, _, err = c.EP.Accept() - if err == tcpip.ErrWouldBlock { - // Wait for connection to be established. - select { - case <-ch: - ep, _, err = c.EP.Accept() - if err == nil { - t.Fatalf("Accept succeeded when it should have failed got: %+v", ep) - } - - case <-time.After(1 * time.Second): - } - } } func TestEndpointBindListenAcceptState(t *testing.T) { diff --git a/test/syscalls/linux/socket_inet_loopback.cc b/test/syscalls/linux/socket_inet_loopback.cc index 9b3b70b01..df31d25b5 100644 --- a/test/syscalls/linux/socket_inet_loopback.cc +++ b/test/syscalls/linux/socket_inet_loopback.cc @@ -14,6 +14,7 @@ #include <arpa/inet.h> #include <netinet/in.h> +#include <poll.h> #include <string.h> #include <sys/socket.h> @@ -144,6 +145,66 @@ TEST_P(SocketInetLoopbackTest, TCP) { ASSERT_THAT(shutdown(conn_fd.get(), SHUT_RDWR), SyscallSucceeds()); } +TEST_P(SocketInetLoopbackTest, TCPbacklog) { + auto const& param = GetParam(); + + TestAddress const& listener = param.listener; + TestAddress const& connector = param.connector; + + // Create the listening socket. + const FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE( + Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP)); + sockaddr_storage listen_addr = listener.addr; + ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast<sockaddr*>(&listen_addr), + listener.addr_len), + SyscallSucceeds()); + ASSERT_THAT(listen(listen_fd.get(), 2), SyscallSucceeds()); + + // Get the port bound by the listening socket. + socklen_t addrlen = listener.addr_len; + ASSERT_THAT(getsockname(listen_fd.get(), + reinterpret_cast<sockaddr*>(&listen_addr), &addrlen), + SyscallSucceeds()); + uint16_t const port = + ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr)); + int i = 0; + while (1) { + int ret; + + // Connect to the listening socket. + const FileDescriptor conn_fd = ASSERT_NO_ERRNO_AND_VALUE( + Socket(connector.family(), SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP)); + sockaddr_storage conn_addr = connector.addr; + ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port)); + ret = connect(conn_fd.get(), reinterpret_cast<sockaddr*>(&conn_addr), + connector.addr_len); + if (ret != 0) { + EXPECT_THAT(ret, SyscallFailsWithErrno(EINPROGRESS)); + struct pollfd pfd = { + .fd = conn_fd.get(), + .events = POLLOUT, + }; + ret = poll(&pfd, 1, 3000); + if (ret == 0) break; + EXPECT_THAT(ret, SyscallSucceedsWithValue(1)); + } + EXPECT_THAT(RetryEINTR(send)(conn_fd.get(), &i, sizeof(i), 0), + SyscallSucceedsWithValue(sizeof(i))); + ASSERT_THAT(shutdown(conn_fd.get(), SHUT_RDWR), SyscallSucceeds()); + i++; + } + + for (; i != 0; i--) { + // Accept the connection. + // + // We have to assign a name to the accepted socket, as unamed temporary + // objects are destructed upon full evaluation of the expression it is in, + // potentially causing the connecting socket to fail to shutdown properly. + auto accepted = + ASSERT_NO_ERRNO_AND_VALUE(Accept(listen_fd.get(), nullptr, nullptr)); + } +} + INSTANTIATE_TEST_SUITE_P( All, SocketInetLoopbackTest, ::testing::Values( @@ -198,7 +259,7 @@ TEST_P(SocketInetReusePortTest, TcpPortReuseMultiThread) { ASSERT_THAT( bind(fd, reinterpret_cast<sockaddr*>(&listen_addr), listener.addr_len), SyscallSucceeds()); - ASSERT_THAT(listen(fd, 512), SyscallSucceeds()); + ASSERT_THAT(listen(fd, 40), SyscallSucceeds()); // On the first bind we need to determine which port was bound. if (i != 0) { |