From 3933dd5c04c56512eccb38657bb735f375db4de4 Mon Sep 17 00:00:00 2001 From: Bhasker Hariharan Date: Mon, 10 Jun 2019 15:39:35 -0700 Subject: Fixes to listen backlog handling. Changes netstack to confirm to current linux behaviour where if the backlog is full then we drop the SYN and do not send a SYN-ACK. Similarly we allow upto backlog connections to be in SYN-RCVD state as long as the backlog is not full. We also now drop a SYN if syn cookies are in use and the backlog for the listening endpoint is full. Added new tests to confirm the behaviour. Also reverted the change to increase the backlog in TcpPortReuseMultiThread syscall test. Fixes #236 PiperOrigin-RevId: 252500462 --- pkg/tcpip/transport/tcp/accept.go | 33 ++++- pkg/tcpip/transport/tcp/connect.go | 31 +---- pkg/tcpip/transport/tcp/tcp_test.go | 267 +++++++++++++++++------------------- 3 files changed, 157 insertions(+), 174 deletions(-) (limited to 'pkg/tcpip/transport') diff --git a/pkg/tcpip/transport/tcp/accept.go b/pkg/tcpip/transport/tcp/accept.go index a32e20b06..d05259c0a 100644 --- a/pkg/tcpip/transport/tcp/accept.go +++ b/pkg/tcpip/transport/tcp/accept.go @@ -251,7 +251,7 @@ func (l *listenContext) createEndpointAndPerformHandshake(s *segment, opts *head // Perform the 3-way handshake. h := newHandshake(ep, l.rcvWnd) - h.resetToSynRcvd(cookie, irs, opts, l.listenEP) + h.resetToSynRcvd(cookie, irs, opts) if err := h.execute(); err != nil { ep.stack.Stats().TCP.FailedConnectionAttempts.Increment() ep.Close() @@ -294,7 +294,6 @@ func (e *endpoint) handleSynSegment(ctx *listenContext, s *segment, opts *header defer decSynRcvdCount() defer e.decSynRcvdCount() defer s.decRef() - n, err := ctx.createEndpointAndPerformHandshake(s, opts) if err != nil { e.stack.Stats().TCP.FailedConnectionAttempts.Increment() @@ -306,7 +305,7 @@ func (e *endpoint) handleSynSegment(ctx *listenContext, s *segment, opts *header func (e *endpoint) incSynRcvdCount() bool { e.mu.Lock() - if l, c := len(e.acceptedChan), cap(e.acceptedChan); l == c && e.synRcvdCount >= c { + if e.synRcvdCount >= cap(e.acceptedChan) { e.mu.Unlock() return false } @@ -321,6 +320,16 @@ func (e *endpoint) decSynRcvdCount() { e.mu.Unlock() } +func (e *endpoint) acceptQueueIsFull() bool { + e.mu.Lock() + if l, c := len(e.acceptedChan)+e.synRcvdCount, cap(e.acceptedChan); l >= c { + e.mu.Unlock() + return true + } + e.mu.Unlock() + return false +} + // handleListenSegment is called when a listening endpoint receives a segment // and needs to handle it. func (e *endpoint) handleListenSegment(ctx *listenContext, s *segment) { @@ -328,17 +337,27 @@ func (e *endpoint) handleListenSegment(ctx *listenContext, s *segment) { case header.TCPFlagSyn: opts := parseSynSegmentOptions(s) if incSynRcvdCount() { - // Drop the SYN if the listen endpoint's accept queue is - // overflowing. - if e.incSynRcvdCount() { + // Only handle the syn if the following conditions hold + // - accept queue is not full. + // - number of connections in synRcvd state is less than the + // backlog. + if !e.acceptQueueIsFull() && e.incSynRcvdCount() { s.incRef() go e.handleSynSegment(ctx, s, &opts) // S/R-SAFE: synRcvdCount is the barrier. return } + decSynRcvdCount() e.stack.Stats().TCP.ListenOverflowSynDrop.Increment() e.stack.Stats().DroppedPackets.Increment() return } else { + // If cookies are in use but the endpoint accept queue + // is full then drop the syn. + if e.acceptQueueIsFull() { + e.stack.Stats().TCP.ListenOverflowSynDrop.Increment() + e.stack.Stats().DroppedPackets.Increment() + return + } cookie := ctx.createCookie(s.id, s.sequenceNumber, encodeMSS(opts.MSS)) // Send SYN with window scaling because we currently // dont't encode this information in the cookie. @@ -356,7 +375,7 @@ func (e *endpoint) handleListenSegment(ctx *listenContext, s *segment) { } case header.TCPFlagAck: - if len(e.acceptedChan) == cap(e.acceptedChan) { + if e.acceptQueueIsFull() { // Silently drop the ack as the application can't accept // the connection at this point. The ack will be // retransmitted by the sender anyway and we can diff --git a/pkg/tcpip/transport/tcp/connect.go b/pkg/tcpip/transport/tcp/connect.go index 0ad7bfb38..dd671f7ce 100644 --- a/pkg/tcpip/transport/tcp/connect.go +++ b/pkg/tcpip/transport/tcp/connect.go @@ -60,12 +60,11 @@ const ( // handshake holds the state used during a TCP 3-way handshake. type handshake struct { - ep *endpoint - listenEP *endpoint // only non nil when doing passive connects. - state handshakeState - active bool - flags uint8 - ackNum seqnum.Value + ep *endpoint + state handshakeState + active bool + flags uint8 + ackNum seqnum.Value // iss is the initial send sequence number, as defined in RFC 793. iss seqnum.Value @@ -142,7 +141,7 @@ func (h *handshake) effectiveRcvWndScale() uint8 { // resetToSynRcvd resets the state of the handshake object to the SYN-RCVD // state. -func (h *handshake) resetToSynRcvd(iss seqnum.Value, irs seqnum.Value, opts *header.TCPSynOptions, listenEP *endpoint) { +func (h *handshake) resetToSynRcvd(iss seqnum.Value, irs seqnum.Value, opts *header.TCPSynOptions) { h.active = false h.state = handshakeSynRcvd h.flags = header.TCPFlagSyn | header.TCPFlagAck @@ -150,7 +149,6 @@ func (h *handshake) resetToSynRcvd(iss seqnum.Value, irs seqnum.Value, opts *hea h.ackNum = irs + 1 h.mss = opts.MSS h.sndWndScale = opts.WS - h.listenEP = listenEP h.ep.mu.Lock() h.ep.state = StateSynRecv h.ep.mu.Unlock() @@ -287,23 +285,6 @@ func (h *handshake) synRcvdState(s *segment) *tcpip.Error { // We have previously received (and acknowledged) the peer's SYN. If the // peer acknowledges our SYN, the handshake is completed. if s.flagIsSet(header.TCPFlagAck) { - // listenContext is also used by a tcp.Forwarder and in that - // context we do not have a listening endpoint to check the - // backlog. So skip this check if listenEP is nil. - if h.listenEP != nil { - h.listenEP.mu.Lock() - if len(h.listenEP.acceptedChan) == cap(h.listenEP.acceptedChan) { - h.listenEP.mu.Unlock() - // If there is no space in the accept queue to accept - // this endpoint then silently drop this ACK. The peer - // will anyway resend the ack and we can complete the - // connection the next time it's retransmitted. - h.ep.stack.Stats().TCP.ListenOverflowAckDrop.Increment() - h.ep.stack.Stats().DroppedPackets.Increment() - return nil - } - h.listenEP.mu.Unlock() - } // If the timestamp option is negotiated and the segment does // not carry a timestamp option then the segment must be dropped // as per https://tools.ietf.org/html/rfc7323#section-3.2. diff --git a/pkg/tcpip/transport/tcp/tcp_test.go b/pkg/tcpip/transport/tcp/tcp_test.go index 56b490aaa..779ca8b76 100644 --- a/pkg/tcpip/transport/tcp/tcp_test.go +++ b/pkg/tcpip/transport/tcp/tcp_test.go @@ -3405,7 +3405,7 @@ func executeHandshake(t *testing.T, c *context.Context, srcPort uint16, synCooki RcvWnd: 30000, }) - // Receive the SYN-ACK reply. + // Receive the SYN-ACK reply.w b := c.GetPacket() tcp := header.TCP(header.IPv4(b).Payload()) iss = seqnum.Value(tcp.SequenceNumber()) @@ -3469,12 +3469,18 @@ func TestListenBacklogFull(t *testing.T) { time.Sleep(50 * time.Millisecond) - // Now execute one more handshake. This should not be completed and - // delivered on an Accept() call as the backlog is full at this point. - irs, iss := executeHandshake(t, c, context.TestPort+uint16(listenBacklog), false /* synCookieInUse */) + // Now execute send one more SYN. The stack should not respond as the backlog + // is full at this point. + c.SendPacket(nil, &context.Headers{ + SrcPort: context.TestPort + 2, + DstPort: context.StackPort, + Flags: header.TCPFlagSyn, + SeqNum: seqnum.Value(789), + RcvWnd: 30000, + }) + c.CheckNoPacketTimeout("unexpected packet received", 50*time.Millisecond) - time.Sleep(50 * time.Millisecond) - // Try to accept the connection. + // Try to accept the connections in the backlog. we, ch := waiter.NewChannelEntry(nil) c.WQ.EventRegister(&we, waiter.EventIn) defer c.WQ.EventUnregister(&we) @@ -3506,16 +3512,8 @@ func TestListenBacklogFull(t *testing.T) { } } - // Now craft the ACK again and verify that the connection is now ready - // to be accepted. - c.SendPacket(nil, &context.Headers{ - SrcPort: context.TestPort + uint16(listenBacklog), - DstPort: context.StackPort, - Flags: header.TCPFlagAck, - SeqNum: irs + 1, - AckNum: iss + 1, - RcvWnd: 30000, - }) + // Now a new handshake must succeed. + executeHandshake(t, c, context.TestPort+2, false /*synCookieInUse */) newEP, _, err := c.EP.Accept() if err == tcpip.ErrWouldBlock { @@ -3531,6 +3529,7 @@ func TestListenBacklogFull(t *testing.T) { t.Fatalf("Timed out waiting for accept") } } + // Now verify that the TCP socket is usable and in a connected state. data := "Don't panic" newEP.Write(tcpip.SlicePayload(buffer.NewViewFromBytes([]byte(data))), tcpip.WriteOptions{}) @@ -3541,13 +3540,7 @@ func TestListenBacklogFull(t *testing.T) { } } -func TestListenBacklogFullSynCookieInUse(t *testing.T) { - saved := tcp.SynRcvdCountThreshold - defer func() { - tcp.SynRcvdCountThreshold = saved - }() - tcp.SynRcvdCountThreshold = 1 - +func TestListenSynRcvdQueueFull(t *testing.T) { c := context.New(t, defaultMTU) defer c.Cleanup() @@ -3566,48 +3559,72 @@ func TestListenBacklogFullSynCookieInUse(t *testing.T) { // Test acceptance. // Start listening. listenBacklog := 1 - portOffset := uint16(0) if err := c.EP.Listen(listenBacklog); err != nil { t.Fatalf("Listen failed: %v", err) } - executeHandshake(t, c, context.TestPort+portOffset, false) - portOffset++ - // Wait for this to be delivered to the accept queue. - time.Sleep(50 * time.Millisecond) + // Send two SYN's the first one should get a SYN-ACK, the + // second one should not get any response and is dropped as + // the synRcvd count will be equal to backlog. + irs := seqnum.Value(789) + c.SendPacket(nil, &context.Headers{ + SrcPort: context.TestPort, + DstPort: context.StackPort, + Flags: header.TCPFlagSyn, + SeqNum: seqnum.Value(789), + RcvWnd: 30000, + }) - nonCookieIRS, nonCookieISS := executeHandshake(t, c, context.TestPort+portOffset, false) + // Receive the SYN-ACK reply. + b := c.GetPacket() + tcp := header.TCP(header.IPv4(b).Payload()) + iss := seqnum.Value(tcp.SequenceNumber()) + tcpCheckers := []checker.TransportChecker{ + checker.SrcPort(context.StackPort), + checker.DstPort(context.TestPort), + checker.TCPFlags(header.TCPFlagAck | header.TCPFlagSyn), + checker.AckNum(uint32(irs) + 1), + } + checker.IPv4(t, b, checker.TCP(tcpCheckers...)) - // Since the backlog is full at this point this connection will not - // transition out of handshake and ignore the ACK. - // - // At this point there should be 1 completed connection in the backlog - // and one incomplete one pending for a final ACK and hence not ready to be - // delivered to the endpoint. + // Now execute send one more SYN. The stack should not respond as the backlog + // is full at this point. // - // Now execute one more handshake. This should not be completed and - // delivered on an Accept() call as the backlog is full at this point - // and there is already 1 pending endpoint. - // - // This one should use a SYN cookie as the synRcvdCount is equal to the - // SynRcvdCountThreshold. - time.Sleep(50 * time.Millisecond) - portOffset++ - irs, iss := executeHandshake(t, c, context.TestPort+portOffset, true) + // NOTE: we did not complete the handshake for the previous one so the + // accept backlog should be empty and there should be one connection in + // synRcvd state. + c.SendPacket(nil, &context.Headers{ + SrcPort: context.TestPort + 1, + DstPort: context.StackPort, + Flags: header.TCPFlagSyn, + SeqNum: seqnum.Value(889), + RcvWnd: 30000, + }) + c.CheckNoPacketTimeout("unexpected packet received", 50*time.Millisecond) - time.Sleep(50 * time.Millisecond) + // Now complete the previous connection and verify that there is a connection + // to accept. + // Send ACK. + c.SendPacket(nil, &context.Headers{ + SrcPort: context.TestPort, + DstPort: context.StackPort, + Flags: header.TCPFlagAck, + SeqNum: irs + 1, + AckNum: iss + 1, + RcvWnd: 30000, + }) - // Verify that there is only one acceptable connection at this point. + // Try to accept the connections in the backlog. we, ch := waiter.NewChannelEntry(nil) c.WQ.EventRegister(&we, waiter.EventIn) defer c.WQ.EventUnregister(&we) - _, _, err = c.EP.Accept() + newEP, _, err := c.EP.Accept() if err == tcpip.ErrWouldBlock { // Wait for connection to be established. select { case <-ch: - _, _, err = c.EP.Accept() + newEP, _, err = c.EP.Accept() if err != nil { t.Fatalf("Accept failed: %v", err) } @@ -3617,27 +3634,68 @@ func TestListenBacklogFullSynCookieInUse(t *testing.T) { } } - // Now verify that there are no more connections that can be accepted. - _, _, err = c.EP.Accept() - if err != tcpip.ErrWouldBlock { - select { - case <-ch: - t.Fatalf("unexpected endpoint delivered on Accept: %+v", c.EP) - case <-time.After(1 * time.Second): - } + // Now verify that the TCP socket is usable and in a connected state. + data := "Don't panic" + newEP.Write(tcpip.SlicePayload(buffer.NewViewFromBytes([]byte(data))), tcpip.WriteOptions{}) + pkt := c.GetPacket() + tcp = header.TCP(header.IPv4(pkt).Payload()) + if string(tcp.Payload()) != data { + t.Fatalf("Unexpected data: got %v, want %v", string(tcp.Payload()), data) } +} + +func TestListenBacklogFullSynCookieInUse(t *testing.T) { + saved := tcp.SynRcvdCountThreshold + defer func() { + tcp.SynRcvdCountThreshold = saved + }() + tcp.SynRcvdCountThreshold = 1 + + c := context.New(t, defaultMTU) + defer c.Cleanup() - // Now send an ACK for the half completed connection + // Create TCP endpoint. + var err *tcpip.Error + c.EP, err = c.Stack().NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &c.WQ) + if err != nil { + t.Fatalf("NewEndpoint failed: %v", err) + } + + // Bind to wildcard. + if err := c.EP.Bind(tcpip.FullAddress{Port: context.StackPort}); err != nil { + t.Fatalf("Bind failed: %v", err) + } + + // Test acceptance. + // Start listening. + listenBacklog := 1 + portOffset := uint16(0) + if err := c.EP.Listen(listenBacklog); err != nil { + t.Fatalf("Listen failed: %v", err) + } + + executeHandshake(t, c, context.TestPort+portOffset, false) + portOffset++ + // Wait for this to be delivered to the accept queue. + time.Sleep(50 * time.Millisecond) + + // Send a SYN request. + irs := seqnum.Value(789) c.SendPacket(nil, &context.Headers{ - SrcPort: context.TestPort + portOffset - 1, + SrcPort: context.TestPort, DstPort: context.StackPort, - Flags: header.TCPFlagAck, - SeqNum: nonCookieIRS + 1, - AckNum: nonCookieISS + 1, + Flags: header.TCPFlagSyn, + SeqNum: irs, RcvWnd: 30000, }) + // The Syn should be dropped as the endpoint's backlog is full. + c.CheckNoPacketTimeout("unexpected packet received", 50*time.Millisecond) + + // Verify that there is only one acceptable connection at this point. + we, ch := waiter.NewChannelEntry(nil) + c.WQ.EventRegister(&we, waiter.EventIn) + defer c.WQ.EventUnregister(&we) - // Verify that the connection is now delivered to the backlog. _, _, err = c.EP.Accept() if err == tcpip.ErrWouldBlock { // Wait for connection to be established. @@ -3653,41 +3711,15 @@ func TestListenBacklogFullSynCookieInUse(t *testing.T) { } } - // Finally send an ACK for the connection that used a cookie and verify that - // it's also completed and delivered. - c.SendPacket(nil, &context.Headers{ - SrcPort: context.TestPort + portOffset, - DstPort: context.StackPort, - Flags: header.TCPFlagAck, - SeqNum: irs, - AckNum: iss, - RcvWnd: 30000, - }) - - time.Sleep(50 * time.Millisecond) - newEP, _, err := c.EP.Accept() - if err == tcpip.ErrWouldBlock { - // Wait for connection to be established. + // Now verify that there are no more connections that can be accepted. + _, _, err = c.EP.Accept() + if err != tcpip.ErrWouldBlock { select { case <-ch: - newEP, _, err = c.EP.Accept() - if err != nil { - t.Fatalf("Accept failed: %v", err) - } - + t.Fatalf("unexpected endpoint delivered on Accept: %+v", c.EP) case <-time.After(1 * time.Second): - t.Fatalf("Timed out waiting for accept") } } - - // Now verify that the TCP socket is usable and in a connected state. - data := "Don't panic" - newEP.Write(tcpip.SlicePayload(buffer.NewViewFromBytes([]byte(data))), tcpip.WriteOptions{}) - b := c.GetPacket() - tcp := header.TCP(header.IPv4(b).Payload()) - if string(tcp.Payload()) != data { - t.Fatalf("Unexpected data: got %v, want %v", string(tcp.Payload()), data) - } } func TestPassiveConnectionAttemptIncrement(t *testing.T) { @@ -3761,18 +3793,12 @@ func TestPassiveFailedConnectionAttemptIncrement(t *testing.T) { } srcPort := uint16(context.TestPort) - // Now attempt 3 handshakes, the first two will fill up the accept and the SYN-RCVD - // queue for the endpoint. + // Now attempt a handshakes it will fill up the accept backlog. executeHandshake(t, c, srcPort, false) // Give time for the final ACK to be processed as otherwise the next handshake could // get accepted before the previous one based on goroutine scheduling. time.Sleep(50 * time.Millisecond) - irs, iss := executeHandshake(t, c, srcPort+1, false) - - // Wait for a short while for the accepted connection to be delivered to - // the channel before trying to send the 3rd SYN. - time.Sleep(40 * time.Millisecond) want := stats.TCP.ListenOverflowSynDrop.Value() + 1 @@ -3810,49 +3836,6 @@ func TestPassiveFailedConnectionAttemptIncrement(t *testing.T) { t.Fatalf("Timed out waiting for accept") } } - - // Now complete the next connection in SYN-RCVD state as it should - // have dropped the final ACK to the handshake due to accept queue - // being full. - c.SendPacket(nil, &context.Headers{ - SrcPort: srcPort + 1, - DstPort: context.StackPort, - Flags: header.TCPFlagAck, - SeqNum: irs + 1, - AckNum: iss + 1, - RcvWnd: 30000, - }) - - // Now check that there is one more acceptable connections. - _, _, err = c.EP.Accept() - if err == tcpip.ErrWouldBlock { - // Wait for connection to be established. - select { - case <-ch: - _, _, err = c.EP.Accept() - if err != nil { - t.Fatalf("Accept failed: %v", err) - } - - case <-time.After(1 * time.Second): - t.Fatalf("Timed out waiting for accept") - } - } - - // Try and accept a 3rd one this should fail. - _, _, err = c.EP.Accept() - if err == tcpip.ErrWouldBlock { - // Wait for connection to be established. - select { - case <-ch: - ep, _, err = c.EP.Accept() - if err == nil { - t.Fatalf("Accept succeeded when it should have failed got: %+v", ep) - } - - case <-time.After(1 * time.Second): - } - } } func TestEndpointBindListenAcceptState(t *testing.T) { -- cgit v1.2.3