1 files changed, 215 insertions, 1 deletions
diff --git a/pkg/tcpip/transport/tcp/tcp_sack_test.go b/pkg/tcpip/transport/tcp/tcp_sack_test.go
index dbfbd5c4f..025d133be 100644
--- a/pkg/tcpip/transport/tcp/tcp_sack_test.go
+++ b/pkg/tcpip/transport/tcp/tcp_sack_test.go
@@ -16,22 +16,33 @@ package tcp_test
 
 import (
 	"fmt"
+	"log"
 	"reflect"
 	"testing"
+	"time"
 
+	"gvisor.googlesource.com/gvisor/pkg/tcpip"
+	"gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
 	"gvisor.googlesource.com/gvisor/pkg/tcpip/header"
 	"gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum"
+	"gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
 	"gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp"
 	"gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp/testing/context"
 )
 
-// createConnectWithSACKPermittedOption creates and connects c.ep with the
+// createConnectedWithSACKPermittedOption creates and connects c.ep with the
 // SACKPermitted option enabled if the stack in the context has the SACK support
 // enabled.
 func createConnectedWithSACKPermittedOption(c *context.Context) *context.RawEndpoint {
 	return c.CreateConnectedWithOptions(header.TCPSynOptions{SACKPermitted: c.SACKEnabled()})
 }
 
+// createConnectedWithSACKAndTS creates and connects c.EP with the TS option
+// always requested and SACKPermitted requested only if the stack enables SACK.
+func createConnectedWithSACKAndTS(c *context.Context) *context.RawEndpoint {
+	return c.CreateConnectedWithOptions(header.TCPSynOptions{SACKPermitted: c.SACKEnabled(), TS: true})
+}
+
 func setStackSACKPermitted(t *testing.T, c *context.Context, enable bool) {
 	t.Helper()
 	if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SACKEnabled(enable)); err != nil {
@@ -348,3 +359,206 @@ func TestTrimSackBlockList(t *testing.T) {
 		}
 	}
 }
+
+// TestSACKRecovery drives a SACK+TS connection through slow start, forces
+// recovery with SACK-carrying duplicate ACKs, and verifies the packets and
+// retransmit counters observed during and after recovery.
+func TestSACKRecovery(t *testing.T) {
+	const maxPayload = 10
+	// See: tcp.makeOptions for why tsOptionSize is set to 12 here.
+	const tsOptionSize = 12
+	// TCP options reduce the space left for payload in each segment, so
+	// the MTU is increased by maxTCPOptionSize bytes to leave room for the
+	// SACK and Timestamp options.
+	const maxTCPOptionSize = 40
+
+	c := context.New(t, uint32(header.TCPMinimumSize+header.IPv4MinimumSize+maxTCPOptionSize+maxPayload))
+	defer c.Cleanup()
+
+	c.Stack().AddTCPProbe(func(s stack.TCPEndpointState) {
+		// We use log.Printf instead of t.Logf here because this probe
+		// can fire after the test function has finished: closing the
+		// endpoint in cleanup does not terminate the worker loop until
+		// the full TCP shutdown is done, and calling t.Logf after the
+		// test has finished causes a panic.
+		log.Printf("state: %+v\n", s)
+	})
+	setStackSACKPermitted(t, c, true)
+	createConnectedWithSACKAndTS(c)
+
+	const iterations = 7
+	data := buffer.NewView(2 * maxPayload * (tcp.InitialCwnd << (iterations + 1)))
+	for i := range data {
+		data[i] = byte(i)
+	}
+
+	// Write all the data in one shot. Packets will only be written at the
+	// MTU size though.
+	if _, _, err := c.EP.Write(tcpip.SlicePayload(data), tcpip.WriteOptions{}); err != nil {
+		t.Fatalf("Write failed: %v", err)
+	}
+
+	// Do slow start for a few iterations.
+	expected := tcp.InitialCwnd
+	bytesRead := 0
+	for i := 0; i < iterations; i++ {
+		expected = tcp.InitialCwnd << uint(i)
+		if i > 0 {
+			// Acknowledge all the data received so far if not on
+			// first iteration.
+			c.SendAck(790, bytesRead)
+		}
+
+		// Read all packets expected on this iteration. Don't
+		// acknowledge any of them just yet, so that we can measure the
+		// congestion window.
+		for j := 0; j < expected; j++ {
+			c.ReceiveAndCheckPacketWithOptions(data, bytesRead, maxPayload, tsOptionSize)
+			bytesRead += maxPayload
+		}
+
+		// Check we don't receive any more packets on this iteration.
+		// The wait can't be too long or we'll trigger a timeout.
+		c.CheckNoPacketTimeout("More packets received than expected for this cwnd.", 50*time.Millisecond)
+	}
+
+	// Send 3 duplicate acks, each SACKing one more segment past a 3-segment
+	// hole. This should force an immediate retransmit of the pending packet
+	// and put the sender into SACK recovery.
+	rtxOffset := bytesRead - maxPayload*expected
+	start := c.IRS.Add(seqnum.Size(rtxOffset) + 3*maxPayload + 1)
+	end := start.Add(maxPayload)
+	for i := 0; i < 3; i++ {
+		c.SendAckWithSACK(790, rtxOffset, []header.SACKBlock{{start, end}})
+		end = end.Add(maxPayload)
+	}
+
+	// Receive the retransmitted packet.
+	c.ReceiveAndCheckPacketWithOptions(data, rtxOffset, maxPayload, tsOptionSize)
+
+	tcpStats := c.Stack().Stats().TCP
+	stats := []struct {
+		stat *tcpip.StatCounter
+		name string
+		want uint64
+	}{
+		{tcpStats.FastRetransmit, "stats.TCP.FastRetransmit", 1},
+		{tcpStats.Retransmits, "stats.TCP.Retransmits", 1},
+		{tcpStats.SACKRecovery, "stats.TCP.SACKRecovery", 1},
+		{tcpStats.FastRecovery, "stats.TCP.FastRecovery", 0},
+	}
+	for _, s := range stats {
+		if got, want := s.stat.Value(), s.want; got != want {
+			t.Errorf("got %s.Value() = %v, want = %v", s.name, got, want)
+		}
+	}
+
+	// Now send 7 more duplicate ACKs. In SACK TCP dupAcks do not cause
+	// window inflation and sending of packets is completely handled by the
+	// SACK Recovery algorithm. We should see no packets being released, as
+	// the cwnd at this point after entering recovery should be half of the
+	// outstanding number of packets in flight.
+	for i := 0; i < 7; i++ {
+		c.SendAckWithSACK(790, rtxOffset, []header.SACKBlock{{start, end}})
+		end = end.Add(maxPayload)
+	}
+
+	recoverPoint := bytesRead
+
+	// Ensure no new packets arrive.
+	c.CheckNoPacketTimeout("More packets received than expected during recovery after dupacks for this cwnd.",
+		50*time.Millisecond)
+
+	// Acknowledge half of the pending data. This along with the 10 sacked
+	// segments above should reduce the outstanding below the current
+	// congestion window allowing the sender to transmit data.
+	rtxOffset = bytesRead - expected*maxPayload/2
+
+	// Now send a partial ACK w/ a SACK block that indicates that the next 3
+	// segments are lost and we have received 6 segments after the lost
+	// segments. This should cause the sender to immediately transmit all 3
+	// segments in response to this ACK unlike in FastRecovery where only 1
+	// segment is retransmitted per ACK.
+	start = c.IRS.Add(seqnum.Size(rtxOffset) + 3*maxPayload + 1)
+	end = start.Add(6 * maxPayload)
+	c.SendAckWithSACK(790, rtxOffset, []header.SACKBlock{{start, end}})
+
+	// At this point, we acked expected/2 packets and we SACKED 6 packets and
+	// 3 segments were considered lost due to the SACK block we sent.
+	//
+	// So total packets outstanding can be calculated as follows after 7
+	// iterations of slow start -> 10/20/40/80/160/320/640. So expected
+	// should be 640 at start, then we went into recovery at which point the
+	// cwnd should be set to 320 + 3 (for the 3 dupAcks which have left the
+	// network).
+	// Outstanding at this point after acking half the window
+	// (320 packets) will be:
+	//    outstanding = 640-320-6(due to SACK block)-3 = 311
+	//
+	// The last 3 is due to the fact that the first 3 packets after
+	// rtxOffset will be considered lost due to the SACK blocks sent.
+
+	// Receive the retransmit due to partial ack.
+	c.ReceiveAndCheckPacketWithOptions(data, rtxOffset, maxPayload, tsOptionSize)
+	// Receive the 2 extra packets that should have been retransmitted as
+	// those should be considered lost and immediately retransmitted based
+	// on the SACK information in the previous ACK sent above.
+	for i := 0; i < 2; i++ {
+		c.ReceiveAndCheckPacketWithOptions(data, rtxOffset+maxPayload*(i+1), maxPayload, tsOptionSize)
+	}
+
+	// Now we should get 9 new, previously unsent packets, as the cwnd is 323
+	// and outstanding is 311 plus the 3 segments just retransmitted.
+	for i := 0; i < 9; i++ {
+		c.ReceiveAndCheckPacketWithOptions(data, bytesRead, maxPayload, tsOptionSize)
+		bytesRead += maxPayload
+	}
+
+	// In SACK recovery only the first segment is fast retransmitted when
+	// entering recovery.
+	if got, want := c.Stack().Stats().TCP.FastRetransmit.Value(), uint64(1); got != want {
+		t.Errorf("got stats.TCP.FastRetransmit.Value = %v, want = %v", got, want)
+	}
+
+	if got, want := c.Stack().Stats().TCP.Retransmits.Value(), uint64(4); got != want {
+		t.Errorf("got stats.TCP.Retransmits.Value = %v, want = %v", got, want)
+	}
+
+	c.CheckNoPacketTimeout("More packets received than expected during recovery after partial ack for this cwnd.", 50*time.Millisecond)
+
+	// Acknowledge all pending data to recover point.
+	c.SendAck(790, recoverPoint)
+
+	// At this point, the cwnd should reset to expected/2 and there are 9
+	// packets outstanding.
+	//
+	// Now in the first iteration, since there are 9 packets outstanding, we
+	// would expect to get expected/2 - 9 packets. Subsequent iterations will
+	// send us a full cwnd, starting at expected/2 + 1 and growing by 1 each.
+	expected = expected/2 - 9
+	for i := 0; i < iterations; i++ {
+		// Read all packets expected on this iteration. Don't
+		// acknowledge any of them just yet, so that we can measure the
+		// congestion window.
+		for j := 0; j < expected; j++ {
+			c.ReceiveAndCheckPacketWithOptions(data, bytesRead, maxPayload, tsOptionSize)
+			bytesRead += maxPayload
+		}
+		// Check we don't receive any more packets on this iteration.
+		// The wait can't be too long or we'll trigger a timeout.
+		c.CheckNoPacketTimeout(fmt.Sprintf("More packets received(after deflation) than expected %d for this cwnd and iteration: %d.", expected, i), 50*time.Millisecond)
+
+		// Acknowledge all the data received so far.
+		c.SendAck(790, bytesRead)
+
+		// In congestion avoidance, the packet trains increase by 1 in
+		// each iteration.
+		if i == 0 {
+			// After the first iteration we expect to get the full
+			// congestion window worth of packets in every
+			// iteration.
+			expected += 9
+		}
+		expected++
+	}
+}