4 files changed, 46 insertions, 106 deletions
diff --git a/pkg/tcpip/transport/tcp/snd.go b/pkg/tcpip/transport/tcp/snd.go
index acacb42e4..5862c32f2 100644
--- a/pkg/tcpip/transport/tcp/snd.go
+++ b/pkg/tcpip/transport/tcp/snd.go
@@ -833,25 +833,6 @@ func (s *sender) maybeSendSegment(seg *segment, limit int, end seqnum.Value) (se
 			panic("Netstack queues FIN segments without data.")
 		}
 
-		segEnd = seg.sequenceNumber.Add(seqnum.Size(seg.data.Size()))
-		// If the entire segment cannot be accomodated in the receiver
-		// advertized window, skip splitting and sending of the segment.
-		// ref: net/ipv4/tcp_output.c::tcp_snd_wnd_test()
-		//
-		// Linux checks this for all segment transmits not triggered
-		// by a probe timer. On this condition, it defers the segment
-		// split and transmit to a short probe timer.
-		// ref: include/net/tcp.h::tcp_check_probe_timer()
-		// ref: net/ipv4/tcp_output.c::tcp_write_wakeup()
-		//
-		// Instead of defining a new transmit timer, we attempt to split the
-		// segment right here if there are no pending segments.
-		// If there are pending segments, segment transmits are deferred
-		// to the retransmit timer handler.
-		if s.sndUna != s.sndNxt && !segEnd.LessThan(end) {
-			return false
-		}
-
 		if !seg.sequenceNumber.LessThan(end) {
 			return false
 		}
@@ -861,14 +842,48 @@ func (s *sender) maybeSendSegment(seg *segment, limit int, end seqnum.Value) (se
 			return false
 		}
 
-		// The segment size limit is computed as a function of sender congestion
-		// window and MSS. When sender congestion window is > 1, this limit can
-		// be larger than MSS. Ensure that the currently available send space
-		// is not greater than minimum of this limit and MSS.
+		// If the whole segment or at least 1MSS sized segment cannot
+		// be accomodated in the receiver advertized window, skip
+		// splitting and sending of the segment. ref:
+		// net/ipv4/tcp_output.c::tcp_snd_wnd_test()
+		//
+		// Linux checks this for all segment transmits not triggered by
+		// a probe timer. On this condition, it defers the segment split
+		// and transmit to a short probe timer.
+		//
+		// ref: include/net/tcp.h::tcp_check_probe_timer()
+		// ref: net/ipv4/tcp_output.c::tcp_write_wakeup()
+		//
+		// Instead of defining a new transmit timer, we attempt to split
+		// the segment right here if there are no pending segments. If
+		// there are pending segments, segment transmits are deferred to
+		// the retransmit timer handler.
+		if s.sndUna != s.sndNxt {
+			switch {
+			case available >= seg.data.Size():
+				// OK to send, the whole segments fits in the
+				// receiver's advertised window.
+			case available >= s.maxPayloadSize:
+				// OK to send, at least 1 MSS sized segment fits
+				// in the receiver's advertised window.
+			default:
+				return false
+			}
+		}
+
+		// The segment size limit is computed as a function of sender
+		// congestion window and MSS. When sender congestion window is >
+		// 1, this limit can be larger than MSS. Ensure that the
+		// currently available send space is not greater than minimum of
+		// this limit and MSS.
 		if available > limit {
 			available = limit
 		}
-		if available > s.maxPayloadSize {
+
+		// If GSO is not in use then cap available to
+		// maxPayloadSize. When GSO is in use the gVisor GSO logic or
+		// the host GSO logic will cap the segment to the correct size.
+		if s.ep.gso == nil && available > s.maxPayloadSize {
 			available = s.maxPayloadSize
 		}
 
diff --git a/test/packetimpact/runner/packetimpact_test.go b/test/packetimpact/runner/packetimpact_test.go
index 3cac5915f..c0a2620de 100644
--- a/test/packetimpact/runner/packetimpact_test.go
+++ b/test/packetimpact/runner/packetimpact_test.go
@@ -242,7 +242,13 @@ func TestOne(t *testing.T) {
 	}
 
 	// Kill so that it will flush output.
-	defer testbench.Exec(dockerutil.RunOpts{}, "killall", snifferArgs[0])
+	defer func() {
+		// Wait 1 second before killing tcpdump to give it time to flush
+		// any packets.  On linux tests killing it immediately can
+		// sometimes result in partial pcaps.
+		time.Sleep(1 * time.Second)
+		testbench.Exec(dockerutil.RunOpts{}, "killall", snifferArgs[0])
+	}()
 
 	if _, err := testbench.WaitForOutput(snifferRegex, 60*time.Second); err != nil {
 		t.Fatalf("sniffer on %s never listened: %s", dut.Name, err)
diff --git a/test/packetimpact/tests/BUILD b/test/packetimpact/tests/BUILD
index 1c9f59df5..4711c1ae6 100644
--- a/test/packetimpact/tests/BUILD
+++ b/test/packetimpact/tests/BUILD
@@ -213,16 +213,6 @@ packetimpact_go_test(
 )
 
 packetimpact_go_test(
-    name = "tcp_splitseg_mss",
-    srcs = ["tcp_splitseg_mss_test.go"],
-    deps = [
-        "//pkg/tcpip/header",
-        "//test/packetimpact/testbench",
-        "@org_golang_x_sys//unix:go_default_library",
-    ],
-)
-
-packetimpact_go_test(
     name = "tcp_cork_mss",
     srcs = ["tcp_cork_mss_test.go"],
     deps = [
diff --git a/test/packetimpact/tests/tcp_splitseg_mss_test.go b/test/packetimpact/tests/tcp_splitseg_mss_test.go
deleted file mode 100644
index 9350d0988..000000000
--- a/test/packetimpact/tests/tcp_splitseg_mss_test.go
+++ /dev/null
@@ -1,71 +0,0 @@
-// Copyright 2020 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package tcp_splitseg_mss_test
-
-import (
-	"flag"
-	"testing"
-	"time"
-
-	"golang.org/x/sys/unix"
-	"gvisor.dev/gvisor/pkg/tcpip/header"
-	"gvisor.dev/gvisor/test/packetimpact/testbench"
-)
-
-func init() {
-	testbench.RegisterFlags(flag.CommandLine)
-}
-
-// TestTCPSplitSegMSS lets the dut try to send segments larger than MSS.
-// It tests if the transmitted segments are capped at MSS and are split.
-func TestTCPSplitSegMSS(t *testing.T) {
-	dut := testbench.NewDUT(t)
-	defer dut.TearDown()
-	listenFD, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1)
-	defer dut.Close(listenFD)
-	conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort})
-	defer conn.Close()
-
-	const mss = uint32(header.TCPDefaultMSS)
-	options := make([]byte, header.TCPOptionMSSLength)
-	header.EncodeMSSOption(mss, options)
-	conn.ConnectWithOptions(options)
-
-	acceptFD, _ := dut.Accept(listenFD)
-	defer dut.Close(acceptFD)
-
-	// Let the dut send a segment larger than MSS.
-	largeData := make([]byte, mss+1)
-	for i := 0; i < 2; i++ {
-		dut.Send(acceptFD, largeData, 0)
-		if i == 0 {
-			// On Linux, the initial segment goes out beyond MSS and the segment
-			// split occurs on retransmission. Call ExpectData to wait to
-			// receive the split segment.
-			if _, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, &testbench.Payload{Bytes: largeData[:mss]}, time.Second); err != nil {
-				t.Fatalf("expected payload was not received: %s", err)
-			}
-		} else {
-			if _, err := conn.ExpectNextData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, &testbench.Payload{Bytes: largeData[:mss]}, time.Second); err != nil {
-				t.Fatalf("expected payload was not received: %s", err)
-			}
-		}
-		conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)})
-		if _, err := conn.ExpectNextData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, &testbench.Payload{Bytes: largeData[mss:]}, time.Second); err != nil {
-			t.Fatalf("expected payload was not received: %s", err)
-		}
-		conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)})
-	}
-}