summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--pkg/tcpip/transport/tcp/snd.go63
-rw-r--r--test/packetimpact/runner/packetimpact_test.go8
-rw-r--r--test/packetimpact/tests/BUILD10
-rw-r--r--test/packetimpact/tests/tcp_splitseg_mss_test.go71
4 files changed, 46 insertions, 106 deletions
diff --git a/pkg/tcpip/transport/tcp/snd.go b/pkg/tcpip/transport/tcp/snd.go
index acacb42e4..5862c32f2 100644
--- a/pkg/tcpip/transport/tcp/snd.go
+++ b/pkg/tcpip/transport/tcp/snd.go
@@ -833,25 +833,6 @@ func (s *sender) maybeSendSegment(seg *segment, limit int, end seqnum.Value) (se
panic("Netstack queues FIN segments without data.")
}
- segEnd = seg.sequenceNumber.Add(seqnum.Size(seg.data.Size()))
- // If the entire segment cannot be accomodated in the receiver
- // advertized window, skip splitting and sending of the segment.
- // ref: net/ipv4/tcp_output.c::tcp_snd_wnd_test()
- //
- // Linux checks this for all segment transmits not triggered
- // by a probe timer. On this condition, it defers the segment
- // split and transmit to a short probe timer.
- // ref: include/net/tcp.h::tcp_check_probe_timer()
- // ref: net/ipv4/tcp_output.c::tcp_write_wakeup()
- //
- // Instead of defining a new transmit timer, we attempt to split the
- // segment right here if there are no pending segments.
- // If there are pending segments, segment transmits are deferred
- // to the retransmit timer handler.
- if s.sndUna != s.sndNxt && !segEnd.LessThan(end) {
- return false
- }
-
if !seg.sequenceNumber.LessThan(end) {
return false
}
@@ -861,14 +842,48 @@ func (s *sender) maybeSendSegment(seg *segment, limit int, end seqnum.Value) (se
return false
}
- // The segment size limit is computed as a function of sender congestion
- // window and MSS. When sender congestion window is > 1, this limit can
- // be larger than MSS. Ensure that the currently available send space
- // is not greater than minimum of this limit and MSS.
+ // If the whole segment or at least 1MSS sized segment cannot
+ // be accomodated in the receiver advertized window, skip
+ // splitting and sending of the segment. ref:
+ // net/ipv4/tcp_output.c::tcp_snd_wnd_test()
+ //
+ // Linux checks this for all segment transmits not triggered by
+ // a probe timer. On this condition, it defers the segment split
+ // and transmit to a short probe timer.
+ //
+ // ref: include/net/tcp.h::tcp_check_probe_timer()
+ // ref: net/ipv4/tcp_output.c::tcp_write_wakeup()
+ //
+ // Instead of defining a new transmit timer, we attempt to split
+ // the segment right here if there are no pending segments. If
+ // there are pending segments, segment transmits are deferred to
+ // the retransmit timer handler.
+ if s.sndUna != s.sndNxt {
+ switch {
+ case available >= seg.data.Size():
+ // OK to send, the whole segments fits in the
+ // receiver's advertised window.
+ case available >= s.maxPayloadSize:
+ // OK to send, at least 1 MSS sized segment fits
+ // in the receiver's advertised window.
+ default:
+ return false
+ }
+ }
+
+ // The segment size limit is computed as a function of sender
+ // congestion window and MSS. When sender congestion window is >
+ // 1, this limit can be larger than MSS. Ensure that the
+ // currently available send space is not greater than minimum of
+ // this limit and MSS.
if available > limit {
available = limit
}
- if available > s.maxPayloadSize {
+
+ // If GSO is not in use then cap available to
+ // maxPayloadSize. When GSO is in use the gVisor GSO logic or
+ // the host GSO logic will cap the segment to the correct size.
+ if s.ep.gso == nil && available > s.maxPayloadSize {
available = s.maxPayloadSize
}
diff --git a/test/packetimpact/runner/packetimpact_test.go b/test/packetimpact/runner/packetimpact_test.go
index 3cac5915f..c0a2620de 100644
--- a/test/packetimpact/runner/packetimpact_test.go
+++ b/test/packetimpact/runner/packetimpact_test.go
@@ -242,7 +242,13 @@ func TestOne(t *testing.T) {
}
// Kill so that it will flush output.
- defer testbench.Exec(dockerutil.RunOpts{}, "killall", snifferArgs[0])
+ defer func() {
+ // Wait 1 second before killing tcpdump to give it time to flush
+ // any packets. On linux tests killing it immediately can
+ // sometimes result in partial pcaps.
+ time.Sleep(1 * time.Second)
+ testbench.Exec(dockerutil.RunOpts{}, "killall", snifferArgs[0])
+ }()
if _, err := testbench.WaitForOutput(snifferRegex, 60*time.Second); err != nil {
t.Fatalf("sniffer on %s never listened: %s", dut.Name, err)
diff --git a/test/packetimpact/tests/BUILD b/test/packetimpact/tests/BUILD
index 1c9f59df5..4711c1ae6 100644
--- a/test/packetimpact/tests/BUILD
+++ b/test/packetimpact/tests/BUILD
@@ -213,16 +213,6 @@ packetimpact_go_test(
)
packetimpact_go_test(
- name = "tcp_splitseg_mss",
- srcs = ["tcp_splitseg_mss_test.go"],
- deps = [
- "//pkg/tcpip/header",
- "//test/packetimpact/testbench",
- "@org_golang_x_sys//unix:go_default_library",
- ],
-)
-
-packetimpact_go_test(
name = "tcp_cork_mss",
srcs = ["tcp_cork_mss_test.go"],
deps = [
diff --git a/test/packetimpact/tests/tcp_splitseg_mss_test.go b/test/packetimpact/tests/tcp_splitseg_mss_test.go
deleted file mode 100644
index 9350d0988..000000000
--- a/test/packetimpact/tests/tcp_splitseg_mss_test.go
+++ /dev/null
@@ -1,71 +0,0 @@
-// Copyright 2020 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package tcp_splitseg_mss_test
-
-import (
- "flag"
- "testing"
- "time"
-
- "golang.org/x/sys/unix"
- "gvisor.dev/gvisor/pkg/tcpip/header"
- "gvisor.dev/gvisor/test/packetimpact/testbench"
-)
-
-func init() {
- testbench.RegisterFlags(flag.CommandLine)
-}
-
-// TestTCPSplitSegMSS lets the dut try to send segments larger than MSS.
-// It tests if the transmitted segments are capped at MSS and are split.
-func TestTCPSplitSegMSS(t *testing.T) {
- dut := testbench.NewDUT(t)
- defer dut.TearDown()
- listenFD, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1)
- defer dut.Close(listenFD)
- conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort})
- defer conn.Close()
-
- const mss = uint32(header.TCPDefaultMSS)
- options := make([]byte, header.TCPOptionMSSLength)
- header.EncodeMSSOption(mss, options)
- conn.ConnectWithOptions(options)
-
- acceptFD, _ := dut.Accept(listenFD)
- defer dut.Close(acceptFD)
-
- // Let the dut send a segment larger than MSS.
- largeData := make([]byte, mss+1)
- for i := 0; i < 2; i++ {
- dut.Send(acceptFD, largeData, 0)
- if i == 0 {
- // On Linux, the initial segment goes out beyond MSS and the segment
- // split occurs on retransmission. Call ExpectData to wait to
- // receive the split segment.
- if _, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, &testbench.Payload{Bytes: largeData[:mss]}, time.Second); err != nil {
- t.Fatalf("expected payload was not received: %s", err)
- }
- } else {
- if _, err := conn.ExpectNextData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, &testbench.Payload{Bytes: largeData[:mss]}, time.Second); err != nil {
- t.Fatalf("expected payload was not received: %s", err)
- }
- }
- conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)})
- if _, err := conn.ExpectNextData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, &testbench.Payload{Bytes: largeData[mss:]}, time.Second); err != nil {
- t.Fatalf("expected payload was not received: %s", err)
- }
- conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)})
- }
-}