diff options
-rw-r--r-- | pkg/tcpip/transport/tcp/snd.go | 63 | ||||
-rw-r--r-- | test/packetimpact/runner/packetimpact_test.go | 8 | ||||
-rw-r--r-- | test/packetimpact/tests/BUILD | 10 | ||||
-rw-r--r-- | test/packetimpact/tests/tcp_splitseg_mss_test.go | 71 |
4 files changed, 46 insertions, 106 deletions
diff --git a/pkg/tcpip/transport/tcp/snd.go b/pkg/tcpip/transport/tcp/snd.go index acacb42e4..5862c32f2 100644 --- a/pkg/tcpip/transport/tcp/snd.go +++ b/pkg/tcpip/transport/tcp/snd.go @@ -833,25 +833,6 @@ func (s *sender) maybeSendSegment(seg *segment, limit int, end seqnum.Value) (se panic("Netstack queues FIN segments without data.") } - segEnd = seg.sequenceNumber.Add(seqnum.Size(seg.data.Size())) - // If the entire segment cannot be accomodated in the receiver - // advertized window, skip splitting and sending of the segment. - // ref: net/ipv4/tcp_output.c::tcp_snd_wnd_test() - // - // Linux checks this for all segment transmits not triggered - // by a probe timer. On this condition, it defers the segment - // split and transmit to a short probe timer. - // ref: include/net/tcp.h::tcp_check_probe_timer() - // ref: net/ipv4/tcp_output.c::tcp_write_wakeup() - // - // Instead of defining a new transmit timer, we attempt to split the - // segment right here if there are no pending segments. - // If there are pending segments, segment transmits are deferred - // to the retransmit timer handler. - if s.sndUna != s.sndNxt && !segEnd.LessThan(end) { - return false - } - if !seg.sequenceNumber.LessThan(end) { return false } @@ -861,14 +842,48 @@ func (s *sender) maybeSendSegment(seg *segment, limit int, end seqnum.Value) (se return false } - // The segment size limit is computed as a function of sender congestion - // window and MSS. When sender congestion window is > 1, this limit can - // be larger than MSS. Ensure that the currently available send space - // is not greater than minimum of this limit and MSS. + // If the whole segment or at least 1MSS sized segment cannot + // be accomodated in the receiver advertized window, skip + // splitting and sending of the segment. ref: + // net/ipv4/tcp_output.c::tcp_snd_wnd_test() + // + // Linux checks this for all segment transmits not triggered by + // a probe timer. On this condition, it defers the segment split + // and transmit to a short probe timer. + // + // ref: include/net/tcp.h::tcp_check_probe_timer() + // ref: net/ipv4/tcp_output.c::tcp_write_wakeup() + // + // Instead of defining a new transmit timer, we attempt to split + // the segment right here if there are no pending segments. If + // there are pending segments, segment transmits are deferred to + // the retransmit timer handler. + if s.sndUna != s.sndNxt { + switch { + case available >= seg.data.Size(): + // OK to send, the whole segments fits in the + // receiver's advertised window. + case available >= s.maxPayloadSize: + // OK to send, at least 1 MSS sized segment fits + // in the receiver's advertised window. + default: + return false + } + } + + // The segment size limit is computed as a function of sender + // congestion window and MSS. When sender congestion window is > + // 1, this limit can be larger than MSS. Ensure that the + // currently available send space is not greater than minimum of + // this limit and MSS. if available > limit { available = limit } - if available > s.maxPayloadSize { + + // If GSO is not in use then cap available to + // maxPayloadSize. When GSO is in use the gVisor GSO logic or + // the host GSO logic will cap the segment to the correct size. + if s.ep.gso == nil && available > s.maxPayloadSize { available = s.maxPayloadSize } diff --git a/test/packetimpact/runner/packetimpact_test.go b/test/packetimpact/runner/packetimpact_test.go index 3cac5915f..c0a2620de 100644 --- a/test/packetimpact/runner/packetimpact_test.go +++ b/test/packetimpact/runner/packetimpact_test.go @@ -242,7 +242,13 @@ func TestOne(t *testing.T) { } // Kill so that it will flush output. - defer testbench.Exec(dockerutil.RunOpts{}, "killall", snifferArgs[0]) + defer func() { + // Wait 1 second before killing tcpdump to give it time to flush + // any packets. On linux tests killing it immediately can + // sometimes result in partial pcaps. + time.Sleep(1 * time.Second) + testbench.Exec(dockerutil.RunOpts{}, "killall", snifferArgs[0]) + }() if _, err := testbench.WaitForOutput(snifferRegex, 60*time.Second); err != nil { t.Fatalf("sniffer on %s never listened: %s", dut.Name, err) diff --git a/test/packetimpact/tests/BUILD b/test/packetimpact/tests/BUILD index 1c9f59df5..4711c1ae6 100644 --- a/test/packetimpact/tests/BUILD +++ b/test/packetimpact/tests/BUILD @@ -213,16 +213,6 @@ packetimpact_go_test( ) packetimpact_go_test( - name = "tcp_splitseg_mss", - srcs = ["tcp_splitseg_mss_test.go"], - deps = [ - "//pkg/tcpip/header", - "//test/packetimpact/testbench", - "@org_golang_x_sys//unix:go_default_library", - ], -) - -packetimpact_go_test( name = "tcp_cork_mss", srcs = ["tcp_cork_mss_test.go"], deps = [ diff --git a/test/packetimpact/tests/tcp_splitseg_mss_test.go b/test/packetimpact/tests/tcp_splitseg_mss_test.go deleted file mode 100644 index 9350d0988..000000000 --- a/test/packetimpact/tests/tcp_splitseg_mss_test.go +++ /dev/null @@ -1,71 +0,0 @@ -// Copyright 2020 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package tcp_splitseg_mss_test - -import ( - "flag" - "testing" - "time" - - "golang.org/x/sys/unix" - "gvisor.dev/gvisor/pkg/tcpip/header" - "gvisor.dev/gvisor/test/packetimpact/testbench" -) - -func init() { - testbench.RegisterFlags(flag.CommandLine) -} - -// TestTCPSplitSegMSS lets the dut try to send segments larger than MSS. -// It tests if the transmitted segments are capped at MSS and are split. -func TestTCPSplitSegMSS(t *testing.T) { - dut := testbench.NewDUT(t) - defer dut.TearDown() - listenFD, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) - defer dut.Close(listenFD) - conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort}) - defer conn.Close() - - const mss = uint32(header.TCPDefaultMSS) - options := make([]byte, header.TCPOptionMSSLength) - header.EncodeMSSOption(mss, options) - conn.ConnectWithOptions(options) - - acceptFD, _ := dut.Accept(listenFD) - defer dut.Close(acceptFD) - - // Let the dut send a segment larger than MSS. - largeData := make([]byte, mss+1) - for i := 0; i < 2; i++ { - dut.Send(acceptFD, largeData, 0) - if i == 0 { - // On Linux, the initial segment goes out beyond MSS and the segment - // split occurs on retransmission. Call ExpectData to wait to - // receive the split segment. - if _, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, &testbench.Payload{Bytes: largeData[:mss]}, time.Second); err != nil { - t.Fatalf("expected payload was not received: %s", err) - } - } else { - if _, err := conn.ExpectNextData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, &testbench.Payload{Bytes: largeData[:mss]}, time.Second); err != nil { - t.Fatalf("expected payload was not received: %s", err) - } - } - conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}) - if _, err := conn.ExpectNextData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, &testbench.Payload{Bytes: largeData[mss:]}, time.Second); err != nil { - t.Fatalf("expected payload was not received: %s", err) - } - conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}) - } -} |