diff options
Diffstat (limited to 'pkg/tcpip')
33 files changed, 217 insertions, 160 deletions
diff --git a/pkg/tcpip/link/fdbased/endpoint.go b/pkg/tcpip/link/fdbased/endpoint.go index 1b56d2b72..e8e716db0 100644 --- a/pkg/tcpip/link/fdbased/endpoint.go +++ b/pkg/tcpip/link/fdbased/endpoint.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux // +build linux // Package fdbased provides the implemention of data-link layer endpoints diff --git a/pkg/tcpip/link/fdbased/endpoint_test.go b/pkg/tcpip/link/fdbased/endpoint_test.go index 8aad338b6..eccd21579 100644 --- a/pkg/tcpip/link/fdbased/endpoint_test.go +++ b/pkg/tcpip/link/fdbased/endpoint_test.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux // +build linux package fdbased diff --git a/pkg/tcpip/link/fdbased/endpoint_unsafe.go b/pkg/tcpip/link/fdbased/endpoint_unsafe.go index df14eaad1..904393faa 100644 --- a/pkg/tcpip/link/fdbased/endpoint_unsafe.go +++ b/pkg/tcpip/link/fdbased/endpoint_unsafe.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux // +build linux package fdbased diff --git a/pkg/tcpip/link/fdbased/mmap.go b/pkg/tcpip/link/fdbased/mmap.go index 5d698a5e9..bfae34ab9 100644 --- a/pkg/tcpip/link/fdbased/mmap.go +++ b/pkg/tcpip/link/fdbased/mmap.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build (linux && amd64) || (linux && arm64) // +build linux,amd64 linux,arm64 package fdbased diff --git a/pkg/tcpip/link/fdbased/mmap_stub.go b/pkg/tcpip/link/fdbased/mmap_stub.go index 67be52d67..9d8679502 100644 --- a/pkg/tcpip/link/fdbased/mmap_stub.go +++ b/pkg/tcpip/link/fdbased/mmap_stub.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !linux || (!amd64 && !arm64) // +build !linux !amd64,!arm64 package fdbased diff --git a/pkg/tcpip/link/fdbased/mmap_unsafe.go b/pkg/tcpip/link/fdbased/mmap_unsafe.go index 1293f68a2..58d5dfeef 100644 --- a/pkg/tcpip/link/fdbased/mmap_unsafe.go +++ b/pkg/tcpip/link/fdbased/mmap_unsafe.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build (linux && amd64) || (linux && arm64) // +build linux,amd64 linux,arm64 package fdbased diff --git a/pkg/tcpip/link/fdbased/packet_dispatchers.go b/pkg/tcpip/link/fdbased/packet_dispatchers.go index 4b7ef3aac..ab2855a63 100644 --- a/pkg/tcpip/link/fdbased/packet_dispatchers.go +++ b/pkg/tcpip/link/fdbased/packet_dispatchers.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux // +build linux package fdbased diff --git a/pkg/tcpip/link/rawfile/blockingpoll_noyield_unsafe.go b/pkg/tcpip/link/rawfile/blockingpoll_noyield_unsafe.go index 2206fe0e6..c1438da21 100644 --- a/pkg/tcpip/link/rawfile/blockingpoll_noyield_unsafe.go +++ b/pkg/tcpip/link/rawfile/blockingpoll_noyield_unsafe.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux && !amd64 && !arm64 // +build linux,!amd64,!arm64 package rawfile diff --git a/pkg/tcpip/link/rawfile/blockingpoll_yield_unsafe.go b/pkg/tcpip/link/rawfile/blockingpoll_yield_unsafe.go index 5002245a1..da900c24b 100644 --- a/pkg/tcpip/link/rawfile/blockingpoll_yield_unsafe.go +++ b/pkg/tcpip/link/rawfile/blockingpoll_yield_unsafe.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build ((linux && amd64) || (linux && arm64)) && go1.12 && !go1.18 // +build linux,amd64 linux,arm64 // +build go1.12 // +build !go1.18 diff --git a/pkg/tcpip/link/rawfile/errors.go b/pkg/tcpip/link/rawfile/errors.go index 9743e70ea..7e21a78d4 100644 --- a/pkg/tcpip/link/rawfile/errors.go +++ b/pkg/tcpip/link/rawfile/errors.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux // +build linux package rawfile diff --git a/pkg/tcpip/link/rawfile/errors_test.go b/pkg/tcpip/link/rawfile/errors_test.go index 8f4bd60da..1b88c309b 100644 --- a/pkg/tcpip/link/rawfile/errors_test.go +++ b/pkg/tcpip/link/rawfile/errors_test.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux // +build linux package rawfile diff --git a/pkg/tcpip/link/rawfile/rawfile_unsafe.go b/pkg/tcpip/link/rawfile/rawfile_unsafe.go index 43fe57830..53448a641 100644 --- a/pkg/tcpip/link/rawfile/rawfile_unsafe.go +++ b/pkg/tcpip/link/rawfile/rawfile_unsafe.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux // +build linux // Package rawfile contains utilities for using the netstack with raw host diff --git a/pkg/tcpip/link/sharedmem/rx.go b/pkg/tcpip/link/sharedmem/rx.go index 8e6f3e5e3..e882a128c 100644 --- a/pkg/tcpip/link/sharedmem/rx.go +++ b/pkg/tcpip/link/sharedmem/rx.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux // +build linux package sharedmem diff --git a/pkg/tcpip/link/sharedmem/sharedmem.go b/pkg/tcpip/link/sharedmem/sharedmem.go index df9a0b90a..30cf659b8 100644 --- a/pkg/tcpip/link/sharedmem/sharedmem.go +++ b/pkg/tcpip/link/sharedmem/sharedmem.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux // +build linux // Package sharedmem provides the implemention of data-link layer endpoints diff --git a/pkg/tcpip/link/sharedmem/sharedmem_test.go b/pkg/tcpip/link/sharedmem/sharedmem_test.go index 0f72d4e95..d6d953085 100644 --- a/pkg/tcpip/link/sharedmem/sharedmem_test.go +++ b/pkg/tcpip/link/sharedmem/sharedmem_test.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux // +build linux package sharedmem diff --git a/pkg/tcpip/link/sniffer/pcap.go b/pkg/tcpip/link/sniffer/pcap.go index 45475dcf1..3bb864ed2 100644 --- a/pkg/tcpip/link/sniffer/pcap.go +++ b/pkg/tcpip/link/sniffer/pcap.go @@ -39,8 +39,6 @@ type pcapHeader struct { Network uint32 } -const pcapPacketHeaderLen = 16 - type pcapPacketHeader struct { // Seconds is the timestamp seconds. Seconds uint32 diff --git a/pkg/tcpip/link/sniffer/sniffer.go b/pkg/tcpip/link/sniffer/sniffer.go index 232a26a18..3df826f3c 100644 --- a/pkg/tcpip/link/sniffer/sniffer.go +++ b/pkg/tcpip/link/sniffer/sniffer.go @@ -113,8 +113,9 @@ func writePCAPHeader(w io.Writer, maxLen uint32) error { // NewWithWriter creates a new sniffer link-layer endpoint. It wraps around // another endpoint and logs packets as they traverse the endpoint. // -// Packets are logged to writer in the pcap format. A sniffer created with this -// function will not emit packets using the standard log package. +// Each packet is written to writer in the pcap format in a single Write call +// without synchronization. A sniffer created with this function will not emit +// packets using the standard log package. // // snapLen is the maximum amount of a packet to be saved. Packets with a length // less than or equal to snapLen will be saved in their entirety. Longer @@ -155,27 +156,29 @@ func (e *endpoint) dumpPacket(dir direction, protocol tcpip.NetworkProtocolNumbe if max := int(e.maxPCAPLen); length > max { length = max } - if err := binary.Write(writer, binary.BigEndian, newPCAPPacketHeader(time.Now(), uint32(length), uint32(totalLength))); err != nil { - panic(err) - } - write := func(b []byte) { - if len(b) > length { - b = b[:length] + packetHeader := newPCAPPacketHeader(time.Now(), uint32(length), uint32(totalLength)) + packet := make([]byte, binary.Size(packetHeader)+length) + { + writer := tcpip.SliceWriter(packet) + if err := binary.Write(&writer, binary.BigEndian, packetHeader); err != nil { + panic(err) } - for len(b) != 0 { + for _, b := range pkt.Views() { + if length == 0 { + break + } + if len(b) > length { + b = b[:length] + } n, err := writer.Write(b) if err != nil { panic(err) } - b = b[n:] length -= n } } - for _, v := range pkt.Views() { - if length == 0 { - break - } - write(v) + if _, err := writer.Write(packet); err != nil { + panic(err) } } } diff --git a/pkg/tcpip/link/tun/device.go b/pkg/tcpip/link/tun/device.go index f3444e8b5..d23210503 100644 --- a/pkg/tcpip/link/tun/device.go +++ b/pkg/tcpip/link/tun/device.go @@ -126,7 +126,7 @@ func attachOrCreateNIC(s *stack.Stack, name, prefix string, linkCaps stack.LinkE endpoint, ok := linkEP.(*tunEndpoint) if !ok { // Not a NIC created by tun device. - return nil, syserror.EOPNOTSUPP + return nil, linuxerr.EOPNOTSUPP } if !endpoint.TryIncRef() { // Race detected: NIC got deleted in between. diff --git a/pkg/tcpip/link/tun/tun_unsafe.go b/pkg/tcpip/link/tun/tun_unsafe.go index 0591fbd63..db4338e79 100644 --- a/pkg/tcpip/link/tun/tun_unsafe.go +++ b/pkg/tcpip/link/tun/tun_unsafe.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux // +build linux // Package tun contains methods to open TAP and TUN devices. diff --git a/pkg/tcpip/sample/tun_tcp_connect/main.go b/pkg/tcpip/sample/tun_tcp_connect/main.go index b9a24ff56..009cab643 100644 --- a/pkg/tcpip/sample/tun_tcp_connect/main.go +++ b/pkg/tcpip/sample/tun_tcp_connect/main.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux // +build linux // This sample creates a stack with TCP and IPv4 protocols on top of a TUN diff --git a/pkg/tcpip/sample/tun_tcp_echo/main.go b/pkg/tcpip/sample/tun_tcp_echo/main.go index ef1bfc186..c10b19aa0 100644 --- a/pkg/tcpip/sample/tun_tcp_echo/main.go +++ b/pkg/tcpip/sample/tun_tcp_echo/main.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux // +build linux // This sample creates a stack with TCP and IPv4 protocols on top of a TUN diff --git a/pkg/tcpip/socketops.go b/pkg/tcpip/socketops.go index 0ea85f9ed..5642c86f8 100644 --- a/pkg/tcpip/socketops.go +++ b/pkg/tcpip/socketops.go @@ -15,17 +15,11 @@ package tcpip import ( - "math" "sync/atomic" - "gvisor.dev/gvisor/pkg/atomicbitops" "gvisor.dev/gvisor/pkg/sync" ) -// PacketOverheadFactor is used to multiply the value provided by the user on a -// SetSockOpt for setting the send/receive buffer sizes sockets. -const PacketOverheadFactor = 2 - // SocketOptionsHandler holds methods that help define endpoint specific // behavior for socket level socket options. These must be implemented by // endpoints to get notified when socket level options are set. @@ -60,7 +54,7 @@ type SocketOptionsHandler interface { // buffer size. It also returns the newly set value. OnSetSendBufferSize(v int64) (newSz int64) - // OnSetReceiveBufferSize is invoked to set the SO_RCVBUFSIZE. + // OnSetReceiveBufferSize is invoked by SO_RCVBUF and SO_RCVBUFFORCE. OnSetReceiveBufferSize(v, oldSz int64) (newSz int64) } @@ -213,16 +207,24 @@ type SocketOptions struct { // will not change. getSendBufferLimits GetSendBufferLimits `state:"manual"` + // sendBufSizeMu protects sendBufferSize and calls to + // handler.OnSetSendBufferSize. + sendBufSizeMu sync.Mutex `state:"nosave"` + // sendBufferSize determines the send buffer size for this socket. - sendBufferSize atomicbitops.AlignedAtomicInt64 + sendBufferSize int64 // getReceiveBufferLimits provides the handler to get the min, default and // max size for receive buffer. It is initialized at the creation time and // will not change. getReceiveBufferLimits GetReceiveBufferLimits `state:"manual"` + // receiveBufSizeMu protects receiveBufferSize and calls to + // handler.OnSetReceiveBufferSize. + receiveBufSizeMu sync.Mutex `state:"nosave"` + // receiveBufferSize determines the receive buffer size for this socket. - receiveBufferSize atomicbitops.AlignedAtomicInt64 + receiveBufferSize int64 // mu protects the access to the below fields. mu sync.Mutex `state:"nosave"` @@ -612,81 +614,52 @@ func (so *SocketOptions) SetBindToDevice(bindToDevice int32) Error { return nil } +// SendBufferLimits returns the [min, max) range of allowable send buffer +// sizes. +func (so *SocketOptions) SendBufferLimits() (min, max int64) { + limits := so.getSendBufferLimits(so.stackHandler) + return int64(limits.Min), int64(limits.Max) +} + // GetSendBufferSize gets value for SO_SNDBUF option. func (so *SocketOptions) GetSendBufferSize() int64 { - return so.sendBufferSize.Load() + so.sendBufSizeMu.Lock() + defer so.sendBufSizeMu.Unlock() + return so.sendBufferSize } // SetSendBufferSize sets value for SO_SNDBUF option. notify indicates if the // stack handler should be invoked to set the send buffer size. func (so *SocketOptions) SetSendBufferSize(sendBufferSize int64, notify bool) { - v := sendBufferSize - - if !notify { - so.sendBufferSize.Store(v) - return - } - - // Make sure the send buffer size is within the min and max - // allowed. - ss := so.getSendBufferLimits(so.stackHandler) - min := int64(ss.Min) - max := int64(ss.Max) - // Validate the send buffer size with min and max values. - // Multiply it by factor of 2. - if v > max { - v = max - } - - if v < math.MaxInt32/PacketOverheadFactor { - v *= PacketOverheadFactor - if v < min { - v = min - } - } else { - v = math.MaxInt32 + so.sendBufSizeMu.Lock() + defer so.sendBufSizeMu.Unlock() + if notify { + sendBufferSize = so.handler.OnSetSendBufferSize(sendBufferSize) } + so.sendBufferSize = sendBufferSize +} - // Notify endpoint about change in buffer size. - newSz := so.handler.OnSetSendBufferSize(v) - so.sendBufferSize.Store(newSz) +// ReceiveBufferLimits returns the [min, max) range of allowable receive buffer +// sizes. +func (so *SocketOptions) ReceiveBufferLimits() (min, max int64) { + limits := so.getReceiveBufferLimits(so.stackHandler) + return int64(limits.Min), int64(limits.Max) } // GetReceiveBufferSize gets value for SO_RCVBUF option. func (so *SocketOptions) GetReceiveBufferSize() int64 { - return so.receiveBufferSize.Load() + so.receiveBufSizeMu.Lock() + defer so.receiveBufSizeMu.Unlock() + return so.receiveBufferSize } -// SetReceiveBufferSize sets value for SO_RCVBUF option. +// SetReceiveBufferSize sets the value of the SO_RCVBUF option, optionally +// notifying the owning endpoint. func (so *SocketOptions) SetReceiveBufferSize(receiveBufferSize int64, notify bool) { - if !notify { - so.receiveBufferSize.Store(receiveBufferSize) - return - } - - // Make sure the send buffer size is within the min and max - // allowed. - v := receiveBufferSize - ss := so.getReceiveBufferLimits(so.stackHandler) - min := int64(ss.Min) - max := int64(ss.Max) - // Validate the send buffer size with min and max values. - if v > max { - v = max - } - - // Multiply it by factor of 2. - if v < math.MaxInt32/PacketOverheadFactor { - v *= PacketOverheadFactor - if v < min { - v = min - } - } else { - v = math.MaxInt32 + so.receiveBufSizeMu.Lock() + defer so.receiveBufSizeMu.Unlock() + if notify { + receiveBufferSize = so.handler.OnSetReceiveBufferSize(receiveBufferSize, so.receiveBufferSize) } - - oldSz := so.receiveBufferSize.Load() - // Notify endpoint about change in buffer size. - newSz := so.handler.OnSetReceiveBufferSize(v, oldSz) - so.receiveBufferSize.Store(newSz) + so.receiveBufferSize = receiveBufferSize } diff --git a/pkg/tcpip/stack/ndp_test.go b/pkg/tcpip/stack/ndp_test.go index ca2250ad6..4d5431da1 100644 --- a/pkg/tcpip/stack/ndp_test.go +++ b/pkg/tcpip/stack/ndp_test.go @@ -5356,8 +5356,9 @@ func TestRouterSolicitation(t *testing.T) { RandSource: &randSource, }) - if err := s.CreateNIC(nicID, &e); err != nil { - t.Fatalf("CreateNIC(%d, _) = %s", nicID, err) + opts := stack.NICOptions{Disabled: true} + if err := s.CreateNICWithOptions(nicID, &e, opts); err != nil { + t.Fatalf("CreateNICWithOptions(%d, _, %#v) = %s", nicID, opts, err) } if addr := test.nicAddr; addr != "" { @@ -5366,6 +5367,10 @@ func TestRouterSolicitation(t *testing.T) { } } + if err := s.EnableNIC(nicID); err != nil { + t.Fatalf("EnableNIC(%d): %s", nicID, err) + } + // Make sure each RS is sent at the right time. remaining := test.maxRtrSolicit if remaining != 0 { diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go index 8f2658f64..55683b4fb 100644 --- a/pkg/tcpip/tcpip.go +++ b/pkg/tcpip/tcpip.go @@ -1845,6 +1845,10 @@ type TCPStats struct { // FailedPortReservations is the number of times TCP failed to reserve // a port. FailedPortReservations *StatCounter + + // SegmentsAckedWithDSACK is the number of segments acknowledged with + // DSACK. + SegmentsAckedWithDSACK *StatCounter } // UDPStats collects UDP-specific stats. diff --git a/pkg/tcpip/transport/raw/endpoint.go b/pkg/tcpip/transport/raw/endpoint.go index b6687911a..b3d8951ff 100644 --- a/pkg/tcpip/transport/raw/endpoint.go +++ b/pkg/tcpip/transport/raw/endpoint.go @@ -132,7 +132,7 @@ func newEndpoint(s *stack.Stack, netProto tcpip.NetworkProtocolNumber, transProt // headers included. Because they're write-only, We don't need to // register with the stack. if !associated { - e.ops.SetReceiveBufferSize(0, false) + e.ops.SetReceiveBufferSize(0, false /* notify */) e.waiterQueue = nil return e, nil } @@ -455,8 +455,21 @@ func (e *endpoint) Bind(addr tcpip.FullAddress) tcpip.Error { } // GetLocalAddress implements tcpip.Endpoint.GetLocalAddress. -func (*endpoint) GetLocalAddress() (tcpip.FullAddress, tcpip.Error) { - return tcpip.FullAddress{}, &tcpip.ErrNotSupported{} +func (e *endpoint) GetLocalAddress() (tcpip.FullAddress, tcpip.Error) { + e.mu.RLock() + defer e.mu.RUnlock() + + addr := e.BindAddr + if e.connected { + addr = e.route.LocalAddress() + } + + return tcpip.FullAddress{ + NIC: e.RegisterNICID, + Addr: addr, + // Linux returns the protocol in the port field. + Port: uint16(e.TransProto), + }, nil } // GetRemoteAddress implements tcpip.Endpoint.GetRemoteAddress. diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go index 1ed4ba419..044123185 100644 --- a/pkg/tcpip/transport/tcp/endpoint.go +++ b/pkg/tcpip/transport/tcp/endpoint.go @@ -478,7 +478,7 @@ type endpoint struct { // shutdownFlags represent the current shutdown state of the endpoint. shutdownFlags tcpip.ShutdownFlags - // tcpRecovery is the loss deteoction algorithm used by TCP. + // tcpRecovery is the loss recovery algorithm used by TCP. tcpRecovery tcpip.TCPRecovery // sack holds TCP SACK related information for this endpoint. @@ -754,7 +754,7 @@ func (e *endpoint) ResumeWork() { // // Precondition: e.mu must be held to call this method. func (e *endpoint) setEndpointState(state EndpointState) { - oldstate := EndpointState(atomic.LoadUint32(&e.state)) + oldstate := EndpointState(atomic.SwapUint32(&e.state, uint32(state))) switch state { case StateEstablished: e.stack.Stats().TCP.CurrentEstablished.Increment() @@ -771,7 +771,6 @@ func (e *endpoint) setEndpointState(state EndpointState) { e.stack.Stats().TCP.CurrentEstablished.Decrement() } } - atomic.StoreUint32(&e.state, uint32(state)) } // EndpointState returns the current state of the endpoint. @@ -870,8 +869,6 @@ func newEndpoint(s *stack.Stack, netProto tcpip.NetworkProtocolNumber, waiterQue e.maxSynRetries = uint8(synRetries) } - s.TransportProtocolOption(ProtocolNumber, &e.tcpRecovery) - if p := s.GetTCPProbe(); p != nil { e.probe = p } @@ -1530,6 +1527,12 @@ func (e *endpoint) queueSegment(p tcpip.Payloader, opts tcpip.WriteOptions) (*se if err != nil { return nil, 0, err } + + // Do not queue zero length segments. + if len(v) == 0 { + return nil, 0, nil + } + if !opts.Atomic { // Since we released locks in between it's possible that the // endpoint transitioned to a CLOSED/ERROR states so make @@ -2917,6 +2920,7 @@ func (e *endpoint) maybeEnableSACKPermitted(synOpts *header.TCPSynOptions) { } if bool(v) && synOpts.SACKPermitted { e.SACKPermitted = true + e.stack.TransportProtocolOption(ProtocolNumber, &e.tcpRecovery) } } diff --git a/pkg/tcpip/transport/tcp/protocol.go b/pkg/tcpip/transport/tcp/protocol.go index 2fc282e73..18b834243 100644 --- a/pkg/tcpip/transport/tcp/protocol.go +++ b/pkg/tcpip/transport/tcp/protocol.go @@ -478,8 +478,7 @@ func NewProtocol(s *stack.Stack) stack.TransportProtocol { minRTO: MinRTO, maxRTO: MaxRTO, maxRetries: MaxRetries, - // TODO(gvisor.dev/issue/5243): Set recovery to tcpip.TCPRACKLossDetection. - recovery: 0, + recovery: tcpip.TCPRACKLossDetection, } p.dispatcher.init(s.Rand(), runtime.GOMAXPROCS(0)) return &p diff --git a/pkg/tcpip/transport/tcp/snd.go b/pkg/tcpip/transport/tcp/snd.go index 72d58dcff..92a66f17e 100644 --- a/pkg/tcpip/transport/tcp/snd.go +++ b/pkg/tcpip/transport/tcp/snd.go @@ -1154,6 +1154,13 @@ func (s *sender) walkSACK(rcvdSeg *segment) { idx := 0 n := len(rcvdSeg.parsedOptions.SACKBlocks) if checkDSACK(rcvdSeg) { + dsackBlock := rcvdSeg.parsedOptions.SACKBlocks[0] + numDSACK := uint64(dsackBlock.End-dsackBlock.Start) / uint64(s.MaxPayloadSize) + // numDSACK can be zero when DSACK is sent for subsegments. + if numDSACK < 1 { + numDSACK = 1 + } + s.ep.stack.Stats().TCP.SegmentsAckedWithDSACK.IncrementBy(numDSACK) s.rc.setDSACKSeen(true) idx = 1 n-- diff --git a/pkg/tcpip/transport/tcp/tcp_noracedetector_test.go b/pkg/tcpip/transport/tcp/tcp_noracedetector_test.go index ced3a9c58..84fb1c416 100644 --- a/pkg/tcpip/transport/tcp/tcp_noracedetector_test.go +++ b/pkg/tcpip/transport/tcp/tcp_noracedetector_test.go @@ -16,6 +16,7 @@ // iterations taking long enough that the retransmit timer can kick in causing // the congestion window measurements to fail due to extra packets etc. // +//go:build !race // +build !race package tcp_test diff --git a/pkg/tcpip/transport/tcp/tcp_rack_test.go b/pkg/tcpip/transport/tcp/tcp_rack_test.go index d6cf786a1..89e9fb886 100644 --- a/pkg/tcpip/transport/tcp/tcp_rack_test.go +++ b/pkg/tcpip/transport/tcp/tcp_rack_test.go @@ -36,9 +36,9 @@ const ( latency = 5 * time.Millisecond ) -func setStackRACKPermitted(t *testing.T, c *context.Context) { +func setStackTCPRecovery(t *testing.T, c *context.Context, recovery int) { t.Helper() - opt := tcpip.TCPRACKLossDetection + opt := tcpip.TCPRecovery(recovery) if err := c.Stack().SetTransportProtocolOption(header.TCPProtocolNumber, &opt); err != nil { t.Fatalf("c.s.SetTransportProtocolOption(%d, &%v(%v)): %s", header.TCPProtocolNumber, opt, opt, err) } @@ -70,7 +70,6 @@ func TestRACKUpdate(t *testing.T) { close(probeDone) }) setStackSACKPermitted(t, c, true) - setStackRACKPermitted(t, c) createConnectedWithSACKAndTS(c) data := make([]byte, maxPayload) @@ -129,7 +128,6 @@ func TestRACKDetectReorder(t *testing.T) { close(probeDone) }) setStackSACKPermitted(t, c, true) - setStackRACKPermitted(t, c) createConnectedWithSACKAndTS(c) data := make([]byte, ackNumToVerify*maxPayload) for i := range data { @@ -162,8 +160,8 @@ func TestRACKDetectReorder(t *testing.T) { func sendAndReceiveWithSACK(t *testing.T, c *context.Context, numPackets int, enableRACK bool) []byte { setStackSACKPermitted(t, c, true) - if enableRACK { - setStackRACKPermitted(t, c) + if !enableRACK { + setStackTCPRecovery(t, c, 0) } createConnectedWithSACKAndTS(c) @@ -542,6 +540,28 @@ func TestRACKDetectDSACK(t *testing.T) { case invalidDSACKDetected: t.Fatalf("RACK DSACK detected when there is no duplicate SACK") } + + metricPollFn := func() error { + tcpStats := c.Stack().Stats().TCP + stats := []struct { + stat *tcpip.StatCounter + name string + want uint64 + }{ + // Check DSACK was received for one segment. + {tcpStats.SegmentsAckedWithDSACK, "stats.TCP.SegmentsAckedWithDSACK", 1}, + } + for _, s := range stats { + if got, want := s.stat.Value(), s.want; got != want { + return fmt.Errorf("got %s.Value() = %d, want = %d", s.name, got, want) + } + } + return nil + } + + if err := testutil.Poll(metricPollFn, 1*time.Second); err != nil { + t.Error(err) + } } // TestRACKDetectDSACKWithOutOfOrder tests that RACK detects DSACK with out of @@ -682,6 +702,28 @@ func TestRACKDetectDSACKSingleDup(t *testing.T) { case invalidDSACKDetected: t.Fatalf("RACK DSACK detected when there is no duplicate SACK") } + + metricPollFn := func() error { + tcpStats := c.Stack().Stats().TCP + stats := []struct { + stat *tcpip.StatCounter + name string + want uint64 + }{ + // Check DSACK was received for a subsegment. + {tcpStats.SegmentsAckedWithDSACK, "stats.TCP.SegmentsAckedWithDSACK", 1}, + } + for _, s := range stats { + if got, want := s.stat.Value(), s.want; got != want { + return fmt.Errorf("got %s.Value() = %d, want = %d", s.name, got, want) + } + } + return nil + } + + if err := testutil.Poll(metricPollFn, 1*time.Second); err != nil { + t.Error(err) + } } // TestRACKDetectDSACKDupWithCumulativeACK tests DSACK for two non-contiguous @@ -998,7 +1040,6 @@ func TestRACKWithWindowFull(t *testing.T) { defer c.Cleanup() setStackSACKPermitted(t, c, true) - setStackRACKPermitted(t, c) createConnectedWithSACKAndTS(c) seq := seqnum.Value(context.TestInitialSequenceNumber).Add(1) diff --git a/pkg/tcpip/transport/tcp/tcp_sack_test.go b/pkg/tcpip/transport/tcp/tcp_sack_test.go index 20c9761f2..83e0653b9 100644 --- a/pkg/tcpip/transport/tcp/tcp_sack_test.go +++ b/pkg/tcpip/transport/tcp/tcp_sack_test.go @@ -61,6 +61,7 @@ func TestSackPermittedConnect(t *testing.T) { defer c.Cleanup() setStackSACKPermitted(t, c, sackEnabled) + setStackTCPRecovery(t, c, 0) rep := createConnectedWithSACKPermittedOption(c) data := []byte{1, 2, 3} @@ -105,6 +106,7 @@ func TestSackDisabledConnect(t *testing.T) { defer c.Cleanup() setStackSACKPermitted(t, c, sackEnabled) + setStackTCPRecovery(t, c, 0) rep := c.CreateConnectedWithOptions(header.TCPSynOptions{}) @@ -166,6 +168,7 @@ func TestSackPermittedAccept(t *testing.T) { } } setStackSACKPermitted(t, c, sackEnabled) + setStackTCPRecovery(t, c, 0) rep := c.AcceptWithOptions(tc.wndScale, header.TCPSynOptions{MSS: defaultIPv4MSS, SACKPermitted: tc.sackPermitted}) // Now verify no SACK blocks are @@ -239,6 +242,7 @@ func TestSackDisabledAccept(t *testing.T) { } setStackSACKPermitted(t, c, sackEnabled) + setStackTCPRecovery(t, c, 0) rep := c.AcceptWithOptions(tc.wndScale, header.TCPSynOptions{MSS: defaultIPv4MSS}) @@ -386,6 +390,7 @@ func TestSACKRecovery(t *testing.T) { log.Printf("state: %+v\n", s) }) setStackSACKPermitted(t, c, true) + setStackTCPRecovery(t, c, 0) createConnectedWithSACKAndTS(c) const iterations = 3 diff --git a/pkg/tcpip/transport/tcp/tcp_test.go b/pkg/tcpip/transport/tcp/tcp_test.go index 71c4aa85d..031f01357 100644 --- a/pkg/tcpip/transport/tcp/tcp_test.go +++ b/pkg/tcpip/transport/tcp/tcp_test.go @@ -2147,7 +2147,7 @@ func TestSmallSegReceiveWindowAdvertisement(t *testing.T) { // Bump up the receive buffer size such that, when the receive window grows, // the scaled window exceeds maxUint16. - c.EP.SocketOptions().SetReceiveBufferSize(int64(opt.Max), true) + c.EP.SocketOptions().SetReceiveBufferSize(int64(opt.Max)*2, true /* notify */) // Keep the payload size < segment overhead and such that it is a multiple // of the window scaled value. This enables the test to perform equality @@ -2267,7 +2267,7 @@ func TestNoWindowShrinking(t *testing.T) { initialWnd := header.TCP(header.IPv4(pkt).Payload()).WindowSize() << c.RcvdWindowScale initialLastAcceptableSeq := iss.Add(seqnum.Size(initialWnd)) // Now shrink the receive buffer to half its original size. - c.EP.SocketOptions().SetReceiveBufferSize(int64(rcvBufSize/2), true) + c.EP.SocketOptions().SetReceiveBufferSize(int64(rcvBufSize), true /* notify */) data := generateRandomPayload(t, rcvBufSize) // Send a payload of half the size of rcvBufSize. @@ -2523,7 +2523,7 @@ func TestScaledWindowAccept(t *testing.T) { defer ep.Close() // Set the window size greater than the maximum non-scaled window. - ep.SocketOptions().SetReceiveBufferSize(65535*3, true) + ep.SocketOptions().SetReceiveBufferSize(65535*6, true /* notify */) if err := ep.Bind(tcpip.FullAddress{Port: context.StackPort}); err != nil { t.Fatalf("Bind failed: %s", err) @@ -2595,7 +2595,7 @@ func TestNonScaledWindowAccept(t *testing.T) { defer ep.Close() // Set the window size greater than the maximum non-scaled window. - ep.SocketOptions().SetReceiveBufferSize(65535*3, true) + ep.SocketOptions().SetReceiveBufferSize(65535*6, true /* notify */) if err := ep.Bind(tcpip.FullAddress{Port: context.StackPort}); err != nil { t.Fatalf("Bind failed: %s", err) @@ -3188,7 +3188,7 @@ func TestPassiveSendMSSLessThanMTU(t *testing.T) { // Set the buffer size to a deterministic size so that we can check the // window scaling option. const rcvBufferSize = 0x20000 - ep.SocketOptions().SetReceiveBufferSize(rcvBufferSize, true) + ep.SocketOptions().SetReceiveBufferSize(rcvBufferSize*2, true /* notify */) if err := ep.Bind(tcpip.FullAddress{Port: context.StackPort}); err != nil { t.Fatalf("Bind failed: %s", err) @@ -3327,7 +3327,7 @@ func TestSynOptionsOnActiveConnect(t *testing.T) { // window scaling option. const rcvBufferSize = 0x20000 const wndScale = 3 - c.EP.SocketOptions().SetReceiveBufferSize(rcvBufferSize, true) + c.EP.SocketOptions().SetReceiveBufferSize(rcvBufferSize*2, true /* notify */) // Start connection attempt. we, ch := waiter.NewChannelEntry(nil) @@ -3624,6 +3624,38 @@ func TestMaxRTO(t *testing.T) { } } +// TestZeroSizedWriteRetransmit tests that a zero sized write should not +// result in a panic on an RTO as no segment should have been queued for +// a zero sized write. +func TestZeroSizedWriteRetransmit(t *testing.T) { + c := context.New(t, defaultMTU) + defer c.Cleanup() + + c.CreateConnected(context.TestInitialSequenceNumber, 30000 /* rcvWnd */, -1 /* epRcvBuf */) + + var r bytes.Reader + _, err := c.EP.Write(&r, tcpip.WriteOptions{}) + if err != nil { + t.Fatalf("Write failed: %s", err) + } + // Now do a non-zero sized write to trigger actual sending of data. + r.Reset(make([]byte, 1)) + _, err = c.EP.Write(&r, tcpip.WriteOptions{}) + if err != nil { + t.Fatalf("Write failed: %s", err) + } + // Do not ACK the packet and expect an original transmit and a + // retransmit. This should not cause a panic. + for i := 0; i < 2; i++ { + checker.IPv4(t, c.GetPacket(), + checker.TCP( + checker.DstPort(context.TestPort), + checker.TCPFlagsMatch(header.TCPFlagAck, ^header.TCPFlagPsh), + ), + ) + } +} + // TestRetransmitIPv4IDUniqueness tests that the IPv4 Identification field is // unique on retransmits. func TestRetransmitIPv4IDUniqueness(t *testing.T) { @@ -4637,52 +4669,6 @@ func TestDefaultBufferSizes(t *testing.T) { checkRecvBufferSize(t, ep, tcp.DefaultReceiveBufferSize*3) } -func TestMinMaxBufferSizes(t *testing.T) { - s := stack.New(stack.Options{ - NetworkProtocols: []stack.NetworkProtocolFactory{ipv4.NewProtocol}, - TransportProtocols: []stack.TransportProtocolFactory{tcp.NewProtocol}, - }) - - // Check the default values. - ep, err := s.NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &waiter.Queue{}) - if err != nil { - t.Fatalf("NewEndpoint failed; %s", err) - } - defer ep.Close() - - // Change the min/max values for send/receive - { - opt := tcpip.TCPReceiveBufferSizeRangeOption{Min: 200, Default: tcp.DefaultReceiveBufferSize * 2, Max: tcp.DefaultReceiveBufferSize * 20} - if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, &opt); err != nil { - t.Fatalf("SetTransportProtocolOption(%d, &%#v): %s", tcp.ProtocolNumber, opt, err) - } - } - - { - opt := tcpip.TCPSendBufferSizeRangeOption{Min: 300, Default: tcp.DefaultSendBufferSize * 3, Max: tcp.DefaultSendBufferSize * 30} - if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, &opt); err != nil { - t.Fatalf("SetTransportProtocolOption(%d, &%#v): %s", tcp.ProtocolNumber, opt, err) - } - } - - // Set values below the min/2. - ep.SocketOptions().SetReceiveBufferSize(99, true) - checkRecvBufferSize(t, ep, 200) - - ep.SocketOptions().SetSendBufferSize(149, true) - - checkSendBufferSize(t, ep, 300) - - // Set values above the max. - ep.SocketOptions().SetReceiveBufferSize(1+tcp.DefaultReceiveBufferSize*20, true) - // Values above max are capped at max and then doubled. - checkRecvBufferSize(t, ep, tcp.DefaultReceiveBufferSize*20*2) - - ep.SocketOptions().SetSendBufferSize(1+tcp.DefaultSendBufferSize*30, true) - // Values above max are capped at max and then doubled. - checkSendBufferSize(t, ep, tcp.DefaultSendBufferSize*30*2) -} - func TestBindToDeviceOption(t *testing.T) { s := stack.New(stack.Options{ NetworkProtocols: []stack.NetworkProtocolFactory{ipv4.NewProtocol}, @@ -7720,7 +7706,7 @@ func TestIncreaseWindowOnBufferResize(t *testing.T) { // Increasing the buffer from should generate an ACK, // since window grew from small value to larger equal MSS - c.EP.SocketOptions().SetReceiveBufferSize(rcvBuf*2, true) + c.EP.SocketOptions().SetReceiveBufferSize(rcvBuf*4, true /* notify */) checker.IPv4(t, c.GetPacket(), checker.PayloadLen(header.TCPMinimumSize), checker.TCP( diff --git a/pkg/tcpip/transport/tcp/testing/context/context.go b/pkg/tcpip/transport/tcp/testing/context/context.go index 53efecc5a..96e4849d2 100644 --- a/pkg/tcpip/transport/tcp/testing/context/context.go +++ b/pkg/tcpip/transport/tcp/testing/context/context.go @@ -757,7 +757,7 @@ func (c *Context) Create(epRcvBuf int) { } if epRcvBuf != -1 { - c.EP.SocketOptions().SetReceiveBufferSize(int64(epRcvBuf), true /* notify */) + c.EP.SocketOptions().SetReceiveBufferSize(int64(epRcvBuf)*2, true /* notify */) } } |