summaryrefslogtreecommitdiffhomepage
path: root/pkg/tcpip
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/tcpip')
-rw-r--r--pkg/tcpip/link/fdbased/endpoint.go1
-rw-r--r--pkg/tcpip/link/fdbased/endpoint_test.go1
-rw-r--r--pkg/tcpip/link/fdbased/endpoint_unsafe.go1
-rw-r--r--pkg/tcpip/link/fdbased/mmap.go1
-rw-r--r--pkg/tcpip/link/fdbased/mmap_stub.go1
-rw-r--r--pkg/tcpip/link/fdbased/mmap_unsafe.go1
-rw-r--r--pkg/tcpip/link/fdbased/packet_dispatchers.go1
-rw-r--r--pkg/tcpip/link/rawfile/blockingpoll_noyield_unsafe.go1
-rw-r--r--pkg/tcpip/link/rawfile/blockingpoll_yield_unsafe.go1
-rw-r--r--pkg/tcpip/link/rawfile/errors.go1
-rw-r--r--pkg/tcpip/link/rawfile/errors_test.go1
-rw-r--r--pkg/tcpip/link/rawfile/rawfile_unsafe.go1
-rw-r--r--pkg/tcpip/link/sharedmem/rx.go1
-rw-r--r--pkg/tcpip/link/sharedmem/sharedmem.go1
-rw-r--r--pkg/tcpip/link/sharedmem/sharedmem_test.go1
-rw-r--r--pkg/tcpip/link/sniffer/pcap.go2
-rw-r--r--pkg/tcpip/link/sniffer/sniffer.go33
-rw-r--r--pkg/tcpip/link/tun/device.go2
-rw-r--r--pkg/tcpip/link/tun/tun_unsafe.go1
-rw-r--r--pkg/tcpip/sample/tun_tcp_connect/main.go1
-rw-r--r--pkg/tcpip/sample/tun_tcp_echo/main.go1
-rw-r--r--pkg/tcpip/socketops.go111
-rw-r--r--pkg/tcpip/stack/ndp_test.go9
-rw-r--r--pkg/tcpip/tcpip.go4
-rw-r--r--pkg/tcpip/transport/raw/endpoint.go19
-rw-r--r--pkg/tcpip/transport/tcp/endpoint.go14
-rw-r--r--pkg/tcpip/transport/tcp/protocol.go3
-rw-r--r--pkg/tcpip/transport/tcp/snd.go7
-rw-r--r--pkg/tcpip/transport/tcp/tcp_noracedetector_test.go1
-rw-r--r--pkg/tcpip/transport/tcp/tcp_rack_test.go55
-rw-r--r--pkg/tcpip/transport/tcp/tcp_sack_test.go5
-rw-r--r--pkg/tcpip/transport/tcp/tcp_test.go92
-rw-r--r--pkg/tcpip/transport/tcp/testing/context/context.go2
33 files changed, 217 insertions, 160 deletions
diff --git a/pkg/tcpip/link/fdbased/endpoint.go b/pkg/tcpip/link/fdbased/endpoint.go
index 1b56d2b72..e8e716db0 100644
--- a/pkg/tcpip/link/fdbased/endpoint.go
+++ b/pkg/tcpip/link/fdbased/endpoint.go
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+//go:build linux
// +build linux
// Package fdbased provides the implemention of data-link layer endpoints
diff --git a/pkg/tcpip/link/fdbased/endpoint_test.go b/pkg/tcpip/link/fdbased/endpoint_test.go
index 8aad338b6..eccd21579 100644
--- a/pkg/tcpip/link/fdbased/endpoint_test.go
+++ b/pkg/tcpip/link/fdbased/endpoint_test.go
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+//go:build linux
// +build linux
package fdbased
diff --git a/pkg/tcpip/link/fdbased/endpoint_unsafe.go b/pkg/tcpip/link/fdbased/endpoint_unsafe.go
index df14eaad1..904393faa 100644
--- a/pkg/tcpip/link/fdbased/endpoint_unsafe.go
+++ b/pkg/tcpip/link/fdbased/endpoint_unsafe.go
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+//go:build linux
// +build linux
package fdbased
diff --git a/pkg/tcpip/link/fdbased/mmap.go b/pkg/tcpip/link/fdbased/mmap.go
index 5d698a5e9..bfae34ab9 100644
--- a/pkg/tcpip/link/fdbased/mmap.go
+++ b/pkg/tcpip/link/fdbased/mmap.go
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+//go:build (linux && amd64) || (linux && arm64)
// +build linux,amd64 linux,arm64
package fdbased
diff --git a/pkg/tcpip/link/fdbased/mmap_stub.go b/pkg/tcpip/link/fdbased/mmap_stub.go
index 67be52d67..9d8679502 100644
--- a/pkg/tcpip/link/fdbased/mmap_stub.go
+++ b/pkg/tcpip/link/fdbased/mmap_stub.go
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+//go:build !linux || (!amd64 && !arm64)
// +build !linux !amd64,!arm64
package fdbased
diff --git a/pkg/tcpip/link/fdbased/mmap_unsafe.go b/pkg/tcpip/link/fdbased/mmap_unsafe.go
index 1293f68a2..58d5dfeef 100644
--- a/pkg/tcpip/link/fdbased/mmap_unsafe.go
+++ b/pkg/tcpip/link/fdbased/mmap_unsafe.go
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+//go:build (linux && amd64) || (linux && arm64)
// +build linux,amd64 linux,arm64
package fdbased
diff --git a/pkg/tcpip/link/fdbased/packet_dispatchers.go b/pkg/tcpip/link/fdbased/packet_dispatchers.go
index 4b7ef3aac..ab2855a63 100644
--- a/pkg/tcpip/link/fdbased/packet_dispatchers.go
+++ b/pkg/tcpip/link/fdbased/packet_dispatchers.go
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+//go:build linux
// +build linux
package fdbased
diff --git a/pkg/tcpip/link/rawfile/blockingpoll_noyield_unsafe.go b/pkg/tcpip/link/rawfile/blockingpoll_noyield_unsafe.go
index 2206fe0e6..c1438da21 100644
--- a/pkg/tcpip/link/rawfile/blockingpoll_noyield_unsafe.go
+++ b/pkg/tcpip/link/rawfile/blockingpoll_noyield_unsafe.go
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+//go:build linux && !amd64 && !arm64
// +build linux,!amd64,!arm64
package rawfile
diff --git a/pkg/tcpip/link/rawfile/blockingpoll_yield_unsafe.go b/pkg/tcpip/link/rawfile/blockingpoll_yield_unsafe.go
index 5002245a1..da900c24b 100644
--- a/pkg/tcpip/link/rawfile/blockingpoll_yield_unsafe.go
+++ b/pkg/tcpip/link/rawfile/blockingpoll_yield_unsafe.go
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+//go:build ((linux && amd64) || (linux && arm64)) && go1.12 && !go1.18
// +build linux,amd64 linux,arm64
// +build go1.12
// +build !go1.18
diff --git a/pkg/tcpip/link/rawfile/errors.go b/pkg/tcpip/link/rawfile/errors.go
index 9743e70ea..7e21a78d4 100644
--- a/pkg/tcpip/link/rawfile/errors.go
+++ b/pkg/tcpip/link/rawfile/errors.go
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+//go:build linux
// +build linux
package rawfile
diff --git a/pkg/tcpip/link/rawfile/errors_test.go b/pkg/tcpip/link/rawfile/errors_test.go
index 8f4bd60da..1b88c309b 100644
--- a/pkg/tcpip/link/rawfile/errors_test.go
+++ b/pkg/tcpip/link/rawfile/errors_test.go
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+//go:build linux
// +build linux
package rawfile
diff --git a/pkg/tcpip/link/rawfile/rawfile_unsafe.go b/pkg/tcpip/link/rawfile/rawfile_unsafe.go
index 43fe57830..53448a641 100644
--- a/pkg/tcpip/link/rawfile/rawfile_unsafe.go
+++ b/pkg/tcpip/link/rawfile/rawfile_unsafe.go
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+//go:build linux
// +build linux
// Package rawfile contains utilities for using the netstack with raw host
diff --git a/pkg/tcpip/link/sharedmem/rx.go b/pkg/tcpip/link/sharedmem/rx.go
index 8e6f3e5e3..e882a128c 100644
--- a/pkg/tcpip/link/sharedmem/rx.go
+++ b/pkg/tcpip/link/sharedmem/rx.go
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+//go:build linux
// +build linux
package sharedmem
diff --git a/pkg/tcpip/link/sharedmem/sharedmem.go b/pkg/tcpip/link/sharedmem/sharedmem.go
index df9a0b90a..30cf659b8 100644
--- a/pkg/tcpip/link/sharedmem/sharedmem.go
+++ b/pkg/tcpip/link/sharedmem/sharedmem.go
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+//go:build linux
// +build linux
// Package sharedmem provides the implemention of data-link layer endpoints
diff --git a/pkg/tcpip/link/sharedmem/sharedmem_test.go b/pkg/tcpip/link/sharedmem/sharedmem_test.go
index 0f72d4e95..d6d953085 100644
--- a/pkg/tcpip/link/sharedmem/sharedmem_test.go
+++ b/pkg/tcpip/link/sharedmem/sharedmem_test.go
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+//go:build linux
// +build linux
package sharedmem
diff --git a/pkg/tcpip/link/sniffer/pcap.go b/pkg/tcpip/link/sniffer/pcap.go
index 45475dcf1..3bb864ed2 100644
--- a/pkg/tcpip/link/sniffer/pcap.go
+++ b/pkg/tcpip/link/sniffer/pcap.go
@@ -39,8 +39,6 @@ type pcapHeader struct {
Network uint32
}
-const pcapPacketHeaderLen = 16
-
type pcapPacketHeader struct {
// Seconds is the timestamp seconds.
Seconds uint32
diff --git a/pkg/tcpip/link/sniffer/sniffer.go b/pkg/tcpip/link/sniffer/sniffer.go
index 232a26a18..3df826f3c 100644
--- a/pkg/tcpip/link/sniffer/sniffer.go
+++ b/pkg/tcpip/link/sniffer/sniffer.go
@@ -113,8 +113,9 @@ func writePCAPHeader(w io.Writer, maxLen uint32) error {
// NewWithWriter creates a new sniffer link-layer endpoint. It wraps around
// another endpoint and logs packets as they traverse the endpoint.
//
-// Packets are logged to writer in the pcap format. A sniffer created with this
-// function will not emit packets using the standard log package.
+// Each packet is written to writer in the pcap format in a single Write call
+// without synchronization. A sniffer created with this function will not emit
+// packets using the standard log package.
//
// snapLen is the maximum amount of a packet to be saved. Packets with a length
// less than or equal to snapLen will be saved in their entirety. Longer
@@ -155,27 +156,29 @@ func (e *endpoint) dumpPacket(dir direction, protocol tcpip.NetworkProtocolNumbe
if max := int(e.maxPCAPLen); length > max {
length = max
}
- if err := binary.Write(writer, binary.BigEndian, newPCAPPacketHeader(time.Now(), uint32(length), uint32(totalLength))); err != nil {
- panic(err)
- }
- write := func(b []byte) {
- if len(b) > length {
- b = b[:length]
+ packetHeader := newPCAPPacketHeader(time.Now(), uint32(length), uint32(totalLength))
+ packet := make([]byte, binary.Size(packetHeader)+length)
+ {
+ writer := tcpip.SliceWriter(packet)
+ if err := binary.Write(&writer, binary.BigEndian, packetHeader); err != nil {
+ panic(err)
}
- for len(b) != 0 {
+ for _, b := range pkt.Views() {
+ if length == 0 {
+ break
+ }
+ if len(b) > length {
+ b = b[:length]
+ }
n, err := writer.Write(b)
if err != nil {
panic(err)
}
- b = b[n:]
length -= n
}
}
- for _, v := range pkt.Views() {
- if length == 0 {
- break
- }
- write(v)
+ if _, err := writer.Write(packet); err != nil {
+ panic(err)
}
}
}
diff --git a/pkg/tcpip/link/tun/device.go b/pkg/tcpip/link/tun/device.go
index f3444e8b5..d23210503 100644
--- a/pkg/tcpip/link/tun/device.go
+++ b/pkg/tcpip/link/tun/device.go
@@ -126,7 +126,7 @@ func attachOrCreateNIC(s *stack.Stack, name, prefix string, linkCaps stack.LinkE
endpoint, ok := linkEP.(*tunEndpoint)
if !ok {
// Not a NIC created by tun device.
- return nil, syserror.EOPNOTSUPP
+ return nil, linuxerr.EOPNOTSUPP
}
if !endpoint.TryIncRef() {
// Race detected: NIC got deleted in between.
diff --git a/pkg/tcpip/link/tun/tun_unsafe.go b/pkg/tcpip/link/tun/tun_unsafe.go
index 0591fbd63..db4338e79 100644
--- a/pkg/tcpip/link/tun/tun_unsafe.go
+++ b/pkg/tcpip/link/tun/tun_unsafe.go
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+//go:build linux
// +build linux
// Package tun contains methods to open TAP and TUN devices.
diff --git a/pkg/tcpip/sample/tun_tcp_connect/main.go b/pkg/tcpip/sample/tun_tcp_connect/main.go
index b9a24ff56..009cab643 100644
--- a/pkg/tcpip/sample/tun_tcp_connect/main.go
+++ b/pkg/tcpip/sample/tun_tcp_connect/main.go
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+//go:build linux
// +build linux
// This sample creates a stack with TCP and IPv4 protocols on top of a TUN
diff --git a/pkg/tcpip/sample/tun_tcp_echo/main.go b/pkg/tcpip/sample/tun_tcp_echo/main.go
index ef1bfc186..c10b19aa0 100644
--- a/pkg/tcpip/sample/tun_tcp_echo/main.go
+++ b/pkg/tcpip/sample/tun_tcp_echo/main.go
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+//go:build linux
// +build linux
// This sample creates a stack with TCP and IPv4 protocols on top of a TUN
diff --git a/pkg/tcpip/socketops.go b/pkg/tcpip/socketops.go
index 0ea85f9ed..5642c86f8 100644
--- a/pkg/tcpip/socketops.go
+++ b/pkg/tcpip/socketops.go
@@ -15,17 +15,11 @@
package tcpip
import (
- "math"
"sync/atomic"
- "gvisor.dev/gvisor/pkg/atomicbitops"
"gvisor.dev/gvisor/pkg/sync"
)
-// PacketOverheadFactor is used to multiply the value provided by the user on a
-// SetSockOpt for setting the send/receive buffer sizes sockets.
-const PacketOverheadFactor = 2
-
// SocketOptionsHandler holds methods that help define endpoint specific
// behavior for socket level socket options. These must be implemented by
// endpoints to get notified when socket level options are set.
@@ -60,7 +54,7 @@ type SocketOptionsHandler interface {
// buffer size. It also returns the newly set value.
OnSetSendBufferSize(v int64) (newSz int64)
- // OnSetReceiveBufferSize is invoked to set the SO_RCVBUFSIZE.
+ // OnSetReceiveBufferSize is invoked by SO_RCVBUF and SO_RCVBUFFORCE.
OnSetReceiveBufferSize(v, oldSz int64) (newSz int64)
}
@@ -213,16 +207,24 @@ type SocketOptions struct {
// will not change.
getSendBufferLimits GetSendBufferLimits `state:"manual"`
+ // sendBufSizeMu protects sendBufferSize and calls to
+ // handler.OnSetSendBufferSize.
+ sendBufSizeMu sync.Mutex `state:"nosave"`
+
// sendBufferSize determines the send buffer size for this socket.
- sendBufferSize atomicbitops.AlignedAtomicInt64
+ sendBufferSize int64
// getReceiveBufferLimits provides the handler to get the min, default and
// max size for receive buffer. It is initialized at the creation time and
// will not change.
getReceiveBufferLimits GetReceiveBufferLimits `state:"manual"`
+ // receiveBufSizeMu protects receiveBufferSize and calls to
+ // handler.OnSetReceiveBufferSize.
+ receiveBufSizeMu sync.Mutex `state:"nosave"`
+
// receiveBufferSize determines the receive buffer size for this socket.
- receiveBufferSize atomicbitops.AlignedAtomicInt64
+ receiveBufferSize int64
// mu protects the access to the below fields.
mu sync.Mutex `state:"nosave"`
@@ -612,81 +614,52 @@ func (so *SocketOptions) SetBindToDevice(bindToDevice int32) Error {
return nil
}
+// SendBufferLimits returns the [min, max) range of allowable send buffer
+// sizes.
+func (so *SocketOptions) SendBufferLimits() (min, max int64) {
+ limits := so.getSendBufferLimits(so.stackHandler)
+ return int64(limits.Min), int64(limits.Max)
+}
+
// GetSendBufferSize gets value for SO_SNDBUF option.
func (so *SocketOptions) GetSendBufferSize() int64 {
- return so.sendBufferSize.Load()
+ so.sendBufSizeMu.Lock()
+ defer so.sendBufSizeMu.Unlock()
+ return so.sendBufferSize
}
// SetSendBufferSize sets value for SO_SNDBUF option. notify indicates if the
// stack handler should be invoked to set the send buffer size.
func (so *SocketOptions) SetSendBufferSize(sendBufferSize int64, notify bool) {
- v := sendBufferSize
-
- if !notify {
- so.sendBufferSize.Store(v)
- return
- }
-
- // Make sure the send buffer size is within the min and max
- // allowed.
- ss := so.getSendBufferLimits(so.stackHandler)
- min := int64(ss.Min)
- max := int64(ss.Max)
- // Validate the send buffer size with min and max values.
- // Multiply it by factor of 2.
- if v > max {
- v = max
- }
-
- if v < math.MaxInt32/PacketOverheadFactor {
- v *= PacketOverheadFactor
- if v < min {
- v = min
- }
- } else {
- v = math.MaxInt32
+ so.sendBufSizeMu.Lock()
+ defer so.sendBufSizeMu.Unlock()
+ if notify {
+ sendBufferSize = so.handler.OnSetSendBufferSize(sendBufferSize)
}
+ so.sendBufferSize = sendBufferSize
+}
- // Notify endpoint about change in buffer size.
- newSz := so.handler.OnSetSendBufferSize(v)
- so.sendBufferSize.Store(newSz)
+// ReceiveBufferLimits returns the [min, max) range of allowable receive buffer
+// sizes.
+func (so *SocketOptions) ReceiveBufferLimits() (min, max int64) {
+ limits := so.getReceiveBufferLimits(so.stackHandler)
+ return int64(limits.Min), int64(limits.Max)
}
// GetReceiveBufferSize gets value for SO_RCVBUF option.
func (so *SocketOptions) GetReceiveBufferSize() int64 {
- return so.receiveBufferSize.Load()
+ so.receiveBufSizeMu.Lock()
+ defer so.receiveBufSizeMu.Unlock()
+ return so.receiveBufferSize
}
-// SetReceiveBufferSize sets value for SO_RCVBUF option.
+// SetReceiveBufferSize sets the value of the SO_RCVBUF option, optionally
+// notifying the owning endpoint.
func (so *SocketOptions) SetReceiveBufferSize(receiveBufferSize int64, notify bool) {
- if !notify {
- so.receiveBufferSize.Store(receiveBufferSize)
- return
- }
-
- // Make sure the send buffer size is within the min and max
- // allowed.
- v := receiveBufferSize
- ss := so.getReceiveBufferLimits(so.stackHandler)
- min := int64(ss.Min)
- max := int64(ss.Max)
- // Validate the send buffer size with min and max values.
- if v > max {
- v = max
- }
-
- // Multiply it by factor of 2.
- if v < math.MaxInt32/PacketOverheadFactor {
- v *= PacketOverheadFactor
- if v < min {
- v = min
- }
- } else {
- v = math.MaxInt32
+ so.receiveBufSizeMu.Lock()
+ defer so.receiveBufSizeMu.Unlock()
+ if notify {
+ receiveBufferSize = so.handler.OnSetReceiveBufferSize(receiveBufferSize, so.receiveBufferSize)
}
-
- oldSz := so.receiveBufferSize.Load()
- // Notify endpoint about change in buffer size.
- newSz := so.handler.OnSetReceiveBufferSize(v, oldSz)
- so.receiveBufferSize.Store(newSz)
+ so.receiveBufferSize = receiveBufferSize
}
diff --git a/pkg/tcpip/stack/ndp_test.go b/pkg/tcpip/stack/ndp_test.go
index ca2250ad6..4d5431da1 100644
--- a/pkg/tcpip/stack/ndp_test.go
+++ b/pkg/tcpip/stack/ndp_test.go
@@ -5356,8 +5356,9 @@ func TestRouterSolicitation(t *testing.T) {
RandSource: &randSource,
})
- if err := s.CreateNIC(nicID, &e); err != nil {
- t.Fatalf("CreateNIC(%d, _) = %s", nicID, err)
+ opts := stack.NICOptions{Disabled: true}
+ if err := s.CreateNICWithOptions(nicID, &e, opts); err != nil {
+ t.Fatalf("CreateNICWithOptions(%d, _, %#v) = %s", nicID, opts, err)
}
if addr := test.nicAddr; addr != "" {
@@ -5366,6 +5367,10 @@ func TestRouterSolicitation(t *testing.T) {
}
}
+ if err := s.EnableNIC(nicID); err != nil {
+ t.Fatalf("EnableNIC(%d): %s", nicID, err)
+ }
+
// Make sure each RS is sent at the right time.
remaining := test.maxRtrSolicit
if remaining != 0 {
diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go
index 8f2658f64..55683b4fb 100644
--- a/pkg/tcpip/tcpip.go
+++ b/pkg/tcpip/tcpip.go
@@ -1845,6 +1845,10 @@ type TCPStats struct {
// FailedPortReservations is the number of times TCP failed to reserve
// a port.
FailedPortReservations *StatCounter
+
+ // SegmentsAckedWithDSACK is the number of segments acknowledged with
+ // DSACK.
+ SegmentsAckedWithDSACK *StatCounter
}
// UDPStats collects UDP-specific stats.
diff --git a/pkg/tcpip/transport/raw/endpoint.go b/pkg/tcpip/transport/raw/endpoint.go
index b6687911a..b3d8951ff 100644
--- a/pkg/tcpip/transport/raw/endpoint.go
+++ b/pkg/tcpip/transport/raw/endpoint.go
@@ -132,7 +132,7 @@ func newEndpoint(s *stack.Stack, netProto tcpip.NetworkProtocolNumber, transProt
// headers included. Because they're write-only, We don't need to
// register with the stack.
if !associated {
- e.ops.SetReceiveBufferSize(0, false)
+ e.ops.SetReceiveBufferSize(0, false /* notify */)
e.waiterQueue = nil
return e, nil
}
@@ -455,8 +455,21 @@ func (e *endpoint) Bind(addr tcpip.FullAddress) tcpip.Error {
}
// GetLocalAddress implements tcpip.Endpoint.GetLocalAddress.
-func (*endpoint) GetLocalAddress() (tcpip.FullAddress, tcpip.Error) {
- return tcpip.FullAddress{}, &tcpip.ErrNotSupported{}
+func (e *endpoint) GetLocalAddress() (tcpip.FullAddress, tcpip.Error) {
+ e.mu.RLock()
+ defer e.mu.RUnlock()
+
+ addr := e.BindAddr
+ if e.connected {
+ addr = e.route.LocalAddress()
+ }
+
+ return tcpip.FullAddress{
+ NIC: e.RegisterNICID,
+ Addr: addr,
+ // Linux returns the protocol in the port field.
+ Port: uint16(e.TransProto),
+ }, nil
}
// GetRemoteAddress implements tcpip.Endpoint.GetRemoteAddress.
diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go
index 1ed4ba419..044123185 100644
--- a/pkg/tcpip/transport/tcp/endpoint.go
+++ b/pkg/tcpip/transport/tcp/endpoint.go
@@ -478,7 +478,7 @@ type endpoint struct {
// shutdownFlags represent the current shutdown state of the endpoint.
shutdownFlags tcpip.ShutdownFlags
- // tcpRecovery is the loss deteoction algorithm used by TCP.
+ // tcpRecovery is the loss recovery algorithm used by TCP.
tcpRecovery tcpip.TCPRecovery
// sack holds TCP SACK related information for this endpoint.
@@ -754,7 +754,7 @@ func (e *endpoint) ResumeWork() {
//
// Precondition: e.mu must be held to call this method.
func (e *endpoint) setEndpointState(state EndpointState) {
- oldstate := EndpointState(atomic.LoadUint32(&e.state))
+ oldstate := EndpointState(atomic.SwapUint32(&e.state, uint32(state)))
switch state {
case StateEstablished:
e.stack.Stats().TCP.CurrentEstablished.Increment()
@@ -771,7 +771,6 @@ func (e *endpoint) setEndpointState(state EndpointState) {
e.stack.Stats().TCP.CurrentEstablished.Decrement()
}
}
- atomic.StoreUint32(&e.state, uint32(state))
}
// EndpointState returns the current state of the endpoint.
@@ -870,8 +869,6 @@ func newEndpoint(s *stack.Stack, netProto tcpip.NetworkProtocolNumber, waiterQue
e.maxSynRetries = uint8(synRetries)
}
- s.TransportProtocolOption(ProtocolNumber, &e.tcpRecovery)
-
if p := s.GetTCPProbe(); p != nil {
e.probe = p
}
@@ -1530,6 +1527,12 @@ func (e *endpoint) queueSegment(p tcpip.Payloader, opts tcpip.WriteOptions) (*se
if err != nil {
return nil, 0, err
}
+
+ // Do not queue zero length segments.
+ if len(v) == 0 {
+ return nil, 0, nil
+ }
+
if !opts.Atomic {
// Since we released locks in between it's possible that the
// endpoint transitioned to a CLOSED/ERROR states so make
@@ -2917,6 +2920,7 @@ func (e *endpoint) maybeEnableSACKPermitted(synOpts *header.TCPSynOptions) {
}
if bool(v) && synOpts.SACKPermitted {
e.SACKPermitted = true
+ e.stack.TransportProtocolOption(ProtocolNumber, &e.tcpRecovery)
}
}
diff --git a/pkg/tcpip/transport/tcp/protocol.go b/pkg/tcpip/transport/tcp/protocol.go
index 2fc282e73..18b834243 100644
--- a/pkg/tcpip/transport/tcp/protocol.go
+++ b/pkg/tcpip/transport/tcp/protocol.go
@@ -478,8 +478,7 @@ func NewProtocol(s *stack.Stack) stack.TransportProtocol {
minRTO: MinRTO,
maxRTO: MaxRTO,
maxRetries: MaxRetries,
- // TODO(gvisor.dev/issue/5243): Set recovery to tcpip.TCPRACKLossDetection.
- recovery: 0,
+ recovery: tcpip.TCPRACKLossDetection,
}
p.dispatcher.init(s.Rand(), runtime.GOMAXPROCS(0))
return &p
diff --git a/pkg/tcpip/transport/tcp/snd.go b/pkg/tcpip/transport/tcp/snd.go
index 72d58dcff..92a66f17e 100644
--- a/pkg/tcpip/transport/tcp/snd.go
+++ b/pkg/tcpip/transport/tcp/snd.go
@@ -1154,6 +1154,13 @@ func (s *sender) walkSACK(rcvdSeg *segment) {
idx := 0
n := len(rcvdSeg.parsedOptions.SACKBlocks)
if checkDSACK(rcvdSeg) {
+ dsackBlock := rcvdSeg.parsedOptions.SACKBlocks[0]
+ numDSACK := uint64(dsackBlock.End-dsackBlock.Start) / uint64(s.MaxPayloadSize)
+ // numDSACK can be zero when DSACK is sent for subsegments.
+ if numDSACK < 1 {
+ numDSACK = 1
+ }
+ s.ep.stack.Stats().TCP.SegmentsAckedWithDSACK.IncrementBy(numDSACK)
s.rc.setDSACKSeen(true)
idx = 1
n--
diff --git a/pkg/tcpip/transport/tcp/tcp_noracedetector_test.go b/pkg/tcpip/transport/tcp/tcp_noracedetector_test.go
index ced3a9c58..84fb1c416 100644
--- a/pkg/tcpip/transport/tcp/tcp_noracedetector_test.go
+++ b/pkg/tcpip/transport/tcp/tcp_noracedetector_test.go
@@ -16,6 +16,7 @@
// iterations taking long enough that the retransmit timer can kick in causing
// the congestion window measurements to fail due to extra packets etc.
//
+//go:build !race
// +build !race
package tcp_test
diff --git a/pkg/tcpip/transport/tcp/tcp_rack_test.go b/pkg/tcpip/transport/tcp/tcp_rack_test.go
index d6cf786a1..89e9fb886 100644
--- a/pkg/tcpip/transport/tcp/tcp_rack_test.go
+++ b/pkg/tcpip/transport/tcp/tcp_rack_test.go
@@ -36,9 +36,9 @@ const (
latency = 5 * time.Millisecond
)
-func setStackRACKPermitted(t *testing.T, c *context.Context) {
+func setStackTCPRecovery(t *testing.T, c *context.Context, recovery int) {
t.Helper()
- opt := tcpip.TCPRACKLossDetection
+ opt := tcpip.TCPRecovery(recovery)
if err := c.Stack().SetTransportProtocolOption(header.TCPProtocolNumber, &opt); err != nil {
t.Fatalf("c.s.SetTransportProtocolOption(%d, &%v(%v)): %s", header.TCPProtocolNumber, opt, opt, err)
}
@@ -70,7 +70,6 @@ func TestRACKUpdate(t *testing.T) {
close(probeDone)
})
setStackSACKPermitted(t, c, true)
- setStackRACKPermitted(t, c)
createConnectedWithSACKAndTS(c)
data := make([]byte, maxPayload)
@@ -129,7 +128,6 @@ func TestRACKDetectReorder(t *testing.T) {
close(probeDone)
})
setStackSACKPermitted(t, c, true)
- setStackRACKPermitted(t, c)
createConnectedWithSACKAndTS(c)
data := make([]byte, ackNumToVerify*maxPayload)
for i := range data {
@@ -162,8 +160,8 @@ func TestRACKDetectReorder(t *testing.T) {
func sendAndReceiveWithSACK(t *testing.T, c *context.Context, numPackets int, enableRACK bool) []byte {
setStackSACKPermitted(t, c, true)
- if enableRACK {
- setStackRACKPermitted(t, c)
+ if !enableRACK {
+ setStackTCPRecovery(t, c, 0)
}
createConnectedWithSACKAndTS(c)
@@ -542,6 +540,28 @@ func TestRACKDetectDSACK(t *testing.T) {
case invalidDSACKDetected:
t.Fatalf("RACK DSACK detected when there is no duplicate SACK")
}
+
+ metricPollFn := func() error {
+ tcpStats := c.Stack().Stats().TCP
+ stats := []struct {
+ stat *tcpip.StatCounter
+ name string
+ want uint64
+ }{
+ // Check DSACK was received for one segment.
+ {tcpStats.SegmentsAckedWithDSACK, "stats.TCP.SegmentsAckedWithDSACK", 1},
+ }
+ for _, s := range stats {
+ if got, want := s.stat.Value(), s.want; got != want {
+ return fmt.Errorf("got %s.Value() = %d, want = %d", s.name, got, want)
+ }
+ }
+ return nil
+ }
+
+ if err := testutil.Poll(metricPollFn, 1*time.Second); err != nil {
+ t.Error(err)
+ }
}
// TestRACKDetectDSACKWithOutOfOrder tests that RACK detects DSACK with out of
@@ -682,6 +702,28 @@ func TestRACKDetectDSACKSingleDup(t *testing.T) {
case invalidDSACKDetected:
t.Fatalf("RACK DSACK detected when there is no duplicate SACK")
}
+
+ metricPollFn := func() error {
+ tcpStats := c.Stack().Stats().TCP
+ stats := []struct {
+ stat *tcpip.StatCounter
+ name string
+ want uint64
+ }{
+ // Check DSACK was received for a subsegment.
+ {tcpStats.SegmentsAckedWithDSACK, "stats.TCP.SegmentsAckedWithDSACK", 1},
+ }
+ for _, s := range stats {
+ if got, want := s.stat.Value(), s.want; got != want {
+ return fmt.Errorf("got %s.Value() = %d, want = %d", s.name, got, want)
+ }
+ }
+ return nil
+ }
+
+ if err := testutil.Poll(metricPollFn, 1*time.Second); err != nil {
+ t.Error(err)
+ }
}
// TestRACKDetectDSACKDupWithCumulativeACK tests DSACK for two non-contiguous
@@ -998,7 +1040,6 @@ func TestRACKWithWindowFull(t *testing.T) {
defer c.Cleanup()
setStackSACKPermitted(t, c, true)
- setStackRACKPermitted(t, c)
createConnectedWithSACKAndTS(c)
seq := seqnum.Value(context.TestInitialSequenceNumber).Add(1)
diff --git a/pkg/tcpip/transport/tcp/tcp_sack_test.go b/pkg/tcpip/transport/tcp/tcp_sack_test.go
index 20c9761f2..83e0653b9 100644
--- a/pkg/tcpip/transport/tcp/tcp_sack_test.go
+++ b/pkg/tcpip/transport/tcp/tcp_sack_test.go
@@ -61,6 +61,7 @@ func TestSackPermittedConnect(t *testing.T) {
defer c.Cleanup()
setStackSACKPermitted(t, c, sackEnabled)
+ setStackTCPRecovery(t, c, 0)
rep := createConnectedWithSACKPermittedOption(c)
data := []byte{1, 2, 3}
@@ -105,6 +106,7 @@ func TestSackDisabledConnect(t *testing.T) {
defer c.Cleanup()
setStackSACKPermitted(t, c, sackEnabled)
+ setStackTCPRecovery(t, c, 0)
rep := c.CreateConnectedWithOptions(header.TCPSynOptions{})
@@ -166,6 +168,7 @@ func TestSackPermittedAccept(t *testing.T) {
}
}
setStackSACKPermitted(t, c, sackEnabled)
+ setStackTCPRecovery(t, c, 0)
rep := c.AcceptWithOptions(tc.wndScale, header.TCPSynOptions{MSS: defaultIPv4MSS, SACKPermitted: tc.sackPermitted})
// Now verify no SACK blocks are
@@ -239,6 +242,7 @@ func TestSackDisabledAccept(t *testing.T) {
}
setStackSACKPermitted(t, c, sackEnabled)
+ setStackTCPRecovery(t, c, 0)
rep := c.AcceptWithOptions(tc.wndScale, header.TCPSynOptions{MSS: defaultIPv4MSS})
@@ -386,6 +390,7 @@ func TestSACKRecovery(t *testing.T) {
log.Printf("state: %+v\n", s)
})
setStackSACKPermitted(t, c, true)
+ setStackTCPRecovery(t, c, 0)
createConnectedWithSACKAndTS(c)
const iterations = 3
diff --git a/pkg/tcpip/transport/tcp/tcp_test.go b/pkg/tcpip/transport/tcp/tcp_test.go
index 71c4aa85d..031f01357 100644
--- a/pkg/tcpip/transport/tcp/tcp_test.go
+++ b/pkg/tcpip/transport/tcp/tcp_test.go
@@ -2147,7 +2147,7 @@ func TestSmallSegReceiveWindowAdvertisement(t *testing.T) {
// Bump up the receive buffer size such that, when the receive window grows,
// the scaled window exceeds maxUint16.
- c.EP.SocketOptions().SetReceiveBufferSize(int64(opt.Max), true)
+ c.EP.SocketOptions().SetReceiveBufferSize(int64(opt.Max)*2, true /* notify */)
// Keep the payload size < segment overhead and such that it is a multiple
// of the window scaled value. This enables the test to perform equality
@@ -2267,7 +2267,7 @@ func TestNoWindowShrinking(t *testing.T) {
initialWnd := header.TCP(header.IPv4(pkt).Payload()).WindowSize() << c.RcvdWindowScale
initialLastAcceptableSeq := iss.Add(seqnum.Size(initialWnd))
// Now shrink the receive buffer to half its original size.
- c.EP.SocketOptions().SetReceiveBufferSize(int64(rcvBufSize/2), true)
+ c.EP.SocketOptions().SetReceiveBufferSize(int64(rcvBufSize), true /* notify */)
data := generateRandomPayload(t, rcvBufSize)
// Send a payload of half the size of rcvBufSize.
@@ -2523,7 +2523,7 @@ func TestScaledWindowAccept(t *testing.T) {
defer ep.Close()
// Set the window size greater than the maximum non-scaled window.
- ep.SocketOptions().SetReceiveBufferSize(65535*3, true)
+ ep.SocketOptions().SetReceiveBufferSize(65535*6, true /* notify */)
if err := ep.Bind(tcpip.FullAddress{Port: context.StackPort}); err != nil {
t.Fatalf("Bind failed: %s", err)
@@ -2595,7 +2595,7 @@ func TestNonScaledWindowAccept(t *testing.T) {
defer ep.Close()
// Set the window size greater than the maximum non-scaled window.
- ep.SocketOptions().SetReceiveBufferSize(65535*3, true)
+ ep.SocketOptions().SetReceiveBufferSize(65535*6, true /* notify */)
if err := ep.Bind(tcpip.FullAddress{Port: context.StackPort}); err != nil {
t.Fatalf("Bind failed: %s", err)
@@ -3188,7 +3188,7 @@ func TestPassiveSendMSSLessThanMTU(t *testing.T) {
// Set the buffer size to a deterministic size so that we can check the
// window scaling option.
const rcvBufferSize = 0x20000
- ep.SocketOptions().SetReceiveBufferSize(rcvBufferSize, true)
+ ep.SocketOptions().SetReceiveBufferSize(rcvBufferSize*2, true /* notify */)
if err := ep.Bind(tcpip.FullAddress{Port: context.StackPort}); err != nil {
t.Fatalf("Bind failed: %s", err)
@@ -3327,7 +3327,7 @@ func TestSynOptionsOnActiveConnect(t *testing.T) {
// window scaling option.
const rcvBufferSize = 0x20000
const wndScale = 3
- c.EP.SocketOptions().SetReceiveBufferSize(rcvBufferSize, true)
+ c.EP.SocketOptions().SetReceiveBufferSize(rcvBufferSize*2, true /* notify */)
// Start connection attempt.
we, ch := waiter.NewChannelEntry(nil)
@@ -3624,6 +3624,38 @@ func TestMaxRTO(t *testing.T) {
}
}
+// TestZeroSizedWriteRetransmit tests that a zero sized write should not
+// result in a panic on an RTO as no segment should have been queued for
+// a zero sized write.
+func TestZeroSizedWriteRetransmit(t *testing.T) {
+ c := context.New(t, defaultMTU)
+ defer c.Cleanup()
+
+ c.CreateConnected(context.TestInitialSequenceNumber, 30000 /* rcvWnd */, -1 /* epRcvBuf */)
+
+ var r bytes.Reader
+ _, err := c.EP.Write(&r, tcpip.WriteOptions{})
+ if err != nil {
+ t.Fatalf("Write failed: %s", err)
+ }
+ // Now do a non-zero sized write to trigger actual sending of data.
+ r.Reset(make([]byte, 1))
+ _, err = c.EP.Write(&r, tcpip.WriteOptions{})
+ if err != nil {
+ t.Fatalf("Write failed: %s", err)
+ }
+ // Do not ACK the packet and expect an original transmit and a
+ // retransmit. This should not cause a panic.
+ for i := 0; i < 2; i++ {
+ checker.IPv4(t, c.GetPacket(),
+ checker.TCP(
+ checker.DstPort(context.TestPort),
+ checker.TCPFlagsMatch(header.TCPFlagAck, ^header.TCPFlagPsh),
+ ),
+ )
+ }
+}
+
// TestRetransmitIPv4IDUniqueness tests that the IPv4 Identification field is
// unique on retransmits.
func TestRetransmitIPv4IDUniqueness(t *testing.T) {
@@ -4637,52 +4669,6 @@ func TestDefaultBufferSizes(t *testing.T) {
checkRecvBufferSize(t, ep, tcp.DefaultReceiveBufferSize*3)
}
-func TestMinMaxBufferSizes(t *testing.T) {
- s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocolFactory{ipv4.NewProtocol},
- TransportProtocols: []stack.TransportProtocolFactory{tcp.NewProtocol},
- })
-
- // Check the default values.
- ep, err := s.NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &waiter.Queue{})
- if err != nil {
- t.Fatalf("NewEndpoint failed; %s", err)
- }
- defer ep.Close()
-
- // Change the min/max values for send/receive
- {
- opt := tcpip.TCPReceiveBufferSizeRangeOption{Min: 200, Default: tcp.DefaultReceiveBufferSize * 2, Max: tcp.DefaultReceiveBufferSize * 20}
- if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, &opt); err != nil {
- t.Fatalf("SetTransportProtocolOption(%d, &%#v): %s", tcp.ProtocolNumber, opt, err)
- }
- }
-
- {
- opt := tcpip.TCPSendBufferSizeRangeOption{Min: 300, Default: tcp.DefaultSendBufferSize * 3, Max: tcp.DefaultSendBufferSize * 30}
- if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, &opt); err != nil {
- t.Fatalf("SetTransportProtocolOption(%d, &%#v): %s", tcp.ProtocolNumber, opt, err)
- }
- }
-
- // Set values below the min/2.
- ep.SocketOptions().SetReceiveBufferSize(99, true)
- checkRecvBufferSize(t, ep, 200)
-
- ep.SocketOptions().SetSendBufferSize(149, true)
-
- checkSendBufferSize(t, ep, 300)
-
- // Set values above the max.
- ep.SocketOptions().SetReceiveBufferSize(1+tcp.DefaultReceiveBufferSize*20, true)
- // Values above max are capped at max and then doubled.
- checkRecvBufferSize(t, ep, tcp.DefaultReceiveBufferSize*20*2)
-
- ep.SocketOptions().SetSendBufferSize(1+tcp.DefaultSendBufferSize*30, true)
- // Values above max are capped at max and then doubled.
- checkSendBufferSize(t, ep, tcp.DefaultSendBufferSize*30*2)
-}
-
func TestBindToDeviceOption(t *testing.T) {
s := stack.New(stack.Options{
NetworkProtocols: []stack.NetworkProtocolFactory{ipv4.NewProtocol},
@@ -7720,7 +7706,7 @@ func TestIncreaseWindowOnBufferResize(t *testing.T) {
// Increasing the buffer from should generate an ACK,
// since window grew from small value to larger equal MSS
- c.EP.SocketOptions().SetReceiveBufferSize(rcvBuf*2, true)
+ c.EP.SocketOptions().SetReceiveBufferSize(rcvBuf*4, true /* notify */)
checker.IPv4(t, c.GetPacket(),
checker.PayloadLen(header.TCPMinimumSize),
checker.TCP(
diff --git a/pkg/tcpip/transport/tcp/testing/context/context.go b/pkg/tcpip/transport/tcp/testing/context/context.go
index 53efecc5a..96e4849d2 100644
--- a/pkg/tcpip/transport/tcp/testing/context/context.go
+++ b/pkg/tcpip/transport/tcp/testing/context/context.go
@@ -757,7 +757,7 @@ func (c *Context) Create(epRcvBuf int) {
}
if epRcvBuf != -1 {
- c.EP.SocketOptions().SetReceiveBufferSize(int64(epRcvBuf), true /* notify */)
+ c.EP.SocketOptions().SetReceiveBufferSize(int64(epRcvBuf)*2, true /* notify */)
}
}