summaryrefslogtreecommitdiffhomepage
path: root/pkg/tcpip
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/tcpip')
-rw-r--r--pkg/tcpip/header/icmpv4.go14
-rw-r--r--pkg/tcpip/network/ip_test.go231
-rw-r--r--pkg/tcpip/network/ipv4/icmp.go112
-rw-r--r--pkg/tcpip/network/ipv4/ipv4.go2
-rw-r--r--pkg/tcpip/network/ipv6/icmp.go137
-rw-r--r--pkg/tcpip/network/ipv6/ipv6.go2
-rw-r--r--pkg/tcpip/socketops.go63
-rw-r--r--pkg/tcpip/stack/nic.go7
-rw-r--r--pkg/tcpip/stack/registration.go77
-rw-r--r--pkg/tcpip/stack/stack.go13
-rw-r--r--pkg/tcpip/stack/stack_options.go25
-rw-r--r--pkg/tcpip/stack/stack_test.go7
-rw-r--r--pkg/tcpip/stack/transport_demuxer.go17
-rw-r--r--pkg/tcpip/stack/transport_test.go2
-rw-r--r--pkg/tcpip/tests/integration/link_resolution_test.go54
-rw-r--r--pkg/tcpip/transport/icmp/endpoint.go5
-rw-r--r--pkg/tcpip/transport/tcp/connect.go7
-rw-r--r--pkg/tcpip/transport/tcp/cubic.go4
-rw-r--r--pkg/tcpip/transport/tcp/endpoint.go56
-rw-r--r--pkg/tcpip/transport/tcp/endpoint_state.go10
-rw-r--r--pkg/tcpip/transport/tcp/rack.go2
-rw-r--r--pkg/tcpip/transport/tcp/rcv.go4
-rw-r--r--pkg/tcpip/transport/tcp/reno.go8
-rw-r--r--pkg/tcpip/transport/tcp/snd.go19
-rw-r--r--pkg/tcpip/transport/tcp/tcp_test.go7
-rw-r--r--pkg/tcpip/transport/udp/endpoint.go34
26 files changed, 710 insertions, 209 deletions
diff --git a/pkg/tcpip/header/icmpv4.go b/pkg/tcpip/header/icmpv4.go
index 5f9b8e9e2..f840a4322 100644
--- a/pkg/tcpip/header/icmpv4.go
+++ b/pkg/tcpip/header/icmpv4.go
@@ -16,7 +16,6 @@ package header
import (
"encoding/binary"
- "fmt"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/buffer"
@@ -208,16 +207,3 @@ func ICMPv4Checksum(h ICMPv4, vv buffer.VectorisedView) uint16 {
return ^xsum
}
-
-// ICMPOriginFromNetProto returns the appropriate SockErrOrigin to use when
-// a packet having a `net` header causing an ICMP error.
-func ICMPOriginFromNetProto(net tcpip.NetworkProtocolNumber) tcpip.SockErrOrigin {
- switch net {
- case IPv4ProtocolNumber:
- return tcpip.SockExtErrorOriginICMP
- case IPv6ProtocolNumber:
- return tcpip.SockExtErrorOriginICMP6
- default:
- panic(fmt.Sprintf("unsupported net proto to extract ICMP error origin: %d", net))
- }
-}
diff --git a/pkg/tcpip/network/ip_test.go b/pkg/tcpip/network/ip_test.go
index 8d155344b..6a1f11a36 100644
--- a/pkg/tcpip/network/ip_test.go
+++ b/pkg/tcpip/network/ip_test.go
@@ -59,6 +59,14 @@ var localIPv6AddrWithPrefix = tcpip.AddressWithPrefix{
PrefixLen: 120,
}
+type transportError struct {
+ origin tcpip.SockErrOrigin
+ typ uint8
+ code uint8
+ info uint32
+ kind stack.TransportErrorKind
+}
+
// testObject implements two interfaces: LinkEndpoint and TransportDispatcher.
// The former is used to pretend that it's a link endpoint so that we can
// inspect packets written by the network endpoints. The latter is used to
@@ -74,8 +82,7 @@ type testObject struct {
srcAddr tcpip.Address
dstAddr tcpip.Address
v4 bool
- typ stack.ControlType
- extra uint32
+ transErr transportError
dataCalls int
controlCalls int
@@ -119,16 +126,23 @@ func (t *testObject) DeliverTransportPacket(protocol tcpip.TransportProtocolNumb
return stack.TransportPacketHandled
}
-// DeliverTransportControlPacket is called by network endpoints after parsing
+// DeliverTransportError is called by network endpoints after parsing
// incoming control (ICMP) packets. This is used by the test object to verify
// that the results of the parsing are expected.
-func (t *testObject) DeliverTransportControlPacket(local, remote tcpip.Address, net tcpip.NetworkProtocolNumber, trans tcpip.TransportProtocolNumber, typ stack.ControlType, extra uint32, pkt *stack.PacketBuffer) {
+func (t *testObject) DeliverTransportError(local, remote tcpip.Address, net tcpip.NetworkProtocolNumber, trans tcpip.TransportProtocolNumber, transErr stack.TransportError, pkt *stack.PacketBuffer) {
t.checkValues(trans, pkt.Data, remote, local)
- if typ != t.typ {
- t.t.Errorf("typ = %v, want %v", typ, t.typ)
- }
- if extra != t.extra {
- t.t.Errorf("extra = %v, want %v", extra, t.extra)
+ if diff := cmp.Diff(
+ t.transErr,
+ transportError{
+ origin: transErr.Origin(),
+ typ: transErr.Type(),
+ code: transErr.Code(),
+ info: transErr.Info(),
+ kind: transErr.Kind(),
+ },
+ cmp.AllowUnexported(transportError{}),
+ ); diff != "" {
+ t.t.Errorf("transport error mismatch (-want +got):\n%s", diff)
}
t.controlCalls++
}
@@ -702,24 +716,81 @@ func TestReceive(t *testing.T) {
}
func TestIPv4ReceiveControl(t *testing.T) {
- const mtu = 0xbeef - header.IPv4MinimumSize
+ const (
+ mtu = 0xbeef - header.IPv4MinimumSize
+ dataLen = 8
+ )
+
cases := []struct {
name string
expectedCount int
fragmentOffset uint16
code header.ICMPv4Code
- expectedTyp stack.ControlType
- expectedExtra uint32
+ transErr transportError
trunc int
}{
- {"FragmentationNeeded", 1, 0, header.ICMPv4FragmentationNeeded, stack.ControlPacketTooBig, mtu, 0},
- {"Truncated (10 bytes missing)", 0, 0, header.ICMPv4FragmentationNeeded, stack.ControlPacketTooBig, mtu, 10},
- {"Truncated (missing IPv4 header)", 0, 0, header.ICMPv4FragmentationNeeded, stack.ControlPacketTooBig, mtu, header.IPv4MinimumSize + 8},
- {"Truncated (missing 'extra info')", 0, 0, header.ICMPv4FragmentationNeeded, stack.ControlPacketTooBig, mtu, 4 + header.IPv4MinimumSize + 8},
- {"Truncated (missing ICMP header)", 0, 0, header.ICMPv4FragmentationNeeded, stack.ControlPacketTooBig, mtu, header.ICMPv4MinimumSize + header.IPv4MinimumSize + 8},
- {"Port unreachable", 1, 0, header.ICMPv4PortUnreachable, stack.ControlPortUnreachable, 0, 0},
- {"Non-zero fragment offset", 0, 100, header.ICMPv4PortUnreachable, stack.ControlPortUnreachable, 0, 0},
- {"Zero-length packet", 0, 0, header.ICMPv4PortUnreachable, stack.ControlPortUnreachable, 0, 2*header.IPv4MinimumSize + header.ICMPv4MinimumSize + 8},
+ {
+ name: "FragmentationNeeded",
+ expectedCount: 1,
+ fragmentOffset: 0,
+ code: header.ICMPv4FragmentationNeeded,
+ transErr: transportError{
+ origin: tcpip.SockExtErrorOriginICMP,
+ typ: uint8(header.ICMPv4DstUnreachable),
+ code: uint8(header.ICMPv4FragmentationNeeded),
+ info: mtu,
+ kind: stack.PacketTooBigTransportError,
+ },
+ trunc: 0,
+ },
+ {
+ name: "Truncated (missing IPv4 header)",
+ expectedCount: 0,
+ fragmentOffset: 0,
+ code: header.ICMPv4FragmentationNeeded,
+ trunc: header.IPv4MinimumSize + header.ICMPv4MinimumSize,
+ },
+ {
+ name: "Truncated (partial offending packet's IP header)",
+ expectedCount: 0,
+ fragmentOffset: 0,
+ code: header.ICMPv4FragmentationNeeded,
+ trunc: header.IPv4MinimumSize + header.ICMPv4MinimumSize + header.IPv4MinimumSize - 1,
+ },
+ {
+ name: "Truncated (partial offending packet's data)",
+ expectedCount: 0,
+ fragmentOffset: 0,
+ code: header.ICMPv4FragmentationNeeded,
+ trunc: header.ICMPv4MinimumSize + header.ICMPv4MinimumSize + header.IPv4MinimumSize + dataLen - 1,
+ },
+ {
+ name: "Port unreachable",
+ expectedCount: 1,
+ fragmentOffset: 0,
+ code: header.ICMPv4PortUnreachable,
+ transErr: transportError{
+ origin: tcpip.SockExtErrorOriginICMP,
+ typ: uint8(header.ICMPv4DstUnreachable),
+ code: uint8(header.ICMPv4PortUnreachable),
+ kind: stack.DestinationPortUnreachableTransportError,
+ },
+ trunc: 0,
+ },
+ {
+ name: "Non-zero fragment offset",
+ expectedCount: 0,
+ fragmentOffset: 100,
+ code: header.ICMPv4PortUnreachable,
+ trunc: 0,
+ },
+ {
+ name: "Zero-length packet",
+ expectedCount: 0,
+ fragmentOffset: 100,
+ code: header.ICMPv4PortUnreachable,
+ trunc: 2*header.IPv4MinimumSize + header.ICMPv4MinimumSize + dataLen,
+ },
}
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
@@ -738,7 +809,7 @@ func TestIPv4ReceiveControl(t *testing.T) {
}
const dataOffset = header.IPv4MinimumSize*2 + header.ICMPv4MinimumSize
- view := buffer.NewView(dataOffset + 8)
+ view := buffer.NewView(dataOffset + dataLen)
// Create the outer IPv4 header.
ip := header.IPv4(view)
@@ -785,8 +856,7 @@ func TestIPv4ReceiveControl(t *testing.T) {
nic.testObject.srcAddr = remoteIPv4Addr
nic.testObject.dstAddr = localIPv4Addr
nic.testObject.contents = view[dataOffset:]
- nic.testObject.typ = c.expectedTyp
- nic.testObject.extra = c.expectedExtra
+ nic.testObject.transErr = c.transErr
addressableEndpoint, ok := ep.(stack.AddressableEndpoint)
if !ok {
@@ -953,30 +1023,112 @@ func TestIPv6Send(t *testing.T) {
}
func TestIPv6ReceiveControl(t *testing.T) {
+ const (
+ mtu = 0xffff
+ outerSrcAddr = "\x0a\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaa"
+ dataLen = 8
+ )
+
newUint16 := func(v uint16) *uint16 { return &v }
- const mtu = 0xffff
- const outerSrcAddr = "\x0a\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaa"
+ portUnreachableTransErr := transportError{
+ origin: tcpip.SockExtErrorOriginICMP6,
+ typ: uint8(header.ICMPv6DstUnreachable),
+ code: uint8(header.ICMPv6PortUnreachable),
+ kind: stack.DestinationPortUnreachableTransportError,
+ }
+
cases := []struct {
name string
expectedCount int
fragmentOffset *uint16
typ header.ICMPv6Type
code header.ICMPv6Code
- expectedTyp stack.ControlType
- expectedExtra uint32
+ transErr transportError
trunc int
}{
- {"PacketTooBig", 1, nil, header.ICMPv6PacketTooBig, 0, stack.ControlPacketTooBig, mtu, 0},
- {"Truncated (10 bytes missing)", 0, nil, header.ICMPv6PacketTooBig, 0, stack.ControlPacketTooBig, mtu, 10},
- {"Truncated (missing IPv6 header)", 0, nil, header.ICMPv6PacketTooBig, 0, stack.ControlPacketTooBig, mtu, header.IPv6MinimumSize + 8},
- {"Truncated PacketTooBig (missing 'extra info')", 0, nil, header.ICMPv6PacketTooBig, 0, stack.ControlPacketTooBig, mtu, 4 + header.IPv6MinimumSize + 8},
- {"Truncated (missing ICMP header)", 0, nil, header.ICMPv6PacketTooBig, 0, stack.ControlPacketTooBig, mtu, header.ICMPv6PacketTooBigMinimumSize + header.IPv6MinimumSize + 8},
- {"Port unreachable", 1, nil, header.ICMPv6DstUnreachable, header.ICMPv6PortUnreachable, stack.ControlPortUnreachable, 0, 0},
- {"Truncated DstUnreachable (missing 'extra info')", 0, nil, header.ICMPv6DstUnreachable, header.ICMPv6PortUnreachable, stack.ControlPortUnreachable, 0, 4 + header.IPv6MinimumSize + 8},
- {"Fragmented, zero offset", 1, newUint16(0), header.ICMPv6DstUnreachable, header.ICMPv6PortUnreachable, stack.ControlPortUnreachable, 0, 0},
- {"Non-zero fragment offset", 0, newUint16(100), header.ICMPv6DstUnreachable, header.ICMPv6PortUnreachable, stack.ControlPortUnreachable, 0, 0},
- {"Zero-length packet", 0, nil, header.ICMPv6DstUnreachable, header.ICMPv6PortUnreachable, stack.ControlPortUnreachable, 0, 2*header.IPv6MinimumSize + header.ICMPv6DstUnreachableMinimumSize + 8},
+ {
+ name: "PacketTooBig",
+ expectedCount: 1,
+ fragmentOffset: nil,
+ typ: header.ICMPv6PacketTooBig,
+ code: header.ICMPv6UnusedCode,
+ transErr: transportError{
+ origin: tcpip.SockExtErrorOriginICMP6,
+ typ: uint8(header.ICMPv6PacketTooBig),
+ code: uint8(header.ICMPv6UnusedCode),
+ info: mtu,
+ kind: stack.PacketTooBigTransportError,
+ },
+ trunc: 0,
+ },
+ {
+ name: "Truncated (missing offending packet's IPv6 header)",
+ expectedCount: 0,
+ fragmentOffset: nil,
+ typ: header.ICMPv6PacketTooBig,
+ code: header.ICMPv6UnusedCode,
+ trunc: header.IPv6MinimumSize + header.ICMPv6PacketTooBigMinimumSize,
+ },
+ {
+ name: "Truncated PacketTooBig (partial offending packet's IPv6 header)",
+ expectedCount: 0,
+ fragmentOffset: nil,
+ typ: header.ICMPv6PacketTooBig,
+ code: header.ICMPv6UnusedCode,
+ trunc: header.IPv6MinimumSize + header.ICMPv6PacketTooBigMinimumSize + header.IPv6MinimumSize - 1,
+ },
+ {
+ name: "Truncated (partial offending packet's data)",
+ expectedCount: 0,
+ fragmentOffset: nil,
+ typ: header.ICMPv6PacketTooBig,
+ code: header.ICMPv6UnusedCode,
+ trunc: header.IPv6MinimumSize + header.ICMPv6PacketTooBigMinimumSize + header.IPv6MinimumSize + dataLen - 1,
+ },
+ {
+ name: "Port unreachable",
+ expectedCount: 1,
+ fragmentOffset: nil,
+ typ: header.ICMPv6DstUnreachable,
+ code: header.ICMPv6PortUnreachable,
+ transErr: portUnreachableTransErr,
+ trunc: 0,
+ },
+ {
+ name: "Truncated DstPortUnreachable (partial offending packet's IP header)",
+ expectedCount: 0,
+ fragmentOffset: nil,
+ typ: header.ICMPv6DstUnreachable,
+ code: header.ICMPv6PortUnreachable,
+ trunc: header.IPv6MinimumSize + header.ICMPv6DstUnreachableMinimumSize + header.IPv6MinimumSize - 1,
+ },
+ {
+ name: "DstPortUnreachable for Fragmented, zero offset",
+ expectedCount: 1,
+ fragmentOffset: newUint16(0),
+ typ: header.ICMPv6DstUnreachable,
+ code: header.ICMPv6PortUnreachable,
+ transErr: portUnreachableTransErr,
+ trunc: 0,
+ },
+ {
+ name: "DstPortUnreachable for Non-zero fragment offset",
+ expectedCount: 0,
+ fragmentOffset: newUint16(100),
+ typ: header.ICMPv6DstUnreachable,
+ code: header.ICMPv6PortUnreachable,
+ transErr: portUnreachableTransErr,
+ trunc: 0,
+ },
+ {
+ name: "Zero-length packet",
+ expectedCount: 0,
+ fragmentOffset: nil,
+ typ: header.ICMPv6DstUnreachable,
+ code: header.ICMPv6PortUnreachable,
+ trunc: 2*header.IPv6MinimumSize + header.ICMPv6DstUnreachableMinimumSize + dataLen,
+ },
}
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
@@ -998,7 +1150,7 @@ func TestIPv6ReceiveControl(t *testing.T) {
if c.fragmentOffset != nil {
dataOffset += header.IPv6FragmentHeaderSize
}
- view := buffer.NewView(dataOffset + 8)
+ view := buffer.NewView(dataOffset + dataLen)
// Create the outer IPv6 header.
ip := header.IPv6(view)
@@ -1049,8 +1201,7 @@ func TestIPv6ReceiveControl(t *testing.T) {
nic.testObject.srcAddr = remoteIPv6Addr
nic.testObject.dstAddr = localIPv6Addr
nic.testObject.contents = view[dataOffset:]
- nic.testObject.typ = c.expectedTyp
- nic.testObject.extra = c.expectedExtra
+ nic.testObject.transErr = c.transErr
// Set ICMPv6 checksum.
icmp.SetChecksum(header.ICMPv6Checksum(icmp, outerSrcAddr, localIPv6Addr, buffer.VectorisedView{}))
diff --git a/pkg/tcpip/network/ipv4/icmp.go b/pkg/tcpip/network/ipv4/icmp.go
index 3d93a2cd0..74e70e283 100644
--- a/pkg/tcpip/network/ipv4/icmp.go
+++ b/pkg/tcpip/network/ipv4/icmp.go
@@ -23,11 +23,108 @@ import (
"gvisor.dev/gvisor/pkg/tcpip/stack"
)
+// icmpv4DestinationUnreachableSockError is a general ICMPv4 Destination
+// Unreachable error.
+//
+// +stateify savable
+type icmpv4DestinationUnreachableSockError struct{}
+
+// Origin implements tcpip.SockErrorCause.
+func (*icmpv4DestinationUnreachableSockError) Origin() tcpip.SockErrOrigin {
+ return tcpip.SockExtErrorOriginICMP
+}
+
+// Type implements tcpip.SockErrorCause.
+func (*icmpv4DestinationUnreachableSockError) Type() uint8 {
+ return uint8(header.ICMPv4DstUnreachable)
+}
+
+// Info implements tcpip.SockErrorCause.
+func (*icmpv4DestinationUnreachableSockError) Info() uint32 {
+ return 0
+}
+
+var _ stack.TransportError = (*icmpv4DestinationHostUnreachableSockError)(nil)
+
+// icmpv4DestinationHostUnreachableSockError is an ICMPv4 Destination Host
+// Unreachable error.
+//
+// It indicates that a packet was not able to reach the destination host.
+//
+// +stateify savable
+type icmpv4DestinationHostUnreachableSockError struct {
+ icmpv4DestinationUnreachableSockError
+}
+
+// Code implements tcpip.SockErrorCause.
+func (*icmpv4DestinationHostUnreachableSockError) Code() uint8 {
+ return uint8(header.ICMPv4HostUnreachable)
+}
+
+// Kind implements stack.TransportError.
+func (*icmpv4DestinationHostUnreachableSockError) Kind() stack.TransportErrorKind {
+ return stack.DestinationHostUnreachableTransportError
+}
+
+var _ stack.TransportError = (*icmpv4DestinationPortUnreachableSockError)(nil)
+
+// icmpv4DestinationPortUnreachableSockError is an ICMPv4 Destination Port
+// Unreachable error.
+//
+// It indicates that a packet reached the destination host, but the transport
+// protocol was not active on the destination port.
+//
+// +stateify savable
+type icmpv4DestinationPortUnreachableSockError struct {
+ icmpv4DestinationUnreachableSockError
+}
+
+// Code implements tcpip.SockErrorCause.
+func (*icmpv4DestinationPortUnreachableSockError) Code() uint8 {
+ return uint8(header.ICMPv4PortUnreachable)
+}
+
+// Kind implements stack.TransportError.
+func (*icmpv4DestinationPortUnreachableSockError) Kind() stack.TransportErrorKind {
+ return stack.DestinationPortUnreachableTransportError
+}
+
+var _ stack.TransportError = (*icmpv4FragmentationNeededSockError)(nil)
+
+// icmpv4FragmentationNeededSockError is an ICMPv4 Destination Unreachable error
+// due to fragmentation being required but the packet was set to not be
+// fragmented.
+//
+// It indicates that a link exists on the path to the destination with an MTU
+// that is too small to carry the packet.
+//
+// +stateify savable
+type icmpv4FragmentationNeededSockError struct {
+ icmpv4DestinationUnreachableSockError
+
+ mtu uint32
+}
+
+// Code implements tcpip.SockErrorCause.
+func (*icmpv4FragmentationNeededSockError) Code() uint8 {
+ return uint8(header.ICMPv4FragmentationNeeded)
+}
+
+// Info implements tcpip.SockErrorCause.
+func (e *icmpv4FragmentationNeededSockError) Info() uint32 {
+ return e.mtu
+}
+
+// Kind implements stack.TransportError.
+func (*icmpv4FragmentationNeededSockError) Kind() stack.TransportErrorKind {
+ return stack.PacketTooBigTransportError
+}
+
// handleControl handles the case when an ICMP error packet contains the headers
// of the original packet that caused the ICMP one to be sent. This information
// is used to find out which transport endpoint must be notified about the ICMP
// packet. We only expect the payload, not the enclosing ICMP packet.
-func (e *endpoint) handleControl(typ stack.ControlType, extra uint32, pkt *stack.PacketBuffer) {
+func (e *endpoint) handleControl(errInfo stack.TransportError, pkt *stack.PacketBuffer) {
h, ok := pkt.Data.PullUp(header.IPv4MinimumSize)
if !ok {
return
@@ -54,10 +151,10 @@ func (e *endpoint) handleControl(typ stack.ControlType, extra uint32, pkt *stack
return
}
- // Skip the ip header, then deliver control message.
+ // Skip the ip header, then deliver the error.
pkt.Data.TrimFront(hlen)
p := hdr.TransportProtocol()
- e.dispatcher.DeliverTransportControlPacket(srcAddr, hdr.DestinationAddress(), ProtocolNumber, p, typ, extra, pkt)
+ e.dispatcher.DeliverTransportError(srcAddr, hdr.DestinationAddress(), ProtocolNumber, p, errInfo, pkt)
}
func (e *endpoint) handleICMP(pkt *stack.PacketBuffer) {
@@ -222,19 +319,16 @@ func (e *endpoint) handleICMP(pkt *stack.PacketBuffer) {
pkt.Data.TrimFront(header.ICMPv4MinimumSize)
switch h.Code() {
case header.ICMPv4HostUnreachable:
- e.handleControl(stack.ControlNoRoute, 0, pkt)
-
+ e.handleControl(&icmpv4DestinationHostUnreachableSockError{}, pkt)
case header.ICMPv4PortUnreachable:
- e.handleControl(stack.ControlPortUnreachable, 0, pkt)
-
+ e.handleControl(&icmpv4DestinationPortUnreachableSockError{}, pkt)
case header.ICMPv4FragmentationNeeded:
networkMTU, err := calculateNetworkMTU(uint32(h.MTU()), header.IPv4MinimumSize)
if err != nil {
networkMTU = 0
}
- e.handleControl(stack.ControlPacketTooBig, networkMTU, pkt)
+ e.handleControl(&icmpv4FragmentationNeededSockError{mtu: networkMTU}, pkt)
}
-
case header.ICMPv4SrcQuench:
received.srcQuench.Increment()
diff --git a/pkg/tcpip/network/ipv4/ipv4.go b/pkg/tcpip/network/ipv4/ipv4.go
index e146844c2..b2d626107 100644
--- a/pkg/tcpip/network/ipv4/ipv4.go
+++ b/pkg/tcpip/network/ipv4/ipv4.go
@@ -101,7 +101,7 @@ func (e *endpoint) HandleLinkResolutionFailure(pkt *stack.PacketBuffer) {
// Use the same control type as an ICMPv4 destination host unreachable error
// since the host is considered unreachable if we cannot resolve the link
// address to the next hop.
- e.handleControl(stack.ControlNoRoute, 0, pkt)
+ e.handleControl(&icmpv4DestinationHostUnreachableSockError{}, pkt)
}
// NewEndpoint creates a new ipv4 endpoint.
diff --git a/pkg/tcpip/network/ipv6/icmp.go b/pkg/tcpip/network/ipv6/icmp.go
index 12e5ead5e..dcfd93bab 100644
--- a/pkg/tcpip/network/ipv6/icmp.go
+++ b/pkg/tcpip/network/ipv6/icmp.go
@@ -23,11 +23,136 @@ import (
"gvisor.dev/gvisor/pkg/tcpip/stack"
)
+// icmpv6DestinationUnreachableSockError is a general ICMPv6 Destination
+// Unreachable error.
+//
+// +stateify savable
+type icmpv6DestinationUnreachableSockError struct{}
+
+// Origin implements tcpip.SockErrorCause.
+func (*icmpv6DestinationUnreachableSockError) Origin() tcpip.SockErrOrigin {
+ return tcpip.SockExtErrorOriginICMP6
+}
+
+// Type implements tcpip.SockErrorCause.
+func (*icmpv6DestinationUnreachableSockError) Type() uint8 {
+ return uint8(header.ICMPv6DstUnreachable)
+}
+
+// Info implements tcpip.SockErrorCause.
+func (*icmpv6DestinationUnreachableSockError) Info() uint32 {
+ return 0
+}
+
+var _ stack.TransportError = (*icmpv6DestinationNetworkUnreachableSockError)(nil)
+
+// icmpv6DestinationNetworkUnreachableSockError is an ICMPv6 Destination Network
+// Unreachable error.
+//
+// It indicates that the destination network is unreachable.
+//
+// +stateify savable
+type icmpv6DestinationNetworkUnreachableSockError struct {
+ icmpv6DestinationUnreachableSockError
+}
+
+// Code implements tcpip.SockErrorCause.
+func (*icmpv6DestinationNetworkUnreachableSockError) Code() uint8 {
+ return uint8(header.ICMPv6NetworkUnreachable)
+}
+
+// Kind implements stack.TransportError.
+func (*icmpv6DestinationNetworkUnreachableSockError) Kind() stack.TransportErrorKind {
+ return stack.DestinationNetworkUnreachableTransportError
+}
+
+var _ stack.TransportError = (*icmpv6DestinationPortUnreachableSockError)(nil)
+
+// icmpv6DestinationPortUnreachableSockError is an ICMPv6 Destination Port
+// Unreachable error.
+//
+// It indicates that a packet reached the destination host, but the transport
+// protocol was not active on the destination port.
+//
+// +stateify savable
+type icmpv6DestinationPortUnreachableSockError struct {
+ icmpv6DestinationUnreachableSockError
+}
+
+// Code implements tcpip.SockErrorCause.
+func (*icmpv6DestinationPortUnreachableSockError) Code() uint8 {
+ return uint8(header.ICMPv6PortUnreachable)
+}
+
+// Kind implements stack.TransportError.
+func (*icmpv6DestinationPortUnreachableSockError) Kind() stack.TransportErrorKind {
+ return stack.DestinationPortUnreachableTransportError
+}
+
+var _ stack.TransportError = (*icmpv6DestinationAddressUnreachableSockError)(nil)
+
+// icmpv6DestinationAddressUnreachableSockError is an ICMPv6 Destination Address
+// Unreachable error.
+//
+// It indicates that a packet was not able to reach the destination.
+//
+// +stateify savable
+type icmpv6DestinationAddressUnreachableSockError struct {
+ icmpv6DestinationUnreachableSockError
+}
+
+// Code implements tcpip.SockErrorCause.
+func (*icmpv6DestinationAddressUnreachableSockError) Code() uint8 {
+ return uint8(header.ICMPv6AddressUnreachable)
+}
+
+// Kind implements stack.TransportError.
+func (*icmpv6DestinationAddressUnreachableSockError) Kind() stack.TransportErrorKind {
+ return stack.DestinationHostUnreachableTransportError
+}
+
+var _ stack.TransportError = (*icmpv6PacketTooBigSockError)(nil)
+
+// icmpv6PacketTooBigSockError is an ICMPv6 Packet Too Big error.
+//
+// It indicates that a link exists on the path to the destination with an MTU
+// that is too small to carry the packet.
+//
+// +stateify savable
+type icmpv6PacketTooBigSockError struct {
+ mtu uint32
+}
+
+// Origin implements tcpip.SockErrorCause.
+func (*icmpv6PacketTooBigSockError) Origin() tcpip.SockErrOrigin {
+ return tcpip.SockExtErrorOriginICMP6
+}
+
+// Type implements tcpip.SockErrorCause.
+func (*icmpv6PacketTooBigSockError) Type() uint8 {
+ return uint8(header.ICMPv6PacketTooBig)
+}
+
+// Code implements tcpip.SockErrorCause.
+func (*icmpv6PacketTooBigSockError) Code() uint8 {
+ return uint8(header.ICMPv6UnusedCode)
+}
+
+// Info implements tcpip.SockErrorCause.
+func (e *icmpv6PacketTooBigSockError) Info() uint32 {
+ return e.mtu
+}
+
+// Kind implements stack.TransportError.
+func (*icmpv6PacketTooBigSockError) Kind() stack.TransportErrorKind {
+ return stack.PacketTooBigTransportError
+}
+
// handleControl handles the case when an ICMP packet contains the headers of
// the original packet that caused the ICMP one to be sent. This information is
// used to find out which transport endpoint must be notified about the ICMP
// packet.
-func (e *endpoint) handleControl(typ stack.ControlType, extra uint32, pkt *stack.PacketBuffer) {
+func (e *endpoint) handleControl(transErr stack.TransportError, pkt *stack.PacketBuffer) {
h, ok := pkt.Data.PullUp(header.IPv6MinimumSize)
if !ok {
return
@@ -67,8 +192,7 @@ func (e *endpoint) handleControl(typ stack.ControlType, extra uint32, pkt *stack
p = fragHdr.TransportProtocol()
}
- // Deliver the control packet to the transport endpoint.
- e.dispatcher.DeliverTransportControlPacket(src, hdr.DestinationAddress(), ProtocolNumber, p, typ, extra, pkt)
+ e.dispatcher.DeliverTransportError(src, hdr.DestinationAddress(), ProtocolNumber, p, transErr, pkt)
}
// getLinkAddrOption searches NDP options for a given link address option using
@@ -175,7 +299,7 @@ func (e *endpoint) handleICMP(pkt *stack.PacketBuffer, hasFragmentHeader bool) {
if err != nil {
networkMTU = 0
}
- e.handleControl(stack.ControlPacketTooBig, networkMTU, pkt)
+ e.handleControl(&icmpv6PacketTooBigSockError{mtu: networkMTU}, pkt)
case header.ICMPv6DstUnreachable:
received.dstUnreachable.Increment()
@@ -187,11 +311,10 @@ func (e *endpoint) handleICMP(pkt *stack.PacketBuffer, hasFragmentHeader bool) {
pkt.Data.TrimFront(header.ICMPv6DstUnreachableMinimumSize)
switch header.ICMPv6(hdr).Code() {
case header.ICMPv6NetworkUnreachable:
- e.handleControl(stack.ControlNetworkUnreachable, 0, pkt)
+ e.handleControl(&icmpv6DestinationNetworkUnreachableSockError{}, pkt)
case header.ICMPv6PortUnreachable:
- e.handleControl(stack.ControlPortUnreachable, 0, pkt)
+ e.handleControl(&icmpv6DestinationPortUnreachableSockError{}, pkt)
}
-
case header.ICMPv6NeighborSolicit:
received.neighborSolicit.Increment()
if !isNDPValid() || pkt.Data.Size() < header.ICMPv6NeighborSolicitMinimumSize {
diff --git a/pkg/tcpip/network/ipv6/ipv6.go b/pkg/tcpip/network/ipv6/ipv6.go
index e56eb5796..c2e8c3ea7 100644
--- a/pkg/tcpip/network/ipv6/ipv6.go
+++ b/pkg/tcpip/network/ipv6/ipv6.go
@@ -235,7 +235,7 @@ func (e *endpoint) HandleLinkResolutionFailure(pkt *stack.PacketBuffer) {
})
pkt.NICID = e.nic.ID()
pkt.NetworkProtocolNumber = ProtocolNumber
- e.handleControl(stack.ControlAddressUnreachable, 0, pkt)
+ e.handleControl(&icmpv6DestinationAddressUnreachableSockError{}, pkt)
}
// onAddressAssignedLocked handles an address being assigned.
diff --git a/pkg/tcpip/socketops.go b/pkg/tcpip/socketops.go
index 019d6a63c..1e00144a5 100644
--- a/pkg/tcpip/socketops.go
+++ b/pkg/tcpip/socketops.go
@@ -473,6 +473,48 @@ func (origin SockErrOrigin) IsICMPErr() bool {
return origin == SockExtErrorOriginICMP || origin == SockExtErrorOriginICMP6
}
+// SockErrorCause is the cause of a socket error.
+type SockErrorCause interface {
+ // Origin is the source of the error.
+ Origin() SockErrOrigin
+
+ // Type is the origin specific type of error.
+ Type() uint8
+
+ // Code is the origin and type specific error code.
+ Code() uint8
+
+ // Info is any extra information about the error.
+ Info() uint32
+}
+
+// LocalSockError is a socket error that originated from the local host.
+//
+// +stateify savable
+type LocalSockError struct {
+ info uint32
+}
+
+// Origin implements SockErrorCause.
+func (*LocalSockError) Origin() SockErrOrigin {
+ return SockExtErrorOriginLocal
+}
+
+// Type implements SockErrorCause.
+func (*LocalSockError) Type() uint8 {
+ return 0
+}
+
+// Code implements SockErrorCause.
+func (*LocalSockError) Code() uint8 {
+ return 0
+}
+
+// Info implements SockErrorCause.
+func (l *LocalSockError) Info() uint32 {
+ return l.info
+}
+
// SockError represents a queue entry in the per-socket error queue.
//
// +stateify savable
@@ -481,14 +523,8 @@ type SockError struct {
// Err is the error caused by the errant packet.
Err Error
- // ErrOrigin indicates the error origin.
- ErrOrigin SockErrOrigin
- // ErrType is the type in the ICMP header.
- ErrType uint8
- // ErrCode is the code in the ICMP header.
- ErrCode uint8
- // ErrInfo is additional info about the error.
- ErrInfo uint32
+ // Cause is the detailed cause of the error.
+ Cause SockErrorCause
// Payload is the errant packet's payload.
Payload []byte
@@ -540,12 +576,11 @@ func (so *SocketOptions) QueueErr(err *SockError) {
// QueueLocalErr queues a local error onto the local queue.
func (so *SocketOptions) QueueLocalErr(err Error, net NetworkProtocolNumber, info uint32, dst FullAddress, payload []byte) {
so.QueueErr(&SockError{
- Err: err,
- ErrOrigin: SockExtErrorOriginLocal,
- ErrInfo: info,
- Payload: payload,
- Dst: dst,
- NetProto: net,
+ Err: err,
+ Cause: &LocalSockError{info: info},
+ Payload: payload,
+ Dst: dst,
+ NetProto: net,
})
}
diff --git a/pkg/tcpip/stack/nic.go b/pkg/tcpip/stack/nic.go
index 693ea064a..41a489047 100644
--- a/pkg/tcpip/stack/nic.go
+++ b/pkg/tcpip/stack/nic.go
@@ -911,9 +911,8 @@ func (n *NIC) DeliverTransportPacket(protocol tcpip.TransportProtocolNumber, pkt
}
}
-// DeliverTransportControlPacket delivers control packets to the appropriate
-// transport protocol endpoint.
-func (n *NIC) DeliverTransportControlPacket(local, remote tcpip.Address, net tcpip.NetworkProtocolNumber, trans tcpip.TransportProtocolNumber, typ ControlType, extra uint32, pkt *PacketBuffer) {
+// DeliverTransportError implements TransportDispatcher.
+func (n *NIC) DeliverTransportError(local, remote tcpip.Address, net tcpip.NetworkProtocolNumber, trans tcpip.TransportProtocolNumber, transErr TransportError, pkt *PacketBuffer) {
state, ok := n.stack.transportProtocols[trans]
if !ok {
return
@@ -935,7 +934,7 @@ func (n *NIC) DeliverTransportControlPacket(local, remote tcpip.Address, net tcp
}
id := TransportEndpointID{srcPort, local, dstPort, remote}
- if n.stack.demux.deliverControlPacket(n, net, trans, typ, extra, pkt, id) {
+ if n.stack.demux.deliverError(n, net, trans, transErr, pkt, id) {
return
}
}
diff --git a/pkg/tcpip/stack/registration.go b/pkg/tcpip/stack/registration.go
index e02f7190c..d589f798d 100644
--- a/pkg/tcpip/stack/registration.go
+++ b/pkg/tcpip/stack/registration.go
@@ -49,31 +49,6 @@ type TransportEndpointID struct {
RemoteAddress tcpip.Address
}
-// ControlType is the type of network control message.
-type ControlType int
-
-// The following are the allowed values for ControlType values.
-// TODO(http://gvisor.dev/issue/3210): Support time exceeded messages.
-const (
- // ControlAddressUnreachable indicates that an IPv6 packet did not reach its
- // destination as the destination address was unreachable.
- //
- // This maps to the ICMPv6 Destination Ureachable Code 3 error; see
- // RFC 4443 section 3.1 for more details.
- ControlAddressUnreachable ControlType = iota
- ControlNetworkUnreachable
- // ControlNoRoute indicates that an IPv4 packet did not reach its destination
- // because the destination host was unreachable.
- //
- // This maps to the ICMPv4 Destination Ureachable Code 1 error; see
- // RFC 791's Destination Unreachable Message section (page 4) for more
- // details.
- ControlNoRoute
- ControlPacketTooBig
- ControlPortUnreachable
- ControlUnknown
-)
-
// NetworkPacketInfo holds information about a network layer packet.
type NetworkPacketInfo struct {
// LocalAddressBroadcast is true if the packet's local address is a broadcast
@@ -81,6 +56,39 @@ type NetworkPacketInfo struct {
LocalAddressBroadcast bool
}
+// TransportErrorKind enumerates error types that are handled by the transport
+// layer.
+type TransportErrorKind int
+
+const (
+ // PacketTooBigTransportError indicates that a packet did not reach its
+ // destination because a link on the path to the destination had an MTU that
+ // was too small to carry the packet.
+ PacketTooBigTransportError TransportErrorKind = iota
+
+ // DestinationHostUnreachableTransportError indicates that the destination
+ // host was unreachable.
+ DestinationHostUnreachableTransportError
+
+ // DestinationPortUnreachableTransportError indicates that a packet reached
+ // the destination host, but the transport protocol was not active on the
+ // destination port.
+ DestinationPortUnreachableTransportError
+
+ // DestinationNetworkUnreachableTransportError indicates that the destination
+ // network was unreachable.
+ DestinationNetworkUnreachableTransportError
+)
+
+// TransportError is a marker interface for errors that may be handled by the
+// transport layer.
+type TransportError interface {
+ tcpip.SockErrorCause
+
+ // Kind returns the type of the transport error.
+ Kind() TransportErrorKind
+}
+
// TransportEndpoint is the interface that needs to be implemented by transport
// protocol (e.g., tcp, udp) endpoints that can handle packets.
type TransportEndpoint interface {
@@ -93,10 +101,10 @@ type TransportEndpoint interface {
// HandlePacket takes ownership of the packet.
HandlePacket(TransportEndpointID, *PacketBuffer)
- // HandleControlPacket is called by the stack when new control (e.g.
- // ICMP) packets arrive to this transport endpoint.
- // HandleControlPacket takes ownership of pkt.
- HandleControlPacket(typ ControlType, extra uint32, pkt *PacketBuffer)
+ // HandleError is called when the transport endpoint receives an error.
+ //
+ // HandleError takes ownership of the packet buffer.
+ HandleError(TransportError, *PacketBuffer)
// Abort initiates an expedited endpoint teardown. It puts the endpoint
// in a closed state and frees all resources associated with it. This
@@ -248,14 +256,11 @@ type TransportDispatcher interface {
// DeliverTransportPacket takes ownership of the packet.
DeliverTransportPacket(tcpip.TransportProtocolNumber, *PacketBuffer) TransportPacketDisposition
- // DeliverTransportControlPacket delivers control packets to the
- // appropriate transport protocol endpoint.
- //
- // pkt.NetworkHeader must be set before calling
- // DeliverTransportControlPacket.
+ // DeliverTransportError delivers an error to the appropriate transport
+ // endpoint.
//
- // DeliverTransportControlPacket takes ownership of pkt.
- DeliverTransportControlPacket(local, remote tcpip.Address, net tcpip.NetworkProtocolNumber, trans tcpip.TransportProtocolNumber, typ ControlType, extra uint32, pkt *PacketBuffer)
+ // DeliverTransportError takes ownership of the packet buffer.
+ DeliverTransportError(local, remote tcpip.Address, _ tcpip.NetworkProtocolNumber, _ tcpip.TransportProtocolNumber, _ TransportError, _ *PacketBuffer)
}
// PacketLooping specifies where an outbound packet should be sent.
diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go
index 57ad412a1..a51d758d0 100644
--- a/pkg/tcpip/stack/stack.go
+++ b/pkg/tcpip/stack/stack.go
@@ -458,6 +458,18 @@ type Stack struct {
// receiveBufferSize holds the min/default/max receive buffer sizes for
// endpoints other than TCP.
receiveBufferSize ReceiveBufferSizeOption
+
+ // tcpInvalidRateLimit is the maximal rate for sending duplicate
+ // acknowledgements in response to incoming TCP packets that are for an existing
+ // connection but that are invalid due to any of the following reasons:
+ //
+ // a) out-of-window sequence number.
+ // b) out-of-window acknowledgement number.
+ // c) PAWS check failure (when implemented).
+ //
+ // This is required to prevent potential ACK loops.
+ // Setting this to 0 will disable all rate limiting.
+ tcpInvalidRateLimit time.Duration
}
// UniqueID is an abstract generator of unique identifiers.
@@ -668,6 +680,7 @@ func New(opts Options) *Stack {
Default: DefaultBufferSize,
Max: DefaultMaxBufferSize,
},
+ tcpInvalidRateLimit: defaultTCPInvalidRateLimit,
}
// Add specified network protocols.
diff --git a/pkg/tcpip/stack/stack_options.go b/pkg/tcpip/stack/stack_options.go
index 8d9b20b7e..3066f4ffd 100644
--- a/pkg/tcpip/stack/stack_options.go
+++ b/pkg/tcpip/stack/stack_options.go
@@ -15,6 +15,8 @@
package stack
import (
+ "time"
+
"gvisor.dev/gvisor/pkg/tcpip"
)
@@ -29,6 +31,10 @@ const (
// DefaultMaxBufferSize is the default maximum permitted size of a
// send/receive buffer.
DefaultMaxBufferSize = 4 << 20 // 4 MiB
+
+ // defaultTCPInvalidRateLimit is the default value for
+ // stack.TCPInvalidRateLimit.
+ defaultTCPInvalidRateLimit = 500 * time.Millisecond
)
// ReceiveBufferSizeOption is used by stack.(Stack*).Option/SetOption to
@@ -39,6 +45,10 @@ type ReceiveBufferSizeOption struct {
Max int
}
+// TCPInvalidRateLimitOption is used by stack.(Stack*).Option/SetOption to get/set
+// stack.tcpInvalidRateLimit.
+type TCPInvalidRateLimitOption time.Duration
+
// SetOption allows setting stack wide options.
func (s *Stack) SetOption(option interface{}) tcpip.Error {
switch v := option.(type) {
@@ -74,6 +84,15 @@ func (s *Stack) SetOption(option interface{}) tcpip.Error {
s.mu.Unlock()
return nil
+ case TCPInvalidRateLimitOption:
+ if v < 0 {
+ return &tcpip.ErrInvalidOptionValue{}
+ }
+ s.mu.Lock()
+ s.tcpInvalidRateLimit = time.Duration(v)
+ s.mu.Unlock()
+ return nil
+
default:
return &tcpip.ErrUnknownProtocolOption{}
}
@@ -94,6 +113,12 @@ func (s *Stack) Option(option interface{}) tcpip.Error {
s.mu.RUnlock()
return nil
+ case *TCPInvalidRateLimitOption:
+ s.mu.RLock()
+ *v = TCPInvalidRateLimitOption(s.tcpInvalidRateLimit)
+ s.mu.RUnlock()
+ return nil
+
default:
return &tcpip.ErrUnknownProtocolOption{}
}
diff --git a/pkg/tcpip/stack/stack_test.go b/pkg/tcpip/stack/stack_test.go
index 375cd3080..b641a4aaa 100644
--- a/pkg/tcpip/stack/stack_test.go
+++ b/pkg/tcpip/stack/stack_test.go
@@ -138,12 +138,15 @@ func (f *fakeNetworkEndpoint) HandlePacket(pkt *stack.PacketBuffer) {
return
}
pkt.Data.TrimFront(fakeNetHeaderLen)
- f.dispatcher.DeliverTransportControlPacket(
+ f.dispatcher.DeliverTransportError(
tcpip.Address(nb[srcAddrOffset:srcAddrOffset+1]),
tcpip.Address(nb[dstAddrOffset:dstAddrOffset+1]),
fakeNetNumber,
tcpip.TransportProtocolNumber(nb[protocolNumberOffset]),
- stack.ControlPortUnreachable, 0, pkt)
+ // Nothing checks the error.
+ nil, /* transport error */
+ pkt,
+ )
return
}
diff --git a/pkg/tcpip/stack/transport_demuxer.go b/pkg/tcpip/stack/transport_demuxer.go
index 26eceb804..7d8d0851e 100644
--- a/pkg/tcpip/stack/transport_demuxer.go
+++ b/pkg/tcpip/stack/transport_demuxer.go
@@ -182,9 +182,8 @@ func (epsByNIC *endpointsByNIC) handlePacket(id TransportEndpointID, pkt *Packet
epsByNIC.mu.RUnlock() // Don't use defer for performance reasons.
}
-// handleControlPacket delivers a control packet to the transport endpoint
-// identified by id.
-func (epsByNIC *endpointsByNIC) handleControlPacket(n *NIC, id TransportEndpointID, typ ControlType, extra uint32, pkt *PacketBuffer) {
+// handleError delivers an error to the transport endpoint identified by id.
+func (epsByNIC *endpointsByNIC) handleError(n *NIC, id TransportEndpointID, transErr TransportError, pkt *PacketBuffer) {
epsByNIC.mu.RLock()
defer epsByNIC.mu.RUnlock()
@@ -200,7 +199,7 @@ func (epsByNIC *endpointsByNIC) handleControlPacket(n *NIC, id TransportEndpoint
// broadcast like we are doing with handlePacket above?
// multiPortEndpoints are guaranteed to have at least one element.
- selectEndpoint(id, mpep, epsByNIC.seed).HandleControlPacket(typ, extra, pkt)
+ selectEndpoint(id, mpep, epsByNIC.seed).HandleError(transErr, pkt)
}
// registerEndpoint returns true if it succeeds. It fails and returns
@@ -596,9 +595,11 @@ func (d *transportDemuxer) deliverRawPacket(protocol tcpip.TransportProtocolNumb
return foundRaw
}
-// deliverControlPacket attempts to deliver the given control packet. Returns
-// true if it found an endpoint, false otherwise.
-func (d *transportDemuxer) deliverControlPacket(n *NIC, net tcpip.NetworkProtocolNumber, trans tcpip.TransportProtocolNumber, typ ControlType, extra uint32, pkt *PacketBuffer, id TransportEndpointID) bool {
+// deliverError attempts to deliver the given error to the appropriate transport
+// endpoint.
+//
+// Returns true if the error was delivered.
+func (d *transportDemuxer) deliverError(n *NIC, net tcpip.NetworkProtocolNumber, trans tcpip.TransportProtocolNumber, transErr TransportError, pkt *PacketBuffer, id TransportEndpointID) bool {
eps, ok := d.protocol[protocolIDs{net, trans}]
if !ok {
return false
@@ -611,7 +612,7 @@ func (d *transportDemuxer) deliverControlPacket(n *NIC, net tcpip.NetworkProtoco
return false
}
- ep.handleControlPacket(n, id, typ, extra, pkt)
+ ep.handleError(n, id, transErr, pkt)
return true
}
diff --git a/pkg/tcpip/stack/transport_test.go b/pkg/tcpip/stack/transport_test.go
index cf5de747b..bebf4e6b5 100644
--- a/pkg/tcpip/stack/transport_test.go
+++ b/pkg/tcpip/stack/transport_test.go
@@ -237,7 +237,7 @@ func (f *fakeTransportEndpoint) HandlePacket(id stack.TransportEndpointID, pkt *
f.acceptQueue = append(f.acceptQueue, ep)
}
-func (f *fakeTransportEndpoint) HandleControlPacket(stack.ControlType, uint32, *stack.PacketBuffer) {
+func (f *fakeTransportEndpoint) HandleError(stack.TransportError, *stack.PacketBuffer) {
// Increment the number of received control packets.
f.proto.controlCount++
}
diff --git a/pkg/tcpip/tests/integration/link_resolution_test.go b/pkg/tcpip/tests/integration/link_resolution_test.go
index b3a5d49d7..f2301a9e6 100644
--- a/pkg/tcpip/tests/integration/link_resolution_test.go
+++ b/pkg/tcpip/tests/integration/link_resolution_test.go
@@ -247,6 +247,14 @@ func TestPing(t *testing.T) {
}
}
+type transportError struct {
+ origin tcpip.SockErrOrigin
+ typ uint8
+ code uint8
+ info uint32
+ kind stack.TransportErrorKind
+}
+
func TestTCPLinkResolutionFailure(t *testing.T) {
const (
host1NICID = 1
@@ -259,6 +267,7 @@ func TestTCPLinkResolutionFailure(t *testing.T) {
remoteAddr tcpip.Address
expectedWriteErr tcpip.Error
sockError tcpip.SockError
+ transErr transportError
}{
{
name: "IPv4 with resolvable remote",
@@ -278,10 +287,7 @@ func TestTCPLinkResolutionFailure(t *testing.T) {
remoteAddr: ipv4Addr3.AddressWithPrefix.Address,
expectedWriteErr: &tcpip.ErrNoRoute{},
sockError: tcpip.SockError{
- Err: &tcpip.ErrNoRoute{},
- ErrType: byte(header.ICMPv4DstUnreachable),
- ErrCode: byte(header.ICMPv4HostUnreachable),
- ErrOrigin: tcpip.SockExtErrorOriginICMP,
+ Err: &tcpip.ErrNoRoute{},
Dst: tcpip.FullAddress{
NIC: host1NICID,
Addr: ipv4Addr3.AddressWithPrefix.Address,
@@ -293,6 +299,12 @@ func TestTCPLinkResolutionFailure(t *testing.T) {
},
NetProto: ipv4.ProtocolNumber,
},
+ transErr: transportError{
+ origin: tcpip.SockExtErrorOriginICMP,
+ typ: uint8(header.ICMPv4DstUnreachable),
+ code: uint8(header.ICMPv4HostUnreachable),
+ kind: stack.DestinationHostUnreachableTransportError,
+ },
},
{
name: "IPv6 without resolvable remote",
@@ -300,10 +312,7 @@ func TestTCPLinkResolutionFailure(t *testing.T) {
remoteAddr: ipv6Addr3.AddressWithPrefix.Address,
expectedWriteErr: &tcpip.ErrNoRoute{},
sockError: tcpip.SockError{
- Err: &tcpip.ErrNoRoute{},
- ErrType: byte(header.ICMPv6DstUnreachable),
- ErrCode: byte(header.ICMPv6AddressUnreachable),
- ErrOrigin: tcpip.SockExtErrorOriginICMP6,
+ Err: &tcpip.ErrNoRoute{},
Dst: tcpip.FullAddress{
NIC: host1NICID,
Addr: ipv6Addr3.AddressWithPrefix.Address,
@@ -315,6 +324,12 @@ func TestTCPLinkResolutionFailure(t *testing.T) {
},
NetProto: ipv6.ProtocolNumber,
},
+ transErr: transportError{
+ origin: tcpip.SockExtErrorOriginICMP6,
+ typ: uint8(header.ICMPv6DstUnreachable),
+ code: uint8(header.ICMPv6AddressUnreachable),
+ kind: stack.DestinationHostUnreachableTransportError,
+ },
},
}
@@ -393,9 +408,12 @@ func TestTCPLinkResolutionFailure(t *testing.T) {
// are pre defined so we can simply compare pointers.
return a == b
}),
- // Ignore the payload since we do not know the TCP seq/ack numbers.
checker.IgnoreCmpPath(
+ // Ignore the payload since we do not know the TCP seq/ack numbers.
"Payload",
+ // Ignore the cause since we will compare its properties separately
+ // since the concrete type of the cause is unknown.
+ "Cause",
),
}
@@ -407,6 +425,24 @@ func TestTCPLinkResolutionFailure(t *testing.T) {
if diff := cmp.Diff(&test.sockError, sockErr, sockErrCmpOpts...); diff != "" {
t.Errorf("socket error mismatch (-want +got):\n%s", diff)
}
+
+ transErr, ok := sockErr.Cause.(stack.TransportError)
+ if !ok {
+ t.Fatalf("socket error cause is not a transport error; cause = %#v", sockErr.Cause)
+ }
+ if diff := cmp.Diff(
+ test.transErr,
+ transportError{
+ origin: transErr.Origin(),
+ typ: transErr.Type(),
+ code: transErr.Code(),
+ info: transErr.Info(),
+ kind: transErr.Kind(),
+ },
+ cmp.AllowUnexported(transportError{}),
+ ); diff != "" {
+ t.Errorf("socket error mismatch (-want +got):\n%s", diff)
+ }
})
}
}
diff --git a/pkg/tcpip/transport/icmp/endpoint.go b/pkg/tcpip/transport/icmp/endpoint.go
index 3cf05520d..f5e1a6e45 100644
--- a/pkg/tcpip/transport/icmp/endpoint.go
+++ b/pkg/tcpip/transport/icmp/endpoint.go
@@ -778,9 +778,8 @@ func (e *endpoint) HandlePacket(id stack.TransportEndpointID, pkt *stack.PacketB
}
}
-// HandleControlPacket implements stack.TransportEndpoint.HandleControlPacket.
-func (e *endpoint) HandleControlPacket(typ stack.ControlType, extra uint32, pkt *stack.PacketBuffer) {
-}
+// HandleError implements stack.TransportEndpoint.
+func (*endpoint) HandleError(stack.TransportError, *stack.PacketBuffer) {}
// State implements tcpip.Endpoint.State. The ICMP endpoint currently doesn't
// expose internal socket state.
diff --git a/pkg/tcpip/transport/tcp/connect.go b/pkg/tcpip/transport/tcp/connect.go
index 4695b66d6..34a631b53 100644
--- a/pkg/tcpip/transport/tcp/connect.go
+++ b/pkg/tcpip/transport/tcp/connect.go
@@ -333,7 +333,9 @@ func (h *handshake) synRcvdState(s *segment) tcpip.Error {
// number and "After sending the acknowledgment, drop the unacceptable
// segment and return."
if !s.sequenceNumber.InWindow(h.ackNum, h.rcvWnd) {
- h.ep.sendRaw(buffer.VectorisedView{}, header.TCPFlagAck, h.iss+1, h.ackNum, h.rcvWnd)
+ if h.ep.allowOutOfWindowAck() {
+ h.ep.sendRaw(buffer.VectorisedView{}, header.TCPFlagAck, h.iss+1, h.ackNum, h.rcvWnd)
+ }
return nil
}
@@ -1185,8 +1187,7 @@ func (e *endpoint) handleSegment(s *segment) (cont bool, err tcpip.Error) {
// endpoint MUST terminate its connection. The local TCP endpoint
// should then rely on SYN retransmission from the remote end to
// re-establish the connection.
-
- e.snd.sendAck()
+ e.snd.maybeSendOutOfWindowAck(s)
} else if s.flagIsSet(header.TCPFlagAck) {
// Patch the window size in the segment according to the
// send window scale.
diff --git a/pkg/tcpip/transport/tcp/cubic.go b/pkg/tcpip/transport/tcp/cubic.go
index 7b1f5e763..1975f1a44 100644
--- a/pkg/tcpip/transport/tcp/cubic.go
+++ b/pkg/tcpip/transport/tcp/cubic.go
@@ -178,8 +178,8 @@ func (c *cubicState) getCwnd(packetsAcked, sndCwnd int, srtt time.Duration) int
return int(cwnd)
}
-// HandleNDupAcks implements congestionControl.HandleNDupAcks.
-func (c *cubicState) HandleNDupAcks() {
+// HandleLossDetected implements congestionControl.HandleLossDetected.
+func (c *cubicState) HandleLossDetected() {
// See: https://tools.ietf.org/html/rfc8312#section-4.5
c.numCongestionEvents++
c.t = time.Now()
diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go
index 6e4e26c39..4e5a6089f 100644
--- a/pkg/tcpip/transport/tcp/endpoint.go
+++ b/pkg/tcpip/transport/tcp/endpoint.go
@@ -688,6 +688,10 @@ type endpoint struct {
// ops is used to get socket level options.
ops tcpip.SocketOptions
+
+ // lastOutOfWindowAckTime is the time at which the an ACK was sent in response
+ // to an out of window segment being received by this endpoint.
+ lastOutOfWindowAckTime time.Time `state:".(unixTime)"`
}
// UniqueID implements stack.TransportEndpoint.UniqueID.
@@ -2683,7 +2687,7 @@ func (e *endpoint) enqueueSegment(s *segment) bool {
return true
}
-func (e *endpoint) onICMPError(err tcpip.Error, errType byte, errCode byte, extra uint32, pkt *stack.PacketBuffer) {
+func (e *endpoint) onICMPError(err tcpip.Error, transErr stack.TransportError, pkt *stack.PacketBuffer) {
// Update last error first.
e.lastErrorMu.Lock()
e.lastError = err
@@ -2692,11 +2696,8 @@ func (e *endpoint) onICMPError(err tcpip.Error, errType byte, errCode byte, extr
// Update the error queue if IP_RECVERR is enabled.
if e.SocketOptions().GetRecvError() {
e.SocketOptions().QueueErr(&tcpip.SockError{
- Err: err,
- ErrOrigin: header.ICMPOriginFromNetProto(pkt.NetworkProtocolNumber),
- ErrType: errType,
- ErrCode: errCode,
- ErrInfo: extra,
+ Err: err,
+ Cause: transErr,
// Linux passes the payload with the TCP header. We don't know if the TCP
// header even exists, it may not for fragmented packets.
Payload: pkt.Data.ToView(),
@@ -2718,27 +2719,26 @@ func (e *endpoint) onICMPError(err tcpip.Error, errType byte, errCode byte, extr
e.notifyProtocolGoroutine(notifyError)
}
-// HandleControlPacket implements stack.TransportEndpoint.HandleControlPacket.
-func (e *endpoint) HandleControlPacket(typ stack.ControlType, extra uint32, pkt *stack.PacketBuffer) {
- switch typ {
- case stack.ControlPacketTooBig:
+// HandleError implements stack.TransportEndpoint.
+func (e *endpoint) HandleError(transErr stack.TransportError, pkt *stack.PacketBuffer) {
+ handlePacketTooBig := func(mtu uint32) {
e.sndBufMu.Lock()
e.packetTooBigCount++
- if v := int(extra); v < e.sndMTU {
+ if v := int(mtu); v < e.sndMTU {
e.sndMTU = v
}
e.sndBufMu.Unlock()
-
e.notifyProtocolGoroutine(notifyMTUChanged)
+ }
- case stack.ControlNoRoute:
- e.onICMPError(&tcpip.ErrNoRoute{}, byte(header.ICMPv4DstUnreachable), byte(header.ICMPv4HostUnreachable), extra, pkt)
-
- case stack.ControlAddressUnreachable:
- e.onICMPError(&tcpip.ErrNoRoute{}, byte(header.ICMPv6DstUnreachable), byte(header.ICMPv6AddressUnreachable), extra, pkt)
-
- case stack.ControlNetworkUnreachable:
- e.onICMPError(&tcpip.ErrNetworkUnreachable{}, byte(header.ICMPv6DstUnreachable), byte(header.ICMPv6NetworkUnreachable), extra, pkt)
+ // TODO(gvisor.dev/issues/5270): Handle all transport errors.
+ switch transErr.Kind() {
+ case stack.PacketTooBigTransportError:
+ handlePacketTooBig(transErr.Info())
+ case stack.DestinationHostUnreachableTransportError:
+ e.onICMPError(&tcpip.ErrNoRoute{}, transErr, pkt)
+ case stack.DestinationNetworkUnreachableTransportError:
+ e.onICMPError(&tcpip.ErrNetworkUnreachable{}, transErr, pkt)
}
}
@@ -3129,3 +3129,19 @@ func GetTCPSendBufferLimits(s tcpip.StackHandler) tcpip.SendBufferSizeOption {
Max: ss.Max,
}
}
+
+// allowOutOfWindowAck returns true if an out-of-window ACK can be sent now.
+func (e *endpoint) allowOutOfWindowAck() bool {
+ var limit stack.TCPInvalidRateLimitOption
+ if err := e.stack.Option(&limit); err != nil {
+ panic(fmt.Sprintf("e.stack.Option(%+v) failed with error: %s", limit, err))
+ }
+
+ now := time.Now()
+ if now.Sub(e.lastOutOfWindowAckTime) < time.Duration(limit) {
+ return false
+ }
+
+ e.lastOutOfWindowAckTime = now
+ return true
+}
diff --git a/pkg/tcpip/transport/tcp/endpoint_state.go b/pkg/tcpip/transport/tcp/endpoint_state.go
index c21dbc682..e4368026f 100644
--- a/pkg/tcpip/transport/tcp/endpoint_state.go
+++ b/pkg/tcpip/transport/tcp/endpoint_state.go
@@ -308,6 +308,16 @@ func (e *endpoint) loadRecentTSTime(unix unixTime) {
e.recentTSTime = time.Unix(unix.second, unix.nano)
}
+// saveLastOutOfWindowAckTime is invoked by stateify.
+func (e *endpoint) saveLastOutOfWindowAckTime() unixTime {
+ return unixTime{e.lastOutOfWindowAckTime.Unix(), e.lastOutOfWindowAckTime.UnixNano()}
+}
+
+// loadLastOutOfWindowAckTime is invoked by stateify.
+func (e *endpoint) loadLastOutOfWindowAckTime(unix unixTime) {
+ e.lastOutOfWindowAckTime = time.Unix(unix.second, unix.nano)
+}
+
// saveMeasureTime is invoked by stateify.
func (r *rcvBufAutoTuneParams) saveMeasureTime() unixTime {
return unixTime{r.measureTime.Unix(), r.measureTime.UnixNano()}
diff --git a/pkg/tcpip/transport/tcp/rack.go b/pkg/tcpip/transport/tcp/rack.go
index d85cb405a..e862f159e 100644
--- a/pkg/tcpip/transport/tcp/rack.go
+++ b/pkg/tcpip/transport/tcp/rack.go
@@ -301,7 +301,7 @@ func (s *sender) detectTLPRecovery(ack seqnum.Value, rcvdSeg *segment) {
// Step 2. Either the original packet or the retransmission (in the
// form of a probe) was lost. Invoke a congestion control response
// equivalent to fast recovery.
- s.cc.HandleNDupAcks()
+ s.cc.HandleLossDetected()
s.enterRecovery()
s.leaveRecovery()
}
diff --git a/pkg/tcpip/transport/tcp/rcv.go b/pkg/tcpip/transport/tcp/rcv.go
index 7a7c402c4..a5c82b8fa 100644
--- a/pkg/tcpip/transport/tcp/rcv.go
+++ b/pkg/tcpip/transport/tcp/rcv.go
@@ -385,7 +385,7 @@ func (r *receiver) handleRcvdSegmentClosing(s *segment, state EndpointState, clo
// fails, we ignore the packet:
// https://github.com/torvalds/linux/blob/v5.8/net/ipv4/tcp_input.c#L5591
if r.ep.snd.sndNxt.LessThan(s.ackNumber) {
- r.ep.snd.sendAck()
+ r.ep.snd.maybeSendOutOfWindowAck(s)
return true, nil
}
@@ -454,7 +454,7 @@ func (r *receiver) handleRcvdSegment(s *segment) (drop bool, err tcpip.Error) {
// send an ACK and stop further processing of the segment.
// This is according to RFC 793, page 68.
if !r.acceptable(segSeq, segLen) {
- r.ep.snd.sendAck()
+ r.ep.snd.maybeSendOutOfWindowAck(s)
return true, nil
}
diff --git a/pkg/tcpip/transport/tcp/reno.go b/pkg/tcpip/transport/tcp/reno.go
index f83ebc717..ff39780a5 100644
--- a/pkg/tcpip/transport/tcp/reno.go
+++ b/pkg/tcpip/transport/tcp/reno.go
@@ -79,10 +79,10 @@ func (r *renoState) Update(packetsAcked int) {
r.updateCongestionAvoidance(packetsAcked)
}
-// HandleNDupAcks implements congestionControl.HandleNDupAcks.
-func (r *renoState) HandleNDupAcks() {
- // A retransmit was triggered due to nDupAckThreshold
- // being hit. Reduce our slow start threshold.
+// HandleLossDetected implements congestionControl.HandleLossDetected.
+func (r *renoState) HandleLossDetected() {
+ // A retransmit was triggered due to nDupAckThreshold or when RACK
+ // detected loss. Reduce our slow start threshold.
r.reduceSlowStartThreshold()
}
diff --git a/pkg/tcpip/transport/tcp/snd.go b/pkg/tcpip/transport/tcp/snd.go
index dfc8fd248..463a259b7 100644
--- a/pkg/tcpip/transport/tcp/snd.go
+++ b/pkg/tcpip/transport/tcp/snd.go
@@ -51,9 +51,10 @@ const (
// congestionControl is an interface that must be implemented by any supported
// congestion control algorithm.
type congestionControl interface {
- // HandleNDupAcks is invoked when sender.dupAckCount >= nDupAckThreshold
- // just before entering fast retransmit.
- HandleNDupAcks()
+ // HandleLossDetected is invoked when the loss is detected by RACK or
+ // sender.dupAckCount >= nDupAckThreshold just before entering fast
+ // retransmit.
+ HandleLossDetected()
// HandleRTOExpired is invoked when the retransmit timer expires.
HandleRTOExpired()
@@ -1152,7 +1153,7 @@ func (s *sender) detectLoss(seg *segment) (fastRetransmit bool) {
s.dupAckCount = 0
return false
}
- s.cc.HandleNDupAcks()
+ s.cc.HandleLossDetected()
s.enterRecovery()
s.dupAckCount = 0
return true
@@ -1548,3 +1549,13 @@ func (s *sender) sendSegmentFromView(data buffer.VectorisedView, flags byte, seq
return s.ep.sendRaw(data, flags, seq, rcvNxt, rcvWnd)
}
+
+// maybeSendOutOfWindowAck sends an ACK if we are not being rate limited
+// currently.
+func (s *sender) maybeSendOutOfWindowAck(seg *segment) {
+ // Data packets are unlikely to be part of an ACK loop. So always send
+ // an ACK for a packet w/ data.
+ if seg.payloadSize() > 0 || s.ep.allowOutOfWindowAck() {
+ s.sendAck()
+ }
+}
diff --git a/pkg/tcpip/transport/tcp/tcp_test.go b/pkg/tcpip/transport/tcp/tcp_test.go
index da2730e27..cd3c4a027 100644
--- a/pkg/tcpip/transport/tcp/tcp_test.go
+++ b/pkg/tcpip/transport/tcp/tcp_test.go
@@ -6302,6 +6302,13 @@ func TestReceiveBufferAutoTuning(t *testing.T) {
// Enable Auto-tuning.
stk := c.Stack()
+ // Disable out of window rate limiting for this test by setting it to 0 as we
+ // use out of window ACKs to measure the advertised window.
+ var tcpInvalidRateLimit stack.TCPInvalidRateLimitOption
+ if err := stk.SetOption(tcpInvalidRateLimit); err != nil {
+ t.Fatalf("e.stack.SetOption(%#v) = %s", tcpInvalidRateLimit, err)
+ }
+
const receiveBufferSize = 80 << 10 // 80KB.
const maxReceiveBufferSize = receiveBufferSize * 10
{
diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go
index 31a5ddce9..afd8f4d39 100644
--- a/pkg/tcpip/transport/udp/endpoint.go
+++ b/pkg/tcpip/transport/udp/endpoint.go
@@ -1322,7 +1322,7 @@ func (e *endpoint) HandlePacket(id stack.TransportEndpointID, pkt *stack.PacketB
}
}
-func (e *endpoint) onICMPError(err tcpip.Error, errType byte, errCode byte, extra uint32, pkt *stack.PacketBuffer) {
+func (e *endpoint) onICMPError(err tcpip.Error, transErr stack.TransportError, pkt *stack.PacketBuffer) {
// Update last error first.
e.lastErrorMu.Lock()
e.lastError = err
@@ -1338,12 +1338,9 @@ func (e *endpoint) onICMPError(err tcpip.Error, errType byte, errCode byte, extr
}
e.SocketOptions().QueueErr(&tcpip.SockError{
- Err: err,
- ErrOrigin: header.ICMPOriginFromNetProto(pkt.NetworkProtocolNumber),
- ErrType: errType,
- ErrCode: errCode,
- ErrInfo: extra,
- Payload: payload,
+ Err: err,
+ Cause: transErr,
+ Payload: payload,
Dst: tcpip.FullAddress{
NIC: pkt.NICID,
Addr: e.ID.RemoteAddress,
@@ -1362,24 +1359,13 @@ func (e *endpoint) onICMPError(err tcpip.Error, errType byte, errCode byte, extr
e.waiterQueue.Notify(waiter.EventErr)
}
-// HandleControlPacket implements stack.TransportEndpoint.HandleControlPacket.
-func (e *endpoint) HandleControlPacket(typ stack.ControlType, extra uint32, pkt *stack.PacketBuffer) {
- if typ == stack.ControlPortUnreachable {
+// HandleError implements stack.TransportEndpoint.
+func (e *endpoint) HandleError(transErr stack.TransportError, pkt *stack.PacketBuffer) {
+ // TODO(gvisor.dev/issues/5270): Handle all transport errors.
+ switch transErr.Kind() {
+ case stack.DestinationPortUnreachableTransportError:
if e.EndpointState() == StateConnected {
- var errType byte
- var errCode byte
- switch pkt.NetworkProtocolNumber {
- case header.IPv4ProtocolNumber:
- errType = byte(header.ICMPv4DstUnreachable)
- errCode = byte(header.ICMPv4PortUnreachable)
- case header.IPv6ProtocolNumber:
- errType = byte(header.ICMPv6DstUnreachable)
- errCode = byte(header.ICMPv6PortUnreachable)
- default:
- panic(fmt.Sprintf("unsupported net proto for infering ICMP type and code: %d", pkt.NetworkProtocolNumber))
- }
- e.onICMPError(&tcpip.ErrConnectionRefused{}, errType, errCode, extra, pkt)
- return
+ e.onICMPError(&tcpip.ErrConnectionRefused{}, transErr, pkt)
}
}
}