summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorTony Gong <gongt@google.com>2020-07-07 16:13:21 -0700
committergVisor bot <gvisor-bot@google.com>2020-07-07 16:14:49 -0700
commit76c7bc51b7b02c4ba83c0a064c3629bb5ee91340 (patch)
tree48c1525ebbb54fc47a3cbd46c4e3efee957300aa
parent7e4d2d63eef3f643222181b3a227bfc7ebe44db2 (diff)
Set IPv4 ID on all non-atomic datagrams
RFC 6864 imposes various restrictions on the uniqueness of the IPv4 Identification field for non-atomic datagrams, defined as an IP datagram that either can be fragmented (DF=0) or is already a fragment (MF=1 or positive fragment offset). In order to be compliant, the ID field is assigned for all non-atomic datagrams. Add a TCP unit test that induces retransmissions and checks that the IPv4 ID field is unique every time. Add basic handling of the IP_MTU_DISCOVER socket option so that the option can be used to disable PMTU discovery, effectively setting DF=0. Attempting to set the sockopt to anything other than disabled will fail because PMTU discovery is currently not implemented, and the default behavior matches that of disabled. PiperOrigin-RevId: 320081842
-rw-r--r--pkg/tcpip/network/ipv4/ipv4.go21
-rw-r--r--pkg/tcpip/tcpip.go25
-rw-r--r--pkg/tcpip/transport/tcp/endpoint.go12
-rw-r--r--pkg/tcpip/transport/tcp/tcp_test.go57
-rw-r--r--pkg/tcpip/transport/udp/endpoint.go11
-rw-r--r--test/packetimpact/tests/BUILD2
-rw-r--r--test/syscalls/linux/raw_socket_hdrincl.cc51
7 files changed, 117 insertions, 62 deletions
diff --git a/pkg/tcpip/network/ipv4/ipv4.go b/pkg/tcpip/network/ipv4/ipv4.go
index 7e9f16c90..b1776e5ee 100644
--- a/pkg/tcpip/network/ipv4/ipv4.go
+++ b/pkg/tcpip/network/ipv4/ipv4.go
@@ -225,12 +225,10 @@ func (e *endpoint) writePacketFragments(r *stack.Route, gso *stack.GSO, mtu int,
func (e *endpoint) addIPHeader(r *stack.Route, hdr *buffer.Prependable, payloadSize int, params stack.NetworkHeaderParams) header.IPv4 {
ip := header.IPv4(hdr.Prepend(header.IPv4MinimumSize))
length := uint16(hdr.UsedLength() + payloadSize)
- id := uint32(0)
- if length > header.IPv4MaximumHeaderSize+8 {
- // Packets of 68 bytes or less are required by RFC 791 to not be
- // fragmented, so we only assign ids to larger packets.
- id = atomic.AddUint32(&e.protocol.ids[hashRoute(r, params.Protocol, e.protocol.hashIV)%buckets], 1)
- }
+ // RFC 6864 section 4.3 mandates uniqueness of ID values for non-atomic
+ // datagrams. Since the DF bit is never being set here, all datagrams
+ // are non-atomic and need an ID.
+ id := atomic.AddUint32(&e.protocol.ids[hashRoute(r, params.Protocol, e.protocol.hashIV)%buckets], 1)
ip.Encode(&header.IPv4Fields{
IHL: header.IPv4MinimumSize,
TotalLength: length,
@@ -376,13 +374,12 @@ func (e *endpoint) WriteHeaderIncludedPacket(r *stack.Route, pkt *stack.PacketBu
// Set the packet ID when zero.
if ip.ID() == 0 {
- id := uint32(0)
- if pkt.Data.Size() > header.IPv4MaximumHeaderSize+8 {
- // Packets of 68 bytes or less are required by RFC 791 to not be
- // fragmented, so we only assign ids to larger packets.
- id = atomic.AddUint32(&e.protocol.ids[hashRoute(r, 0 /* protocol */, e.protocol.hashIV)%buckets], 1)
+ // RFC 6864 section 4.3 mandates uniqueness of ID values for
+ // non-atomic datagrams, so assign an ID to all such datagrams
+ // according to the definition given in RFC 6864 section 4.
+ if ip.Flags()&header.IPv4FlagDontFragment == 0 || ip.Flags()&header.IPv4FlagMoreFragments != 0 || ip.FragmentOffset() > 0 {
+ ip.SetID(uint16(atomic.AddUint32(&e.protocol.ids[hashRoute(r, 0 /* protocol */, e.protocol.hashIV)%buckets], 1)))
}
- ip.SetID(uint16(id))
}
// Always set the checksum.
diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go
index 2be1c107a..2f9872dc6 100644
--- a/pkg/tcpip/tcpip.go
+++ b/pkg/tcpip/tcpip.go
@@ -673,6 +673,13 @@ const (
// TCP_MAXSEG option.
MaxSegOption
+ // MTUDiscoverOption is used to set/get the path MTU discovery setting.
+ //
+ // NOTE: Setting this option to any other value than PMTUDiscoveryDont
+ // is not supported and will fail as such, and getting this option will
+ // always return PMTUDiscoveryDont.
+ MTUDiscoverOption
+
// MulticastTTLOption is used by SetSockOptInt/GetSockOptInt to control
// the default TTL value for multicast messages. The default is 1.
MulticastTTLOption
@@ -714,6 +721,24 @@ const (
TCPWindowClampOption
)
+const (
+ // PMTUDiscoveryWant is a setting of the MTUDiscoverOption to use
+ // per-route settings.
+ PMTUDiscoveryWant int = iota
+
+ // PMTUDiscoveryDont is a setting of the MTUDiscoverOption to disable
+ // path MTU discovery.
+ PMTUDiscoveryDont
+
+ // PMTUDiscoveryDo is a setting of the MTUDiscoverOption to always do
+ // path MTU discovery.
+ PMTUDiscoveryDo
+
+ // PMTUDiscoveryProbe is a setting of the MTUDiscoverOption to set DF
+ // but ignore path MTU.
+ PMTUDiscoveryProbe
+)
+
// ErrorOption is used in GetSockOpt to specify that the last error reported by
// the endpoint should be cleared and returned.
type ErrorOption struct{}
diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go
index bd3ec5a8d..caac6ef57 100644
--- a/pkg/tcpip/transport/tcp/endpoint.go
+++ b/pkg/tcpip/transport/tcp/endpoint.go
@@ -1589,6 +1589,13 @@ func (e *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error {
e.UnlockUser()
e.notifyProtocolGoroutine(notifyMSSChanged)
+ case tcpip.MTUDiscoverOption:
+ // Return not supported if attempting to set this option to
+ // anything other than path MTU discovery disabled.
+ if v != tcpip.PMTUDiscoveryDont {
+ return tcpip.ErrNotSupported
+ }
+
case tcpip.ReceiveBufferSizeOption:
// Make sure the receive buffer size is within the min and max
// allowed.
@@ -1896,6 +1903,11 @@ func (e *endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) {
v := header.TCPDefaultMSS
return v, nil
+ case tcpip.MTUDiscoverOption:
+ // Always return the path MTU discovery disabled setting since
+ // it's the only one supported.
+ return tcpip.PMTUDiscoveryDont, nil
+
case tcpip.ReceiveQueueSizeOption:
return e.readyReceiveSize()
diff --git a/pkg/tcpip/transport/tcp/tcp_test.go b/pkg/tcpip/transport/tcp/tcp_test.go
index 169adb16b..e67ec42b1 100644
--- a/pkg/tcpip/transport/tcp/tcp_test.go
+++ b/pkg/tcpip/transport/tcp/tcp_test.go
@@ -3095,6 +3095,63 @@ func TestMaxRTO(t *testing.T) {
}
}
+// TestRetransmitIPv4IDUniqueness tests that the IPv4 Identification field is
+// unique on retransmits.
+func TestRetransmitIPv4IDUniqueness(t *testing.T) {
+ for _, tc := range []struct {
+ name string
+ size int
+ }{
+ {"1Byte", 1},
+ {"512Bytes", 512},
+ } {
+ t.Run(tc.name, func(t *testing.T) {
+ c := context.New(t, defaultMTU)
+ defer c.Cleanup()
+
+ c.CreateConnected(789 /* iss */, 30000 /* rcvWnd */, -1 /* epRcvBuf */)
+
+ // Disabling PMTU discovery causes all packets sent from this socket to
+ // have DF=0. This needs to be done because the IPv4 ID uniqueness
+ // applies only to non-atomic IPv4 datagrams as defined in RFC 6864
+ // Section 4, and datagrams with DF=0 are non-atomic.
+ if err := c.EP.SetSockOptInt(tcpip.MTUDiscoverOption, tcpip.PMTUDiscoveryDont); err != nil {
+ t.Fatalf("disabling PMTU discovery via sockopt to force DF=0 failed: %s", err)
+ }
+
+ if _, _, err := c.EP.Write(tcpip.SlicePayload(buffer.NewView(tc.size)), tcpip.WriteOptions{}); err != nil {
+ t.Fatalf("Write failed: %s", err)
+ }
+ pkt := c.GetPacket()
+ checker.IPv4(t, pkt,
+ checker.FragmentFlags(0),
+ checker.TCP(
+ checker.DstPort(context.TestPort),
+ checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
+ ),
+ )
+ idSet := map[uint16]struct{}{header.IPv4(pkt).ID(): struct{}{}}
+ // Expect two retransmitted packets, and that all packets received have
+ // unique IPv4 ID values.
+ for i := 0; i <= 2; i++ {
+ pkt := c.GetPacket()
+ checker.IPv4(t, pkt,
+ checker.FragmentFlags(0),
+ checker.TCP(
+ checker.DstPort(context.TestPort),
+ checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
+ ),
+ )
+ id := header.IPv4(pkt).ID()
+ if _, exists := idSet[id]; exists {
+ t.Fatalf("duplicate IPv4 ID=%d found in retransmitted packet", id)
+ }
+ idSet[id] = struct{}{}
+ }
+ })
+ }
+}
+
func TestFinImmediately(t *testing.T) {
c := context.New(t, defaultMTU)
defer c.Cleanup()
diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go
index cae29fbff..0584ec8dc 100644
--- a/pkg/tcpip/transport/udp/endpoint.go
+++ b/pkg/tcpip/transport/udp/endpoint.go
@@ -612,6 +612,13 @@ func (e *endpoint) SetSockOptBool(opt tcpip.SockOptBool, v bool) *tcpip.Error {
// SetSockOptInt implements tcpip.Endpoint.SetSockOptInt.
func (e *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error {
switch opt {
+ case tcpip.MTUDiscoverOption:
+ // Return not supported if the value is not disabling path
+ // MTU discovery.
+ if v != tcpip.PMTUDiscoveryDont {
+ return tcpip.ErrNotSupported
+ }
+
case tcpip.MulticastTTLOption:
e.mu.Lock()
e.multicastTTL = uint8(v)
@@ -906,6 +913,10 @@ func (e *endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) {
e.mu.RUnlock()
return v, nil
+ case tcpip.MTUDiscoverOption:
+ // The only supported setting is path MTU discovery disabled.
+ return tcpip.PMTUDiscoveryDont, nil
+
case tcpip.MulticastTTLOption:
e.mu.Lock()
v := int(e.multicastTTL)
diff --git a/test/packetimpact/tests/BUILD b/test/packetimpact/tests/BUILD
index 85749c559..3ecbe83eb 100644
--- a/test/packetimpact/tests/BUILD
+++ b/test/packetimpact/tests/BUILD
@@ -18,8 +18,6 @@ packetimpact_go_test(
packetimpact_go_test(
name = "ipv4_id_uniqueness",
srcs = ["ipv4_id_uniqueness_test.go"],
- # TODO(b/157506701) Fix netstack then remove the line below.
- expect_netstack_failure = True,
deps = [
"//pkg/abi/linux",
"//pkg/tcpip/header",
diff --git a/test/syscalls/linux/raw_socket_hdrincl.cc b/test/syscalls/linux/raw_socket_hdrincl.cc
index 0a27506aa..16cfc1d75 100644
--- a/test/syscalls/linux/raw_socket_hdrincl.cc
+++ b/test/syscalls/linux/raw_socket_hdrincl.cc
@@ -273,52 +273,7 @@ TEST_F(RawHDRINCL, SendAndReceive) {
// The network stack should have set the source address.
EXPECT_EQ(src.sin_family, AF_INET);
EXPECT_EQ(absl::gbswap_32(src.sin_addr.s_addr), INADDR_LOOPBACK);
- // The packet ID should be 0, as the packet is less than 68 bytes.
- struct iphdr iphdr = {};
- memcpy(&iphdr, recv_buf, sizeof(iphdr));
- EXPECT_EQ(iphdr.id, 0);
-}
-
-// Send and receive a packet with nonzero IP ID.
-TEST_F(RawHDRINCL, SendAndReceiveNonzeroID) {
- int port = 40000;
- if (!IsRunningOnGvisor()) {
- port = static_cast<short>(ASSERT_NO_ERRNO_AND_VALUE(
- PortAvailable(0, AddressFamily::kIpv4, SocketType::kUdp, false)));
- }
-
- // IPPROTO_RAW sockets are write-only. We'll have to open another socket to
- // read what we write.
- FileDescriptor udp_sock =
- ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_RAW, IPPROTO_UDP));
-
- // Construct a packet with an IP header, UDP header, and payload. Make the
- // payload large enough to force an IP ID to be assigned.
- constexpr char kPayload[128] = {};
- char packet[sizeof(struct iphdr) + sizeof(struct udphdr) + sizeof(kPayload)];
- ASSERT_TRUE(
- FillPacket(packet, sizeof(packet), port, kPayload, sizeof(kPayload)));
-
- socklen_t addrlen = sizeof(addr_);
- ASSERT_NO_FATAL_FAILURE(sendto(socket_, &packet, sizeof(packet), 0,
- reinterpret_cast<struct sockaddr*>(&addr_),
- addrlen));
-
- // Receive the payload.
- char recv_buf[sizeof(packet)];
- struct sockaddr_in src;
- socklen_t src_size = sizeof(src);
- ASSERT_THAT(recvfrom(udp_sock.get(), recv_buf, sizeof(recv_buf), 0,
- reinterpret_cast<struct sockaddr*>(&src), &src_size),
- SyscallSucceedsWithValue(sizeof(packet)));
- EXPECT_EQ(
- memcmp(kPayload, recv_buf + sizeof(struct iphdr) + sizeof(struct udphdr),
- sizeof(kPayload)),
- 0);
- // The network stack should have set the source address.
- EXPECT_EQ(src.sin_family, AF_INET);
- EXPECT_EQ(absl::gbswap_32(src.sin_addr.s_addr), INADDR_LOOPBACK);
- // The packet ID should not be 0, as the packet was more than 68 bytes.
+ // The packet ID should not be 0, as the packet has DF=0.
struct iphdr* iphdr = reinterpret_cast<struct iphdr*>(recv_buf);
EXPECT_NE(iphdr->id, 0);
}
@@ -368,10 +323,10 @@ TEST_F(RawHDRINCL, SendAndReceiveDifferentAddress) {
// The network stack should have set the source address.
EXPECT_EQ(src.sin_family, AF_INET);
EXPECT_EQ(absl::gbswap_32(src.sin_addr.s_addr), INADDR_LOOPBACK);
- // The packet ID should be 0, as the packet is less than 68 bytes.
+ // The packet ID should not be 0, as the packet has DF=0.
struct iphdr recv_iphdr = {};
memcpy(&recv_iphdr, recv_buf, sizeof(recv_iphdr));
- EXPECT_EQ(recv_iphdr.id, 0);
+ EXPECT_NE(recv_iphdr.id, 0);
// The destination address should be localhost, not the bad IP we set
// initially.
EXPECT_EQ(absl::gbswap_32(recv_iphdr.daddr), INADDR_LOOPBACK);