From 29f4b71eb3db3d082735bd4316006d6bcc3230a1 Mon Sep 17 00:00:00 2001 From: Nick Brown Date: Wed, 12 May 2021 16:51:06 -0700 Subject: Send ICMP errors when unable to forward fragmented packets Before this change, we would silently drop packets when the packet was too big to be sent out through the NIC (and, for IPv4 packets, if DF was set). This change brings us into line with RFC 792 (IPv4) and RFC 4443 (IPv6), both of which specify that gateways should return an ICMP error to the sender when the packet can't be fragmented. PiperOrigin-RevId: 373480078 --- pkg/sentry/socket/netstack/netstack.go | 1 + pkg/tcpip/network/internal/ip/errors.go | 12 +- pkg/tcpip/network/internal/ip/stats.go | 9 +- pkg/tcpip/network/ipv4/icmp.go | 22 ++++ pkg/tcpip/network/ipv4/ipv4.go | 29 ++++- pkg/tcpip/network/ipv4/ipv4_test.go | 208 +++++++++++++++++++++++++------- pkg/tcpip/network/ipv6/icmp.go | 78 +++++++++--- pkg/tcpip/network/ipv6/ipv6.go | 29 ++++- pkg/tcpip/network/ipv6/ipv6_test.go | 78 +++++++++--- pkg/tcpip/stack/packet_buffer.go | 7 ++ pkg/tcpip/stack/registration.go | 3 + pkg/tcpip/tcpip.go | 4 + 12 files changed, 392 insertions(+), 88 deletions(-) (limited to 'pkg') diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go index 3fd22f936..0b64a24c3 100644 --- a/pkg/sentry/socket/netstack/netstack.go +++ b/pkg/sentry/socket/netstack/netstack.go @@ -205,6 +205,7 @@ var Metrics = tcpip.Stats{ LinkLocalSource: mustCreateMetric("/netstack/ip/forwarding/link_local_source_address", "Number of IP packets received which could not be forwarded due to a link-local source address."), LinkLocalDestination: mustCreateMetric("/netstack/ip/forwarding/link_local_destination_address", "Number of IP packets received which could not be forwarded due to a link-local destination address."), ExtensionHeaderProblem: mustCreateMetric("/netstack/ip/forwarding/extension_header_problem", "Number of IP packets received which could not be forwarded due to a problem processing their IPv6 extension headers."), + PacketTooBig: mustCreateMetric("/netstack/ip/forwarding/packet_too_big", "Number of IP packets received which could not fit within the outgoing MTU."), Errors: mustCreateMetric("/netstack/ip/forwarding/errors", "Number of IP packets which couldn't be forwarded."), }, }, diff --git a/pkg/tcpip/network/internal/ip/errors.go b/pkg/tcpip/network/internal/ip/errors.go index d3577b377..94f1cd1cb 100644 --- a/pkg/tcpip/network/internal/ip/errors.go +++ b/pkg/tcpip/network/internal/ip/errors.go @@ -58,14 +58,22 @@ func (*ErrLinkLocalDestinationAddress) isForwardingError() {} func (*ErrLinkLocalDestinationAddress) String() string { return "link local destination address" } -// ErrNoRoute indicates the Netstack couldn't find a route for the -// received packet. +// ErrNoRoute indicates that a route for the received packet couldn't be found. type ErrNoRoute struct{} func (*ErrNoRoute) isForwardingError() {} func (*ErrNoRoute) String() string { return "no route" } +// ErrMessageTooLong indicates the packet was too big for the outgoing MTU. +// +// +stateify savable +type ErrMessageTooLong struct{} + +func (*ErrMessageTooLong) isForwardingError() {} + +func (*ErrMessageTooLong) String() string { return "message too long" } + // ErrOther indicates the packet coould not be forwarded for a reason // captured by the contained error. type ErrOther struct { diff --git a/pkg/tcpip/network/internal/ip/stats.go b/pkg/tcpip/network/internal/ip/stats.go index 68b8b550e..444515d40 100644 --- a/pkg/tcpip/network/internal/ip/stats.go +++ b/pkg/tcpip/network/internal/ip/stats.go @@ -38,6 +38,10 @@ type MultiCounterIPForwardingStats struct { // because they contained a link-local destination address. LinkLocalDestination tcpip.MultiCounterStat + // PacketTooBig is the number of IP packets which were dropped because they + // were too big for the outgoing MTU. + PacketTooBig tcpip.MultiCounterStat + // ExtensionHeaderProblem is the number of IP packets which were dropped // because of a problem encountered when processing an IPv6 extension // header. @@ -55,6 +59,7 @@ func (m *MultiCounterIPForwardingStats) Init(a, b *tcpip.IPForwardingStats) { m.LinkLocalSource.Init(a.LinkLocalSource, b.LinkLocalSource) m.LinkLocalDestination.Init(a.LinkLocalDestination, b.LinkLocalDestination) m.ExtensionHeaderProblem.Init(a.ExtensionHeaderProblem, b.ExtensionHeaderProblem) + m.PacketTooBig.Init(a.PacketTooBig, b.PacketTooBig) m.ExhaustedTTL.Init(a.ExhaustedTTL, b.ExhaustedTTL) } @@ -82,8 +87,8 @@ type MultiCounterIPStats struct { // wire. InvalidSourceAddressesReceived tcpip.MultiCounterStat - // PacketsDelivered is the number of incoming IP packets that are - // successfully delivered to the transport layer. + // PacketsDelivered is the number of incoming IP packets successfully + // delivered to the transport layer. PacketsDelivered tcpip.MultiCounterStat // PacketsSent is the number of IP packets sent via WritePacket. diff --git a/pkg/tcpip/network/ipv4/icmp.go b/pkg/tcpip/network/ipv4/icmp.go index c8ed1ce79..d1a82b584 100644 --- a/pkg/tcpip/network/ipv4/icmp.go +++ b/pkg/tcpip/network/ipv4/icmp.go @@ -387,6 +387,8 @@ func (e *endpoint) handleICMP(pkt *stack.PacketBuffer) { // icmpReason is a marker interface for IPv4 specific ICMP errors. type icmpReason interface { isICMPReason() + // isForwarding indicates whether or not the error arose while attempting to + // forward a packet. isForwarding() bool } @@ -463,6 +465,22 @@ func (*icmpReasonNetworkUnreachable) isForwarding() bool { return true } +// icmpReasonFragmentationNeeded is an error where a packet requires +// fragmentation while also having the Don't Fragment flag set, as per RFC 792 +// page 3, Destination Unreachable Message. +type icmpReasonFragmentationNeeded struct{} + +func (*icmpReasonFragmentationNeeded) isICMPReason() {} +func (*icmpReasonFragmentationNeeded) isForwarding() bool { + // If we hit a Don't Fragment error, then we know we are operating as a router. + // As per RFC 792 page 4, Destination Unreachable Message, + // + // Another case is when a datagram must be fragmented to be forwarded by a + // gateway yet the Don't Fragment flag is on. In this case the gateway must + // discard the datagram and may return a destination unreachable message. + return true +} + // returnError takes an error descriptor and generates the appropriate ICMP // error packet for IPv4 and sends it back to the remote device that sent // the problematic packet. It incorporates as much of that packet as @@ -635,6 +653,10 @@ func (p *protocol) returnError(reason icmpReason, pkt *stack.PacketBuffer) tcpip icmpHdr.SetType(header.ICMPv4DstUnreachable) icmpHdr.SetCode(header.ICMPv4NetUnreachable) counter = sent.dstUnreachable + case *icmpReasonFragmentationNeeded: + icmpHdr.SetType(header.ICMPv4DstUnreachable) + icmpHdr.SetCode(header.ICMPv4FragmentationNeeded) + counter = sent.dstUnreachable case *icmpReasonTTLExceeded: icmpHdr.SetType(header.ICMPv4TimeExceeded) icmpHdr.SetCode(header.ICMPv4TTLExceeded) diff --git a/pkg/tcpip/network/ipv4/ipv4.go b/pkg/tcpip/network/ipv4/ipv4.go index 4031032d0..aef83e834 100644 --- a/pkg/tcpip/network/ipv4/ipv4.go +++ b/pkg/tcpip/network/ipv4/ipv4.go @@ -434,6 +434,12 @@ func (e *endpoint) writePacket(r *stack.Route, pkt *stack.PacketBuffer, headerIn } if packetMustBeFragmented(pkt, networkMTU) { + h := header.IPv4(pkt.NetworkHeader().View()) + if h.Flags()&header.IPv4FlagDontFragment != 0 && pkt.NetworkPacketInfo.IsForwardedPacket { + // TODO(gvisor.dev/issue/5919): Handle error condition in which DontFragment + // is set but the packet must be fragmented for the non-forwarding case. + return &tcpip.ErrMessageTooLong{} + } sent, remain, err := e.handleFragments(r, networkMTU, pkt, func(fragPkt *stack.PacketBuffer) tcpip.Error { // TODO(gvisor.dev/issue/3884): Evaluate whether we want to send each // fragment one by one using WritePacket() (current strategy) or if we @@ -695,13 +701,28 @@ func (e *endpoint) forwardPacket(pkt *stack.PacketBuffer) ip.ForwardingError { // spent, the field must be decremented by 1. newHdr.SetTTL(ttl - 1) - if err := r.WriteHeaderIncludedPacket(stack.NewPacketBuffer(stack.PacketBufferOptions{ + switch err := r.WriteHeaderIncludedPacket(stack.NewPacketBuffer(stack.PacketBufferOptions{ ReserveHeaderBytes: int(r.MaxHeaderLength()), Data: buffer.View(newHdr).ToVectorisedView(), - })); err != nil { + IsForwardedPacket: true, + })); err.(type) { + case nil: + return nil + case *tcpip.ErrMessageTooLong: + // As per RFC 792, page 4, Destination Unreachable: + // + // Another case is when a datagram must be fragmented to be forwarded by a + // gateway yet the Don't Fragment flag is on. In this case the gateway must + // discard the datagram and may return a destination unreachable message. + // + // WriteHeaderIncludedPacket checks for the presence of the Don't Fragment bit + // while sending the packet and returns this error iff fragmentation is + // necessary and the bit is also set. + _ = e.protocol.returnError(&icmpReasonFragmentationNeeded{}, pkt) + return &ip.ErrMessageTooLong{} + default: return &ip.ErrOther{Err: err} } - return nil } // HandlePacket is called by the link layer when new ipv4 packets arrive for @@ -830,6 +851,8 @@ func (e *endpoint) handleValidatedPacket(h header.IPv4, pkt *stack.PacketBuffer) case *ip.ErrParameterProblem: e.protocol.stack.Stats().MalformedRcvdPackets.Increment() stats.ip.MalformedPacketsReceived.Increment() + case *ip.ErrMessageTooLong: + stats.ip.Forwarding.PacketTooBig.Increment() default: panic(fmt.Sprintf("unexpected error %s while trying to forward packet: %#v", err, pkt)) } diff --git a/pkg/tcpip/network/ipv4/ipv4_test.go b/pkg/tcpip/network/ipv4/ipv4_test.go index 7a7cad04a..3c8a39973 100644 --- a/pkg/tcpip/network/ipv4/ipv4_test.go +++ b/pkg/tcpip/network/ipv4/ipv4_test.go @@ -112,6 +112,10 @@ func TestExcludeBroadcast(t *testing.T) { }) } +type forwardedPacket struct { + fragments []fragmentInfo +} + func TestForwarding(t *testing.T) { const ( nicID1 = 1 @@ -129,6 +133,7 @@ func TestForwarding(t *testing.T) { Address: tcpip.Address(net.ParseIP("11.0.0.1").To4()), PrefixLen: 8, } + linkAddr2 := tcpip.LinkAddress("\x02\x03\x03\x04\x05\x06") remoteIPv4Addr1 := tcpip.Address(net.ParseIP("10.0.0.2").To4()) remoteIPv4Addr2 := tcpip.Address(net.ParseIP("11.0.0.2").To4()) unreachableIPv4Addr := tcpip.Address(net.ParseIP("12.0.0.2").To4()) @@ -141,7 +146,9 @@ func TestForwarding(t *testing.T) { sourceAddr tcpip.Address destAddr tcpip.Address expectErrorICMP bool - expectPacketForwarded bool + ipFlags uint8 + mtu uint32 + payloadLength int options header.IPv4Options forwardedOptions header.IPv4Options icmpType header.ICMPv4Type @@ -149,6 +156,8 @@ func TestForwarding(t *testing.T) { expectPacketUnrouteableError bool expectLinkLocalSourceError bool expectLinkLocalDestError bool + expectPacketForwarded bool + expectedFragmentsForwarded []fragmentInfo }{ { name: "TTL of zero", @@ -158,6 +167,7 @@ func TestForwarding(t *testing.T) { expectErrorICMP: true, icmpType: header.ICMPv4TimeExceeded, icmpCode: header.ICMPv4TTLExceeded, + mtu: ipv4.MaxTotalSize, }, { name: "TTL of one", @@ -165,6 +175,7 @@ func TestForwarding(t *testing.T) { sourceAddr: remoteIPv4Addr1, destAddr: remoteIPv4Addr2, expectPacketForwarded: true, + mtu: ipv4.MaxTotalSize, }, { name: "TTL of two", @@ -172,6 +183,7 @@ func TestForwarding(t *testing.T) { sourceAddr: remoteIPv4Addr1, destAddr: remoteIPv4Addr2, expectPacketForwarded: true, + mtu: ipv4.MaxTotalSize, }, { name: "Max TTL", @@ -179,6 +191,7 @@ func TestForwarding(t *testing.T) { sourceAddr: remoteIPv4Addr1, destAddr: remoteIPv4Addr2, expectPacketForwarded: true, + mtu: ipv4.MaxTotalSize, }, { name: "four EOL options", @@ -186,6 +199,7 @@ func TestForwarding(t *testing.T) { sourceAddr: remoteIPv4Addr1, destAddr: remoteIPv4Addr2, expectPacketForwarded: true, + mtu: ipv4.MaxTotalSize, options: header.IPv4Options{0, 0, 0, 0}, forwardedOptions: header.IPv4Options{0, 0, 0, 0}, }, @@ -194,6 +208,7 @@ func TestForwarding(t *testing.T) { TTL: 2, sourceAddr: remoteIPv4Addr1, destAddr: remoteIPv4Addr2, + mtu: ipv4.MaxTotalSize, options: header.IPv4Options{ 68, 12, 13, 0xF1, 192, 168, 1, 12, @@ -208,6 +223,7 @@ func TestForwarding(t *testing.T) { TTL: 2, sourceAddr: remoteIPv4Addr1, destAddr: remoteIPv4Addr2, + mtu: ipv4.MaxTotalSize, options: header.IPv4Options{ 68, 24, 21, 0x00, 1, 2, 3, 4, @@ -231,6 +247,7 @@ func TestForwarding(t *testing.T) { TTL: 2, sourceAddr: remoteIPv4Addr1, destAddr: remoteIPv4Addr2, + mtu: ipv4.MaxTotalSize, options: header.IPv4Options{ 68, 12, 13, 0x11, 192, 168, 1, 12, @@ -254,6 +271,7 @@ func TestForwarding(t *testing.T) { sourceAddr: remoteIPv4Addr1, destAddr: unreachableIPv4Addr, expectErrorICMP: true, + mtu: ipv4.MaxTotalSize, icmpType: header.ICMPv4DstUnreachable, icmpCode: header.ICMPv4NetUnreachable, expectPacketUnrouteableError: true, @@ -278,6 +296,51 @@ func TestForwarding(t *testing.T) { destAddr: remoteIPv4Addr2, expectLinkLocalSourceError: true, }, + { + name: "Fragmentation needed and DF set", + TTL: 2, + sourceAddr: remoteIPv4Addr1, + destAddr: remoteIPv4Addr2, + ipFlags: header.IPv4FlagDontFragment, + // We've picked this MTU because it is: + // + // 1) Greater than the minimum MTU that IPv4 hosts are required to process + // (576 bytes). As per RFC 1812, Section 4.3.2.3: + // + // The ICMP datagram SHOULD contain as much of the original datagram as + // possible without the length of the ICMP datagram exceeding 576 bytes. + // + // Therefore, setting an MTU greater than 576 bytes ensures that we can fit a + // complete ICMP packet on the incoming endpoint (and make assertions about + // it). + // + // 2) Less than `ipv4.MaxTotalSize`, which lets us build an IPv4 packet whose + // size exceeds the MTU. + mtu: 1000, + payloadLength: 1004, + expectErrorICMP: true, + icmpType: header.ICMPv4DstUnreachable, + icmpCode: header.ICMPv4FragmentationNeeded, + }, + { + name: "Fragmentation needed and DF not set", + TTL: 2, + sourceAddr: remoteIPv4Addr1, + destAddr: remoteIPv4Addr2, + mtu: 1000, + payloadLength: 1004, + expectPacketForwarded: true, + // Combined, these fragments have length of 1012 octets, which is equal to + // the length of the payload (1004 octets), plus the length of the ICMP + // header (8 octets). + expectedFragmentsForwarded: []fragmentInfo{ + // The first fragment has a length of the greatest multiple of 8 which is + // less than or equal to to `mtu - header.IPv4MinimumSize`. + {offset: 0, payloadSize: uint16(976), more: true}, + // The next fragment holds the rest of the packet. + {offset: uint16(976), payloadSize: 36, more: false}, + }, + }, } for _, test := range tests { t.Run(test.name, func(t *testing.T) { @@ -293,7 +356,7 @@ func TestForwarding(t *testing.T) { clock.Advance(time.Millisecond * randomTimeOffset) // We expect at most a single packet in response to our ICMP Echo Request. - e1 := channel.New(1, ipv4.MaxTotalSize, "") + e1 := channel.New(1, test.mtu, "") if err := s.CreateNIC(nicID1, e1); err != nil { t.Fatalf("CreateNIC(%d, _): %s", nicID1, err) } @@ -302,7 +365,11 @@ func TestForwarding(t *testing.T) { t.Fatalf("AddProtocolAddress(%d, %#v): %s", nicID1, ipv4ProtoAddr1, err) } - e2 := channel.New(1, ipv4.MaxTotalSize, "") + expectedEmittedPacketCount := 1 + if len(test.expectedFragmentsForwarded) > expectedEmittedPacketCount { + expectedEmittedPacketCount = len(test.expectedFragmentsForwarded) + } + e2 := channel.New(expectedEmittedPacketCount, test.mtu, linkAddr2) if err := s.CreateNIC(nicID2, e2); err != nil { t.Fatalf("CreateNIC(%d, _): %s", nicID2, err) } @@ -330,9 +397,11 @@ func TestForwarding(t *testing.T) { if ipHeaderLength > header.IPv4MaximumHeaderSize { t.Fatalf("got ipHeaderLength = %d, want <= %d ", ipHeaderLength, header.IPv4MaximumHeaderSize) } - totalLen := uint16(ipHeaderLength + header.ICMPv4MinimumSize) - hdr := buffer.NewPrependable(int(totalLen)) - icmp := header.ICMPv4(hdr.Prepend(header.ICMPv4MinimumSize)) + icmpHeaderLength := header.ICMPv4MinimumSize + totalLength := ipHeaderLength + icmpHeaderLength + test.payloadLength + hdr := buffer.NewPrependable(totalLength) + hdr.Prepend(test.payloadLength) + icmp := header.ICMPv4(hdr.Prepend(icmpHeaderLength)) icmp.SetIdent(randomIdent) icmp.SetSequence(randomSequence) icmp.SetType(header.ICMPv4Echo) @@ -341,11 +410,12 @@ func TestForwarding(t *testing.T) { icmp.SetChecksum(^header.Checksum(icmp, 0)) ip := header.IPv4(hdr.Prepend(ipHeaderLength)) ip.Encode(&header.IPv4Fields{ - TotalLength: totalLen, + TotalLength: uint16(totalLength), Protocol: uint8(header.ICMPv4ProtocolNumber), TTL: test.TTL, SrcAddr: test.sourceAddr, DstAddr: test.destAddr, + Flags: test.ipFlags, }) if len(test.options) != 0 { ip.SetHeaderLength(uint8(ipHeaderLength)) @@ -360,6 +430,7 @@ func TestForwarding(t *testing.T) { requestPkt := stack.NewPacketBuffer(stack.PacketBufferOptions{ Data: hdr.View().ToVectorisedView(), }) + requestPkt.NetworkProtocolNumber = header.IPv4ProtocolNumber e1.InjectInbound(header.IPv4ProtocolNumber, requestPkt) reply, ok := e1.Read() @@ -368,6 +439,18 @@ func TestForwarding(t *testing.T) { t.Fatalf("expected ICMP packet type %d through incoming NIC", test.icmpType) } + // We expect the ICMP packet to contain as much of the original packet as + // possible up to a limit of 576 bytes, split between payload, IP header, + // and ICMP header. + expectedICMPPayloadLength := func() int { + maxICMPPacketLength := header.IPv4MinimumProcessableDatagramSize + maxICMPPayloadLength := maxICMPPacketLength - icmpHeaderLength - ipHeaderLength + if len(hdr.View()) > maxICMPPayloadLength { + return maxICMPPayloadLength + } + return len(hdr.View()) + } + checker.IPv4(t, header.IPv4(stack.PayloadSince(reply.Pkt.NetworkHeader())), checker.SrcAddr(ipv4Addr1.Address), checker.DstAddr(test.sourceAddr), @@ -376,41 +459,58 @@ func TestForwarding(t *testing.T) { checker.ICMPv4Checksum(), checker.ICMPv4Type(test.icmpType), checker.ICMPv4Code(test.icmpCode), - checker.ICMPv4Payload([]byte(hdr.View())), + checker.ICMPv4Payload([]byte(hdr.View()[0:expectedICMPPayloadLength()])), ), ) - - if n := e2.Drain(); n != 0 { - t.Fatalf("got e2.Drain() = %d, want = 0", n) - } } else if ok { t.Fatalf("expected no ICMP packet through incoming NIC, instead found: %#v", reply) } - reply, ok = e2.Read() if test.expectPacketForwarded { - if !ok { - t.Fatal("expected ICMP Echo packet through outgoing NIC") - } + if len(test.expectedFragmentsForwarded) != 0 { + fragmentedPackets := []*stack.PacketBuffer{} + for i := 0; i < len(test.expectedFragmentsForwarded); i++ { + reply, ok = e2.Read() + if !ok { + t.Fatal("expected ICMP Echo fragment through outgoing NIC") + } + fragmentedPackets = append(fragmentedPackets, reply.Pkt) + } - checker.IPv4(t, header.IPv4(stack.PayloadSince(reply.Pkt.NetworkHeader())), - checker.SrcAddr(test.sourceAddr), - checker.DstAddr(test.destAddr), - checker.TTL(test.TTL-1), - checker.IPv4Options(test.forwardedOptions), - checker.ICMPv4( - checker.ICMPv4Checksum(), - checker.ICMPv4Type(header.ICMPv4Echo), - checker.ICMPv4Code(header.ICMPv4UnusedCode), - checker.ICMPv4Payload(nil), - ), - ) + // The forwarded packet's TTL will have been decremented. + ipHeader := header.IPv4(requestPkt.NetworkHeader().View()) + ipHeader.SetTTL(ipHeader.TTL() - 1) + + // Forwarded packets have available header bytes equalling the sum of the + // maximum IP header size and the maximum size allocated for link layer + // headers. In this case, no size is allocated for link layer headers. + expectedAvailableHeaderBytes := header.IPv4MaximumHeaderSize + if err := compareFragments(fragmentedPackets, requestPkt, uint32(test.mtu), test.expectedFragmentsForwarded, header.ICMPv4ProtocolNumber, true /* withIPHeader */, expectedAvailableHeaderBytes); err != nil { + t.Error(err) + } + } else { + reply, ok = e2.Read() + if !ok { + t.Fatal("expected ICMP Echo packet through outgoing NIC") + } - if n := e1.Drain(); n != 0 { - t.Fatalf("got e1.Drain() = %d, want = 0", n) + checker.IPv4(t, header.IPv4(stack.PayloadSince(reply.Pkt.NetworkHeader())), + checker.SrcAddr(test.sourceAddr), + checker.DstAddr(test.destAddr), + checker.TTL(test.TTL-1), + checker.IPv4Options(test.forwardedOptions), + checker.ICMPv4( + checker.ICMPv4Checksum(), + checker.ICMPv4Type(header.ICMPv4Echo), + checker.ICMPv4Code(header.ICMPv4UnusedCode), + checker.ICMPv4Payload(nil), + ), + ) + } + } else { + if reply, ok = e2.Read(); ok { + t.Fatalf("expected no ICMP Echo packet through outgoing NIC, instead found: %#v", reply) } - } else if ok { - t.Fatalf("expected no ICMP Echo packet through outgoing NIC, instead found: %#v", reply) } boolToInt := func(val bool) uint64 { @@ -443,6 +543,10 @@ func TestForwarding(t *testing.T) { if got, want := s.Stats().IP.Forwarding.Errors.Value(), boolToInt(!test.expectPacketForwarded); got != want { t.Errorf("got s.Stats().IP.Forwarding.Errors.Value() = %d, want = %d", got, want) } + + if got, want := s.Stats().IP.Forwarding.PacketTooBig.Value(), boolToInt(test.icmpCode == header.ICMPv4FragmentationNeeded); got != want { + t.Errorf("got s.Stats().IP.Forwarding.PacketTooBig.Value() = %d, want = %d", got, want) + } }) } } @@ -1264,13 +1368,25 @@ func TestIPv4Sanity(t *testing.T) { } } -// comparePayloads compared the contents of all the packets against the contents -// of the source packet. -func compareFragments(packets []*stack.PacketBuffer, sourcePacket *stack.PacketBuffer, mtu uint32, wantFragments []fragmentInfo, proto tcpip.TransportProtocolNumber) error { +// compareFragments compares the contents of a set of fragmented packets against +// the contents of a source packet. +// +// If withIPHeader is set to true, we will validate the fragmented packets' IP +// headers against the source packet's IP header. If set to false, we validate +// the fragmented packets' IP headers against each other. +func compareFragments(packets []*stack.PacketBuffer, sourcePacket *stack.PacketBuffer, mtu uint32, wantFragments []fragmentInfo, proto tcpip.TransportProtocolNumber, withIPHeader bool, expectedAvailableHeaderBytes int) error { // Make a complete array of the sourcePacket packet. - source := header.IPv4(packets[0].NetworkHeader().View()) + var source header.IPv4 vv := buffer.NewVectorisedView(sourcePacket.Size(), sourcePacket.Views()) - source = append(source, vv.ToView()...) + + // If the packet to be fragmented contains an IPv4 header, use that header for + // validating fragment headers. Else, use the header of the first fragment. + if withIPHeader { + source = header.IPv4(vv.ToView()) + } else { + source = header.IPv4(packets[0].NetworkHeader().View()) + source = append(source, vv.ToView()...) + } // Make a copy of the IP header, which will be modified in some fields to make // an expected header. @@ -1293,12 +1409,12 @@ func compareFragments(packets []*stack.PacketBuffer, sourcePacket *stack.PacketB if got := fragmentIPHeader.TransportProtocol(); got != proto { return fmt.Errorf("fragment #%d: got fragmentIPHeader.TransportProtocol() = %d, want = %d", i, got, uint8(proto)) } - if got := packet.AvailableHeaderBytes(); got != extraHeaderReserve { - return fmt.Errorf("fragment #%d: got packet.AvailableHeaderBytes() = %d, want = %d", i, got, extraHeaderReserve) - } if got, want := packet.NetworkProtocolNumber, sourcePacket.NetworkProtocolNumber; got != want { return fmt.Errorf("fragment #%d: got fragment.NetworkProtocolNumber = %d, want = %d", i, got, want) } + if got := packet.AvailableHeaderBytes(); got != expectedAvailableHeaderBytes { + return fmt.Errorf("fragment #%d: got packet.AvailableHeaderBytes() = %d, want = %d", i, got, expectedAvailableHeaderBytes) + } if got, want := fragmentIPHeader.CalculateChecksum(), uint16(0xffff); got != want { return fmt.Errorf("fragment #%d: got ip.CalculateChecksum() = %#x, want = %#x", i, got, want) } @@ -1314,6 +1430,14 @@ func compareFragments(packets []*stack.PacketBuffer, sourcePacket *stack.PacketB sourceCopy.SetTotalLength(wantFragments[i].payloadSize + header.IPv4MinimumSize) sourceCopy.SetChecksum(0) sourceCopy.SetChecksum(^sourceCopy.CalculateChecksum()) + + // If we are validating against the original IP header, we should exclude the + // ID field, which will only be set fo fragmented packets. + if withIPHeader { + fragmentIPHeader.SetID(0) + fragmentIPHeader.SetChecksum(0) + fragmentIPHeader.SetChecksum(^fragmentIPHeader.CalculateChecksum()) + } if diff := cmp.Diff(fragmentIPHeader[:fragmentIPHeader.HeaderLength()], sourceCopy[:sourceCopy.HeaderLength()]); diff != "" { return fmt.Errorf("fragment #%d: fragmentIPHeader mismatch (-want +got):\n%s", i, diff) } @@ -1442,7 +1566,7 @@ func TestFragmentationWritePacket(t *testing.T) { if got := r.Stats().IP.OutgoingPacketErrors.Value(); got != 0 { t.Errorf("got r.Stats().IP.OutgoingPacketErrors.Value() = %d, want = 0", got) } - if err := compareFragments(ep.WrittenPackets, source, ft.mtu, ft.wantFragments, tcp.ProtocolNumber); err != nil { + if err := compareFragments(ep.WrittenPackets, source, ft.mtu, ft.wantFragments, tcp.ProtocolNumber, false /* withIPHeader */, extraHeaderReserve); err != nil { t.Error(err) } }) @@ -1523,7 +1647,7 @@ func TestFragmentationWritePackets(t *testing.T) { } fragments := ep.WrittenPackets[test.insertBefore : len(ft.wantFragments)+test.insertBefore] - if err := compareFragments(fragments, pkt, ft.mtu, ft.wantFragments, tcp.ProtocolNumber); err != nil { + if err := compareFragments(fragments, pkt, ft.mtu, ft.wantFragments, tcp.ProtocolNumber, false /* withIPHeader */, extraHeaderReserve); err != nil { t.Error(err) } }) diff --git a/pkg/tcpip/network/ipv6/icmp.go b/pkg/tcpip/network/ipv6/icmp.go index 247a07dc2..4051fda07 100644 --- a/pkg/tcpip/network/ipv6/icmp.go +++ b/pkg/tcpip/network/ipv6/icmp.go @@ -955,7 +955,19 @@ func (*endpoint) ResolveStaticAddress(addr tcpip.Address) (tcpip.LinkAddress, bo // icmpReason is a marker interface for IPv6 specific ICMP errors. type icmpReason interface { isICMPReason() + // isForwarding indicates whether or not the error arose while attempting to + // forward a packet. isForwarding() bool + // respondToMulticast indicates whether this error falls under the exception + // outlined by RFC 4443 section 2.4 point e.3 exception 2: + // + // (e.3) A packet destined to an IPv6 multicast address. (There are two + // exceptions to this rule: (1) the Packet Too Big Message (Section 3.2) to + // allow Path MTU discovery to work for IPv6 multicast, and (2) the Parameter + // Problem Message, Code 2 (Section 3.4) reporting an unrecognized IPv6 + // option (see Section 4.2 of [IPv6]) that has the Option Type highest- + // order two bits set to 10). + respondsToMulticast() bool } // icmpReasonParameterProblem is an error during processing of extension headers @@ -963,18 +975,6 @@ type icmpReason interface { type icmpReasonParameterProblem struct { code header.ICMPv6Code - // respondToMulticast indicates that we are sending a packet that falls under - // the exception outlined by RFC 4443 section 2.4 point e.3 exception 2: - // - // (e.3) A packet destined to an IPv6 multicast address. (There are - // two exceptions to this rule: (1) the Packet Too Big Message - // (Section 3.2) to allow Path MTU discovery to work for IPv6 - // multicast, and (2) the Parameter Problem Message, Code 2 - // (Section 3.4) reporting an unrecognized IPv6 option (see - // Section 4.2 of [IPv6]) that has the Option Type highest- - // order two bits set to 10). - respondToMulticast bool - // pointer is defined in the RFC 4443 setion 3.4 which reads: // // Pointer Identifies the octet offset within the invoking packet @@ -985,9 +985,9 @@ type icmpReasonParameterProblem struct { // in the maximum size of an ICMPv6 error message. pointer uint32 - // forwarding indicates that the problem arose while we were trying to forward - // a packet. forwarding bool + + respondToMulticast bool } func (*icmpReasonParameterProblem) isICMPReason() {} @@ -995,6 +995,10 @@ func (p *icmpReasonParameterProblem) isForwarding() bool { return p.forwarding } +func (p *icmpReasonParameterProblem) respondsToMulticast() bool { + return p.respondToMulticast +} + // icmpReasonPortUnreachable is an error where the transport protocol has no // listener and no alternative means to inform the sender. type icmpReasonPortUnreachable struct{} @@ -1005,6 +1009,10 @@ func (*icmpReasonPortUnreachable) isForwarding() bool { return false } +func (*icmpReasonPortUnreachable) respondsToMulticast() bool { + return false +} + // icmpReasonNetUnreachable is an error where no route can be found to the // network of the final destination. type icmpReasonNetUnreachable struct{} @@ -1021,6 +1029,30 @@ func (*icmpReasonNetUnreachable) isForwarding() bool { return true } +func (*icmpReasonNetUnreachable) respondsToMulticast() bool { + return false +} + +// icmpReasonFragmentationNeeded is an error where a packet is to big to be sent +// out through the outgoing MTU, as per RFC 4443 page 9, Packet Too Big Message. +type icmpReasonPacketTooBig struct{} + +func (*icmpReasonPacketTooBig) isICMPReason() {} + +func (*icmpReasonPacketTooBig) isForwarding() bool { + // If we hit a Packet Too Big error, then we know we are operating as a router. + // As per RFC 4443 section 3.2: + // + // A Packet Too Big MUST be sent by a router in response to a packet that it + // cannot forward because the packet is larger than the MTU of the outgoing + // link. + return true +} + +func (*icmpReasonPacketTooBig) respondsToMulticast() bool { + return true +} + // icmpReasonHopLimitExceeded is an error where a packet's hop limit exceeded in // transit to its final destination, as per RFC 4443 section 3.3. type icmpReasonHopLimitExceeded struct{} @@ -1039,6 +1071,10 @@ func (*icmpReasonHopLimitExceeded) isForwarding() bool { return true } +func (*icmpReasonHopLimitExceeded) respondsToMulticast() bool { + return false +} + // icmpReasonReassemblyTimeout is an error where insufficient fragments are // received to complete reassembly of a packet within a configured time after // the reception of the first-arriving fragment of that packet. @@ -1050,6 +1086,10 @@ func (*icmpReasonReassemblyTimeout) isForwarding() bool { return false } +func (*icmpReasonReassemblyTimeout) respondsToMulticast() bool { + return false +} + // returnError takes an error descriptor and generates the appropriate ICMP // error packet for IPv6 and sends it. func (p *protocol) returnError(reason icmpReason, pkt *stack.PacketBuffer) tcpip.Error { @@ -1078,11 +1118,7 @@ func (p *protocol) returnError(reason icmpReason, pkt *stack.PacketBuffer) tcpip // Section 4.2 of [IPv6]) that has the Option Type highest- // order two bits set to 10). // - var allowResponseToMulticast bool - if reason, ok := reason.(*icmpReasonParameterProblem); ok { - allowResponseToMulticast = reason.respondToMulticast - } - + allowResponseToMulticast := reason.respondsToMulticast() isOrigDstMulticast := header.IsV6MulticastAddress(origIPHdrDst) if (!allowResponseToMulticast && isOrigDstMulticast) || origIPHdrSrc == header.IPv6Any { return nil @@ -1190,6 +1226,10 @@ func (p *protocol) returnError(reason icmpReason, pkt *stack.PacketBuffer) tcpip icmpHdr.SetType(header.ICMPv6DstUnreachable) icmpHdr.SetCode(header.ICMPv6NetworkUnreachable) counter = sent.dstUnreachable + case *icmpReasonPacketTooBig: + icmpHdr.SetType(header.ICMPv6PacketTooBig) + icmpHdr.SetCode(header.ICMPv6UnusedCode) + counter = sent.packetTooBig case *icmpReasonHopLimitExceeded: icmpHdr.SetType(header.ICMPv6TimeExceeded) icmpHdr.SetCode(header.ICMPv6HopLimitExceeded) diff --git a/pkg/tcpip/network/ipv6/ipv6.go b/pkg/tcpip/network/ipv6/ipv6.go index 029d5f51b..880290b4b 100644 --- a/pkg/tcpip/network/ipv6/ipv6.go +++ b/pkg/tcpip/network/ipv6/ipv6.go @@ -761,6 +761,12 @@ func (e *endpoint) writePacket(r *stack.Route, pkt *stack.PacketBuffer, protocol } if packetMustBeFragmented(pkt, networkMTU) { + if pkt.NetworkPacketInfo.IsForwardedPacket { + // As per RFC 2460, section 4.5: + // Unlike IPv4, fragmentation in IPv6 is performed only by source nodes, + // not by routers along a packet's delivery path. + return &tcpip.ErrMessageTooLong{} + } sent, remain, err := e.handleFragments(r, networkMTU, pkt, protocol, func(fragPkt *stack.PacketBuffer) tcpip.Error { // TODO(gvisor.dev/issue/3884): Evaluate whether we want to send each // fragment one by one using WritePacket() (current strategy) or if we @@ -950,9 +956,8 @@ func (e *endpoint) forwardPacket(pkt *stack.PacketBuffer) ip.ForwardingError { switch err.(type) { case nil: case *tcpip.ErrNoRoute, *tcpip.ErrNetworkUnreachable: - // We return the original error rather than the result of returning - // the ICMP packet because the original error is more relevant to - // the caller. + // We return the original error rather than the result of returning the + // ICMP packet because the original error is more relevant to the caller. _ = e.protocol.returnError(&icmpReasonNetUnreachable{}, pkt) return &ip.ErrNoRoute{} default: @@ -971,13 +976,23 @@ func (e *endpoint) forwardPacket(pkt *stack.PacketBuffer) ip.ForwardingError { // each node that forwards the packet. newHdr.SetHopLimit(hopLimit - 1) - if err := r.WriteHeaderIncludedPacket(stack.NewPacketBuffer(stack.PacketBufferOptions{ + switch err := r.WriteHeaderIncludedPacket(stack.NewPacketBuffer(stack.PacketBufferOptions{ ReserveHeaderBytes: int(r.MaxHeaderLength()), Data: buffer.View(newHdr).ToVectorisedView(), - })); err != nil { + IsForwardedPacket: true, + })); err.(type) { + case nil: + return nil + case *tcpip.ErrMessageTooLong: + // As per RFC 4443, section 3.2: + // A Packet Too Big MUST be sent by a router in response to a packet that + // it cannot forward because the packet is larger than the MTU of the + // outgoing link. + _ = e.protocol.returnError(&icmpReasonPacketTooBig{}, pkt) + return &ip.ErrMessageTooLong{} + default: return &ip.ErrOther{Err: err} } - return nil } // HandlePacket is called by the link layer when new ipv6 packets arrive for @@ -1091,6 +1106,8 @@ func (e *endpoint) handleValidatedPacket(h header.IPv6, pkt *stack.PacketBuffer) e.stats.ip.Forwarding.Unrouteable.Increment() case *ip.ErrParameterProblem: e.stats.ip.Forwarding.ExtensionHeaderProblem.Increment() + case *ip.ErrMessageTooLong: + e.stats.ip.Forwarding.PacketTooBig.Increment() default: panic(fmt.Sprintf("unexpected error %s while trying to forward packet: %#v", err, pkt)) } diff --git a/pkg/tcpip/network/ipv6/ipv6_test.go b/pkg/tcpip/network/ipv6/ipv6_test.go index 8ebca735b..faf6a782e 100644 --- a/pkg/tcpip/network/ipv6/ipv6_test.go +++ b/pkg/tcpip/network/ipv6/ipv6_test.go @@ -3018,10 +3018,13 @@ func TestForwarding(t *testing.T) { Address: tcpip.Address(net.ParseIP("11::1").To16()), PrefixLen: 64, } + multicastIPv6Addr := tcpip.AddressWithPrefix{ + Address: tcpip.Address(net.ParseIP("ff00::").To16()), + PrefixLen: 64, + } remoteIPv6Addr1 := tcpip.Address(net.ParseIP("10::2").To16()) remoteIPv6Addr2 := tcpip.Address(net.ParseIP("11::2").To16()) unreachableIPv6Addr := tcpip.Address(net.ParseIP("12::2").To16()) - multicastIPv6Addr := tcpip.Address(net.ParseIP("ff00::").To16()) linkLocalIPv6Addr := tcpip.Address(net.ParseIP("fe80::").To16()) tests := []struct { @@ -3030,6 +3033,7 @@ func TestForwarding(t *testing.T) { TTL uint8 expectErrorICMP bool expectPacketForwarded bool + payloadLength int countUnrouteablePackets uint64 sourceAddr tcpip.Address destAddr tcpip.Address @@ -3090,12 +3094,12 @@ func TestForwarding(t *testing.T) { expectPacketUnrouteableError: true, }, { - name: "Multicast destination", - TTL: 2, - countUnrouteablePackets: 1, - sourceAddr: remoteIPv6Addr1, - destAddr: multicastIPv6Addr, - expectPacketUnrouteableError: true, + name: "Multicast destination", + TTL: 2, + countUnrouteablePackets: 1, + sourceAddr: remoteIPv6Addr1, + destAddr: multicastIPv6Addr.Address, + expectPacketForwarded: true, }, { name: "Link local destination", @@ -3172,7 +3176,7 @@ func TestForwarding(t *testing.T) { name: "Hopbyhop with unknown option discard and send icmp action (multicast)", TTL: 2, sourceAddr: remoteIPv6Addr1, - destAddr: multicastIPv6Addr, + destAddr: multicastIPv6Addr.Address, extHdr: func(nextHdr uint8) ([]byte, uint8, checker.NetworkChecker) { return []byte{ nextHdr, 1, @@ -3215,7 +3219,7 @@ func TestForwarding(t *testing.T) { name: "Hopbyhop with unknown option discard and send icmp action unless multicast dest (multicast)", TTL: 2, sourceAddr: remoteIPv6Addr1, - destAddr: multicastIPv6Addr, + destAddr: multicastIPv6Addr.Address, extHdr: func(nextHdr uint8) ([]byte, uint8, checker.NetworkChecker) { return []byte{ nextHdr, 1, @@ -3263,6 +3267,26 @@ func TestForwarding(t *testing.T) { }, expectExtensionHeaderError: true, }, + { + name: "Can't fragment", + TTL: 2, + payloadLength: header.IPv6MinimumMTU + 1, + expectErrorICMP: true, + sourceAddr: remoteIPv6Addr1, + destAddr: remoteIPv6Addr2, + icmpType: header.ICMPv6PacketTooBig, + icmpCode: header.ICMPv6UnusedCode, + }, + { + name: "Can't fragment multicast", + TTL: 2, + payloadLength: header.IPv6MinimumMTU + 1, + sourceAddr: remoteIPv6Addr1, + destAddr: multicastIPv6Addr.Address, + expectErrorICMP: true, + icmpType: header.ICMPv6PacketTooBig, + icmpCode: header.ICMPv6UnusedCode, + }, } for _, test := range tests { @@ -3299,6 +3323,10 @@ func TestForwarding(t *testing.T) { Destination: ipv6Addr2.Subnet(), NIC: nicID2, }, + { + Destination: multicastIPv6Addr.Subnet(), + NIC: nicID2, + }, }) if err := s.SetForwarding(ProtocolNumber, true); err != nil { @@ -3315,8 +3343,13 @@ func TestForwarding(t *testing.T) { } extHdrLen := len(extHdrBytes) - hdr := buffer.NewPrependable(header.IPv6MinimumSize + header.ICMPv6MinimumSize + extHdrLen) - icmp := header.ICMPv6(hdr.Prepend(header.ICMPv6MinimumSize)) + ipHeaderLength := header.IPv6MinimumSize + icmpHeaderLength := header.ICMPv6MinimumSize + totalLength := ipHeaderLength + icmpHeaderLength + test.payloadLength + extHdrLen + hdr := buffer.NewPrependable(totalLength) + hdr.Prepend(test.payloadLength) + icmp := header.ICMPv6(hdr.Prepend(icmpHeaderLength)) + icmp.SetIdent(randomIdent) icmp.SetSequence(randomSequence) icmp.SetType(header.ICMPv6EchoRequest) @@ -3328,9 +3361,9 @@ func TestForwarding(t *testing.T) { Dst: test.destAddr, })) copy(hdr.Prepend(extHdrLen), extHdrBytes) - ip := header.IPv6(hdr.Prepend(header.IPv6MinimumSize)) + ip := header.IPv6(hdr.Prepend(ipHeaderLength)) ip.Encode(&header.IPv6Fields{ - PayloadLength: header.ICMPv6MinimumSize, + PayloadLength: uint16(header.ICMPv6MinimumSize + test.payloadLength), TransportProtocol: transportProtocol, HopLimit: test.TTL, SrcAddr: test.sourceAddr, @@ -3347,6 +3380,19 @@ func TestForwarding(t *testing.T) { t.Fatalf("expected ICMP packet type %d through incoming NIC", test.icmpType) } + // As per RFC 4443, page 9: + // + // The returned ICMP packet will contain as much of invoking packet + // as possible without the ICMPv6 packet exceeding the minimum IPv6 + // MTU. + expectedICMPPayloadLength := func() int { + maxICMPPayloadLength := header.IPv6MinimumMTU - ipHeaderLength - icmpHeaderLength + if len(hdr.View()) > maxICMPPayloadLength { + return maxICMPPayloadLength + } + return len(hdr.View()) + } + checker.IPv6(t, header.IPv6(stack.PayloadSince(reply.Pkt.NetworkHeader())), checker.SrcAddr(ipv6Addr1.Address), checker.DstAddr(test.sourceAddr), @@ -3354,7 +3400,7 @@ func TestForwarding(t *testing.T) { checker.ICMPv6( checker.ICMPv6Type(test.icmpType), checker.ICMPv6Code(test.icmpCode), - checker.ICMPv6Payload([]byte(hdr.View())), + checker.ICMPv6Payload([]byte(hdr.View()[0:expectedICMPPayloadLength()])), ), ) @@ -3420,6 +3466,10 @@ func TestForwarding(t *testing.T) { if got, want := s.Stats().IP.Forwarding.ExtensionHeaderProblem.Value(), boolToInt(test.expectExtensionHeaderError); got != want { t.Errorf("got s.Stats().IP.Forwarding.ExtensionHeaderProblem.Value() = %d, want = %d", got, want) } + + if got, want := s.Stats().IP.Forwarding.PacketTooBig.Value(), boolToInt(test.icmpType == header.ICMPv6PacketTooBig); got != want { + t.Errorf("got s.Stats().IP.Forwarding.PacketTooBig.Value() = %d, want = %d", got, want) + } }) } } diff --git a/pkg/tcpip/stack/packet_buffer.go b/pkg/tcpip/stack/packet_buffer.go index 9527416cf..fc3c54e34 100644 --- a/pkg/tcpip/stack/packet_buffer.go +++ b/pkg/tcpip/stack/packet_buffer.go @@ -40,6 +40,10 @@ type PacketBufferOptions struct { // Data is the initial unparsed data for the new packet. If set, it will be // owned by the new packet. Data buffer.VectorisedView + + // IsForwardedPacket identifies that the PacketBuffer being created is for a + // forwarded packet. + IsForwardedPacket bool } // A PacketBuffer contains all the data of a network packet. @@ -132,6 +136,9 @@ func NewPacketBuffer(opts PacketBufferOptions) *PacketBuffer { if opts.ReserveHeaderBytes != 0 { pk.header = buffer.NewPrependable(opts.ReserveHeaderBytes) } + if opts.IsForwardedPacket { + pk.NetworkPacketInfo.IsForwardedPacket = opts.IsForwardedPacket + } return pk } diff --git a/pkg/tcpip/stack/registration.go b/pkg/tcpip/stack/registration.go index e26225552..a82c807b4 100644 --- a/pkg/tcpip/stack/registration.go +++ b/pkg/tcpip/stack/registration.go @@ -55,6 +55,9 @@ type NetworkPacketInfo struct { // LocalAddressBroadcast is true if the packet's local address is a broadcast // address. LocalAddressBroadcast bool + + // IsForwardedPacket is true if the packet is being forwarded. + IsForwardedPacket bool } // TransportErrorKind enumerates error types that are handled by the transport diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go index f9acd4bb8..7b9c8cd4f 100644 --- a/pkg/tcpip/tcpip.go +++ b/pkg/tcpip/tcpip.go @@ -1548,6 +1548,10 @@ type IPForwardingStats struct { // because they contained a link-local destination address. LinkLocalDestination *StatCounter + // PacketTooBig is the number of IP packets which were dropped because they + // were too big for the outgoing MTU. + PacketTooBig *StatCounter + // ExtensionHeaderProblem is the number of IP packets which were dropped // because of a problem encountered when processing an IPv6 extension // header. -- cgit v1.2.3