diff options
Diffstat (limited to 'pkg')
-rw-r--r-- | pkg/sentry/socket/netstack/netstack.go | 1 | ||||
-rw-r--r-- | pkg/tcpip/network/internal/ip/errors.go | 12 | ||||
-rw-r--r-- | pkg/tcpip/network/internal/ip/stats.go | 9 | ||||
-rw-r--r-- | pkg/tcpip/network/ipv4/icmp.go | 22 | ||||
-rw-r--r-- | pkg/tcpip/network/ipv4/ipv4.go | 29 | ||||
-rw-r--r-- | pkg/tcpip/network/ipv4/ipv4_test.go | 208 | ||||
-rw-r--r-- | pkg/tcpip/network/ipv6/icmp.go | 78 | ||||
-rw-r--r-- | pkg/tcpip/network/ipv6/ipv6.go | 29 | ||||
-rw-r--r-- | pkg/tcpip/network/ipv6/ipv6_test.go | 78 | ||||
-rw-r--r-- | pkg/tcpip/stack/packet_buffer.go | 7 | ||||
-rw-r--r-- | pkg/tcpip/stack/registration.go | 3 | ||||
-rw-r--r-- | pkg/tcpip/tcpip.go | 4 |
12 files changed, 392 insertions, 88 deletions
diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go index 3fd22f936..0b64a24c3 100644 --- a/pkg/sentry/socket/netstack/netstack.go +++ b/pkg/sentry/socket/netstack/netstack.go @@ -205,6 +205,7 @@ var Metrics = tcpip.Stats{ LinkLocalSource: mustCreateMetric("/netstack/ip/forwarding/link_local_source_address", "Number of IP packets received which could not be forwarded due to a link-local source address."), LinkLocalDestination: mustCreateMetric("/netstack/ip/forwarding/link_local_destination_address", "Number of IP packets received which could not be forwarded due to a link-local destination address."), ExtensionHeaderProblem: mustCreateMetric("/netstack/ip/forwarding/extension_header_problem", "Number of IP packets received which could not be forwarded due to a problem processing their IPv6 extension headers."), + PacketTooBig: mustCreateMetric("/netstack/ip/forwarding/packet_too_big", "Number of IP packets received which could not fit within the outgoing MTU."), Errors: mustCreateMetric("/netstack/ip/forwarding/errors", "Number of IP packets which couldn't be forwarded."), }, }, diff --git a/pkg/tcpip/network/internal/ip/errors.go b/pkg/tcpip/network/internal/ip/errors.go index d3577b377..94f1cd1cb 100644 --- a/pkg/tcpip/network/internal/ip/errors.go +++ b/pkg/tcpip/network/internal/ip/errors.go @@ -58,14 +58,22 @@ func (*ErrLinkLocalDestinationAddress) isForwardingError() {} func (*ErrLinkLocalDestinationAddress) String() string { return "link local destination address" } -// ErrNoRoute indicates the Netstack couldn't find a route for the -// received packet. +// ErrNoRoute indicates that a route for the received packet couldn't be found. type ErrNoRoute struct{} func (*ErrNoRoute) isForwardingError() {} func (*ErrNoRoute) String() string { return "no route" } +// ErrMessageTooLong indicates the packet was too big for the outgoing MTU. 
+// +// +stateify savable +type ErrMessageTooLong struct{} + +func (*ErrMessageTooLong) isForwardingError() {} + +func (*ErrMessageTooLong) String() string { return "message too long" } + // ErrOther indicates the packet coould not be forwarded for a reason // captured by the contained error. type ErrOther struct { diff --git a/pkg/tcpip/network/internal/ip/stats.go b/pkg/tcpip/network/internal/ip/stats.go index 68b8b550e..444515d40 100644 --- a/pkg/tcpip/network/internal/ip/stats.go +++ b/pkg/tcpip/network/internal/ip/stats.go @@ -38,6 +38,10 @@ type MultiCounterIPForwardingStats struct { // because they contained a link-local destination address. LinkLocalDestination tcpip.MultiCounterStat + // PacketTooBig is the number of IP packets which were dropped because they + // were too big for the outgoing MTU. + PacketTooBig tcpip.MultiCounterStat + // ExtensionHeaderProblem is the number of IP packets which were dropped // because of a problem encountered when processing an IPv6 extension // header. @@ -55,6 +59,7 @@ func (m *MultiCounterIPForwardingStats) Init(a, b *tcpip.IPForwardingStats) { m.LinkLocalSource.Init(a.LinkLocalSource, b.LinkLocalSource) m.LinkLocalDestination.Init(a.LinkLocalDestination, b.LinkLocalDestination) m.ExtensionHeaderProblem.Init(a.ExtensionHeaderProblem, b.ExtensionHeaderProblem) + m.PacketTooBig.Init(a.PacketTooBig, b.PacketTooBig) m.ExhaustedTTL.Init(a.ExhaustedTTL, b.ExhaustedTTL) } @@ -82,8 +87,8 @@ type MultiCounterIPStats struct { // wire. InvalidSourceAddressesReceived tcpip.MultiCounterStat - // PacketsDelivered is the number of incoming IP packets that are - // successfully delivered to the transport layer. + // PacketsDelivered is the number of incoming IP packets successfully + // delivered to the transport layer. PacketsDelivered tcpip.MultiCounterStat // PacketsSent is the number of IP packets sent via WritePacket. 
diff --git a/pkg/tcpip/network/ipv4/icmp.go b/pkg/tcpip/network/ipv4/icmp.go index c8ed1ce79..d1a82b584 100644 --- a/pkg/tcpip/network/ipv4/icmp.go +++ b/pkg/tcpip/network/ipv4/icmp.go @@ -387,6 +387,8 @@ func (e *endpoint) handleICMP(pkt *stack.PacketBuffer) { // icmpReason is a marker interface for IPv4 specific ICMP errors. type icmpReason interface { isICMPReason() + // isForwarding indicates whether or not the error arose while attempting to + // forward a packet. isForwarding() bool } @@ -463,6 +465,22 @@ func (*icmpReasonNetworkUnreachable) isForwarding() bool { return true } +// icmpReasonFragmentationNeeded is an error where a packet requires +// fragmentation while also having the Don't Fragment flag set, as per RFC 792 +// page 3, Destination Unreachable Message. +type icmpReasonFragmentationNeeded struct{} + +func (*icmpReasonFragmentationNeeded) isICMPReason() {} +func (*icmpReasonFragmentationNeeded) isForwarding() bool { + // If we hit a Don't Fragment error, then we know we are operating as a router. + // As per RFC 792 page 4, Destination Unreachable Message, + // + // Another case is when a datagram must be fragmented to be forwarded by a + // gateway yet the Don't Fragment flag is on. In this case the gateway must + // discard the datagram and may return a destination unreachable message. + return true +} + // returnError takes an error descriptor and generates the appropriate ICMP // error packet for IPv4 and sends it back to the remote device that sent // the problematic packet. 
It incorporates as much of that packet as @@ -635,6 +653,10 @@ func (p *protocol) returnError(reason icmpReason, pkt *stack.PacketBuffer) tcpip icmpHdr.SetType(header.ICMPv4DstUnreachable) icmpHdr.SetCode(header.ICMPv4NetUnreachable) counter = sent.dstUnreachable + case *icmpReasonFragmentationNeeded: + icmpHdr.SetType(header.ICMPv4DstUnreachable) + icmpHdr.SetCode(header.ICMPv4FragmentationNeeded) + counter = sent.dstUnreachable case *icmpReasonTTLExceeded: icmpHdr.SetType(header.ICMPv4TimeExceeded) icmpHdr.SetCode(header.ICMPv4TTLExceeded) diff --git a/pkg/tcpip/network/ipv4/ipv4.go b/pkg/tcpip/network/ipv4/ipv4.go index 4031032d0..aef83e834 100644 --- a/pkg/tcpip/network/ipv4/ipv4.go +++ b/pkg/tcpip/network/ipv4/ipv4.go @@ -434,6 +434,12 @@ func (e *endpoint) writePacket(r *stack.Route, pkt *stack.PacketBuffer, headerIn } if packetMustBeFragmented(pkt, networkMTU) { + h := header.IPv4(pkt.NetworkHeader().View()) + if h.Flags()&header.IPv4FlagDontFragment != 0 && pkt.NetworkPacketInfo.IsForwardedPacket { + // TODO(gvisor.dev/issue/5919): Handle error condition in which DontFragment + // is set but the packet must be fragmented for the non-forwarding case. + return &tcpip.ErrMessageTooLong{} + } sent, remain, err := e.handleFragments(r, networkMTU, pkt, func(fragPkt *stack.PacketBuffer) tcpip.Error { // TODO(gvisor.dev/issue/3884): Evaluate whether we want to send each // fragment one by one using WritePacket() (current strategy) or if we @@ -695,13 +701,28 @@ func (e *endpoint) forwardPacket(pkt *stack.PacketBuffer) ip.ForwardingError { // spent, the field must be decremented by 1. 
newHdr.SetTTL(ttl - 1) - if err := r.WriteHeaderIncludedPacket(stack.NewPacketBuffer(stack.PacketBufferOptions{ + switch err := r.WriteHeaderIncludedPacket(stack.NewPacketBuffer(stack.PacketBufferOptions{ ReserveHeaderBytes: int(r.MaxHeaderLength()), Data: buffer.View(newHdr).ToVectorisedView(), - })); err != nil { + IsForwardedPacket: true, + })); err.(type) { + case nil: + return nil + case *tcpip.ErrMessageTooLong: + // As per RFC 792, page 4, Destination Unreachable: + // + // Another case is when a datagram must be fragmented to be forwarded by a + // gateway yet the Don't Fragment flag is on. In this case the gateway must + // discard the datagram and may return a destination unreachable message. + // + // WriteHeaderIncludedPacket checks for the presence of the Don't Fragment bit + // while sending the packet and returns this error iff fragmentation is + // necessary and the bit is also set. + _ = e.protocol.returnError(&icmpReasonFragmentationNeeded{}, pkt) + return &ip.ErrMessageTooLong{} + default: return &ip.ErrOther{Err: err} } - return nil } // HandlePacket is called by the link layer when new ipv4 packets arrive for @@ -830,6 +851,8 @@ func (e *endpoint) handleValidatedPacket(h header.IPv4, pkt *stack.PacketBuffer) case *ip.ErrParameterProblem: e.protocol.stack.Stats().MalformedRcvdPackets.Increment() stats.ip.MalformedPacketsReceived.Increment() + case *ip.ErrMessageTooLong: + stats.ip.Forwarding.PacketTooBig.Increment() default: panic(fmt.Sprintf("unexpected error %s while trying to forward packet: %#v", err, pkt)) } diff --git a/pkg/tcpip/network/ipv4/ipv4_test.go b/pkg/tcpip/network/ipv4/ipv4_test.go index 7a7cad04a..3c8a39973 100644 --- a/pkg/tcpip/network/ipv4/ipv4_test.go +++ b/pkg/tcpip/network/ipv4/ipv4_test.go @@ -112,6 +112,10 @@ func TestExcludeBroadcast(t *testing.T) { }) } +type forwardedPacket struct { + fragments []fragmentInfo +} + func TestForwarding(t *testing.T) { const ( nicID1 = 1 @@ -129,6 +133,7 @@ func TestForwarding(t 
*testing.T) { Address: tcpip.Address(net.ParseIP("11.0.0.1").To4()), PrefixLen: 8, } + linkAddr2 := tcpip.LinkAddress("\x02\x03\x03\x04\x05\x06") remoteIPv4Addr1 := tcpip.Address(net.ParseIP("10.0.0.2").To4()) remoteIPv4Addr2 := tcpip.Address(net.ParseIP("11.0.0.2").To4()) unreachableIPv4Addr := tcpip.Address(net.ParseIP("12.0.0.2").To4()) @@ -141,7 +146,9 @@ func TestForwarding(t *testing.T) { sourceAddr tcpip.Address destAddr tcpip.Address expectErrorICMP bool - expectPacketForwarded bool + ipFlags uint8 + mtu uint32 + payloadLength int options header.IPv4Options forwardedOptions header.IPv4Options icmpType header.ICMPv4Type @@ -149,6 +156,8 @@ func TestForwarding(t *testing.T) { expectPacketUnrouteableError bool expectLinkLocalSourceError bool expectLinkLocalDestError bool + expectPacketForwarded bool + expectedFragmentsForwarded []fragmentInfo }{ { name: "TTL of zero", @@ -158,6 +167,7 @@ func TestForwarding(t *testing.T) { expectErrorICMP: true, icmpType: header.ICMPv4TimeExceeded, icmpCode: header.ICMPv4TTLExceeded, + mtu: ipv4.MaxTotalSize, }, { name: "TTL of one", @@ -165,6 +175,7 @@ func TestForwarding(t *testing.T) { sourceAddr: remoteIPv4Addr1, destAddr: remoteIPv4Addr2, expectPacketForwarded: true, + mtu: ipv4.MaxTotalSize, }, { name: "TTL of two", @@ -172,6 +183,7 @@ func TestForwarding(t *testing.T) { sourceAddr: remoteIPv4Addr1, destAddr: remoteIPv4Addr2, expectPacketForwarded: true, + mtu: ipv4.MaxTotalSize, }, { name: "Max TTL", @@ -179,6 +191,7 @@ func TestForwarding(t *testing.T) { sourceAddr: remoteIPv4Addr1, destAddr: remoteIPv4Addr2, expectPacketForwarded: true, + mtu: ipv4.MaxTotalSize, }, { name: "four EOL options", @@ -186,6 +199,7 @@ func TestForwarding(t *testing.T) { sourceAddr: remoteIPv4Addr1, destAddr: remoteIPv4Addr2, expectPacketForwarded: true, + mtu: ipv4.MaxTotalSize, options: header.IPv4Options{0, 0, 0, 0}, forwardedOptions: header.IPv4Options{0, 0, 0, 0}, }, @@ -194,6 +208,7 @@ func TestForwarding(t *testing.T) { TTL: 2, 
sourceAddr: remoteIPv4Addr1, destAddr: remoteIPv4Addr2, + mtu: ipv4.MaxTotalSize, options: header.IPv4Options{ 68, 12, 13, 0xF1, 192, 168, 1, 12, @@ -208,6 +223,7 @@ func TestForwarding(t *testing.T) { TTL: 2, sourceAddr: remoteIPv4Addr1, destAddr: remoteIPv4Addr2, + mtu: ipv4.MaxTotalSize, options: header.IPv4Options{ 68, 24, 21, 0x00, 1, 2, 3, 4, @@ -231,6 +247,7 @@ func TestForwarding(t *testing.T) { TTL: 2, sourceAddr: remoteIPv4Addr1, destAddr: remoteIPv4Addr2, + mtu: ipv4.MaxTotalSize, options: header.IPv4Options{ 68, 12, 13, 0x11, 192, 168, 1, 12, @@ -254,6 +271,7 @@ func TestForwarding(t *testing.T) { sourceAddr: remoteIPv4Addr1, destAddr: unreachableIPv4Addr, expectErrorICMP: true, + mtu: ipv4.MaxTotalSize, icmpType: header.ICMPv4DstUnreachable, icmpCode: header.ICMPv4NetUnreachable, expectPacketUnrouteableError: true, @@ -278,6 +296,51 @@ func TestForwarding(t *testing.T) { destAddr: remoteIPv4Addr2, expectLinkLocalSourceError: true, }, + { + name: "Fragmentation needed and DF set", + TTL: 2, + sourceAddr: remoteIPv4Addr1, + destAddr: remoteIPv4Addr2, + ipFlags: header.IPv4FlagDontFragment, + // We've picked this MTU because it is: + // + // 1) Greater than the minimum MTU that IPv4 hosts are required to process + // (576 bytes). As per RFC 1812, Section 4.3.2.3: + // + // The ICMP datagram SHOULD contain as much of the original datagram as + // possible without the length of the ICMP datagram exceeding 576 bytes. + // + // Therefore, setting an MTU greater than 576 bytes ensures that we can fit a + // complete ICMP packet on the incoming endpoint (and make assertions about + // it). + // + // 2) Less than `ipv4.MaxTotalSize`, which lets us build an IPv4 packet whose + // size exceeds the MTU. 
+ mtu: 1000, + payloadLength: 1004, + expectErrorICMP: true, + icmpType: header.ICMPv4DstUnreachable, + icmpCode: header.ICMPv4FragmentationNeeded, + }, + { + name: "Fragmentation needed and DF not set", + TTL: 2, + sourceAddr: remoteIPv4Addr1, + destAddr: remoteIPv4Addr2, + mtu: 1000, + payloadLength: 1004, + expectPacketForwarded: true, + // Combined, these fragments have length of 1012 octets, which is equal to + // the length of the payload (1004 octets), plus the length of the ICMP + // header (8 octets). + expectedFragmentsForwarded: []fragmentInfo{ + // The first fragment has a length of the greatest multiple of 8 which is + // less than or equal to `mtu - header.IPv4MinimumSize`. + {offset: 0, payloadSize: uint16(976), more: true}, + // The next fragment holds the rest of the packet. + {offset: uint16(976), payloadSize: 36, more: false}, + }, + }, } for _, test := range tests { t.Run(test.name, func(t *testing.T) { @@ -293,7 +356,7 @@ func TestForwarding(t *testing.T) { clock.Advance(time.Millisecond * randomTimeOffset) // We expect at most a single packet in response to our ICMP Echo Request. 
- e1 := channel.New(1, ipv4.MaxTotalSize, "") + e1 := channel.New(1, test.mtu, "") if err := s.CreateNIC(nicID1, e1); err != nil { t.Fatalf("CreateNIC(%d, _): %s", nicID1, err) } @@ -302,7 +365,11 @@ func TestForwarding(t *testing.T) { t.Fatalf("AddProtocolAddress(%d, %#v): %s", nicID1, ipv4ProtoAddr1, err) } - e2 := channel.New(1, ipv4.MaxTotalSize, "") + expectedEmittedPacketCount := 1 + if len(test.expectedFragmentsForwarded) > expectedEmittedPacketCount { + expectedEmittedPacketCount = len(test.expectedFragmentsForwarded) + } + e2 := channel.New(expectedEmittedPacketCount, test.mtu, linkAddr2) if err := s.CreateNIC(nicID2, e2); err != nil { t.Fatalf("CreateNIC(%d, _): %s", nicID2, err) } @@ -330,9 +397,11 @@ func TestForwarding(t *testing.T) { if ipHeaderLength > header.IPv4MaximumHeaderSize { t.Fatalf("got ipHeaderLength = %d, want <= %d ", ipHeaderLength, header.IPv4MaximumHeaderSize) } - totalLen := uint16(ipHeaderLength + header.ICMPv4MinimumSize) - hdr := buffer.NewPrependable(int(totalLen)) - icmp := header.ICMPv4(hdr.Prepend(header.ICMPv4MinimumSize)) + icmpHeaderLength := header.ICMPv4MinimumSize + totalLength := ipHeaderLength + icmpHeaderLength + test.payloadLength + hdr := buffer.NewPrependable(totalLength) + hdr.Prepend(test.payloadLength) + icmp := header.ICMPv4(hdr.Prepend(icmpHeaderLength)) icmp.SetIdent(randomIdent) icmp.SetSequence(randomSequence) icmp.SetType(header.ICMPv4Echo) @@ -341,11 +410,12 @@ func TestForwarding(t *testing.T) { icmp.SetChecksum(^header.Checksum(icmp, 0)) ip := header.IPv4(hdr.Prepend(ipHeaderLength)) ip.Encode(&header.IPv4Fields{ - TotalLength: totalLen, + TotalLength: uint16(totalLength), Protocol: uint8(header.ICMPv4ProtocolNumber), TTL: test.TTL, SrcAddr: test.sourceAddr, DstAddr: test.destAddr, + Flags: test.ipFlags, }) if len(test.options) != 0 { ip.SetHeaderLength(uint8(ipHeaderLength)) @@ -360,6 +430,7 @@ func TestForwarding(t *testing.T) { requestPkt := stack.NewPacketBuffer(stack.PacketBufferOptions{ Data: 
hdr.View().ToVectorisedView(), }) + requestPkt.NetworkProtocolNumber = header.IPv4ProtocolNumber e1.InjectInbound(header.IPv4ProtocolNumber, requestPkt) reply, ok := e1.Read() @@ -368,6 +439,18 @@ func TestForwarding(t *testing.T) { t.Fatalf("expected ICMP packet type %d through incoming NIC", test.icmpType) } + // We expect the ICMP packet to contain as much of the original packet as + // possible up to a limit of 576 bytes, split between payload, IP header, + // and ICMP header. + expectedICMPPayloadLength := func() int { + maxICMPPacketLength := header.IPv4MinimumProcessableDatagramSize + maxICMPPayloadLength := maxICMPPacketLength - icmpHeaderLength - ipHeaderLength + if len(hdr.View()) > maxICMPPayloadLength { + return maxICMPPayloadLength + } + return len(hdr.View()) + } + checker.IPv4(t, header.IPv4(stack.PayloadSince(reply.Pkt.NetworkHeader())), checker.SrcAddr(ipv4Addr1.Address), checker.DstAddr(test.sourceAddr), @@ -376,41 +459,58 @@ func TestForwarding(t *testing.T) { checker.ICMPv4Checksum(), checker.ICMPv4Type(test.icmpType), checker.ICMPv4Code(test.icmpCode), - checker.ICMPv4Payload([]byte(hdr.View())), + checker.ICMPv4Payload([]byte(hdr.View()[0:expectedICMPPayloadLength()])), ), ) - - if n := e2.Drain(); n != 0 { - t.Fatalf("got e2.Drain() = %d, want = 0", n) - } } else if ok { t.Fatalf("expected no ICMP packet through incoming NIC, instead found: %#v", reply) } - reply, ok = e2.Read() if test.expectPacketForwarded { - if !ok { - t.Fatal("expected ICMP Echo packet through outgoing NIC") - } + if len(test.expectedFragmentsForwarded) != 0 { + fragmentedPackets := []*stack.PacketBuffer{} + for i := 0; i < len(test.expectedFragmentsForwarded); i++ { + reply, ok = e2.Read() + if !ok { + t.Fatal("expected ICMP Echo fragment through outgoing NIC") + } + fragmentedPackets = append(fragmentedPackets, reply.Pkt) + } - checker.IPv4(t, header.IPv4(stack.PayloadSince(reply.Pkt.NetworkHeader())), - checker.SrcAddr(test.sourceAddr), - 
checker.DstAddr(test.destAddr), - checker.TTL(test.TTL-1), - checker.IPv4Options(test.forwardedOptions), - checker.ICMPv4( - checker.ICMPv4Checksum(), - checker.ICMPv4Type(header.ICMPv4Echo), - checker.ICMPv4Code(header.ICMPv4UnusedCode), - checker.ICMPv4Payload(nil), - ), - ) + // The forwarded packet's TTL will have been decremented. + ipHeader := header.IPv4(requestPkt.NetworkHeader().View()) + ipHeader.SetTTL(ipHeader.TTL() - 1) + + // Forwarded packets have available header bytes equalling the sum of the + // maximum IP header size and the maximum size allocated for link layer + // headers. In this case, no size is allocated for link layer headers. + expectedAvailableHeaderBytes := header.IPv4MaximumHeaderSize + if err := compareFragments(fragmentedPackets, requestPkt, uint32(test.mtu), test.expectedFragmentsForwarded, header.ICMPv4ProtocolNumber, true /* withIPHeader */, expectedAvailableHeaderBytes); err != nil { + t.Error(err) + } + } else { + reply, ok = e2.Read() + if !ok { + t.Fatal("expected ICMP Echo packet through outgoing NIC") + } - if n := e1.Drain(); n != 0 { - t.Fatalf("got e1.Drain() = %d, want = 0", n) + checker.IPv4(t, header.IPv4(stack.PayloadSince(reply.Pkt.NetworkHeader())), + checker.SrcAddr(test.sourceAddr), + checker.DstAddr(test.destAddr), + checker.TTL(test.TTL-1), + checker.IPv4Options(test.forwardedOptions), + checker.ICMPv4( + checker.ICMPv4Checksum(), + checker.ICMPv4Type(header.ICMPv4Echo), + checker.ICMPv4Code(header.ICMPv4UnusedCode), + checker.ICMPv4Payload(nil), + ), + ) + } + } else { + if reply, ok = e2.Read(); ok { + t.Fatalf("expected no ICMP Echo packet through outgoing NIC, instead found: %#v", reply) } - } else if ok { - t.Fatalf("expected no ICMP Echo packet through outgoing NIC, instead found: %#v", reply) } boolToInt := func(val bool) uint64 { @@ -443,6 +543,10 @@ func TestForwarding(t *testing.T) { if got, want := s.Stats().IP.Forwarding.Errors.Value(), boolToInt(!test.expectPacketForwarded); got != want { 
t.Errorf("got s.Stats().IP.Forwarding.Errors.Value() = %d, want = %d", got, want) } + + if got, want := s.Stats().IP.Forwarding.PacketTooBig.Value(), boolToInt(test.icmpCode == header.ICMPv4FragmentationNeeded); got != want { + t.Errorf("got s.Stats().IP.Forwarding.PacketTooBig.Value() = %d, want = %d", got, want) + } }) } } @@ -1264,13 +1368,25 @@ func TestIPv4Sanity(t *testing.T) { } } -// comparePayloads compared the contents of all the packets against the contents -// of the source packet. -func compareFragments(packets []*stack.PacketBuffer, sourcePacket *stack.PacketBuffer, mtu uint32, wantFragments []fragmentInfo, proto tcpip.TransportProtocolNumber) error { +// compareFragments compares the contents of a set of fragmented packets against +// the contents of a source packet. +// +// If withIPHeader is set to true, we will validate the fragmented packets' IP +// headers against the source packet's IP header. If set to false, we validate +// the fragmented packets' IP headers against each other. +func compareFragments(packets []*stack.PacketBuffer, sourcePacket *stack.PacketBuffer, mtu uint32, wantFragments []fragmentInfo, proto tcpip.TransportProtocolNumber, withIPHeader bool, expectedAvailableHeaderBytes int) error { // Make a complete array of the sourcePacket packet. - source := header.IPv4(packets[0].NetworkHeader().View()) + var source header.IPv4 vv := buffer.NewVectorisedView(sourcePacket.Size(), sourcePacket.Views()) - source = append(source, vv.ToView()...) + + // If the packet to be fragmented contains an IPv4 header, use that header for + // validating fragment headers. Else, use the header of the first fragment. + if withIPHeader { + source = header.IPv4(vv.ToView()) + } else { + source = header.IPv4(packets[0].NetworkHeader().View()) + source = append(source, vv.ToView()...) + } // Make a copy of the IP header, which will be modified in some fields to make // an expected header. 
@@ -1293,12 +1409,12 @@ func compareFragments(packets []*stack.PacketBuffer, sourcePacket *stack.PacketB if got := fragmentIPHeader.TransportProtocol(); got != proto { return fmt.Errorf("fragment #%d: got fragmentIPHeader.TransportProtocol() = %d, want = %d", i, got, uint8(proto)) } - if got := packet.AvailableHeaderBytes(); got != extraHeaderReserve { - return fmt.Errorf("fragment #%d: got packet.AvailableHeaderBytes() = %d, want = %d", i, got, extraHeaderReserve) - } if got, want := packet.NetworkProtocolNumber, sourcePacket.NetworkProtocolNumber; got != want { return fmt.Errorf("fragment #%d: got fragment.NetworkProtocolNumber = %d, want = %d", i, got, want) } + if got := packet.AvailableHeaderBytes(); got != expectedAvailableHeaderBytes { + return fmt.Errorf("fragment #%d: got packet.AvailableHeaderBytes() = %d, want = %d", i, got, expectedAvailableHeaderBytes) + } if got, want := fragmentIPHeader.CalculateChecksum(), uint16(0xffff); got != want { return fmt.Errorf("fragment #%d: got ip.CalculateChecksum() = %#x, want = %#x", i, got, want) } @@ -1314,6 +1430,14 @@ func compareFragments(packets []*stack.PacketBuffer, sourcePacket *stack.PacketB sourceCopy.SetTotalLength(wantFragments[i].payloadSize + header.IPv4MinimumSize) sourceCopy.SetChecksum(0) sourceCopy.SetChecksum(^sourceCopy.CalculateChecksum()) + + // If we are validating against the original IP header, we should exclude the + // ID field, which will only be set for fragmented packets. 
+ if withIPHeader { + fragmentIPHeader.SetID(0) + fragmentIPHeader.SetChecksum(0) + fragmentIPHeader.SetChecksum(^fragmentIPHeader.CalculateChecksum()) + } if diff := cmp.Diff(fragmentIPHeader[:fragmentIPHeader.HeaderLength()], sourceCopy[:sourceCopy.HeaderLength()]); diff != "" { return fmt.Errorf("fragment #%d: fragmentIPHeader mismatch (-want +got):\n%s", i, diff) } @@ -1442,7 +1566,7 @@ func TestFragmentationWritePacket(t *testing.T) { if got := r.Stats().IP.OutgoingPacketErrors.Value(); got != 0 { t.Errorf("got r.Stats().IP.OutgoingPacketErrors.Value() = %d, want = 0", got) } - if err := compareFragments(ep.WrittenPackets, source, ft.mtu, ft.wantFragments, tcp.ProtocolNumber); err != nil { + if err := compareFragments(ep.WrittenPackets, source, ft.mtu, ft.wantFragments, tcp.ProtocolNumber, false /* withIPHeader */, extraHeaderReserve); err != nil { t.Error(err) } }) @@ -1523,7 +1647,7 @@ func TestFragmentationWritePackets(t *testing.T) { } fragments := ep.WrittenPackets[test.insertBefore : len(ft.wantFragments)+test.insertBefore] - if err := compareFragments(fragments, pkt, ft.mtu, ft.wantFragments, tcp.ProtocolNumber); err != nil { + if err := compareFragments(fragments, pkt, ft.mtu, ft.wantFragments, tcp.ProtocolNumber, false /* withIPHeader */, extraHeaderReserve); err != nil { t.Error(err) } }) diff --git a/pkg/tcpip/network/ipv6/icmp.go b/pkg/tcpip/network/ipv6/icmp.go index 247a07dc2..4051fda07 100644 --- a/pkg/tcpip/network/ipv6/icmp.go +++ b/pkg/tcpip/network/ipv6/icmp.go @@ -955,7 +955,19 @@ func (*endpoint) ResolveStaticAddress(addr tcpip.Address) (tcpip.LinkAddress, bo // icmpReason is a marker interface for IPv6 specific ICMP errors. type icmpReason interface { isICMPReason() + // isForwarding indicates whether or not the error arose while attempting to + // forward a packet. 
isForwarding() bool + // respondsToMulticast indicates whether this error falls under one of the + // exceptions outlined by RFC 4443 section 2.4 point e.3: + // + // (e.3) A packet destined to an IPv6 multicast address. (There are two + // exceptions to this rule: (1) the Packet Too Big Message (Section 3.2) to + // allow Path MTU discovery to work for IPv6 multicast, and (2) the Parameter + // Problem Message, Code 2 (Section 3.4) reporting an unrecognized IPv6 + // option (see Section 4.2 of [IPv6]) that has the Option Type highest- + // order two bits set to 10). + respondsToMulticast() bool } // icmpReasonParameterProblem is an error during processing of extension headers @@ -963,18 +975,6 @@ type icmpReason interface { type icmpReasonParameterProblem struct { code header.ICMPv6Code - // respondToMulticast indicates that we are sending a packet that falls under - // the exception outlined by RFC 4443 section 2.4 point e.3 exception 2: - // - // (e.3) A packet destined to an IPv6 multicast address. (There are - // two exceptions to this rule: (1) the Packet Too Big Message - // (Section 3.2) to allow Path MTU discovery to work for IPv6 - // multicast, and (2) the Parameter Problem Message, Code 2 - // (Section 3.4) reporting an unrecognized IPv6 option (see - // Section 4.2 of [IPv6]) that has the Option Type highest- - // order two bits set to 10). - respondToMulticast bool - // pointer is defined in the RFC 4443 setion 3.4 which reads: // // Pointer Identifies the octet offset within the invoking packet @@ -985,9 +985,9 @@ type icmpReasonParameterProblem struct { // in the maximum size of an ICMPv6 error message. pointer uint32 - // forwarding indicates that the problem arose while we were trying to forward - // a packet. 
forwarding bool + + respondToMulticast bool } func (*icmpReasonParameterProblem) isICMPReason() {} @@ -995,6 +995,10 @@ func (p *icmpReasonParameterProblem) isForwarding() bool { return p.forwarding } +func (p *icmpReasonParameterProblem) respondsToMulticast() bool { + return p.respondToMulticast +} + // icmpReasonPortUnreachable is an error where the transport protocol has no // listener and no alternative means to inform the sender. type icmpReasonPortUnreachable struct{} @@ -1005,6 +1009,10 @@ func (*icmpReasonPortUnreachable) isForwarding() bool { return false } +func (*icmpReasonPortUnreachable) respondsToMulticast() bool { + return false +} + // icmpReasonNetUnreachable is an error where no route can be found to the // network of the final destination. type icmpReasonNetUnreachable struct{} @@ -1021,6 +1029,30 @@ func (*icmpReasonNetUnreachable) isForwarding() bool { return true } +func (*icmpReasonNetUnreachable) respondsToMulticast() bool { + return false +} + +// icmpReasonPacketTooBig is an error where a packet is too big to be sent +// out through the outgoing MTU, as per RFC 4443 page 9, Packet Too Big Message. +type icmpReasonPacketTooBig struct{} + +func (*icmpReasonPacketTooBig) isICMPReason() {} + +func (*icmpReasonPacketTooBig) isForwarding() bool { + // If we hit a Packet Too Big error, then we know we are operating as a router. + // As per RFC 4443 section 3.2: + // + // A Packet Too Big MUST be sent by a router in response to a packet that it + // cannot forward because the packet is larger than the MTU of the outgoing + // link. + return true +} + +func (*icmpReasonPacketTooBig) respondsToMulticast() bool { + return true +} + // icmpReasonHopLimitExceeded is an error where a packet's hop limit exceeded in // transit to its final destination, as per RFC 4443 section 3.3. 
type icmpReasonHopLimitExceeded struct{} @@ -1039,6 +1071,10 @@ func (*icmpReasonHopLimitExceeded) isForwarding() bool { return true } +func (*icmpReasonHopLimitExceeded) respondsToMulticast() bool { + return false +} + // icmpReasonReassemblyTimeout is an error where insufficient fragments are // received to complete reassembly of a packet within a configured time after // the reception of the first-arriving fragment of that packet. @@ -1050,6 +1086,10 @@ func (*icmpReasonReassemblyTimeout) isForwarding() bool { return false } +func (*icmpReasonReassemblyTimeout) respondsToMulticast() bool { + return false +} + // returnError takes an error descriptor and generates the appropriate ICMP // error packet for IPv6 and sends it. func (p *protocol) returnError(reason icmpReason, pkt *stack.PacketBuffer) tcpip.Error { @@ -1078,11 +1118,7 @@ func (p *protocol) returnError(reason icmpReason, pkt *stack.PacketBuffer) tcpip // Section 4.2 of [IPv6]) that has the Option Type highest- // order two bits set to 10). 
// - var allowResponseToMulticast bool - if reason, ok := reason.(*icmpReasonParameterProblem); ok { - allowResponseToMulticast = reason.respondToMulticast - } - + allowResponseToMulticast := reason.respondsToMulticast() isOrigDstMulticast := header.IsV6MulticastAddress(origIPHdrDst) if (!allowResponseToMulticast && isOrigDstMulticast) || origIPHdrSrc == header.IPv6Any { return nil @@ -1190,6 +1226,10 @@ func (p *protocol) returnError(reason icmpReason, pkt *stack.PacketBuffer) tcpip icmpHdr.SetType(header.ICMPv6DstUnreachable) icmpHdr.SetCode(header.ICMPv6NetworkUnreachable) counter = sent.dstUnreachable + case *icmpReasonPacketTooBig: + icmpHdr.SetType(header.ICMPv6PacketTooBig) + icmpHdr.SetCode(header.ICMPv6UnusedCode) + counter = sent.packetTooBig case *icmpReasonHopLimitExceeded: icmpHdr.SetType(header.ICMPv6TimeExceeded) icmpHdr.SetCode(header.ICMPv6HopLimitExceeded) diff --git a/pkg/tcpip/network/ipv6/ipv6.go b/pkg/tcpip/network/ipv6/ipv6.go index 029d5f51b..880290b4b 100644 --- a/pkg/tcpip/network/ipv6/ipv6.go +++ b/pkg/tcpip/network/ipv6/ipv6.go @@ -761,6 +761,12 @@ func (e *endpoint) writePacket(r *stack.Route, pkt *stack.PacketBuffer, protocol } if packetMustBeFragmented(pkt, networkMTU) { + if pkt.NetworkPacketInfo.IsForwardedPacket { + // As per RFC 2460, section 4.5: + // Unlike IPv4, fragmentation in IPv6 is performed only by source nodes, + // not by routers along a packet's delivery path. 
+ return &tcpip.ErrMessageTooLong{} + } sent, remain, err := e.handleFragments(r, networkMTU, pkt, protocol, func(fragPkt *stack.PacketBuffer) tcpip.Error { // TODO(gvisor.dev/issue/3884): Evaluate whether we want to send each // fragment one by one using WritePacket() (current strategy) or if we @@ -950,9 +956,8 @@ func (e *endpoint) forwardPacket(pkt *stack.PacketBuffer) ip.ForwardingError { switch err.(type) { case nil: case *tcpip.ErrNoRoute, *tcpip.ErrNetworkUnreachable: - // We return the original error rather than the result of returning - // the ICMP packet because the original error is more relevant to - // the caller. + // We return the original error rather than the result of returning the + // ICMP packet because the original error is more relevant to the caller. _ = e.protocol.returnError(&icmpReasonNetUnreachable{}, pkt) return &ip.ErrNoRoute{} default: @@ -971,13 +976,23 @@ func (e *endpoint) forwardPacket(pkt *stack.PacketBuffer) ip.ForwardingError { // each node that forwards the packet. newHdr.SetHopLimit(hopLimit - 1) - if err := r.WriteHeaderIncludedPacket(stack.NewPacketBuffer(stack.PacketBufferOptions{ + switch err := r.WriteHeaderIncludedPacket(stack.NewPacketBuffer(stack.PacketBufferOptions{ ReserveHeaderBytes: int(r.MaxHeaderLength()), Data: buffer.View(newHdr).ToVectorisedView(), - })); err != nil { + IsForwardedPacket: true, + })); err.(type) { + case nil: + return nil + case *tcpip.ErrMessageTooLong: + // As per RFC 4443, section 3.2: + // A Packet Too Big MUST be sent by a router in response to a packet that + // it cannot forward because the packet is larger than the MTU of the + // outgoing link. 
+ _ = e.protocol.returnError(&icmpReasonPacketTooBig{}, pkt) + return &ip.ErrMessageTooLong{} + default: return &ip.ErrOther{Err: err} } - return nil } // HandlePacket is called by the link layer when new ipv6 packets arrive for @@ -1091,6 +1106,8 @@ func (e *endpoint) handleValidatedPacket(h header.IPv6, pkt *stack.PacketBuffer) e.stats.ip.Forwarding.Unrouteable.Increment() case *ip.ErrParameterProblem: e.stats.ip.Forwarding.ExtensionHeaderProblem.Increment() + case *ip.ErrMessageTooLong: + e.stats.ip.Forwarding.PacketTooBig.Increment() default: panic(fmt.Sprintf("unexpected error %s while trying to forward packet: %#v", err, pkt)) } diff --git a/pkg/tcpip/network/ipv6/ipv6_test.go b/pkg/tcpip/network/ipv6/ipv6_test.go index 8ebca735b..faf6a782e 100644 --- a/pkg/tcpip/network/ipv6/ipv6_test.go +++ b/pkg/tcpip/network/ipv6/ipv6_test.go @@ -3018,10 +3018,13 @@ func TestForwarding(t *testing.T) { Address: tcpip.Address(net.ParseIP("11::1").To16()), PrefixLen: 64, } + multicastIPv6Addr := tcpip.AddressWithPrefix{ + Address: tcpip.Address(net.ParseIP("ff00::").To16()), + PrefixLen: 64, + } remoteIPv6Addr1 := tcpip.Address(net.ParseIP("10::2").To16()) remoteIPv6Addr2 := tcpip.Address(net.ParseIP("11::2").To16()) unreachableIPv6Addr := tcpip.Address(net.ParseIP("12::2").To16()) - multicastIPv6Addr := tcpip.Address(net.ParseIP("ff00::").To16()) linkLocalIPv6Addr := tcpip.Address(net.ParseIP("fe80::").To16()) tests := []struct { @@ -3030,6 +3033,7 @@ func TestForwarding(t *testing.T) { TTL uint8 expectErrorICMP bool expectPacketForwarded bool + payloadLength int countUnrouteablePackets uint64 sourceAddr tcpip.Address destAddr tcpip.Address @@ -3090,12 +3094,12 @@ func TestForwarding(t *testing.T) { expectPacketUnrouteableError: true, }, { - name: "Multicast destination", - TTL: 2, - countUnrouteablePackets: 1, - sourceAddr: remoteIPv6Addr1, - destAddr: multicastIPv6Addr, - expectPacketUnrouteableError: true, + name: "Multicast destination", + TTL: 2, + 
countUnrouteablePackets: 1, + sourceAddr: remoteIPv6Addr1, + destAddr: multicastIPv6Addr.Address, + expectPacketForwarded: true, }, { name: "Link local destination", @@ -3172,7 +3176,7 @@ func TestForwarding(t *testing.T) { name: "Hopbyhop with unknown option discard and send icmp action (multicast)", TTL: 2, sourceAddr: remoteIPv6Addr1, - destAddr: multicastIPv6Addr, + destAddr: multicastIPv6Addr.Address, extHdr: func(nextHdr uint8) ([]byte, uint8, checker.NetworkChecker) { return []byte{ nextHdr, 1, @@ -3215,7 +3219,7 @@ func TestForwarding(t *testing.T) { name: "Hopbyhop with unknown option discard and send icmp action unless multicast dest (multicast)", TTL: 2, sourceAddr: remoteIPv6Addr1, - destAddr: multicastIPv6Addr, + destAddr: multicastIPv6Addr.Address, extHdr: func(nextHdr uint8) ([]byte, uint8, checker.NetworkChecker) { return []byte{ nextHdr, 1, @@ -3263,6 +3267,26 @@ func TestForwarding(t *testing.T) { }, expectExtensionHeaderError: true, }, + { + name: "Can't fragment", + TTL: 2, + payloadLength: header.IPv6MinimumMTU + 1, + expectErrorICMP: true, + sourceAddr: remoteIPv6Addr1, + destAddr: remoteIPv6Addr2, + icmpType: header.ICMPv6PacketTooBig, + icmpCode: header.ICMPv6UnusedCode, + }, + { + name: "Can't fragment multicast", + TTL: 2, + payloadLength: header.IPv6MinimumMTU + 1, + sourceAddr: remoteIPv6Addr1, + destAddr: multicastIPv6Addr.Address, + expectErrorICMP: true, + icmpType: header.ICMPv6PacketTooBig, + icmpCode: header.ICMPv6UnusedCode, + }, } for _, test := range tests { @@ -3299,6 +3323,10 @@ func TestForwarding(t *testing.T) { Destination: ipv6Addr2.Subnet(), NIC: nicID2, }, + { + Destination: multicastIPv6Addr.Subnet(), + NIC: nicID2, + }, }) if err := s.SetForwarding(ProtocolNumber, true); err != nil { @@ -3315,8 +3343,13 @@ func TestForwarding(t *testing.T) { } extHdrLen := len(extHdrBytes) - hdr := buffer.NewPrependable(header.IPv6MinimumSize + header.ICMPv6MinimumSize + extHdrLen) - icmp := 
header.ICMPv6(hdr.Prepend(header.ICMPv6MinimumSize)) + ipHeaderLength := header.IPv6MinimumSize + icmpHeaderLength := header.ICMPv6MinimumSize + totalLength := ipHeaderLength + icmpHeaderLength + test.payloadLength + extHdrLen + hdr := buffer.NewPrependable(totalLength) + hdr.Prepend(test.payloadLength) + icmp := header.ICMPv6(hdr.Prepend(icmpHeaderLength)) + icmp.SetIdent(randomIdent) icmp.SetSequence(randomSequence) icmp.SetType(header.ICMPv6EchoRequest) @@ -3328,9 +3361,9 @@ func TestForwarding(t *testing.T) { Dst: test.destAddr, })) copy(hdr.Prepend(extHdrLen), extHdrBytes) - ip := header.IPv6(hdr.Prepend(header.IPv6MinimumSize)) + ip := header.IPv6(hdr.Prepend(ipHeaderLength)) ip.Encode(&header.IPv6Fields{ - PayloadLength: header.ICMPv6MinimumSize, + PayloadLength: uint16(header.ICMPv6MinimumSize + test.payloadLength), TransportProtocol: transportProtocol, HopLimit: test.TTL, SrcAddr: test.sourceAddr, @@ -3347,6 +3380,19 @@ func TestForwarding(t *testing.T) { t.Fatalf("expected ICMP packet type %d through incoming NIC", test.icmpType) } + // As per RFC 4443, page 9: + // + // The returned ICMP packet will contain as much of invoking packet + // as possible without the ICMPv6 packet exceeding the minimum IPv6 + // MTU. 
+ expectedICMPPayloadLength := func() int { + maxICMPPayloadLength := header.IPv6MinimumMTU - ipHeaderLength - icmpHeaderLength + if len(hdr.View()) > maxICMPPayloadLength { + return maxICMPPayloadLength + } + return len(hdr.View()) + } + checker.IPv6(t, header.IPv6(stack.PayloadSince(reply.Pkt.NetworkHeader())), checker.SrcAddr(ipv6Addr1.Address), checker.DstAddr(test.sourceAddr), @@ -3354,7 +3400,7 @@ func TestForwarding(t *testing.T) { checker.ICMPv6( checker.ICMPv6Type(test.icmpType), checker.ICMPv6Code(test.icmpCode), - checker.ICMPv6Payload([]byte(hdr.View())), + checker.ICMPv6Payload([]byte(hdr.View()[0:expectedICMPPayloadLength()])), ), ) @@ -3420,6 +3466,10 @@ func TestForwarding(t *testing.T) { if got, want := s.Stats().IP.Forwarding.ExtensionHeaderProblem.Value(), boolToInt(test.expectExtensionHeaderError); got != want { t.Errorf("got s.Stats().IP.Forwarding.ExtensionHeaderProblem.Value() = %d, want = %d", got, want) } + + if got, want := s.Stats().IP.Forwarding.PacketTooBig.Value(), boolToInt(test.icmpType == header.ICMPv6PacketTooBig); got != want { + t.Errorf("got s.Stats().IP.Forwarding.PacketTooBig.Value() = %d, want = %d", got, want) + } }) } } diff --git a/pkg/tcpip/stack/packet_buffer.go b/pkg/tcpip/stack/packet_buffer.go index 9527416cf..fc3c54e34 100644 --- a/pkg/tcpip/stack/packet_buffer.go +++ b/pkg/tcpip/stack/packet_buffer.go @@ -40,6 +40,10 @@ type PacketBufferOptions struct { // Data is the initial unparsed data for the new packet. If set, it will be // owned by the new packet. Data buffer.VectorisedView + + // IsForwardedPacket identifies that the PacketBuffer being created is for a + // forwarded packet. + IsForwardedPacket bool } // A PacketBuffer contains all the data of a network packet. 
@@ -132,6 +136,9 @@ func NewPacketBuffer(opts PacketBufferOptions) *PacketBuffer { if opts.ReserveHeaderBytes != 0 { pk.header = buffer.NewPrependable(opts.ReserveHeaderBytes) } + if opts.IsForwardedPacket { + pk.NetworkPacketInfo.IsForwardedPacket = opts.IsForwardedPacket + } return pk } diff --git a/pkg/tcpip/stack/registration.go b/pkg/tcpip/stack/registration.go index e26225552..a82c807b4 100644 --- a/pkg/tcpip/stack/registration.go +++ b/pkg/tcpip/stack/registration.go @@ -55,6 +55,9 @@ type NetworkPacketInfo struct { // LocalAddressBroadcast is true if the packet's local address is a broadcast // address. LocalAddressBroadcast bool + + // IsForwardedPacket is true if the packet is being forwarded. + IsForwardedPacket bool } // TransportErrorKind enumerates error types that are handled by the transport diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go index f9acd4bb8..7b9c8cd4f 100644 --- a/pkg/tcpip/tcpip.go +++ b/pkg/tcpip/tcpip.go @@ -1548,6 +1548,10 @@ type IPForwardingStats struct { // because they contained a link-local destination address. LinkLocalDestination *StatCounter + // PacketTooBig is the number of IP packets which were dropped because they + // were too big for the outgoing MTU. + PacketTooBig *StatCounter + // ExtensionHeaderProblem is the number of IP packets which were dropped // because of a problem encountered when processing an IPv6 extension // header. |