summaryrefslogtreecommitdiffhomepage
path: root/pkg/tcpip/network
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/tcpip/network')
-rw-r--r--pkg/tcpip/network/arp/BUILD1
-rw-r--r--pkg/tcpip/network/arp/arp.go84
-rw-r--r--pkg/tcpip/network/arp/arp_test.go201
-rw-r--r--pkg/tcpip/network/fragmentation/fragmentation.go58
-rw-r--r--pkg/tcpip/network/fragmentation/fragmentation_test.go120
-rw-r--r--pkg/tcpip/network/fragmentation/reassembler.go22
-rw-r--r--pkg/tcpip/network/fragmentation/reassembler_test.go23
-rw-r--r--pkg/tcpip/network/ip_test.go8
-rw-r--r--pkg/tcpip/network/ipv4/BUILD1
-rw-r--r--pkg/tcpip/network/ipv4/icmp.go124
-rw-r--r--pkg/tcpip/network/ipv4/ipv4.go460
-rw-r--r--pkg/tcpip/network/ipv4/ipv4_test.go834
-rw-r--r--pkg/tcpip/network/ipv6/BUILD1
-rw-r--r--pkg/tcpip/network/ipv6/icmp.go80
-rw-r--r--pkg/tcpip/network/ipv6/icmp_test.go128
-rw-r--r--pkg/tcpip/network/ipv6/ipv6.go165
-rw-r--r--pkg/tcpip/network/ipv6/ipv6_test.go444
17 files changed, 2348 insertions, 406 deletions
diff --git a/pkg/tcpip/network/arp/BUILD b/pkg/tcpip/network/arp/BUILD
index b40dde96b..8a6bcfc2c 100644
--- a/pkg/tcpip/network/arp/BUILD
+++ b/pkg/tcpip/network/arp/BUILD
@@ -30,5 +30,6 @@ go_test(
"//pkg/tcpip/stack",
"//pkg/tcpip/transport/icmp",
"@com_github_google_go_cmp//cmp:go_default_library",
+ "@com_github_google_go_cmp//cmp/cmpopts:go_default_library",
],
)
diff --git a/pkg/tcpip/network/arp/arp.go b/pkg/tcpip/network/arp/arp.go
index 7df77c66e..a79379abb 100644
--- a/pkg/tcpip/network/arp/arp.go
+++ b/pkg/tcpip/network/arp/arp.go
@@ -18,6 +18,7 @@
package arp
import (
+ "fmt"
"sync/atomic"
"gvisor.dev/gvisor/pkg/tcpip"
@@ -150,28 +151,36 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
remoteAddr := tcpip.Address(h.ProtocolAddressSender())
remoteLinkAddr := tcpip.LinkAddress(h.HardwareAddressSender())
- e.nud.HandleProbe(remoteAddr, localAddr, ProtocolNumber, remoteLinkAddr, e.protocol)
+ e.nud.HandleProbe(remoteAddr, ProtocolNumber, remoteLinkAddr, e.protocol)
}
- // As per RFC 826, under Packet Reception:
- // Swap hardware and protocol fields, putting the local hardware and
- // protocol addresses in the sender fields.
- //
- // Send the packet to the (new) target hardware address on the same
- // hardware on which the request was received.
- origSender := h.HardwareAddressSender()
- r.RemoteLinkAddress = tcpip.LinkAddress(origSender)
respPkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
ReserveHeaderBytes: int(e.nic.MaxHeaderLength()) + header.ARPSize,
})
packet := header.ARP(respPkt.NetworkHeader().Push(header.ARPSize))
packet.SetIPv4OverEthernet()
packet.SetOp(header.ARPReply)
- copy(packet.HardwareAddressSender(), r.LocalLinkAddress[:])
- copy(packet.ProtocolAddressSender(), h.ProtocolAddressTarget())
- copy(packet.HardwareAddressTarget(), origSender)
- copy(packet.ProtocolAddressTarget(), h.ProtocolAddressSender())
- _ = e.nic.WritePacket(r, nil /* gso */, ProtocolNumber, respPkt)
+ // TODO(gvisor.dev/issue/4582): check copied length once TAP devices have a
+ // link address.
+ _ = copy(packet.HardwareAddressSender(), e.nic.LinkAddress())
+ if n := copy(packet.ProtocolAddressSender(), h.ProtocolAddressTarget()); n != header.IPv4AddressSize {
+ panic(fmt.Sprintf("copied %d bytes, expected %d bytes", n, header.IPv4AddressSize))
+ }
+ origSender := h.HardwareAddressSender()
+ if n := copy(packet.HardwareAddressTarget(), origSender); n != header.EthernetAddressSize {
+ panic(fmt.Sprintf("copied %d bytes, expected %d bytes", n, header.EthernetAddressSize))
+ }
+ if n := copy(packet.ProtocolAddressTarget(), h.ProtocolAddressSender()); n != header.IPv4AddressSize {
+ panic(fmt.Sprintf("copied %d bytes, expected %d bytes", n, header.IPv4AddressSize))
+ }
+
+ // As per RFC 826, under Packet Reception:
+ // Swap hardware and protocol fields, putting the local hardware and
+ // protocol addresses in the sender fields.
+ //
+ // Send the packet to the (new) target hardware address on the same
+ // hardware on which the request was received.
+ _ = e.nic.WritePacketToRemote(tcpip.LinkAddress(origSender), nil /* gso */, ProtocolNumber, respPkt)
case header.ARPReply:
addr := tcpip.Address(h.ProtocolAddressSender())
@@ -199,6 +208,7 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
// protocol implements stack.NetworkProtocol and stack.LinkAddressResolver.
type protocol struct {
+ stack *stack.Stack
}
func (p *protocol) Number() tcpip.NetworkProtocolNumber { return ProtocolNumber }
@@ -227,26 +237,44 @@ func (*protocol) LinkAddressProtocol() tcpip.NetworkProtocolNumber {
}
// LinkAddressRequest implements stack.LinkAddressResolver.LinkAddressRequest.
-func (*protocol) LinkAddressRequest(addr, localAddr tcpip.Address, remoteLinkAddr tcpip.LinkAddress, linkEP stack.LinkEndpoint) *tcpip.Error {
- r := &stack.Route{
- NetProto: ProtocolNumber,
- RemoteLinkAddress: remoteLinkAddr,
+func (p *protocol) LinkAddressRequest(targetAddr, localAddr tcpip.Address, remoteLinkAddr tcpip.LinkAddress, nic stack.NetworkInterface) *tcpip.Error {
+ if len(remoteLinkAddr) == 0 {
+ remoteLinkAddr = header.EthernetBroadcastAddress
}
- if len(r.RemoteLinkAddress) == 0 {
- r.RemoteLinkAddress = header.EthernetBroadcastAddress
+
+ nicID := nic.ID()
+ if len(localAddr) == 0 {
+ addr, err := p.stack.GetMainNICAddress(nicID, header.IPv4ProtocolNumber)
+ if err != nil {
+ return err
+ }
+
+ if len(addr.Address) == 0 {
+ return tcpip.ErrNetworkUnreachable
+ }
+
+ localAddr = addr.Address
+ } else if p.stack.CheckLocalAddress(nicID, header.IPv4ProtocolNumber, localAddr) == 0 {
+ return tcpip.ErrBadLocalAddress
}
pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
- ReserveHeaderBytes: int(linkEP.MaxHeaderLength()) + header.ARPSize,
+ ReserveHeaderBytes: int(nic.MaxHeaderLength()) + header.ARPSize,
})
h := header.ARP(pkt.NetworkHeader().Push(header.ARPSize))
+ pkt.NetworkProtocolNumber = ProtocolNumber
h.SetIPv4OverEthernet()
h.SetOp(header.ARPRequest)
- copy(h.HardwareAddressSender(), linkEP.LinkAddress())
- copy(h.ProtocolAddressSender(), localAddr)
- copy(h.ProtocolAddressTarget(), addr)
-
- return linkEP.WritePacket(r, nil /* gso */, ProtocolNumber, pkt)
+ // TODO(gvisor.dev/issue/4582): check copied length once TAP devices have a
+ // link address.
+ _ = copy(h.HardwareAddressSender(), nic.LinkAddress())
+ if n := copy(h.ProtocolAddressSender(), localAddr); n != header.IPv4AddressSize {
+ panic(fmt.Sprintf("copied %d bytes, expected %d bytes", n, header.IPv4AddressSize))
+ }
+ if n := copy(h.ProtocolAddressTarget(), targetAddr); n != header.IPv4AddressSize {
+ panic(fmt.Sprintf("copied %d bytes, expected %d bytes", n, header.IPv4AddressSize))
+ }
+ return nic.WritePacketToRemote(remoteLinkAddr, nil /* gso */, ProtocolNumber, pkt)
}
// ResolveStaticAddress implements stack.LinkAddressResolver.ResolveStaticAddress.
@@ -286,6 +314,6 @@ func (*protocol) Parse(pkt *stack.PacketBuffer) (proto tcpip.TransportProtocolNu
// Note, to make sure that the ARP endpoint receives ARP packets, the "arp"
// address must be added to every NIC that should respond to ARP requests. See
// ProtocolAddress for more details.
-func NewProtocol(*stack.Stack) stack.NetworkProtocol {
- return &protocol{}
+func NewProtocol(s *stack.Stack) stack.NetworkProtocol {
+ return &protocol{stack: s}
}
diff --git a/pkg/tcpip/network/arp/arp_test.go b/pkg/tcpip/network/arp/arp_test.go
index 626af975a..bf1292bb8 100644
--- a/pkg/tcpip/network/arp/arp_test.go
+++ b/pkg/tcpip/network/arp/arp_test.go
@@ -22,6 +22,7 @@ import (
"time"
"github.com/google/go-cmp/cmp"
+ "github.com/google/go-cmp/cmp/cmpopts"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/buffer"
"gvisor.dev/gvisor/pkg/tcpip/header"
@@ -78,13 +79,11 @@ func (t eventType) String() string {
type eventInfo struct {
eventType eventType
nicID tcpip.NICID
- addr tcpip.Address
- linkAddr tcpip.LinkAddress
- state stack.NeighborState
+ entry stack.NeighborEntry
}
func (e eventInfo) String() string {
- return fmt.Sprintf("%s event for NIC #%d, addr=%q, linkAddr=%q, state=%q", e.eventType, e.nicID, e.addr, e.linkAddr, e.state)
+ return fmt.Sprintf("%s event for NIC #%d, %#v", e.eventType, e.nicID, e.entry)
}
// arpDispatcher implements NUDDispatcher to validate the dispatching of
@@ -96,35 +95,29 @@ type arpDispatcher struct {
var _ stack.NUDDispatcher = (*arpDispatcher)(nil)
-func (d *arpDispatcher) OnNeighborAdded(nicID tcpip.NICID, addr tcpip.Address, linkAddr tcpip.LinkAddress, state stack.NeighborState, updatedAt time.Time) {
+func (d *arpDispatcher) OnNeighborAdded(nicID tcpip.NICID, entry stack.NeighborEntry) {
e := eventInfo{
eventType: entryAdded,
nicID: nicID,
- addr: addr,
- linkAddr: linkAddr,
- state: state,
+ entry: entry,
}
d.C <- e
}
-func (d *arpDispatcher) OnNeighborChanged(nicID tcpip.NICID, addr tcpip.Address, linkAddr tcpip.LinkAddress, state stack.NeighborState, updatedAt time.Time) {
+func (d *arpDispatcher) OnNeighborChanged(nicID tcpip.NICID, entry stack.NeighborEntry) {
e := eventInfo{
eventType: entryChanged,
nicID: nicID,
- addr: addr,
- linkAddr: linkAddr,
- state: state,
+ entry: entry,
}
d.C <- e
}
-func (d *arpDispatcher) OnNeighborRemoved(nicID tcpip.NICID, addr tcpip.Address, linkAddr tcpip.LinkAddress, state stack.NeighborState, updatedAt time.Time) {
+func (d *arpDispatcher) OnNeighborRemoved(nicID tcpip.NICID, entry stack.NeighborEntry) {
e := eventInfo{
eventType: entryRemoved,
nicID: nicID,
- addr: addr,
- linkAddr: linkAddr,
- state: state,
+ entry: entry,
}
d.C <- e
}
@@ -132,7 +125,7 @@ func (d *arpDispatcher) OnNeighborRemoved(nicID tcpip.NICID, addr tcpip.Address,
func (d *arpDispatcher) waitForEvent(ctx context.Context, want eventInfo) error {
select {
case got := <-d.C:
- if diff := cmp.Diff(got, want, cmp.AllowUnexported(got)); diff != "" {
+ if diff := cmp.Diff(got, want, cmp.AllowUnexported(got), cmpopts.IgnoreFields(stack.NeighborEntry{}, "UpdatedAt")); diff != "" {
return fmt.Errorf("got invalid event (-got +want):\n%s", diff)
}
case <-ctx.Done():
@@ -373,9 +366,11 @@ func TestDirectRequestWithNeighborCache(t *testing.T) {
wantEvent := eventInfo{
eventType: entryAdded,
nicID: nicID,
- addr: test.senderAddr,
- linkAddr: tcpip.LinkAddress(test.senderLinkAddr),
- state: stack.Stale,
+ entry: stack.NeighborEntry{
+ Addr: test.senderAddr,
+ LinkAddr: tcpip.LinkAddress(test.senderLinkAddr),
+ State: stack.Stale,
+ },
}
if err := c.nudDisp.waitForEventWithTimeout(wantEvent, time.Second); err != nil {
t.Fatal(err)
@@ -404,9 +399,6 @@ func TestDirectRequestWithNeighborCache(t *testing.T) {
if got, want := neigh.LinkAddr, test.senderLinkAddr; got != want {
t.Errorf("got neighbor LinkAddr = %s, want = %s", got, want)
}
- if got, want := neigh.LocalAddr, stackAddr; got != want {
- t.Errorf("got neighbor LocalAddr = %s, want = %s", got, want)
- }
if got, want := neigh.State, stack.Stale; got != want {
t.Errorf("got neighbor State = %s, want = %s", got, want)
}
@@ -423,43 +415,164 @@ func TestDirectRequestWithNeighborCache(t *testing.T) {
}
}
+var _ stack.NetworkInterface = (*testInterface)(nil)
+
+type testInterface struct {
+ stack.LinkEndpoint
+
+ nicID tcpip.NICID
+}
+
+func (t *testInterface) ID() tcpip.NICID {
+ return t.nicID
+}
+
+func (*testInterface) IsLoopback() bool {
+ return false
+}
+
+func (*testInterface) Name() string {
+ return ""
+}
+
+func (*testInterface) Enabled() bool {
+ return true
+}
+
+func (t *testInterface) WritePacketToRemote(remoteLinkAddr tcpip.LinkAddress, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) *tcpip.Error {
+ r := stack.Route{
+ NetProto: protocol,
+ RemoteLinkAddress: remoteLinkAddr,
+ }
+ return t.LinkEndpoint.WritePacket(&r, gso, protocol, pkt)
+}
+
func TestLinkAddressRequest(t *testing.T) {
+ const nicID = 1
+
+ testAddr := tcpip.Address([]byte{1, 2, 3, 4})
+
tests := []struct {
name string
+ nicAddr tcpip.Address
+ localAddr tcpip.Address
remoteLinkAddr tcpip.LinkAddress
- expectLinkAddr tcpip.LinkAddress
+
+ expectedErr *tcpip.Error
+ expectedLocalAddr tcpip.Address
+ expectedRemoteLinkAddr tcpip.LinkAddress
}{
{
- name: "Unicast",
+ name: "Unicast",
+ nicAddr: stackAddr,
+ localAddr: stackAddr,
+ remoteLinkAddr: remoteLinkAddr,
+ expectedLocalAddr: stackAddr,
+ expectedRemoteLinkAddr: remoteLinkAddr,
+ },
+ {
+ name: "Multicast",
+ nicAddr: stackAddr,
+ localAddr: stackAddr,
+ remoteLinkAddr: "",
+ expectedLocalAddr: stackAddr,
+ expectedRemoteLinkAddr: header.EthernetBroadcastAddress,
+ },
+ {
+ name: "Unicast with unspecified source",
+ nicAddr: stackAddr,
+ remoteLinkAddr: remoteLinkAddr,
+ expectedLocalAddr: stackAddr,
+ expectedRemoteLinkAddr: remoteLinkAddr,
+ },
+ {
+ name: "Multicast with unspecified source",
+ nicAddr: stackAddr,
+ remoteLinkAddr: "",
+ expectedLocalAddr: stackAddr,
+ expectedRemoteLinkAddr: header.EthernetBroadcastAddress,
+ },
+ {
+ name: "Unicast with unassigned address",
+ localAddr: testAddr,
+ remoteLinkAddr: remoteLinkAddr,
+ expectedErr: tcpip.ErrBadLocalAddress,
+ },
+ {
+ name: "Multicast with unassigned address",
+ localAddr: testAddr,
+ remoteLinkAddr: "",
+ expectedErr: tcpip.ErrBadLocalAddress,
+ },
+ {
+ name: "Unicast with no local address available",
remoteLinkAddr: remoteLinkAddr,
- expectLinkAddr: remoteLinkAddr,
+ expectedErr: tcpip.ErrNetworkUnreachable,
},
{
- name: "Multicast",
+ name: "Multicast with no local address available",
remoteLinkAddr: "",
- expectLinkAddr: header.EthernetBroadcastAddress,
+ expectedErr: tcpip.ErrNetworkUnreachable,
},
}
for _, test := range tests {
- p := arp.NewProtocol(nil)
- linkRes, ok := p.(stack.LinkAddressResolver)
- if !ok {
- t.Fatal("expected ARP protocol to implement stack.LinkAddressResolver")
- }
+ t.Run(test.name, func(t *testing.T) {
+ s := stack.New(stack.Options{
+ NetworkProtocols: []stack.NetworkProtocolFactory{arp.NewProtocol, ipv4.NewProtocol},
+ })
+ p := s.NetworkProtocolInstance(arp.ProtocolNumber)
+ linkRes, ok := p.(stack.LinkAddressResolver)
+ if !ok {
+ t.Fatal("expected ARP protocol to implement stack.LinkAddressResolver")
+ }
- linkEP := channel.New(defaultChannelSize, defaultMTU, stackLinkAddr)
- if err := linkRes.LinkAddressRequest(stackAddr, remoteAddr, test.remoteLinkAddr, linkEP); err != nil {
- t.Errorf("got p.LinkAddressRequest(%s, %s, %s, _) = %s", stackAddr, remoteAddr, test.remoteLinkAddr, err)
- }
+ linkEP := channel.New(defaultChannelSize, defaultMTU, stackLinkAddr)
+ if err := s.CreateNIC(nicID, linkEP); err != nil {
+ t.Fatalf("s.CreateNIC(%d, _): %s", nicID, err)
+ }
- pkt, ok := linkEP.Read()
- if !ok {
- t.Fatal("expected to send a link address request")
- }
+ if len(test.nicAddr) != 0 {
+ if err := s.AddAddress(nicID, ipv4.ProtocolNumber, test.nicAddr); err != nil {
+ t.Fatalf("s.AddAddress(%d, %d, %s): %s", nicID, ipv4.ProtocolNumber, test.nicAddr, err)
+ }
+ }
- if got, want := pkt.Route.RemoteLinkAddress, test.expectLinkAddr; got != want {
- t.Errorf("got pkt.Route.RemoteLinkAddress = %s, want = %s", got, want)
- }
+ // We pass a test network interface to LinkAddressRequest with the same
+ // NIC ID and link endpoint used by the NIC we created earlier so that we
+ // can mock a link address request and observe the packets sent to the
+ // link endpoint even though the stack uses the real NIC to validate the
+ // local address.
+ if err := linkRes.LinkAddressRequest(remoteAddr, test.localAddr, test.remoteLinkAddr, &testInterface{LinkEndpoint: linkEP, nicID: nicID}); err != test.expectedErr {
+ t.Fatalf("got p.LinkAddressRequest(%s, %s, %s, _) = %s, want = %s", remoteAddr, test.localAddr, test.remoteLinkAddr, err, test.expectedErr)
+ }
+
+ if test.expectedErr != nil {
+ return
+ }
+
+ pkt, ok := linkEP.Read()
+ if !ok {
+ t.Fatal("expected to send a link address request")
+ }
+
+ if pkt.Route.RemoteLinkAddress != test.expectedRemoteLinkAddr {
+ t.Errorf("got pkt.Route.RemoteLinkAddress = %s, want = %s", pkt.Route.RemoteLinkAddress, test.expectedRemoteLinkAddr)
+ }
+
+ rep := header.ARP(stack.PayloadSince(pkt.Pkt.NetworkHeader()))
+ if got := tcpip.LinkAddress(rep.HardwareAddressSender()); got != stackLinkAddr {
+ t.Errorf("got HardwareAddressSender = %s, want = %s", got, stackLinkAddr)
+ }
+ if got := tcpip.Address(rep.ProtocolAddressSender()); got != test.expectedLocalAddr {
+ t.Errorf("got ProtocolAddressSender = %s, want = %s", got, test.expectedLocalAddr)
+ }
+ if got, want := tcpip.LinkAddress(rep.HardwareAddressTarget()), tcpip.LinkAddress("\x00\x00\x00\x00\x00\x00"); got != want {
+ t.Errorf("got HardwareAddressTarget = %s, want = %s", got, want)
+ }
+ if got := tcpip.Address(rep.ProtocolAddressTarget()); got != remoteAddr {
+ t.Errorf("got ProtocolAddressTarget = %s, want = %s", got, remoteAddr)
+ }
+ })
}
}
diff --git a/pkg/tcpip/network/fragmentation/fragmentation.go b/pkg/tcpip/network/fragmentation/fragmentation.go
index ed502a473..936601287 100644
--- a/pkg/tcpip/network/fragmentation/fragmentation.go
+++ b/pkg/tcpip/network/fragmentation/fragmentation.go
@@ -136,8 +136,16 @@ func NewFragmentation(blockSize uint16, highMemoryLimit, lowMemoryLimit int, rea
// proto is the protocol number marked in the fragment being processed. It has
// to be given here outside of the FragmentID struct because IPv6 should not use
// the protocol to identify a fragment.
+//
+// releaseCB is a callback that will run when the fragment reassembly of a
+// packet is complete or cancelled. releaseCB take a a boolean argument which is
+// true iff the reassembly is cancelled due to timeout. releaseCB should be
+// passed only with the first fragment of a packet. If more than one releaseCB
+// are passed for the same packet, only the first releaseCB will be saved for
+// the packet and the succeeding ones will be dropped by running them
+// immediately with a false argument.
func (f *Fragmentation) Process(
- id FragmentID, first, last uint16, more bool, proto uint8, vv buffer.VectorisedView) (
+ id FragmentID, first, last uint16, more bool, proto uint8, vv buffer.VectorisedView, releaseCB func(bool)) (
buffer.VectorisedView, uint8, bool, error) {
if first > last {
return buffer.VectorisedView{}, 0, false, fmt.Errorf("first=%d is greater than last=%d: %w", first, last, ErrInvalidArgs)
@@ -171,6 +179,12 @@ func (f *Fragmentation) Process(
f.releaseReassemblersLocked()
}
}
+ if releaseCB != nil {
+ if !r.setCallback(releaseCB) {
+ // We got a duplicate callback. Release it immediately.
+ releaseCB(false /* timedOut */)
+ }
+ }
f.mu.Unlock()
res, firstFragmentProto, done, consumed, err := r.process(first, last, more, proto, vv)
@@ -178,14 +192,14 @@ func (f *Fragmentation) Process(
// We probably got an invalid sequence of fragments. Just
// discard the reassembler and move on.
f.mu.Lock()
- f.release(r)
+ f.release(r, false /* timedOut */)
f.mu.Unlock()
return buffer.VectorisedView{}, 0, false, fmt.Errorf("fragmentation processing error: %w", err)
}
f.mu.Lock()
f.size += consumed
if done {
- f.release(r)
+ f.release(r, false /* timedOut */)
}
// Evict reassemblers if we are consuming more memory than highLimit until
// we reach lowLimit.
@@ -195,14 +209,14 @@ func (f *Fragmentation) Process(
if tail == nil {
break
}
- f.release(tail)
+ f.release(tail, false /* timedOut */)
}
}
f.mu.Unlock()
return res, firstFragmentProto, done, nil
}
-func (f *Fragmentation) release(r *reassembler) {
+func (f *Fragmentation) release(r *reassembler, timedOut bool) {
// Before releasing a fragment we need to check if r is already marked as done.
// Otherwise, we would delete it twice.
if r.checkDoneOrMark() {
@@ -216,6 +230,8 @@ func (f *Fragmentation) release(r *reassembler) {
log.Printf("memory counter < 0 (%d), this is an accounting bug that requires investigation", f.size)
f.size = 0
}
+
+ r.release(timedOut) // releaseCB may run.
}
// releaseReassemblersLocked releases already-expired reassemblers, then
@@ -238,31 +254,31 @@ func (f *Fragmentation) releaseReassemblersLocked() {
break
}
// If the oldest reassembler has already expired, release it.
- f.release(r)
+ f.release(r, true /* timedOut*/)
}
}
// PacketFragmenter is the book-keeping struct for packet fragmentation.
type PacketFragmenter struct {
- transportHeader buffer.View
- data buffer.VectorisedView
- reserve int
- innerMTU int
- fragmentCount int
- currentFragment int
- fragmentOffset int
+ transportHeader buffer.View
+ data buffer.VectorisedView
+ reserve int
+ fragmentPayloadLen int
+ fragmentCount int
+ currentFragment int
+ fragmentOffset int
}
// MakePacketFragmenter prepares the struct needed for packet fragmentation.
//
// pkt is the packet to be fragmented.
//
-// innerMTU is the maximum number of bytes of fragmentable data a fragment can
+// fragmentPayloadLen is the maximum number of bytes of fragmentable data a fragment can
// have.
//
// reserve is the number of bytes that should be reserved for the headers in
// each generated fragment.
-func MakePacketFragmenter(pkt *stack.PacketBuffer, innerMTU int, reserve int) PacketFragmenter {
+func MakePacketFragmenter(pkt *stack.PacketBuffer, fragmentPayloadLen uint32, reserve int) PacketFragmenter {
// As per RFC 8200 Section 4.5, some IPv6 extension headers should not be
// repeated in each fragment. However we do not currently support any header
// of that kind yet, so the following computation is valid for both IPv4 and
@@ -273,13 +289,13 @@ func MakePacketFragmenter(pkt *stack.PacketBuffer, innerMTU int, reserve int) Pa
var fragmentableData buffer.VectorisedView
fragmentableData.AppendView(pkt.TransportHeader().View())
fragmentableData.Append(pkt.Data)
- fragmentCount := (fragmentableData.Size() + innerMTU - 1) / innerMTU
+ fragmentCount := (uint32(fragmentableData.Size()) + fragmentPayloadLen - 1) / fragmentPayloadLen
return PacketFragmenter{
- data: fragmentableData,
- reserve: reserve,
- innerMTU: innerMTU,
- fragmentCount: fragmentCount,
+ data: fragmentableData,
+ reserve: reserve,
+ fragmentPayloadLen: int(fragmentPayloadLen),
+ fragmentCount: int(fragmentCount),
}
}
@@ -302,7 +318,7 @@ func (pf *PacketFragmenter) BuildNextFragment() (*stack.PacketBuffer, int, int,
})
// Copy data for the fragment.
- copied := pf.data.ReadToVV(&fragPkt.Data, pf.innerMTU)
+ copied := pf.data.ReadToVV(&fragPkt.Data, pf.fragmentPayloadLen)
offset := pf.fragmentOffset
pf.fragmentOffset += copied
diff --git a/pkg/tcpip/network/fragmentation/fragmentation_test.go b/pkg/tcpip/network/fragmentation/fragmentation_test.go
index d3c7d7f92..5dcd10730 100644
--- a/pkg/tcpip/network/fragmentation/fragmentation_test.go
+++ b/pkg/tcpip/network/fragmentation/fragmentation_test.go
@@ -105,7 +105,7 @@ func TestFragmentationProcess(t *testing.T) {
f := NewFragmentation(minBlockSize, 1024, 512, reassembleTimeout, &faketime.NullClock{})
firstFragmentProto := c.in[0].proto
for i, in := range c.in {
- vv, proto, done, err := f.Process(in.id, in.first, in.last, in.more, in.proto, in.vv)
+ vv, proto, done, err := f.Process(in.id, in.first, in.last, in.more, in.proto, in.vv, nil)
if err != nil {
t.Fatalf("f.Process(%+v, %d, %d, %t, %d, %X) failed: %s",
in.id, in.first, in.last, in.more, in.proto, in.vv.ToView(), err)
@@ -240,7 +240,7 @@ func TestReassemblingTimeout(t *testing.T) {
for _, event := range test.events {
clock.Advance(event.clockAdvance)
if frag := event.fragment; frag != nil {
- _, _, done, err := f.Process(FragmentID{}, frag.first, frag.last, frag.more, protocol, vv(len(frag.data), frag.data))
+ _, _, done, err := f.Process(FragmentID{}, frag.first, frag.last, frag.more, protocol, vv(len(frag.data), frag.data), nil)
if err != nil {
t.Fatalf("%s: f.Process failed: %s", event.name, err)
}
@@ -259,15 +259,15 @@ func TestReassemblingTimeout(t *testing.T) {
func TestMemoryLimits(t *testing.T) {
f := NewFragmentation(minBlockSize, 3, 1, reassembleTimeout, &faketime.NullClock{})
// Send first fragment with id = 0.
- f.Process(FragmentID{ID: 0}, 0, 0, true, 0xFF, vv(1, "0"))
+ f.Process(FragmentID{ID: 0}, 0, 0, true, 0xFF, vv(1, "0"), nil)
// Send first fragment with id = 1.
- f.Process(FragmentID{ID: 1}, 0, 0, true, 0xFF, vv(1, "1"))
+ f.Process(FragmentID{ID: 1}, 0, 0, true, 0xFF, vv(1, "1"), nil)
// Send first fragment with id = 2.
- f.Process(FragmentID{ID: 2}, 0, 0, true, 0xFF, vv(1, "2"))
+ f.Process(FragmentID{ID: 2}, 0, 0, true, 0xFF, vv(1, "2"), nil)
// Send first fragment with id = 3. This should caused id = 0 and id = 1 to be
// evicted.
- f.Process(FragmentID{ID: 3}, 0, 0, true, 0xFF, vv(1, "3"))
+ f.Process(FragmentID{ID: 3}, 0, 0, true, 0xFF, vv(1, "3"), nil)
if _, ok := f.reassemblers[FragmentID{ID: 0}]; ok {
t.Errorf("Memory limits are not respected: id=0 has not been evicted.")
@@ -283,9 +283,9 @@ func TestMemoryLimits(t *testing.T) {
func TestMemoryLimitsIgnoresDuplicates(t *testing.T) {
f := NewFragmentation(minBlockSize, 1, 0, reassembleTimeout, &faketime.NullClock{})
// Send first fragment with id = 0.
- f.Process(FragmentID{}, 0, 0, true, 0xFF, vv(1, "0"))
+ f.Process(FragmentID{}, 0, 0, true, 0xFF, vv(1, "0"), nil)
// Send the same packet again.
- f.Process(FragmentID{}, 0, 0, true, 0xFF, vv(1, "0"))
+ f.Process(FragmentID{}, 0, 0, true, 0xFF, vv(1, "0"), nil)
got := f.size
want := 1
@@ -377,7 +377,7 @@ func TestErrors(t *testing.T) {
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
f := NewFragmentation(test.blockSize, HighFragThreshold, LowFragThreshold, reassembleTimeout, &faketime.NullClock{})
- _, _, done, err := f.Process(FragmentID{}, test.first, test.last, test.more, 0, vv(len(test.data), test.data))
+ _, _, done, err := f.Process(FragmentID{}, test.first, test.last, test.more, 0, vv(len(test.data), test.data), nil)
if !errors.Is(err, test.err) {
t.Errorf("got Process(_, %d, %d, %t, _, %q) = (_, _, _, %v), want = (_, _, _, %v)", test.first, test.last, test.more, test.data, err, test.err)
}
@@ -403,14 +403,14 @@ func TestPacketFragmenter(t *testing.T) {
tests := []struct {
name string
- innerMTU int
+ fragmentPayloadLen uint32
transportHeaderLen int
payloadSize int
wantFragments []fragmentInfo
}{
{
name: "Packet exactly fits in MTU",
- innerMTU: 1280,
+ fragmentPayloadLen: 1280,
transportHeaderLen: 0,
payloadSize: 1280,
wantFragments: []fragmentInfo{
@@ -419,7 +419,7 @@ func TestPacketFragmenter(t *testing.T) {
},
{
name: "Packet exactly does not fit in MTU",
- innerMTU: 1000,
+ fragmentPayloadLen: 1000,
transportHeaderLen: 0,
payloadSize: 1001,
wantFragments: []fragmentInfo{
@@ -429,7 +429,7 @@ func TestPacketFragmenter(t *testing.T) {
},
{
name: "Packet has a transport header",
- innerMTU: 560,
+ fragmentPayloadLen: 560,
transportHeaderLen: 40,
payloadSize: 560,
wantFragments: []fragmentInfo{
@@ -439,7 +439,7 @@ func TestPacketFragmenter(t *testing.T) {
},
{
name: "Packet has a huge transport header",
- innerMTU: 500,
+ fragmentPayloadLen: 500,
transportHeaderLen: 1300,
payloadSize: 500,
wantFragments: []fragmentInfo{
@@ -458,7 +458,7 @@ func TestPacketFragmenter(t *testing.T) {
originalPayload.AppendView(pkt.TransportHeader().View())
originalPayload.Append(pkt.Data)
var reassembledPayload buffer.VectorisedView
- pf := MakePacketFragmenter(pkt, test.innerMTU, reserve)
+ pf := MakePacketFragmenter(pkt, test.fragmentPayloadLen, reserve)
for i := 0; ; i++ {
fragPkt, offset, copied, more := pf.BuildNextFragment()
wantFragment := test.wantFragments[i]
@@ -474,8 +474,8 @@ func TestPacketFragmenter(t *testing.T) {
if more != wantFragment.more {
t.Errorf("(fragment #%d) got more = %t, want = %t", i, more, wantFragment.more)
}
- if got := fragPkt.Size(); got > test.innerMTU {
- t.Errorf("(fragment #%d) got fragPkt.Size() = %d, want <= %d", i, got, test.innerMTU)
+ if got := uint32(fragPkt.Size()); got > test.fragmentPayloadLen {
+ t.Errorf("(fragment #%d) got fragPkt.Size() = %d, want <= %d", i, got, test.fragmentPayloadLen)
}
if got := fragPkt.AvailableHeaderBytes(); got != reserve {
t.Errorf("(fragment #%d) got fragPkt.AvailableHeaderBytes() = %d, want = %d", i, got, reserve)
@@ -497,3 +497,89 @@ func TestPacketFragmenter(t *testing.T) {
})
}
}
+
+func TestReleaseCallback(t *testing.T) {
+ const (
+ proto = 99
+ )
+
+ var result int
+ var callbackReasonIsTimeout bool
+ cb1 := func(timedOut bool) { result = 1; callbackReasonIsTimeout = timedOut }
+ cb2 := func(timedOut bool) { result = 2; callbackReasonIsTimeout = timedOut }
+
+ tests := []struct {
+ name string
+ callbacks []func(bool)
+ timeout bool
+ wantResult int
+ wantCallbackReasonIsTimeout bool
+ }{
+ {
+ name: "callback runs on release",
+ callbacks: []func(bool){cb1},
+ timeout: false,
+ wantResult: 1,
+ wantCallbackReasonIsTimeout: false,
+ },
+ {
+ name: "first callback is nil",
+ callbacks: []func(bool){nil, cb2},
+ timeout: false,
+ wantResult: 2,
+ wantCallbackReasonIsTimeout: false,
+ },
+ {
+ name: "two callbacks - first one is set",
+ callbacks: []func(bool){cb1, cb2},
+ timeout: false,
+ wantResult: 1,
+ wantCallbackReasonIsTimeout: false,
+ },
+ {
+ name: "callback runs on timeout",
+ callbacks: []func(bool){cb1},
+ timeout: true,
+ wantResult: 1,
+ wantCallbackReasonIsTimeout: true,
+ },
+ {
+ name: "no callbacks",
+ callbacks: []func(bool){nil},
+ timeout: false,
+ wantResult: 0,
+ wantCallbackReasonIsTimeout: false,
+ },
+ }
+
+ id := FragmentID{ID: 0}
+
+ for _, test := range tests {
+ t.Run(test.name, func(t *testing.T) {
+ result = 0
+ callbackReasonIsTimeout = false
+
+ f := NewFragmentation(minBlockSize, HighFragThreshold, LowFragThreshold, reassembleTimeout, &faketime.NullClock{})
+
+ for i, cb := range test.callbacks {
+ _, _, _, err := f.Process(id, uint16(i), uint16(i), true, proto, vv(1, "0"), cb)
+ if err != nil {
+ t.Errorf("f.Process error = %s", err)
+ }
+ }
+
+ r, ok := f.reassemblers[id]
+ if !ok {
+ t.Fatalf("Reassemberr not found")
+ }
+ f.release(r, test.timeout)
+
+ if result != test.wantResult {
+ t.Errorf("got result = %d, want = %d", result, test.wantResult)
+ }
+ if callbackReasonIsTimeout != test.wantCallbackReasonIsTimeout {
+ t.Errorf("got callbackReasonIsTimeout = %t, want = %t", callbackReasonIsTimeout, test.wantCallbackReasonIsTimeout)
+ }
+ })
+ }
+}
diff --git a/pkg/tcpip/network/fragmentation/reassembler.go b/pkg/tcpip/network/fragmentation/reassembler.go
index 9bb051a30..c0cc0bde0 100644
--- a/pkg/tcpip/network/fragmentation/reassembler.go
+++ b/pkg/tcpip/network/fragmentation/reassembler.go
@@ -41,6 +41,7 @@ type reassembler struct {
heap fragHeap
done bool
creationTime int64
+ callback func(bool)
}
func newReassembler(id FragmentID, clock tcpip.Clock) *reassembler {
@@ -123,3 +124,24 @@ func (r *reassembler) checkDoneOrMark() bool {
r.mu.Unlock()
return prev
}
+
+func (r *reassembler) setCallback(c func(bool)) bool {
+ r.mu.Lock()
+ defer r.mu.Unlock()
+ if r.callback != nil {
+ return false
+ }
+ r.callback = c
+ return true
+}
+
+func (r *reassembler) release(timedOut bool) {
+ r.mu.Lock()
+ callback := r.callback
+ r.callback = nil
+ r.mu.Unlock()
+
+ if callback != nil {
+ callback(timedOut)
+ }
+}
diff --git a/pkg/tcpip/network/fragmentation/reassembler_test.go b/pkg/tcpip/network/fragmentation/reassembler_test.go
index a0a04a027..fa2a70dc8 100644
--- a/pkg/tcpip/network/fragmentation/reassembler_test.go
+++ b/pkg/tcpip/network/fragmentation/reassembler_test.go
@@ -105,3 +105,26 @@ func TestUpdateHoles(t *testing.T) {
}
}
}
+
+func TestSetCallback(t *testing.T) {
+ result := 0
+ reasonTimeout := false
+
+ cb1 := func(timedOut bool) { result = 1; reasonTimeout = timedOut }
+ cb2 := func(timedOut bool) { result = 2; reasonTimeout = timedOut }
+
+ r := newReassembler(FragmentID{}, &faketime.NullClock{})
+ if !r.setCallback(cb1) {
+ t.Errorf("setCallback failed")
+ }
+ if r.setCallback(cb2) {
+ t.Errorf("setCallback should fail if one is already set")
+ }
+ r.release(true)
+ if result != 1 {
+ t.Errorf("got result = %d, want = 1", result)
+ }
+ if !reasonTimeout {
+ t.Errorf("got reasonTimeout = %t, want = true", reasonTimeout)
+ }
+}
diff --git a/pkg/tcpip/network/ip_test.go b/pkg/tcpip/network/ip_test.go
index f20b94d97..969579601 100644
--- a/pkg/tcpip/network/ip_test.go
+++ b/pkg/tcpip/network/ip_test.go
@@ -304,6 +304,10 @@ func (t *testInterface) setEnabled(v bool) {
t.mu.disabled = !v
}
+func (*testInterface) WritePacketToRemote(tcpip.LinkAddress, *stack.GSO, tcpip.NetworkProtocolNumber, *stack.PacketBuffer) *tcpip.Error {
+ return tcpip.ErrNotSupported
+}
+
func TestSourceAddressValidation(t *testing.T) {
rxIPv4ICMP := func(e *channel.Endpoint, src tcpip.Address) {
totalLen := header.IPv4MinimumSize + header.ICMPv4MinimumSize
@@ -690,6 +694,10 @@ func TestIPv4ReceiveControl(t *testing.T) {
view[i] = uint8(i)
}
+ icmp.SetChecksum(0)
+ checksum := ^header.Checksum(icmp, 0 /* initial */)
+ icmp.SetChecksum(checksum)
+
// Give packet to IPv4 endpoint, dispatcher will validate that
// it's ok.
nic.testObject.protocol = 10
diff --git a/pkg/tcpip/network/ipv4/BUILD b/pkg/tcpip/network/ipv4/BUILD
index 7fc12e229..6252614ec 100644
--- a/pkg/tcpip/network/ipv4/BUILD
+++ b/pkg/tcpip/network/ipv4/BUILD
@@ -29,6 +29,7 @@ go_test(
"//pkg/tcpip",
"//pkg/tcpip/buffer",
"//pkg/tcpip/checker",
+ "//pkg/tcpip/faketime",
"//pkg/tcpip/header",
"//pkg/tcpip/link/channel",
"//pkg/tcpip/link/sniffer",
diff --git a/pkg/tcpip/network/ipv4/icmp.go b/pkg/tcpip/network/ipv4/icmp.go
index 3407755ed..cf287446e 100644
--- a/pkg/tcpip/network/ipv4/icmp.go
+++ b/pkg/tcpip/network/ipv4/icmp.go
@@ -15,6 +15,7 @@
package ipv4
import (
+ "errors"
"fmt"
"gvisor.dev/gvisor/pkg/tcpip"
@@ -23,10 +24,10 @@ import (
"gvisor.dev/gvisor/pkg/tcpip/stack"
)
-// handleControl handles the case when an ICMP packet contains the headers of
-// the original packet that caused the ICMP one to be sent. This information is
-// used to find out which transport endpoint must be notified about the ICMP
-// packet.
+// handleControl handles the case when an ICMP error packet contains the headers
+// of the original packet that caused the ICMP one to be sent. This information
+// is used to find out which transport endpoint must be notified about the ICMP
+// packet. We only expect the payload, not the enclosing ICMP packet.
func (e *endpoint) handleControl(typ stack.ControlType, extra uint32, pkt *stack.PacketBuffer) {
h, ok := pkt.Data.PullUp(header.IPv4MinimumSize)
if !ok {
@@ -73,20 +74,65 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer) {
}
h := header.ICMPv4(v)
+ // Only do in-stack processing if the checksum is correct.
+ if header.ChecksumVV(pkt.Data, 0 /* initial */) != 0xffff {
+ received.Invalid.Increment()
+ // It's possible that a raw socket expects to receive this regardless
+ // of checksum errors. If it's an echo request we know it's safe because
+ // we are the only handler, however other types do not cope well with
+ // packets with checksum errors.
+ switch h.Type() {
+ case header.ICMPv4Echo:
+ e.dispatcher.DeliverTransportPacket(r, header.ICMPv4ProtocolNumber, pkt)
+ }
+ return
+ }
+
+ iph := header.IPv4(pkt.NetworkHeader().View())
+ var newOptions header.IPv4Options
+ if len(iph) > header.IPv4MinimumSize {
+ // RFC 1122 section 3.2.2.6 (page 43) (and similar for other round trip
+ // type ICMP packets):
+ // If a Record Route and/or Time Stamp option is received in an
+ // ICMP Echo Request, this option (these options) SHOULD be
+ // updated to include the current host and included in the IP
+ // header of the Echo Reply message, without "truncation".
+ // Thus, the recorded route will be for the entire round trip.
+ //
+ // So we need to let the option processor know how it should handle them.
+ var op optionsUsage
+ if h.Type() == header.ICMPv4Echo {
+ op = &optionUsageEcho{}
+ } else {
+ op = &optionUsageReceive{}
+ }
+ aux, tmp, err := processIPOptions(r, iph.Options(), op)
+ if err != nil {
+ switch {
+ case
+ errors.Is(err, header.ErrIPv4OptDuplicate),
+ errors.Is(err, errIPv4RecordRouteOptInvalidLength),
+ errors.Is(err, errIPv4RecordRouteOptInvalidPointer),
+ errors.Is(err, errIPv4TimestampOptInvalidLength),
+ errors.Is(err, errIPv4TimestampOptInvalidPointer),
+ errors.Is(err, errIPv4TimestampOptOverflow):
+ _ = e.protocol.returnError(r, &icmpReasonParamProblem{pointer: aux}, pkt)
+ e.protocol.stack.Stats().MalformedRcvdPackets.Increment()
+ r.Stats().IP.MalformedPacketsReceived.Increment()
+ }
+ return
+ }
+ newOptions = tmp
+ }
+
// TODO(b/112892170): Meaningfully handle all ICMP types.
switch h.Type() {
case header.ICMPv4Echo:
received.Echo.Increment()
- // Only send a reply if the checksum is valid.
- headerChecksum := h.Checksum()
- h.SetChecksum(0)
- calculatedChecksum := ^header.ChecksumVV(pkt.Data, 0 /* initial */)
- h.SetChecksum(headerChecksum)
- if calculatedChecksum != headerChecksum {
- // It's possible that a raw socket still expects to receive this.
- e.dispatcher.DeliverTransportPacket(r, header.ICMPv4ProtocolNumber, pkt)
- received.Invalid.Increment()
+ sent := stats.ICMP.V4PacketsSent
+ if !r.Stack().AllowICMPMessage() {
+ sent.RateLimited.Increment()
return
}
@@ -98,9 +144,14 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer) {
// waiting endpoints. Consider moving responsibility for doing the copy to
// DeliverTransportPacket so that is is only done when needed.
replyData := pkt.Data.ToOwnedView()
- replyIPHdr := header.IPv4(append(buffer.View(nil), pkt.NetworkHeader().View()...))
+ // It's possible that a raw socket expects to receive this.
e.dispatcher.DeliverTransportPacket(r, header.ICMPv4ProtocolNumber, pkt)
+ pkt = nil
+ // Take the base of the incoming request IP header but replace the options.
+ replyHeaderLength := uint8(header.IPv4MinimumSize + len(newOptions))
+ replyIPHdr := header.IPv4(append(iph[:header.IPv4MinimumSize:header.IPv4MinimumSize], newOptions...))
+ replyIPHdr.SetHeaderLength(replyHeaderLength)
// As per RFC 1122 section 3.2.1.3, when a host sends any datagram, the IP
// source address MUST be one of its own IP addresses (but not a broadcast
@@ -139,7 +190,8 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer) {
// The fields we need to alter.
//
// We need to produce the entire packet in the data segment in order to
- // use WriteHeaderIncludedPacket().
+ // use WriteHeaderIncludedPacket(). WriteHeaderIncludedPacket sets the
+ // total length and the header checksum so we don't need to set those here.
replyIPHdr.SetSourceAddress(r.LocalAddress)
replyIPHdr.SetDestinationAddress(r.RemoteAddress)
replyIPHdr.SetTTL(r.DefaultTTL())
@@ -157,8 +209,6 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer) {
})
replyPkt.TransportProtocolNumber = header.ICMPv4ProtocolNumber
- // The checksum will be calculated so we don't need to do it here.
- sent := stats.ICMP.V4PacketsSent
if err := r.WriteHeaderIncludedPacket(replyPkt); err != nil {
sent.Dropped.Increment()
return
@@ -182,8 +232,11 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer) {
e.handleControl(stack.ControlPortUnreachable, 0, pkt)
case header.ICMPv4FragmentationNeeded:
- mtu := uint32(h.MTU())
- e.handleControl(stack.ControlPacketTooBig, calculateMTU(mtu), pkt)
+ networkMTU, err := calculateNetworkMTU(uint32(h.MTU()), header.IPv4MinimumSize)
+ if err != nil {
+ networkMTU = 0
+ }
+ e.handleControl(stack.ControlPacketTooBig, networkMTU, pkt)
}
case header.ICMPv4SrcQuench:
@@ -234,6 +287,21 @@ type icmpReasonProtoUnreachable struct{}
func (*icmpReasonProtoUnreachable) isICMPReason() {}
+// icmpReasonReassemblyTimeout is an error where insufficient fragments are
+// received to complete reassembly of a packet within a configured time after
+// the reception of the first-arriving fragment of that packet.
+type icmpReasonReassemblyTimeout struct{}
+
+func (*icmpReasonReassemblyTimeout) isICMPReason() {}
+
+// icmpReasonParamProblem is an error to use to request a Parameter Problem
+// message to be sent.
+type icmpReasonParamProblem struct {
+ pointer byte
+}
+
+func (*icmpReasonParamProblem) isICMPReason() {}
+
// returnError takes an error descriptor and generates the appropriate ICMP
// error packet for IPv4 and sends it back to the remote device that sent
// the problematic packet. It incorporates as much of that packet as
@@ -374,17 +442,29 @@ func (p *protocol) returnError(r *stack.Route, reason icmpReason, pkt *stack.Pac
icmpPkt.TransportProtocolNumber = header.ICMPv4ProtocolNumber
icmpHdr := header.ICMPv4(icmpPkt.TransportHeader().Push(header.ICMPv4MinimumSize))
- switch reason.(type) {
+ var counter *tcpip.StatCounter
+ switch reason := reason.(type) {
case *icmpReasonPortUnreachable:
+ icmpHdr.SetType(header.ICMPv4DstUnreachable)
icmpHdr.SetCode(header.ICMPv4PortUnreachable)
+ counter = sent.DstUnreachable
case *icmpReasonProtoUnreachable:
+ icmpHdr.SetType(header.ICMPv4DstUnreachable)
icmpHdr.SetCode(header.ICMPv4ProtoUnreachable)
+ counter = sent.DstUnreachable
+ case *icmpReasonReassemblyTimeout:
+ icmpHdr.SetType(header.ICMPv4TimeExceeded)
+ icmpHdr.SetCode(header.ICMPv4ReassemblyTimeout)
+ counter = sent.TimeExceeded
+ case *icmpReasonParamProblem:
+ icmpHdr.SetType(header.ICMPv4ParamProblem)
+ icmpHdr.SetCode(header.ICMPv4UnusedCode)
+ icmpHdr.SetPointer(reason.pointer)
+ counter = sent.ParamProblem
default:
panic(fmt.Sprintf("unsupported ICMP type %T", reason))
}
- icmpHdr.SetType(header.ICMPv4DstUnreachable)
icmpHdr.SetChecksum(header.ICMPv4Checksum(icmpHdr, icmpPkt.Data))
- counter := sent.DstUnreachable
if err := route.WritePacket(
nil, /* gso */
diff --git a/pkg/tcpip/network/ipv4/ipv4.go b/pkg/tcpip/network/ipv4/ipv4.go
index e7c58ae0a..4592984a5 100644
--- a/pkg/tcpip/network/ipv4/ipv4.go
+++ b/pkg/tcpip/network/ipv4/ipv4.go
@@ -16,7 +16,9 @@
package ipv4
import (
+ "errors"
"fmt"
+ "math"
"sync/atomic"
"time"
@@ -31,6 +33,8 @@ import (
)
const (
+ // ReassembleTimeout is the time a packet stays in the reassembly
+ // system before being evicted.
// As per RFC 791 section 3.2:
// The current recommendation for the initial timer setting is 15 seconds.
// This may be changed as experience with this protocol accumulates.
@@ -38,7 +42,7 @@ const (
// Considering that it is an old recommendation, we use the same reassembly
// timeout that linux defines, which is 30 seconds:
// https://github.com/torvalds/linux/blob/47ec5303d73ea344e84f46660fff693c57641386/include/net/ip.h#L138
- reassembleTimeout = 30 * time.Second
+ ReassembleTimeout = 30 * time.Second
// ProtocolNumber is the ipv4 protocol number.
ProtocolNumber = header.IPv4ProtocolNumber
@@ -176,7 +180,11 @@ func (e *endpoint) DefaultTTL() uint8 {
// MTU implements stack.NetworkEndpoint.MTU. It returns the link-layer MTU minus
// the network layer max header length.
func (e *endpoint) MTU() uint32 {
- return calculateMTU(e.nic.MTU())
+ networkMTU, err := calculateNetworkMTU(e.nic.MTU(), header.IPv4MinimumSize)
+ if err != nil {
+ return 0
+ }
+ return networkMTU
}
// MaxHeaderLength returns the maximum length needed by ipv4 headers (and
@@ -211,18 +219,15 @@ func (e *endpoint) addIPHeader(r *stack.Route, pkt *stack.PacketBuffer, params s
pkt.NetworkProtocolNumber = ProtocolNumber
}
-func (e *endpoint) packetMustBeFragmented(pkt *stack.PacketBuffer, gso *stack.GSO) bool {
- return (gso == nil || gso.Type == stack.GSONone) && pkt.Size() > int(e.nic.MTU())
-}
-
// handleFragments fragments pkt and calls the handler function on each
// fragment. It returns the number of fragments handled and the number of
// fragments left to be processed. The IP header must already be present in the
-// original packet. The mtu is the maximum size of the packets.
-func (e *endpoint) handleFragments(r *stack.Route, gso *stack.GSO, mtu uint32, pkt *stack.PacketBuffer, handler func(*stack.PacketBuffer) *tcpip.Error) (int, int, *tcpip.Error) {
- fragMTU := int(calculateFragmentInnerMTU(mtu, pkt))
+// original packet.
+func (e *endpoint) handleFragments(r *stack.Route, gso *stack.GSO, networkMTU uint32, pkt *stack.PacketBuffer, handler func(*stack.PacketBuffer) *tcpip.Error) (int, int, *tcpip.Error) {
+ // Round the MTU down to align to 8 bytes.
+ fragmentPayloadSize := networkMTU &^ 7
networkHeader := header.IPv4(pkt.NetworkHeader().View())
- pf := fragmentation.MakePacketFragmenter(pkt, fragMTU, pkt.AvailableHeaderBytes()+len(networkHeader))
+ pf := fragmentation.MakePacketFragmenter(pkt, fragmentPayloadSize, pkt.AvailableHeaderBytes()+len(networkHeader))
var n int
for {
@@ -280,8 +285,14 @@ func (e *endpoint) writePacket(r *stack.Route, gso *stack.GSO, pkt *stack.Packet
return nil
}
- if e.packetMustBeFragmented(pkt, gso) {
- sent, remain, err := e.handleFragments(r, gso, e.nic.MTU(), pkt, func(fragPkt *stack.PacketBuffer) *tcpip.Error {
+ networkMTU, err := calculateNetworkMTU(e.nic.MTU(), uint32(pkt.NetworkHeader().View().Size()))
+ if err != nil {
+ r.Stats().IP.OutgoingPacketErrors.Increment()
+ return err
+ }
+
+ if packetMustBeFragmented(pkt, networkMTU, gso) {
+ sent, remain, err := e.handleFragments(r, gso, networkMTU, pkt, func(fragPkt *stack.PacketBuffer) *tcpip.Error {
// TODO(gvisor.dev/issue/3884): Evaluate whether we want to send each
// fragment one by one using WritePacket() (current strategy) or if we
// want to create a PacketBufferList from the fragments and feed it to
@@ -292,6 +303,7 @@ func (e *endpoint) writePacket(r *stack.Route, gso *stack.GSO, pkt *stack.Packet
r.Stats().IP.OutgoingPacketErrors.IncrementBy(uint64(remain))
return err
}
+
if err := e.nic.WritePacket(r, gso, ProtocolNumber, pkt); err != nil {
r.Stats().IP.OutgoingPacketErrors.Increment()
return err
@@ -311,17 +323,23 @@ func (e *endpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts stack.Packe
for pkt := pkts.Front(); pkt != nil; pkt = pkt.Next() {
e.addIPHeader(r, pkt, params)
- if e.packetMustBeFragmented(pkt, gso) {
+ networkMTU, err := calculateNetworkMTU(e.nic.MTU(), uint32(pkt.NetworkHeader().View().Size()))
+ if err != nil {
+ r.Stats().IP.OutgoingPacketErrors.IncrementBy(uint64(pkts.Len()))
+ return 0, err
+ }
+
+ if packetMustBeFragmented(pkt, networkMTU, gso) {
// Keep track of the packet that is about to be fragmented so it can be
// removed once the fragmentation is done.
originalPkt := pkt
- if _, _, err := e.handleFragments(r, gso, e.nic.MTU(), pkt, func(fragPkt *stack.PacketBuffer) *tcpip.Error {
+ if _, _, err := e.handleFragments(r, gso, networkMTU, pkt, func(fragPkt *stack.PacketBuffer) *tcpip.Error {
// Modify the packet list in place with the new fragments.
pkts.InsertAfter(pkt, fragPkt)
pkt = fragPkt
return nil
}); err != nil {
- panic(fmt.Sprintf("e.handleFragments(_, _, %d, _, _) = %s", e.nic.MTU(), err))
+ panic(fmt.Sprintf("e.handleFragments(_, _, %d, _, _) = %s", networkMTU, err))
}
// Remove the packet that was just fragmented and process the rest.
pkts.Remove(originalPkt)
@@ -506,6 +524,28 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
r.Stats().IP.MalformedFragmentsReceived.Increment()
return
}
+
+ // Set up a callback in case we need to send a Time Exceeded Message, as per
+ // RFC 792:
+ //
+ // If a host reassembling a fragmented datagram cannot complete the
+ // reassembly due to missing fragments within its time limit it discards
+ // the datagram, and it may send a time exceeded message.
+ //
+ // If fragment zero is not available then no time exceeded need be sent at
+ // all.
+ var releaseCB func(bool)
+ if start == 0 {
+ pkt := pkt.Clone()
+ r := r.Clone()
+ releaseCB = func(timedOut bool) {
+ if timedOut {
+ _ = e.protocol.returnError(&r, &icmpReasonReassemblyTimeout{}, pkt)
+ }
+ r.Release()
+ }
+ }
+
var ready bool
var err error
proto := h.Protocol()
@@ -523,6 +563,7 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
h.More(),
proto,
pkt.Data,
+ releaseCB,
)
if err != nil {
r.Stats().IP.MalformedPacketsReceived.Increment()
@@ -532,9 +573,14 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
if !ready {
return
}
- }
+ // The reassembler doesn't take care of fixing up the header, so we need
+ // to do it here.
+ h.SetTotalLength(uint16(pkt.Data.Size() + len((h))))
+ h.SetFlagsFragmentOffset(0, 0)
+ }
r.Stats().IP.PacketsDelivered.Increment()
+
p := h.TransportProtocol()
if p == header.ICMPv4ProtocolNumber {
// TODO(gvisor.dev/issues/3810): when we sort out ICMP and transport
@@ -544,6 +590,27 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
e.handleICMP(r, pkt)
return
}
+ if len(h.Options()) != 0 {
+ // TODO(gvisor.dev/issue/4586):
+ // When we add forwarding support we should use the verified options
+ // rather than just throwing them away.
+ aux, _, err := processIPOptions(r, h.Options(), &optionUsageReceive{})
+ if err != nil {
+ switch {
+ case
+ errors.Is(err, header.ErrIPv4OptDuplicate),
+ errors.Is(err, errIPv4RecordRouteOptInvalidPointer),
+ errors.Is(err, errIPv4RecordRouteOptInvalidLength),
+ errors.Is(err, errIPv4TimestampOptInvalidLength),
+ errors.Is(err, errIPv4TimestampOptInvalidPointer),
+ errors.Is(err, errIPv4TimestampOptOverflow):
+ _ = e.protocol.returnError(r, &icmpReasonParamProblem{pointer: aux}, pkt)
+ e.protocol.stack.Stats().MalformedRcvdPackets.Increment()
+ r.Stats().IP.MalformedPacketsReceived.Increment()
+ }
+ return
+ }
+ }
switch res := e.dispatcher.DeliverTransportPacket(r, p, pkt); res {
case stack.TransportPacketHandled:
@@ -778,26 +845,32 @@ func (p *protocol) SetForwarding(v bool) {
}
}
-// calculateMTU calculates the network-layer payload MTU based on the link-layer
-// payload mtu.
-func calculateMTU(mtu uint32) uint32 {
- if mtu > MaxTotalSize {
- mtu = MaxTotalSize
+// calculateNetworkMTU calculates the network-layer payload MTU based on the
+// link-layer payload mtu.
+func calculateNetworkMTU(linkMTU, networkHeaderSize uint32) (uint32, *tcpip.Error) {
+ if linkMTU < header.IPv4MinimumMTU {
+ return 0, tcpip.ErrInvalidEndpointState
}
- return mtu - header.IPv4MinimumSize
-}
-// calculateFragmentInnerMTU calculates the maximum number of bytes of
-// fragmentable data a fragment can have, based on the link layer mtu and pkt's
-// network header size.
-func calculateFragmentInnerMTU(mtu uint32, pkt *stack.PacketBuffer) uint32 {
- if mtu > MaxTotalSize {
- mtu = MaxTotalSize
+ // As per RFC 791 section 3.1, an IPv4 header cannot exceed 60 bytes in
+ // length:
+ // The maximal internet header is 60 octets, and a typical internet header
+ // is 20 octets, allowing a margin for headers of higher level protocols.
+ if networkHeaderSize > header.IPv4MaximumHeaderSize {
+ return 0, tcpip.ErrMalformedHeader
}
- mtu -= uint32(pkt.NetworkHeader().View().Size())
- // Round the MTU down to align to 8 bytes.
- mtu &^= 7
- return mtu
+
+ networkMTU := linkMTU
+ if networkMTU > MaxTotalSize {
+ networkMTU = MaxTotalSize
+ }
+
+ return networkMTU - uint32(networkHeaderSize), nil
+}
+
+func packetMustBeFragmented(pkt *stack.PacketBuffer, networkMTU uint32, gso *stack.GSO) bool {
+ payload := pkt.TransportHeader().View().Size() + pkt.Data.Size()
+ return (gso == nil || gso.Type == stack.GSONone) && uint32(payload) > networkMTU
}
// addressToUint32 translates an IPv4 address into its little endian uint32
@@ -836,7 +909,7 @@ func NewProtocol(s *stack.Stack) stack.NetworkProtocol {
ids: ids,
hashIV: hashIV,
defaultTTL: DefaultTTL,
- fragmentation: fragmentation.NewFragmentation(fragmentblockSize, fragmentation.HighFragThreshold, fragmentation.LowFragThreshold, reassembleTimeout, s.Clock()),
+ fragmentation: fragmentation.NewFragmentation(fragmentblockSize, fragmentation.HighFragThreshold, fragmentation.LowFragThreshold, ReassembleTimeout, s.Clock()),
}
}
@@ -862,3 +935,322 @@ func buildNextFragment(pf *fragmentation.PacketFragmenter, originalIPHeader head
return fragPkt, more
}
+
+// optionAction describes possible actions that may be taken on an option
+// while processing it.
+type optionAction uint8
+
+const (
+ // optionRemove says that the option should not be in the output option set.
+ optionRemove optionAction = iota
+
+ // optionProcess says that the option should be fully processed.
+ optionProcess
+
+ // optionVerify says the option should be checked and passed unchanged.
+ optionVerify
+
+ // optionPass says to pass the output set without checking.
+ optionPass
+)
+
+// optionActions list what to do for each option in a given scenario.
+type optionActions struct {
+ // timestamp controls what to do with a Timestamp option.
+ timestamp optionAction
+
+ // recordroute controls what to do with a Record Route option.
+ recordRoute optionAction
+
+ // unknown controls what to do with an unknown option.
+ unknown optionAction
+}
+
+// optionsUsage specifies the ways options may be operated upon for a given
+// scenario during packet processing.
+type optionsUsage interface {
+ actions() optionActions
+}
+
+// optionUsageReceive implements optionsUsage for received packets.
+type optionUsageReceive struct{}
+
+// actions implements optionsUsage.
+func (*optionUsageReceive) actions() optionActions {
+ return optionActions{
+ timestamp: optionVerify,
+ recordRoute: optionVerify,
+ unknown: optionPass,
+ }
+}
+
+// TODO(gvisor.dev/issue/4586): Add an entry here for forwarding when it
+// is enabled (Process, Process, Pass) and for fragmenting (Process, Process,
+// Pass for frag1, but Remove,Remove,Remove for all other frags).
+
+// optionUsageEcho implements optionsUsage for echo packet processing.
+type optionUsageEcho struct{}
+
+// actions implements optionsUsage.
+func (*optionUsageEcho) actions() optionActions {
+ return optionActions{
+ timestamp: optionProcess,
+ recordRoute: optionProcess,
+ unknown: optionRemove,
+ }
+}
+
+var (
+ errIPv4TimestampOptInvalidLength = errors.New("invalid Timestamp length")
+ errIPv4TimestampOptInvalidPointer = errors.New("invalid Timestamp pointer")
+ errIPv4TimestampOptOverflow = errors.New("overflow in Timestamp")
+ errIPv4TimestampOptInvalidFlags = errors.New("invalid Timestamp flags")
+)
+
+// handleTimestamp does any required processing on a Timestamp option
+// in place.
+func handleTimestamp(tsOpt header.IPv4OptionTimestamp, localAddress tcpip.Address, clock tcpip.Clock, usage optionsUsage) (uint8, error) {
+ flags := tsOpt.Flags()
+ var entrySize uint8
+ switch flags {
+ case header.IPv4OptionTimestampOnlyFlag:
+ entrySize = header.IPv4OptionTimestampSize
+ case
+ header.IPv4OptionTimestampWithIPFlag,
+ header.IPv4OptionTimestampWithPredefinedIPFlag:
+ entrySize = header.IPv4OptionTimestampWithAddrSize
+ default:
+ return header.IPv4OptTSOFLWAndFLGOffset, errIPv4TimestampOptInvalidFlags
+ }
+
+ pointer := tsOpt.Pointer()
+ // To simplify processing below, base further work on the array of timestamps
+ // beyond the header, rather than on the whole option. Also to aid
+ // calculations set 'nextSlot' to be 0 based as in the packet it is 1 based.
+ nextSlot := pointer - (header.IPv4OptionTimestampHdrLength + 1)
+ optLen := tsOpt.Size()
+ dataLength := optLen - header.IPv4OptionTimestampHdrLength
+
+ // In the section below, we verify the pointer, length and overflow counter
+ // fields of the option. The distinction is in which byte you return as being
+ // in error in the ICMP packet. Offsets 1 (length), 2 pointer)
+ // or 3 (overflowed counter).
+ //
+ // The following RFC sections cover this section:
+ //
+ // RFC 791 (page 22):
+ // If there is some room but not enough room for a full timestamp
+ // to be inserted, or the overflow count itself overflows, the
+ // original datagram is considered to be in error and is discarded.
+ // In either case an ICMP parameter problem message may be sent to
+ // the source host [3].
+ //
+ // You can get this situation in two ways. Firstly if the data area is not
+ // a multiple of the entry size or secondly, if the pointer is not at a
+ // multiple of the entry size. The wording of the RFC suggests that
+ // this is not an error until you actually run out of space.
+ if pointer > optLen {
+ // RFC 791 (page 22) says we should switch to using the overflow count.
+ // If the timestamp data area is already full (the pointer exceeds
+ // the length) the datagram is forwarded without inserting the
+ // timestamp, but the overflow count is incremented by one.
+ if flags == header.IPv4OptionTimestampWithPredefinedIPFlag {
+ // By definition we have nothing to do.
+ return 0, nil
+ }
+
+ if tsOpt.IncOverflow() != 0 {
+ return 0, nil
+ }
+ // The overflow count is also full.
+ return header.IPv4OptTSOFLWAndFLGOffset, errIPv4TimestampOptOverflow
+ }
+ if nextSlot+entrySize > dataLength {
+ // The data area isn't full but there isn't room for a new entry.
+ // Either Length or Pointer could be bad.
+ if false {
+ // We must select Pointer for Linux compatibility, even if
+ // only the length is bad.
+ // The Linux code is at (in October 2020)
+ // https://github.com/torvalds/linux/blob/bbf5c979011a099af5dc76498918ed7df445635b/net/ipv4/ip_options.c#L367-L370
+ // if (optptr[2]+3 > optlen) {
+ // pp_ptr = optptr + 2;
+ // goto error;
+ // }
+ // which doesn't distinguish between which of optptr[2] or optlen
+ // is wrong, but just arbitrarily decides on optptr+2.
+ if dataLength%entrySize != 0 {
+ // The Data section size should be a multiple of the expected
+ // timestamp entry size.
+ return header.IPv4OptionLengthOffset, errIPv4TimestampOptInvalidLength
+ }
+ // If the size is OK, the pointer must be corrupted.
+ }
+ return header.IPv4OptTSPointerOffset, errIPv4TimestampOptInvalidPointer
+ }
+
+ if usage.actions().timestamp == optionProcess {
+ tsOpt.UpdateTimestamp(localAddress, clock)
+ }
+ return 0, nil
+}
+
+var (
+ errIPv4RecordRouteOptInvalidLength = errors.New("invalid length in Record Route")
+ errIPv4RecordRouteOptInvalidPointer = errors.New("invalid pointer in Record Route")
+)
+
+// handleRecordRoute checks and processes a Record route option. It is much
+// like the timestamp type 1 option, but without timestamps. The passed in
+// address is stored in the option in the correct spot if possible.
+func handleRecordRoute(rrOpt header.IPv4OptionRecordRoute, localAddress tcpip.Address, usage optionsUsage) (uint8, error) {
+ optlen := rrOpt.Size()
+
+ if optlen < header.IPv4AddressSize+header.IPv4OptionRecordRouteHdrLength {
+ return header.IPv4OptionLengthOffset, errIPv4RecordRouteOptInvalidLength
+ }
+
+ nextSlot := rrOpt.Pointer() - 1 // Pointer is 1 based.
+
+ // RFC 791 page 21 says
+ // If the route data area is already full (the pointer exceeds the
+ // length) the datagram is forwarded without inserting the address
+ // into the recorded route. If there is some room but not enough
+ // room for a full address to be inserted, the original datagram is
+ // considered to be in error and is discarded. In either case an
+ // ICMP parameter problem message may be sent to the source
+ // host.
+ // The use of the words "In either case" suggests that a 'full' RR option
+ // could generate an ICMP at every hop after it fills up. We chose to not
+ // do this (as do most implementations). It is probable that the inclusion
+ // of these words is a copy/paste error from the timestamp option where
+ // there are two failure reasons given.
+ if nextSlot >= optlen {
+ return 0, nil
+ }
+
+ // The data area isn't full but there isn't room for a new entry.
+ // Either Length or Pointer could be bad. We must select Pointer for Linux
+ // compatibility, even if only the length is bad.
+ if nextSlot+header.IPv4AddressSize > optlen {
+ if false {
+ // This is what we would do if we were not being Linux compatible.
+ // Check for bad pointer or length value. Must be a multiple of 4 after
+ // accounting for the 3 byte header and not within that header.
+ // RFC 791, page 20 says:
+ // The pointer is relative to this option, and the
+ // smallest legal value for the pointer is 4.
+ //
+ // A recorded route is composed of a series of internet addresses.
+ // Each internet address is 32 bits or 4 octets.
+ // Linux skips this test so we must too. See Linux code at:
+ // https://github.com/torvalds/linux/blob/bbf5c979011a099af5dc76498918ed7df445635b/net/ipv4/ip_options.c#L338-L341
+ // if (optptr[2]+3 > optlen) {
+ // pp_ptr = optptr + 2;
+ // goto error;
+ // }
+ if (optlen-header.IPv4OptionRecordRouteHdrLength)%header.IPv4AddressSize != 0 {
+ // Length is bad, not on integral number of slots.
+ return header.IPv4OptionLengthOffset, errIPv4RecordRouteOptInvalidLength
+ }
+ // If not length, the fault must be with the pointer.
+ }
+ return header.IPv4OptRRPointerOffset, errIPv4RecordRouteOptInvalidPointer
+ }
+ if usage.actions().recordRoute == optionVerify {
+ return 0, nil
+ }
+ rrOpt.StoreAddress(localAddress)
+ return 0, nil
+}
+
+// processIPOptions parses the IPv4 options and produces a new set of options
+// suitable for use in the next step of packet processing as informed by usage.
+// The original will not be touched.
+//
+// Returns
+// - The location of an error if there was one (or 0 if no error)
+// - If there is an error, information as to what it was was.
+// - The replacement option set.
+func processIPOptions(r *stack.Route, orig header.IPv4Options, usage optionsUsage) (uint8, header.IPv4Options, error) {
+
+ opts := header.IPv4Options(orig)
+ optIter := opts.MakeIterator()
+
+ // Each option other than NOP must only appear (RFC 791 section 3.1, at the
+ // definition of every type). Keep track of each of the possible types in
+ // the 8 bit 'type' field.
+ var seenOptions [math.MaxUint8 + 1]bool
+
+ // TODO(gvisor.dev/issue/4586):
+ // This will need tweaking when we start really forwarding packets
+ // as we may need to get two addresses, for rx and tx interfaces.
+ // We will also have to take usage into account.
+ prefixedAddress, err := r.Stack().GetMainNICAddress(r.NICID(), ProtocolNumber)
+ localAddress := prefixedAddress.Address
+ if err != nil {
+ if r.IsInboundBroadcast() || header.IsV4MulticastAddress(r.LocalAddress) {
+ return 0 /* errCursor */, nil, header.ErrIPv4OptionAddress
+ }
+ localAddress = r.LocalAddress
+ }
+
+ for {
+ option, done, err := optIter.Next()
+ if done || err != nil {
+ return optIter.ErrCursor, optIter.Finalize(), err
+ }
+ optType := option.Type()
+ if optType == header.IPv4OptionNOPType {
+ optIter.PushNOPOrEnd(optType)
+ continue
+ }
+ if optType == header.IPv4OptionListEndType {
+ optIter.PushNOPOrEnd(optType)
+ return 0 /* errCursor */, optIter.Finalize(), nil /* err */
+ }
+
+ // check for repeating options (multiple NOPs are OK)
+ if seenOptions[optType] {
+ return optIter.ErrCursor, nil, header.ErrIPv4OptDuplicate
+ }
+ seenOptions[optType] = true
+
+ optLen := int(option.Size())
+ switch option := option.(type) {
+ case *header.IPv4OptionTimestamp:
+ r.Stats().IP.OptionTSReceived.Increment()
+ if usage.actions().timestamp != optionRemove {
+ clock := r.Stack().Clock()
+ newBuffer := optIter.RemainingBuffer()[:len(*option)]
+ _ = copy(newBuffer, option.Contents())
+ offset, err := handleTimestamp(header.IPv4OptionTimestamp(newBuffer), localAddress, clock, usage)
+ if err != nil {
+ return optIter.ErrCursor + offset, nil, err
+ }
+ optIter.ConsumeBuffer(optLen)
+ }
+
+ case *header.IPv4OptionRecordRoute:
+ r.Stats().IP.OptionRRReceived.Increment()
+ if usage.actions().recordRoute != optionRemove {
+ newBuffer := optIter.RemainingBuffer()[:len(*option)]
+ _ = copy(newBuffer, option.Contents())
+ offset, err := handleRecordRoute(header.IPv4OptionRecordRoute(newBuffer), localAddress, usage)
+ if err != nil {
+ return optIter.ErrCursor + offset, nil, err
+ }
+ optIter.ConsumeBuffer(optLen)
+ }
+
+ default:
+ r.Stats().IP.OptionUnknownReceived.Increment()
+ if usage.actions().unknown == optionPass {
+ newBuffer := optIter.RemainingBuffer()[:optLen]
+ // Arguments already heavily checked.. ignore result.
+ _ = copy(newBuffer, option.Contents())
+ optIter.ConsumeBuffer(optLen)
+ }
+ }
+ }
+}
diff --git a/pkg/tcpip/network/ipv4/ipv4_test.go b/pkg/tcpip/network/ipv4/ipv4_test.go
index fee11bb38..61672a5ff 100644
--- a/pkg/tcpip/network/ipv4/ipv4_test.go
+++ b/pkg/tcpip/network/ipv4/ipv4_test.go
@@ -21,11 +21,13 @@ import (
"math"
"net"
"testing"
+ "time"
"github.com/google/go-cmp/cmp"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/buffer"
"gvisor.dev/gvisor/pkg/tcpip/checker"
+ "gvisor.dev/gvisor/pkg/tcpip/faketime"
"gvisor.dev/gvisor/pkg/tcpip/header"
"gvisor.dev/gvisor/pkg/tcpip/link/channel"
"gvisor.dev/gvisor/pkg/tcpip/link/sniffer"
@@ -39,7 +41,10 @@ import (
"gvisor.dev/gvisor/pkg/waiter"
)
-const extraHeaderReserve = 50
+const (
+ extraHeaderReserve = 50
+ defaultMTU = 65536
+)
func TestExcludeBroadcast(t *testing.T) {
s := stack.New(stack.Options{
@@ -47,7 +52,6 @@ func TestExcludeBroadcast(t *testing.T) {
TransportProtocols: []stack.TransportProtocolFactory{udp.NewProtocol},
})
- const defaultMTU = 65536
ep := stack.LinkEndpoint(channel.New(256, defaultMTU, ""))
if testing.Verbose() {
ep = sniffer.New(ep)
@@ -103,7 +107,6 @@ func TestExcludeBroadcast(t *testing.T) {
// checks the response.
func TestIPv4Sanity(t *testing.T) {
const (
- defaultMTU = header.IPv6MinimumMTU
ttl = 255
nicID = 1
randomSequence = 123
@@ -118,27 +121,29 @@ func TestIPv4Sanity(t *testing.T) {
)
tests := []struct {
- name string
- headerLength uint8 // value of 0 means "use correct size"
- badHeaderChecksum bool
- maxTotalLength uint16
- transportProtocol uint8
- TTL uint8
- shouldFail bool
- expectICMP bool
- ICMPType header.ICMPv4Type
- ICMPCode header.ICMPv4Code
- options []byte
+ name string
+ headerLength uint8 // value of 0 means "use correct size"
+ badHeaderChecksum bool
+ maxTotalLength uint16
+ transportProtocol uint8
+ TTL uint8
+ options []byte
+ replyOptions []byte // if succeeds, reply should look like this
+ shouldFail bool
+ expectErrorICMP bool
+ ICMPType header.ICMPv4Type
+ ICMPCode header.ICMPv4Code
+ paramProblemPointer uint8
}{
{
- name: "valid",
- maxTotalLength: defaultMTU,
+ name: "valid no options",
+ maxTotalLength: ipv4.MaxTotalSize,
transportProtocol: uint8(header.ICMPv4ProtocolNumber),
TTL: ttl,
},
{
name: "bad header checksum",
- maxTotalLength: defaultMTU,
+ maxTotalLength: ipv4.MaxTotalSize,
transportProtocol: uint8(header.ICMPv4ProtocolNumber),
TTL: ttl,
badHeaderChecksum: true,
@@ -157,47 +162,47 @@ func TestIPv4Sanity(t *testing.T) {
// received with TTL less than 2.
{
name: "zero TTL",
- maxTotalLength: defaultMTU,
+ maxTotalLength: ipv4.MaxTotalSize,
transportProtocol: uint8(header.ICMPv4ProtocolNumber),
TTL: 0,
- shouldFail: false,
},
{
name: "one TTL",
- maxTotalLength: defaultMTU,
+ maxTotalLength: ipv4.MaxTotalSize,
transportProtocol: uint8(header.ICMPv4ProtocolNumber),
TTL: 1,
- shouldFail: false,
},
{
name: "End options",
- maxTotalLength: defaultMTU,
+ maxTotalLength: ipv4.MaxTotalSize,
transportProtocol: uint8(header.ICMPv4ProtocolNumber),
TTL: ttl,
options: []byte{0, 0, 0, 0},
+ replyOptions: []byte{0, 0, 0, 0},
},
{
name: "NOP options",
- maxTotalLength: defaultMTU,
+ maxTotalLength: ipv4.MaxTotalSize,
transportProtocol: uint8(header.ICMPv4ProtocolNumber),
TTL: ttl,
options: []byte{1, 1, 1, 1},
+ replyOptions: []byte{1, 1, 1, 1},
},
{
name: "NOP and End options",
- maxTotalLength: defaultMTU,
+ maxTotalLength: ipv4.MaxTotalSize,
transportProtocol: uint8(header.ICMPv4ProtocolNumber),
TTL: ttl,
options: []byte{1, 1, 0, 0},
+ replyOptions: []byte{1, 1, 0, 0},
},
{
name: "bad header length",
headerLength: header.IPv4MinimumSize - 1,
- maxTotalLength: defaultMTU,
+ maxTotalLength: ipv4.MaxTotalSize,
transportProtocol: uint8(header.ICMPv4ProtocolNumber),
TTL: ttl,
shouldFail: true,
- expectICMP: false,
},
{
name: "bad total length (0)",
@@ -205,7 +210,6 @@ func TestIPv4Sanity(t *testing.T) {
transportProtocol: uint8(header.ICMPv4ProtocolNumber),
TTL: ttl,
shouldFail: true,
- expectICMP: false,
},
{
name: "bad total length (ip - 1)",
@@ -213,7 +217,6 @@ func TestIPv4Sanity(t *testing.T) {
transportProtocol: uint8(header.ICMPv4ProtocolNumber),
TTL: ttl,
shouldFail: true,
- expectICMP: false,
},
{
name: "bad total length (ip + icmp - 1)",
@@ -221,28 +224,361 @@ func TestIPv4Sanity(t *testing.T) {
transportProtocol: uint8(header.ICMPv4ProtocolNumber),
TTL: ttl,
shouldFail: true,
- expectICMP: false,
},
{
name: "bad protocol",
- maxTotalLength: defaultMTU,
+ maxTotalLength: ipv4.MaxTotalSize,
transportProtocol: 99,
TTL: ttl,
shouldFail: true,
- expectICMP: true,
+ expectErrorICMP: true,
ICMPType: header.ICMPv4DstUnreachable,
ICMPCode: header.ICMPv4ProtoUnreachable,
},
+ {
+ name: "timestamp option overflow",
+ maxTotalLength: ipv4.MaxTotalSize,
+ transportProtocol: uint8(header.ICMPv4ProtocolNumber),
+ TTL: ttl,
+ options: []byte{
+ 68, 12, 13, 0x11,
+ 192, 168, 1, 12,
+ 1, 2, 3, 4,
+ },
+ replyOptions: []byte{
+ 68, 12, 13, 0x21,
+ 192, 168, 1, 12,
+ 1, 2, 3, 4,
+ },
+ },
+ {
+ name: "timestamp option overflow full",
+ maxTotalLength: ipv4.MaxTotalSize,
+ transportProtocol: uint8(header.ICMPv4ProtocolNumber),
+ TTL: ttl,
+ options: []byte{
+ 68, 12, 13, 0xF1,
+ // ^ Counter full (15/0xF)
+ 192, 168, 1, 12,
+ 1, 2, 3, 4,
+ },
+ shouldFail: true,
+ expectErrorICMP: true,
+ ICMPType: header.ICMPv4ParamProblem,
+ ICMPCode: header.ICMPv4UnusedCode,
+ paramProblemPointer: header.IPv4MinimumSize + 3,
+ replyOptions: []byte{},
+ },
+ {
+ name: "unknown option",
+ maxTotalLength: ipv4.MaxTotalSize,
+ transportProtocol: uint8(header.ICMPv4ProtocolNumber),
+ TTL: ttl,
+ options: []byte{10, 4, 9, 0},
+ // ^^
+ // The unknown option should be stripped out of the reply.
+ replyOptions: []byte{},
+ },
+ {
+ name: "bad option - length 0",
+ maxTotalLength: ipv4.MaxTotalSize,
+ transportProtocol: uint8(header.ICMPv4ProtocolNumber),
+ TTL: ttl,
+ options: []byte{
+ 68, 0, 9, 0,
+ // ^
+ 1, 2, 3, 4,
+ },
+ shouldFail: true,
+ },
+ {
+ name: "bad option - length big",
+ maxTotalLength: ipv4.MaxTotalSize,
+ transportProtocol: uint8(header.ICMPv4ProtocolNumber),
+ TTL: ttl,
+ options: []byte{
+ 68, 9, 9, 0,
+ // ^
+ // There are only 8 bytes allocated to options so 9 bytes of timestamp
+ // space is not possible. (Second byte)
+ 1, 2, 3, 4,
+ },
+ shouldFail: true,
+ },
+ {
+ // This tests for some linux compatible behaviour.
+ // The ICMP pointer returned is 22 for Linux but the
+ // error is actually in spot 21.
+ name: "bad option - length bad",
+ maxTotalLength: ipv4.MaxTotalSize,
+ transportProtocol: uint8(header.ICMPv4ProtocolNumber),
+ TTL: ttl,
+ // Timestamps are in multiples of 4 or 8 but never 7.
+ // The option space should be padded out.
+ options: []byte{
+ 68, 7, 5, 0,
+ // ^ ^ Linux points here which is wrong.
+ // | Not a multiple of 4
+ 1, 2, 3,
+ },
+ shouldFail: true,
+ expectErrorICMP: true,
+ ICMPType: header.ICMPv4ParamProblem,
+ ICMPCode: header.ICMPv4UnusedCode,
+ paramProblemPointer: header.IPv4MinimumSize + 2,
+ },
+ {
+ name: "multiple type 0 with room",
+ maxTotalLength: ipv4.MaxTotalSize,
+ transportProtocol: uint8(header.ICMPv4ProtocolNumber),
+ TTL: ttl,
+ options: []byte{
+ 68, 24, 21, 0x00,
+ 1, 2, 3, 4,
+ 5, 6, 7, 8,
+ 9, 10, 11, 12,
+ 13, 14, 15, 16,
+ 0, 0, 0, 0,
+ },
+ replyOptions: []byte{
+ 68, 24, 25, 0x00,
+ 1, 2, 3, 4,
+ 5, 6, 7, 8,
+ 9, 10, 11, 12,
+ 13, 14, 15, 16,
+ 0x00, 0xad, 0x1c, 0x40, // time we expect from fakeclock
+ },
+ },
+ {
+ // The timestamp area is full so add to the overflow count.
+ name: "multiple type 1 timestamps",
+ maxTotalLength: ipv4.MaxTotalSize,
+ transportProtocol: uint8(header.ICMPv4ProtocolNumber),
+ TTL: ttl,
+ options: []byte{
+ 68, 20, 21, 0x11,
+ // ^
+ 192, 168, 1, 12,
+ 1, 2, 3, 4,
+ 192, 168, 1, 13,
+ 5, 6, 7, 8,
+ },
+ // Overflow count is the top nibble of the 4th byte.
+ replyOptions: []byte{
+ 68, 20, 21, 0x21,
+ // ^
+ 192, 168, 1, 12,
+ 1, 2, 3, 4,
+ 192, 168, 1, 13,
+ 5, 6, 7, 8,
+ },
+ },
+ {
+ name: "multiple type 1 timestamps with room",
+ maxTotalLength: ipv4.MaxTotalSize,
+ transportProtocol: uint8(header.ICMPv4ProtocolNumber),
+ TTL: ttl,
+ options: []byte{
+ 68, 28, 21, 0x01,
+ 192, 168, 1, 12,
+ 1, 2, 3, 4,
+ 192, 168, 1, 13,
+ 5, 6, 7, 8,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ },
+ replyOptions: []byte{
+ 68, 28, 29, 0x01,
+ 192, 168, 1, 12,
+ 1, 2, 3, 4,
+ 192, 168, 1, 13,
+ 5, 6, 7, 8,
+ 192, 168, 1, 58, // New IP Address.
+ 0x00, 0xad, 0x1c, 0x40, // time we expect from fakeclock
+ },
+ },
+ {
+ // Needs 8 bytes for a type 1 timestamp but there are only 4 free.
+ name: "bad timer element alignment",
+ maxTotalLength: ipv4.MaxTotalSize,
+ transportProtocol: uint8(header.ICMPv4ProtocolNumber),
+ TTL: ttl,
+ options: []byte{
+ 68, 20, 17, 0x01,
+ // ^^ ^^ 20 byte area, next free spot at 17.
+ 192, 168, 1, 12,
+ 1, 2, 3, 4,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ },
+ shouldFail: true,
+ expectErrorICMP: true,
+ ICMPType: header.ICMPv4ParamProblem,
+ ICMPCode: header.ICMPv4UnusedCode,
+ paramProblemPointer: header.IPv4MinimumSize + 2,
+ },
+ // End of option list with illegal option after it, which should be ignored.
+ {
+ name: "end of options list",
+ maxTotalLength: ipv4.MaxTotalSize,
+ transportProtocol: uint8(header.ICMPv4ProtocolNumber),
+ TTL: ttl,
+ options: []byte{
+ 68, 12, 13, 0x11,
+ 192, 168, 1, 12,
+ 1, 2, 3, 4,
+ 0, 10, 3, 99,
+ },
+ replyOptions: []byte{
+ 68, 12, 13, 0x21,
+ 192, 168, 1, 12,
+ 1, 2, 3, 4,
+ 0, 0, 0, 0, // 3 bytes unknown option
+ }, // ^ End of options hides following bytes.
+ },
+ {
+ // Timestamp with a size too small.
+ name: "timestamp truncated",
+ maxTotalLength: ipv4.MaxTotalSize,
+ transportProtocol: uint8(header.ICMPv4ProtocolNumber),
+ TTL: ttl,
+ options: []byte{68, 1, 0, 0},
+ // ^ Smallest possible is 8.
+ shouldFail: true,
+ },
+ {
+ name: "single record route with room",
+ maxTotalLength: ipv4.MaxTotalSize,
+ transportProtocol: uint8(header.ICMPv4ProtocolNumber),
+ TTL: ttl,
+ options: []byte{
+ 7, 7, 4, // 3 byte header
+ 0, 0, 0, 0,
+ 0,
+ },
+ replyOptions: []byte{
+ 7, 7, 8, // 3 byte header
+ 192, 168, 1, 58, // New IP Address.
+ 0, // padding to multiple of 4 bytes.
+ },
+ },
+ {
+ name: "multiple record route with room",
+ maxTotalLength: ipv4.MaxTotalSize,
+ transportProtocol: uint8(header.ICMPv4ProtocolNumber),
+ TTL: ttl,
+ options: []byte{
+ 7, 23, 20, // 3 byte header
+ 1, 2, 3, 4,
+ 5, 6, 7, 8,
+ 9, 10, 11, 12,
+ 13, 14, 15, 16,
+ 0, 0, 0, 0,
+ 0,
+ },
+ replyOptions: []byte{
+ 7, 23, 24,
+ 1, 2, 3, 4,
+ 5, 6, 7, 8,
+ 9, 10, 11, 12,
+ 13, 14, 15, 16,
+ 192, 168, 1, 58, // New IP Address.
+ 0, // padding to multiple of 4 bytes.
+ },
+ },
+ {
+ name: "single record route with no room",
+ maxTotalLength: ipv4.MaxTotalSize,
+ transportProtocol: uint8(header.ICMPv4ProtocolNumber),
+ TTL: ttl,
+ options: []byte{
+ 7, 7, 8, // 3 byte header
+ 1, 2, 3, 4,
+ 0,
+ },
+ replyOptions: []byte{
+ 7, 7, 8, // 3 byte header
+ 1, 2, 3, 4,
+ 0, // padding to multiple of 4 bytes.
+ },
+ },
+ {
+ // Unlike timestamp, this should just succeed.
+ name: "multiple record route with no room",
+ maxTotalLength: ipv4.MaxTotalSize,
+ transportProtocol: uint8(header.ICMPv4ProtocolNumber),
+ TTL: ttl,
+ options: []byte{
+ 7, 23, 24, // 3 byte header
+ 1, 2, 3, 4,
+ 5, 6, 7, 8,
+ 9, 10, 11, 12,
+ 13, 14, 15, 16,
+ 17, 18, 19, 20,
+ 0,
+ },
+ replyOptions: []byte{
+ 7, 23, 24,
+ 1, 2, 3, 4,
+ 5, 6, 7, 8,
+ 9, 10, 11, 12,
+ 13, 14, 15, 16,
+ 17, 18, 19, 20,
+ 0, // padding to multiple of 4 bytes.
+ },
+ },
+ {
+ // Confirm linux bug for bug compatibility.
+ // Linux returns slot 22 but the error is in slot 21.
+ name: "multiple record route with not enough room",
+ maxTotalLength: ipv4.MaxTotalSize,
+ transportProtocol: uint8(header.ICMPv4ProtocolNumber),
+ TTL: ttl,
+ options: []byte{
+ 7, 8, 8, // 3 byte header
+ // ^ ^ Linux points here. We must too.
+ // | Not enough room. 1 byte free, need 4.
+ 1, 2, 3, 4,
+ 0,
+ },
+ shouldFail: true,
+ expectErrorICMP: true,
+ ICMPType: header.ICMPv4ParamProblem,
+ ICMPCode: header.ICMPv4UnusedCode,
+ paramProblemPointer: header.IPv4MinimumSize + 2,
+ replyOptions: []byte{},
+ },
+ {
+ name: "duplicate record route",
+ maxTotalLength: ipv4.MaxTotalSize,
+ transportProtocol: uint8(header.ICMPv4ProtocolNumber),
+ TTL: ttl,
+ options: []byte{
+ 7, 7, 8, // 3 byte header
+ 1, 2, 3, 4,
+ 7, 7, 8, // 3 byte header
+ 1, 2, 3, 4,
+ 0, 0, // pad
+ },
+ shouldFail: true,
+ expectErrorICMP: true,
+ ICMPType: header.ICMPv4ParamProblem,
+ ICMPCode: header.ICMPv4UnusedCode,
+ paramProblemPointer: header.IPv4MinimumSize + 7,
+ replyOptions: []byte{},
+ },
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
+ clock := faketime.NewManualClock()
s := stack.New(stack.Options{
NetworkProtocols: []stack.NetworkProtocolFactory{ipv4.NewProtocol},
TransportProtocols: []stack.TransportProtocolFactory{icmp.NewProtocol4},
+ Clock: clock,
})
// We expect at most a single packet in response to our ICMP Echo Request.
- e := channel.New(1, defaultMTU, "")
+ e := channel.New(1, ipv4.MaxTotalSize, "")
if err := s.CreateNIC(nicID, e); err != nil {
t.Fatalf("CreateNIC(%d, _): %s", nicID, err)
}
@@ -250,6 +586,9 @@ func TestIPv4Sanity(t *testing.T) {
if err := s.AddProtocolAddress(nicID, ipv4ProtoAddr); err != nil {
t.Fatalf("AddProtocolAddress(%d, %#v): %s", nicID, ipv4ProtoAddr, err)
}
+ // Advance the clock by some unimportant amount to make
+ // sure it's all set up.
+ clock.Advance(time.Millisecond * 0x10203040)
// Default routes for IPv4 so ICMP can find a route to the remote
// node when attempting to send the ICMP Echo Reply.
@@ -312,14 +651,20 @@ func TestIPv4Sanity(t *testing.T) {
reply, ok := e.Read()
if !ok {
if test.shouldFail {
- if test.expectICMP {
- t.Fatal("expected ICMP error response missing")
+ if test.expectErrorICMP {
+ t.Fatalf("ICMP error response (type %d, code %d) missing", test.ICMPType, test.ICMPCode)
}
return // Expected silent failure.
}
t.Fatal("expected ICMP echo reply missing")
}
+ // We didn't expect a packet. Register our surprise but carry on to
+ // provide more information about what we got.
+ if test.shouldFail && !test.expectErrorICMP {
+ t.Error("unexpected packet response")
+ }
+
// Check the route that brought the packet to us.
if reply.Route.LocalAddress != ipv4Addr.Address {
t.Errorf("got pkt.Route.LocalAddress = %s, want = %s", reply.Route.LocalAddress, ipv4Addr.Address)
@@ -328,57 +673,90 @@ func TestIPv4Sanity(t *testing.T) {
t.Errorf("got pkt.Route.RemoteAddress = %s, want = %s", reply.Route.RemoteAddress, remoteIPv4Addr)
}
- // Make sure it's all in one buffer.
- vv := buffer.NewVectorisedView(reply.Pkt.Size(), reply.Pkt.Views())
- replyIPHeader := header.IPv4(vv.ToView())
+ // Make sure it's all in one buffer for checker.
+ replyIPHeader := header.IPv4(stack.PayloadSince(reply.Pkt.NetworkHeader()))
- // At this stage we only know it's an IP header so verify that much.
+ // At this stage we only know it's probably an IP+ICMP header so verify
+ // that much.
checker.IPv4(t, replyIPHeader,
checker.SrcAddr(ipv4Addr.Address),
checker.DstAddr(remoteIPv4Addr),
+ checker.ICMPv4(
+ checker.ICMPv4Checksum(),
+ ),
)
- // All expected responses are ICMP packets.
- if got, want := replyIPHeader.Protocol(), uint8(header.ICMPv4ProtocolNumber); got != want {
- t.Fatalf("not ICMP response, got protocol %d, want = %d", got, want)
+ // Don't proceed any further if the checker found problems.
+ if t.Failed() {
+ t.FailNow()
}
- replyICMPHeader := header.ICMPv4(replyIPHeader.Payload())
- // Sanity check the response.
+ // OK it's ICMP. We can safely look at the type now.
+ replyICMPHeader := header.ICMPv4(replyIPHeader.Payload())
switch replyICMPHeader.Type() {
- case header.ICMPv4DstUnreachable:
+ case header.ICMPv4ParamProblem:
+ if !test.shouldFail {
+ t.Fatalf("got Parameter Problem with pointer %d, wanted Echo Reply", replyICMPHeader.Pointer())
+ }
+ if !test.expectErrorICMP {
+ t.Fatalf("got Parameter Problem with pointer %d, wanted no response", replyICMPHeader.Pointer())
+ }
checker.IPv4(t, replyIPHeader,
checker.IPFullLength(uint16(header.IPv4MinimumSize+header.ICMPv4MinimumSize+requestPkt.Size())),
checker.IPv4HeaderLength(header.IPv4MinimumSize),
checker.ICMPv4(
+ checker.ICMPv4Type(test.ICMPType),
checker.ICMPv4Code(test.ICMPCode),
- checker.ICMPv4Checksum(),
+ checker.ICMPv4Pointer(test.paramProblemPointer),
checker.ICMPv4Payload([]byte(hdr.View())),
),
)
- if !test.shouldFail || !test.expectICMP {
- t.Fatalf("unexpected packet rejection, got ICMP error packet type %d, code %d",
+ return
+ case header.ICMPv4DstUnreachable:
+ if !test.shouldFail {
+ t.Fatalf("got ICMP error packet type %d, code %d, wanted Echo Reply",
header.ICMPv4DstUnreachable, replyICMPHeader.Code())
}
+ if !test.expectErrorICMP {
+ t.Fatalf("got ICMP error packet type %d, code %d, wanted no response",
+ header.ICMPv4DstUnreachable, replyICMPHeader.Code())
+ }
+ checker.IPv4(t, replyIPHeader,
+ checker.IPFullLength(uint16(header.IPv4MinimumSize+header.ICMPv4MinimumSize+requestPkt.Size())),
+ checker.IPv4HeaderLength(header.IPv4MinimumSize),
+ checker.ICMPv4(
+ checker.ICMPv4Type(test.ICMPType),
+ checker.ICMPv4Code(test.ICMPCode),
+ checker.ICMPv4Payload([]byte(hdr.View())),
+ ),
+ )
return
case header.ICMPv4EchoReply:
+ if test.shouldFail {
+ if !test.expectErrorICMP {
+ t.Error("got Echo Reply packet, want no response")
+ } else {
+ t.Errorf("got Echo Reply, want ICMP error type %d, code %d", test.ICMPType, test.ICMPCode)
+ }
+ }
+ // If the IP options change size then the packet will change size, so
+ // some IP header fields will need to be adjusted for the checks.
+ sizeChange := len(test.replyOptions) - len(test.options)
+
checker.IPv4(t, replyIPHeader,
- checker.IPv4HeaderLength(ipHeaderLength),
- checker.IPv4Options(test.options),
- checker.IPFullLength(uint16(requestPkt.Size())),
+ checker.IPv4HeaderLength(ipHeaderLength+sizeChange),
+ checker.IPv4Options(test.replyOptions),
+ checker.IPFullLength(uint16(requestPkt.Size()+sizeChange)),
checker.ICMPv4(
+ checker.ICMPv4Checksum(),
checker.ICMPv4Code(header.ICMPv4UnusedCode),
checker.ICMPv4Seq(randomSequence),
checker.ICMPv4Ident(randomIdent),
- checker.ICMPv4Checksum(),
),
)
- if test.shouldFail {
- t.Fatalf("unexpected Echo Reply packet\n")
- }
default:
- t.Fatalf("unexpected ICMP response, got type %d, want = %d or %d",
- replyICMPHeader.Type(), header.ICMPv4EchoReply, header.ICMPv4DstUnreachable)
+ t.Fatalf("unexpected ICMP response, got type %d, want = %d, %d or %d",
+ replyICMPHeader.Type(), header.ICMPv4EchoReply, header.ICMPv4DstUnreachable, header.ICMPv4ParamProblem)
}
})
}
@@ -462,7 +840,7 @@ var fragmentationTests = []struct {
wantFragments []fragmentInfo
}{
{
- description: "No Fragmentation",
+ description: "No fragmentation",
mtu: 1280,
gso: nil,
transportHeaderLength: 0,
@@ -483,6 +861,30 @@ var fragmentationTests = []struct {
},
},
{
+ description: "Fragmented with the minimum mtu",
+ mtu: header.IPv4MinimumMTU,
+ gso: nil,
+ transportHeaderLength: 0,
+ payloadSize: 100,
+ wantFragments: []fragmentInfo{
+ {offset: 0, payloadSize: 48, more: true},
+ {offset: 48, payloadSize: 48, more: true},
+ {offset: 96, payloadSize: 4, more: false},
+ },
+ },
+ {
+ description: "Fragmented with mtu not a multiple of 8",
+ mtu: header.IPv4MinimumMTU + 1,
+ gso: nil,
+ transportHeaderLength: 0,
+ payloadSize: 100,
+ wantFragments: []fragmentInfo{
+ {offset: 0, payloadSize: 48, more: true},
+ {offset: 48, payloadSize: 48, more: true},
+ {offset: 96, payloadSize: 4, more: false},
+ },
+ },
+ {
description: "No fragmentation with big header",
mtu: 2000,
gso: nil,
@@ -647,43 +1049,50 @@ func TestFragmentationWritePackets(t *testing.T) {
}
}
-// TestFragmentationErrors checks that errors are returned from write packet
+// TestFragmentationErrors checks that errors are returned from WritePacket
// correctly.
func TestFragmentationErrors(t *testing.T) {
const ttl = 42
- expectedError := tcpip.ErrAborted
- fragTests := []struct {
+ tests := []struct {
description string
mtu uint32
transportHeaderLength int
payloadSize int
allowPackets int
- fragmentCount int
+ outgoingErrors int
+ mockError *tcpip.Error
+ wantError *tcpip.Error
}{
{
description: "No frag",
mtu: 2000,
- transportHeaderLength: 0,
payloadSize: 1000,
+ transportHeaderLength: 0,
allowPackets: 0,
- fragmentCount: 1,
+ outgoingErrors: 1,
+ mockError: tcpip.ErrAborted,
+ wantError: tcpip.ErrAborted,
},
{
description: "Error on first frag",
mtu: 500,
- transportHeaderLength: 0,
payloadSize: 1000,
+ transportHeaderLength: 0,
allowPackets: 0,
- fragmentCount: 3,
+ outgoingErrors: 3,
+ mockError: tcpip.ErrAborted,
+ wantError: tcpip.ErrAborted,
},
{
description: "Error on second frag",
mtu: 500,
- transportHeaderLength: 0,
payloadSize: 1000,
+ transportHeaderLength: 0,
allowPackets: 1,
- fragmentCount: 3,
+ outgoingErrors: 2,
+ mockError: tcpip.ErrAborted,
+ wantError: tcpip.ErrAborted,
},
{
description: "Error on first frag MTU smaller than header",
@@ -691,28 +1100,40 @@ func TestFragmentationErrors(t *testing.T) {
transportHeaderLength: 1000,
payloadSize: 500,
allowPackets: 0,
- fragmentCount: 4,
+ outgoingErrors: 4,
+ mockError: tcpip.ErrAborted,
+ wantError: tcpip.ErrAborted,
+ },
+ {
+ description: "Error when MTU is smaller than IPv4 minimum MTU",
+ mtu: header.IPv4MinimumMTU - 1,
+ transportHeaderLength: 0,
+ payloadSize: 500,
+ allowPackets: 0,
+ outgoingErrors: 1,
+ mockError: nil,
+ wantError: tcpip.ErrInvalidEndpointState,
},
}
- for _, ft := range fragTests {
+ for _, ft := range tests {
t.Run(ft.description, func(t *testing.T) {
- ep := testutil.NewMockLinkEndpoint(ft.mtu, expectedError, ft.allowPackets)
- r := buildRoute(t, ep)
pkt := testutil.MakeRandPkt(ft.transportHeaderLength, extraHeaderReserve+header.IPv4MinimumSize, []int{ft.payloadSize}, header.IPv4ProtocolNumber)
+ ep := testutil.NewMockLinkEndpoint(ft.mtu, ft.mockError, ft.allowPackets)
+ r := buildRoute(t, ep)
err := r.WritePacket(&stack.GSO{}, stack.NetworkHeaderParams{
Protocol: tcp.ProtocolNumber,
TTL: ttl,
TOS: stack.DefaultTOS,
}, pkt)
- if err != expectedError {
- t.Errorf("got WritePacket(_, _, _) = %s, want = %s", err, expectedError)
+ if err != ft.wantError {
+ t.Errorf("got WritePacket(_, _, _) = %s, want = %s", err, ft.wantError)
}
- if got, want := len(ep.WrittenPackets), int(r.Stats().IP.PacketsSent.Value()); err != nil && got != want {
- t.Errorf("got len(ep.WrittenPackets) = %d, want = %d", got, want)
+ if got := int(r.Stats().IP.PacketsSent.Value()); got != ft.allowPackets {
+ t.Errorf("got r.Stats().IP.PacketsSent.Value() = %d, want = %d", got, ft.allowPackets)
}
- if got, want := int(r.Stats().IP.OutgoingPacketErrors.Value()), ft.fragmentCount-ft.allowPackets; got != want {
- t.Errorf("got r.Stats().IP.OutgoingPacketErrors.Value() = %d, want = %d", got, want)
+ if got := int(r.Stats().IP.OutgoingPacketErrors.Value()); got != ft.outgoingErrors {
+ t.Errorf("got r.Stats().IP.OutgoingPacketErrors.Value() = %d, want = %d", got, ft.outgoingErrors)
}
})
}
@@ -744,7 +1165,6 @@ func TestInvalidFragments(t *testing.T) {
autoChecksum bool // if true, the Checksum field will be overwritten.
}
- // These packets have both IHL and TotalLength set to 0.
tests := []struct {
name string
fragments []fragmentData
@@ -984,7 +1404,6 @@ func TestInvalidFragments(t *testing.T) {
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
-
s := stack.New(stack.Options{
NetworkProtocols: []stack.NetworkProtocolFactory{
ipv4.NewProtocol,
@@ -1027,6 +1446,259 @@ func TestInvalidFragments(t *testing.T) {
}
}
+func TestFragmentReassemblyTimeout(t *testing.T) {
+ const (
+ nicID = 1
+ linkAddr = tcpip.LinkAddress("\x0a\x0b\x0c\x0d\x0e\x0e")
+ addr1 = "\x0a\x00\x00\x01"
+ addr2 = "\x0a\x00\x00\x02"
+ tos = 0
+ ident = 1
+ ttl = 48
+ protocol = 99
+ data = "TEST_FRAGMENT_REASSEMBLY_TIMEOUT"
+ )
+
+ type fragmentData struct {
+ ipv4fields header.IPv4Fields
+ payload []byte
+ }
+
+ tests := []struct {
+ name string
+ fragments []fragmentData
+ expectICMP bool
+ }{
+ {
+ name: "first fragment only",
+ fragments: []fragmentData{
+ {
+ ipv4fields: header.IPv4Fields{
+ IHL: header.IPv4MinimumSize,
+ TOS: tos,
+ TotalLength: header.IPv4MinimumSize + 16,
+ ID: ident,
+ Flags: header.IPv4FlagMoreFragments,
+ FragmentOffset: 0,
+ TTL: ttl,
+ Protocol: protocol,
+ SrcAddr: addr1,
+ DstAddr: addr2,
+ },
+ payload: []byte(data)[:16],
+ },
+ },
+ expectICMP: true,
+ },
+ {
+ name: "two first fragments",
+ fragments: []fragmentData{
+ {
+ ipv4fields: header.IPv4Fields{
+ IHL: header.IPv4MinimumSize,
+ TOS: tos,
+ TotalLength: header.IPv4MinimumSize + 16,
+ ID: ident,
+ Flags: header.IPv4FlagMoreFragments,
+ FragmentOffset: 0,
+ TTL: ttl,
+ Protocol: protocol,
+ SrcAddr: addr1,
+ DstAddr: addr2,
+ },
+ payload: []byte(data)[:16],
+ },
+ {
+ ipv4fields: header.IPv4Fields{
+ IHL: header.IPv4MinimumSize,
+ TOS: tos,
+ TotalLength: header.IPv4MinimumSize + 16,
+ ID: ident,
+ Flags: header.IPv4FlagMoreFragments,
+ FragmentOffset: 0,
+ TTL: ttl,
+ Protocol: protocol,
+ SrcAddr: addr1,
+ DstAddr: addr2,
+ },
+ payload: []byte(data)[:16],
+ },
+ },
+ expectICMP: true,
+ },
+ {
+ name: "second fragment only",
+ fragments: []fragmentData{
+ {
+ ipv4fields: header.IPv4Fields{
+ IHL: header.IPv4MinimumSize,
+ TOS: tos,
+ TotalLength: uint16(header.IPv4MinimumSize + len(data) - 16),
+ ID: ident,
+ Flags: 0,
+ FragmentOffset: 8,
+ TTL: ttl,
+ Protocol: protocol,
+ SrcAddr: addr1,
+ DstAddr: addr2,
+ },
+ payload: []byte(data)[16:],
+ },
+ },
+ expectICMP: false,
+ },
+ {
+ name: "two fragments with a gap",
+ fragments: []fragmentData{
+ {
+ ipv4fields: header.IPv4Fields{
+ IHL: header.IPv4MinimumSize,
+ TOS: tos,
+ TotalLength: header.IPv4MinimumSize + 8,
+ ID: ident,
+ Flags: header.IPv4FlagMoreFragments,
+ FragmentOffset: 0,
+ TTL: ttl,
+ Protocol: protocol,
+ SrcAddr: addr1,
+ DstAddr: addr2,
+ },
+ payload: []byte(data)[:8],
+ },
+ {
+ ipv4fields: header.IPv4Fields{
+ IHL: header.IPv4MinimumSize,
+ TOS: tos,
+ TotalLength: uint16(header.IPv4MinimumSize + len(data) - 16),
+ ID: ident,
+ Flags: 0,
+ FragmentOffset: 16,
+ TTL: ttl,
+ Protocol: protocol,
+ SrcAddr: addr1,
+ DstAddr: addr2,
+ },
+ payload: []byte(data)[16:],
+ },
+ },
+ expectICMP: true,
+ },
+ {
+ name: "two fragments with a gap in reverse order",
+ fragments: []fragmentData{
+ {
+ ipv4fields: header.IPv4Fields{
+ IHL: header.IPv4MinimumSize,
+ TOS: tos,
+ TotalLength: uint16(header.IPv4MinimumSize + len(data) - 16),
+ ID: ident,
+ Flags: 0,
+ FragmentOffset: 16,
+ TTL: ttl,
+ Protocol: protocol,
+ SrcAddr: addr1,
+ DstAddr: addr2,
+ },
+ payload: []byte(data)[16:],
+ },
+ {
+ ipv4fields: header.IPv4Fields{
+ IHL: header.IPv4MinimumSize,
+ TOS: tos,
+ TotalLength: header.IPv4MinimumSize + 8,
+ ID: ident,
+ Flags: header.IPv4FlagMoreFragments,
+ FragmentOffset: 0,
+ TTL: ttl,
+ Protocol: protocol,
+ SrcAddr: addr1,
+ DstAddr: addr2,
+ },
+ payload: []byte(data)[:8],
+ },
+ },
+ expectICMP: true,
+ },
+ }
+
+ for _, test := range tests {
+ t.Run(test.name, func(t *testing.T) {
+ clock := faketime.NewManualClock()
+ s := stack.New(stack.Options{
+ NetworkProtocols: []stack.NetworkProtocolFactory{
+ ipv4.NewProtocol,
+ },
+ Clock: clock,
+ })
+ e := channel.New(1, 1500, linkAddr)
+ if err := s.CreateNIC(nicID, e); err != nil {
+ t.Fatalf("CreateNIC(%d, _) = %s", nicID, err)
+ }
+ if err := s.AddAddress(nicID, ipv4.ProtocolNumber, addr2); err != nil {
+ t.Fatalf("AddAddress(%d, %d, %s) = %s", nicID, header.IPv4ProtocolNumber, addr2, err)
+ }
+ s.SetRouteTable([]tcpip.Route{{
+ Destination: header.IPv4EmptySubnet,
+ NIC: nicID,
+ }})
+
+ var firstFragmentSent buffer.View
+ for _, f := range test.fragments {
+ pktSize := header.IPv4MinimumSize
+ hdr := buffer.NewPrependable(pktSize)
+
+ ip := header.IPv4(hdr.Prepend(pktSize))
+ ip.Encode(&f.ipv4fields)
+
+ ip.SetChecksum(0)
+ ip.SetChecksum(^ip.CalculateChecksum())
+
+ vv := hdr.View().ToVectorisedView()
+ vv.AppendView(f.payload)
+
+ pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
+ Data: vv,
+ })
+
+ if firstFragmentSent == nil && ip.FragmentOffset() == 0 {
+ firstFragmentSent = stack.PayloadSince(pkt.NetworkHeader())
+ }
+
+ e.InjectInbound(header.IPv4ProtocolNumber, pkt)
+ }
+
+ clock.Advance(ipv4.ReassembleTimeout)
+
+ reply, ok := e.Read()
+ if !test.expectICMP {
+ if ok {
+ t.Fatalf("unexpected ICMP error message received: %#v", reply)
+ }
+ return
+ }
+ if !ok {
+ t.Fatal("expected ICMP error message missing")
+ }
+ if firstFragmentSent == nil {
+ t.Fatalf("unexpected ICMP error message received: %#v", reply)
+ }
+
+ checker.IPv4(t, stack.PayloadSince(reply.Pkt.NetworkHeader()),
+ checker.SrcAddr(addr2),
+ checker.DstAddr(addr1),
+ checker.IPFullLength(uint16(header.IPv4MinimumSize+header.ICMPv4MinimumSize+firstFragmentSent.Size())),
+ checker.IPv4HeaderLength(header.IPv4MinimumSize),
+ checker.ICMPv4(
+ checker.ICMPv4Type(header.ICMPv4TimeExceeded),
+ checker.ICMPv4Code(header.ICMPv4ReassemblyTimeout),
+ checker.ICMPv4Checksum(),
+ checker.ICMPv4Payload([]byte(firstFragmentSent)),
+ ),
+ )
+ })
+ }
+}
+
// TestReceiveFragments feeds fragments in through the incoming packet path to
// test reassembly
func TestReceiveFragments(t *testing.T) {
@@ -1577,7 +2249,7 @@ func TestWriteStats(t *testing.T) {
t.Run(writer.name, func(t *testing.T) {
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
- ep := testutil.NewMockLinkEndpoint(header.IPv4MinimumSize+header.UDPMinimumSize, tcpip.ErrInvalidEndpointState, test.allowPackets)
+ ep := testutil.NewMockLinkEndpoint(header.IPv4MinimumMTU, tcpip.ErrInvalidEndpointState, test.allowPackets)
rt := buildRoute(t, ep)
var pkts stack.PacketBufferList
@@ -1783,7 +2455,7 @@ func TestPacketQueing(t *testing.T) {
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
- e := channel.New(1, header.IPv6MinimumMTU, host1NICLinkAddr)
+ e := channel.New(1, defaultMTU, host1NICLinkAddr)
e.LinkEPCapabilities |= stack.CapabilityResolutionRequired
s := stack.New(stack.Options{
NetworkProtocols: []stack.NetworkProtocolFactory{arp.NewProtocol, ipv4.NewProtocol},
diff --git a/pkg/tcpip/network/ipv6/BUILD b/pkg/tcpip/network/ipv6/BUILD
index a30437f02..0ac24a6fb 100644
--- a/pkg/tcpip/network/ipv6/BUILD
+++ b/pkg/tcpip/network/ipv6/BUILD
@@ -36,6 +36,7 @@ go_test(
"//pkg/tcpip",
"//pkg/tcpip/buffer",
"//pkg/tcpip/checker",
+ "//pkg/tcpip/faketime",
"//pkg/tcpip/header",
"//pkg/tcpip/link/channel",
"//pkg/tcpip/link/sniffer",
diff --git a/pkg/tcpip/network/ipv6/icmp.go b/pkg/tcpip/network/ipv6/icmp.go
index ead6bedcb..3c15e41a7 100644
--- a/pkg/tcpip/network/ipv6/icmp.go
+++ b/pkg/tcpip/network/ipv6/icmp.go
@@ -170,8 +170,11 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragme
return
}
pkt.Data.TrimFront(header.ICMPv6PacketTooBigMinimumSize)
- mtu := header.ICMPv6(hdr).MTU()
- e.handleControl(stack.ControlPacketTooBig, calculateMTU(mtu), pkt)
+ networkMTU, err := calculateNetworkMTU(header.ICMPv6(hdr).MTU(), header.IPv6MinimumSize)
+ if err != nil {
+ networkMTU = 0
+ }
+ e.handleControl(stack.ControlPacketTooBig, networkMTU, pkt)
case header.ICMPv6DstUnreachable:
received.DstUnreachable.Increment()
@@ -284,7 +287,7 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragme
received.Invalid.Increment()
return
} else if e.nud != nil {
- e.nud.HandleProbe(r.RemoteAddress, r.LocalAddress, header.IPv6ProtocolNumber, sourceLinkAddr, e.protocol)
+ e.nud.HandleProbe(r.RemoteAddress, header.IPv6ProtocolNumber, sourceLinkAddr, e.protocol)
} else {
e.linkAddrCache.AddLinkAddress(e.nic.ID(), r.RemoteAddress, sourceLinkAddr)
}
@@ -555,7 +558,7 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragme
if e.nud != nil {
// A RS with a specified source IP address modifies the NUD state
// machine in the same way a reachability probe would.
- e.nud.HandleProbe(r.RemoteAddress, r.LocalAddress, header.IPv6ProtocolNumber, sourceLinkAddr, e.protocol)
+ e.nud.HandleProbe(r.RemoteAddress, header.IPv6ProtocolNumber, sourceLinkAddr, e.protocol)
}
}
@@ -605,7 +608,7 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragme
// If the RA has the source link layer option, update the link address
// cache with the link address for the advertised router.
if len(sourceLinkAddr) != 0 && e.nud != nil {
- e.nud.HandleProbe(routerAddr, r.LocalAddress, header.IPv6ProtocolNumber, sourceLinkAddr, e.protocol)
+ e.nud.HandleProbe(routerAddr, header.IPv6ProtocolNumber, sourceLinkAddr, e.protocol)
}
e.mu.Lock()
@@ -648,52 +651,46 @@ func (*protocol) LinkAddressProtocol() tcpip.NetworkProtocolNumber {
}
// LinkAddressRequest implements stack.LinkAddressResolver.
-func (*protocol) LinkAddressRequest(addr, localAddr tcpip.Address, remoteLinkAddr tcpip.LinkAddress, linkEP stack.LinkEndpoint) *tcpip.Error {
- // TODO(b/148672031): Use stack.FindRoute instead of manually creating the
- // route here. Note, we would need the nicID to do this properly so the right
- // NIC (associated to linkEP) is used to send the NDP NS message.
- r := stack.Route{
- LocalAddress: localAddr,
- RemoteAddress: addr,
- LocalLinkAddress: linkEP.LinkAddress(),
- RemoteLinkAddress: remoteLinkAddr,
+func (p *protocol) LinkAddressRequest(targetAddr, localAddr tcpip.Address, remoteLinkAddr tcpip.LinkAddress, nic stack.NetworkInterface) *tcpip.Error {
+ remoteAddr := targetAddr
+ if len(remoteLinkAddr) == 0 {
+ remoteAddr = header.SolicitedNodeAddr(targetAddr)
+ remoteLinkAddr = header.EthernetAddressFromMulticastIPv6Address(remoteAddr)
}
- // If a remote address is not already known, then send a multicast
- // solicitation since multicast addresses have a static mapping to link
- // addresses.
- if len(r.RemoteLinkAddress) == 0 {
- r.RemoteAddress = header.SolicitedNodeAddr(addr)
- r.RemoteLinkAddress = header.EthernetAddressFromMulticastIPv6Address(r.RemoteAddress)
+ r, err := p.stack.FindRoute(nic.ID(), localAddr, remoteAddr, ProtocolNumber, false /* multicastLoop */)
+ if err != nil {
+ return err
}
+ defer r.Release()
+ r.ResolveWith(remoteLinkAddr)
optsSerializer := header.NDPOptionsSerializer{
- header.NDPSourceLinkLayerAddressOption(linkEP.LinkAddress()),
+ header.NDPSourceLinkLayerAddressOption(nic.LinkAddress()),
}
neighborSolicitSize := header.ICMPv6NeighborSolicitMinimumSize + optsSerializer.Length()
pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
- ReserveHeaderBytes: int(linkEP.MaxHeaderLength()) + header.IPv6MinimumSize + neighborSolicitSize,
+ ReserveHeaderBytes: int(r.MaxHeaderLength()) + neighborSolicitSize,
})
pkt.TransportProtocolNumber = header.ICMPv6ProtocolNumber
packet := header.ICMPv6(pkt.TransportHeader().Push(neighborSolicitSize))
packet.SetType(header.ICMPv6NeighborSolicit)
ns := header.NDPNeighborSolicit(packet.NDPPayload())
- ns.SetTargetAddress(addr)
+ ns.SetTargetAddress(targetAddr)
ns.Options().Serialize(optsSerializer)
packet.SetChecksum(header.ICMPv6Checksum(packet, r.LocalAddress, r.RemoteAddress, buffer.VectorisedView{}))
- length := uint16(pkt.Size())
- ip := header.IPv6(pkt.NetworkHeader().Push(header.IPv6MinimumSize))
- ip.Encode(&header.IPv6Fields{
- PayloadLength: length,
- NextHeader: uint8(header.ICMPv6ProtocolNumber),
- HopLimit: header.NDPHopLimit,
- SrcAddr: r.LocalAddress,
- DstAddr: r.RemoteAddress,
- })
+ stat := p.stack.Stats().ICMP.V6PacketsSent
+ if err := r.WritePacket(nil /* gso */, stack.NetworkHeaderParams{
+ Protocol: header.ICMPv6ProtocolNumber,
+ TTL: header.NDPHopLimit,
+ }, pkt); err != nil {
+ stat.Dropped.Increment()
+ return err
+ }
- // TODO(stijlist): count this in ICMP stats.
- return linkEP.WritePacket(&r, nil /* gso */, ProtocolNumber, pkt)
+ stat.NeighborSolicit.Increment()
+ return nil
}
// ResolveStaticAddress implements stack.LinkAddressResolver.
@@ -747,6 +744,13 @@ type icmpReasonPortUnreachable struct{}
func (*icmpReasonPortUnreachable) isICMPReason() {}
+// icmpReasonReassemblyTimeout is an error where insufficient fragments are
+// received to complete reassembly of a packet within a configured time after
+// the reception of the first-arriving fragment of that packet.
+type icmpReasonReassemblyTimeout struct{}
+
+func (*icmpReasonReassemblyTimeout) isICMPReason() {}
+
// returnError takes an error descriptor and generates the appropriate ICMP
// error packet for IPv6 and sends it.
func (p *protocol) returnError(r *stack.Route, reason icmpReason, pkt *stack.PacketBuffer) *tcpip.Error {
@@ -839,7 +843,9 @@ func (p *protocol) returnError(r *stack.Route, reason icmpReason, pkt *stack.Pac
if payloadLen > available {
payloadLen = available
}
- payload := buffer.NewVectorisedView(pkt.Size(), pkt.Views())
+ payload := network.ToVectorisedView()
+ payload.AppendView(transport)
+ payload.Append(pkt.Data)
payload.CapLength(payloadLen)
newPkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
@@ -860,6 +866,10 @@ func (p *protocol) returnError(r *stack.Route, reason icmpReason, pkt *stack.Pac
icmpHdr.SetType(header.ICMPv6DstUnreachable)
icmpHdr.SetCode(header.ICMPv6PortUnreachable)
counter = sent.DstUnreachable
+ case *icmpReasonReassemblyTimeout:
+ icmpHdr.SetType(header.ICMPv6TimeExceeded)
+ icmpHdr.SetCode(header.ICMPv6ReassemblyTimeout)
+ counter = sent.TimeExceeded
default:
panic(fmt.Sprintf("unsupported ICMP type %T", reason))
}
diff --git a/pkg/tcpip/network/ipv6/icmp_test.go b/pkg/tcpip/network/ipv6/icmp_test.go
index 8dc33c560..aa8b5f2e5 100644
--- a/pkg/tcpip/network/ipv6/icmp_test.go
+++ b/pkg/tcpip/network/ipv6/icmp_test.go
@@ -51,6 +51,7 @@ const (
var (
lladdr0 = header.LinkLocalAddr(linkAddr0)
lladdr1 = header.LinkLocalAddr(linkAddr1)
+ lladdr2 = header.LinkLocalAddr(linkAddr2)
)
type stubLinkEndpoint struct {
@@ -108,31 +109,27 @@ type stubNUDHandler struct {
var _ stack.NUDHandler = (*stubNUDHandler)(nil)
-func (s *stubNUDHandler) HandleProbe(remoteAddr, localAddr tcpip.Address, protocol tcpip.NetworkProtocolNumber, remoteLinkAddr tcpip.LinkAddress, linkRes stack.LinkAddressResolver) {
+func (s *stubNUDHandler) HandleProbe(tcpip.Address, tcpip.NetworkProtocolNumber, tcpip.LinkAddress, stack.LinkAddressResolver) {
s.probeCount++
}
-func (s *stubNUDHandler) HandleConfirmation(addr tcpip.Address, linkAddr tcpip.LinkAddress, flags stack.ReachabilityConfirmationFlags) {
+func (s *stubNUDHandler) HandleConfirmation(tcpip.Address, tcpip.LinkAddress, stack.ReachabilityConfirmationFlags) {
s.confirmationCount++
}
-func (*stubNUDHandler) HandleUpperLevelConfirmation(addr tcpip.Address) {
+func (*stubNUDHandler) HandleUpperLevelConfirmation(tcpip.Address) {
}
var _ stack.NetworkInterface = (*testInterface)(nil)
type testInterface struct {
- stack.NetworkLinkEndpoint
-
- linkAddr tcpip.LinkAddress
-}
+ stack.LinkEndpoint
-func (i *testInterface) LinkAddress() tcpip.LinkAddress {
- return i.linkAddr
+ nicID tcpip.NICID
}
func (*testInterface) ID() tcpip.NICID {
- return 0
+ return nicID
}
func (*testInterface) IsLoopback() bool {
@@ -147,6 +144,14 @@ func (*testInterface) Enabled() bool {
return true
}
+func (t *testInterface) WritePacketToRemote(remoteLinkAddr tcpip.LinkAddress, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) *tcpip.Error {
+ r := stack.Route{
+ NetProto: protocol,
+ RemoteLinkAddress: remoteLinkAddr,
+ }
+ return t.LinkEndpoint.WritePacket(&r, gso, protocol, pkt)
+}
+
func TestICMPCounts(t *testing.T) {
tests := []struct {
name string
@@ -1235,26 +1240,72 @@ func TestICMPChecksumValidationWithPayloadMultipleViews(t *testing.T) {
}
func TestLinkAddressRequest(t *testing.T) {
+ const nicID = 1
+
snaddr := header.SolicitedNodeAddr(lladdr0)
mcaddr := header.EthernetAddressFromMulticastIPv6Address(snaddr)
tests := []struct {
- name string
- remoteLinkAddr tcpip.LinkAddress
- expectedLinkAddr tcpip.LinkAddress
- expectedAddr tcpip.Address
+ name string
+ nicAddr tcpip.Address
+ localAddr tcpip.Address
+ remoteLinkAddr tcpip.LinkAddress
+
+ expectedErr *tcpip.Error
+ expectedRemoteAddr tcpip.Address
+ expectedRemoteLinkAddr tcpip.LinkAddress
}{
{
- name: "Unicast",
- remoteLinkAddr: linkAddr1,
- expectedLinkAddr: linkAddr1,
- expectedAddr: lladdr0,
+ name: "Unicast",
+ nicAddr: lladdr1,
+ localAddr: lladdr1,
+ remoteLinkAddr: linkAddr1,
+ expectedRemoteAddr: lladdr0,
+ expectedRemoteLinkAddr: linkAddr1,
+ },
+ {
+ name: "Multicast",
+ nicAddr: lladdr1,
+ localAddr: lladdr1,
+ remoteLinkAddr: "",
+ expectedRemoteAddr: snaddr,
+ expectedRemoteLinkAddr: mcaddr,
+ },
+ {
+ name: "Unicast with unspecified source",
+ nicAddr: lladdr1,
+ remoteLinkAddr: linkAddr1,
+ expectedRemoteAddr: lladdr0,
+ expectedRemoteLinkAddr: linkAddr1,
},
{
- name: "Multicast",
- remoteLinkAddr: "",
- expectedLinkAddr: mcaddr,
- expectedAddr: snaddr,
+ name: "Multicast with unspecified source",
+ nicAddr: lladdr1,
+ remoteLinkAddr: "",
+ expectedRemoteAddr: snaddr,
+ expectedRemoteLinkAddr: mcaddr,
+ },
+ {
+ name: "Unicast with unassigned address",
+ localAddr: lladdr1,
+ remoteLinkAddr: linkAddr1,
+ expectedErr: tcpip.ErrNetworkUnreachable,
+ },
+ {
+ name: "Multicast with unassigned address",
+ localAddr: lladdr1,
+ remoteLinkAddr: "",
+ expectedErr: tcpip.ErrNetworkUnreachable,
+ },
+ {
+ name: "Unicast with no local address available",
+ remoteLinkAddr: linkAddr1,
+ expectedErr: tcpip.ErrNetworkUnreachable,
+ },
+ {
+ name: "Multicast with no local address available",
+ remoteLinkAddr: "",
+ expectedErr: tcpip.ErrNetworkUnreachable,
},
}
@@ -1269,26 +1320,43 @@ func TestLinkAddressRequest(t *testing.T) {
}
linkEP := channel.New(defaultChannelSize, defaultMTU, linkAddr0)
- if err := linkRes.LinkAddressRequest(lladdr0, lladdr1, test.remoteLinkAddr, linkEP); err != nil {
- t.Errorf("got p.LinkAddressRequest(%s, %s, %s, _) = %s", lladdr0, lladdr1, test.remoteLinkAddr, err)
+ if err := s.CreateNIC(nicID, linkEP); err != nil {
+ t.Fatalf("s.CreateNIC(%d, _): %s", nicID, err)
+ }
+ if len(test.nicAddr) != 0 {
+ if err := s.AddAddress(nicID, ProtocolNumber, test.nicAddr); err != nil {
+ t.Fatalf("s.AddAddress(%d, %d, %s): %s", nicID, ProtocolNumber, test.nicAddr, err)
+ }
+ }
+
+ // We pass a test network interface to LinkAddressRequest with the same NIC
+ // ID and link endpoint used by the NIC we created earlier so that we can
+ // mock a link address request and observe the packets sent to the link
+ // endpoint even though the stack uses the real NIC.
+ if err := linkRes.LinkAddressRequest(lladdr0, test.localAddr, test.remoteLinkAddr, &testInterface{LinkEndpoint: linkEP, nicID: nicID}); err != test.expectedErr {
+ t.Errorf("got p.LinkAddressRequest(%s, %s, %s, _) = %s, want = %s", lladdr0, test.localAddr, test.remoteLinkAddr, err, test.expectedErr)
+ }
+
+ if test.expectedErr != nil {
+ return
}
pkt, ok := linkEP.Read()
if !ok {
t.Fatal("expected to send a link address request")
}
- if pkt.Route.RemoteLinkAddress != test.expectedLinkAddr {
- t.Errorf("got pkt.Route.RemoteLinkAddress = %s, want = %s", pkt.Route.RemoteLinkAddress, test.expectedLinkAddr)
+ if pkt.Route.RemoteLinkAddress != test.expectedRemoteLinkAddr {
+ t.Errorf("got pkt.Route.RemoteLinkAddress = %s, want = %s", pkt.Route.RemoteLinkAddress, test.expectedRemoteLinkAddr)
}
- if pkt.Route.RemoteAddress != test.expectedAddr {
- t.Errorf("got pkt.Route.RemoteAddress = %s, want = %s", pkt.Route.RemoteAddress, test.expectedAddr)
+ if pkt.Route.RemoteAddress != test.expectedRemoteAddr {
+ t.Errorf("got pkt.Route.RemoteAddress = %s, want = %s", pkt.Route.RemoteAddress, test.expectedRemoteAddr)
}
if pkt.Route.LocalAddress != lladdr1 {
t.Errorf("got pkt.Route.LocalAddress = %s, want = %s", pkt.Route.LocalAddress, lladdr1)
}
checker.IPv6(t, stack.PayloadSince(pkt.Pkt.NetworkHeader()),
checker.SrcAddr(lladdr1),
- checker.DstAddr(test.expectedAddr),
+ checker.DstAddr(test.expectedRemoteAddr),
checker.TTL(header.NDPHopLimit),
checker.NDPNS(
checker.NDPNSTargetAddress(lladdr0),
@@ -1698,7 +1766,7 @@ func TestCallsToNeighborCache(t *testing.T) {
t.Fatalf("cannot find protocol instance for network protocol %d", ProtocolNumber)
}
nudHandler := &stubNUDHandler{}
- ep := netProto.NewEndpoint(&testInterface{linkAddr: linkAddr0}, &stubLinkAddressCache{}, nudHandler, &stubDispatcher{})
+ ep := netProto.NewEndpoint(&testInterface{LinkEndpoint: channel.New(0, header.IPv6MinimumMTU, linkAddr0)}, &stubLinkAddressCache{}, nudHandler, &stubDispatcher{})
defer ep.Close()
if err := ep.Enable(); err != nil {
diff --git a/pkg/tcpip/network/ipv6/ipv6.go b/pkg/tcpip/network/ipv6/ipv6.go
index 9670696c7..1e38f3a9d 100644
--- a/pkg/tcpip/network/ipv6/ipv6.go
+++ b/pkg/tcpip/network/ipv6/ipv6.go
@@ -41,12 +41,12 @@ const (
//
// Linux also uses 60 seconds for reassembly timeout:
// https://github.com/torvalds/linux/blob/47ec5303d73ea344e84f46660fff693c57641386/include/net/ipv6.h#L456
- reassembleTimeout = 60 * time.Second
+ ReassembleTimeout = 60 * time.Second
// ProtocolNumber is the ipv6 protocol number.
ProtocolNumber = header.IPv6ProtocolNumber
- // maxTotalSize is maximum size that can be encoded in the 16-bit
+ // maxPayloadSize is the maximum size that can be encoded in the 16-bit
// PayloadLength field of the ipv6 header.
maxPayloadSize = 0xffff
@@ -363,7 +363,11 @@ func (e *endpoint) DefaultTTL() uint8 {
// MTU implements stack.NetworkEndpoint.MTU. It returns the link-layer MTU minus
// the network layer max header length.
func (e *endpoint) MTU() uint32 {
- return calculateMTU(e.nic.MTU())
+ networkMTU, err := calculateNetworkMTU(e.nic.MTU(), header.IPv6MinimumSize)
+ if err != nil {
+ return 0
+ }
+ return networkMTU
}
// MaxHeaderLength returns the maximum length needed by ipv6 headers (and
@@ -386,27 +390,40 @@ func (e *endpoint) addIPHeader(r *stack.Route, pkt *stack.PacketBuffer, params s
pkt.NetworkProtocolNumber = ProtocolNumber
}
-func (e *endpoint) packetMustBeFragmented(pkt *stack.PacketBuffer, gso *stack.GSO) bool {
- return (gso == nil || gso.Type == stack.GSONone) && pkt.Size() > int(e.nic.MTU())
+func packetMustBeFragmented(pkt *stack.PacketBuffer, networkMTU uint32, gso *stack.GSO) bool {
+ payload := pkt.TransportHeader().View().Size() + pkt.Data.Size()
+ return (gso == nil || gso.Type == stack.GSONone) && uint32(payload) > networkMTU
}
// handleFragments fragments pkt and calls the handler function on each
// fragment. It returns the number of fragments handled and the number of
// fragments left to be processed. The IP header must already be present in the
-// original packet. The mtu is the maximum size of the packets. The transport
-// header protocol number is required to avoid parsing the IPv6 extension
-// headers.
-func (e *endpoint) handleFragments(r *stack.Route, gso *stack.GSO, mtu uint32, pkt *stack.PacketBuffer, transProto tcpip.TransportProtocolNumber, handler func(*stack.PacketBuffer) *tcpip.Error) (int, int, *tcpip.Error) {
- fragMTU := int(calculateFragmentInnerMTU(mtu, pkt))
- if fragMTU < pkt.TransportHeader().View().Size() {
+// original packet. The transport header protocol number is required to avoid
+// parsing the IPv6 extension headers.
+func (e *endpoint) handleFragments(r *stack.Route, gso *stack.GSO, networkMTU uint32, pkt *stack.PacketBuffer, transProto tcpip.TransportProtocolNumber, handler func(*stack.PacketBuffer) *tcpip.Error) (int, int, *tcpip.Error) {
+ networkHeader := header.IPv6(pkt.NetworkHeader().View())
+
+ // TODO(gvisor.dev/issue/3912): Once the Authentication or ESP Headers are
+ // supported for outbound packets, their length should not affect the fragment
+ // maximum payload length because they should only be transmitted once.
+ fragmentPayloadLen := (networkMTU - header.IPv6FragmentHeaderSize) &^ 7
+ if fragmentPayloadLen < header.IPv6FragmentExtHdrFragmentOffsetBytesPerUnit {
+ // We need at least 8 bytes of space left for the fragmentable part because
+ // the fragment payload must obviously be non-zero and must be a multiple
+ // of 8 as per RFC 8200 section 4.5:
+ // Each complete fragment, except possibly the last ("rightmost") one, is
+ // an integer multiple of 8 octets long.
+ return 0, 1, tcpip.ErrMessageTooLong
+ }
+
+ if fragmentPayloadLen < uint32(pkt.TransportHeader().View().Size()) {
// As per RFC 8200 Section 4.5, the Transport Header is expected to be small
// enough to fit in the first fragment.
return 0, 1, tcpip.ErrMessageTooLong
}
- pf := fragmentation.MakePacketFragmenter(pkt, fragMTU, calculateFragmentReserve(pkt))
+ pf := fragmentation.MakePacketFragmenter(pkt, fragmentPayloadLen, calculateFragmentReserve(pkt))
id := atomic.AddUint32(&e.protocol.ids[hashRoute(r, e.protocol.hashIV)%buckets], 1)
- networkHeader := header.IPv6(pkt.NetworkHeader().View())
var n int
for {
@@ -468,8 +485,14 @@ func (e *endpoint) writePacket(r *stack.Route, gso *stack.GSO, pkt *stack.Packet
return nil
}
- if e.packetMustBeFragmented(pkt, gso) {
- sent, remain, err := e.handleFragments(r, gso, e.nic.MTU(), pkt, protocol, func(fragPkt *stack.PacketBuffer) *tcpip.Error {
+ networkMTU, err := calculateNetworkMTU(e.nic.MTU(), uint32(pkt.NetworkHeader().View().Size()))
+ if err != nil {
+ r.Stats().IP.OutgoingPacketErrors.Increment()
+ return err
+ }
+
+ if packetMustBeFragmented(pkt, networkMTU, gso) {
+ sent, remain, err := e.handleFragments(r, gso, networkMTU, pkt, protocol, func(fragPkt *stack.PacketBuffer) *tcpip.Error {
// TODO(gvisor.dev/issue/3884): Evaluate whether we want to send each
// fragment one by one using WritePacket() (current strategy) or if we
// want to create a PacketBufferList from the fragments and feed it to
@@ -499,13 +522,20 @@ func (e *endpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts stack.Packe
return pkts.Len(), nil
}
+ linkMTU := e.nic.MTU()
for pb := pkts.Front(); pb != nil; pb = pb.Next() {
e.addIPHeader(r, pb, params)
- if e.packetMustBeFragmented(pb, gso) {
+
+ networkMTU, err := calculateNetworkMTU(linkMTU, uint32(pb.NetworkHeader().View().Size()))
+ if err != nil {
+ r.Stats().IP.OutgoingPacketErrors.IncrementBy(uint64(pkts.Len()))
+ return 0, err
+ }
+ if packetMustBeFragmented(pb, networkMTU, gso) {
// Keep track of the packet that is about to be fragmented so it can be
// removed once the fragmentation is done.
originalPkt := pb
- if _, _, err := e.handleFragments(r, gso, e.nic.MTU(), pb, params.Protocol, func(fragPkt *stack.PacketBuffer) *tcpip.Error {
+ if _, _, err := e.handleFragments(r, gso, networkMTU, pb, params.Protocol, func(fragPkt *stack.PacketBuffer) *tcpip.Error {
// Modify the packet list in place with the new fragments.
pkts.InsertAfter(pb, fragPkt)
pb = fragPkt
@@ -569,7 +599,7 @@ func (e *endpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts stack.Packe
return n + len(dropped), nil
}
-// WriteHeaderIncludedPacker implements stack.NetworkEndpoint.
+// WriteHeaderIncludedPacket implements stack.NetworkEndpoint.
func (e *endpoint) WriteHeaderIncludedPacket(r *stack.Route, pkt *stack.PacketBuffer) *tcpip.Error {
// The packet already has an IP header, but there are a few required checks.
h, ok := pkt.Data.PullUp(header.IPv6MinimumSize)
@@ -747,6 +777,8 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
continue
}
+ fragmentFieldOffset := it.ParseOffset()
+
// Don't consume the iterator if we have the first fragment because we
// will use it to validate that the first fragment holds the upper layer
// header.
@@ -804,17 +836,59 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
return
}
+ // As per RFC 2460 Section 4.5:
+ //
+ // If the length of a fragment, as derived from the fragment packet's
+ // Payload Length field, is not a multiple of 8 octets and the M flag
+ // of that fragment is 1, then that fragment must be discarded and an
+ // ICMP Parameter Problem, Code 0, message should be sent to the source
+ // of the fragment, pointing to the Payload Length field of the
+ // fragment packet.
+ if extHdr.More() && fragmentPayloadLen%header.IPv6FragmentExtHdrFragmentOffsetBytesPerUnit != 0 {
+ r.Stats().IP.MalformedPacketsReceived.Increment()
+ r.Stats().IP.MalformedFragmentsReceived.Increment()
+ _ = e.protocol.returnError(r, &icmpReasonParameterProblem{
+ code: header.ICMPv6ErroneousHeader,
+ pointer: header.IPv6PayloadLenOffset,
+ }, pkt)
+ return
+ }
+
// The packet is a fragment, let's try to reassemble it.
start := extHdr.FragmentOffset() * header.IPv6FragmentExtHdrFragmentOffsetBytesPerUnit
- // Drop the fragment if the size of the reassembled payload would exceed
- // the maximum payload size.
+ // As per RFC 2460 Section 4.5:
+ //
+ // If the length and offset of a fragment are such that the Payload
+ // Length of the packet reassembled from that fragment would exceed
+ // 65,535 octets, then that fragment must be discarded and an ICMP
+ // Parameter Problem, Code 0, message should be sent to the source of
+ // the fragment, pointing to the Fragment Offset field of the fragment
+ // packet.
if int(start)+fragmentPayloadLen > header.IPv6MaximumPayloadSize {
r.Stats().IP.MalformedPacketsReceived.Increment()
r.Stats().IP.MalformedFragmentsReceived.Increment()
+ _ = e.protocol.returnError(r, &icmpReasonParameterProblem{
+ code: header.ICMPv6ErroneousHeader,
+ pointer: fragmentFieldOffset,
+ }, pkt)
return
}
+ // Set up a callback in case we need to send a Time Exceeded Message as
+ // per RFC 2460 Section 4.5.
+ var releaseCB func(bool)
+ if start == 0 {
+ pkt := pkt.Clone()
+ r := r.Clone()
+ releaseCB = func(timedOut bool) {
+ if timedOut {
+ _ = e.protocol.returnError(&r, &icmpReasonReassemblyTimeout{}, pkt)
+ }
+ r.Release()
+ }
+ }
+
// Note that pkt doesn't have its transport header set after reassembly,
// and won't until DeliverNetworkPacket sets it.
data, proto, ready, err := e.protocol.fragmentation.Process(
@@ -830,6 +904,7 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
extHdr.More(),
uint8(rawPayload.Identifier),
rawPayload.Buf,
+ releaseCB,
)
if err != nil {
r.Stats().IP.MalformedPacketsReceived.Increment()
@@ -1427,14 +1502,31 @@ func (p *protocol) SetForwarding(v bool) {
}
}
-// calculateMTU calculates the network-layer payload MTU based on the link-layer
-// payload mtu.
-func calculateMTU(mtu uint32) uint32 {
- mtu -= header.IPv6MinimumSize
- if mtu <= maxPayloadSize {
- return mtu
+// calculateNetworkMTU calculates the network-layer payload MTU based on the
+// link-layer payload MTU and the length of every IPv6 header.
+// Note that this is different than the Payload Length field of the IPv6 header,
+// which includes the length of the extension headers.
+func calculateNetworkMTU(linkMTU, networkHeadersLen uint32) (uint32, *tcpip.Error) {
+ if linkMTU < header.IPv6MinimumMTU {
+ return 0, tcpip.ErrInvalidEndpointState
}
- return maxPayloadSize
+
+ // As per RFC 7112 section 5, we should discard packets if their IPv6 header
+ // is bigger than 1280 bytes (ie, the minimum link MTU) since we do not
+ // support PMTU discovery:
+ // Hosts that do not discover the Path MTU MUST limit the IPv6 Header Chain
+ // length to 1280 bytes. Limiting the IPv6 Header Chain length to 1280
+ // bytes ensures that the header chain length does not exceed the IPv6
+ // minimum MTU.
+ if networkHeadersLen > header.IPv6MinimumMTU {
+ return 0, tcpip.ErrMalformedHeader
+ }
+
+ networkMTU := linkMTU - uint32(networkHeadersLen)
+ if networkMTU > maxPayloadSize {
+ networkMTU = maxPayloadSize
+ }
+ return networkMTU, nil
}
// Options holds options to configure a new protocol.
@@ -1488,7 +1580,7 @@ func NewProtocolWithOptions(opts Options) stack.NetworkProtocolFactory {
return func(s *stack.Stack) stack.NetworkProtocol {
p := &protocol{
stack: s,
- fragmentation: fragmentation.NewFragmentation(header.IPv6FragmentExtHdrFragmentOffsetBytesPerUnit, fragmentation.HighFragThreshold, fragmentation.LowFragThreshold, reassembleTimeout, s.Clock()),
+ fragmentation: fragmentation.NewFragmentation(header.IPv6FragmentExtHdrFragmentOffsetBytesPerUnit, fragmentation.HighFragThreshold, fragmentation.LowFragThreshold, ReassembleTimeout, s.Clock()),
ids: ids,
hashIV: hashIV,
@@ -1509,23 +1601,6 @@ func NewProtocol(s *stack.Stack) stack.NetworkProtocol {
return NewProtocolWithOptions(Options{})(s)
}
-// calculateFragmentInnerMTU calculates the maximum number of bytes of
-// fragmentable data a fragment can have, based on the link layer mtu and pkt's
-// network header size.
-func calculateFragmentInnerMTU(mtu uint32, pkt *stack.PacketBuffer) uint32 {
- // TODO(gvisor.dev/issue/3912): Once the Authentication or ESP Headers are
- // supported for outbound packets, their length should not affect the fragment
- // MTU because they should only be transmitted once.
- mtu -= uint32(pkt.NetworkHeader().View().Size())
- mtu -= header.IPv6FragmentHeaderSize
- // Round the MTU down to align to 8 bytes.
- mtu &^= 7
- if mtu <= maxPayloadSize {
- return mtu
- }
- return maxPayloadSize
-}
-
func calculateFragmentReserve(pkt *stack.PacketBuffer) int {
return pkt.AvailableHeaderBytes() + pkt.NetworkHeader().View().Size() + header.IPv6FragmentHeaderSize
}
diff --git a/pkg/tcpip/network/ipv6/ipv6_test.go b/pkg/tcpip/network/ipv6/ipv6_test.go
index 297868f24..c593c0004 100644
--- a/pkg/tcpip/network/ipv6/ipv6_test.go
+++ b/pkg/tcpip/network/ipv6/ipv6_test.go
@@ -24,6 +24,7 @@ import (
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/buffer"
"gvisor.dev/gvisor/pkg/tcpip/checker"
+ "gvisor.dev/gvisor/pkg/tcpip/faketime"
"gvisor.dev/gvisor/pkg/tcpip/header"
"gvisor.dev/gvisor/pkg/tcpip/link/channel"
"gvisor.dev/gvisor/pkg/tcpip/network/testutil"
@@ -238,7 +239,7 @@ func TestReceiveOnAllNodesMulticastAddr(t *testing.T) {
NetworkProtocols: []stack.NetworkProtocolFactory{NewProtocol},
TransportProtocols: []stack.TransportProtocolFactory{test.protocolFactory},
})
- e := channel.New(10, 1280, linkAddr1)
+ e := channel.New(10, header.IPv6MinimumMTU, linkAddr1)
if err := s.CreateNIC(1, e); err != nil {
t.Fatalf("CreateNIC(_) = %s", err)
}
@@ -271,7 +272,7 @@ func TestReceiveOnSolicitedNodeAddr(t *testing.T) {
NetworkProtocols: []stack.NetworkProtocolFactory{NewProtocol},
TransportProtocols: []stack.TransportProtocolFactory{test.protocolFactory},
})
- e := channel.New(1, 1280, linkAddr1)
+ e := channel.New(1, header.IPv6MinimumMTU, linkAddr1)
if err := s.CreateNIC(nicID, e); err != nil {
t.Fatalf("CreateNIC(%d, _) = %s", nicID, err)
}
@@ -825,7 +826,7 @@ func TestReceiveIPv6ExtHdrs(t *testing.T) {
NetworkProtocols: []stack.NetworkProtocolFactory{NewProtocol},
TransportProtocols: []stack.TransportProtocolFactory{udp.NewProtocol},
})
- e := channel.New(1, 1280, linkAddr1)
+ e := channel.New(1, header.IPv6MinimumMTU, linkAddr1)
if err := s.CreateNIC(nicID, e); err != nil {
t.Fatalf("CreateNIC(%d, _) = %s", nicID, err)
}
@@ -1844,7 +1845,7 @@ func TestReceiveIPv6Fragments(t *testing.T) {
NetworkProtocols: []stack.NetworkProtocolFactory{NewProtocol},
TransportProtocols: []stack.TransportProtocolFactory{udp.NewProtocol},
})
- e := channel.New(0, 1280, linkAddr1)
+ e := channel.New(0, header.IPv6MinimumMTU, linkAddr1)
if err := s.CreateNIC(nicID, e); err != nil {
t.Fatalf("CreateNIC(%d, _) = %s", nicID, err)
}
@@ -1912,16 +1913,19 @@ func TestReceiveIPv6Fragments(t *testing.T) {
func TestInvalidIPv6Fragments(t *testing.T) {
const (
- nicID = 1
- fragmentExtHdrLen = 8
+ addr1 = "\x0a\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01"
+ addr2 = "\x0a\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02"
+ linkAddr1 = tcpip.LinkAddress("\x0a\x0b\x0c\x0d\x0e\x0e")
+ nicID = 1
+ hoplimit = 255
+ ident = 1
+ data = "TEST_INVALID_IPV6_FRAGMENTS"
)
- payloadGen := func(payloadLen int) []byte {
- payload := make([]byte, payloadLen)
- for i := 0; i < len(payload); i++ {
- payload[i] = 0x30
- }
- return payload
+ type fragmentData struct {
+ ipv6Fields header.IPv6Fields
+ ipv6FragmentFields header.IPv6FragmentFields
+ payload []byte
}
tests := []struct {
@@ -1929,31 +1933,64 @@ func TestInvalidIPv6Fragments(t *testing.T) {
fragments []fragmentData
wantMalformedIPPackets uint64
wantMalformedFragments uint64
+ expectICMP bool
+ expectICMPType header.ICMPv6Type
+ expectICMPCode header.ICMPv6Code
+ expectICMPTypeSpecific uint32
}{
{
+ name: "fragment size is not a multiple of 8 and the M flag is true",
+ fragments: []fragmentData{
+ {
+ ipv6Fields: header.IPv6Fields{
+ PayloadLength: header.IPv6FragmentHeaderSize + 9,
+ NextHeader: header.IPv6FragmentHeader,
+ HopLimit: hoplimit,
+ SrcAddr: addr1,
+ DstAddr: addr2,
+ },
+ ipv6FragmentFields: header.IPv6FragmentFields{
+ NextHeader: uint8(header.UDPProtocolNumber),
+ FragmentOffset: 0 >> 3,
+ M: true,
+ Identification: ident,
+ },
+ payload: []byte(data)[:9],
+ },
+ },
+ wantMalformedIPPackets: 1,
+ wantMalformedFragments: 1,
+ expectICMP: true,
+ expectICMPType: header.ICMPv6ParamProblem,
+ expectICMPCode: header.ICMPv6ErroneousHeader,
+ expectICMPTypeSpecific: header.IPv6PayloadLenOffset,
+ },
+ {
name: "fragments reassembled into a payload exceeding the max IPv6 payload size",
fragments: []fragmentData{
{
- srcAddr: addr1,
- dstAddr: addr2,
- nextHdr: fragmentExtHdrID,
- data: buffer.NewVectorisedView(
- fragmentExtHdrLen+(header.IPv6MaximumPayloadSize+1)-16,
- []buffer.View{
- // Fragment extension header.
- // Fragment offset = 8190, More = false, ID = 1
- buffer.View([]byte{uint8(header.UDPProtocolNumber), 0,
- ((header.IPv6MaximumPayloadSize + 1) - 16) >> 8,
- ((header.IPv6MaximumPayloadSize + 1) - 16) & math.MaxUint8,
- 0, 0, 0, 1}),
- // Payload length = 16
- payloadGen(16),
- },
- ),
+ ipv6Fields: header.IPv6Fields{
+ PayloadLength: header.IPv6FragmentHeaderSize + 16,
+ NextHeader: header.IPv6FragmentHeader,
+ HopLimit: hoplimit,
+ SrcAddr: addr1,
+ DstAddr: addr2,
+ },
+ ipv6FragmentFields: header.IPv6FragmentFields{
+ NextHeader: uint8(header.UDPProtocolNumber),
+ FragmentOffset: ((header.IPv6MaximumPayloadSize + 1) - 16) >> 3,
+ M: false,
+ Identification: ident,
+ },
+ payload: []byte(data)[:16],
},
},
wantMalformedIPPackets: 1,
wantMalformedFragments: 1,
+ expectICMP: true,
+ expectICMPType: header.ICMPv6ParamProblem,
+ expectICMPCode: header.ICMPv6ErroneousHeader,
+ expectICMPTypeSpecific: header.IPv6MinimumSize + 2, /* offset for 'Fragment Offset' in the fragment header */
},
}
@@ -1964,33 +2001,40 @@ func TestInvalidIPv6Fragments(t *testing.T) {
NewProtocol,
},
})
- e := channel.New(0, 1500, linkAddr1)
+ e := channel.New(1, 1500, linkAddr1)
if err := s.CreateNIC(nicID, e); err != nil {
t.Fatalf("CreateNIC(%d, _) = %s", nicID, err)
}
if err := s.AddAddress(nicID, ProtocolNumber, addr2); err != nil {
t.Fatalf("AddAddress(%d, %d, %s) = %s", nicID, ProtocolNumber, addr2, err)
}
+ s.SetRouteTable([]tcpip.Route{{
+ Destination: header.IPv6EmptySubnet,
+ NIC: nicID,
+ }})
+ var expectICMPPayload buffer.View
for _, f := range test.fragments {
- hdr := buffer.NewPrependable(header.IPv6MinimumSize)
+ hdr := buffer.NewPrependable(header.IPv6MinimumSize + header.IPv6FragmentHeaderSize)
- // Serialize IPv6 fixed header.
- ip := header.IPv6(hdr.Prepend(header.IPv6MinimumSize))
- ip.Encode(&header.IPv6Fields{
- PayloadLength: uint16(f.data.Size()),
- NextHeader: f.nextHdr,
- HopLimit: 255,
- SrcAddr: f.srcAddr,
- DstAddr: f.dstAddr,
- })
+ ip := header.IPv6(hdr.Prepend(header.IPv6MinimumSize + header.IPv6FragmentHeaderSize))
+ ip.Encode(&f.ipv6Fields)
+
+ fragHDR := header.IPv6Fragment(hdr.View()[header.IPv6MinimumSize:])
+ fragHDR.Encode(&f.ipv6FragmentFields)
vv := hdr.View().ToVectorisedView()
- vv.Append(f.data)
+ vv.AppendView(f.payload)
- e.InjectInbound(ProtocolNumber, stack.NewPacketBuffer(stack.PacketBufferOptions{
+ pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
Data: vv,
- }))
+ })
+
+ if test.expectICMP {
+ expectICMPPayload = stack.PayloadSince(pkt.NetworkHeader())
+ }
+
+ e.InjectInbound(ProtocolNumber, pkt)
}
if got, want := s.Stats().IP.MalformedPacketsReceived.Value(), test.wantMalformedIPPackets; got != want {
@@ -1999,6 +2043,287 @@ func TestInvalidIPv6Fragments(t *testing.T) {
if got, want := s.Stats().IP.MalformedFragmentsReceived.Value(), test.wantMalformedFragments; got != want {
t.Errorf("got Stats.IP.MalformedFragmentsReceived = %d, want = %d", got, want)
}
+
+ reply, ok := e.Read()
+ if !test.expectICMP {
+ if ok {
+ t.Fatalf("unexpected ICMP error message received: %#v", reply)
+ }
+ return
+ }
+ if !ok {
+ t.Fatal("expected ICMP error message missing")
+ }
+
+ checker.IPv6(t, stack.PayloadSince(reply.Pkt.NetworkHeader()),
+ checker.SrcAddr(addr2),
+ checker.DstAddr(addr1),
+ checker.IPFullLength(uint16(header.IPv6MinimumSize+header.ICMPv6MinimumSize+expectICMPPayload.Size())),
+ checker.ICMPv6(
+ checker.ICMPv6Type(test.expectICMPType),
+ checker.ICMPv6Code(test.expectICMPCode),
+ checker.ICMPv6TypeSpecific(test.expectICMPTypeSpecific),
+ checker.ICMPv6Payload([]byte(expectICMPPayload)),
+ ),
+ )
+ })
+ }
+}
+
+func TestFragmentReassemblyTimeout(t *testing.T) {
+ const (
+ addr1 = "\x0a\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01"
+ addr2 = "\x0a\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02"
+ linkAddr1 = tcpip.LinkAddress("\x0a\x0b\x0c\x0d\x0e\x0e")
+ nicID = 1
+ hoplimit = 255
+ ident = 1
+ data = "TEST_FRAGMENT_REASSEMBLY_TIMEOUT"
+ )
+
+ type fragmentData struct {
+ ipv6Fields header.IPv6Fields
+ ipv6FragmentFields header.IPv6FragmentFields
+ payload []byte
+ }
+
+ tests := []struct {
+ name string
+ fragments []fragmentData
+ expectICMP bool
+ }{
+ {
+ name: "first fragment only",
+ fragments: []fragmentData{
+ {
+ ipv6Fields: header.IPv6Fields{
+ PayloadLength: header.IPv6FragmentHeaderSize + 16,
+ NextHeader: header.IPv6FragmentHeader,
+ HopLimit: hoplimit,
+ SrcAddr: addr1,
+ DstAddr: addr2,
+ },
+ ipv6FragmentFields: header.IPv6FragmentFields{
+ NextHeader: uint8(header.UDPProtocolNumber),
+ FragmentOffset: 0,
+ M: true,
+ Identification: ident,
+ },
+ payload: []byte(data)[:16],
+ },
+ },
+ expectICMP: true,
+ },
+ {
+ name: "two first fragments",
+ fragments: []fragmentData{
+ {
+ ipv6Fields: header.IPv6Fields{
+ PayloadLength: header.IPv6FragmentHeaderSize + 16,
+ NextHeader: header.IPv6FragmentHeader,
+ HopLimit: hoplimit,
+ SrcAddr: addr1,
+ DstAddr: addr2,
+ },
+ ipv6FragmentFields: header.IPv6FragmentFields{
+ NextHeader: uint8(header.UDPProtocolNumber),
+ FragmentOffset: 0,
+ M: true,
+ Identification: ident,
+ },
+ payload: []byte(data)[:16],
+ },
+ {
+ ipv6Fields: header.IPv6Fields{
+ PayloadLength: header.IPv6FragmentHeaderSize + 16,
+ NextHeader: header.IPv6FragmentHeader,
+ HopLimit: hoplimit,
+ SrcAddr: addr1,
+ DstAddr: addr2,
+ },
+ ipv6FragmentFields: header.IPv6FragmentFields{
+ NextHeader: uint8(header.UDPProtocolNumber),
+ FragmentOffset: 0,
+ M: true,
+ Identification: ident,
+ },
+ payload: []byte(data)[:16],
+ },
+ },
+ expectICMP: true,
+ },
+ {
+ name: "second fragment only",
+ fragments: []fragmentData{
+ {
+ ipv6Fields: header.IPv6Fields{
+ PayloadLength: uint16(header.IPv6FragmentHeaderSize + len(data) - 16),
+ NextHeader: header.IPv6FragmentHeader,
+ HopLimit: hoplimit,
+ SrcAddr: addr1,
+ DstAddr: addr2,
+ },
+ ipv6FragmentFields: header.IPv6FragmentFields{
+ NextHeader: uint8(header.UDPProtocolNumber),
+ FragmentOffset: 8,
+ M: false,
+ Identification: ident,
+ },
+ payload: []byte(data)[16:],
+ },
+ },
+ expectICMP: false,
+ },
+ {
+ name: "two fragments with a gap",
+ fragments: []fragmentData{
+ {
+ ipv6Fields: header.IPv6Fields{
+ PayloadLength: header.IPv6FragmentHeaderSize + 16,
+ NextHeader: header.IPv6FragmentHeader,
+ HopLimit: hoplimit,
+ SrcAddr: addr1,
+ DstAddr: addr2,
+ },
+ ipv6FragmentFields: header.IPv6FragmentFields{
+ NextHeader: uint8(header.UDPProtocolNumber),
+ FragmentOffset: 0,
+ M: true,
+ Identification: ident,
+ },
+ payload: []byte(data)[:16],
+ },
+ {
+ ipv6Fields: header.IPv6Fields{
+ PayloadLength: uint16(header.IPv6FragmentHeaderSize + len(data) - 16),
+ NextHeader: header.IPv6FragmentHeader,
+ HopLimit: hoplimit,
+ SrcAddr: addr1,
+ DstAddr: addr2,
+ },
+ ipv6FragmentFields: header.IPv6FragmentFields{
+ NextHeader: uint8(header.UDPProtocolNumber),
+ FragmentOffset: 8,
+ M: false,
+ Identification: ident,
+ },
+ payload: []byte(data)[16:],
+ },
+ },
+ expectICMP: true,
+ },
+ {
+ name: "two fragments with a gap in reverse order",
+ fragments: []fragmentData{
+ {
+ ipv6Fields: header.IPv6Fields{
+ PayloadLength: uint16(header.IPv6FragmentHeaderSize + len(data) - 16),
+ NextHeader: header.IPv6FragmentHeader,
+ HopLimit: hoplimit,
+ SrcAddr: addr1,
+ DstAddr: addr2,
+ },
+ ipv6FragmentFields: header.IPv6FragmentFields{
+ NextHeader: uint8(header.UDPProtocolNumber),
+ FragmentOffset: 8,
+ M: false,
+ Identification: ident,
+ },
+ payload: []byte(data)[16:],
+ },
+ {
+ ipv6Fields: header.IPv6Fields{
+ PayloadLength: header.IPv6FragmentHeaderSize + 16,
+ NextHeader: header.IPv6FragmentHeader,
+ HopLimit: hoplimit,
+ SrcAddr: addr1,
+ DstAddr: addr2,
+ },
+ ipv6FragmentFields: header.IPv6FragmentFields{
+ NextHeader: uint8(header.UDPProtocolNumber),
+ FragmentOffset: 0,
+ M: true,
+ Identification: ident,
+ },
+ payload: []byte(data)[:16],
+ },
+ },
+ expectICMP: true,
+ },
+ }
+
+ for _, test := range tests {
+ t.Run(test.name, func(t *testing.T) {
+ clock := faketime.NewManualClock()
+ s := stack.New(stack.Options{
+ NetworkProtocols: []stack.NetworkProtocolFactory{
+ NewProtocol,
+ },
+ Clock: clock,
+ })
+
+ e := channel.New(1, 1500, linkAddr1)
+ if err := s.CreateNIC(nicID, e); err != nil {
+ t.Fatalf("CreateNIC(%d, _) = %s", nicID, err)
+ }
+ if err := s.AddAddress(nicID, ProtocolNumber, addr2); err != nil {
+ t.Fatalf("AddAddress(%d, %d, %s) = %s", nicID, header.IPv6ProtocolNumber, addr2, err)
+ }
+ s.SetRouteTable([]tcpip.Route{{
+ Destination: header.IPv6EmptySubnet,
+ NIC: nicID,
+ }})
+
+ var firstFragmentSent buffer.View
+ for _, f := range test.fragments {
+ hdr := buffer.NewPrependable(header.IPv6MinimumSize + header.IPv6FragmentHeaderSize)
+
+ ip := header.IPv6(hdr.Prepend(header.IPv6MinimumSize + header.IPv6FragmentHeaderSize))
+ ip.Encode(&f.ipv6Fields)
+
+ fragHDR := header.IPv6Fragment(hdr.View()[header.IPv6MinimumSize:])
+ fragHDR.Encode(&f.ipv6FragmentFields)
+
+ vv := hdr.View().ToVectorisedView()
+ vv.AppendView(f.payload)
+
+ pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
+ Data: vv,
+ })
+
+ if firstFragmentSent == nil && fragHDR.FragmentOffset() == 0 {
+ firstFragmentSent = stack.PayloadSince(pkt.NetworkHeader())
+ }
+
+ e.InjectInbound(ProtocolNumber, pkt)
+ }
+
+ clock.Advance(ReassembleTimeout)
+
+ reply, ok := e.Read()
+ if !test.expectICMP {
+ if ok {
+ t.Fatalf("unexpected ICMP error message received: %#v", reply)
+ }
+ return
+ }
+ if !ok {
+ t.Fatal("expected ICMP error message missing")
+ }
+ if firstFragmentSent == nil {
+ t.Fatalf("unexpected ICMP error message received: %#v", reply)
+ }
+
+ checker.IPv6(t, stack.PayloadSince(reply.Pkt.NetworkHeader()),
+ checker.SrcAddr(addr2),
+ checker.DstAddr(addr1),
+ checker.IPFullLength(uint16(header.IPv6MinimumSize+header.ICMPv6MinimumSize+firstFragmentSent.Size())),
+ checker.ICMPv6(
+ checker.ICMPv6Type(header.ICMPv6TimeExceeded),
+ checker.ICMPv6Code(header.ICMPv6ReassemblyTimeout),
+ checker.ICMPv6Payload([]byte(firstFragmentSent)),
+ ),
+ )
})
}
}
@@ -2230,8 +2555,8 @@ var fragmentationTests = []struct {
wantFragments []fragmentInfo
}{
{
- description: "No Fragmentation",
- mtu: 1280,
+ description: "No fragmentation",
+ mtu: header.IPv6MinimumMTU,
gso: nil,
transHdrLen: 0,
payloadSize: 1000,
@@ -2241,7 +2566,18 @@ var fragmentationTests = []struct {
},
{
description: "Fragmented",
- mtu: 1280,
+ mtu: header.IPv6MinimumMTU,
+ gso: nil,
+ transHdrLen: 0,
+ payloadSize: 2000,
+ wantFragments: []fragmentInfo{
+ {offset: 0, payloadSize: 1240, more: true},
+ {offset: 154, payloadSize: 776, more: false},
+ },
+ },
+ {
+ description: "Fragmented with mtu not a multiple of 8",
+ mtu: header.IPv6MinimumMTU + 1,
gso: nil,
transHdrLen: 0,
payloadSize: 2000,
@@ -2262,7 +2598,7 @@ var fragmentationTests = []struct {
},
{
description: "Fragmented with gso none",
- mtu: 1280,
+ mtu: header.IPv6MinimumMTU,
gso: &stack.GSO{Type: stack.GSONone},
transHdrLen: 0,
payloadSize: 1400,
@@ -2273,7 +2609,7 @@ var fragmentationTests = []struct {
},
{
description: "Fragmented with big header",
- mtu: 1280,
+ mtu: header.IPv6MinimumMTU,
gso: nil,
transHdrLen: 100,
payloadSize: 1200,
@@ -2448,8 +2784,8 @@ func TestFragmentationErrors(t *testing.T) {
wantError: tcpip.ErrAborted,
},
{
- description: "Error on packet with MTU smaller than transport header",
- mtu: 1280,
+ description: "Error when MTU is smaller than transport header",
+ mtu: header.IPv6MinimumMTU,
transHdrLen: 1500,
payloadSize: 500,
allowPackets: 0,
@@ -2457,6 +2793,16 @@ func TestFragmentationErrors(t *testing.T) {
mockError: nil,
wantError: tcpip.ErrMessageTooLong,
},
+ {
+ description: "Error when MTU is smaller than IPv6 minimum MTU",
+ mtu: header.IPv6MinimumMTU - 1,
+ transHdrLen: 0,
+ payloadSize: 500,
+ allowPackets: 0,
+ outgoingErrors: 1,
+ mockError: nil,
+ wantError: tcpip.ErrInvalidEndpointState,
+ },
}
for _, ft := range tests {