diff options
-rw-r--r-- | WORKSPACE | 12 | ||||
-rw-r--r-- | pkg/sentry/socket/netlink/BUILD | 1 | ||||
-rw-r--r-- | pkg/sentry/socket/netlink/socket.go | 22 | ||||
-rw-r--r-- | pkg/sentry/socket/netstack/netstack.go | 12 | ||||
-rw-r--r-- | pkg/tcpip/BUILD | 3 | ||||
-rw-r--r-- | pkg/tcpip/buffer/BUILD | 3 | ||||
-rw-r--r-- | pkg/tcpip/header/BUILD | 3 | ||||
-rw-r--r-- | pkg/tcpip/header/ipv4.go | 2 | ||||
-rw-r--r-- | pkg/tcpip/iptables/BUILD | 4 | ||||
-rw-r--r-- | pkg/tcpip/link/fdbased/endpoint.go | 13 | ||||
-rw-r--r-- | pkg/tcpip/network/fragmentation/BUILD | 5 | ||||
-rw-r--r-- | pkg/tcpip/network/ipv4/ipv4.go | 16 | ||||
-rw-r--r-- | pkg/tcpip/network/ipv4/ipv4_test.go | 104 | ||||
-rw-r--r-- | pkg/tcpip/seqnum/BUILD | 4 | ||||
-rw-r--r-- | pkg/tcpip/stack/BUILD | 5 | ||||
-rw-r--r-- | pkg/tcpip/tcpip.go | 8 | ||||
-rw-r--r-- | pkg/tcpip/transport/icmp/BUILD | 4 | ||||
-rw-r--r-- | pkg/tcpip/transport/raw/BUILD | 4 | ||||
-rw-r--r-- | pkg/tcpip/transport/tcp/BUILD | 5 | ||||
-rw-r--r-- | pkg/tcpip/transport/udp/BUILD | 5 | ||||
-rw-r--r-- | pkg/waiter/BUILD | 5 | ||||
-rw-r--r-- | runsc/specutils/specutils.go | 2 | ||||
-rw-r--r-- | test/syscalls/linux/socket_netlink_route.cc | 153 | ||||
-rw-r--r-- | test/syscalls/linux/socket_netlink_util.cc | 7 |
24 files changed, 359 insertions, 43 deletions
@@ -3,19 +3,19 @@ load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") http_archive( name = "io_bazel_rules_go", - sha256 = "513c12397db1bc9aa46dd62f02dd94b49a9b5d17444d49b5a04c5a89f3053c1c", + sha256 = "078f2a9569fa9ed846e60805fb5fb167d6f6c4ece48e6d409bf5fb2154eaf0d8", urls = [ - "https://storage.googleapis.com/bazel-mirror/github.com/bazelbuild/rules_go/releases/download/v0.19.5/rules_go-v0.19.5.tar.gz", - "https://github.com/bazelbuild/rules_go/releases/download/v0.19.5/rules_go-v0.19.5.tar.gz", + "https://storage.googleapis.com/bazel-mirror/github.com/bazelbuild/rules_go/releases/download/v0.20.0/rules_go-v0.20.0.tar.gz", + "https://github.com/bazelbuild/rules_go/releases/download/v0.20.0/rules_go-v0.20.0.tar.gz", ], ) http_archive( name = "bazel_gazelle", - sha256 = "7fc87f4170011201b1690326e8c16c5d802836e3a0d617d8f75c3af2b23180c4", + sha256 = "41bff2a0b32b02f20c227d234aa25ef3783998e5453f7eade929704dcff7cd4b", urls = [ - "https://storage.googleapis.com/bazel-mirror/github.com/bazelbuild/bazel-gazelle/releases/download/0.18.2/bazel-gazelle-0.18.2.tar.gz", - "https://github.com/bazelbuild/bazel-gazelle/releases/download/0.18.2/bazel-gazelle-0.18.2.tar.gz", + "https://storage.googleapis.com/bazel-mirror/github.com/bazelbuild/bazel-gazelle/releases/download/v0.19.0/bazel-gazelle-v0.19.0.tar.gz", + "https://github.com/bazelbuild/bazel-gazelle/releases/download/v0.19.0/bazel-gazelle-v0.19.0.tar.gz", ], ) diff --git a/pkg/sentry/socket/netlink/BUILD b/pkg/sentry/socket/netlink/BUILD index 45ebb2a0e..7da68384e 100644 --- a/pkg/sentry/socket/netlink/BUILD +++ b/pkg/sentry/socket/netlink/BUILD @@ -21,6 +21,7 @@ go_library( "//pkg/sentry/fs/fsutil", "//pkg/sentry/kernel", "//pkg/sentry/kernel/time", + "//pkg/sentry/safemem", "//pkg/sentry/socket", "//pkg/sentry/socket/netlink/port", "//pkg/sentry/socket/unix", diff --git a/pkg/sentry/socket/netlink/socket.go b/pkg/sentry/socket/netlink/socket.go index d0aab293d..b2732ca29 100644 --- 
a/pkg/sentry/socket/netlink/socket.go +++ b/pkg/sentry/socket/netlink/socket.go @@ -28,6 +28,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/kernel" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sentry/safemem" "gvisor.dev/gvisor/pkg/sentry/socket" "gvisor.dev/gvisor/pkg/sentry/socket/netlink/port" "gvisor.dev/gvisor/pkg/sentry/socket/unix" @@ -416,6 +417,24 @@ func (s *Socket) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags int, have Peek: flags&linux.MSG_PEEK != 0, } + // If MSG_TRUNC is set with a zero byte destination then we still need + // to read the message and discard it, or in the case where MSG_PEEK is + // set, leave it be. In both cases the full message length must be + // returned. However, the memory manager for the destination will not read + // the endpoint if the destination is zero length. + // + // In order for the endpoint to be read when the destination size is zero, + // we must cause a read of the endpoint by using a separate fake zero + // length block sequence and calling the EndpointReader directly. + if trunc && dst.Addrs.NumBytes() == 0 { + // Perform a read to a zero byte block sequence. We can ignore the + // original destination since it was zero bytes. The length returned by + // ReadToBlocks is ignored and we return the full message length to comply + // with MSG_TRUNC. + _, err := r.ReadToBlocks(safemem.BlockSeqOf(safemem.BlockFromSafeSlice(make([]byte, 0)))) + return int(r.MsgSize), linux.MSG_TRUNC, from, fromLen, socket.ControlMessages{}, syserr.FromError(err) + } + if n, err := dst.CopyOutFrom(t, &r); err != syserror.ErrWouldBlock || flags&linux.MSG_DONTWAIT != 0 { var mflags int if n < int64(r.MsgSize) { @@ -499,6 +518,9 @@ func (s *Socket) sendResponse(ctx context.Context, ms *MessageSet) *syserr.Error PortID: uint32(ms.PortID), }) + // Add the dump_done_errno payload. 
+ m.Put(int64(0)) + _, notify, err := s.connection.Send([][]byte{m.Finalize()}, transport.ControlMessages{}, tcpip.FullAddress{}) if err != nil && err != syserr.ErrWouldBlock { return err diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go index e5b15a289..09db2109f 100644 --- a/pkg/sentry/socket/netstack/netstack.go +++ b/pkg/sentry/socket/netstack/netstack.go @@ -137,11 +137,13 @@ var Metrics = tcpip.Stats{ }, }, IP: tcpip.IPStats{ - PacketsReceived: mustCreateMetric("/netstack/ip/packets_received", "Total number of IP packets received from the link layer in nic.DeliverNetworkPacket."), - InvalidAddressesReceived: mustCreateMetric("/netstack/ip/invalid_addresses_received", "Total number of IP packets received with an unknown or invalid destination address."), - PacketsDelivered: mustCreateMetric("/netstack/ip/packets_delivered", "Total number of incoming IP packets that are successfully delivered to the transport layer via HandlePacket."), - PacketsSent: mustCreateMetric("/netstack/ip/packets_sent", "Total number of IP packets sent via WritePacket."), - OutgoingPacketErrors: mustCreateMetric("/netstack/ip/outgoing_packet_errors", "Total number of IP packets which failed to write to a link-layer endpoint."), + PacketsReceived: mustCreateMetric("/netstack/ip/packets_received", "Total number of IP packets received from the link layer in nic.DeliverNetworkPacket."), + InvalidAddressesReceived: mustCreateMetric("/netstack/ip/invalid_addresses_received", "Total number of IP packets received with an unknown or invalid destination address."), + PacketsDelivered: mustCreateMetric("/netstack/ip/packets_delivered", "Total number of incoming IP packets that are successfully delivered to the transport layer via HandlePacket."), + PacketsSent: mustCreateMetric("/netstack/ip/packets_sent", "Total number of IP packets sent via WritePacket."), + OutgoingPacketErrors: mustCreateMetric("/netstack/ip/outgoing_packet_errors", "Total number of 
IP packets which failed to write to a link-layer endpoint."), + MalformedPacketsReceived: mustCreateMetric("/netstack/ip/malformed_packets_received", "Total number of IP packets which failed IP header validation checks."), + MalformedFragmentsReceived: mustCreateMetric("/netstack/ip/malformed_fragments_received", "Total number of IP fragments which failed IP fragment validation checks."), }, TCP: tcpip.TCPStats{ ActiveConnectionOpenings: mustCreateMetric("/netstack/tcp/active_connection_openings", "Number of connections opened successfully via Connect."), diff --git a/pkg/tcpip/BUILD b/pkg/tcpip/BUILD index 3fd9e3134..3c2b2b5ea 100644 --- a/pkg/tcpip/BUILD +++ b/pkg/tcpip/BUILD @@ -1,9 +1,8 @@ load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools/go_stateify:defs.bzl", "go_library") package(licenses = ["notice"]) -load("//tools/go_stateify:defs.bzl", "go_library") - go_library( name = "tcpip", srcs = [ diff --git a/pkg/tcpip/buffer/BUILD b/pkg/tcpip/buffer/BUILD index b4e8d6810..d6c31bfa2 100644 --- a/pkg/tcpip/buffer/BUILD +++ b/pkg/tcpip/buffer/BUILD @@ -1,9 +1,8 @@ load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools/go_stateify:defs.bzl", "go_library") package(licenses = ["notice"]) -load("//tools/go_stateify:defs.bzl", "go_library") - go_library( name = "buffer", srcs = [ diff --git a/pkg/tcpip/header/BUILD b/pkg/tcpip/header/BUILD index b558350c3..a255231a3 100644 --- a/pkg/tcpip/header/BUILD +++ b/pkg/tcpip/header/BUILD @@ -1,9 +1,8 @@ load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools/go_stateify:defs.bzl", "go_library") package(licenses = ["notice"]) -load("//tools/go_stateify:defs.bzl", "go_library") - go_library( name = "header", srcs = [ diff --git a/pkg/tcpip/header/ipv4.go b/pkg/tcpip/header/ipv4.go index 554632a64..e5360e7c1 100644 --- a/pkg/tcpip/header/ipv4.go +++ b/pkg/tcpip/header/ipv4.go @@ -284,7 +284,7 @@ func (b IPv4) IsValid(pktSize int) bool { hlen := int(b.HeaderLength()) tlen := int(b.TotalLength()) - 
if hlen > tlen || tlen > pktSize { + if hlen < IPv4MinimumSize || hlen > tlen || tlen > pktSize { return false } diff --git a/pkg/tcpip/iptables/BUILD b/pkg/tcpip/iptables/BUILD index 3fc14bacd..cc5f531e2 100644 --- a/pkg/tcpip/iptables/BUILD +++ b/pkg/tcpip/iptables/BUILD @@ -1,7 +1,7 @@ -package(licenses = ["notice"]) - load("//tools/go_stateify:defs.bzl", "go_library") +package(licenses = ["notice"]) + go_library( name = "iptables", srcs = [ diff --git a/pkg/tcpip/link/fdbased/endpoint.go b/pkg/tcpip/link/fdbased/endpoint.go index 7636418b1..f80ac3435 100644 --- a/pkg/tcpip/link/fdbased/endpoint.go +++ b/pkg/tcpip/link/fdbased/endpoint.go @@ -178,6 +178,14 @@ type Options struct { RXChecksumOffload bool } +// fanoutID is used for AF_PACKET based endpoints to enable PACKET_FANOUT +// support in the host kernel. This allows us to use multiple FDs to receive +// from the same underlying NIC. The fanoutID needs to be the same for a given +// set of FDs that point to the same NIC. Trying to set the PACKET_FANOUT +// option for an FD with a fanoutID already in use by another FD for a different +// NIC will return an EINVAL. +var fanoutID = 1 + // New creates a new fd-based endpoint. // // Makes fd non-blocking, but does not take ownership of fd, which must remain @@ -245,6 +253,10 @@ func New(opts *Options) (stack.LinkEndpoint, error) { e.inboundDispatchers = append(e.inboundDispatchers, inboundDispatcher) } + // Increment fanoutID to ensure that we don't re-use the same fanoutID for + // the next endpoint. + fanoutID++ + return e, nil } @@ -265,7 +277,6 @@ func createInboundDispatcher(e *endpoint, fd int, isSocket bool) (linkDispatcher case *unix.SockaddrLinklayer: // enable PACKET_FANOUT mode if the underlying socket is // of type AF_PACKET. 
- const fanoutID = 1 const fanoutType = 0x8000 // PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_DEFRAG fanoutArg := fanoutID | fanoutType<<16 if err := syscall.SetsockoptInt(fd, syscall.SOL_PACKET, unix.PACKET_FANOUT, fanoutArg); err != nil { diff --git a/pkg/tcpip/network/fragmentation/BUILD b/pkg/tcpip/network/fragmentation/BUILD index c5c7aad86..825ff3392 100644 --- a/pkg/tcpip/network/fragmentation/BUILD +++ b/pkg/tcpip/network/fragmentation/BUILD @@ -1,10 +1,9 @@ load("@io_bazel_rules_go//go:def.bzl", "go_test") - -package(licenses = ["notice"]) - load("//tools/go_generics:defs.bzl", "go_template_instance") load("//tools/go_stateify:defs.bzl", "go_library") +package(licenses = ["notice"]) + go_template_instance( name = "reassembler_list", out = "reassembler_list.go", diff --git a/pkg/tcpip/network/ipv4/ipv4.go b/pkg/tcpip/network/ipv4/ipv4.go index 162aa1b4d..fb6358fbb 100644 --- a/pkg/tcpip/network/ipv4/ipv4.go +++ b/pkg/tcpip/network/ipv4/ipv4.go @@ -296,6 +296,7 @@ func (e *endpoint) HandlePacket(r *stack.Route, vv buffer.VectorisedView) { headerView := vv.First() h := header.IPv4(headerView) if !h.IsValid(vv.Size()) { + r.Stats().IP.MalformedPacketsReceived.Increment() return } @@ -306,8 +307,23 @@ func (e *endpoint) HandlePacket(r *stack.Route, vv buffer.VectorisedView) { more := (h.Flags() & header.IPv4FlagMoreFragments) != 0 if more || h.FragmentOffset() != 0 { + if vv.Size() == 0 { + // Drop the packet as it's marked as a fragment but has + // no payload. + r.Stats().IP.MalformedPacketsReceived.Increment() + r.Stats().IP.MalformedFragmentsReceived.Increment() + return + } // The packet is a fragment, let's try to reassemble it. last := h.FragmentOffset() + uint16(vv.Size()) - 1 + // Drop the packet if the fragmentOffset is incorrect. i.e the + // combination of fragmentOffset and vv.size() causes a wrap + // around resulting in last being less than the offset. 
+ if last < h.FragmentOffset() { + r.Stats().IP.MalformedPacketsReceived.Increment() + r.Stats().IP.MalformedFragmentsReceived.Increment() + return + } var ready bool vv, ready = e.fragmentation.Process(hash.IPv4FragmentHash(h), h.FragmentOffset(), last, more, vv) if !ready { diff --git a/pkg/tcpip/network/ipv4/ipv4_test.go b/pkg/tcpip/network/ipv4/ipv4_test.go index 8b7500095..3dcb503e2 100644 --- a/pkg/tcpip/network/ipv4/ipv4_test.go +++ b/pkg/tcpip/network/ipv4/ipv4_test.go @@ -366,3 +366,107 @@ func TestFragmentationErrors(t *testing.T) { }) } } + +func TestInvalidFragments(t *testing.T) { + // These packets have both IHL and TotalLength set to 0. + testCases := []struct { + name string + packets [][]byte + wantMalformedIPPackets uint64 + wantMalformedFragments uint64 + }{ + { + "ihl_totallen_zero_valid_frag_offset", + [][]byte{ + {0x40, 0x30, 0x00, 0x00, 0x6c, 0x74, 0x7d, 0x30, 0x30, 0x30, 0x30, 0x30, 0x39, 0x32, 0x39, 0x33, 0xff, 0xff, 0xff, 0xff, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30}, + }, + 1, + 0, + }, + { + "ihl_totallen_zero_invalid_frag_offset", + [][]byte{ + {0x40, 0x30, 0x00, 0x00, 0x6c, 0x74, 0x20, 0x00, 0x30, 0x30, 0x30, 0x30, 0x39, 0x32, 0x39, 0x33, 0xff, 0xff, 0xff, 0xff, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30}, + }, + 1, + 0, + }, + { + // Total Length of 37(20 bytes IP header + 17 bytes of + // payload) + // Frag Offset of 0x1ffe = 8190*8 = 65520 + // Leading to the fragment end to be past 65535. + "ihl_totallen_valid_invalid_frag_offset_1", + [][]byte{ + {0x45, 0x30, 0x00, 0x25, 0x6c, 0x74, 0x1f, 0xfe, 0x30, 0x30, 0x30, 0x30, 0x39, 0x32, 0x39, 0x33, 0xff, 0xff, 0xff, 0xff, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30}, + }, + 1, + 1, + }, + // The following 3 tests were found by running a fuzzer and were + // triggering a panic in the IPv4 reassembler code. 
+ { + "ihl_less_than_ipv4_minimum_size_1", + [][]byte{ + {0x42, 0x30, 0x0, 0x30, 0x30, 0x40, 0x0, 0xf3, 0x30, 0x1, 0x30, 0x30, 0x73, 0x73, 0x69, 0x6e, 0xff, 0xff, 0xff, 0xff, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30}, + {0x42, 0x30, 0x0, 0x8, 0x30, 0x40, 0x20, 0x0, 0x30, 0x1, 0x30, 0x30, 0x73, 0x73, 0x69, 0x6e, 0xff, 0xff, 0xff, 0xff, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30}, + }, + 2, + 0, + }, + { + "ihl_less_than_ipv4_minimum_size_2", + [][]byte{ + {0x42, 0x30, 0x0, 0x30, 0x30, 0x40, 0xb3, 0x12, 0x30, 0x6, 0x30, 0x30, 0x73, 0x73, 0x69, 0x6e, 0xff, 0xff, 0xff, 0xff, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30}, + {0x42, 0x30, 0x0, 0x8, 0x30, 0x40, 0x20, 0x0, 0x30, 0x6, 0x30, 0x30, 0x73, 0x73, 0x69, 0x6e, 0xff, 0xff, 0xff, 0xff, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30}, + }, + 2, + 0, + }, + { + "ihl_less_than_ipv4_minimum_size_3", + [][]byte{ + {0x42, 0x30, 0x0, 0x30, 0x30, 0x40, 0xb3, 0x30, 0x30, 0x6, 0x30, 0x30, 0x73, 0x73, 0x69, 0x6e, 0xff, 0xff, 0xff, 0xff, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30}, + {0x42, 0x30, 0x0, 0x8, 0x30, 0x40, 0x20, 0x0, 0x30, 0x6, 0x30, 0x30, 0x73, 0x73, 0x69, 0x6e, 0xff, 0xff, 0xff, 0xff, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30}, + }, + 2, + 0, + }, + { + 
"fragment_with_short_total_len_extra_payload", + [][]byte{ + {0x46, 0x30, 0x00, 0x30, 0x30, 0x40, 0x0e, 0x12, 0x30, 0x06, 0x30, 0x30, 0x73, 0x73, 0x69, 0x6e, 0xff, 0xff, 0xff, 0xff, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30}, + {0x46, 0x30, 0x00, 0x18, 0x30, 0x40, 0x20, 0x00, 0x30, 0x06, 0x30, 0x30, 0x73, 0x73, 0x69, 0x6e, 0xff, 0xff, 0xff, 0xff, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30}, + }, + 1, + 1, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + const nicid tcpip.NICID = 42 + s := stack.New(stack.Options{ + NetworkProtocols: []stack.NetworkProtocol{ + ipv4.NewProtocol(), + }, + }) + + var linkAddr = tcpip.LinkAddress([]byte{0x30, 0x30, 0x30, 0x30, 0x30, 0x30}) + var remoteLinkAddr = tcpip.LinkAddress([]byte{0x30, 0x30, 0x30, 0x30, 0x30, 0x31}) + ep := channel.New(10, 1500, linkAddr) + s.CreateNIC(nicid, sniffer.New(ep)) + + for _, pkt := range tc.packets { + ep.InjectLinkAddr(header.IPv4ProtocolNumber, remoteLinkAddr, buffer.NewVectorisedView(len(pkt), []buffer.View{pkt})) + } + + if got, want := s.Stats().IP.MalformedPacketsReceived.Value(), tc.wantMalformedIPPackets; got != want { + t.Errorf("incorrect Stats.IP.MalformedPacketsReceived, got: %d, want: %d", got, want) + } + if got, want := s.Stats().IP.MalformedFragmentsReceived.Value(), tc.wantMalformedFragments; got != want { + t.Errorf("incorrect Stats.IP.MalformedFragmentsReceived, got: %d, want: %d", got, want) + } + }) + } +} diff --git a/pkg/tcpip/seqnum/BUILD b/pkg/tcpip/seqnum/BUILD index 76b5f4ffa..29b7d761c 100644 --- a/pkg/tcpip/seqnum/BUILD +++ b/pkg/tcpip/seqnum/BUILD @@ -1,7 +1,7 @@ -package(licenses = ["notice"]) - load("//tools/go_stateify:defs.bzl", "go_library") +package(licenses = ["notice"]) + go_library( 
name = "seqnum", srcs = ["seqnum.go"], diff --git a/pkg/tcpip/stack/BUILD b/pkg/tcpip/stack/BUILD index baf88bfab..6a78432c9 100644 --- a/pkg/tcpip/stack/BUILD +++ b/pkg/tcpip/stack/BUILD @@ -1,10 +1,9 @@ load("@io_bazel_rules_go//go:def.bzl", "go_test") - -package(licenses = ["notice"]) - load("//tools/go_generics:defs.bzl", "go_template_instance") load("//tools/go_stateify:defs.bzl", "go_library") +package(licenses = ["notice"]) + go_template_instance( name = "linkaddrentry_list", out = "linkaddrentry_list.go", diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go index 60ba98a4c..444ac1a5b 100644 --- a/pkg/tcpip/tcpip.go +++ b/pkg/tcpip/tcpip.go @@ -853,6 +853,14 @@ type IPStats struct { // OutgoingPacketErrors is the total number of IP packets which failed // to write to a link-layer endpoint. OutgoingPacketErrors *StatCounter + + // MalformedPacketsReceived is the total number of IP Packets that were + // dropped due to the IP packet header failing validation checks. + MalformedPacketsReceived *StatCounter + + // MalformedFragmentsReceived is the total number of IP Fragments that were + // dropped due to the fragment failing validation checks. + MalformedFragmentsReceived *StatCounter } // TCPStats collects TCP-specific stats. 
diff --git a/pkg/tcpip/transport/icmp/BUILD b/pkg/tcpip/transport/icmp/BUILD index d78a162b8..9254c3dea 100644 --- a/pkg/tcpip/transport/icmp/BUILD +++ b/pkg/tcpip/transport/icmp/BUILD @@ -1,8 +1,8 @@ -package(licenses = ["notice"]) - load("//tools/go_generics:defs.bzl", "go_template_instance") load("//tools/go_stateify:defs.bzl", "go_library") +package(licenses = ["notice"]) + go_template_instance( name = "icmp_packet_list", out = "icmp_packet_list.go", diff --git a/pkg/tcpip/transport/raw/BUILD b/pkg/tcpip/transport/raw/BUILD index 7241f6c19..fba598d51 100644 --- a/pkg/tcpip/transport/raw/BUILD +++ b/pkg/tcpip/transport/raw/BUILD @@ -1,8 +1,8 @@ -package(licenses = ["notice"]) - load("//tools/go_generics:defs.bzl", "go_template_instance") load("//tools/go_stateify:defs.bzl", "go_library") +package(licenses = ["notice"]) + go_template_instance( name = "packet_list", out = "packet_list.go", diff --git a/pkg/tcpip/transport/tcp/BUILD b/pkg/tcpip/transport/tcp/BUILD index a42e1f4a2..aed70e06f 100644 --- a/pkg/tcpip/transport/tcp/BUILD +++ b/pkg/tcpip/transport/tcp/BUILD @@ -1,10 +1,9 @@ load("@io_bazel_rules_go//go:def.bzl", "go_test") - -package(licenses = ["notice"]) - load("//tools/go_generics:defs.bzl", "go_template_instance") load("//tools/go_stateify:defs.bzl", "go_library") +package(licenses = ["notice"]) + go_template_instance( name = "tcp_segment_list", out = "tcp_segment_list.go", diff --git a/pkg/tcpip/transport/udp/BUILD b/pkg/tcpip/transport/udp/BUILD index 7a635ab8d..c9460aa0d 100644 --- a/pkg/tcpip/transport/udp/BUILD +++ b/pkg/tcpip/transport/udp/BUILD @@ -1,10 +1,9 @@ load("@io_bazel_rules_go//go:def.bzl", "go_test") - -package(licenses = ["notice"]) - load("//tools/go_generics:defs.bzl", "go_template_instance") load("//tools/go_stateify:defs.bzl", "go_library") +package(licenses = ["notice"]) + go_template_instance( name = "udp_packet_list", out = "udp_packet_list.go", diff --git a/pkg/waiter/BUILD b/pkg/waiter/BUILD index 8dc88becb..1f7efb064 
100644 --- a/pkg/waiter/BUILD +++ b/pkg/waiter/BUILD @@ -1,10 +1,9 @@ load("@io_bazel_rules_go//go:def.bzl", "go_test") - -package(licenses = ["notice"]) - load("//tools/go_generics:defs.bzl", "go_template_instance") load("//tools/go_stateify:defs.bzl", "go_library") +package(licenses = ["notice"]) + go_template_instance( name = "waiter_list", out = "waiter_list.go", diff --git a/runsc/specutils/specutils.go b/runsc/specutils/specutils.go index 591abe458..3d9ced1b6 100644 --- a/runsc/specutils/specutils.go +++ b/runsc/specutils/specutils.go @@ -92,7 +92,7 @@ func ValidateSpec(spec *specs.Spec) error { log.Warningf("AppArmor profile %q is being ignored", spec.Process.ApparmorProfile) } - // TODO(b/72226747): Apply seccomp to application inside sandbox. + // TODO(gvisor.dev/issue/510): Apply seccomp to application inside sandbox. if spec.Linux != nil && spec.Linux.Seccomp != nil { log.Warningf("Seccomp spec is being ignored") } diff --git a/test/syscalls/linux/socket_netlink_route.cc b/test/syscalls/linux/socket_netlink_route.cc index 32fe0d6d1..dd4a11655 100644 --- a/test/syscalls/linux/socket_netlink_route.cc +++ b/test/syscalls/linux/socket_netlink_route.cc @@ -539,6 +539,159 @@ TEST(NetlinkRouteTest, GetRouteDump) { EXPECT_TRUE(dstFound); } +// RecvmsgTrunc tests the recvmsg MSG_TRUNC flag with zero length output +// buffer. MSG_TRUNC with a zero length buffer should consume subsequent +// messages off the socket. 
+TEST(NetlinkRouteTest, RecvmsgTrunc) { + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket()); + + struct request { + struct nlmsghdr hdr; + struct rtgenmsg rgm; + }; + + constexpr uint32_t kSeq = 12345; + + struct request req; + req.hdr.nlmsg_len = sizeof(req); + req.hdr.nlmsg_type = RTM_GETADDR; + req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP; + req.hdr.nlmsg_seq = kSeq; + req.rgm.rtgen_family = AF_UNSPEC; + + struct iovec iov = {}; + iov.iov_base = &req; + iov.iov_len = sizeof(req); + + struct msghdr msg = {}; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + ASSERT_THAT(RetryEINTR(sendmsg)(fd.get(), &msg, 0), SyscallSucceeds()); + + iov.iov_base = NULL; + iov.iov_len = 0; + + int trunclen, trunclen2; + + // Note: This test assumes at least two messages are returned by the + // RTM_GETADDR request. That means at least one RTM_NEWADDR message and one + // NLMSG_DONE message. We cannot read all the messages without blocking + // because we would need to read the message into a buffer and check the + // nlmsg_type for NLMSG_DONE. However, the test depends on reading into a + // zero-length buffer. + + // First, call recvmsg with MSG_TRUNC. This will read the full message from + // the socket and return its full length. Subsequent calls to recvmsg will + // read the next messages from the socket. + ASSERT_THAT(trunclen = RetryEINTR(recvmsg)(fd.get(), &msg, MSG_TRUNC), + SyscallSucceeds()); + + // Message should always be truncated. However, while the destination iov is + // zero length, MSG_TRUNC returns the size of the next message so it should + // not be zero. + ASSERT_EQ(msg.msg_flags & MSG_TRUNC, MSG_TRUNC); + ASSERT_NE(trunclen, 0); + // Returned length is at least the header and ifaddrmsg. + EXPECT_GE(trunclen, sizeof(struct nlmsghdr) + sizeof(struct ifaddrmsg)); + + // Reset the msg_flags to make sure that the recvmsg call is setting them + // properly. + msg.msg_flags = 0; + + // Make a second recvmsg call to get the next message. 
+ ASSERT_THAT(trunclen2 = RetryEINTR(recvmsg)(fd.get(), &msg, MSG_TRUNC), + SyscallSucceeds()); + ASSERT_EQ(msg.msg_flags & MSG_TRUNC, MSG_TRUNC); + ASSERT_NE(trunclen2, 0); + + // Assert that the received messages are not the same. + // + // We are calling recvmsg with a zero length buffer so we have no way to + // inspect the messages to make sure they are not equal in value. The best + // we can do is to compare their lengths. + ASSERT_NE(trunclen, trunclen2); +} + +// RecvmsgTruncPeek tests recvmsg with the combination of the MSG_TRUNC and +// MSG_PEEK flags and a zero length output buffer. This is normally used to +// read the full length of the next message on the socket without consuming +// it, so a properly sized buffer can be allocated to store the message. This +// test tests that scenario. +TEST(NetlinkRouteTest, RecvmsgTruncPeek) { + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket()); + + struct request { + struct nlmsghdr hdr; + struct rtgenmsg rgm; + }; + + constexpr uint32_t kSeq = 12345; + + struct request req; + req.hdr.nlmsg_len = sizeof(req); + req.hdr.nlmsg_type = RTM_GETADDR; + req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP; + req.hdr.nlmsg_seq = kSeq; + req.rgm.rtgen_family = AF_UNSPEC; + + struct iovec iov = {}; + iov.iov_base = &req; + iov.iov_len = sizeof(req); + + struct msghdr msg = {}; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + ASSERT_THAT(RetryEINTR(sendmsg)(fd.get(), &msg, 0), SyscallSucceeds()); + + int type = -1; + do { + int peeklen; + int len; + + iov.iov_base = NULL; + iov.iov_len = 0; + + // Call recvmsg with MSG_PEEK and MSG_TRUNC. This will peek at the message + // and return its full length. + // See: MSG_TRUNC http://man7.org/linux/man-pages/man2/recv.2.html + ASSERT_THAT( + peeklen = RetryEINTR(recvmsg)(fd.get(), &msg, MSG_PEEK | MSG_TRUNC), + SyscallSucceeds()); + + // Message should always be truncated. 
+ ASSERT_EQ(msg.msg_flags & MSG_TRUNC, MSG_TRUNC); + ASSERT_NE(peeklen, 0); + + // Reset the message flags for the next call. + msg.msg_flags = 0; + + // Make the actual call to recvmsg to get the actual data. We will use + // the length returned from the peek call for the allocated buffer size. + std::vector<char> buf(peeklen); + iov.iov_base = buf.data(); + iov.iov_len = buf.size(); + ASSERT_THAT(len = RetryEINTR(recvmsg)(fd.get(), &msg, 0), + SyscallSucceeds()); + + // Message should not be truncated since we allocated the correct buffer + // size. + EXPECT_NE(msg.msg_flags & MSG_TRUNC, MSG_TRUNC); + + // MSG_PEEK should have left data on the socket and the subsequent call + // should have retrieved the same data. Both calls should have + // returned the message's full length so they should be equal. + ASSERT_NE(len, 0); + ASSERT_EQ(peeklen, len); + + for (struct nlmsghdr* hdr = reinterpret_cast<struct nlmsghdr*>(buf.data()); + NLMSG_OK(hdr, len); hdr = NLMSG_NEXT(hdr, len)) { + type = hdr->nlmsg_type; + } + } while (type != NLMSG_DONE && type != NLMSG_ERROR); +} + } // namespace } // namespace testing diff --git a/test/syscalls/linux/socket_netlink_util.cc b/test/syscalls/linux/socket_netlink_util.cc index 36b6560c2..fcb8f8a88 100644 --- a/test/syscalls/linux/socket_netlink_util.cc +++ b/test/syscalls/linux/socket_netlink_util.cc @@ -91,6 +91,13 @@ PosixError NetlinkRequestResponse( NLMSG_OK(hdr, len); hdr = NLMSG_NEXT(hdr, len)) { fn(hdr); type = hdr->nlmsg_type; + // Done should include an integer payload for dump_done_errno. + // See net/netlink/af_netlink.c:netlink_dump + // Some tools like the 'ip' tool check the minimum length of the + // NLMSG_DONE message. + if (type == NLMSG_DONE) { + EXPECT_GE(hdr->nlmsg_len, NLMSG_LENGTH(sizeof(int))); + } } } while (type != NLMSG_DONE && type != NLMSG_ERROR); |