summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorRyan Heacock <rheacock@google.com>2019-12-24 08:48:14 -0800
committergVisor bot <gvisor-bot@google.com>2019-12-24 08:49:39 -0800
commite013c48c78c9a7daf245b7de9563e3a0bd8a1e97 (patch)
tree4569d1851e72e61faebc63c82997555afdde04a9
parent574e988f2bc6060078a17f37a377441703c52a22 (diff)
Enable IP_RECVTOS socket option for datagram sockets
Added the ability to get/set the IP_RECVTOS socket option on UDP endpoints. If enabled, TOS from the incoming Network Header passed as ancillary data in the ControlMessages. Test: * Added unit test to udp_test.go that tests getting/setting as well as verifying that we receive expected TOS from incoming packet. * Added a syscall test PiperOrigin-RevId: 287029703
-rw-r--r--pkg/sentry/socket/control/control.go2
-rw-r--r--pkg/sentry/socket/netstack/netstack.go42
-rw-r--r--pkg/tcpip/checker/checker.go16
-rw-r--r--pkg/tcpip/stack/nic.go2
-rw-r--r--pkg/tcpip/stack/stack.go2
-rw-r--r--pkg/tcpip/tcpip.go6
-rw-r--r--pkg/tcpip/transport/raw/endpoint.go2
-rw-r--r--pkg/tcpip/transport/udp/endpoint.go31
-rw-r--r--pkg/tcpip/transport/udp/udp_test.go69
-rw-r--r--test/syscalls/linux/socket_ip_udp_generic.cc40
-rw-r--r--test/syscalls/linux/udp_socket_test_cases.cc8
11 files changed, 201 insertions, 19 deletions
diff --git a/pkg/sentry/socket/control/control.go b/pkg/sentry/socket/control/control.go
index af1a4e95f..b649dd021 100644
--- a/pkg/sentry/socket/control/control.go
+++ b/pkg/sentry/socket/control/control.go
@@ -327,7 +327,7 @@ func PackInq(t *kernel.Task, inq int32, buf []byte) []byte {
}
// PackTOS packs an IP_TOS socket control message.
-func PackTOS(t *kernel.Task, tos int8, buf []byte) []byte {
+func PackTOS(t *kernel.Task, tos uint8, buf []byte) []byte {
return putCmsgStruct(
buf,
linux.SOL_IP,
diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go
index 140851c17..d2f263402 100644
--- a/pkg/sentry/socket/netstack/netstack.go
+++ b/pkg/sentry/socket/netstack/netstack.go
@@ -1323,6 +1323,21 @@ func getSockOptIP(t *kernel.Task, ep commonEndpoint, name, outLen int, family in
}
return int32(v), nil
+ case linux.IP_RECVTOS:
+ if outLen < sizeOfInt32 {
+ return nil, syserr.ErrInvalidArgument
+ }
+
+ var v tcpip.ReceiveTOSOption
+ if err := ep.GetSockOpt(&v); err != nil {
+ return nil, syserr.TranslateNetstackError(err)
+ }
+
+ if v {
+ return int32(1), nil
+ }
+ return int32(0), nil
+
default:
emitUnimplementedEventIP(t, name)
}
@@ -1808,6 +1823,16 @@ func setSockOptIP(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) *s
}
return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.IPv4TOSOption(v)))
+ case linux.IP_RECVTOS:
+ v, err := parseIntOrChar(optVal)
+ if err != nil {
+ return err
+ }
+
+ return syserr.TranslateNetstackError(ep.SetSockOpt(
+ tcpip.ReceiveTOSOption(v != 0),
+ ))
+
case linux.IP_ADD_SOURCE_MEMBERSHIP,
linux.IP_BIND_ADDRESS_NO_PORT,
linux.IP_BLOCK_SOURCE,
@@ -1828,7 +1853,6 @@ func setSockOptIP(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) *s
linux.IP_RECVFRAGSIZE,
linux.IP_RECVOPTS,
linux.IP_RECVORIGDSTADDR,
- linux.IP_RECVTOS,
linux.IP_RECVTTL,
linux.IP_RETOPTS,
linux.IP_TRANSPARENT,
@@ -2139,6 +2163,21 @@ func (s *SocketOperations) fillCmsgInq(cmsg *socket.ControlMessages) {
cmsg.IP.Inq = int32(len(s.readView) + rcvBufUsed)
}
+func (s *SocketOperations) fillCmsgTOS(cmsg *socket.ControlMessages) {
+ if s.skType != linux.SOCK_DGRAM {
+ return
+ }
+ var receiveTOS tcpip.ReceiveTOSOption
+ if err := s.Endpoint.GetSockOpt(&receiveTOS); err != nil {
+ return
+ }
+ if !receiveTOS {
+ return
+ }
+ cmsg.IP.HasTOS = s.readCM.HasTOS
+ cmsg.IP.TOS = s.readCM.TOS
+}
+
// nonBlockingRead issues a non-blocking read.
//
// TODO(b/78348848): Support timestamps for stream sockets.
@@ -2244,6 +2283,7 @@ func (s *SocketOperations) nonBlockingRead(ctx context.Context, dst usermem.IOSe
cmsg := s.controlMessages()
s.fillCmsgInq(&cmsg)
+ s.fillCmsgTOS(&cmsg)
return n, flags, addr, addrLen, cmsg, syserr.FromError(err)
}
diff --git a/pkg/tcpip/checker/checker.go b/pkg/tcpip/checker/checker.go
index 2f15bf1f1..542abc99d 100644
--- a/pkg/tcpip/checker/checker.go
+++ b/pkg/tcpip/checker/checker.go
@@ -33,6 +33,9 @@ type NetworkChecker func(*testing.T, []header.Network)
// TransportChecker is a function to check a property of a transport packet.
type TransportChecker func(*testing.T, header.Transport)
+// ControlMessagesChecker is a function to check a property of ancillary data.
+type ControlMessagesChecker func(*testing.T, tcpip.ControlMessages)
+
// IPv4 checks the validity and properties of the given IPv4 packet. It is
// expected to be used in conjunction with other network checkers for specific
// properties. For example, to check the source and destination address, one
@@ -158,6 +161,19 @@ func FragmentFlags(flags uint8) NetworkChecker {
}
}
+// ReceiveTOS creates a checker that checks the TOS field in ControlMessages.
+func ReceiveTOS(want uint8) ControlMessagesChecker {
+ return func(t *testing.T, cm tcpip.ControlMessages) {
+ t.Helper()
+ if !cm.HasTOS {
+ t.Fatalf("got cm.HasTOS = %t, want cm.TOS = %d", cm.HasTOS, want)
+ }
+ if got := cm.TOS; got != want {
+ t.Fatalf("got cm.TOS = %d, want %d", got, want)
+ }
+ }
+}
+
// TOS creates a checker that checks the TOS field.
func TOS(tos uint8, label uint32) NetworkChecker {
return func(t *testing.T, h []header.Network) {
diff --git a/pkg/tcpip/stack/nic.go b/pkg/tcpip/stack/nic.go
index ddd014658..a4556674b 100644
--- a/pkg/tcpip/stack/nic.go
+++ b/pkg/tcpip/stack/nic.go
@@ -575,7 +575,7 @@ func (n *NIC) RemoveAddressRange(subnet tcpip.Subnet) {
n.mu.Unlock()
}
-// Subnets returns the Subnets associated with this NIC.
+// AddressRanges returns the Subnets associated with this NIC.
func (n *NIC) AddressRanges() []tcpip.Subnet {
n.mu.RLock()
defer n.mu.RUnlock()
diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go
index 7a9600679..251336224 100644
--- a/pkg/tcpip/stack/stack.go
+++ b/pkg/tcpip/stack/stack.go
@@ -829,7 +829,7 @@ func (s *Stack) CheckNIC(id tcpip.NICID) bool {
return false
}
-// NICSubnets returns a map of NICIDs to their associated subnets.
+// NICAddressRanges returns a map of NICIDs to their associated subnets.
func (s *Stack) NICAddressRanges() map[tcpip.NICID][]tcpip.Subnet {
s.mu.RLock()
defer s.mu.RUnlock()
diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go
index f62fd729f..5c7b2af88 100644
--- a/pkg/tcpip/tcpip.go
+++ b/pkg/tcpip/tcpip.go
@@ -322,7 +322,7 @@ type ControlMessages struct {
HasTOS bool
// TOS is the IPv4 type of service of the associated packet.
- TOS int8
+ TOS uint8
// HasTClass indicates whether Tclass is valid/set.
HasTClass bool
@@ -666,6 +666,10 @@ type IPv4TOSOption uint8
// for all subsequent outgoing IPv6 packets from the endpoint.
type IPv6TrafficClassOption uint8
+// ReceiveTOSOption is used by SetSockOpt/GetSockOpt to specify if the TOS
+// ancillary message is passed with incoming packets.
+type ReceiveTOSOption bool
+
// Route is a row in the routing table. It specifies through which NIC (and
// gateway) sets of packets should be routed. A row is considered viable if the
// masked target address matches the destination address in the row.
diff --git a/pkg/tcpip/transport/raw/endpoint.go b/pkg/tcpip/transport/raw/endpoint.go
index 5aafe2615..6d23ab5a1 100644
--- a/pkg/tcpip/transport/raw/endpoint.go
+++ b/pkg/tcpip/transport/raw/endpoint.go
@@ -510,7 +510,7 @@ func (e *endpoint) SetSockOpt(opt interface{}) *tcpip.Error {
}
// SetSockOptInt implements tcpip.Endpoint.SetSockOptInt.
-func (ep *endpoint) SetSockOptInt(opt tcpip.SockOpt, v int) *tcpip.Error {
+func (e *endpoint) SetSockOptInt(opt tcpip.SockOpt, v int) *tcpip.Error {
return tcpip.ErrUnknownProtocolOption
}
diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go
index 1ac4705af..269470ed4 100644
--- a/pkg/tcpip/transport/udp/endpoint.go
+++ b/pkg/tcpip/transport/udp/endpoint.go
@@ -32,6 +32,7 @@ type udpPacket struct {
senderAddress tcpip.FullAddress
data buffer.VectorisedView `state:".(buffer.VectorisedView)"`
timestamp int64
+ tos uint8
}
// EndpointState represents the state of a UDP endpoint.
@@ -114,6 +115,10 @@ type endpoint struct {
// applied while sending packets. Defaults to 0 as on Linux.
sendTOS uint8
+ // receiveTOS determines if the incoming IPv4 TOS header field is passed
+ // as ancillary data to ControlMessages on Read.
+ receiveTOS bool
+
// shutdownFlags represent the current shutdown state of the endpoint.
shutdownFlags tcpip.ShutdownFlags
@@ -244,7 +249,12 @@ func (e *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMess
*addr = p.senderAddress
}
- return p.data.ToView(), tcpip.ControlMessages{HasTimestamp: true, Timestamp: p.timestamp}, nil
+ return p.data.ToView(), tcpip.ControlMessages{
+ HasTimestamp: true,
+ Timestamp: p.timestamp,
+ HasTOS: e.receiveTOS,
+ TOS: p.tos,
+ }, nil
}
// prepareForWrite prepares the endpoint for sending data. In particular, it
@@ -656,6 +666,12 @@ func (e *endpoint) SetSockOpt(opt interface{}) *tcpip.Error {
e.sendTOS = uint8(v)
e.mu.Unlock()
return nil
+
+ case tcpip.ReceiveTOSOption:
+ e.mu.Lock()
+ e.receiveTOS = bool(v)
+ e.mu.Unlock()
+ return nil
}
return nil
}
@@ -792,6 +808,12 @@ func (e *endpoint) GetSockOpt(opt interface{}) *tcpip.Error {
e.mu.RUnlock()
return nil
+ case *tcpip.ReceiveTOSOption:
+ e.mu.RLock()
+ *o = tcpip.ReceiveTOSOption(e.receiveTOS)
+ e.mu.RUnlock()
+ return nil
+
default:
return tcpip.ErrUnknownProtocolOption
}
@@ -1238,6 +1260,13 @@ func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, pk
e.rcvList.PushBack(packet)
e.rcvBufSize += pkt.Data.Size()
+ // Save any useful information from the NetworkHeader to the packet.
+ switch r.NetProto {
+ case header.IPv4ProtocolNumber:
+ // This packet has already been validated before being passed up the stack.
+ packet.tos, _ = header.IPv4(pkt.NetworkHeader).TOS()
+ }
+
packet.timestamp = e.stack.NowNanoseconds()
e.rcvMu.Unlock()
diff --git a/pkg/tcpip/transport/udp/udp_test.go b/pkg/tcpip/transport/udp/udp_test.go
index 7051a7a9c..43b8b35ba 100644
--- a/pkg/tcpip/transport/udp/udp_test.go
+++ b/pkg/tcpip/transport/udp/udp_test.go
@@ -56,6 +56,7 @@ const (
multicastAddr = "\xe8\x2b\xd3\xea"
multicastV6Addr = "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
broadcastAddr = header.IPv4Broadcast
+ testTOS = 0x80
// defaultMTU is the MTU, in bytes, used throughout the tests, except
// where another value is explicitly used. It is chosen to match the MTU
@@ -453,6 +454,7 @@ func (c *testContext) injectV4Packet(payload []byte, h *header4Tuple, valid bool
ip := header.IPv4(buf)
ip.Encode(&header.IPv4Fields{
IHL: header.IPv4MinimumSize,
+ TOS: testTOS,
TotalLength: uint16(len(buf)),
TTL: 65,
Protocol: uint8(udp.ProtocolNumber),
@@ -556,8 +558,8 @@ func TestBindToDeviceOption(t *testing.T) {
// testReadInternal sends a packet of the given test flow into the stack by
// injecting it into the link endpoint. It then attempts to read it from the
// UDP endpoint and depending on if this was expected to succeed verifies its
-// correctness.
-func testReadInternal(c *testContext, flow testFlow, packetShouldBeDropped, expectReadError bool) {
+// correctness including any additional checker functions provided.
+func testReadInternal(c *testContext, flow testFlow, packetShouldBeDropped, expectReadError bool, checkers ...checker.ControlMessagesChecker) {
c.t.Helper()
payload := newPayload()
@@ -572,12 +574,12 @@ func testReadInternal(c *testContext, flow testFlow, packetShouldBeDropped, expe
epstats := c.ep.Stats().(*tcpip.TransportEndpointStats).Clone()
var addr tcpip.FullAddress
- v, _, err := c.ep.Read(&addr)
+ v, cm, err := c.ep.Read(&addr)
if err == tcpip.ErrWouldBlock {
// Wait for data to become available.
select {
case <-ch:
- v, _, err = c.ep.Read(&addr)
+ v, cm, err = c.ep.Read(&addr)
case <-time.After(300 * time.Millisecond):
if packetShouldBeDropped {
@@ -610,15 +612,21 @@ func testReadInternal(c *testContext, flow testFlow, packetShouldBeDropped, expe
if !bytes.Equal(payload, v) {
c.t.Fatalf("bad payload: got %x, want %x", v, payload)
}
+
+ // Run any checkers against the ControlMessages.
+ for _, f := range checkers {
+ f(c.t, cm)
+ }
+
c.checkEndpointReadStats(1, epstats, err)
}
// testRead sends a packet of the given test flow into the stack by injecting it
// into the link endpoint. It then reads it from the UDP endpoint and verifies
-// its correctness.
-func testRead(c *testContext, flow testFlow) {
+// its correctness including any additional checker functions provided.
+func testRead(c *testContext, flow testFlow, checkers ...checker.ControlMessagesChecker) {
c.t.Helper()
- testReadInternal(c, flow, false /* packetShouldBeDropped */, false /* expectReadError */)
+ testReadInternal(c, flow, false /* packetShouldBeDropped */, false /* expectReadError */, checkers...)
}
// testFailingRead sends a packet of the given test flow into the stack by
@@ -1286,7 +1294,7 @@ func TestTOSV4(t *testing.T) {
c.createEndpointForFlow(flow)
- const tos = 0xC0
+ const tos = testTOS
var v tcpip.IPv4TOSOption
if err := c.ep.GetSockOpt(&v); err != nil {
c.t.Errorf("GetSockopt failed: %s", err)
@@ -1321,7 +1329,7 @@ func TestTOSV6(t *testing.T) {
c.createEndpointForFlow(flow)
- const tos = 0xC0
+ const tos = testTOS
var v tcpip.IPv6TrafficClassOption
if err := c.ep.GetSockOpt(&v); err != nil {
c.t.Errorf("GetSockopt failed: %s", err)
@@ -1348,6 +1356,49 @@ func TestTOSV6(t *testing.T) {
}
}
+func TestReceiveTOSV4(t *testing.T) {
+ for _, flow := range []testFlow{unicastV4, broadcast} {
+ t.Run(fmt.Sprintf("flow:%s", flow), func(t *testing.T) {
+ c := newDualTestContext(t, defaultMTU)
+ defer c.cleanup()
+
+ c.createEndpointForFlow(flow)
+
+ // Verify that setting and reading the option works.
+ const recvTos = true
+ var v tcpip.ReceiveTOSOption
+ if err := c.ep.GetSockOpt(&v); err != nil {
+ c.t.Errorf("GetSockopt failed: %s", err)
+ }
+ // Test for expected default value.
+ if v != false {
+ c.t.Errorf("got GetSockOpt(...) = %t, want = %t", v, false)
+ }
+
+ if err := c.ep.SetSockOpt(tcpip.ReceiveTOSOption(recvTos)); err != nil {
+ c.t.Errorf("SetSockOpt(%#v) failed: %s", tcpip.ReceiveTOSOption(recvTos), err)
+ }
+
+ if err := c.ep.GetSockOpt(&v); err != nil {
+ c.t.Errorf("GetSockopt failed: %s", err)
+ }
+
+ if want := tcpip.ReceiveTOSOption(recvTos); v != want {
+ c.t.Errorf("got GetSockOpt(...) = %t, want = %t", v, want)
+ }
+
+ // Bind to wildcard.
+ if err := c.ep.Bind(tcpip.FullAddress{Port: stackPort}); err != nil {
+ c.t.Fatalf("Bind failed: %s", err)
+ }
+
+ // Verify that the correct received TOS is actually handed through as
+ // ancillary data to the ControlMessages struct.
+ testRead(c, flow, checker.ReceiveTOS(testTOS))
+ })
+ }
+}
+
func TestMulticastInterfaceOption(t *testing.T) {
for _, flow := range []testFlow{multicastV4, multicastV4in6, multicastV6, multicastV6Only} {
t.Run(fmt.Sprintf("flow:%s", flow), func(t *testing.T) {
diff --git a/test/syscalls/linux/socket_ip_udp_generic.cc b/test/syscalls/linux/socket_ip_udp_generic.cc
index 66eb68857..53290bed7 100644
--- a/test/syscalls/linux/socket_ip_udp_generic.cc
+++ b/test/syscalls/linux/socket_ip_udp_generic.cc
@@ -209,6 +209,46 @@ TEST_P(UDPSocketPairTest, SetMulticastLoopChar) {
EXPECT_EQ(get, kSockOptOn);
}
+// Ensure that Receiving TOS is off by default.
+TEST_P(UDPSocketPairTest, RecvTosDefault) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ int get = -1;
+ socklen_t get_len = sizeof(get);
+ ASSERT_THAT(
+ getsockopt(sockets->first_fd(), IPPROTO_IP, IP_RECVTOS, &get, &get_len),
+ SyscallSucceedsWithValue(0));
+ EXPECT_EQ(get_len, sizeof(get));
+ EXPECT_EQ(get, kSockOptOff);
+}
+
+// Test that setting and getting IP_RECVTOS works as expected.
+TEST_P(UDPSocketPairTest, SetRecvTos) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_RECVTOS,
+ &kSockOptOff, sizeof(kSockOptOff)),
+ SyscallSucceeds());
+
+ int get = -1;
+ socklen_t get_len = sizeof(get);
+ ASSERT_THAT(
+ getsockopt(sockets->first_fd(), IPPROTO_IP, IP_RECVTOS, &get, &get_len),
+ SyscallSucceedsWithValue(0));
+ EXPECT_EQ(get_len, sizeof(get));
+ EXPECT_EQ(get, kSockOptOff);
+
+ ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_RECVTOS,
+ &kSockOptOn, sizeof(kSockOptOn)),
+ SyscallSucceeds());
+
+ ASSERT_THAT(
+ getsockopt(sockets->first_fd(), IPPROTO_IP, IP_RECVTOS, &get, &get_len),
+ SyscallSucceedsWithValue(0));
+ EXPECT_EQ(get_len, sizeof(get));
+ EXPECT_EQ(get, kSockOptOn);
+}
+
TEST_P(UDPSocketPairTest, ReuseAddrDefault) {
auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
diff --git a/test/syscalls/linux/udp_socket_test_cases.cc b/test/syscalls/linux/udp_socket_test_cases.cc
index dc35c2f50..68e0a8109 100644
--- a/test/syscalls/linux/udp_socket_test_cases.cc
+++ b/test/syscalls/linux/udp_socket_test_cases.cc
@@ -1349,8 +1349,9 @@ TEST_P(UdpSocketTest, TimestampIoctlPersistence) {
// outgoing packets, and that a receiving socket with IP_RECVTOS or
// IPV6_RECVTCLASS will create the corresponding control message.
TEST_P(UdpSocketTest, SetAndReceiveTOS) {
- // TODO(b/68320120): IP_RECVTOS/IPV6_RECVTCLASS not supported for netstack.
- SKIP_IF(IsRunningOnGvisor() && !IsRunningWithHostinet());
+ // TODO(b/68320120): IPV6_RECVTCLASS not supported for netstack.
+ SKIP_IF((GetParam() != AddressFamily::kIpv4) && IsRunningOnGvisor() &&
+ !IsRunningWithHostinet());
ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds());
ASSERT_THAT(connect(t_, addr_[0], addrlen_), SyscallSucceeds());
@@ -1421,7 +1422,8 @@ TEST_P(UdpSocketTest, SetAndReceiveTOS) {
// TOS byte on outgoing packets, and that a receiving socket with IP_RECVTOS or
// IPV6_RECVTCLASS will create the corresponding control message.
TEST_P(UdpSocketTest, SendAndReceiveTOS) {
- // TODO(b/68320120): IP_RECVTOS/IPV6_RECVTCLASS not supported for netstack.
+ // TODO(b/68320120): IPV6_RECVTCLASS not supported for netstack.
+ // TODO(b/146661005): Setting TOS via cmsg not supported for netstack.
SKIP_IF(IsRunningOnGvisor() && !IsRunningWithHostinet());
ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds());
ASSERT_THAT(connect(t_, addr_[0], addrlen_), SyscallSucceeds());