From debd213da61cf35d7c91346820e93fc87bfa5896 Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Mon, 13 Jan 2020 14:45:31 -0800 Subject: Allow dual stack sockets to operate on AF_INET Fixes #1490 Fixes #1495 PiperOrigin-RevId: 289523250 --- pkg/sentry/socket/netstack/netstack.go | 65 ++++++++++++++++++++++++++-------- pkg/sentry/socket/unix/unix.go | 5 ++- 2 files changed, 55 insertions(+), 15 deletions(-) (limited to 'pkg/sentry/socket') diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go index 099319327..c020c11cb 100644 --- a/pkg/sentry/socket/netstack/netstack.go +++ b/pkg/sentry/socket/netstack/netstack.go @@ -324,22 +324,15 @@ func bytesToIPAddress(addr []byte) tcpip.Address { // converts it to the FullAddress format. It supports AF_UNIX, AF_INET, // AF_INET6, and AF_PACKET addresses. // -// strict indicates whether addresses with the AF_UNSPEC family are accepted of not. -// // AddressAndFamily returns an address and its family. -func AddressAndFamily(sfamily int, addr []byte, strict bool) (tcpip.FullAddress, uint16, *syserr.Error) { +func AddressAndFamily(addr []byte) (tcpip.FullAddress, uint16, *syserr.Error) { // Make sure we have at least 2 bytes for the address family. if len(addr) < 2 { return tcpip.FullAddress{}, 0, syserr.ErrInvalidArgument } - family := usermem.ByteOrder.Uint16(addr) - if family != uint16(sfamily) && (strict || family != linux.AF_UNSPEC) { - return tcpip.FullAddress{}, family, syserr.ErrAddressFamilyNotSupported - } - // Get the rest of the fields based on the address family. - switch family { + switch family := usermem.ByteOrder.Uint16(addr); family { case linux.AF_UNIX: path := addr[2:] if len(path) > linux.UnixPathMax { @@ -638,10 +631,40 @@ func (s *SocketOperations) Readiness(mask waiter.EventMask) waiter.EventMask { return r } +func (s *SocketOperations) checkFamily(family uint16, exact bool) *syserr.Error { + if family == uint16(s.family) { + return nil + } + if !exact && family == linux.AF_INET && s.family == linux.AF_INET6 { + v, err := s.Endpoint.GetSockOptBool(tcpip.V6OnlyOption) + if err != nil { + return syserr.TranslateNetstackError(err) + } + if !v { + return nil + } + } + return syserr.ErrInvalidArgument +} + +// mapFamily maps the AF_INET ANY address to the IPv4-mapped IPv6 ANY if the +// receiver's family is AF_INET6. +// +// This is a hack to work around the fact that both IPv4 and IPv6 ANY are +// represented by the empty string. +// +// TODO(gvisor.dev/issues/1556): remove this function. +func (s *SocketOperations) mapFamily(addr tcpip.FullAddress, family uint16) tcpip.FullAddress { + if len(addr.Addr) == 0 && s.family == linux.AF_INET6 && family == linux.AF_INET { + addr.Addr = "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\xff\x00\x00\x00\x00" + } + return addr +} + // Connect implements the linux syscall connect(2) for sockets backed by // tpcip.Endpoint. func (s *SocketOperations) Connect(t *kernel.Task, sockaddr []byte, blocking bool) *syserr.Error { - addr, family, err := AddressAndFamily(s.family, sockaddr, false /* strict */) + addr, family, err := AddressAndFamily(sockaddr) if err != nil { return err } @@ -653,6 +676,12 @@ func (s *SocketOperations) Connect(t *kernel.Task, sockaddr []byte, blocking boo } return syserr.TranslateNetstackError(err) } + + if err := s.checkFamily(family, false /* exact */); err != nil { + return err + } + addr = s.mapFamily(addr, family) + // Always return right away in the non-blocking case. if !blocking { return syserr.TranslateNetstackError(s.Endpoint.Connect(addr)) @@ -681,10 +710,14 @@ func (s *SocketOperations) Connect(t *kernel.Task, sockaddr []byte, blocking boo // Bind implements the linux syscall bind(2) for sockets backed by // tcpip.Endpoint. func (s *SocketOperations) Bind(t *kernel.Task, sockaddr []byte) *syserr.Error { - addr, _, err := AddressAndFamily(s.family, sockaddr, true /* strict */) + addr, family, err := AddressAndFamily(sockaddr) if err != nil { return err } + if err := s.checkFamily(family, true /* exact */); err != nil { + return err + } + addr = s.mapFamily(addr, family) // Issue the bind request to the endpoint. return syserr.TranslateNetstackError(s.Endpoint.Bind(addr)) @@ -2080,8 +2113,8 @@ func ConvertAddress(family int, addr tcpip.FullAddress) (linux.SockAddr, uint32) case linux.AF_INET6: var out linux.SockAddrInet6 - if len(addr.Addr) == 4 { - // Copy address is v4-mapped format. + if len(addr.Addr) == header.IPv4AddressSize { + // Copy address in v4-mapped format. copy(out.Addr[12:], addr.Addr) out.Addr[10] = 0xff out.Addr[11] = 0xff @@ -2395,10 +2428,14 @@ func (s *SocketOperations) SendMsg(t *kernel.Task, src usermem.IOSequence, to [] var addr *tcpip.FullAddress if len(to) > 0 { - addrBuf, _, err := AddressAndFamily(s.family, to, true /* strict */) + addrBuf, family, err := AddressAndFamily(to) if err != nil { return 0, err } + if err := s.checkFamily(family, false /* exact */); err != nil { + return 0, err + } + addrBuf = s.mapFamily(addrBuf, family) addr = &addrBuf } diff --git a/pkg/sentry/socket/unix/unix.go b/pkg/sentry/socket/unix/unix.go index 91effe89a..7f49ba864 100644 --- a/pkg/sentry/socket/unix/unix.go +++ b/pkg/sentry/socket/unix/unix.go @@ -116,13 +116,16 @@ func (s *SocketOperations) Endpoint() transport.Endpoint { // extractPath extracts and validates the address. func extractPath(sockaddr []byte) (string, *syserr.Error) { - addr, _, err := netstack.AddressAndFamily(linux.AF_UNIX, sockaddr, true /* strict */) + addr, family, err := netstack.AddressAndFamily(sockaddr) if err != nil { if err == syserr.ErrAddressFamilyNotSupported { err = syserr.ErrInvalidArgument } return "", err } + if family != linux.AF_UNIX { + return "", syserr.ErrInvalidArgument + } // The address is trimmed by GetAddress. p := string(addr.Addr) -- cgit v1.2.3 From 50625cee59aaff834c7968771ab385ad0e7b0e1f Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Tue, 14 Jan 2020 13:31:52 -0800 Subject: Implement {g,s}etsockopt(IP_RECVTOS) for UDP sockets PiperOrigin-RevId: 289718534 --- pkg/sentry/socket/control/control.go | 2 +- pkg/sentry/socket/netstack/netstack.go | 36 +++++++++++++-- pkg/tcpip/checker/checker.go | 16 +++++++ pkg/tcpip/stack/nic.go | 2 +- pkg/tcpip/stack/stack.go | 2 +- pkg/tcpip/tcpip.go | 8 +++- pkg/tcpip/transport/udp/endpoint.go | 40 +++++++++++++++-- pkg/tcpip/transport/udp/udp_test.go | 67 ++++++++++++++++++++++++---- test/syscalls/linux/socket_ip_udp_generic.cc | 40 +++++++++++++++++ test/syscalls/linux/udp_socket_test_cases.cc | 8 ++-- 10 files changed, 197 insertions(+), 24 deletions(-) (limited to 'pkg/sentry/socket') diff --git a/pkg/sentry/socket/control/control.go b/pkg/sentry/socket/control/control.go index 4301b697c..1684dfc24 100644 --- a/pkg/sentry/socket/control/control.go +++ b/pkg/sentry/socket/control/control.go @@ -327,7 +327,7 @@ func PackInq(t *kernel.Task, inq int32, buf []byte) []byte { } // PackTOS packs an IP_TOS socket control message. -func PackTOS(t *kernel.Task, tos int8, buf []byte) []byte { +func PackTOS(t *kernel.Task, tos uint8, buf []byte) []byte { return putCmsgStruct( buf, linux.SOL_IP, diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go index c020c11cb..d2f7e987d 100644 --- a/pkg/sentry/socket/netstack/netstack.go +++ b/pkg/sentry/socket/netstack/netstack.go @@ -1268,11 +1268,11 @@ func getSockOptIPv6(t *kernel.Task, ep commonEndpoint, name, outLen int) (interf if err != nil { return nil, syserr.TranslateNetstackError(err) } - var o uint32 + var o int32 if v { o = 1 } - return int32(o), nil + return o, nil case linux.IPV6_PATHMTU: t.Kernel().EmitUnimplementedEvent(t) @@ -1377,6 +1377,21 @@ func getSockOptIP(t *kernel.Task, ep commonEndpoint, name, outLen int, family in } return int32(v), nil + case linux.IP_RECVTOS: + if outLen < sizeOfInt32 { + return nil, syserr.ErrInvalidArgument + } + + v, err := ep.GetSockOptBool(tcpip.ReceiveTOSOption) + if err != nil { + return nil, syserr.TranslateNetstackError(err) + } + var o int32 + if v { + o = 1 + } + return o, nil + default: emitUnimplementedEventIP(t, name) } @@ -1895,6 +1910,13 @@ func setSockOptIP(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) *s } return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.IPv4TOSOption(v))) + case linux.IP_RECVTOS: + v, err := parseIntOrChar(optVal) + if err != nil { + return err + } + return syserr.TranslateNetstackError(ep.SetSockOptBool(tcpip.ReceiveTOSOption, v != 0)) + case linux.IP_ADD_SOURCE_MEMBERSHIP, linux.IP_BIND_ADDRESS_NO_PORT, linux.IP_BLOCK_SOURCE, @@ -1915,7 +1937,6 @@ func setSockOptIP(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) *s linux.IP_RECVFRAGSIZE, linux.IP_RECVOPTS, linux.IP_RECVORIGDSTADDR, - linux.IP_RECVTOS, linux.IP_RECVTTL, linux.IP_RETOPTS, linux.IP_TRANSPARENT, @@ -2335,7 +2356,14 @@ func (s *SocketOperations) nonBlockingRead(ctx context.Context, dst usermem.IOSe } func (s *SocketOperations) controlMessages() socket.ControlMessages { - return socket.ControlMessages{IP: tcpip.ControlMessages{HasTimestamp: s.readCM.HasTimestamp && s.sockOptTimestamp, Timestamp: s.readCM.Timestamp}} + return socket.ControlMessages{ + IP: tcpip.ControlMessages{ + HasTimestamp: s.readCM.HasTimestamp && s.sockOptTimestamp, + Timestamp: s.readCM.Timestamp, + HasTOS: s.readCM.HasTOS, + TOS: s.readCM.TOS, + }, + } } // updateTimestamp sets the timestamp for SIOCGSTAMP. It should be called after diff --git a/pkg/tcpip/checker/checker.go b/pkg/tcpip/checker/checker.go index 2f15bf1f1..542abc99d 100644 --- a/pkg/tcpip/checker/checker.go +++ b/pkg/tcpip/checker/checker.go @@ -33,6 +33,9 @@ type NetworkChecker func(*testing.T, []header.Network) // TransportChecker is a function to check a property of a transport packet. type TransportChecker func(*testing.T, header.Transport) +// ControlMessagesChecker is a function to check a property of ancillary data. +type ControlMessagesChecker func(*testing.T, tcpip.ControlMessages) + // IPv4 checks the validity and properties of the given IPv4 packet. It is // expected to be used in conjunction with other network checkers for specific // properties. For example, to check the source and destination address, one @@ -158,6 +161,19 @@ func FragmentFlags(flags uint8) NetworkChecker { } } +// ReceiveTOS creates a checker that checks the TOS field in ControlMessages. +func ReceiveTOS(want uint8) ControlMessagesChecker { + return func(t *testing.T, cm tcpip.ControlMessages) { + t.Helper() + if !cm.HasTOS { + t.Fatalf("got cm.HasTOS = %t, want cm.TOS = %d", cm.HasTOS, want) + } + if got := cm.TOS; got != want { + t.Fatalf("got cm.TOS = %d, want %d", got, want) + } + } +} + // TOS creates a checker that checks the TOS field. func TOS(tos uint8, label uint32) NetworkChecker { return func(t *testing.T, h []header.Network) { diff --git a/pkg/tcpip/stack/nic.go b/pkg/tcpip/stack/nic.go index abf73fe33..071221d5a 100644 --- a/pkg/tcpip/stack/nic.go +++ b/pkg/tcpip/stack/nic.go @@ -763,7 +763,7 @@ func (n *NIC) RemoveAddressRange(subnet tcpip.Subnet) { n.mu.Unlock() } -// Subnets returns the Subnets associated with this NIC. +// AddressRanges returns the Subnets associated with this NIC. func (n *NIC) AddressRanges() []tcpip.Subnet { n.mu.RLock() defer n.mu.RUnlock() diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go index f8d89248e..386eb6eec 100644 --- a/pkg/tcpip/stack/stack.go +++ b/pkg/tcpip/stack/stack.go @@ -912,7 +912,7 @@ func (s *Stack) CheckNIC(id tcpip.NICID) bool { return false } -// NICSubnets returns a map of NICIDs to their associated subnets. +// NICAddressRanges returns a map of NICIDs to their associated subnets. func (s *Stack) NICAddressRanges() map[tcpip.NICID][]tcpip.Subnet { s.mu.RLock() defer s.mu.RUnlock() diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go index 4a090ac86..b7813cbc0 100644 --- a/pkg/tcpip/tcpip.go +++ b/pkg/tcpip/tcpip.go @@ -322,7 +322,7 @@ type ControlMessages struct { HasTOS bool // TOS is the IPv4 type of service of the associated packet. - TOS int8 + TOS uint8 // HasTClass indicates whether Tclass is valid/set. HasTClass bool @@ -500,9 +500,13 @@ type WriteOptions struct { type SockOptBool int const ( + // ReceiveTOSOption is used by SetSockOpt/GetSockOpt to specify if the TOS + // ancillary message is passed with incoming packets. + ReceiveTOSOption SockOptBool = iota + // V6OnlyOption is used by {G,S}etSockOptBool to specify whether an IPv6 // socket is to be restricted to sending and receiving IPv6 packets only. - V6OnlyOption SockOptBool = iota + V6OnlyOption ) // SockOptInt represents socket options which values have the int type. diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go index 13446f5d9..c9cbed8f4 100644 --- a/pkg/tcpip/transport/udp/endpoint.go +++ b/pkg/tcpip/transport/udp/endpoint.go @@ -31,6 +31,7 @@ type udpPacket struct { senderAddress tcpip.FullAddress data buffer.VectorisedView `state:".(buffer.VectorisedView)"` timestamp int64 + tos uint8 } // EndpointState represents the state of a UDP endpoint. @@ -113,6 +114,10 @@ type endpoint struct { // applied while sending packets. Defaults to 0 as on Linux. sendTOS uint8 + // receiveTOS determines if the incoming IPv4 TOS header field is passed + // as ancillary data to ControlMessages on Read. + receiveTOS bool + // shutdownFlags represent the current shutdown state of the endpoint. shutdownFlags tcpip.ShutdownFlags @@ -243,7 +248,18 @@ func (e *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMess *addr = p.senderAddress } - return p.data.ToView(), tcpip.ControlMessages{HasTimestamp: true, Timestamp: p.timestamp}, nil + cm := tcpip.ControlMessages{ + HasTimestamp: true, + Timestamp: p.timestamp, + } + e.mu.RLock() + receiveTOS := e.receiveTOS + e.mu.RUnlock() + if receiveTOS { + cm.HasTOS = true + cm.TOS = p.tos + } + return p.data.ToView(), cm, nil } // prepareForWrite prepares the endpoint for sending data. In particular, it @@ -458,6 +474,12 @@ func (e *endpoint) Peek([][]byte) (int64, tcpip.ControlMessages, *tcpip.Error) { // SetSockOptBool implements tcpip.Endpoint.SetSockOptBool. func (e *endpoint) SetSockOptBool(opt tcpip.SockOptBool, v bool) *tcpip.Error { switch opt { + case tcpip.ReceiveTOSOption: + e.mu.Lock() + e.receiveTOS = v + e.mu.Unlock() + return nil + case tcpip.V6OnlyOption: // We only recognize this option on v6 endpoints. if e.NetProto != header.IPv6ProtocolNumber { @@ -664,15 +686,21 @@ func (e *endpoint) SetSockOpt(opt interface{}) *tcpip.Error { // GetSockOptBool implements tcpip.Endpoint.GetSockOptBool. func (e *endpoint) GetSockOptBool(opt tcpip.SockOptBool) (bool, *tcpip.Error) { switch opt { + case tcpip.ReceiveTOSOption: + e.mu.RLock() + v := e.receiveTOS + e.mu.RUnlock() + return v, nil + case tcpip.V6OnlyOption: // We only recognize this option on v6 endpoints. if e.NetProto != header.IPv6ProtocolNumber { return false, tcpip.ErrUnknownProtocolOption } - e.mu.Lock() + e.mu.RLock() v := e.v6only - e.mu.Unlock() + e.mu.RUnlock() return v, nil } @@ -1215,6 +1243,12 @@ func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, pk e.rcvList.PushBack(packet) e.rcvBufSize += pkt.Data.Size() + // Save any useful information from the network header to the packet. + switch r.NetProto { + case header.IPv4ProtocolNumber: + packet.tos, _ = header.IPv4(pkt.NetworkHeader).TOS() + } + packet.timestamp = e.stack.NowNanoseconds() e.rcvMu.Unlock() diff --git a/pkg/tcpip/transport/udp/udp_test.go b/pkg/tcpip/transport/udp/udp_test.go index 0a82bc4fa..ee9d10555 100644 --- a/pkg/tcpip/transport/udp/udp_test.go +++ b/pkg/tcpip/transport/udp/udp_test.go @@ -56,6 +56,7 @@ const ( multicastAddr = "\xe8\x2b\xd3\xea" multicastV6Addr = "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" broadcastAddr = header.IPv4Broadcast + testTOS = 0x80 // defaultMTU is the MTU, in bytes, used throughout the tests, except // where another value is explicitly used. It is chosen to match the MTU @@ -453,6 +454,7 @@ func (c *testContext) injectV4Packet(payload []byte, h *header4Tuple, valid bool ip := header.IPv4(buf) ip.Encode(&header.IPv4Fields{ IHL: header.IPv4MinimumSize, + TOS: testTOS, TotalLength: uint16(len(buf)), TTL: 65, Protocol: uint8(udp.ProtocolNumber), @@ -552,8 +554,8 @@ func TestBindToDeviceOption(t *testing.T) { // testReadInternal sends a packet of the given test flow into the stack by // injecting it into the link endpoint. It then attempts to read it from the // UDP endpoint and depending on if this was expected to succeed verifies its -// correctness. -func testReadInternal(c *testContext, flow testFlow, packetShouldBeDropped, expectReadError bool) { +// correctness including any additional checker functions provided. +func testReadInternal(c *testContext, flow testFlow, packetShouldBeDropped, expectReadError bool, checkers ...checker.ControlMessagesChecker) { c.t.Helper() payload := newPayload() @@ -568,12 +570,12 @@ func testReadInternal(c *testContext, flow testFlow, packetShouldBeDropped, expe epstats := c.ep.Stats().(*tcpip.TransportEndpointStats).Clone() var addr tcpip.FullAddress - v, _, err := c.ep.Read(&addr) + v, cm, err := c.ep.Read(&addr) if err == tcpip.ErrWouldBlock { // Wait for data to become available. select { case <-ch: - v, _, err = c.ep.Read(&addr) + v, cm, err = c.ep.Read(&addr) case <-time.After(300 * time.Millisecond): if packetShouldBeDropped { @@ -606,15 +608,21 @@ func testReadInternal(c *testContext, flow testFlow, packetShouldBeDropped, expe if !bytes.Equal(payload, v) { c.t.Fatalf("bad payload: got %x, want %x", v, payload) } + + // Run any checkers against the ControlMessages. + for _, f := range checkers { + f(c.t, cm) + } + c.checkEndpointReadStats(1, epstats, err) } // testRead sends a packet of the given test flow into the stack by injecting it // into the link endpoint. It then reads it from the UDP endpoint and verifies -// its correctness. -func testRead(c *testContext, flow testFlow) { +// its correctness including any additional checker functions provided. +func testRead(c *testContext, flow testFlow, checkers ...checker.ControlMessagesChecker) { c.t.Helper() - testReadInternal(c, flow, false /* packetShouldBeDropped */, false /* expectReadError */) + testReadInternal(c, flow, false /* packetShouldBeDropped */, false /* expectReadError */, checkers...) } // testFailingRead sends a packet of the given test flow into the stack by @@ -1282,7 +1290,7 @@ func TestTOSV4(t *testing.T) { c.createEndpointForFlow(flow) - const tos = 0xC0 + const tos = testTOS var v tcpip.IPv4TOSOption if err := c.ep.GetSockOpt(&v); err != nil { c.t.Errorf("GetSockopt failed: %s", err) @@ -1317,7 +1325,7 @@ func TestTOSV6(t *testing.T) { c.createEndpointForFlow(flow) - const tos = 0xC0 + const tos = testTOS var v tcpip.IPv6TrafficClassOption if err := c.ep.GetSockOpt(&v); err != nil { c.t.Errorf("GetSockopt failed: %s", err) @@ -1344,6 +1352,47 @@ func TestTOSV6(t *testing.T) { } } +func TestReceiveTOSV4(t *testing.T) { + for _, flow := range []testFlow{unicastV4, broadcast} { + t.Run(fmt.Sprintf("flow:%s", flow), func(t *testing.T) { + c := newDualTestContext(t, defaultMTU) + defer c.cleanup() + + c.createEndpointForFlow(flow) + + // Verify that setting and reading the option works. + v, err := c.ep.GetSockOptBool(tcpip.ReceiveTOSOption) + if err != nil { + c.t.Fatal("GetSockOptBool(tcpip.ReceiveTOSOption) failed:", err) + } + // Test for expected default value. + if v != false { + c.t.Errorf("got GetSockOptBool(tcpip.ReceiveTOSOption) = %t, want = %t", v, false) + } + + want := true + if err := c.ep.SetSockOptBool(tcpip.ReceiveTOSOption, want); err != nil { + c.t.Fatalf("SetSockOptBool(tcpip.ReceiveTOSOption, %t) failed: %s", want, err) + } + + got, err := c.ep.GetSockOptBool(tcpip.ReceiveTOSOption) + if err != nil { + c.t.Fatal("GetSockOptBool(tcpip.ReceiveTOSOption) failed:", err) + } + if got != want { + c.t.Fatalf("got GetSockOptBool(tcpip.ReceiveTOSOption) = %t, want = %t", got, want) + } + + // Verify that the correct received TOS is handed through as + // ancillary data to the ControlMessages struct. + if err := c.ep.Bind(tcpip.FullAddress{Port: stackPort}); err != nil { + c.t.Fatal("Bind failed:", err) + } + testRead(c, flow, checker.ReceiveTOS(testTOS)) + }) + } +} + func TestMulticastInterfaceOption(t *testing.T) { for _, flow := range []testFlow{multicastV4, multicastV4in6, multicastV6, multicastV6Only} { t.Run(fmt.Sprintf("flow:%s", flow), func(t *testing.T) { diff --git a/test/syscalls/linux/socket_ip_udp_generic.cc b/test/syscalls/linux/socket_ip_udp_generic.cc index 66eb68857..53290bed7 100644 --- a/test/syscalls/linux/socket_ip_udp_generic.cc +++ b/test/syscalls/linux/socket_ip_udp_generic.cc @@ -209,6 +209,46 @@ TEST_P(UDPSocketPairTest, SetMulticastLoopChar) { EXPECT_EQ(get, kSockOptOn); } +// Ensure that Receiving TOS is off by default. +TEST_P(UDPSocketPairTest, RecvTosDefault) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT( + getsockopt(sockets->first_fd(), IPPROTO_IP, IP_RECVTOS, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kSockOptOff); +} + +// Test that setting and getting IP_RECVTOS works as expected. +TEST_P(UDPSocketPairTest, SetRecvTos) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_RECVTOS, + &kSockOptOff, sizeof(kSockOptOff)), + SyscallSucceeds()); + + int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT( + getsockopt(sockets->first_fd(), IPPROTO_IP, IP_RECVTOS, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kSockOptOff); + + ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_RECVTOS, + &kSockOptOn, sizeof(kSockOptOn)), + SyscallSucceeds()); + + ASSERT_THAT( + getsockopt(sockets->first_fd(), IPPROTO_IP, IP_RECVTOS, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kSockOptOn); +} + TEST_P(UDPSocketPairTest, ReuseAddrDefault) { auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); diff --git a/test/syscalls/linux/udp_socket_test_cases.cc b/test/syscalls/linux/udp_socket_test_cases.cc index dc35c2f50..68e0a8109 100644 --- a/test/syscalls/linux/udp_socket_test_cases.cc +++ b/test/syscalls/linux/udp_socket_test_cases.cc @@ -1349,8 +1349,9 @@ TEST_P(UdpSocketTest, TimestampIoctlPersistence) { // outgoing packets, and that a receiving socket with IP_RECVTOS or // IPV6_RECVTCLASS will create the corresponding control message. TEST_P(UdpSocketTest, SetAndReceiveTOS) { - // TODO(b/68320120): IP_RECVTOS/IPV6_RECVTCLASS not supported for netstack. - SKIP_IF(IsRunningOnGvisor() && !IsRunningWithHostinet()); + // TODO(b/68320120): IPV6_RECVTCLASS not supported for netstack. + SKIP_IF((GetParam() != AddressFamily::kIpv4) && IsRunningOnGvisor() && + !IsRunningWithHostinet()); ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); ASSERT_THAT(connect(t_, addr_[0], addrlen_), SyscallSucceeds()); @@ -1421,7 +1422,8 @@ TEST_P(UdpSocketTest, SetAndReceiveTOS) { // TOS byte on outgoing packets, and that a receiving socket with IP_RECVTOS or // IPV6_RECVTCLASS will create the corresponding control message. TEST_P(UdpSocketTest, SendAndReceiveTOS) { - // TODO(b/68320120): IP_RECVTOS/IPV6_RECVTCLASS not supported for netstack. + // TODO(b/68320120): IPV6_RECVTCLASS not supported for netstack. + // TODO(b/146661005): Setting TOS via cmsg not supported for netstack. SKIP_IF(IsRunningOnGvisor() && !IsRunningWithHostinet()); ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); ASSERT_THAT(connect(t_, addr_[0], addrlen_), SyscallSucceeds()); -- cgit v1.2.3 From 19b4653147c8ec1cd2cf6e2a8f9bfc7865a5f850 Mon Sep 17 00:00:00 2001 From: Adin Scannell Date: Thu, 16 Jan 2020 20:19:40 -0800 Subject: Remove unused rpcinet. PiperOrigin-RevId: 290198756 --- pkg/sentry/fs/proc/BUILD | 2 - pkg/sentry/fs/proc/proc.go | 9 +- pkg/sentry/fs/proc/rpcinet_proc.go | 217 ------ pkg/sentry/fs/proc/sys.go | 9 +- pkg/sentry/socket/rpcinet/BUILD | 69 -- pkg/sentry/socket/rpcinet/conn/BUILD | 18 - pkg/sentry/socket/rpcinet/conn/conn.go | 187 ----- pkg/sentry/socket/rpcinet/device.go | 19 - pkg/sentry/socket/rpcinet/notifier/BUILD | 17 - pkg/sentry/socket/rpcinet/notifier/notifier.go | 231 ------- pkg/sentry/socket/rpcinet/rpcinet.go | 16 - pkg/sentry/socket/rpcinet/socket.go | 909 ------------------------- pkg/sentry/socket/rpcinet/stack.go | 177 ----- pkg/sentry/socket/rpcinet/stack_unsafe.go | 193 ------ pkg/sentry/socket/rpcinet/syscall_rpc.proto | 352 ---------- 15 files changed, 2 insertions(+), 2423 deletions(-) delete mode 100644 pkg/sentry/fs/proc/rpcinet_proc.go delete mode 100644 pkg/sentry/socket/rpcinet/BUILD delete mode 100644 pkg/sentry/socket/rpcinet/conn/BUILD delete mode 100644 pkg/sentry/socket/rpcinet/conn/conn.go delete mode 100644 pkg/sentry/socket/rpcinet/device.go delete mode 100644 pkg/sentry/socket/rpcinet/notifier/BUILD delete mode 100644 pkg/sentry/socket/rpcinet/notifier/notifier.go delete mode 100644 pkg/sentry/socket/rpcinet/rpcinet.go delete mode 100644 pkg/sentry/socket/rpcinet/socket.go delete mode 100644 pkg/sentry/socket/rpcinet/stack.go delete mode 100644 pkg/sentry/socket/rpcinet/stack_unsafe.go delete mode 100644 pkg/sentry/socket/rpcinet/syscall_rpc.proto (limited to 'pkg/sentry/socket') diff --git a/pkg/sentry/fs/proc/BUILD b/pkg/sentry/fs/proc/BUILD index 94d46ab1b..cb37c6c6b 100644 --- a/pkg/sentry/fs/proc/BUILD +++ b/pkg/sentry/fs/proc/BUILD @@ -18,7 +18,6 @@ go_library( "mounts.go", "net.go", "proc.go", - "rpcinet_proc.go", "stat.go", "sys.go", "sys_net.go", @@ -46,7 +45,6 @@ go_library( "//pkg/sentry/limits", "//pkg/sentry/mm", "//pkg/sentry/socket", - "//pkg/sentry/socket/rpcinet", "//pkg/sentry/socket/unix", "//pkg/sentry/socket/unix/transport", "//pkg/sentry/usage", diff --git a/pkg/sentry/fs/proc/proc.go b/pkg/sentry/fs/proc/proc.go index da9341e4e..29867dc3a 100644 --- a/pkg/sentry/fs/proc/proc.go +++ b/pkg/sentry/fs/proc/proc.go @@ -27,7 +27,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs/proc/seqfile" "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet" "gvisor.dev/gvisor/pkg/syserror" ) @@ -87,15 +86,9 @@ func New(ctx context.Context, msrc *fs.MountSource, cgroupControllers map[string } // Add more contents that need proc to be initialized. + p.AddChild(ctx, "net", p.newNetDir(ctx, k, msrc)) p.AddChild(ctx, "sys", p.newSysDir(ctx, msrc)) - // If we're using rpcinet we will let it manage /proc/net. - if _, ok := p.k.NetworkStack().(*rpcinet.Stack); ok { - p.AddChild(ctx, "net", newRPCInetProcNet(ctx, msrc)) - } else { - p.AddChild(ctx, "net", p.newNetDir(ctx, k, msrc)) - } - return newProcInode(ctx, p, msrc, fs.SpecialDirectory, nil), nil } diff --git a/pkg/sentry/fs/proc/rpcinet_proc.go b/pkg/sentry/fs/proc/rpcinet_proc.go deleted file mode 100644 index 01ac97530..000000000 --- a/pkg/sentry/fs/proc/rpcinet_proc.go +++ /dev/null @@ -1,217 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package proc - -import ( - "io" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" - "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" - "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet" - "gvisor.dev/gvisor/pkg/sentry/usermem" - "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/waiter" -) - -// rpcInetInode implements fs.InodeOperations. -type rpcInetInode struct { - fsutil.SimpleFileInode - - // filepath is the full path of this rpcInetInode. - filepath string - - k *kernel.Kernel -} - -func newRPCInetInode(ctx context.Context, msrc *fs.MountSource, filepath string, mode linux.FileMode) *fs.Inode { - f := &rpcInetInode{ - SimpleFileInode: *fsutil.NewSimpleFileInode(ctx, fs.RootOwner, fs.FilePermsFromMode(mode), linux.PROC_SUPER_MAGIC), - filepath: filepath, - k: kernel.KernelFromContext(ctx), - } - return newProcInode(ctx, f, msrc, fs.SpecialFile, nil) -} - -// GetFile implements fs.InodeOperations.GetFile. -func (i *rpcInetInode) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) { - flags.Pread = true - flags.Pwrite = true - fops := &rpcInetFile{ - inode: i, - } - return fs.NewFile(ctx, dirent, flags, fops), nil -} - -// rpcInetFile implements fs.FileOperations as RPCs. -type rpcInetFile struct { - fsutil.FileGenericSeek `state:"nosave"` - fsutil.FileNoIoctl `state:"nosave"` - fsutil.FileNoMMap `state:"nosave"` - fsutil.FileNoSplice `state:"nosave"` - fsutil.FileNoopFlush `state:"nosave"` - fsutil.FileNoopFsync `state:"nosave"` - fsutil.FileNoopRelease `state:"nosave"` - fsutil.FileNotDirReaddir `state:"nosave"` - fsutil.FileUseInodeUnstableAttr `state:"nosave"` - waiter.AlwaysReady `state:"nosave"` - - inode *rpcInetInode -} - -// Read implements fs.FileOperations.Read. -// -// This method can panic if an rpcInetInode was created without an rpcinet -// stack. -func (f *rpcInetFile) Read(ctx context.Context, file *fs.File, dst usermem.IOSequence, offset int64) (int64, error) { - if offset < 0 { - return 0, syserror.EINVAL - } - s, ok := f.inode.k.NetworkStack().(*rpcinet.Stack) - if !ok { - panic("Network stack is not a rpcinet.") - } - - contents, se := s.RPCReadFile(f.inode.filepath) - if se != nil || offset >= int64(len(contents)) { - return 0, io.EOF - } - - n, err := dst.CopyOut(ctx, contents[offset:]) - return int64(n), err -} - -// Write implements fs.FileOperations.Write. -// -// This method can panic if an rpcInetInode was created without an rpcInet -// stack. -func (f *rpcInetFile) Write(ctx context.Context, file *fs.File, src usermem.IOSequence, offset int64) (int64, error) { - s, ok := f.inode.k.NetworkStack().(*rpcinet.Stack) - if !ok { - panic("Network stack is not a rpcinet.") - } - - if src.NumBytes() == 0 { - return 0, nil - } - - b := make([]byte, src.NumBytes(), src.NumBytes()) - n, err := src.CopyIn(ctx, b) - if err != nil { - return int64(n), err - } - - written, se := s.RPCWriteFile(f.inode.filepath, b) - return int64(written), se.ToError() -} - -// newRPCInetProcNet will build an inode for /proc/net. -func newRPCInetProcNet(ctx context.Context, msrc *fs.MountSource) *fs.Inode { - contents := map[string]*fs.Inode{ - "arp": newRPCInetInode(ctx, msrc, "/proc/net/arp", 0444), - "dev": newRPCInetInode(ctx, msrc, "/proc/net/dev", 0444), - "if_inet6": newRPCInetInode(ctx, msrc, "/proc/net/if_inet6", 0444), - "ipv6_route": newRPCInetInode(ctx, msrc, "/proc/net/ipv6_route", 0444), - "netlink": newRPCInetInode(ctx, msrc, "/proc/net/netlink", 0444), - "netstat": newRPCInetInode(ctx, msrc, "/proc/net/netstat", 0444), - "packet": newRPCInetInode(ctx, msrc, "/proc/net/packet", 0444), - "protocols": newRPCInetInode(ctx, msrc, "/proc/net/protocols", 0444), - "psched": newRPCInetInode(ctx, msrc, "/proc/net/psched", 0444), - "ptype": newRPCInetInode(ctx, msrc, "/proc/net/ptype", 0444), - "route": newRPCInetInode(ctx, msrc, "/proc/net/route", 0444), - "tcp": newRPCInetInode(ctx, msrc, "/proc/net/tcp", 0444), - "tcp6": newRPCInetInode(ctx, msrc, "/proc/net/tcp6", 0444), - "udp": newRPCInetInode(ctx, msrc, "/proc/net/udp", 0444), - "udp6": newRPCInetInode(ctx, msrc, "/proc/net/udp6", 0444), - } - - d := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0555)) - return newProcInode(ctx, d, msrc, fs.SpecialDirectory, nil) -} - -// newRPCInetProcSysNet will build an inode for /proc/sys/net. -func newRPCInetProcSysNet(ctx context.Context, msrc *fs.MountSource) *fs.Inode { - contents := map[string]*fs.Inode{ - "ipv4": newRPCInetSysNetIPv4Dir(ctx, msrc), - "core": newRPCInetSysNetCore(ctx, msrc), - } - - d := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0555)) - return newProcInode(ctx, d, msrc, fs.SpecialDirectory, nil) -} - -// newRPCInetSysNetCore builds the /proc/sys/net/core directory. -func newRPCInetSysNetCore(ctx context.Context, msrc *fs.MountSource) *fs.Inode { - contents := map[string]*fs.Inode{ - "default_qdisc": newRPCInetInode(ctx, msrc, "/proc/sys/net/core/default_qdisc", 0444), - "message_burst": newRPCInetInode(ctx, msrc, "/proc/sys/net/core/message_burst", 0444), - "message_cost": newRPCInetInode(ctx, msrc, "/proc/sys/net/core/message_cost", 0444), - "optmem_max": newRPCInetInode(ctx, msrc, "/proc/sys/net/core/optmem_max", 0444), - "rmem_default": newRPCInetInode(ctx, msrc, "/proc/sys/net/core/rmem_default", 0444), - "rmem_max": newRPCInetInode(ctx, msrc, "/proc/sys/net/core/rmem_max", 0444), - "somaxconn": newRPCInetInode(ctx, msrc, "/proc/sys/net/core/somaxconn", 0444), - "wmem_default": newRPCInetInode(ctx, msrc, "/proc/sys/net/core/wmem_default", 0444), - "wmem_max": newRPCInetInode(ctx, msrc, "/proc/sys/net/core/wmem_max", 0444), - } - - d := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0555)) - return newProcInode(ctx, d, msrc, fs.SpecialDirectory, nil) -} - -// newRPCInetSysNetIPv4Dir builds the /proc/sys/net/ipv4 directory. -func newRPCInetSysNetIPv4Dir(ctx context.Context, msrc *fs.MountSource) *fs.Inode { - contents := map[string]*fs.Inode{ - "ip_local_port_range": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/ip_local_port_range", 0444), - "ip_local_reserved_ports": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/ip_local_reserved_ports", 0444), - "ipfrag_time": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/ipfrag_time", 0444), - "ip_nonlocal_bind": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/ip_nonlocal_bind", 0444), - "ip_no_pmtu_disc": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/ip_no_pmtu_disc", 0444), - "tcp_allowed_congestion_control": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_allowed_congestion_control", 0444), - "tcp_available_congestion_control": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_available_congestion_control", 0444), - "tcp_base_mss": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_base_mss", 0444), - "tcp_congestion_control": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_congestion_control", 0644), - "tcp_dsack": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_dsack", 0644), - "tcp_early_retrans": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_early_retrans", 0644), - "tcp_fack": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_fack", 0644), - "tcp_fastopen": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_fastopen", 0644), - "tcp_fastopen_key": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_fastopen_key", 0444), - "tcp_fin_timeout": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_fin_timeout", 0644), - "tcp_invalid_ratelimit": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_invalid_ratelimit", 0444), - "tcp_keepalive_intvl": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_keepalive_intvl", 0644), - "tcp_keepalive_probes": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_keepalive_probes", 0644), - "tcp_keepalive_time": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_keepalive_time", 0644), - "tcp_mem": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_mem", 0444), - "tcp_mtu_probing": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_mtu_probing", 0644), - "tcp_no_metrics_save": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_no_metrics_save", 0444), - "tcp_probe_interval": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_probe_interval", 0444), - "tcp_probe_threshold": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_probe_threshold", 0444), - "tcp_retries1": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_retries1", 0644), - "tcp_retries2": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_retries2", 0644), - "tcp_rfc1337": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_rfc1337", 0444), - "tcp_rmem": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_rmem", 0444), - "tcp_sack": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_sack", 0644), - "tcp_slow_start_after_idle": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_slow_start_after_idle", 0644), - "tcp_synack_retries": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_synack_retries", 0644), - "tcp_syn_retries": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_syn_retries", 0644), - "tcp_timestamps": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_timestamps", 0644), - "tcp_wmem": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_wmem", 0444), - } - - d := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0555)) - return newProcInode(ctx, d, msrc, fs.SpecialDirectory, nil) -} diff --git a/pkg/sentry/fs/proc/sys.go b/pkg/sentry/fs/proc/sys.go index 1062bd852..2bdcf5f70 100644 --- a/pkg/sentry/fs/proc/sys.go +++ b/pkg/sentry/fs/proc/sys.go @@ -26,7 +26,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs/proc/seqfile" "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet" "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/waiter" ) @@ -106,16 +105,10 @@ func (p *proc) newVMDir(ctx context.Context, msrc *fs.MountSource) *fs.Inode { func (p *proc) newSysDir(ctx context.Context, msrc *fs.MountSource) *fs.Inode { children := map[string]*fs.Inode{ "kernel": p.newKernelDir(ctx, msrc), + "net": p.newSysNetDir(ctx, msrc), "vm": p.newVMDir(ctx, msrc), } - // If we're using rpcinet we will let it manage /proc/sys/net. - if _, ok := p.k.NetworkStack().(*rpcinet.Stack); ok { - children["net"] = newRPCInetProcSysNet(ctx, msrc) - } else { - children["net"] = p.newSysNetDir(ctx, msrc) - } - d := ramfs.NewDir(ctx, children, fs.RootOwner, fs.FilePermsFromMode(0555)) return newProcInode(ctx, d, msrc, fs.SpecialDirectory, nil) } diff --git a/pkg/sentry/socket/rpcinet/BUILD b/pkg/sentry/socket/rpcinet/BUILD deleted file mode 100644 index 4668b87d1..000000000 --- a/pkg/sentry/socket/rpcinet/BUILD +++ /dev/null @@ -1,69 +0,0 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//proto:def.bzl", "go_proto_library") -load("@rules_cc//cc:defs.bzl", "cc_proto_library") - -package(licenses = ["notice"]) - -go_library( - name = "rpcinet", - srcs = [ - "device.go", - "rpcinet.go", - "socket.go", - "stack.go", - "stack_unsafe.go", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet", - visibility = ["//pkg/sentry:internal"], - deps = [ - ":syscall_rpc_go_proto", - "//pkg/abi/linux", - "//pkg/binary", - "//pkg/sentry/arch", - "//pkg/sentry/context", - "//pkg/sentry/device", - "//pkg/sentry/fs", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/inet", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/time", - "//pkg/sentry/socket", - "//pkg/sentry/socket/hostinet", - "//pkg/sentry/socket/rpcinet/conn", - "//pkg/sentry/socket/rpcinet/notifier", - "//pkg/sentry/unimpl", - "//pkg/sentry/usermem", - "//pkg/syserr", - "//pkg/syserror", - "//pkg/tcpip", - "//pkg/tcpip/buffer", - "//pkg/tcpip/stack", - "//pkg/unet", - "//pkg/waiter", - ], -) - -proto_library( - name = "syscall_rpc_proto", - srcs = ["syscall_rpc.proto"], - visibility = [ - "//visibility:public", - ], -) - -cc_proto_library( - name = "syscall_rpc_cc_proto", - visibility = [ - "//visibility:public", - ], - deps = [":syscall_rpc_proto"], -) - -go_proto_library( - name = "syscall_rpc_go_proto", - importpath = "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/syscall_rpc_go_proto", - proto = ":syscall_rpc_proto", - visibility = [ - "//visibility:public", - ], -) diff --git a/pkg/sentry/socket/rpcinet/conn/BUILD b/pkg/sentry/socket/rpcinet/conn/BUILD deleted file mode 100644 index b2677c659..000000000 --- a/pkg/sentry/socket/rpcinet/conn/BUILD +++ /dev/null @@ -1,18 +0,0 @@ -load("//tools/go_stateify:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "conn", - srcs = ["conn.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/conn", - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/binary", - "//pkg/sentry/socket/rpcinet:syscall_rpc_go_proto", - "//pkg/sync", - "//pkg/syserr", - "//pkg/unet", - "@com_github_golang_protobuf//proto:go_default_library", - ], -) diff --git a/pkg/sentry/socket/rpcinet/conn/conn.go b/pkg/sentry/socket/rpcinet/conn/conn.go deleted file mode 100644 index 02f39c767..000000000 --- a/pkg/sentry/socket/rpcinet/conn/conn.go +++ /dev/null @@ -1,187 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package conn is an RPC connection to a syscall RPC server. -package conn - -import ( - "fmt" - "sync/atomic" - "syscall" - - "github.com/golang/protobuf/proto" - "gvisor.dev/gvisor/pkg/binary" - "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserr" - "gvisor.dev/gvisor/pkg/unet" - - pb "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/syscall_rpc_go_proto" -) - -type request struct { - response []byte - ready chan struct{} - ignoreResult bool -} - -// RPCConnection represents a single RPC connection to a syscall gofer. -type RPCConnection struct { - // reqID is the ID of the last request and must be accessed atomically. - reqID uint64 - - sendMu sync.Mutex - socket *unet.Socket - - reqMu sync.Mutex - requests map[uint64]request -} - -// NewRPCConnection initializes a RPC connection to a socket gofer. -func NewRPCConnection(s *unet.Socket) *RPCConnection { - conn := &RPCConnection{socket: s, requests: map[uint64]request{}} - go func() { // S/R-FIXME(b/77962828) - var nums [16]byte - for { - for n := 0; n < len(nums); { - nn, err := conn.socket.Read(nums[n:]) - if err != nil { - panic(fmt.Sprint("error reading length from socket rpc gofer: ", err)) - } - n += nn - } - - b := make([]byte, binary.LittleEndian.Uint64(nums[:8])) - id := binary.LittleEndian.Uint64(nums[8:]) - - for n := 0; n < len(b); { - nn, err := conn.socket.Read(b[n:]) - if err != nil { - panic(fmt.Sprint("error reading request from socket rpc gofer: ", err)) - } - n += nn - } - - conn.reqMu.Lock() - r := conn.requests[id] - if r.ignoreResult { - delete(conn.requests, id) - } else { - r.response = b - conn.requests[id] = r - } - conn.reqMu.Unlock() - close(r.ready) - } - }() - return conn -} - -// NewRequest makes a request to the RPC gofer and returns the request ID and a -// channel which will be closed once the request completes. -func (c *RPCConnection) NewRequest(req pb.SyscallRequest, ignoreResult bool) (uint64, chan struct{}) { - b, err := proto.Marshal(&req) - if err != nil { - panic(fmt.Sprint("invalid proto: ", err)) - } - - id := atomic.AddUint64(&c.reqID, 1) - ch := make(chan struct{}) - - c.reqMu.Lock() - c.requests[id] = request{ready: ch, ignoreResult: ignoreResult} - c.reqMu.Unlock() - - c.sendMu.Lock() - defer c.sendMu.Unlock() - - var nums [16]byte - binary.LittleEndian.PutUint64(nums[:8], uint64(len(b))) - binary.LittleEndian.PutUint64(nums[8:], id) - for n := 0; n < len(nums); { - nn, err := c.socket.Write(nums[n:]) - if err != nil { - panic(fmt.Sprint("error writing length and ID to socket gofer: ", err)) - } - n += nn - } - - for n := 0; n < len(b); { - nn, err := c.socket.Write(b[n:]) - if err != nil { - panic(fmt.Sprint("error writing request to socket gofer: ", err)) - } - n += nn - } - - return id, ch -} - -// RPCReadFile will execute the ReadFile helper RPC method which avoids the -// common pattern of open(2), read(2), close(2) by doing all three operations -// as a single RPC. It will read the entire file or return EFBIG if the file -// was too large. -func (c *RPCConnection) RPCReadFile(path string) ([]byte, *syserr.Error) { - req := &pb.SyscallRequest_ReadFile{&pb.ReadFileRequest{ - Path: path, - }} - - id, ch := c.NewRequest(pb.SyscallRequest{Args: req}, false /* ignoreResult */) - <-ch - - res := c.Request(id).Result.(*pb.SyscallResponse_ReadFile).ReadFile.Result - if e, ok := res.(*pb.ReadFileResponse_ErrorNumber); ok { - return nil, syserr.FromHost(syscall.Errno(e.ErrorNumber)) - } - - return res.(*pb.ReadFileResponse_Data).Data, nil -} - -// RPCWriteFile will execute the WriteFile helper RPC method which avoids the -// common pattern of open(2), write(2), write(2), close(2) by doing all -// operations as a single RPC. -func (c *RPCConnection) RPCWriteFile(path string, data []byte) (int64, *syserr.Error) { - req := &pb.SyscallRequest_WriteFile{&pb.WriteFileRequest{ - Path: path, - Content: data, - }} - - id, ch := c.NewRequest(pb.SyscallRequest{Args: req}, false /* ignoreResult */) - <-ch - - res := c.Request(id).Result.(*pb.SyscallResponse_WriteFile).WriteFile - if e := res.ErrorNumber; e != 0 { - return int64(res.Written), syserr.FromHost(syscall.Errno(e)) - } - - return int64(res.Written), nil -} - -// Request retrieves the request corresponding to the given request ID. -// -// The channel returned by NewRequest must have been closed before Request can -// be called. This will happen automatically, do not manually close the -// channel. -func (c *RPCConnection) Request(id uint64) pb.SyscallResponse { - c.reqMu.Lock() - r := c.requests[id] - delete(c.requests, id) - c.reqMu.Unlock() - - var resp pb.SyscallResponse - if err := proto.Unmarshal(r.response, &resp); err != nil { - panic(fmt.Sprint("invalid proto: ", err)) - } - - return resp -} diff --git a/pkg/sentry/socket/rpcinet/device.go b/pkg/sentry/socket/rpcinet/device.go deleted file mode 100644 index 8cfd5f6e5..000000000 --- a/pkg/sentry/socket/rpcinet/device.go +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package rpcinet - -import "gvisor.dev/gvisor/pkg/sentry/device" - -var socketDevice = device.NewAnonDevice() diff --git a/pkg/sentry/socket/rpcinet/notifier/BUILD b/pkg/sentry/socket/rpcinet/notifier/BUILD deleted file mode 100644 index a5954f22b..000000000 --- a/pkg/sentry/socket/rpcinet/notifier/BUILD +++ /dev/null @@ -1,17 +0,0 @@ -load("//tools/go_stateify:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "notifier", - srcs = ["notifier.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/notifier", - visibility = ["//:sandbox"], - deps = [ - "//pkg/sentry/socket/rpcinet:syscall_rpc_go_proto", - "//pkg/sentry/socket/rpcinet/conn", - "//pkg/sync", - "//pkg/waiter", - "@org_golang_x_sys//unix:go_default_library", - ], -) diff --git a/pkg/sentry/socket/rpcinet/notifier/notifier.go b/pkg/sentry/socket/rpcinet/notifier/notifier.go deleted file mode 100644 index 82b75d6dd..000000000 --- a/pkg/sentry/socket/rpcinet/notifier/notifier.go +++ /dev/null @@ -1,231 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package notifier implements an FD notifier implementation over RPC. -package notifier - -import ( - "fmt" - "syscall" - - "golang.org/x/sys/unix" - "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/conn" - pb "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/syscall_rpc_go_proto" - "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/waiter" -) - -type fdInfo struct { - queue *waiter.Queue - waiting bool -} - -// Notifier holds all the state necessary to issue notifications when IO events -// occur in the observed FDs. -type Notifier struct { - // rpcConn is the connection that is used for sending RPCs. - rpcConn *conn.RPCConnection - - // epFD is the epoll file descriptor used to register for io - // notifications. - epFD uint32 - - // mu protects fdMap. - mu sync.Mutex - - // fdMap maps file descriptors to their notification queues and waiting - // status. - fdMap map[uint32]*fdInfo -} - -// NewRPCNotifier creates a new notifier object. -func NewRPCNotifier(cn *conn.RPCConnection) (*Notifier, error) { - id, c := cn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_EpollCreate1{&pb.EpollCreate1Request{}}}, false /* ignoreResult */) - <-c - - res := cn.Request(id).Result.(*pb.SyscallResponse_EpollCreate1).EpollCreate1.Result - if e, ok := res.(*pb.EpollCreate1Response_ErrorNumber); ok { - return nil, syscall.Errno(e.ErrorNumber) - } - - w := &Notifier{ - rpcConn: cn, - epFD: res.(*pb.EpollCreate1Response_Fd).Fd, - fdMap: make(map[uint32]*fdInfo), - } - - go w.waitAndNotify() // S/R-FIXME(b/77962828) - - return w, nil -} - -// waitFD waits on mask for fd. The fdMap mutex must be hold. -func (n *Notifier) waitFD(fd uint32, fi *fdInfo, mask waiter.EventMask) error { - if !fi.waiting && mask == 0 { - return nil - } - - e := pb.EpollEvent{ - Events: mask.ToLinux() | unix.EPOLLET, - Fd: fd, - } - - switch { - case !fi.waiting && mask != 0: - id, c := n.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_EpollCtl{&pb.EpollCtlRequest{Epfd: n.epFD, Op: syscall.EPOLL_CTL_ADD, Fd: fd, Event: &e}}}, false /* ignoreResult */) - <-c - - e := n.rpcConn.Request(id).Result.(*pb.SyscallResponse_EpollCtl).EpollCtl.ErrorNumber - if e != 0 { - return syscall.Errno(e) - } - - fi.waiting = true - case fi.waiting && mask == 0: - id, c := n.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_EpollCtl{&pb.EpollCtlRequest{Epfd: n.epFD, Op: syscall.EPOLL_CTL_DEL, Fd: fd}}}, false /* ignoreResult */) - <-c - n.rpcConn.Request(id) - - fi.waiting = false - case fi.waiting && mask != 0: - id, c := n.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_EpollCtl{&pb.EpollCtlRequest{Epfd: n.epFD, Op: syscall.EPOLL_CTL_MOD, Fd: fd, Event: &e}}}, false /* ignoreResult */) - <-c - - e := n.rpcConn.Request(id).Result.(*pb.SyscallResponse_EpollCtl).EpollCtl.ErrorNumber - if e != 0 { - return syscall.Errno(e) - } - } - - return nil -} - -// addFD adds an FD to the list of FDs observed by n. -func (n *Notifier) addFD(fd uint32, queue *waiter.Queue) { - n.mu.Lock() - defer n.mu.Unlock() - - // Panic if we're already notifying on this FD. - if _, ok := n.fdMap[fd]; ok { - panic(fmt.Sprintf("File descriptor %d added twice", fd)) - } - - // We have nothing to wait for at the moment. Just add it to the map. - n.fdMap[fd] = &fdInfo{queue: queue} -} - -// updateFD updates the set of events the FD needs to be notified on. -func (n *Notifier) updateFD(fd uint32) error { - n.mu.Lock() - defer n.mu.Unlock() - - if fi, ok := n.fdMap[fd]; ok { - return n.waitFD(fd, fi, fi.queue.Events()) - } - - return nil -} - -// RemoveFD removes an FD from the list of FDs observed by n. -func (n *Notifier) removeFD(fd uint32) { - n.mu.Lock() - defer n.mu.Unlock() - - // Remove from map, then from epoll object. - n.waitFD(fd, n.fdMap[fd], 0) - delete(n.fdMap, fd) -} - -// hasFD returns true if the FD is in the list of observed FDs. -func (n *Notifier) hasFD(fd uint32) bool { - n.mu.Lock() - defer n.mu.Unlock() - - _, ok := n.fdMap[fd] - return ok -} - -// waitAndNotify loops waiting for io event notifications from the epoll -// object. Once notifications arrive, they are dispatched to the -// registered queue. -func (n *Notifier) waitAndNotify() error { - for { - id, c := n.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_EpollWait{&pb.EpollWaitRequest{Fd: n.epFD, NumEvents: 100, Msec: -1}}}, false /* ignoreResult */) - <-c - - res := n.rpcConn.Request(id).Result.(*pb.SyscallResponse_EpollWait).EpollWait.Result - if e, ok := res.(*pb.EpollWaitResponse_ErrorNumber); ok { - err := syscall.Errno(e.ErrorNumber) - // NOTE(magi): I don't think epoll_wait can return EAGAIN but I'm being - // conseratively careful here since exiting the notification thread - // would be really bad. - if err == syscall.EINTR || err == syscall.EAGAIN { - continue - } - return err - } - - n.mu.Lock() - for _, e := range res.(*pb.EpollWaitResponse_Events).Events.Events { - if fi, ok := n.fdMap[e.Fd]; ok { - fi.queue.Notify(waiter.EventMaskFromLinux(e.Events)) - } - } - n.mu.Unlock() - } -} - -// AddFD adds an FD to the list of observed FDs. -func (n *Notifier) AddFD(fd uint32, queue *waiter.Queue) error { - n.addFD(fd, queue) - return nil -} - -// UpdateFD updates the set of events the FD needs to be notified on. -func (n *Notifier) UpdateFD(fd uint32) error { - return n.updateFD(fd) -} - -// RemoveFD removes an FD from the list of observed FDs. -func (n *Notifier) RemoveFD(fd uint32) { - n.removeFD(fd) -} - -// HasFD returns true if the FD is in the list of observed FDs. -// -// This should only be used by tests to assert that FDs are correctly -// registered. -func (n *Notifier) HasFD(fd uint32) bool { - return n.hasFD(fd) -} - -// NonBlockingPoll polls the given fd in non-blocking fashion. It is used just -// to query the FD's current state; this method will block on the RPC response -// although the syscall is non-blocking. -func (n *Notifier) NonBlockingPoll(fd uint32, mask waiter.EventMask) waiter.EventMask { - for { - id, c := n.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_Poll{&pb.PollRequest{Fd: fd, Events: mask.ToLinux()}}}, false /* ignoreResult */) - <-c - - res := n.rpcConn.Request(id).Result.(*pb.SyscallResponse_Poll).Poll.Result - if e, ok := res.(*pb.PollResponse_ErrorNumber); ok { - if syscall.Errno(e.ErrorNumber) == syscall.EINTR { - continue - } - return mask - } - - return waiter.EventMaskFromLinux(res.(*pb.PollResponse_Events).Events) - } -} diff --git a/pkg/sentry/socket/rpcinet/rpcinet.go b/pkg/sentry/socket/rpcinet/rpcinet.go deleted file mode 100644 index 5d4fd4dac..000000000 --- a/pkg/sentry/socket/rpcinet/rpcinet.go +++ /dev/null @@ -1,16 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package rpcinet implements sockets using an RPC for each syscall. -package rpcinet diff --git a/pkg/sentry/socket/rpcinet/socket.go b/pkg/sentry/socket/rpcinet/socket.go deleted file mode 100644 index ddb76d9d4..000000000 --- a/pkg/sentry/socket/rpcinet/socket.go +++ /dev/null @@ -1,909 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package rpcinet - -import ( - "sync/atomic" - "syscall" - "time" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/binary" - "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" - "gvisor.dev/gvisor/pkg/sentry/kernel" - ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" - "gvisor.dev/gvisor/pkg/sentry/socket" - "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/conn" - "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/notifier" - pb "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/syscall_rpc_go_proto" - "gvisor.dev/gvisor/pkg/sentry/unimpl" - "gvisor.dev/gvisor/pkg/sentry/usermem" - "gvisor.dev/gvisor/pkg/syserr" - "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/tcpip" - "gvisor.dev/gvisor/pkg/tcpip/buffer" - "gvisor.dev/gvisor/pkg/waiter" -) - -// socketOperations implements fs.FileOperations and socket.Socket for a socket -// implemented using a host socket. -type socketOperations struct { - fsutil.FilePipeSeek `state:"nosave"` - fsutil.FileNotDirReaddir `state:"nosave"` - fsutil.FileNoFsync `state:"nosave"` - fsutil.FileNoMMap `state:"nosave"` - fsutil.FileNoSplice `state:"nosave"` - fsutil.FileNoopFlush `state:"nosave"` - fsutil.FileUseInodeUnstableAttr `state:"nosave"` - socket.SendReceiveTimeout - - family int // Read-only. - stype linux.SockType // Read-only. - protocol int // Read-only. - - fd uint32 // must be O_NONBLOCK - wq *waiter.Queue - rpcConn *conn.RPCConnection - notifier *notifier.Notifier - - // shState is the state of the connection with respect to shutdown. Because - // we're mixing non-blocking semantics on the other side we have to adapt for - // some strange differences between blocking and non-blocking sockets. - shState int32 -} - -// Verify that we actually implement socket.Socket. -var _ = socket.Socket(&socketOperations{}) - -// New creates a new RPC socket. -func newSocketFile(ctx context.Context, stack *Stack, family int, skType linux.SockType, protocol int) (*fs.File, *syserr.Error) { - id, c := stack.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_Socket{&pb.SocketRequest{Family: int64(family), Type: int64(skType | syscall.SOCK_NONBLOCK), Protocol: int64(protocol)}}}, false /* ignoreResult */) - <-c - - res := stack.rpcConn.Request(id).Result.(*pb.SyscallResponse_Socket).Socket.Result - if e, ok := res.(*pb.SocketResponse_ErrorNumber); ok { - return nil, syserr.FromHost(syscall.Errno(e.ErrorNumber)) - } - fd := res.(*pb.SocketResponse_Fd).Fd - - var wq waiter.Queue - stack.notifier.AddFD(fd, &wq) - - dirent := socket.NewDirent(ctx, socketDevice) - defer dirent.DecRef() - return fs.NewFile(ctx, dirent, fs.FileFlags{Read: true, Write: true}, &socketOperations{ - family: family, - stype: skType, - protocol: protocol, - wq: &wq, - fd: fd, - rpcConn: stack.rpcConn, - notifier: stack.notifier, - }), nil -} - -func isBlockingErrno(err error) bool { - return err == syscall.EAGAIN || err == syscall.EWOULDBLOCK -} - -func translateIOSyscallError(err error) error { - if isBlockingErrno(err) { - return syserror.ErrWouldBlock - } - return err -} - -// setShutdownFlags will set the shutdown flag so we can handle blocking reads -// after a read shutdown. -func (s *socketOperations) setShutdownFlags(how int) { - var f tcpip.ShutdownFlags - switch how { - case linux.SHUT_RD: - f = tcpip.ShutdownRead - case linux.SHUT_WR: - f = tcpip.ShutdownWrite - case linux.SHUT_RDWR: - f = tcpip.ShutdownWrite | tcpip.ShutdownRead - } - - // Atomically update the flags. - for { - old := atomic.LoadInt32(&s.shState) - if atomic.CompareAndSwapInt32(&s.shState, old, old|int32(f)) { - break - } - } -} - -func (s *socketOperations) resetShutdownFlags() { - atomic.StoreInt32(&s.shState, 0) -} - -func (s *socketOperations) isShutRdSet() bool { - return atomic.LoadInt32(&s.shState)&int32(tcpip.ShutdownRead) != 0 -} - -func (s *socketOperations) isShutWrSet() bool { - return atomic.LoadInt32(&s.shState)&int32(tcpip.ShutdownWrite) != 0 -} - -// Release implements fs.FileOperations.Release. -func (s *socketOperations) Release() { - s.notifier.RemoveFD(s.fd) - - // We always need to close the FD. - _, _ = s.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_Close{&pb.CloseRequest{Fd: s.fd}}}, true /* ignoreResult */) -} - -// Readiness implements waiter.Waitable.Readiness. -func (s *socketOperations) Readiness(mask waiter.EventMask) waiter.EventMask { - return s.notifier.NonBlockingPoll(s.fd, mask) -} - -// EventRegister implements waiter.Waitable.EventRegister. -func (s *socketOperations) EventRegister(e *waiter.Entry, mask waiter.EventMask) { - s.wq.EventRegister(e, mask) - s.notifier.UpdateFD(s.fd) -} - -// EventUnregister implements waiter.Waitable.EventUnregister. -func (s *socketOperations) EventUnregister(e *waiter.Entry) { - s.wq.EventUnregister(e) - s.notifier.UpdateFD(s.fd) -} - -func rpcRead(t *kernel.Task, req *pb.SyscallRequest_Read) (*pb.ReadResponse_Data, *syserr.Error) { - s := t.NetworkContext().(*Stack) - id, c := s.rpcConn.NewRequest(pb.SyscallRequest{Args: req}, false /* ignoreResult */) - <-c - - res := s.rpcConn.Request(id).Result.(*pb.SyscallResponse_Read).Read.Result - if e, ok := res.(*pb.ReadResponse_ErrorNumber); ok { - return nil, syserr.FromHost(syscall.Errno(e.ErrorNumber)) - } - - return res.(*pb.ReadResponse_Data), nil -} - -// Read implements fs.FileOperations.Read. -func (s *socketOperations) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, _ int64) (int64, error) { - req := &pb.SyscallRequest_Read{&pb.ReadRequest{ - Fd: s.fd, - Length: uint32(dst.NumBytes()), - }} - - res, se := rpcRead(ctx.(*kernel.Task), req) - if se == nil { - n, e := dst.CopyOut(ctx, res.Data) - return int64(n), e - } - - return 0, se.ToError() -} - -func rpcWrite(t *kernel.Task, req *pb.SyscallRequest_Write) (uint32, *syserr.Error) { - s := t.NetworkContext().(*Stack) - id, c := s.rpcConn.NewRequest(pb.SyscallRequest{Args: req}, false /* ignoreResult */) - <-c - - res := s.rpcConn.Request(id).Result.(*pb.SyscallResponse_Write).Write.Result - if e, ok := res.(*pb.WriteResponse_ErrorNumber); ok { - return 0, syserr.FromHost(syscall.Errno(e.ErrorNumber)) - } - - return res.(*pb.WriteResponse_Length).Length, nil -} - -// Write implements fs.FileOperations.Write. -func (s *socketOperations) Write(ctx context.Context, _ *fs.File, src usermem.IOSequence, _ int64) (int64, error) { - t := ctx.(*kernel.Task) - v := buffer.NewView(int(src.NumBytes())) - - // Copy all the data into the buffer. - if _, err := src.CopyIn(t, v); err != nil { - return 0, err - } - - n, err := rpcWrite(t, &pb.SyscallRequest_Write{&pb.WriteRequest{Fd: s.fd, Data: v}}) - if n > 0 && n < uint32(src.NumBytes()) { - // The FileOperations.Write interface expects us to return ErrWouldBlock in - // the event of a partial write. - return int64(n), syserror.ErrWouldBlock - } - return int64(n), err.ToError() -} - -func rpcConnect(t *kernel.Task, fd uint32, sockaddr []byte) *syserr.Error { - s := t.NetworkContext().(*Stack) - id, c := s.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_Connect{&pb.ConnectRequest{Fd: uint32(fd), Address: sockaddr}}}, false /* ignoreResult */) - <-c - - if e := s.rpcConn.Request(id).Result.(*pb.SyscallResponse_Connect).Connect.ErrorNumber; e != 0 { - return syserr.FromHost(syscall.Errno(e)) - } - return nil -} - -// Connect implements socket.Socket.Connect. -func (s *socketOperations) Connect(t *kernel.Task, sockaddr []byte, blocking bool) *syserr.Error { - if !blocking { - e := rpcConnect(t, s.fd, sockaddr) - if e == nil { - // Reset the shutdown state on new connects. - s.resetShutdownFlags() - } - return e - } - - // Register for notification when the endpoint becomes writable, then - // initiate the connection. - e, ch := waiter.NewChannelEntry(nil) - s.EventRegister(&e, waiter.EventOut|waiter.EventIn|waiter.EventHUp) - defer s.EventUnregister(&e) - for { - if err := rpcConnect(t, s.fd, sockaddr); err == nil || err != syserr.ErrInProgress && err != syserr.ErrAlreadyInProgress { - if err == nil { - // Reset the shutdown state on new connects. - s.resetShutdownFlags() - } - return err - } - - // It's pending, so we have to wait for a notification, and fetch the - // result once the wait completes. - if err := t.Block(ch); err != nil { - return syserr.FromError(err) - } - } -} - -func rpcAccept(t *kernel.Task, fd uint32, peer bool) (*pb.AcceptResponse_ResultPayload, *syserr.Error) { - stack := t.NetworkContext().(*Stack) - id, c := stack.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_Accept{&pb.AcceptRequest{Fd: fd, Peer: peer, Flags: syscall.SOCK_NONBLOCK}}}, false /* ignoreResult */) - <-c - - res := stack.rpcConn.Request(id).Result.(*pb.SyscallResponse_Accept).Accept.Result - if e, ok := res.(*pb.AcceptResponse_ErrorNumber); ok { - return nil, syserr.FromHost(syscall.Errno(e.ErrorNumber)) - } - return res.(*pb.AcceptResponse_Payload).Payload, nil -} - -// Accept implements socket.Socket.Accept. -func (s *socketOperations) Accept(t *kernel.Task, peerRequested bool, flags int, blocking bool) (int32, linux.SockAddr, uint32, *syserr.Error) { - payload, se := rpcAccept(t, s.fd, peerRequested) - - // Check if we need to block. - if blocking && se == syserr.ErrTryAgain { - // Register for notifications. - e, ch := waiter.NewChannelEntry(nil) - // FIXME(b/119878986): This waiter.EventHUp is a partial - // measure, need to figure out how to translate linux events to - // internal events. - s.EventRegister(&e, waiter.EventIn|waiter.EventHUp) - defer s.EventUnregister(&e) - - // Try to accept the connection again; if it fails, then wait until we - // get a notification. - for { - if payload, se = rpcAccept(t, s.fd, peerRequested); se != syserr.ErrTryAgain { - break - } - - if err := t.Block(ch); err != nil { - return 0, nil, 0, syserr.FromError(err) - } - } - } - - // Handle any error from accept. - if se != nil { - return 0, nil, 0, se - } - - var wq waiter.Queue - s.notifier.AddFD(payload.Fd, &wq) - - dirent := socket.NewDirent(t, socketDevice) - defer dirent.DecRef() - fileFlags := fs.FileFlags{ - Read: true, - Write: true, - NonSeekable: true, - NonBlocking: flags&linux.SOCK_NONBLOCK != 0, - } - file := fs.NewFile(t, dirent, fileFlags, &socketOperations{ - family: s.family, - stype: s.stype, - protocol: s.protocol, - wq: &wq, - fd: payload.Fd, - rpcConn: s.rpcConn, - notifier: s.notifier, - }) - defer file.DecRef() - - fd, err := t.NewFDFrom(0, file, kernel.FDFlags{ - CloseOnExec: flags&linux.SOCK_CLOEXEC != 0, - }) - if err != nil { - return 0, nil, 0, syserr.FromError(err) - } - t.Kernel().RecordSocket(file) - - if peerRequested { - return fd, socket.UnmarshalSockAddr(s.family, payload.Address.Address), payload.Address.Length, nil - } - - return fd, nil, 0, nil -} - -// Bind implements socket.Socket.Bind. -func (s *socketOperations) Bind(t *kernel.Task, sockaddr []byte) *syserr.Error { - stack := t.NetworkContext().(*Stack) - id, c := stack.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_Bind{&pb.BindRequest{Fd: s.fd, Address: sockaddr}}}, false /* ignoreResult */) - <-c - - if e := stack.rpcConn.Request(id).Result.(*pb.SyscallResponse_Bind).Bind.ErrorNumber; e != 0 { - return syserr.FromHost(syscall.Errno(e)) - } - return nil -} - -// Listen implements socket.Socket.Listen. -func (s *socketOperations) Listen(t *kernel.Task, backlog int) *syserr.Error { - stack := t.NetworkContext().(*Stack) - id, c := stack.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_Listen{&pb.ListenRequest{Fd: s.fd, Backlog: int64(backlog)}}}, false /* ignoreResult */) - <-c - - if e := stack.rpcConn.Request(id).Result.(*pb.SyscallResponse_Listen).Listen.ErrorNumber; e != 0 { - return syserr.FromHost(syscall.Errno(e)) - } - return nil -} - -// Shutdown implements socket.Socket.Shutdown. -func (s *socketOperations) Shutdown(t *kernel.Task, how int) *syserr.Error { - // We save the shutdown state because of strange differences on linux - // related to recvs on blocking vs. non-blocking sockets after a SHUT_RD. - // We need to emulate that behavior on the blocking side. - // TODO(b/120096741): There is a possible race that can exist with loopback, - // where data could possibly be lost. - s.setShutdownFlags(how) - - stack := t.NetworkContext().(*Stack) - id, c := stack.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_Shutdown{&pb.ShutdownRequest{Fd: s.fd, How: int64(how)}}}, false /* ignoreResult */) - <-c - - if e := stack.rpcConn.Request(id).Result.(*pb.SyscallResponse_Shutdown).Shutdown.ErrorNumber; e != 0 { - return syserr.FromHost(syscall.Errno(e)) - } - - return nil -} - -// GetSockOpt implements socket.Socket.GetSockOpt. -func (s *socketOperations) GetSockOpt(t *kernel.Task, level int, name int, outPtr usermem.Addr, outLen int) (interface{}, *syserr.Error) { - // SO_RCVTIMEO and SO_SNDTIMEO are special because blocking is performed - // within the sentry. - if level == linux.SOL_SOCKET && name == linux.SO_RCVTIMEO { - if outLen < linux.SizeOfTimeval { - return nil, syserr.ErrInvalidArgument - } - - return linux.NsecToTimeval(s.RecvTimeout()), nil - } - if level == linux.SOL_SOCKET && name == linux.SO_SNDTIMEO { - if outLen < linux.SizeOfTimeval { - return nil, syserr.ErrInvalidArgument - } - - return linux.NsecToTimeval(s.SendTimeout()), nil - } - - stack := t.NetworkContext().(*Stack) - id, c := stack.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_GetSockOpt{&pb.GetSockOptRequest{Fd: s.fd, Level: int64(level), Name: int64(name), Length: uint32(outLen)}}}, false /* ignoreResult */) - <-c - - res := stack.rpcConn.Request(id).Result.(*pb.SyscallResponse_GetSockOpt).GetSockOpt.Result - if e, ok := res.(*pb.GetSockOptResponse_ErrorNumber); ok { - return nil, syserr.FromHost(syscall.Errno(e.ErrorNumber)) - } - - return res.(*pb.GetSockOptResponse_Opt).Opt, nil -} - -// SetSockOpt implements socket.Socket.SetSockOpt. -func (s *socketOperations) SetSockOpt(t *kernel.Task, level int, name int, opt []byte) *syserr.Error { - // Because blocking actually happens within the sentry we need to inspect - // this socket option to determine if it's a SO_RCVTIMEO or SO_SNDTIMEO, - // and if so, we will save it and use it as the deadline for recv(2) - // or send(2) related syscalls. - if level == linux.SOL_SOCKET && name == linux.SO_RCVTIMEO { - if len(opt) < linux.SizeOfTimeval { - return syserr.ErrInvalidArgument - } - - var v linux.Timeval - binary.Unmarshal(opt[:linux.SizeOfTimeval], usermem.ByteOrder, &v) - if v.Usec < 0 || v.Usec >= int64(time.Second/time.Microsecond) { - return syserr.ErrDomain - } - s.SetRecvTimeout(v.ToNsecCapped()) - return nil - } - if level == linux.SOL_SOCKET && name == linux.SO_SNDTIMEO { - if len(opt) < linux.SizeOfTimeval { - return syserr.ErrInvalidArgument - } - - var v linux.Timeval - binary.Unmarshal(opt[:linux.SizeOfTimeval], usermem.ByteOrder, &v) - if v.Usec < 0 || v.Usec >= int64(time.Second/time.Microsecond) { - return syserr.ErrDomain - } - s.SetSendTimeout(v.ToNsecCapped()) - return nil - } - - stack := t.NetworkContext().(*Stack) - id, c := stack.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_SetSockOpt{&pb.SetSockOptRequest{Fd: s.fd, Level: int64(level), Name: int64(name), Opt: opt}}}, false /* ignoreResult */) - <-c - - if e := stack.rpcConn.Request(id).Result.(*pb.SyscallResponse_SetSockOpt).SetSockOpt.ErrorNumber; e != 0 { - return syserr.FromHost(syscall.Errno(e)) - } - return nil -} - -// GetPeerName implements socket.Socket.GetPeerName. -func (s *socketOperations) GetPeerName(t *kernel.Task) (linux.SockAddr, uint32, *syserr.Error) { - stack := t.NetworkContext().(*Stack) - id, c := stack.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_GetPeerName{&pb.GetPeerNameRequest{Fd: s.fd}}}, false /* ignoreResult */) - <-c - - res := stack.rpcConn.Request(id).Result.(*pb.SyscallResponse_GetPeerName).GetPeerName.Result - if e, ok := res.(*pb.GetPeerNameResponse_ErrorNumber); ok { - return nil, 0, syserr.FromHost(syscall.Errno(e.ErrorNumber)) - } - - addr := res.(*pb.GetPeerNameResponse_Address).Address - return socket.UnmarshalSockAddr(s.family, addr.Address), addr.Length, nil -} - -// GetSockName implements socket.Socket.GetSockName. -func (s *socketOperations) GetSockName(t *kernel.Task) (linux.SockAddr, uint32, *syserr.Error) { - stack := t.NetworkContext().(*Stack) - id, c := stack.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_GetSockName{&pb.GetSockNameRequest{Fd: s.fd}}}, false /* ignoreResult */) - <-c - - res := stack.rpcConn.Request(id).Result.(*pb.SyscallResponse_GetSockName).GetSockName.Result - if e, ok := res.(*pb.GetSockNameResponse_ErrorNumber); ok { - return nil, 0, syserr.FromHost(syscall.Errno(e.ErrorNumber)) - } - - addr := res.(*pb.GetSockNameResponse_Address).Address - return socket.UnmarshalSockAddr(s.family, addr.Address), addr.Length, nil -} - -func rpcIoctl(t *kernel.Task, fd, cmd uint32, arg []byte) ([]byte, error) { - stack := t.NetworkContext().(*Stack) - - id, c := stack.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_Ioctl{&pb.IOCtlRequest{Fd: fd, Cmd: cmd, Arg: arg}}}, false /* ignoreResult */) - <-c - - res := stack.rpcConn.Request(id).Result.(*pb.SyscallResponse_Ioctl).Ioctl.Result - if e, ok := res.(*pb.IOCtlResponse_ErrorNumber); ok { - return nil, syscall.Errno(e.ErrorNumber) - } - - return res.(*pb.IOCtlResponse_Value).Value, nil -} - -// ifconfIoctlFromStack populates a struct ifconf for the SIOCGIFCONF ioctl. -func ifconfIoctlFromStack(ctx context.Context, io usermem.IO, ifc *linux.IFConf) error { - // If Ptr is NULL, return the necessary buffer size via Len. - // Otherwise, write up to Len bytes starting at Ptr containing ifreq - // structs. - t := ctx.(*kernel.Task) - s := t.NetworkContext().(*Stack) - if s == nil { - return syserr.ErrNoDevice.ToError() - } - - if ifc.Ptr == 0 { - ifc.Len = int32(len(s.Interfaces())) * int32(linux.SizeOfIFReq) - return nil - } - - max := ifc.Len - ifc.Len = 0 - for key, ifaceAddrs := range s.InterfaceAddrs() { - iface := s.Interfaces()[key] - for _, ifaceAddr := range ifaceAddrs { - // Don't write past the end of the buffer. - if ifc.Len+int32(linux.SizeOfIFReq) > max { - break - } - if ifaceAddr.Family != linux.AF_INET { - continue - } - - // Populate ifr.ifr_addr. - ifr := linux.IFReq{} - ifr.SetName(iface.Name) - usermem.ByteOrder.PutUint16(ifr.Data[0:2], uint16(ifaceAddr.Family)) - usermem.ByteOrder.PutUint16(ifr.Data[2:4], 0) - copy(ifr.Data[4:8], ifaceAddr.Addr[:4]) - - // Copy the ifr to userspace. - dst := uintptr(ifc.Ptr) + uintptr(ifc.Len) - ifc.Len += int32(linux.SizeOfIFReq) - if _, err := usermem.CopyObjectOut(ctx, io, usermem.Addr(dst), ifr, usermem.IOOpts{ - AddressSpaceActive: true, - }); err != nil { - return err - } - } - } - return nil -} - -// Ioctl implements fs.FileOperations.Ioctl. -func (s *socketOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { - t := ctx.(*kernel.Task) - - cmd := uint32(args[1].Int()) - arg := args[2].Pointer() - - var buf []byte - switch cmd { - // The following ioctls take 4 byte argument parameters. - case syscall.TIOCINQ, - syscall.TIOCOUTQ: - buf = make([]byte, 4) - // The following ioctls have args which are sizeof(struct ifreq). - case syscall.SIOCGIFADDR, - syscall.SIOCGIFBRDADDR, - syscall.SIOCGIFDSTADDR, - syscall.SIOCGIFFLAGS, - syscall.SIOCGIFHWADDR, - syscall.SIOCGIFINDEX, - syscall.SIOCGIFMAP, - syscall.SIOCGIFMETRIC, - syscall.SIOCGIFMTU, - syscall.SIOCGIFNAME, - syscall.SIOCGIFNETMASK, - syscall.SIOCGIFTXQLEN: - buf = make([]byte, linux.SizeOfIFReq) - case syscall.SIOCGIFCONF: - // SIOCGIFCONF has slightly different behavior than the others, in that it - // will need to populate the array of ifreqs. - var ifc linux.IFConf - if _, err := usermem.CopyObjectIn(ctx, io, args[2].Pointer(), &ifc, usermem.IOOpts{ - AddressSpaceActive: true, - }); err != nil { - return 0, err - } - - if err := ifconfIoctlFromStack(ctx, io, &ifc); err != nil { - return 0, err - } - _, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), ifc, usermem.IOOpts{ - AddressSpaceActive: true, - }) - - return 0, err - - case linux.SIOCGIFMEM, linux.SIOCGIFPFLAGS, linux.SIOCGMIIPHY, linux.SIOCGMIIREG: - unimpl.EmitUnimplementedEvent(ctx) - - default: - return 0, syserror.ENOTTY - } - - _, err := io.CopyIn(ctx, arg, buf, usermem.IOOpts{ - AddressSpaceActive: true, - }) - - if err != nil { - return 0, err - } - - v, err := rpcIoctl(t, s.fd, cmd, buf) - if err != nil { - return 0, err - } - - if len(v) != len(buf) { - return 0, syserror.EINVAL - } - - _, err = io.CopyOut(ctx, arg, v, usermem.IOOpts{ - AddressSpaceActive: true, - }) - return 0, err -} - -func rpcRecvMsg(t *kernel.Task, req *pb.SyscallRequest_Recvmsg) (*pb.RecvmsgResponse_ResultPayload, *syserr.Error) { - s := t.NetworkContext().(*Stack) - id, c := s.rpcConn.NewRequest(pb.SyscallRequest{Args: req}, false /* ignoreResult */) - <-c - - res := s.rpcConn.Request(id).Result.(*pb.SyscallResponse_Recvmsg).Recvmsg.Result - if e, ok := res.(*pb.RecvmsgResponse_ErrorNumber); ok { - return nil, syserr.FromHost(syscall.Errno(e.ErrorNumber)) - } - - return res.(*pb.RecvmsgResponse_Payload).Payload, nil -} - -// Because we only support SO_TIMESTAMP we will search control messages for -// that value and set it if so, all other control messages will be ignored. -func (s *socketOperations) extractControlMessages(payload *pb.RecvmsgResponse_ResultPayload) socket.ControlMessages { - c := socket.ControlMessages{} - if len(payload.GetCmsgData()) > 0 { - // Parse the control messages looking for SO_TIMESTAMP. - msgs, e := syscall.ParseSocketControlMessage(payload.GetCmsgData()) - if e != nil { - return socket.ControlMessages{} - } - for _, m := range msgs { - if m.Header.Level != linux.SOL_SOCKET || m.Header.Type != linux.SO_TIMESTAMP { - continue - } - - // Let's parse the time stamp and set it. - if len(m.Data) < linux.SizeOfTimeval { - // Give up on locating the SO_TIMESTAMP option. - return socket.ControlMessages{} - } - - var v linux.Timeval - binary.Unmarshal(m.Data[:linux.SizeOfTimeval], usermem.ByteOrder, &v) - c.IP.HasTimestamp = true - c.IP.Timestamp = v.ToNsecCapped() - break - } - } - return c -} - -// RecvMsg implements socket.Socket.RecvMsg. -func (s *socketOperations) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags int, haveDeadline bool, deadline ktime.Time, senderRequested bool, controlDataLen uint64) (int, int, linux.SockAddr, uint32, socket.ControlMessages, *syserr.Error) { - req := &pb.SyscallRequest_Recvmsg{&pb.RecvmsgRequest{ - Fd: s.fd, - Length: uint32(dst.NumBytes()), - Sender: senderRequested, - Trunc: flags&linux.MSG_TRUNC != 0, - Peek: flags&linux.MSG_PEEK != 0, - CmsgLength: uint32(controlDataLen), - }} - - res, err := rpcRecvMsg(t, req) - if err == nil { - var e error - var n int - if len(res.Data) > 0 { - n, e = dst.CopyOut(t, res.Data) - if e == nil && n != len(res.Data) { - panic("CopyOut failed to copy full buffer") - } - } - c := s.extractControlMessages(res) - return int(res.Length), 0, socket.UnmarshalSockAddr(s.family, res.Address.GetAddress()), res.Address.GetLength(), c, syserr.FromError(e) - } - if err != syserr.ErrWouldBlock && err != syserr.ErrTryAgain || flags&linux.MSG_DONTWAIT != 0 { - return 0, 0, nil, 0, socket.ControlMessages{}, err - } - - // We'll have to block. Register for notifications and keep trying to - // send all the data. - e, ch := waiter.NewChannelEntry(nil) - s.EventRegister(&e, waiter.EventIn) - defer s.EventUnregister(&e) - - for { - res, err := rpcRecvMsg(t, req) - if err == nil { - var e error - var n int - if len(res.Data) > 0 { - n, e = dst.CopyOut(t, res.Data) - if e == nil && n != len(res.Data) { - panic("CopyOut failed to copy full buffer") - } - } - c := s.extractControlMessages(res) - return int(res.Length), 0, socket.UnmarshalSockAddr(s.family, res.Address.GetAddress()), res.Address.GetLength(), c, syserr.FromError(e) - } - if err != syserr.ErrWouldBlock && err != syserr.ErrTryAgain { - return 0, 0, nil, 0, socket.ControlMessages{}, err - } - - if s.isShutRdSet() { - // Blocking would have caused us to block indefinitely so we return 0, - // this is the same behavior as Linux. - return 0, 0, nil, 0, socket.ControlMessages{}, nil - } - - if err := t.BlockWithDeadline(ch, haveDeadline, deadline); err != nil { - if err == syserror.ETIMEDOUT { - return 0, 0, nil, 0, socket.ControlMessages{}, syserr.ErrTryAgain - } - return 0, 0, nil, 0, socket.ControlMessages{}, syserr.FromError(err) - } - } -} - -func rpcSendMsg(t *kernel.Task, req *pb.SyscallRequest_Sendmsg) (uint32, *syserr.Error) { - s := t.NetworkContext().(*Stack) - id, c := s.rpcConn.NewRequest(pb.SyscallRequest{Args: req}, false /* ignoreResult */) - <-c - - res := s.rpcConn.Request(id).Result.(*pb.SyscallResponse_Sendmsg).Sendmsg.Result - if e, ok := res.(*pb.SendmsgResponse_ErrorNumber); ok { - return 0, syserr.FromHost(syscall.Errno(e.ErrorNumber)) - } - - return res.(*pb.SendmsgResponse_Length).Length, nil -} - -// SendMsg implements socket.Socket.SendMsg. -func (s *socketOperations) SendMsg(t *kernel.Task, src usermem.IOSequence, to []byte, flags int, haveDeadline bool, deadline ktime.Time, controlMessages socket.ControlMessages) (int, *syserr.Error) { - // Whitelist flags. - if flags&^(syscall.MSG_DONTWAIT|syscall.MSG_EOR|syscall.MSG_FASTOPEN|syscall.MSG_MORE|syscall.MSG_NOSIGNAL) != 0 { - return 0, syserr.ErrInvalidArgument - } - - // Reject Unix control messages. - if !controlMessages.Unix.Empty() { - return 0, syserr.ErrInvalidArgument - } - - v := buffer.NewView(int(src.NumBytes())) - - // Copy all the data into the buffer. - if _, err := src.CopyIn(t, v); err != nil { - return 0, syserr.FromError(err) - } - - // TODO(bgeffon): this needs to change to map directly to a SendMsg syscall - // in the RPC. - totalWritten := 0 - n, err := rpcSendMsg(t, &pb.SyscallRequest_Sendmsg{&pb.SendmsgRequest{ - Fd: uint32(s.fd), - Data: v, - Address: to, - More: flags&linux.MSG_MORE != 0, - EndOfRecord: flags&linux.MSG_EOR != 0, - }}) - - if err != syserr.ErrWouldBlock && err != syserr.ErrTryAgain || flags&linux.MSG_DONTWAIT != 0 { - return int(n), err - } - - if n > 0 { - totalWritten += int(n) - v.TrimFront(int(n)) - } - - // We'll have to block. Register for notification and keep trying to - // send all the data. - e, ch := waiter.NewChannelEntry(nil) - s.EventRegister(&e, waiter.EventOut) - defer s.EventUnregister(&e) - - for { - n, err := rpcSendMsg(t, &pb.SyscallRequest_Sendmsg{&pb.SendmsgRequest{ - Fd: uint32(s.fd), - Data: v, - Address: to, - More: flags&linux.MSG_MORE != 0, - EndOfRecord: flags&linux.MSG_EOR != 0, - }}) - - if n > 0 { - totalWritten += int(n) - v.TrimFront(int(n)) - - if err == nil && totalWritten < int(src.NumBytes()) { - continue - } - } - - if err != syserr.ErrWouldBlock && err != syserr.ErrTryAgain { - // We eat the error in this situation. - return int(totalWritten), nil - } - - if err := t.BlockWithDeadline(ch, haveDeadline, deadline); err != nil { - if err == syserror.ETIMEDOUT { - return int(totalWritten), syserr.ErrTryAgain - } - return int(totalWritten), syserr.FromError(err) - } - } -} - -// State implements socket.Socket.State. -func (s *socketOperations) State() uint32 { - // TODO(b/127845868): Define a new rpc to query the socket state. - return 0 -} - -// Type implements socket.Socket.Type. -func (s *socketOperations) Type() (family int, skType linux.SockType, protocol int) { - return s.family, s.stype, s.protocol -} - -type socketProvider struct { - family int -} - -// Socket implements socket.Provider.Socket. -func (p *socketProvider) Socket(t *kernel.Task, stypeflags linux.SockType, protocol int) (*fs.File, *syserr.Error) { - // Check that we are using the RPC network stack. - stack := t.NetworkContext() - if stack == nil { - return nil, nil - } - - s, ok := stack.(*Stack) - if !ok { - return nil, nil - } - - // Only accept TCP and UDP. - // - // Try to restrict the flags we will accept to minimize backwards - // incompatibility with netstack. - stype := stypeflags & linux.SOCK_TYPE_MASK - switch stype { - case syscall.SOCK_STREAM: - switch protocol { - case 0, syscall.IPPROTO_TCP: - // ok - default: - return nil, nil - } - case syscall.SOCK_DGRAM: - switch protocol { - case 0, syscall.IPPROTO_UDP: - // ok - default: - return nil, nil - } - default: - return nil, nil - } - - return newSocketFile(t, s, p.family, stype, protocol) -} - -// Pair implements socket.Provider.Pair. -func (p *socketProvider) Pair(t *kernel.Task, stype linux.SockType, protocol int) (*fs.File, *fs.File, *syserr.Error) { - // Not supported by AF_INET/AF_INET6. - return nil, nil, nil -} - -func init() { - for _, family := range []int{syscall.AF_INET, syscall.AF_INET6} { - socket.RegisterProvider(family, &socketProvider{family}) - } -} diff --git a/pkg/sentry/socket/rpcinet/stack.go b/pkg/sentry/socket/rpcinet/stack.go deleted file mode 100644 index f7878a760..000000000 --- a/pkg/sentry/socket/rpcinet/stack.go +++ /dev/null @@ -1,177 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package rpcinet - -import ( - "fmt" - "syscall" - - "gvisor.dev/gvisor/pkg/sentry/inet" - "gvisor.dev/gvisor/pkg/sentry/socket/hostinet" - "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/conn" - "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/notifier" - "gvisor.dev/gvisor/pkg/syserr" - "gvisor.dev/gvisor/pkg/tcpip/stack" - "gvisor.dev/gvisor/pkg/unet" -) - -// Stack implements inet.Stack for RPC backed sockets. -type Stack struct { - interfaces map[int32]inet.Interface - interfaceAddrs map[int32][]inet.InterfaceAddr - routes []inet.Route - rpcConn *conn.RPCConnection - notifier *notifier.Notifier -} - -// NewStack returns a Stack containing the current state of the host network -// stack. -func NewStack(fd int32) (*Stack, error) { - sock, err := unet.NewSocket(int(fd)) - if err != nil { - return nil, err - } - - stack := &Stack{ - interfaces: make(map[int32]inet.Interface), - interfaceAddrs: make(map[int32][]inet.InterfaceAddr), - rpcConn: conn.NewRPCConnection(sock), - } - - var e error - stack.notifier, e = notifier.NewRPCNotifier(stack.rpcConn) - if e != nil { - return nil, e - } - - links, err := stack.DoNetlinkRouteRequest(syscall.RTM_GETLINK) - if err != nil { - return nil, fmt.Errorf("RTM_GETLINK failed: %v", err) - } - - addrs, err := stack.DoNetlinkRouteRequest(syscall.RTM_GETADDR) - if err != nil { - return nil, fmt.Errorf("RTM_GETADDR failed: %v", err) - } - - e = hostinet.ExtractHostInterfaces(links, addrs, stack.interfaces, stack.interfaceAddrs) - if e != nil { - return nil, e - } - - routes, err := stack.DoNetlinkRouteRequest(syscall.RTM_GETROUTE) - if err != nil { - return nil, fmt.Errorf("RTM_GETROUTE failed: %v", err) - } - - stack.routes, e = hostinet.ExtractHostRoutes(routes) - if e != nil { - return nil, e - } - - return stack, nil -} - -// RPCReadFile will execute the ReadFile helper RPC method which avoids the -// common pattern of open(2), read(2), close(2) by doing all three operations -// as a single RPC. It will read the entire file or return EFBIG if the file -// was too large. -func (s *Stack) RPCReadFile(path string) ([]byte, *syserr.Error) { - return s.rpcConn.RPCReadFile(path) -} - -// RPCWriteFile will execute the WriteFile helper RPC method which avoids the -// common pattern of open(2), write(2), write(2), close(2) by doing all -// operations as a single RPC. -func (s *Stack) RPCWriteFile(path string, data []byte) (int64, *syserr.Error) { - return s.rpcConn.RPCWriteFile(path, data) -} - -// Interfaces implements inet.Stack.Interfaces. -func (s *Stack) Interfaces() map[int32]inet.Interface { - interfaces := make(map[int32]inet.Interface) - for k, v := range s.interfaces { - interfaces[k] = v - } - return interfaces -} - -// InterfaceAddrs implements inet.Stack.InterfaceAddrs. -func (s *Stack) InterfaceAddrs() map[int32][]inet.InterfaceAddr { - addrs := make(map[int32][]inet.InterfaceAddr) - for k, v := range s.interfaceAddrs { - addrs[k] = append([]inet.InterfaceAddr(nil), v...) - } - return addrs -} - -// SupportsIPv6 implements inet.Stack.SupportsIPv6. -func (s *Stack) SupportsIPv6() bool { - panic("rpcinet handles procfs directly this method should not be called") -} - -// TCPReceiveBufferSize implements inet.Stack.TCPReceiveBufferSize. -func (s *Stack) TCPReceiveBufferSize() (inet.TCPBufferSize, error) { - panic("rpcinet handles procfs directly this method should not be called") -} - -// SetTCPReceiveBufferSize implements inet.Stack.SetTCPReceiveBufferSize. -func (s *Stack) SetTCPReceiveBufferSize(size inet.TCPBufferSize) error { - panic("rpcinet handles procfs directly this method should not be called") - -} - -// TCPSendBufferSize implements inet.Stack.TCPSendBufferSize. -func (s *Stack) TCPSendBufferSize() (inet.TCPBufferSize, error) { - panic("rpcinet handles procfs directly this method should not be called") - -} - -// SetTCPSendBufferSize implements inet.Stack.SetTCPSendBufferSize. -func (s *Stack) SetTCPSendBufferSize(size inet.TCPBufferSize) error { - panic("rpcinet handles procfs directly this method should not be called") -} - -// TCPSACKEnabled implements inet.Stack.TCPSACKEnabled. -func (s *Stack) TCPSACKEnabled() (bool, error) { - panic("rpcinet handles procfs directly this method should not be called") -} - -// SetTCPSACKEnabled implements inet.Stack.SetTCPSACKEnabled. -func (s *Stack) SetTCPSACKEnabled(enabled bool) error { - panic("rpcinet handles procfs directly this method should not be called") -} - -// Statistics implements inet.Stack.Statistics. -func (s *Stack) Statistics(stat interface{}, arg string) error { - return syserr.ErrEndpointOperation.ToError() -} - -// RouteTable implements inet.Stack.RouteTable. -func (s *Stack) RouteTable() []inet.Route { - return append([]inet.Route(nil), s.routes...) -} - -// Resume implements inet.Stack.Resume. -func (s *Stack) Resume() {} - -// RegisteredEndpoints implements inet.Stack.RegisteredEndpoints. -func (s *Stack) RegisteredEndpoints() []stack.TransportEndpoint { return nil } - -// CleanupEndpoints implements inet.Stack.CleanupEndpoints. -func (s *Stack) CleanupEndpoints() []stack.TransportEndpoint { return nil } - -// RestoreCleanupEndpoints implements inet.Stack.RestoreCleanupEndpoints. -func (s *Stack) RestoreCleanupEndpoints([]stack.TransportEndpoint) {} diff --git a/pkg/sentry/socket/rpcinet/stack_unsafe.go b/pkg/sentry/socket/rpcinet/stack_unsafe.go deleted file mode 100644 index a94bdad83..000000000 --- a/pkg/sentry/socket/rpcinet/stack_unsafe.go +++ /dev/null @@ -1,193 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package rpcinet - -import ( - "syscall" - "unsafe" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/binary" - pb "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/syscall_rpc_go_proto" - "gvisor.dev/gvisor/pkg/sentry/usermem" - "gvisor.dev/gvisor/pkg/syserr" -) - -// NewNetlinkRouteRequest builds a netlink message for getting the RIB, -// the routing information base. -func newNetlinkRouteRequest(proto, seq, family int) []byte { - rr := &syscall.NetlinkRouteRequest{} - rr.Header.Len = uint32(syscall.NLMSG_HDRLEN + syscall.SizeofRtGenmsg) - rr.Header.Type = uint16(proto) - rr.Header.Flags = syscall.NLM_F_DUMP | syscall.NLM_F_REQUEST - rr.Header.Seq = uint32(seq) - rr.Data.Family = uint8(family) - return netlinkRRtoWireFormat(rr) -} - -func netlinkRRtoWireFormat(rr *syscall.NetlinkRouteRequest) []byte { - b := make([]byte, rr.Header.Len) - *(*uint32)(unsafe.Pointer(&b[0:4][0])) = rr.Header.Len - *(*uint16)(unsafe.Pointer(&b[4:6][0])) = rr.Header.Type - *(*uint16)(unsafe.Pointer(&b[6:8][0])) = rr.Header.Flags - *(*uint32)(unsafe.Pointer(&b[8:12][0])) = rr.Header.Seq - *(*uint32)(unsafe.Pointer(&b[12:16][0])) = rr.Header.Pid - b[16] = byte(rr.Data.Family) - return b -} - -func (s *Stack) getNetlinkFd() (uint32, *syserr.Error) { - id, c := s.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_Socket{&pb.SocketRequest{Family: int64(syscall.AF_NETLINK), Type: int64(syscall.SOCK_RAW | syscall.SOCK_NONBLOCK), Protocol: int64(syscall.NETLINK_ROUTE)}}}, false /* ignoreResult */) - <-c - - res := s.rpcConn.Request(id).Result.(*pb.SyscallResponse_Socket).Socket.Result - if e, ok := res.(*pb.SocketResponse_ErrorNumber); ok { - return 0, syserr.FromHost(syscall.Errno(e.ErrorNumber)) - } - return res.(*pb.SocketResponse_Fd).Fd, nil -} - -func (s *Stack) bindNetlinkFd(fd uint32, sockaddr []byte) *syserr.Error { - id, c := s.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_Bind{&pb.BindRequest{Fd: fd, Address: sockaddr}}}, false /* ignoreResult */) - <-c - - if e := s.rpcConn.Request(id).Result.(*pb.SyscallResponse_Bind).Bind.ErrorNumber; e != 0 { - return syserr.FromHost(syscall.Errno(e)) - } - return nil -} - -func (s *Stack) closeNetlinkFd(fd uint32) { - _, _ = s.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_Close{&pb.CloseRequest{Fd: fd}}}, true /* ignoreResult */) -} - -func (s *Stack) rpcSendMsg(req *pb.SyscallRequest_Sendmsg) (uint32, *syserr.Error) { - id, c := s.rpcConn.NewRequest(pb.SyscallRequest{Args: req}, false /* ignoreResult */) - <-c - - res := s.rpcConn.Request(id).Result.(*pb.SyscallResponse_Sendmsg).Sendmsg.Result - if e, ok := res.(*pb.SendmsgResponse_ErrorNumber); ok { - return 0, syserr.FromHost(syscall.Errno(e.ErrorNumber)) - } - - return res.(*pb.SendmsgResponse_Length).Length, nil -} - -func (s *Stack) sendMsg(fd uint32, buf []byte, to []byte, flags int) (int, *syserr.Error) { - // Whitelist flags. - if flags&^(syscall.MSG_DONTWAIT|syscall.MSG_EOR|syscall.MSG_FASTOPEN|syscall.MSG_MORE|syscall.MSG_NOSIGNAL) != 0 { - return 0, syserr.ErrInvalidArgument - } - - req := &pb.SyscallRequest_Sendmsg{&pb.SendmsgRequest{ - Fd: fd, - Data: buf, - Address: to, - More: flags&linux.MSG_MORE != 0, - EndOfRecord: flags&linux.MSG_EOR != 0, - }} - - n, err := s.rpcSendMsg(req) - return int(n), err -} - -func (s *Stack) rpcRecvMsg(req *pb.SyscallRequest_Recvmsg) (*pb.RecvmsgResponse_ResultPayload, *syserr.Error) { - id, c := s.rpcConn.NewRequest(pb.SyscallRequest{Args: req}, false /* ignoreResult */) - <-c - - res := s.rpcConn.Request(id).Result.(*pb.SyscallResponse_Recvmsg).Recvmsg.Result - if e, ok := res.(*pb.RecvmsgResponse_ErrorNumber); ok { - return nil, syserr.FromHost(syscall.Errno(e.ErrorNumber)) - } - - return res.(*pb.RecvmsgResponse_Payload).Payload, nil -} - -func (s *Stack) recvMsg(fd, l, flags uint32) ([]byte, *syserr.Error) { - req := &pb.SyscallRequest_Recvmsg{&pb.RecvmsgRequest{ - Fd: fd, - Length: l, - Sender: false, - Trunc: flags&linux.MSG_TRUNC != 0, - Peek: flags&linux.MSG_PEEK != 0, - }} - - res, err := s.rpcRecvMsg(req) - if err != nil { - return nil, err - } - return res.Data, nil -} - -func (s *Stack) netlinkRequest(proto, family int) ([]byte, error) { - fd, err := s.getNetlinkFd() - if err != nil { - return nil, err.ToError() - } - defer s.closeNetlinkFd(fd) - - lsa := syscall.SockaddrNetlink{Family: syscall.AF_NETLINK} - b := binary.Marshal(nil, usermem.ByteOrder, &lsa) - if err := s.bindNetlinkFd(fd, b); err != nil { - return nil, err.ToError() - } - - wb := newNetlinkRouteRequest(proto, 1, family) - _, err = s.sendMsg(fd, wb, b, 0) - if err != nil { - return nil, err.ToError() - } - - var tab []byte -done: - for { - rb, err := s.recvMsg(fd, uint32(syscall.Getpagesize()), 0) - nr := len(rb) - if err != nil { - return nil, err.ToError() - } - - if nr < syscall.NLMSG_HDRLEN { - return nil, syserr.ErrInvalidArgument.ToError() - } - - tab = append(tab, rb...) - msgs, e := syscall.ParseNetlinkMessage(rb) - if e != nil { - return nil, e - } - - for _, m := range msgs { - if m.Header.Type == syscall.NLMSG_DONE { - break done - } - if m.Header.Type == syscall.NLMSG_ERROR { - return nil, syserr.ErrInvalidArgument.ToError() - } - } - } - - return tab, nil -} - -// DoNetlinkRouteRequest returns routing information base, also known as RIB, -// which consists of network facility information, states and parameters. -func (s *Stack) DoNetlinkRouteRequest(req int) ([]syscall.NetlinkMessage, error) { - data, err := s.netlinkRequest(req, syscall.AF_UNSPEC) - if err != nil { - return nil, err - } - return syscall.ParseNetlinkMessage(data) -} diff --git a/pkg/sentry/socket/rpcinet/syscall_rpc.proto b/pkg/sentry/socket/rpcinet/syscall_rpc.proto deleted file mode 100644 index b677e9eb3..000000000 --- a/pkg/sentry/socket/rpcinet/syscall_rpc.proto +++ /dev/null @@ -1,352 +0,0 @@ -syntax = "proto3"; - -// package syscall_rpc is a set of networking related system calls that can be -// forwarded to a socket gofer. -// -package syscall_rpc; - -message SendmsgRequest { - uint32 fd = 1; - bytes data = 2 [ctype = CORD]; - bytes address = 3; - bool more = 4; - bool end_of_record = 5; -} - -message SendmsgResponse { - oneof result { - uint32 error_number = 1; - uint32 length = 2; - } -} - -message IOCtlRequest { - uint32 fd = 1; - uint32 cmd = 2; - bytes arg = 3; -} - -message IOCtlResponse { - oneof result { - uint32 error_number = 1; - bytes value = 2; - } -} - -message RecvmsgRequest { - uint32 fd = 1; - uint32 length = 2; - bool sender = 3; - bool peek = 4; - bool trunc = 5; - uint32 cmsg_length = 6; -} - -message OpenRequest { - bytes path = 1; - uint32 flags = 2; - uint32 mode = 3; -} - -message OpenResponse { - oneof result { - uint32 error_number = 1; - uint32 fd = 2; - } -} - -message ReadRequest { - uint32 fd = 1; - uint32 length = 2; -} - -message ReadResponse { - oneof result { - uint32 error_number = 1; - bytes data = 2 [ctype = CORD]; - } -} - -message ReadFileRequest { - string path = 1; -} - -message ReadFileResponse { - oneof result { - uint32 error_number = 1; - bytes data = 2 [ctype = CORD]; - } -} - -message WriteRequest { - uint32 fd = 1; - bytes data = 2 [ctype = CORD]; -} - -message WriteResponse { - oneof result { - uint32 error_number = 1; - uint32 length = 2; - } -} - -message WriteFileRequest { - string path = 1; - bytes content = 2; -} - -message WriteFileResponse { - uint32 error_number = 1; - uint32 written = 2; -} - -message AddressResponse { - bytes address = 1; - uint32 length = 2; -} - -message RecvmsgResponse { - message ResultPayload { - bytes data = 1 [ctype = CORD]; - AddressResponse address = 2; - uint32 length = 3; - bytes cmsg_data = 4; - } - oneof result { - uint32 error_number = 1; - ResultPayload payload = 2; - } -} - -message BindRequest { - uint32 fd = 1; - bytes address = 2; -} - -message BindResponse { - uint32 error_number = 1; -} - -message AcceptRequest { - uint32 fd = 1; - bool peer = 2; - int64 flags = 3; -} - -message AcceptResponse { - message ResultPayload { - uint32 fd = 1; - AddressResponse address = 2; - } - oneof result { - uint32 error_number = 1; - ResultPayload payload = 2; - } -} - -message ConnectRequest { - uint32 fd = 1; - bytes address = 2; -} - -message ConnectResponse { - uint32 error_number = 1; -} - -message ListenRequest { - uint32 fd = 1; - int64 backlog = 2; -} - -message ListenResponse { - uint32 error_number = 1; -} - -message ShutdownRequest { - uint32 fd = 1; - int64 how = 2; -} - -message ShutdownResponse { - uint32 error_number = 1; -} - -message CloseRequest { - uint32 fd = 1; -} - -message CloseResponse { - uint32 error_number = 1; -} - -message GetSockOptRequest { - uint32 fd = 1; - int64 level = 2; - int64 name = 3; - uint32 length = 4; -} - -message GetSockOptResponse { - oneof result { - uint32 error_number = 1; - bytes opt = 2; - } -} - -message SetSockOptRequest { - uint32 fd = 1; - int64 level = 2; - int64 name = 3; - bytes opt = 4; -} - -message SetSockOptResponse { - uint32 error_number = 1; -} - -message GetSockNameRequest { - uint32 fd = 1; -} - -message GetSockNameResponse { - oneof result { - uint32 error_number = 1; - AddressResponse address = 2; - } -} - -message GetPeerNameRequest { - uint32 fd = 1; -} - -message GetPeerNameResponse { - oneof result { - uint32 error_number = 1; - AddressResponse address = 2; - } -} - -message SocketRequest { - int64 family = 1; - int64 type = 2; - int64 protocol = 3; -} - -message SocketResponse { - oneof result { - uint32 error_number = 1; - uint32 fd = 2; - } -} - -message EpollWaitRequest { - uint32 fd = 1; - uint32 num_events = 2; - sint64 msec = 3; -} - -message EpollEvent { - uint32 fd = 1; - uint32 events = 2; -} - -message EpollEvents { - repeated EpollEvent events = 1; -} - -message EpollWaitResponse { - oneof result { - uint32 error_number = 1; - EpollEvents events = 2; - } -} - -message EpollCtlRequest { - uint32 epfd = 1; - int64 op = 2; - uint32 fd = 3; - EpollEvent event = 4; -} - -message EpollCtlResponse { - uint32 error_number = 1; -} - -message EpollCreate1Request { - int64 flag = 1; -} - -message EpollCreate1Response { - oneof result { - uint32 error_number = 1; - uint32 fd = 2; - } -} - -message PollRequest { - uint32 fd = 1; - uint32 events = 2; -} - -message PollResponse { - oneof result { - uint32 error_number = 1; - uint32 events = 2; - } -} - -message SyscallRequest { - oneof args { - SocketRequest socket = 1; - SendmsgRequest sendmsg = 2; - RecvmsgRequest recvmsg = 3; - BindRequest bind = 4; - AcceptRequest accept = 5; - ConnectRequest connect = 6; - ListenRequest listen = 7; - ShutdownRequest shutdown = 8; - CloseRequest close = 9; - GetSockOptRequest get_sock_opt = 10; - SetSockOptRequest set_sock_opt = 11; - GetSockNameRequest get_sock_name = 12; - GetPeerNameRequest get_peer_name = 13; - EpollWaitRequest epoll_wait = 14; - EpollCtlRequest epoll_ctl = 15; - EpollCreate1Request epoll_create1 = 16; - PollRequest poll = 17; - ReadRequest read = 18; - WriteRequest write = 19; - OpenRequest open = 20; - IOCtlRequest ioctl = 21; - WriteFileRequest write_file = 22; - ReadFileRequest read_file = 23; - } -} - -message SyscallResponse { - oneof result { - SocketResponse socket = 1; - SendmsgResponse sendmsg = 2; - RecvmsgResponse recvmsg = 3; - BindResponse bind = 4; - AcceptResponse accept = 5; - ConnectResponse connect = 6; - ListenResponse listen = 7; - ShutdownResponse shutdown = 8; - CloseResponse close = 9; - GetSockOptResponse get_sock_opt = 10; - SetSockOptResponse set_sock_opt = 11; - GetSockNameResponse get_sock_name = 12; - GetPeerNameResponse get_peer_name = 13; - EpollWaitResponse epoll_wait = 14; - EpollCtlResponse epoll_ctl = 15; - EpollCreate1Response epoll_create1 = 16; - PollResponse poll = 17; - ReadResponse read = 18; - WriteResponse write = 19; - OpenResponse open = 20; - IOCtlResponse ioctl = 21; - WriteFileResponse write_file = 22; - ReadFileResponse read_file = 23; - } -} -- cgit v1.2.3 From 47d85257d3d015f0b9f7739c81af0ee9f510aaf5 Mon Sep 17 00:00:00 2001 From: Eyal Soha Date: Fri, 17 Jan 2020 18:24:39 -0800 Subject: Filter out received packets with a local source IP address. CERT Advisory CA-96.21 III. Solution advises that devices drop packets which could not have correctly arrived on the wire, such as receiving a packet where the source IP address is owned by the device that sent it. Fixes #1507 PiperOrigin-RevId: 290378240 --- pkg/sentry/socket/netstack/netstack.go | 15 +++++----- pkg/sentry/socket/netstack/stack.go | 38 ++++++++++++------------- pkg/tcpip/stack/nic.go | 14 +++++++-- pkg/tcpip/tcpip.go | 10 +++++-- pkg/tcpip/transport/udp/udp_test.go | 52 ++++++++++++++++++++++++++++++++-- 5 files changed, 95 insertions(+), 34 deletions(-) (limited to 'pkg/sentry/socket') diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go index d2f7e987d..fec575357 100644 --- a/pkg/sentry/socket/netstack/netstack.go +++ b/pkg/sentry/socket/netstack/netstack.go @@ -138,13 +138,14 @@ var Metrics = tcpip.Stats{ }, }, IP: tcpip.IPStats{ - PacketsReceived: mustCreateMetric("/netstack/ip/packets_received", "Total number of IP packets received from the link layer in nic.DeliverNetworkPacket."), - InvalidAddressesReceived: mustCreateMetric("/netstack/ip/invalid_addresses_received", "Total number of IP packets received with an unknown or invalid destination address."), - PacketsDelivered: mustCreateMetric("/netstack/ip/packets_delivered", "Total number of incoming IP packets that are successfully delivered to the transport layer via HandlePacket."), - PacketsSent: mustCreateMetric("/netstack/ip/packets_sent", "Total number of IP packets sent via WritePacket."), - OutgoingPacketErrors: mustCreateMetric("/netstack/ip/outgoing_packet_errors", "Total number of IP packets which failed to write to a link-layer endpoint."), - MalformedPacketsReceived: mustCreateMetric("/netstack/ip/malformed_packets_received", "Total number of IP packets which failed IP header validation checks."), - MalformedFragmentsReceived: mustCreateMetric("/netstack/ip/malformed_fragments_received", "Total number of IP fragments which failed IP fragment validation checks."), + PacketsReceived: mustCreateMetric("/netstack/ip/packets_received", "Total number of IP packets received from the link layer in nic.DeliverNetworkPacket."), + InvalidDestinationAddressesReceived: mustCreateMetric("/netstack/ip/invalid_addresses_received", "Total number of IP packets received with an unknown or invalid destination address."), + InvalidSourceAddressesReceived: mustCreateMetric("/netstack/ip/invalid_source_addresses_received", "Total number of IP packets received with an unknown or invalid source address."), + PacketsDelivered: mustCreateMetric("/netstack/ip/packets_delivered", "Total number of incoming IP packets that are successfully delivered to the transport layer via HandlePacket."), + PacketsSent: mustCreateMetric("/netstack/ip/packets_sent", "Total number of IP packets sent via WritePacket."), + OutgoingPacketErrors: mustCreateMetric("/netstack/ip/outgoing_packet_errors", "Total number of IP packets which failed to write to a link-layer endpoint."), + MalformedPacketsReceived: mustCreateMetric("/netstack/ip/malformed_packets_received", "Total number of IP packets which failed IP header validation checks."), + MalformedFragmentsReceived: mustCreateMetric("/netstack/ip/malformed_fragments_received", "Total number of IP fragments which failed IP fragment validation checks."), }, TCP: tcpip.TCPStats{ ActiveConnectionOpenings: mustCreateMetric("/netstack/tcp/active_connection_openings", "Number of connections opened successfully via Connect."), diff --git a/pkg/sentry/socket/netstack/stack.go b/pkg/sentry/socket/netstack/stack.go index a0db2d4fd..31ea66eca 100644 --- a/pkg/sentry/socket/netstack/stack.go +++ b/pkg/sentry/socket/netstack/stack.go @@ -148,25 +148,25 @@ func (s *Stack) Statistics(stat interface{}, arg string) error { case *inet.StatSNMPIP: ip := Metrics.IP *stats = inet.StatSNMPIP{ - 0, // TODO(gvisor.dev/issue/969): Support Ip/Forwarding. - 0, // TODO(gvisor.dev/issue/969): Support Ip/DefaultTTL. - ip.PacketsReceived.Value(), // InReceives. - 0, // TODO(gvisor.dev/issue/969): Support Ip/InHdrErrors. - ip.InvalidAddressesReceived.Value(), // InAddrErrors. - 0, // TODO(gvisor.dev/issue/969): Support Ip/ForwDatagrams. - 0, // TODO(gvisor.dev/issue/969): Support Ip/InUnknownProtos. - 0, // TODO(gvisor.dev/issue/969): Support Ip/InDiscards. - ip.PacketsDelivered.Value(), // InDelivers. - ip.PacketsSent.Value(), // OutRequests. - ip.OutgoingPacketErrors.Value(), // OutDiscards. - 0, // TODO(gvisor.dev/issue/969): Support Ip/OutNoRoutes. - 0, // TODO(gvisor.dev/issue/969): Support Ip/ReasmTimeout. - 0, // TODO(gvisor.dev/issue/969): Support Ip/ReasmReqds. - 0, // TODO(gvisor.dev/issue/969): Support Ip/ReasmOKs. - 0, // TODO(gvisor.dev/issue/969): Support Ip/ReasmFails. - 0, // TODO(gvisor.dev/issue/969): Support Ip/FragOKs. - 0, // TODO(gvisor.dev/issue/969): Support Ip/FragFails. - 0, // TODO(gvisor.dev/issue/969): Support Ip/FragCreates. + 0, // TODO(gvisor.dev/issue/969): Support Ip/Forwarding. + 0, // TODO(gvisor.dev/issue/969): Support Ip/DefaultTTL. + ip.PacketsReceived.Value(), // InReceives. + 0, // TODO(gvisor.dev/issue/969): Support Ip/InHdrErrors. + ip.InvalidDestinationAddressesReceived.Value(), // InAddrErrors. + 0, // TODO(gvisor.dev/issue/969): Support Ip/ForwDatagrams. + 0, // TODO(gvisor.dev/issue/969): Support Ip/InUnknownProtos. + 0, // TODO(gvisor.dev/issue/969): Support Ip/InDiscards. + ip.PacketsDelivered.Value(), // InDelivers. + ip.PacketsSent.Value(), // OutRequests. + ip.OutgoingPacketErrors.Value(), // OutDiscards. + 0, // TODO(gvisor.dev/issue/969): Support Ip/OutNoRoutes. + 0, // TODO(gvisor.dev/issue/969): Support Ip/ReasmTimeout. + 0, // TODO(gvisor.dev/issue/969): Support Ip/ReasmReqds. + 0, // TODO(gvisor.dev/issue/969): Support Ip/ReasmOKs. + 0, // TODO(gvisor.dev/issue/969): Support Ip/ReasmFails. + 0, // TODO(gvisor.dev/issue/969): Support Ip/FragOKs. + 0, // TODO(gvisor.dev/issue/969): Support Ip/FragFails. + 0, // TODO(gvisor.dev/issue/969): Support Ip/FragCreates. } case *inet.StatSNMPICMP: in := Metrics.ICMP.V4PacketsReceived.ICMPv4PacketStats diff --git a/pkg/tcpip/stack/nic.go b/pkg/tcpip/stack/nic.go index 53abf29e5..4afe7b744 100644 --- a/pkg/tcpip/stack/nic.go +++ b/pkg/tcpip/stack/nic.go @@ -984,7 +984,7 @@ func handlePacket(protocol tcpip.NetworkProtocolNumber, dst, src tcpip.Address, // DeliverNetworkPacket finds the appropriate network protocol endpoint and // hands the packet over for further processing. This function is called when -// the NIC receives a packet from the physical interface. +// the NIC receives a packet from the link endpoint. // Note that the ownership of the slice backing vv is retained by the caller. // This rule applies only to the slice itself, not to the items of the slice; // the ownership of the items is not retained by the caller. @@ -1029,6 +1029,14 @@ func (n *NIC) DeliverNetworkPacket(linkEP LinkEndpoint, remote, local tcpip.Link src, dst := netProto.ParseAddresses(pkt.Data.First()) + if n.stack.handleLocal && !n.isLoopback() && n.getRef(protocol, src) != nil { + // The source address is one of our own, so we never should have gotten a + // packet like this unless handleLocal is false. Loopback also calls this + // function even though the packets didn't come from the physical interface + // so don't drop those. + n.stack.stats.IP.InvalidSourceAddressesReceived.Increment() + return + } if ref := n.getRef(protocol, dst); ref != nil { handlePacket(protocol, dst, src, linkEP.LinkAddress(), remote, ref, pkt) return @@ -1041,7 +1049,7 @@ func (n *NIC) DeliverNetworkPacket(linkEP LinkEndpoint, remote, local tcpip.Link if n.stack.Forwarding() { r, err := n.stack.FindRoute(0, "", dst, protocol, false /* multicastLoop */) if err != nil { - n.stack.stats.IP.InvalidAddressesReceived.Increment() + n.stack.stats.IP.InvalidDestinationAddressesReceived.Increment() return } defer r.Release() @@ -1079,7 +1087,7 @@ func (n *NIC) DeliverNetworkPacket(linkEP LinkEndpoint, remote, local tcpip.Link // If a packet socket handled the packet, don't treat it as invalid. if len(packetEPs) == 0 { - n.stack.stats.IP.InvalidAddressesReceived.Increment() + n.stack.stats.IP.InvalidDestinationAddressesReceived.Increment() } } diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go index b7813cbc0..6243762e3 100644 --- a/pkg/tcpip/tcpip.go +++ b/pkg/tcpip/tcpip.go @@ -903,9 +903,13 @@ type IPStats struct { // link layer in nic.DeliverNetworkPacket. PacketsReceived *StatCounter - // InvalidAddressesReceived is the total number of IP packets received - // with an unknown or invalid destination address. - InvalidAddressesReceived *StatCounter + // InvalidDestinationAddressesReceived is the total number of IP packets + // received with an unknown or invalid destination address. + InvalidDestinationAddressesReceived *StatCounter + + // InvalidSourceAddressesReceived is the total number of IP packets received + // with a source address that should never have been received on the wire. + InvalidSourceAddressesReceived *StatCounter // PacketsDelivered is the total number of incoming IP packets that // are successfully delivered to the transport layer via HandlePacket. diff --git a/pkg/tcpip/transport/udp/udp_test.go b/pkg/tcpip/transport/udp/udp_test.go index ee9d10555..51bb61167 100644 --- a/pkg/tcpip/transport/udp/udp_test.go +++ b/pkg/tcpip/transport/udp/udp_test.go @@ -274,11 +274,16 @@ type testContext struct { func newDualTestContext(t *testing.T, mtu uint32) *testContext { t.Helper() - - s := stack.New(stack.Options{ + return newDualTestContextWithOptions(t, mtu, stack.Options{ NetworkProtocols: []stack.NetworkProtocol{ipv4.NewProtocol(), ipv6.NewProtocol()}, TransportProtocols: []stack.TransportProtocol{udp.NewProtocol()}, }) +} + +func newDualTestContextWithOptions(t *testing.T, mtu uint32, options stack.Options) *testContext { + t.Helper() + + s := stack.New(options) ep := channel.New(256, mtu, "") wep := stack.LinkEndpoint(ep) @@ -763,6 +768,49 @@ func TestV6ReadOnV6(t *testing.T) { testRead(c, unicastV6) } +// TestV4ReadSelfSource checks that packets coming from a local IP address are +// correctly dropped when handleLocal is true and not otherwise. +func TestV4ReadSelfSource(t *testing.T) { + for _, tt := range []struct { + name string + handleLocal bool + wantErr *tcpip.Error + wantInvalidSource uint64 + }{ + {"HandleLocal", false, nil, 0}, + {"NoHandleLocal", true, tcpip.ErrWouldBlock, 1}, + } { + t.Run(tt.name, func(t *testing.T) { + c := newDualTestContextWithOptions(t, defaultMTU, stack.Options{ + NetworkProtocols: []stack.NetworkProtocol{ipv4.NewProtocol(), ipv6.NewProtocol()}, + TransportProtocols: []stack.TransportProtocol{udp.NewProtocol()}, + HandleLocal: tt.handleLocal, + }) + defer c.cleanup() + + c.createEndpointForFlow(unicastV4) + + if err := c.ep.Bind(tcpip.FullAddress{Port: stackPort}); err != nil { + t.Fatalf("Bind failed: %s", err) + } + + payload := newPayload() + h := unicastV4.header4Tuple(incoming) + h.srcAddr = h.dstAddr + + c.injectV4Packet(payload, &h, true /* valid */) + + if got := c.s.Stats().IP.InvalidSourceAddressesReceived.Value(); got != tt.wantInvalidSource { + t.Errorf("c.s.Stats().IP.InvalidSourceAddressesReceived got %d, want %d", got, tt.wantInvalidSource) + } + + if _, _, err := c.ep.Read(nil); err != tt.wantErr { + t.Errorf("c.ep.Read() got error %v, want %v", err, tt.wantErr) + } + }) + } +} + func TestV4ReadOnV4(t *testing.T) { c := newDualTestContext(t, defaultMTU) defer c.cleanup() -- cgit v1.2.3