From 2572af38860ed8a5aa37f0ad705a360628a4ed10 Mon Sep 17 00:00:00 2001 From: Ghanan Gowripalan Date: Thu, 9 Sep 2021 17:07:37 -0700 Subject: Remove linux-compat loopback hacks from packet endpoint Previously, gVisor did not represent loopback devices as an ethernet device as Linux does. To maintain Linux API compatibility for packet sockets, a workaround was used to add an ethernet header if a link header was not already present in the packet buffer delivered to a packet endpoint. However, this workaround is a bug for non-ethernet based interfaces; not all links use an ethernet header (e.g. pure L3/TUN interfaces). As of 3b4bb947517d0d9010120aaa1c3989fd6abf278e, gVisor represents loopback devices as an ethernet-based device so this workaround can now be removed. BUG: https://fxbug.dev/81592 Updates #6530, #6531. PiperOrigin-RevId: 395819151 --- test/syscalls/linux/tuntap.cc | 210 ++++++++++++++++++++++++++++++++---------- 1 file changed, 163 insertions(+), 47 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/tuntap.cc b/test/syscalls/linux/tuntap.cc index 7c9c5c870..956208bee 100644 --- a/test/syscalls/linux/tuntap.cc +++ b/test/syscalls/linux/tuntap.cc @@ -73,29 +73,14 @@ PosixErrorOr GetLinkByName(const std::string& name) { return PosixError(ENOENT, "interface not found"); } -struct pihdr { - uint16_t pi_flags; - uint16_t pi_protocol; -} __attribute__((packed)); - -struct ping_pkt { - pihdr pi; - struct ethhdr eth; - struct iphdr ip; - struct icmphdr icmp; +struct ping_ip_pkt { + iphdr ip; + icmphdr icmp; char payload[64]; } __attribute__((packed)); -ping_pkt CreatePingPacket(const uint8_t srcmac[ETH_ALEN], const in_addr_t srcip, - const uint8_t dstmac[ETH_ALEN], - const in_addr_t dstip) { - ping_pkt pkt = {}; - - pkt.pi.pi_protocol = htons(ETH_P_IP); - - memcpy(pkt.eth.h_dest, dstmac, sizeof(pkt.eth.h_dest)); - memcpy(pkt.eth.h_source, srcmac, sizeof(pkt.eth.h_source)); - pkt.eth.h_proto = htons(ETH_P_IP); +ping_ip_pkt CreatePingIPPacket(const in_addr_t srcip, const in_addr_t dstip) { + ping_ip_pkt pkt = {}; pkt.ip.ihl = 5; pkt.ip.version = 4; @@ -122,6 +107,33 @@ ping_pkt CreatePingPacket(const uint8_t srcmac[ETH_ALEN], const in_addr_t srcip, return pkt; } +struct pihdr { + uint16_t pi_flags; + uint16_t pi_protocol; +} __attribute__((packed)); + +struct ping_pkt { + pihdr pi; + ethhdr eth; + ping_ip_pkt ip_pkt; +} __attribute__((packed)); + +ping_pkt CreatePingPacket(const uint8_t srcmac[ETH_ALEN], const in_addr_t srcip, + const uint8_t dstmac[ETH_ALEN], + const in_addr_t dstip) { + ping_pkt pkt = {}; + + pkt.pi.pi_protocol = htons(ETH_P_IP); + + memcpy(pkt.eth.h_dest, dstmac, sizeof(pkt.eth.h_dest)); + memcpy(pkt.eth.h_source, srcmac, sizeof(pkt.eth.h_source)); + pkt.eth.h_proto = htons(ETH_P_IP); + + pkt.ip_pkt = CreatePingIPPacket(srcip, dstip); + + return pkt; +} + struct arp_pkt { pihdr pi; struct ethhdr eth; @@ -274,13 +286,26 @@ TEST_F(TuntapTest, WriteToDownDevice) { EXPECT_THAT(write(fd.get(), buf, sizeof(buf)), SyscallFailsWithErrno(EIO)); } -PosixErrorOr OpenAndAttachTap(const std::string& dev_name, - const in_addr_t dev_addr) { +struct TunTapInterface { + FileDescriptor fd; + Link link; +}; + +PosixErrorOr OpenAndAttachTunTap(const std::string& dev_name, + const in_addr_t dev_addr, + bool tap, bool no_pi) { // Interface creation. ASSIGN_OR_RETURN_ERRNO(FileDescriptor fd, Open(kDevNetTun, O_RDWR)); struct ifreq ifr_set = {}; - ifr_set.ifr_flags = IFF_TAP; + if (tap) { + ifr_set.ifr_flags |= IFF_TAP; + } else { + ifr_set.ifr_flags |= IFF_TUN; + } + if (no_pi) { + ifr_set.ifr_flags |= IFF_NO_PI; + } strncpy(ifr_set.ifr_name, dev_name.c_str(), IFNAMSIZ); if (ioctl(fd.get(), TUNSETIFF, &ifr_set) < 0) { return PosixError(errno); @@ -296,13 +321,15 @@ PosixErrorOr OpenAndAttachTap(const std::string& dev_name, if (!IsRunningOnGvisor()) { // FIXME(b/110961832): gVisor doesn't support setting MAC address on // interfaces yet. - RETURN_IF_ERRNO(LinkSetMacAddr(link.index, kMacA, sizeof(kMacA))); + if (tap) { + RETURN_IF_ERRNO(LinkSetMacAddr(link.index, kMacA, sizeof(kMacA))); + } // FIXME(b/110961832): gVisor always creates enabled/up'd interfaces. RETURN_IF_ERRNO(LinkChangeFlags(link.index, IFF_UP, IFF_UP)); } - return fd; + return TunTapInterface{.fd = std::move(fd), .link = std::move(link)}; } // This test sets up a TAP device and pings kernel by sending ICMP echo request. @@ -322,8 +349,9 @@ PosixErrorOr OpenAndAttachTap(const std::string& dev_name, TEST_F(TuntapTest, PingKernel) { SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN))); - FileDescriptor fd = - ASSERT_NO_ERRNO_AND_VALUE(OpenAndAttachTap(kTapName, kTapIPAddr)); + const auto& [fd, link] = ASSERT_NO_ERRNO_AND_VALUE(OpenAndAttachTunTap( + kTapName, kTapIPAddr, true /* tap */, false /* no_pi */)); + ping_pkt ping_req = CreatePingPacket(kMacB, kTapPeerIPAddr, kMacA, kTapIPAddr); std::string arp_rep = @@ -365,10 +393,10 @@ TEST_F(TuntapTest, PingKernel) { // Process ping response packet. if (n >= sizeof(ping_pkt) && r.pi.pi_protocol == ping_req.pi.pi_protocol && - r.ping.ip.protocol == ping_req.ip.protocol && - !memcmp(&r.ping.ip.saddr, &ping_req.ip.daddr, kIPLen) && - !memcmp(&r.ping.ip.daddr, &ping_req.ip.saddr, kIPLen) && - r.ping.icmp.type == 0 && r.ping.icmp.code == 0) { + r.ping.ip_pkt.ip.protocol == ping_req.ip_pkt.ip.protocol && + !memcmp(&r.ping.ip_pkt.ip.saddr, &ping_req.ip_pkt.ip.daddr, kIPLen) && + !memcmp(&r.ping.ip_pkt.ip.daddr, &ping_req.ip_pkt.ip.saddr, kIPLen) && + r.ping.ip_pkt.icmp.type == 0 && r.ping.ip_pkt.icmp.code == 0) { // Ends and passes the test. break; } @@ -378,8 +406,8 @@ TEST_F(TuntapTest, PingKernel) { TEST_F(TuntapTest, SendUdpTriggersArpResolution) { SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN))); - FileDescriptor fd = - ASSERT_NO_ERRNO_AND_VALUE(OpenAndAttachTap(kTapName, kTapIPAddr)); + const auto& [fd, link] = ASSERT_NO_ERRNO_AND_VALUE(OpenAndAttachTunTap( + kTapName, kTapIPAddr, true /* tap */, false /* no_pi */)); // Send a UDP packet to remote. int sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_IP); @@ -433,19 +461,17 @@ TEST_F(TuntapTest, TUNNoPacketInfo) { EXPECT_NO_ERRNO(LinkAddLocalAddr(link.index, AF_INET, 24, &dev_ipv4_addr, sizeof(dev_ipv4_addr))); - ping_pkt ping_req = - CreatePingPacket(kMacB, kTapPeerIPAddr, kMacA, kTapIPAddr); - size_t packet_size = sizeof(ping_req) - offsetof(ping_pkt, ip); + ping_ip_pkt ping_req = CreatePingIPPacket(kTapPeerIPAddr, kTapIPAddr); // Send ICMP query - EXPECT_THAT(write(fd.get(), &ping_req.ip, packet_size), - SyscallSucceedsWithValue(packet_size)); + EXPECT_THAT(write(fd.get(), &ping_req, sizeof(ping_req)), + SyscallSucceedsWithValue(sizeof(ping_req))); // Receive loop to process inbound packets. while (1) { - ping_pkt ping_resp = {}; - EXPECT_THAT(read(fd.get(), &ping_resp.ip, packet_size), - SyscallSucceedsWithValue(packet_size)); + ping_ip_pkt ping_resp = {}; + EXPECT_THAT(read(fd.get(), &ping_resp, sizeof(ping_req)), + SyscallSucceedsWithValue(sizeof(ping_req))); // Process ping response packet. if (!memcmp(&ping_resp.ip.saddr, &ping_req.ip.daddr, kIPLen) && @@ -465,8 +491,8 @@ TEST_F(TuntapTest, TCPBlockingConnectFailsArpResolution) { FileDescriptor sender = ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)); - FileDescriptor fd = - ASSERT_NO_ERRNO_AND_VALUE(OpenAndAttachTap(kTapName, kTapIPAddr)); + const auto tuntap = ASSERT_NO_ERRNO_AND_VALUE(OpenAndAttachTunTap( + kTapName, kTapIPAddr, true /* tap */, false /* no_pi */)); sockaddr_in connect_addr = { .sin_family = AF_INET, @@ -487,8 +513,8 @@ TEST_F(TuntapTest, TCPNonBlockingConnectFailsArpResolution) { FileDescriptor sender = ASSERT_NO_ERRNO_AND_VALUE( Socket(AF_INET, SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP)); - FileDescriptor fd = - ASSERT_NO_ERRNO_AND_VALUE(OpenAndAttachTap(kTapName, kTapIPAddr)); + const auto tuntap = ASSERT_NO_ERRNO_AND_VALUE(OpenAndAttachTunTap( + kTapName, kTapIPAddr, true /* tap */, false /* no_pi */)); sockaddr_in connect_addr = { .sin_family = AF_INET, @@ -518,8 +544,8 @@ TEST_F(TuntapTest, TCPNonBlockingConnectFailsArpResolution) { TEST_F(TuntapTest, WriteHangBug155928773) { SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN))); - FileDescriptor fd = - ASSERT_NO_ERRNO_AND_VALUE(OpenAndAttachTap(kTapName, kTapIPAddr)); + const auto tuntap = ASSERT_NO_ERRNO_AND_VALUE(OpenAndAttachTunTap( + kTapName, kTapIPAddr, true /* tap */, false /* no_pi */)); int sock = socket(AF_INET, SOCK_DGRAM, 0); ASSERT_THAT(sock, SyscallSucceeds()); @@ -534,5 +560,95 @@ TEST_F(TuntapTest, WriteHangBug155928773) { write(sock, "hello", 5); } +// Test that raw packet sockets do not need/include link headers when +// sending/receiving packets to/from pure L3 (e.g. TUN) interfaces. +TEST_F(TuntapTest, RawPacketSocket) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN))); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + auto [tun, link] = ASSERT_NO_ERRNO_AND_VALUE(OpenAndAttachTunTap( + kTunName, kTapIPAddr, false /* tap */, true /* no_pi */)); + FileDescriptor packet_sock = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_PACKET, SOCK_RAW, htons(ETH_P_IP))); + + constexpr int kInfiniteTimeout = -1; + + uint8_t hardware_address_length = 0; + if (IsRunningOnGvisor()) { + // TODO(https://gvisor.dev/issue/6530): Do not assume all interfaces have + // an ethernet address. + hardware_address_length = ETH_ALEN; + } + + { + const ping_ip_pkt ping_req = CreatePingIPPacket(kTapPeerIPAddr, kTapIPAddr); + ASSERT_THAT(write(tun.get(), &ping_req, sizeof(ping_req)), + SyscallSucceedsWithValue(sizeof(ping_req))); + // Wait for the packet socket to become readable. + pollfd pfd = { + .fd = packet_sock.get(), + .events = POLLIN, + }; + ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, kInfiniteTimeout), + SyscallSucceedsWithValue(1)); + + char read_buf[sizeof(ping_req) + 1]; + struct sockaddr_ll src; + socklen_t src_len = sizeof(src); + ASSERT_THAT(recvfrom(packet_sock.get(), read_buf, sizeof(read_buf), 0, + reinterpret_cast(&src), &src_len), + SyscallSucceedsWithValue(sizeof(ping_req))); + EXPECT_EQ(memcmp(read_buf, &ping_req, sizeof(ping_req)), 0); + ASSERT_EQ(src_len, sizeof(src)); + EXPECT_EQ(src.sll_family, AF_PACKET); + EXPECT_EQ(ntohs(src.sll_protocol), ETH_P_IP); + EXPECT_EQ(src.sll_ifindex, link.index); + EXPECT_EQ(src.sll_pkttype, PACKET_HOST); + EXPECT_EQ(src.sll_halen, hardware_address_length); + if (IsRunningOnGvisor()) { + // TODO(https://gvisor.dev/issue/6531): Check this field for the right + // hardware type. + EXPECT_EQ(src.sll_hatype, 0); + } else { + EXPECT_EQ(src.sll_hatype, ARPHRD_NONE); + } + } + + { + const struct sockaddr_ll dest = { + .sll_family = AF_PACKET, + .sll_protocol = htons(ETH_P_IP), + .sll_ifindex = link.index, + .sll_halen = hardware_address_length, + }; + + const ping_ip_pkt ping_req = CreatePingIPPacket(kTapIPAddr, kTapPeerIPAddr); + ASSERT_THAT( + sendto(packet_sock.get(), &ping_req, sizeof(ping_req), 0, + reinterpret_cast(&dest), sizeof(dest)), + SyscallSucceedsWithValue(sizeof(ping_req))); + + // Loop until we receive the packet we expect - the kernel may send packets + // we do not care about. + while (true) { + // Wait for the TUN interface to become readable. + pollfd pfd = { + .fd = tun.get(), + .events = POLLIN, + }; + ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, kInfiniteTimeout), + SyscallSucceedsWithValue(1)); + + char read_buf[sizeof(ping_req) + 1]; + int n = read(tun.get(), &read_buf, sizeof(read_buf)); + ASSERT_THAT(n, SyscallSucceeds()); + if (n == sizeof(ping_req) && + memcmp(read_buf, &ping_req, sizeof(ping_req)) == 0) { + break; + } + } + } +} + } // namespace testing } // namespace gvisor -- cgit v1.2.3