summaryrefslogtreecommitdiffhomepage
path: root/test/util/socket_util.cc
diff options
context:
space:
mode:
Diffstat (limited to 'test/util/socket_util.cc')
-rw-r--r--test/util/socket_util.cc1117
1 files changed, 1117 insertions, 0 deletions
diff --git a/test/util/socket_util.cc b/test/util/socket_util.cc
new file mode 100644
index 000000000..f2360b732
--- /dev/null
+++ b/test/util/socket_util.cc
@@ -0,0 +1,1117 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test/util/socket_util.h"
+
+#include <arpa/inet.h>
+#include <netinet/in.h>
+#include <poll.h>
+#include <sys/socket.h>
+
+#include <memory>
+
+#include "gtest/gtest.h"
+#include "absl/memory/memory.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/str_split.h"
+#include "absl/time/clock.h"
+#include "absl/types/optional.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/posix_error.h"
+#include "test/util/temp_path.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+
+Creator<SocketPair> SyscallSocketPairCreator(int domain, int type,
+ int protocol) {
+ return [=]() -> PosixErrorOr<std::unique_ptr<FDSocketPair>> {
+ int pair[2];
+ RETURN_ERROR_IF_SYSCALL_FAIL(socketpair(domain, type, protocol, pair));
+ MaybeSave(); // Save on successful creation.
+ return absl::make_unique<FDSocketPair>(pair[0], pair[1]);
+ };
+}
+
+Creator<FileDescriptor> SyscallSocketCreator(int domain, int type,
+ int protocol) {
+ return [=]() -> PosixErrorOr<std::unique_ptr<FileDescriptor>> {
+ int fd = 0;
+ RETURN_ERROR_IF_SYSCALL_FAIL(fd = socket(domain, type, protocol));
+ MaybeSave(); // Save on successful creation.
+ return absl::make_unique<FileDescriptor>(fd);
+ };
+}
+
+PosixErrorOr<struct sockaddr_un> UniqueUnixAddr(bool abstract, int domain) {
+ struct sockaddr_un addr = {};
+ std::string path = NewTempAbsPathInDir("/tmp");
+ if (path.size() >= sizeof(addr.sun_path)) {
+ return PosixError(EINVAL,
+ "Unable to generate a temp path of appropriate length");
+ }
+
+ if (abstract) {
+ // Indicate that the path is in the abstract namespace.
+ path[0] = 0;
+ }
+ memcpy(addr.sun_path, path.c_str(), path.length());
+ addr.sun_family = domain;
+ return addr;
+}
+
+Creator<SocketPair> AcceptBindSocketPairCreator(bool abstract, int domain,
+ int type, int protocol) {
+ return [=]() -> PosixErrorOr<std::unique_ptr<AddrFDSocketPair>> {
+ ASSIGN_OR_RETURN_ERRNO(struct sockaddr_un bind_addr,
+ UniqueUnixAddr(abstract, domain));
+ ASSIGN_OR_RETURN_ERRNO(struct sockaddr_un extra_addr,
+ UniqueUnixAddr(abstract, domain));
+
+ int bound;
+ RETURN_ERROR_IF_SYSCALL_FAIL(bound = socket(domain, type, protocol));
+ MaybeSave(); // Successful socket creation.
+ RETURN_ERROR_IF_SYSCALL_FAIL(
+ bind(bound, AsSockAddr(&bind_addr), sizeof(bind_addr)));
+ MaybeSave(); // Successful bind.
+ RETURN_ERROR_IF_SYSCALL_FAIL(
+ listen(bound, /* backlog = */ 5)); // NOLINT(bugprone-argument-comment)
+ MaybeSave(); // Successful listen.
+
+ int connected;
+ RETURN_ERROR_IF_SYSCALL_FAIL(connected = socket(domain, type, protocol));
+ MaybeSave(); // Successful socket creation.
+ RETURN_ERROR_IF_SYSCALL_FAIL(
+ connect(connected, AsSockAddr(&bind_addr), sizeof(bind_addr)));
+ MaybeSave(); // Successful connect.
+
+ int accepted;
+ RETURN_ERROR_IF_SYSCALL_FAIL(
+ accepted = accept4(bound, nullptr, nullptr,
+ type & (SOCK_NONBLOCK | SOCK_CLOEXEC)));
+ MaybeSave(); // Successful connect.
+
+ // Cleanup no longer needed resources.
+ RETURN_ERROR_IF_SYSCALL_FAIL(close(bound));
+ MaybeSave(); // Dropped original socket.
+
+ // Only unlink if path is not in abstract namespace.
+ if (bind_addr.sun_path[0] != 0) {
+ RETURN_ERROR_IF_SYSCALL_FAIL(unlink(bind_addr.sun_path));
+ MaybeSave(); // Unlinked path.
+ }
+
+ // accepted is before connected to destruct connected before accepted.
+ // Destructors for nonstatic member objects are called in the reverse order
+ // in which they appear in the class declaration.
+ return absl::make_unique<AddrFDSocketPair>(accepted, connected, bind_addr,
+ extra_addr);
+ };
+}
+
+Creator<SocketPair> FilesystemAcceptBindSocketPairCreator(int domain, int type,
+ int protocol) {
+ return AcceptBindSocketPairCreator(/* abstract= */ false, domain, type,
+ protocol);
+}
+
+Creator<SocketPair> AbstractAcceptBindSocketPairCreator(int domain, int type,
+ int protocol) {
+ return AcceptBindSocketPairCreator(/* abstract= */ true, domain, type,
+ protocol);
+}
+
+Creator<SocketPair> BidirectionalBindSocketPairCreator(bool abstract,
+ int domain, int type,
+ int protocol) {
+ return [=]() -> PosixErrorOr<std::unique_ptr<FDSocketPair>> {
+ ASSIGN_OR_RETURN_ERRNO(struct sockaddr_un addr1,
+ UniqueUnixAddr(abstract, domain));
+ ASSIGN_OR_RETURN_ERRNO(struct sockaddr_un addr2,
+ UniqueUnixAddr(abstract, domain));
+
+ int sock1;
+ RETURN_ERROR_IF_SYSCALL_FAIL(sock1 = socket(domain, type, protocol));
+ MaybeSave(); // Successful socket creation.
+ RETURN_ERROR_IF_SYSCALL_FAIL(
+ bind(sock1, AsSockAddr(&addr1), sizeof(addr1)));
+ MaybeSave(); // Successful bind.
+
+ int sock2;
+ RETURN_ERROR_IF_SYSCALL_FAIL(sock2 = socket(domain, type, protocol));
+ MaybeSave(); // Successful socket creation.
+ RETURN_ERROR_IF_SYSCALL_FAIL(
+ bind(sock2, AsSockAddr(&addr2), sizeof(addr2)));
+ MaybeSave(); // Successful bind.
+
+ RETURN_ERROR_IF_SYSCALL_FAIL(
+ connect(sock1, AsSockAddr(&addr2), sizeof(addr2)));
+ MaybeSave(); // Successful connect.
+
+ RETURN_ERROR_IF_SYSCALL_FAIL(
+ connect(sock2, AsSockAddr(&addr1), sizeof(addr1)));
+ MaybeSave(); // Successful connect.
+
+ // Cleanup no longer needed resources.
+
+ // Only unlink if path is not in abstract namespace.
+ if (addr1.sun_path[0] != 0) {
+ RETURN_ERROR_IF_SYSCALL_FAIL(unlink(addr1.sun_path));
+ MaybeSave(); // Successful unlink.
+ }
+
+ // Only unlink if path is not in abstract namespace.
+ if (addr2.sun_path[0] != 0) {
+ RETURN_ERROR_IF_SYSCALL_FAIL(unlink(addr2.sun_path));
+ MaybeSave(); // Successful unlink.
+ }
+
+ return absl::make_unique<FDSocketPair>(sock1, sock2);
+ };
+}
+
+Creator<SocketPair> FilesystemBidirectionalBindSocketPairCreator(int domain,
+ int type,
+ int protocol) {
+ return BidirectionalBindSocketPairCreator(/* abstract= */ false, domain, type,
+ protocol);
+}
+
+Creator<SocketPair> AbstractBidirectionalBindSocketPairCreator(int domain,
+ int type,
+ int protocol) {
+ return BidirectionalBindSocketPairCreator(/* abstract= */ true, domain, type,
+ protocol);
+}
+
+Creator<SocketPair> SocketpairGoferSocketPairCreator(int domain, int type,
+ int protocol) {
+ return [=]() -> PosixErrorOr<std::unique_ptr<FDSocketPair>> {
+ struct sockaddr_un addr = {};
+ constexpr char kSocketGoferPath[] = "/socket";
+ memcpy(addr.sun_path, kSocketGoferPath, sizeof(kSocketGoferPath));
+ addr.sun_family = domain;
+
+ int sock1;
+ RETURN_ERROR_IF_SYSCALL_FAIL(sock1 = socket(domain, type, protocol));
+ MaybeSave(); // Successful socket creation.
+ RETURN_ERROR_IF_SYSCALL_FAIL(
+ connect(sock1, AsSockAddr(&addr), sizeof(addr)));
+ MaybeSave(); // Successful connect.
+
+ int sock2;
+ RETURN_ERROR_IF_SYSCALL_FAIL(sock2 = socket(domain, type, protocol));
+ MaybeSave(); // Successful socket creation.
+ RETURN_ERROR_IF_SYSCALL_FAIL(
+ connect(sock2, AsSockAddr(&addr), sizeof(addr)));
+ MaybeSave(); // Successful connect.
+
+ // Make and close another socketpair to ensure that the duped ends of the
+ // first socketpair get closed.
+ //
+ // The problem is that there is no way to atomically send and close an FD.
+ // The closest that we can do is send and then immediately close the FD,
+ // which is what we do in the gofer. The gofer won't respond to another
+ // request until the reply is sent and the FD is closed, so forcing the
+ // gofer to handle another request will ensure that this has happened.
+ for (int i = 0; i < 2; i++) {
+ int sock;
+ RETURN_ERROR_IF_SYSCALL_FAIL(sock = socket(domain, type, protocol));
+ RETURN_ERROR_IF_SYSCALL_FAIL(
+ connect(sock, AsSockAddr(&addr), sizeof(addr)));
+ RETURN_ERROR_IF_SYSCALL_FAIL(close(sock));
+ }
+
+ return absl::make_unique<FDSocketPair>(sock1, sock2);
+ };
+}
+
+Creator<SocketPair> SocketpairGoferFileSocketPairCreator(int flags) {
+ return [=]() -> PosixErrorOr<std::unique_ptr<FDSocketPair>> {
+ constexpr char kSocketGoferPath[] = "/socket";
+
+ int sock1;
+ RETURN_ERROR_IF_SYSCALL_FAIL(sock1 =
+ open(kSocketGoferPath, O_RDWR | flags));
+ MaybeSave(); // Successful socket creation.
+
+ int sock2;
+ RETURN_ERROR_IF_SYSCALL_FAIL(sock2 =
+ open(kSocketGoferPath, O_RDWR | flags));
+ MaybeSave(); // Successful socket creation.
+
+ return absl::make_unique<FDSocketPair>(sock1, sock2);
+ };
+}
+
+Creator<SocketPair> UnboundSocketPairCreator(bool abstract, int domain,
+ int type, int protocol) {
+ return [=]() -> PosixErrorOr<std::unique_ptr<AddrFDSocketPair>> {
+ ASSIGN_OR_RETURN_ERRNO(struct sockaddr_un addr1,
+ UniqueUnixAddr(abstract, domain));
+ ASSIGN_OR_RETURN_ERRNO(struct sockaddr_un addr2,
+ UniqueUnixAddr(abstract, domain));
+
+ int sock1;
+ RETURN_ERROR_IF_SYSCALL_FAIL(sock1 = socket(domain, type, protocol));
+ MaybeSave(); // Successful socket creation.
+ int sock2;
+ RETURN_ERROR_IF_SYSCALL_FAIL(sock2 = socket(domain, type, protocol));
+ MaybeSave(); // Successful socket creation.
+ return absl::make_unique<AddrFDSocketPair>(sock1, sock2, addr1, addr2);
+ };
+}
+
+Creator<SocketPair> FilesystemUnboundSocketPairCreator(int domain, int type,
+ int protocol) {
+ return UnboundSocketPairCreator(/* abstract= */ false, domain, type,
+ protocol);
+}
+
+Creator<SocketPair> AbstractUnboundSocketPairCreator(int domain, int type,
+ int protocol) {
+ return UnboundSocketPairCreator(/* abstract= */ true, domain, type, protocol);
+}
+
+void LocalhostAddr(struct sockaddr_in* addr, bool dual_stack) {
+ addr->sin_family = AF_INET;
+ addr->sin_port = htons(0);
+ inet_pton(AF_INET, "127.0.0.1",
+ reinterpret_cast<void*>(&addr->sin_addr.s_addr));
+}
+
+void LocalhostAddr(struct sockaddr_in6* addr, bool dual_stack) {
+ addr->sin6_family = AF_INET6;
+ addr->sin6_port = htons(0);
+ if (dual_stack) {
+ inet_pton(AF_INET6, "::ffff:127.0.0.1",
+ reinterpret_cast<void*>(&addr->sin6_addr.s6_addr));
+ } else {
+ inet_pton(AF_INET6, "::1",
+ reinterpret_cast<void*>(&addr->sin6_addr.s6_addr));
+ }
+ addr->sin6_scope_id = 0;
+}
+
+template <typename T>
+PosixErrorOr<T> BindIP(int fd, bool dual_stack) {
+ T addr = {};
+ LocalhostAddr(&addr, dual_stack);
+ RETURN_ERROR_IF_SYSCALL_FAIL(bind(fd, AsSockAddr(&addr), sizeof(addr)));
+ socklen_t addrlen = sizeof(addr);
+ RETURN_ERROR_IF_SYSCALL_FAIL(getsockname(fd, AsSockAddr(&addr), &addrlen));
+ return addr;
+}
+
+template <typename T>
+PosixErrorOr<T> TCPBindAndListen(int fd, bool dual_stack) {
+ ASSIGN_OR_RETURN_ERRNO(T addr, BindIP<T>(fd, dual_stack));
+ RETURN_ERROR_IF_SYSCALL_FAIL(
+ listen(fd, /* backlog = */ 5)); // NOLINT(bugprone-argument-comment)
+ return addr;
+}
+
+template <typename T>
+PosixErrorOr<std::unique_ptr<AddrFDSocketPair>>
+CreateTCPConnectAcceptSocketPair(int bound, int connected, int type,
+ bool dual_stack, T bind_addr) {
+ int connect_result = 0;
+ RETURN_ERROR_IF_SYSCALL_FAIL(
+ (connect_result = RetryEINTR(connect)(connected, AsSockAddr(&bind_addr),
+ sizeof(bind_addr))) == -1 &&
+ errno == EINPROGRESS
+ ? 0
+ : connect_result);
+ MaybeSave(); // Successful connect.
+
+ if (connect_result == -1) {
+ struct pollfd connect_poll = {connected, POLLOUT | POLLERR | POLLHUP, 0};
+ RETURN_ERROR_IF_SYSCALL_FAIL(RetryEINTR(poll)(&connect_poll, 1, 0));
+ int error = 0;
+ socklen_t errorlen = sizeof(error);
+ RETURN_ERROR_IF_SYSCALL_FAIL(
+ getsockopt(connected, SOL_SOCKET, SO_ERROR, &error, &errorlen));
+ errno = error;
+ RETURN_ERROR_IF_SYSCALL_FAIL(
+ /* connect */ error == 0 ? 0 : -1);
+ }
+
+ int accepted = -1;
+ struct pollfd accept_poll = {bound, POLLIN, 0};
+ while (accepted == -1) {
+ RETURN_ERROR_IF_SYSCALL_FAIL(RetryEINTR(poll)(&accept_poll, 1, 0));
+
+ RETURN_ERROR_IF_SYSCALL_FAIL(
+ (accepted = RetryEINTR(accept4)(
+ bound, nullptr, nullptr, type & (SOCK_NONBLOCK | SOCK_CLOEXEC))) ==
+ -1 &&
+ errno == EAGAIN
+ ? 0
+ : accepted);
+ }
+ MaybeSave(); // Successful accept.
+
+ T extra_addr = {};
+ LocalhostAddr(&extra_addr, dual_stack);
+ return absl::make_unique<AddrFDSocketPair>(connected, accepted, bind_addr,
+ extra_addr);
+}
+
+template <typename T>
+PosixErrorOr<std::unique_ptr<AddrFDSocketPair>> CreateTCPAcceptBindSocketPair(
+ int bound, int connected, int type, bool dual_stack) {
+ ASSIGN_OR_RETURN_ERRNO(T bind_addr, TCPBindAndListen<T>(bound, dual_stack));
+
+ auto result = CreateTCPConnectAcceptSocketPair(bound, connected, type,
+ dual_stack, bind_addr);
+
+ // Cleanup no longer needed resources.
+ RETURN_ERROR_IF_SYSCALL_FAIL(close(bound));
+ MaybeSave(); // Successful close.
+
+ return result;
+}
+
+Creator<SocketPair> TCPAcceptBindSocketPairCreator(int domain, int type,
+ int protocol,
+ bool dual_stack) {
+ return [=]() -> PosixErrorOr<std::unique_ptr<AddrFDSocketPair>> {
+ int bound;
+ RETURN_ERROR_IF_SYSCALL_FAIL(bound = socket(domain, type, protocol));
+ MaybeSave(); // Successful socket creation.
+
+ int connected;
+ RETURN_ERROR_IF_SYSCALL_FAIL(connected = socket(domain, type, protocol));
+ MaybeSave(); // Successful socket creation.
+
+ if (domain == AF_INET) {
+ return CreateTCPAcceptBindSocketPair<sockaddr_in>(bound, connected, type,
+ dual_stack);
+ }
+ return CreateTCPAcceptBindSocketPair<sockaddr_in6>(bound, connected, type,
+ dual_stack);
+ };
+}
+
+Creator<SocketPair> TCPAcceptBindPersistentListenerSocketPairCreator(
+ int domain, int type, int protocol, bool dual_stack) {
+ // These are lazily initialized below, on the first call to the returned
+ // lambda. These values are private to each returned lambda, but shared across
+ // invocations of a specific lambda.
+ //
+ // The sharing allows pairs created with the same parameters to share a
+ // listener. This prevents future connects from failing if the connecting
+ // socket selects a port which had previously been used by a listening socket
+ // that still has some connections in TIME-WAIT.
+ //
+ // The lazy initialization is to avoid creating sockets during parameter
+ // enumeration. This is important because parameters are enumerated during the
+ // build process where networking may not be available.
+ auto listener = std::make_shared<absl::optional<int>>(absl::optional<int>());
+ auto addr4 = std::make_shared<absl::optional<sockaddr_in>>(
+ absl::optional<sockaddr_in>());
+ auto addr6 = std::make_shared<absl::optional<sockaddr_in6>>(
+ absl::optional<sockaddr_in6>());
+
+ return [=]() -> PosixErrorOr<std::unique_ptr<AddrFDSocketPair>> {
+ int connected;
+ RETURN_ERROR_IF_SYSCALL_FAIL(connected = socket(domain, type, protocol));
+ MaybeSave(); // Successful socket creation.
+
+ // Share the listener across invocations.
+ if (!listener->has_value()) {
+ int fd = socket(domain, type, protocol);
+ if (fd < 0) {
+ return PosixError(errno, absl::StrCat("socket(", domain, ", ", type,
+ ", ", protocol, ")"));
+ }
+ listener->emplace(fd);
+ MaybeSave(); // Successful socket creation.
+ }
+
+ // Bind the listener once, but create a new connect/accept pair each
+ // time.
+ if (domain == AF_INET) {
+ if (!addr4->has_value()) {
+ addr4->emplace(
+ TCPBindAndListen<sockaddr_in>(listener->value(), dual_stack)
+ .ValueOrDie());
+ }
+ return CreateTCPConnectAcceptSocketPair(listener->value(), connected,
+ type, dual_stack, addr4->value());
+ }
+ if (!addr6->has_value()) {
+ addr6->emplace(
+ TCPBindAndListen<sockaddr_in6>(listener->value(), dual_stack)
+ .ValueOrDie());
+ }
+ return CreateTCPConnectAcceptSocketPair(listener->value(), connected, type,
+ dual_stack, addr6->value());
+ };
+}
+
+template <typename T>
+PosixErrorOr<std::unique_ptr<AddrFDSocketPair>> CreateUDPBoundSocketPair(
+ int sock1, int sock2, int type, bool dual_stack) {
+ ASSIGN_OR_RETURN_ERRNO(T addr1, BindIP<T>(sock1, dual_stack));
+ ASSIGN_OR_RETURN_ERRNO(T addr2, BindIP<T>(sock2, dual_stack));
+
+ return absl::make_unique<AddrFDSocketPair>(sock1, sock2, addr1, addr2);
+}
+
+template <typename T>
+PosixErrorOr<std::unique_ptr<AddrFDSocketPair>>
+CreateUDPBidirectionalBindSocketPair(int sock1, int sock2, int type,
+ bool dual_stack) {
+ ASSIGN_OR_RETURN_ERRNO(
+ auto socks, CreateUDPBoundSocketPair<T>(sock1, sock2, type, dual_stack));
+
+ // Connect sock1 to sock2.
+ RETURN_ERROR_IF_SYSCALL_FAIL(connect(socks->first_fd(), socks->second_addr(),
+ socks->second_addr_size()));
+ MaybeSave(); // Successful connection.
+
+ // Connect sock2 to sock1.
+ RETURN_ERROR_IF_SYSCALL_FAIL(connect(socks->second_fd(), socks->first_addr(),
+ socks->first_addr_size()));
+ MaybeSave(); // Successful connection.
+
+ return socks;
+}
+
+Creator<SocketPair> UDPBidirectionalBindSocketPairCreator(int domain, int type,
+ int protocol,
+ bool dual_stack) {
+ return [=]() -> PosixErrorOr<std::unique_ptr<AddrFDSocketPair>> {
+ int sock1;
+ RETURN_ERROR_IF_SYSCALL_FAIL(sock1 = socket(domain, type, protocol));
+ MaybeSave(); // Successful socket creation.
+
+ int sock2;
+ RETURN_ERROR_IF_SYSCALL_FAIL(sock2 = socket(domain, type, protocol));
+ MaybeSave(); // Successful socket creation.
+
+ if (domain == AF_INET) {
+ return CreateUDPBidirectionalBindSocketPair<sockaddr_in>(
+ sock1, sock2, type, dual_stack);
+ }
+ return CreateUDPBidirectionalBindSocketPair<sockaddr_in6>(sock1, sock2,
+ type, dual_stack);
+ };
+}
+
+Creator<SocketPair> UDPUnboundSocketPairCreator(int domain, int type,
+ int protocol, bool dual_stack) {
+ return [=]() -> PosixErrorOr<std::unique_ptr<FDSocketPair>> {
+ int sock1;
+ RETURN_ERROR_IF_SYSCALL_FAIL(sock1 = socket(domain, type, protocol));
+ MaybeSave(); // Successful socket creation.
+
+ int sock2;
+ RETURN_ERROR_IF_SYSCALL_FAIL(sock2 = socket(domain, type, protocol));
+ MaybeSave(); // Successful socket creation.
+
+ return absl::make_unique<FDSocketPair>(sock1, sock2);
+ };
+}
+
+SocketPairKind Reversed(SocketPairKind const& base) {
+ auto const& creator = base.creator;
+ return SocketPairKind{
+ absl::StrCat("reversed ", base.description), base.domain, base.type,
+ base.protocol,
+ [creator]() -> PosixErrorOr<std::unique_ptr<ReversedSocketPair>> {
+ ASSIGN_OR_RETURN_ERRNO(auto creator_value, creator());
+ return absl::make_unique<ReversedSocketPair>(std::move(creator_value));
+ }};
+}
+
+Creator<FileDescriptor> UnboundSocketCreator(int domain, int type,
+ int protocol) {
+ return [=]() -> PosixErrorOr<std::unique_ptr<FileDescriptor>> {
+ int sock;
+ RETURN_ERROR_IF_SYSCALL_FAIL(sock = socket(domain, type, protocol));
+ MaybeSave(); // Successful socket creation.
+
+ return absl::make_unique<FileDescriptor>(sock);
+ };
+}
+
+std::vector<SocketPairKind> IncludeReversals(std::vector<SocketPairKind> vec) {
+ return ApplyVecToVec<SocketPairKind>(std::vector<Middleware>{NoOp, Reversed},
+ vec);
+}
+
+SocketPairKind NoOp(SocketPairKind const& base) { return base; }
+
+void TransferTest(int fd1, int fd2) {
+ char buf1[20];
+ RandomizeBuffer(buf1, sizeof(buf1));
+ ASSERT_THAT(WriteFd(fd1, buf1, sizeof(buf1)),
+ SyscallSucceedsWithValue(sizeof(buf1)));
+
+ char buf2[20];
+ ASSERT_THAT(ReadFd(fd2, buf2, sizeof(buf2)),
+ SyscallSucceedsWithValue(sizeof(buf2)));
+
+ EXPECT_EQ(0, memcmp(buf1, buf2, sizeof(buf1)));
+
+ RandomizeBuffer(buf1, sizeof(buf1));
+ ASSERT_THAT(WriteFd(fd2, buf1, sizeof(buf1)),
+ SyscallSucceedsWithValue(sizeof(buf1)));
+
+ ASSERT_THAT(ReadFd(fd1, buf2, sizeof(buf2)),
+ SyscallSucceedsWithValue(sizeof(buf2)));
+
+ EXPECT_EQ(0, memcmp(buf1, buf2, sizeof(buf1)));
+}
+
+// Initializes the given buffer with random data.
+void RandomizeBuffer(char* ptr, size_t len) {
+ uint32_t seed = time(nullptr);
+ for (size_t i = 0; i < len; ++i) {
+ ptr[i] = static_cast<char>(rand_r(&seed));
+ }
+}
+
+size_t CalculateUnixSockAddrLen(const char* sun_path) {
+ // Abstract addresses always return the full length.
+ if (sun_path[0] == 0) {
+ return sizeof(sockaddr_un);
+ }
+ // Filesystem addresses use the address length plus the 2 byte sun_family
+ // and null terminator.
+ return strlen(sun_path) + 3;
+}
+
+struct sockaddr_storage AddrFDSocketPair::to_storage(const sockaddr_un& addr) {
+ struct sockaddr_storage addr_storage = {};
+ memcpy(&addr_storage, &addr, sizeof(addr));
+ return addr_storage;
+}
+
+struct sockaddr_storage AddrFDSocketPair::to_storage(const sockaddr_in& addr) {
+ struct sockaddr_storage addr_storage = {};
+ memcpy(&addr_storage, &addr, sizeof(addr));
+ return addr_storage;
+}
+
+struct sockaddr_storage AddrFDSocketPair::to_storage(const sockaddr_in6& addr) {
+ struct sockaddr_storage addr_storage = {};
+ memcpy(&addr_storage, &addr, sizeof(addr));
+ return addr_storage;
+}
+
+SocketKind SimpleSocket(int fam, int type, int proto) {
+ return SocketKind{
+ absl::StrCat("Family ", fam, ", type ", type, ", proto ", proto), fam,
+ type, proto, SyscallSocketCreator(fam, type, proto)};
+}
+
+ssize_t SendLargeSendMsg(const std::unique_ptr<SocketPair>& sockets,
+ size_t size, bool reader) {
+ const int rfd = sockets->second_fd();
+ ScopedThread t([rfd, size, reader] {
+ if (!reader) {
+ return;
+ }
+
+ // Potentially too many syscalls in the loop.
+ const DisableSave ds;
+
+ std::vector<char> buf(size);
+ size_t total = 0;
+
+ while (total < size) {
+ int ret = read(rfd, buf.data(), buf.size());
+ if (ret == -1 && errno == EAGAIN) {
+ continue;
+ }
+ if (ret > 0) {
+ total += ret;
+ }
+
+ // Assert to return on first failure.
+ ASSERT_THAT(ret, SyscallSucceeds());
+ }
+ });
+
+ std::vector<char> buf(size);
+
+ struct iovec iov = {};
+ iov.iov_base = buf.data();
+ iov.iov_len = buf.size();
+
+ struct msghdr msg = {};
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+
+ return RetryEINTR(sendmsg)(sockets->first_fd(), &msg, 0);
+}
+
+namespace internal {
+PosixErrorOr<int> TryPortAvailable(int port, AddressFamily family,
+ SocketType type, bool reuse_addr) {
+ if (port < 0) {
+ return PosixError(EINVAL, "Invalid port");
+ }
+
+ // Both Ipv6 and Dualstack are AF_INET6.
+ int sock_fam = (family == AddressFamily::kIpv4 ? AF_INET : AF_INET6);
+ int sock_type = (type == SocketType::kTcp ? SOCK_STREAM : SOCK_DGRAM);
+ ASSIGN_OR_RETURN_ERRNO(auto fd, Socket(sock_fam, sock_type, 0));
+
+ if (reuse_addr) {
+ int one = 1;
+ RETURN_ERROR_IF_SYSCALL_FAIL(
+ setsockopt(fd.get(), SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)));
+ }
+
+ // Try to bind.
+ sockaddr_storage storage = {};
+ int storage_size = 0;
+ if (family == AddressFamily::kIpv4) {
+ sockaddr_in* addr = reinterpret_cast<sockaddr_in*>(&storage);
+ storage_size = sizeof(*addr);
+ addr->sin_family = AF_INET;
+ addr->sin_port = htons(port);
+ addr->sin_addr.s_addr = htonl(INADDR_ANY);
+ } else {
+ sockaddr_in6* addr = reinterpret_cast<sockaddr_in6*>(&storage);
+ storage_size = sizeof(*addr);
+ addr->sin6_family = AF_INET6;
+ addr->sin6_port = htons(port);
+ if (family == AddressFamily::kDualStack) {
+ inet_pton(AF_INET6, "::ffff:0.0.0.0",
+ reinterpret_cast<void*>(&addr->sin6_addr.s6_addr));
+ } else {
+ addr->sin6_addr = in6addr_any;
+ }
+ }
+
+ RETURN_ERROR_IF_SYSCALL_FAIL(
+ bind(fd.get(), AsSockAddr(&storage), storage_size));
+
+ // If the user specified 0 as the port, we will return the port that the
+ // kernel gave us, otherwise we will validate that this socket bound to the
+ // requested port.
+ sockaddr_storage bound_storage = {};
+ socklen_t bound_storage_size = sizeof(bound_storage);
+ RETURN_ERROR_IF_SYSCALL_FAIL(
+ getsockname(fd.get(), AsSockAddr(&bound_storage), &bound_storage_size));
+
+ int available_port = -1;
+ if (bound_storage.ss_family == AF_INET) {
+ sockaddr_in* addr = reinterpret_cast<sockaddr_in*>(&bound_storage);
+ available_port = ntohs(addr->sin_port);
+ } else if (bound_storage.ss_family == AF_INET6) {
+ sockaddr_in6* addr = reinterpret_cast<sockaddr_in6*>(&bound_storage);
+ available_port = ntohs(addr->sin6_port);
+ } else {
+ return PosixError(EPROTOTYPE, "Getsockname returned invalid family");
+ }
+
+ // If we requested a specific port make sure our bound port is that port.
+ if (port != 0 && available_port != port) {
+ return PosixError(EINVAL,
+ absl::StrCat("Bound port ", available_port,
+ " was not equal to requested port ", port));
+ }
+
+ // If we're trying to do a TCP socket, let's also try to listen.
+ if (type == SocketType::kTcp) {
+ RETURN_ERROR_IF_SYSCALL_FAIL(listen(fd.get(), 1));
+ }
+
+ return available_port;
+}
+} // namespace internal
+
+PosixErrorOr<int> SendMsg(int sock, msghdr* msg, char buf[], int buf_size) {
+ struct iovec iov;
+ iov.iov_base = buf;
+ iov.iov_len = buf_size;
+ msg->msg_iov = &iov;
+ msg->msg_iovlen = 1;
+
+ int ret;
+ RETURN_ERROR_IF_SYSCALL_FAIL(ret = RetryEINTR(sendmsg)(sock, msg, 0));
+ return ret;
+}
+
+PosixErrorOr<int> RecvTimeout(int sock, char buf[], int buf_size, int timeout) {
+ fd_set rfd;
+ struct timeval to = {.tv_sec = timeout, .tv_usec = 0};
+ FD_ZERO(&rfd);
+ FD_SET(sock, &rfd);
+
+ int ret;
+ RETURN_ERROR_IF_SYSCALL_FAIL(ret = select(1, &rfd, NULL, NULL, &to));
+ RETURN_ERROR_IF_SYSCALL_FAIL(
+ ret = RetryEINTR(recv)(sock, buf, buf_size, MSG_DONTWAIT));
+ return ret;
+}
+
+PosixErrorOr<int> RecvMsgTimeout(int sock, struct msghdr* msg, int timeout) {
+ fd_set rfd;
+ struct timeval to = {.tv_sec = timeout, .tv_usec = 0};
+ FD_ZERO(&rfd);
+ FD_SET(sock, &rfd);
+
+ int ret;
+ RETURN_ERROR_IF_SYSCALL_FAIL(ret = select(1, &rfd, NULL, NULL, &to));
+ RETURN_ERROR_IF_SYSCALL_FAIL(
+ ret = RetryEINTR(recvmsg)(sock, msg, MSG_DONTWAIT));
+ return ret;
+}
+
+void RecvNoData(int sock) {
+ char data = 0;
+ struct iovec iov;
+ iov.iov_base = &data;
+ iov.iov_len = 1;
+ struct msghdr msg = {};
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ ASSERT_THAT(RetryEINTR(recvmsg)(sock, &msg, MSG_DONTWAIT),
+ SyscallFailsWithErrno(EAGAIN));
+}
+
+TestAddress TestAddress::WithPort(uint16_t port) const {
+ TestAddress addr = *this;
+ switch (addr.family()) {
+ case AF_INET:
+ reinterpret_cast<sockaddr_in*>(&addr.addr)->sin_port = htons(port);
+ break;
+ case AF_INET6:
+ reinterpret_cast<sockaddr_in6*>(&addr.addr)->sin6_port = htons(port);
+ break;
+ }
+ return addr;
+}
+
+namespace {
+
+TestAddress V4Addr(std::string description, in_addr_t addr) {
+ TestAddress t(std::move(description));
+ t.addr.ss_family = AF_INET;
+ t.addr_len = sizeof(sockaddr_in);
+ reinterpret_cast<sockaddr_in*>(&t.addr)->sin_addr.s_addr = addr;
+ return t;
+}
+
+TestAddress V6Addr(std::string description, const in6_addr& addr) {
+ TestAddress t(std::move(description));
+ t.addr.ss_family = AF_INET6;
+ t.addr_len = sizeof(sockaddr_in6);
+ reinterpret_cast<sockaddr_in6*>(&t.addr)->sin6_addr = addr;
+ return t;
+}
+
+} // namespace
+
+TestAddress V4AddrStr(std::string description, const char* addr) {
+ in_addr_t s_addr;
+ inet_pton(AF_INET, addr, &s_addr);
+ return V4Addr(description, s_addr);
+}
+
+TestAddress V6AddrStr(std::string description, const char* addr) {
+ struct in6_addr s_addr;
+ inet_pton(AF_INET6, addr, &s_addr);
+ return V6Addr(description, s_addr);
+}
+
+TestAddress V4Any() { return V4Addr("V4Any", htonl(INADDR_ANY)); }
+
+TestAddress V4Broadcast() {
+ return V4Addr("V4Broadcast", htonl(INADDR_BROADCAST));
+}
+
+TestAddress V4Loopback() {
+ return V4Addr("V4Loopback", htonl(INADDR_LOOPBACK));
+}
+
+TestAddress V4LoopbackSubnetBroadcast() {
+ return V4AddrStr("V4LoopbackSubnetBroadcast", "127.255.255.255");
+}
+
+TestAddress V4MappedAny() { return V6AddrStr("V4MappedAny", "::ffff:0.0.0.0"); }
+
+TestAddress V4MappedLoopback() {
+ return V6AddrStr("V4MappedLoopback", "::ffff:127.0.0.1");
+}
+
+TestAddress V4Multicast() {
+ return V4Addr("V4Multicast", inet_addr(kMulticastAddress));
+}
+
+TestAddress V4MulticastAllHosts() {
+ return V4Addr("V4MulticastAllHosts", htonl(INADDR_ALLHOSTS_GROUP));
+}
+
+TestAddress V6Any() { return V6Addr("V6Any", in6addr_any); }
+
+TestAddress V6Loopback() { return V6Addr("V6Loopback", in6addr_loopback); }
+
+TestAddress V6Multicast() { return V6AddrStr("V6Multicast", "ff05::1234"); }
+
+TestAddress V6MulticastInterfaceLocalAllNodes() {
+ return V6AddrStr("V6MulticastInterfaceLocalAllNodes", "ff01::1");
+}
+
+TestAddress V6MulticastLinkLocalAllNodes() {
+ return V6AddrStr("V6MulticastLinkLocalAllNodes", "ff02::1");
+}
+
+TestAddress V6MulticastLinkLocalAllRouters() {
+ return V6AddrStr("V6MulticastLinkLocalAllRouters", "ff02::2");
+}
+
+// Checksum computes the internet checksum of a buffer.
+uint16_t Checksum(uint16_t* buf, ssize_t buf_size) {
+ // Add up the 16-bit values in the buffer.
+ uint32_t total = 0;
+ for (unsigned int i = 0; i < buf_size; i += sizeof(*buf)) {
+ total += *buf;
+ buf++;
+ }
+
+ // If buf has an odd size, add the remaining byte.
+ if (buf_size % 2) {
+ total += *(reinterpret_cast<unsigned char*>(buf) - 1);
+ }
+
+ // This carries any bits past the lower 16 until everything fits in 16 bits.
+ while (total >> 16) {
+ uint16_t lower = total & 0xffff;
+ uint16_t upper = total >> 16;
+ total = lower + upper;
+ }
+
+ return ~total;
+}
+
+uint16_t IPChecksum(struct iphdr ip) {
+ return Checksum(reinterpret_cast<uint16_t*>(&ip), sizeof(ip));
+}
+
+// The pseudo-header defined in RFC 768 for calculating the UDP checksum.
+struct udp_pseudo_hdr {
+ uint32_t srcip;
+ uint32_t destip;
+ char zero;
+ char protocol;
+ uint16_t udplen;
+};
+
+uint16_t UDPChecksum(struct iphdr iphdr, struct udphdr udphdr,
+ const char* payload, ssize_t payload_len) {
+ struct udp_pseudo_hdr phdr = {};
+ phdr.srcip = iphdr.saddr;
+ phdr.destip = iphdr.daddr;
+ phdr.zero = 0;
+ phdr.protocol = IPPROTO_UDP;
+ phdr.udplen = udphdr.len;
+
+ ssize_t buf_size = sizeof(phdr) + sizeof(udphdr) + payload_len;
+ char* buf = static_cast<char*>(malloc(buf_size));
+ memcpy(buf, &phdr, sizeof(phdr));
+ memcpy(buf + sizeof(phdr), &udphdr, sizeof(udphdr));
+ memcpy(buf + sizeof(phdr) + sizeof(udphdr), payload, payload_len);
+
+ uint16_t csum = Checksum(reinterpret_cast<uint16_t*>(buf), buf_size);
+ free(buf);
+ return csum;
+}
+
+uint16_t ICMPChecksum(struct icmphdr icmphdr, const char* payload,
+ ssize_t payload_len) {
+ ssize_t buf_size = sizeof(icmphdr) + payload_len;
+ char* buf = static_cast<char*>(malloc(buf_size));
+ memcpy(buf, &icmphdr, sizeof(icmphdr));
+ memcpy(buf + sizeof(icmphdr), payload, payload_len);
+
+ uint16_t csum = Checksum(reinterpret_cast<uint16_t*>(buf), buf_size);
+ free(buf);
+ return csum;
+}
+
+PosixErrorOr<uint16_t> AddrPort(int family, sockaddr_storage const& addr) {
+ switch (family) {
+ case AF_INET:
+ return static_cast<uint16_t>(
+ reinterpret_cast<sockaddr_in const*>(&addr)->sin_port);
+ case AF_INET6:
+ return static_cast<uint16_t>(
+ reinterpret_cast<sockaddr_in6 const*>(&addr)->sin6_port);
+ default:
+ return PosixError(EINVAL,
+ absl::StrCat("unknown socket family: ", family));
+ }
+}
+
+PosixError SetAddrPort(int family, sockaddr_storage* addr, uint16_t port) {
+ switch (family) {
+ case AF_INET:
+ reinterpret_cast<sockaddr_in*>(addr)->sin_port = port;
+ return NoError();
+ case AF_INET6:
+ reinterpret_cast<sockaddr_in6*>(addr)->sin6_port = port;
+ return NoError();
+ default:
+ return PosixError(EINVAL,
+ absl::StrCat("unknown socket family: ", family));
+ }
+}
+
+void SetupTimeWaitClose(const TestAddress* listener,
+ const TestAddress* connector, bool reuse,
+ bool accept_close, sockaddr_storage* listen_addr,
+ sockaddr_storage* conn_bound_addr) {
+ // Create the listening socket.
+ FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE(
+ Socket(listener->family(), SOCK_STREAM, IPPROTO_TCP));
+ if (reuse) {
+ ASSERT_THAT(setsockopt(listen_fd.get(), SOL_SOCKET, SO_REUSEADDR,
+ &kSockOptOn, sizeof(kSockOptOn)),
+ SyscallSucceeds());
+ }
+ ASSERT_THAT(
+ bind(listen_fd.get(), AsSockAddr(listen_addr), listener->addr_len),
+ SyscallSucceeds());
+ ASSERT_THAT(listen(listen_fd.get(), SOMAXCONN), SyscallSucceeds());
+
+ // Get the port bound by the listening socket.
+ socklen_t addrlen = listener->addr_len;
+ ASSERT_THAT(getsockname(listen_fd.get(), AsSockAddr(listen_addr), &addrlen),
+ SyscallSucceeds());
+
+ uint16_t const port =
+ ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener->family(), *listen_addr));
+
+ // Connect to the listening socket.
+ FileDescriptor conn_fd = ASSERT_NO_ERRNO_AND_VALUE(
+ Socket(connector->family(), SOCK_STREAM, IPPROTO_TCP));
+
+ // We disable saves after this point as a S/R causes the netstack seed
+ // to be regenerated which changes what ports/ISN is picked for a given
+ // tuple (src ip,src port, dst ip, dst port). This can cause the final
+ // SYN to use a sequence number that looks like one from the current
+ // connection in TIME_WAIT and will not be accepted causing the test
+ // to timeout.
+ //
+ // TODO(gvisor.dev/issue/940): S/R portSeed/portHint
+ DisableSave ds;
+
+ sockaddr_storage conn_addr = connector->addr;
+ ASSERT_NO_ERRNO(SetAddrPort(connector->family(), &conn_addr, port));
+ ASSERT_THAT(RetryEINTR(connect)(conn_fd.get(), AsSockAddr(&conn_addr),
+ connector->addr_len),
+ SyscallSucceeds());
+
+ // Accept the connection.
+ auto accepted =
+ ASSERT_NO_ERRNO_AND_VALUE(Accept(listen_fd.get(), nullptr, nullptr));
+
+ // Get the address/port bound by the connecting socket.
+ socklen_t conn_addrlen = connector->addr_len;
+ ASSERT_THAT(
+ getsockname(conn_fd.get(), AsSockAddr(conn_bound_addr), &conn_addrlen),
+ SyscallSucceeds());
+
+ FileDescriptor active_closefd, passive_closefd;
+ if (accept_close) {
+ active_closefd = std::move(accepted);
+ passive_closefd = std::move(conn_fd);
+ } else {
+ active_closefd = std::move(conn_fd);
+ passive_closefd = std::move(accepted);
+ }
+
+ // shutdown to trigger TIME_WAIT.
+ ASSERT_THAT(shutdown(active_closefd.get(), SHUT_WR), SyscallSucceeds());
+ {
+ constexpr int kTimeout = 10000;
+ pollfd pfd = {
+ .fd = passive_closefd.get(),
+ .events = POLLIN,
+ };
+ ASSERT_THAT(poll(&pfd, 1, kTimeout), SyscallSucceedsWithValue(1));
+ ASSERT_EQ(pfd.revents, POLLIN);
+ }
+ ASSERT_THAT(shutdown(passive_closefd.get(), SHUT_WR), SyscallSucceeds());
+ {
+ constexpr int kTimeout = 10000;
+ constexpr int16_t want_events = POLLHUP;
+ pollfd pfd = {
+ .fd = active_closefd.get(),
+ .events = want_events,
+ };
+ ASSERT_THAT(poll(&pfd, 1, kTimeout), SyscallSucceedsWithValue(1));
+ }
+
+ // This sleep is needed to reduce flake to ensure that the passive-close
+ // ensures the state transitions to CLOSE from LAST_ACK.
+ absl::SleepFor(absl::Seconds(1));
+}
+
+constexpr char kRangeFile[] = "/proc/sys/net/ipv4/ip_local_port_range";
+
+PosixErrorOr<int> MaybeLimitEphemeralPorts() {
+ int min = 0;
+ int max = 1 << 16;
+
+ // Read the ephemeral range from /proc.
+ ASSIGN_OR_RETURN_ERRNO(std::string rangefile, GetContents(kRangeFile));
+ const std::string err_msg =
+ absl::StrFormat("%s has invalid content: %s", kRangeFile, rangefile);
+ if (rangefile.back() != '\n') {
+ return PosixError(EINVAL, err_msg);
+ }
+ rangefile.pop_back();
+ std::vector<std::string> range =
+ absl::StrSplit(rangefile, absl::ByAnyChar("\t "));
+ if (range.size() < 2 || !absl::SimpleAtoi(range.front(), &min) ||
+ !absl::SimpleAtoi(range.back(), &max)) {
+ return PosixError(EINVAL, err_msg);
+ }
+
+ // If we can open as writable, limit the range.
+ if (!access(kRangeFile, W_OK)) {
+ ASSIGN_OR_RETURN_ERRNO(FileDescriptor fd,
+ Open(kRangeFile, O_WRONLY | O_TRUNC, 0));
+ int newMax = min + 50;
+ const std::string small_range = absl::StrFormat("%d %d", min, newMax);
+ int n = write(fd.get(), small_range.c_str(), small_range.size());
+ if (n < 0) {
+ // Hostinet doesn't allow modifying the host port range. And if we're root
+ // (as we are in some tests), access and open will succeed even if the
+ // file mode is readonly.
+ if (errno != EACCES) {
+ return PosixError(
+ errno,
+ absl::StrFormat("write(%d [%s], \"%s\", %d)", fd.get(), kRangeFile,
+ small_range.c_str(), small_range.size()));
+ }
+ } else {
+ max = newMax;
+ }
+ }
+ return max - min;
+}
+
+} // namespace testing
+} // namespace gvisor