diff options
Diffstat (limited to 'test/syscalls/linux')
279 files changed, 80398 insertions, 0 deletions
diff --git a/test/syscalls/linux/32bit.cc b/test/syscalls/linux/32bit.cc new file mode 100644 index 000000000..3c825477c --- /dev/null +++ b/test/syscalls/linux/32bit.cc @@ -0,0 +1,248 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <string.h> +#include <sys/mman.h> + +#include "gtest/gtest.h" +#include "absl/base/macros.h" +#include "test/util/memory_util.h" +#include "test/util/platform_util.h" +#include "test/util/posix_error.h" +#include "test/util/test_util.h" + +#ifndef __x86_64__ +#error "This test is x86-64 specific." +#endif + +namespace gvisor { +namespace testing { + +namespace { + +constexpr char kInt3 = '\xcc'; +constexpr char kInt80[2] = {'\xcd', '\x80'}; +constexpr char kSyscall[2] = {'\x0f', '\x05'}; +constexpr char kSysenter[2] = {'\x0f', '\x34'}; + +void ExitGroup32(const char instruction[2], int code) { + const Mapping m = ASSERT_NO_ERRNO_AND_VALUE( + Mmap(nullptr, kPageSize, PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0)); + + // Fill with INT 3 in case we execute too far. + memset(m.ptr(), kInt3, m.len()); + + // Copy in the actual instruction. + memcpy(m.ptr(), instruction, 2); + + // We're playing *extremely* fast-and-loose with the various syscall ABIs + // here, which we can more-or-less get away with since exit_group doesn't + // return. + // + // SYSENTER expects the user stack in (%ebp) and arg6 in 0(%ebp). 
The kernel + // will unconditionally dereference %ebp for arg6, so we must pass a valid + // address or it will return EFAULT. + // + // SYSENTER also unconditionally returns to thread_info->sysenter_return which + // is ostensibly a stub in the 32-bit VDSO. But a 64-bit binary doesn't have + // the 32-bit VDSO mapped, so sysenter_return will simply be the value + // inherited from the most recent 32-bit ancestor, or NULL if there is none. + // As a result, return would not return from SYSENTER. + asm volatile( + "movl $252, %%eax\n" // exit_group + "movl %[code], %%ebx\n" // code + "movl %%edx, %%ebp\n" // SYSENTER: user stack (use IP as a valid addr) + "leaq -20(%%rsp), %%rsp\n" + "movl $0x2b, 16(%%rsp)\n" // SS = CPL3 data segment + "movl $0,12(%%rsp)\n" // ESP = nullptr (unused) + "movl $0, 8(%%rsp)\n" // EFLAGS + "movl $0x23, 4(%%rsp)\n" // CS = CPL3 32-bit code segment + "movl %%edx, 0(%%rsp)\n" // EIP + "iretl\n" + "int $3\n" + : + : [ code ] "m"(code), [ ip ] "d"(m.ptr()) + : "rax", "rbx"); +} + +constexpr int kExitCode = 42; + +TEST(Syscall32Bit, Int80) { + switch (PlatformSupport32Bit()) { + case PlatformSupport::NotSupported: + break; + case PlatformSupport::Segfault: + EXPECT_EXIT(ExitGroup32(kInt80, kExitCode), + ::testing::KilledBySignal(SIGSEGV), ""); + break; + + case PlatformSupport::Ignored: + // Since the call is ignored, we'll hit the int3 trap. + EXPECT_EXIT(ExitGroup32(kInt80, kExitCode), + ::testing::KilledBySignal(SIGTRAP), ""); + break; + + case PlatformSupport::Allowed: + EXPECT_EXIT(ExitGroup32(kInt80, kExitCode), ::testing::ExitedWithCode(42), + ""); + break; + } +} + +TEST(Syscall32Bit, Sysenter) { + if ((PlatformSupport32Bit() == PlatformSupport::Allowed || + PlatformSupport32Bit() == PlatformSupport::Ignored) && + GetCPUVendor() == CPUVendor::kAMD) { + // SYSENTER is an illegal instruction in compatibility mode on AMD. 
+ EXPECT_EXIT(ExitGroup32(kSysenter, kExitCode), + ::testing::KilledBySignal(SIGILL), ""); + return; + } + + switch (PlatformSupport32Bit()) { + case PlatformSupport::NotSupported: + break; + + case PlatformSupport::Segfault: + EXPECT_EXIT(ExitGroup32(kSysenter, kExitCode), + ::testing::KilledBySignal(SIGSEGV), ""); + break; + + case PlatformSupport::Ignored: + // See above, except expected code is SIGSEGV. + EXPECT_EXIT(ExitGroup32(kSysenter, kExitCode), + ::testing::KilledBySignal(SIGSEGV), ""); + break; + + case PlatformSupport::Allowed: + EXPECT_EXIT(ExitGroup32(kSysenter, kExitCode), + ::testing::ExitedWithCode(42), ""); + break; + } +} + +TEST(Syscall32Bit, Syscall) { + if ((PlatformSupport32Bit() == PlatformSupport::Allowed || + PlatformSupport32Bit() == PlatformSupport::Ignored) && + GetCPUVendor() == CPUVendor::kIntel) { + // SYSCALL is an illegal instruction in compatibility mode on Intel. + EXPECT_EXIT(ExitGroup32(kSyscall, kExitCode), + ::testing::KilledBySignal(SIGILL), ""); + return; + } + + switch (PlatformSupport32Bit()) { + case PlatformSupport::NotSupported: + break; + + case PlatformSupport::Segfault: + EXPECT_EXIT(ExitGroup32(kSyscall, kExitCode), + ::testing::KilledBySignal(SIGSEGV), ""); + break; + + case PlatformSupport::Ignored: + // See above. + EXPECT_EXIT(ExitGroup32(kSyscall, kExitCode), + ::testing::KilledBySignal(SIGSEGV), ""); + break; + + case PlatformSupport::Allowed: + EXPECT_EXIT(ExitGroup32(kSyscall, kExitCode), + ::testing::ExitedWithCode(42), ""); + break; + } +} + +// Far call code called below. +// +// Input stack layout: +// +// %esp+12 lcall segment +// %esp+8 lcall address offset +// %esp+0 return address +// +// The lcall will enter compatibility mode and jump to the call address (the +// address of the lret). The lret will return to 64-bit mode at the retq, which +// will return to the external caller of this function. 
+// +// Since this enters compatibility mode, it must be mapped in a 32-bit region of +// address space and have a 32-bit stack pointer. +constexpr char kFarCall[] = { + '\x67', '\xff', '\x5c', '\x24', '\x08', // lcall *8(%esp) + '\xc3', // retq + '\xcb', // lret +}; + +void FarCall32() { + const Mapping m = ASSERT_NO_ERRNO_AND_VALUE( + Mmap(nullptr, kPageSize, PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0)); + + // Fill with INT 3 in case we execute too far. + memset(m.ptr(), kInt3, m.len()); + + // 32-bit code. + memcpy(m.ptr(), kFarCall, sizeof(kFarCall)); + + // Use the end of the code page as its stack. + uintptr_t stack = m.endaddr(); + + uintptr_t lcall = m.addr(); + uintptr_t lret = m.addr() + sizeof(kFarCall) - 1; + + // N.B. We must save and restore RSP manually. GCC can do so automatically + // with an "rsp" clobber, but clang cannot. + asm volatile( + // Place the address of lret (%edx) and the 32-bit code segment (0x23) on + // the 32-bit stack for lcall. + "subl $0x8, %%ecx\n" + "movl $0x23, 4(%%ecx)\n" + "movl %%edx, 0(%%ecx)\n" + + // Save the current stack and switch to 32-bit stack. + "pushq %%rbp\n" + "movq %%rsp, %%rbp\n" + "movq %%rcx, %%rsp\n" + + // Run the lcall code. + "callq *%%rbx\n" + + // Restore the old stack. + "leaveq\n" + : "+c"(stack) + : "b"(lcall), "d"(lret)); +} + +TEST(Call32Bit, Disallowed) { + switch (PlatformSupport32Bit()) { + case PlatformSupport::NotSupported: + break; + + case PlatformSupport::Segfault: + EXPECT_EXIT(FarCall32(), ::testing::KilledBySignal(SIGSEGV), ""); + break; + + case PlatformSupport::Ignored: + ABSL_FALLTHROUGH_INTENDED; + case PlatformSupport::Allowed: + // Shouldn't crash. 
+ FarCall32(); + } +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD new file mode 100644 index 000000000..9e097c888 --- /dev/null +++ b/test/syscalls/linux/BUILD @@ -0,0 +1,3933 @@ +load("//tools:defs.bzl", "cc_binary", "cc_library", "default_net_util", "gtest", "select_arch", "select_system") + +package( + default_visibility = ["//:sandbox"], + licenses = ["notice"], +) + +exports_files( + [ + "socket.cc", + "socket_inet_loopback.cc", + "socket_ip_loopback_blocking.cc", + "socket_ip_tcp_generic_loopback.cc", + "socket_ip_tcp_loopback.cc", + "socket_ip_tcp_loopback_blocking.cc", + "socket_ip_tcp_loopback_nonblock.cc", + "socket_ip_tcp_udp_generic.cc", + "socket_ip_udp_loopback.cc", + "socket_ip_udp_loopback_blocking.cc", + "socket_ip_udp_loopback_nonblock.cc", + "socket_ip_unbound.cc", + "socket_ipv4_tcp_unbound_external_networking_test.cc", + "socket_ipv4_udp_unbound_external_networking_test.cc", + "socket_ipv4_udp_unbound_loopback.cc", + "tcp_socket.cc", + "udp_bind.cc", + "udp_socket.cc", + ], + visibility = ["//:sandbox"], +) + +cc_binary( + name = "sigaltstack_check", + testonly = 1, + srcs = ["sigaltstack_check.cc"], + deps = ["//test/util:logging"], +) + +cc_binary( + name = "exec_assert_closed_workload", + testonly = 1, + srcs = ["exec_assert_closed_workload.cc"], + deps = [ + "@com_google_absl//absl/strings", + ], +) + +cc_binary( + name = "exec_basic_workload", + testonly = 1, + srcs = [ + "exec.h", + "exec_basic_workload.cc", + ], +) + +cc_binary( + name = "exec_proc_exe_workload", + testonly = 1, + srcs = ["exec_proc_exe_workload.cc"], + deps = [ + "//test/util:fs_util", + "//test/util:posix_error", + ], +) + +cc_binary( + name = "exec_state_workload", + testonly = 1, + srcs = ["exec_state_workload.cc"], + deps = ["@com_google_absl//absl/strings"], +) + +sh_binary( + name = "exit_script", + testonly = 1, + srcs = [ + "exit_script.sh", + ], +) + +cc_binary( + name = 
"priority_execve", + testonly = 1, + srcs = [ + "priority_execve.cc", + ], +) + +cc_library( + name = "base_poll_test", + testonly = 1, + srcs = ["base_poll_test.cc"], + hdrs = ["base_poll_test.h"], + deps = [ + "@com_google_absl//absl/memory", + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/time", + gtest, + "//test/util:logging", + "//test/util:signal_util", + "//test/util:test_util", + "//test/util:thread_util", + ], +) + +cc_library( + name = "file_base", + testonly = 1, + hdrs = ["file_base.h"], + deps = [ + "//test/util:file_descriptor", + "@com_google_absl//absl/strings", + gtest, + "//test/util:posix_error", + "//test/util:temp_path", + "//test/util:test_util", + ], +) + +cc_library( + name = "socket_netlink_util", + testonly = 1, + srcs = ["socket_netlink_util.cc"], + hdrs = ["socket_netlink_util.h"], + deps = [ + ":socket_test_util", + "//test/util:file_descriptor", + "//test/util:posix_error", + "@com_google_absl//absl/strings", + ], +) + +cc_library( + name = "socket_netlink_route_util", + testonly = 1, + srcs = ["socket_netlink_route_util.cc"], + hdrs = ["socket_netlink_route_util.h"], + deps = [ + ":socket_netlink_util", + ], +) + +cc_library( + name = "socket_test_util", + testonly = 1, + srcs = [ + "socket_test_util.cc", + "socket_test_util_impl.cc", + ], + hdrs = ["socket_test_util.h"], + defines = select_system(), + deps = default_net_util() + [ + gtest, + "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:str_format", + "@com_google_absl//absl/time", + "@com_google_absl//absl/types:optional", + "//test/util:file_descriptor", + "//test/util:posix_error", + "//test/util:temp_path", + "//test/util:test_util", + "//test/util:thread_util", + ], +) + +cc_library( + name = "unix_domain_socket_test_util", + testonly = 1, + srcs = ["unix_domain_socket_test_util.cc"], + hdrs = ["unix_domain_socket_test_util.h"], + deps = [ + ":socket_test_util", + "@com_google_absl//absl/strings", + 
gtest, + "//test/util:test_util", + ], +) + +cc_library( + name = "ip_socket_test_util", + testonly = 1, + srcs = ["ip_socket_test_util.cc"], + hdrs = ["ip_socket_test_util.h"], + deps = [ + ":socket_test_util", + "@com_google_absl//absl/strings", + ], +) + +cc_binary( + name = "clock_nanosleep_test", + testonly = 1, + srcs = ["clock_nanosleep.cc"], + linkstatic = 1, + deps = [ + "//test/util:cleanup", + "@com_google_absl//absl/time", + gtest, + "//test/util:posix_error", + "//test/util:signal_util", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + "//test/util:timer_util", + ], +) + +cc_binary( + name = "32bit_test", + testonly = 1, + srcs = select_arch( + amd64 = ["32bit.cc"], + arm64 = [], + ), + linkstatic = 1, + deps = [ + "@com_google_absl//absl/base:core_headers", + gtest, + "//test/util:memory_util", + "//test/util:platform_util", + "//test/util:posix_error", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "accept_bind_test", + testonly = 1, + srcs = ["accept_bind.cc"], + linkstatic = 1, + deps = [ + ":socket_test_util", + ":unix_domain_socket_test_util", + "//test/util:file_descriptor", + gtest, + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "accept_bind_stream_test", + testonly = 1, + srcs = ["accept_bind_stream.cc"], + linkstatic = 1, + deps = [ + ":socket_test_util", + ":unix_domain_socket_test_util", + "//test/util:file_descriptor", + gtest, + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "access_test", + testonly = 1, + srcs = ["access.cc"], + linkstatic = 1, + deps = [ + "//test/util:capability_util", + "//test/util:fs_util", + gtest, + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "affinity_test", + testonly = 1, + srcs = ["affinity.cc"], + linkstatic = 1, + deps = [ + "//test/util:cleanup", + "//test/util:fs_util", + 
"@com_google_absl//absl/strings", + gtest, + "//test/util:posix_error", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + ], +) + +cc_binary( + name = "aio_test", + testonly = 1, + srcs = [ + "aio.cc", + "file_base.h", + ], + linkstatic = 1, + deps = [ + "//test/util:cleanup", + "//test/util:file_descriptor", + "//test/util:fs_util", + "@com_google_absl//absl/strings", + gtest, + "//test/util:memory_util", + "//test/util:posix_error", + "//test/util:proc_util", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "alarm_test", + testonly = 1, + srcs = ["alarm.cc"], + linkstatic = 1, + deps = [ + "//test/util:file_descriptor", + "@com_google_absl//absl/time", + gtest, + "//test/util:logging", + "//test/util:signal_util", + "//test/util:test_util", + "//test/util:thread_util", + ], +) + +cc_binary( + name = "bad_test", + testonly = 1, + srcs = ["bad.cc"], + linkstatic = 1, + visibility = [ + "//:sandbox", + ], + deps = [ + gtest, + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "bind_test", + testonly = 1, + srcs = ["bind.cc"], + linkstatic = 1, + deps = [ + ":socket_test_util", + ":unix_domain_socket_test_util", + gtest, + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "socket_test", + testonly = 1, + srcs = ["socket.cc"], + linkstatic = 1, + deps = [ + ":socket_test_util", + gtest, + "//test/util:file_descriptor", + "//test/util:temp_umask", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "socket_capability_test", + testonly = 1, + srcs = ["socket_capability.cc"], + linkstatic = 1, + deps = [ + ":socket_test_util", + "//test/util:capability_util", + "//test/util:file_descriptor", + gtest, + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "brk_test", + testonly = 1, + srcs = ["brk.cc"], + linkstatic = 1, + deps = [ + 
"//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "chdir_test", + testonly = 1, + srcs = ["chdir.cc"], + linkstatic = 1, + deps = [ + "//test/util:capability_util", + gtest, + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "chmod_test", + testonly = 1, + srcs = ["chmod.cc"], + linkstatic = 1, + deps = [ + "//test/util:capability_util", + "//test/util:file_descriptor", + "//test/util:fs_util", + gtest, + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "chown_test", + testonly = 1, + srcs = ["chown.cc"], + linkstatic = 1, + deps = [ + "//test/util:capability_util", + "//test/util:file_descriptor", + "//test/util:fs_util", + "@com_google_absl//absl/flags:flag", + "@com_google_absl//absl/synchronization", + gtest, + "//test/util:posix_error", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + ], +) + +cc_binary( + name = "sticky_test", + testonly = 1, + srcs = ["sticky.cc"], + linkstatic = 1, + deps = [ + "//test/util:capability_util", + "//test/util:file_descriptor", + "//test/util:fs_util", + "@com_google_absl//absl/flags:flag", + gtest, + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + ], +) + +cc_binary( + name = "chroot_test", + testonly = 1, + srcs = ["chroot.cc"], + linkstatic = 1, + deps = [ + "//test/util:capability_util", + "//test/util:cleanup", + "//test/util:file_descriptor", + "//test/util:fs_util", + "@com_google_absl//absl/strings", + gtest, + "//test/util:mount_util", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "clock_getres_test", + testonly = 1, + srcs = ["clock_getres.cc"], + linkstatic = 1, + deps = [ + gtest, + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = 
"clock_gettime_test", + testonly = 1, + srcs = ["clock_gettime.cc"], + linkstatic = 1, + deps = [ + "@com_google_absl//absl/time", + gtest, + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + ], +) + +cc_binary( + name = "concurrency_test", + testonly = 1, + srcs = ["concurrency.cc"], + linkstatic = 1, + deps = [ + "@com_google_absl//absl/strings", + "@com_google_absl//absl/time", + gtest, + "//test/util:platform_util", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + ], +) + +cc_binary( + name = "connect_external_test", + testonly = 1, + srcs = ["connect_external.cc"], + linkstatic = 1, + deps = [ + ":socket_test_util", + "//test/util:file_descriptor", + "//test/util:fs_util", + gtest, + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "creat_test", + testonly = 1, + srcs = ["creat.cc"], + linkstatic = 1, + deps = [ + "//test/util:fs_util", + gtest, + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "dev_test", + testonly = 1, + srcs = ["dev.cc"], + linkstatic = 1, + deps = [ + "//test/util:file_descriptor", + gtest, + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "dup_test", + testonly = 1, + srcs = ["dup.cc"], + linkstatic = 1, + deps = [ + "//test/util:eventfd_util", + "//test/util:file_descriptor", + gtest, + "//test/util:posix_error", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "epoll_test", + testonly = 1, + srcs = ["epoll.cc"], + linkstatic = 1, + deps = [ + "//test/util:epoll_util", + "//test/util:eventfd_util", + "//test/util:file_descriptor", + gtest, + "//test/util:posix_error", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "eventfd_test", + testonly = 1, + srcs = ["eventfd.cc"], + linkstatic = 1, + deps = [ + "//test/util:epoll_util", + 
"//test/util:eventfd_util", + gtest, + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + ], +) + +cc_binary( + name = "exceptions_test", + testonly = 1, + srcs = select_arch( + amd64 = ["exceptions.cc"], + arm64 = [], + ), + linkstatic = 1, + deps = [ + gtest, + "//test/util:logging", + "//test/util:platform_util", + "//test/util:signal_util", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "getcpu_test", + testonly = 1, + srcs = ["getcpu.cc"], + linkstatic = 1, + deps = [ + "@com_google_absl//absl/time", + gtest, + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "getcpu_host_test", + testonly = 1, + srcs = ["getcpu.cc"], + linkstatic = 1, + deps = [ + "@com_google_absl//absl/time", + gtest, + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "getrusage_test", + testonly = 1, + srcs = ["getrusage.cc"], + linkstatic = 1, + deps = [ + "@com_google_absl//absl/time", + gtest, + "//test/util:logging", + "//test/util:memory_util", + "//test/util:signal_util", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "exec_binary_test", + testonly = 1, + srcs = ["exec_binary.cc"], + linkstatic = 1, + deps = [ + "//test/util:cleanup", + "//test/util:file_descriptor", + "//test/util:fs_util", + "@com_google_absl//absl/strings", + gtest, + "//test/util:multiprocess_util", + "//test/util:posix_error", + "//test/util:proc_util", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "exec_test", + testonly = 1, + srcs = [ + "exec.cc", + "exec.h", + ], + data = [ + ":exec_assert_closed_workload", + ":exec_basic_workload", + ":exec_proc_exe_workload", + ":exec_state_workload", + ":exit_script", + ":priority_execve", + ], + linkstatic = 1, + deps = [ + "//test/util:file_descriptor", + "//test/util:fs_util", + "@com_google_absl//absl/strings", + 
"@com_google_absl//absl/synchronization", + "@com_google_absl//absl/types:optional", + gtest, + "//test/util:multiprocess_util", + "//test/util:posix_error", + "//test/util:temp_path", + "//test/util:test_util", + "//test/util:thread_util", + ], +) + +cc_binary( + name = "exit_test", + testonly = 1, + srcs = ["exit.cc"], + linkstatic = 1, + deps = [ + "//test/util:file_descriptor", + "@com_google_absl//absl/time", + gtest, + "//test/util:test_main", + "//test/util:test_util", + "//test/util:time_util", + ], +) + +cc_binary( + name = "fallocate_test", + testonly = 1, + srcs = ["fallocate.cc"], + linkstatic = 1, + deps = [ + ":file_base", + ":socket_test_util", + "//test/util:cleanup", + "//test/util:eventfd_util", + "//test/util:file_descriptor", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/time", + gtest, + "//test/util:posix_error", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "fault_test", + testonly = 1, + srcs = ["fault.cc"], + linkstatic = 1, + deps = [ + gtest, + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "fchdir_test", + testonly = 1, + srcs = ["fchdir.cc"], + linkstatic = 1, + deps = [ + "//test/util:capability_util", + gtest, + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "fcntl_test", + testonly = 1, + srcs = ["fcntl.cc"], + linkstatic = 1, + deps = [ + ":socket_test_util", + "//test/util:cleanup", + "//test/util:epoll_util", + "//test/util:eventfd_util", + "//test/util:fs_util", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/flags:flag", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/time", + gtest, + "//test/util:multiprocess_util", + "//test/util:posix_error", + "//test/util:save_util", + "//test/util:temp_path", + "//test/util:test_util", + "//test/util:thread_util", + 
"//test/util:timer_util", + ], +) + +cc_binary( + name = "flock_test", + testonly = 1, + srcs = [ + "file_base.h", + "flock.cc", + ], + linkstatic = 1, + deps = [ + ":socket_test_util", + "//test/util:file_descriptor", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/time", + gtest, + "//test/util:epoll_util", + "//test/util:eventfd_util", + "//test/util:posix_error", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + "//test/util:timer_util", + ], +) + +cc_binary( + name = "fork_test", + testonly = 1, + srcs = ["fork.cc"], + linkstatic = 1, + deps = [ + "//test/util:capability_util", + "@com_google_absl//absl/time", + gtest, + "//test/util:logging", + "//test/util:memory_util", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + ], +) + +cc_binary( + name = "fpsig_fork_test", + testonly = 1, + srcs = ["fpsig_fork.cc"], + linkstatic = 1, + deps = [ + gtest, + "//test/util:logging", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + ], +) + +cc_binary( + name = "fpsig_nested_test", + testonly = 1, + srcs = ["fpsig_nested.cc"], + linkstatic = 1, + deps = [ + gtest, + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + ], +) + +cc_binary( + name = "sync_file_range_test", + testonly = 1, + srcs = ["sync_file_range.cc"], + linkstatic = 1, + deps = [ + "//test/util:file_descriptor", + gtest, + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "fsync_test", + testonly = 1, + srcs = ["fsync.cc"], + linkstatic = 1, + deps = [ + "//test/util:file_descriptor", + gtest, + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "futex_test", + testonly = 1, + srcs = ["futex.cc"], + linkstatic = 1, + deps = [ + "//test/util:cleanup", + "//test/util:file_descriptor", + 
"@com_google_absl//absl/memory", + "@com_google_absl//absl/time", + gtest, + "//test/util:memory_util", + "//test/util:save_util", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + "//test/util:time_util", + "//test/util:timer_util", + ], +) + +cc_binary( + name = "getdents_test", + testonly = 1, + srcs = ["getdents.cc"], + linkstatic = 1, + deps = [ + "//test/util:eventfd_util", + "//test/util:file_descriptor", + "//test/util:fs_util", + "@com_google_absl//absl/strings", + gtest, + "//test/util:posix_error", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "getrandom_test", + testonly = 1, + srcs = ["getrandom.cc"], + linkstatic = 1, + deps = [ + gtest, + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "inotify_test", + testonly = 1, + srcs = ["inotify.cc"], + linkstatic = 1, + deps = [ + "//test/util:epoll_util", + "//test/util:file_descriptor", + "//test/util:fs_util", + "//test/util:posix_error", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:str_format", + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/time", + ], +) + +cc_binary( + name = "ioctl_test", + testonly = 1, + srcs = ["ioctl.cc"], + linkstatic = 1, + deps = [ + ":ip_socket_test_util", + ":socket_test_util", + ":unix_domain_socket_test_util", + "//test/util:file_descriptor", + gtest, + "//test/util:signal_util", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_library( + name = "iptables_types", + testonly = 1, + hdrs = [ + "iptables.h", + ], +) + +cc_binary( + name = "iptables_test", + testonly = 1, + srcs = [ + "iptables.cc", + ], + linkstatic = 1, + deps = [ + ":iptables_types", + ":socket_test_util", + "//test/util:capability_util", + "//test/util:file_descriptor", + 
gtest, + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "itimer_test", + testonly = 1, + srcs = ["itimer.cc"], + linkstatic = 1, + deps = [ + "//test/util:file_descriptor", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/time", + gtest, + "//test/util:logging", + "//test/util:multiprocess_util", + "//test/util:posix_error", + "//test/util:signal_util", + "//test/util:test_util", + "//test/util:thread_util", + "//test/util:timer_util", + ], +) + +cc_binary( + name = "kill_test", + testonly = 1, + srcs = ["kill.cc"], + linkstatic = 1, + deps = [ + "//test/util:capability_util", + "//test/util:file_descriptor", + "@com_google_absl//absl/flags:flag", + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/time", + gtest, + "//test/util:logging", + "//test/util:signal_util", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + ], +) + +cc_binary( + name = "link_test", + testonly = 1, + srcs = ["link.cc"], + linkstatic = 1, + deps = [ + "//test/util:capability_util", + "//test/util:file_descriptor", + "//test/util:fs_util", + "@com_google_absl//absl/flags:flag", + "@com_google_absl//absl/strings", + gtest, + "//test/util:posix_error", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + ], +) + +cc_binary( + name = "lseek_test", + testonly = 1, + srcs = ["lseek.cc"], + linkstatic = 1, + deps = [ + "//test/util:file_descriptor", + gtest, + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "madvise_test", + testonly = 1, + srcs = ["madvise.cc"], + linkstatic = 1, + deps = [ + "//test/util:file_descriptor", + gtest, + "//test/util:logging", + "//test/util:memory_util", + "//test/util:multiprocess_util", + "//test/util:posix_error", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "mempolicy_test", + 
testonly = 1, + srcs = ["mempolicy.cc"], + linkstatic = 1, + deps = [ + "//test/util:cleanup", + "@com_google_absl//absl/memory", + gtest, + "//test/util:memory_util", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + ], +) + +cc_binary( + name = "mincore_test", + testonly = 1, + srcs = ["mincore.cc"], + linkstatic = 1, + deps = [ + gtest, + "//test/util:memory_util", + "//test/util:posix_error", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "mkdir_test", + testonly = 1, + srcs = ["mkdir.cc"], + linkstatic = 1, + deps = [ + "//test/util:capability_util", + "//test/util:fs_util", + gtest, + "//test/util:temp_path", + "//test/util:temp_umask", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "mknod_test", + testonly = 1, + srcs = ["mknod.cc"], + linkstatic = 1, + deps = [ + "//test/util:file_descriptor", + gtest, + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + ], +) + +cc_binary( + name = "mlock_test", + testonly = 1, + srcs = ["mlock.cc"], + linkstatic = 1, + deps = [ + "//test/util:capability_util", + "//test/util:cleanup", + gtest, + "//test/util:memory_util", + "//test/util:multiprocess_util", + "//test/util:rlimit_util", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "mmap_test", + testonly = 1, + srcs = ["mmap.cc"], + linkstatic = 1, + deps = [ + "//test/util:cleanup", + "//test/util:file_descriptor", + "//test/util:fs_util", + "@com_google_absl//absl/strings", + gtest, + "//test/util:memory_util", + "//test/util:multiprocess_util", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "mount_test", + testonly = 1, + srcs = ["mount.cc"], + linkstatic = 1, + deps = [ + "//test/util:capability_util", + "//test/util:file_descriptor", + "//test/util:fs_util", + "@com_google_absl//absl/strings", + 
"@com_google_absl//absl/time", + gtest, + "//test/util:mount_util", + "//test/util:multiprocess_util", + "//test/util:posix_error", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + ], +) + +cc_binary( + name = "mremap_test", + testonly = 1, + srcs = ["mremap.cc"], + linkstatic = 1, + deps = [ + "//test/util:file_descriptor", + "@com_google_absl//absl/strings", + gtest, + "//test/util:logging", + "//test/util:memory_util", + "//test/util:multiprocess_util", + "//test/util:posix_error", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "msync_test", + testonly = 1, + srcs = ["msync.cc"], + linkstatic = 1, + deps = [ + "//test/util:file_descriptor", + "//test/util:memory_util", + "//test/util:posix_error", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "munmap_test", + testonly = 1, + srcs = ["munmap.cc"], + linkstatic = 1, + deps = [ + gtest, + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "open_test", + testonly = 1, + srcs = [ + "file_base.h", + "open.cc", + ], + linkstatic = 1, + deps = [ + "//test/util:capability_util", + "//test/util:cleanup", + "//test/util:file_descriptor", + "//test/util:fs_util", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", + gtest, + "//test/util:posix_error", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + ], +) + +cc_binary( + name = "open_create_test", + testonly = 1, + srcs = ["open_create.cc"], + linkstatic = 1, + deps = [ + "//test/util:capability_util", + "//test/util:file_descriptor", + "//test/util:fs_util", + gtest, + "//test/util:temp_path", + "//test/util:temp_umask", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "packet_socket_raw_test", + testonly = 1, + srcs = 
["packet_socket_raw.cc"], + linkstatic = 1, + deps = [ + ":socket_test_util", + ":unix_domain_socket_test_util", + "//test/util:capability_util", + "//test/util:file_descriptor", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/base:endian", + gtest, + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "packet_socket_test", + testonly = 1, + srcs = ["packet_socket.cc"], + linkstatic = 1, + deps = [ + ":socket_test_util", + ":unix_domain_socket_test_util", + "//test/util:capability_util", + "//test/util:file_descriptor", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/base:endian", + gtest, + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "pty_test", + testonly = 1, + srcs = ["pty.cc"], + linkstatic = 1, + deps = [ + "//test/util:capability_util", + "//test/util:file_descriptor", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/time", + gtest, + "//test/util:posix_error", + "//test/util:pty_util", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + ], +) + +cc_binary( + name = "pty_root_test", + testonly = 1, + srcs = ["pty_root.cc"], + linkstatic = 1, + deps = [ + "//test/util:capability_util", + "//test/util:file_descriptor", + "@com_google_absl//absl/base:core_headers", + gtest, + "//test/util:posix_error", + "//test/util:pty_util", + "//test/util:test_main", + "//test/util:thread_util", + ], +) + +cc_binary( + name = "partial_bad_buffer_test", + testonly = 1, + srcs = ["partial_bad_buffer.cc"], + linkstatic = 1, + deps = [ + ":socket_test_util", + "//test/util:file_descriptor", + "//test/util:fs_util", + "@com_google_absl//absl/time", + gtest, + "//test/util:posix_error", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "pause_test", + testonly = 1, + 
srcs = ["pause.cc"], + linkstatic = 1, + deps = [ + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/time", + gtest, + "//test/util:signal_util", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + ], +) + +cc_binary( + name = "ping_socket_test", + testonly = 1, + srcs = ["ping_socket.cc"], + linkstatic = 1, + deps = [ + ":socket_test_util", + "//test/util:file_descriptor", + gtest, + "//test/util:save_util", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "pipe_test", + testonly = 1, + srcs = ["pipe.cc"], + linkstatic = 1, + deps = [ + "//test/util:file_descriptor", + "//test/util:fs_util", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/time", + gtest, + "//test/util:posix_error", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + ], +) + +cc_binary( + name = "poll_test", + testonly = 1, + srcs = ["poll.cc"], + linkstatic = 1, + deps = [ + ":base_poll_test", + "//test/util:eventfd_util", + "//test/util:file_descriptor", + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/time", + gtest, + "//test/util:logging", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + ], +) + +cc_binary( + name = "ppoll_test", + testonly = 1, + srcs = ["ppoll.cc"], + linkstatic = 1, + deps = [ + ":base_poll_test", + "@com_google_absl//absl/time", + gtest, + "//test/util:signal_util", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "arch_prctl_test", + testonly = 1, + srcs = select_arch( + amd64 = ["arch_prctl.cc"], + arm64 = [], + ), + linkstatic = 1, + deps = [ + "//test/util:file_descriptor", + gtest, + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "prctl_test", + testonly = 1, + srcs = ["prctl.cc"], + linkstatic = 1, + deps = [ + 
"//test/util:capability_util", + "//test/util:cleanup", + "@com_google_absl//absl/flags:flag", + gtest, + "//test/util:multiprocess_util", + "//test/util:posix_error", + "//test/util:test_util", + "//test/util:thread_util", + ], +) + +cc_binary( + name = "prctl_setuid_test", + testonly = 1, + srcs = ["prctl_setuid.cc"], + linkstatic = 1, + deps = [ + "//test/util:capability_util", + "@com_google_absl//absl/flags:flag", + gtest, + "//test/util:logging", + "//test/util:multiprocess_util", + "//test/util:posix_error", + "//test/util:test_util", + "//test/util:thread_util", + ], +) + +cc_binary( + name = "pread64_test", + testonly = 1, + srcs = ["pread64.cc"], + linkstatic = 1, + deps = [ + "//test/util:file_descriptor", + gtest, + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "preadv_test", + testonly = 1, + srcs = ["preadv.cc"], + linkstatic = 1, + deps = [ + "//test/util:file_descriptor", + "@com_google_absl//absl/time", + gtest, + "//test/util:logging", + "//test/util:memory_util", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + "//test/util:timer_util", + ], +) + +cc_binary( + name = "preadv2_test", + testonly = 1, + srcs = [ + "file_base.h", + "preadv2.cc", + ], + linkstatic = 1, + deps = [ + "//test/util:file_descriptor", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", + gtest, + "//test/util:posix_error", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "priority_test", + testonly = 1, + srcs = ["priority.cc"], + linkstatic = 1, + deps = [ + "//test/util:capability_util", + "//test/util:fs_util", + "@com_google_absl//absl/strings", + gtest, + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + ], +) + +cc_binary( + name = "proc_test", + testonly = 1, + srcs = ["proc.cc"], + linkstatic = 1, + deps = [ + 
"//test/util:capability_util", + "//test/util:cleanup", + "//test/util:file_descriptor", + "//test/util:fs_util", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/time", + gtest, + "//test/util:memory_util", + "//test/util:posix_error", + "//test/util:temp_path", + "//test/util:test_util", + "//test/util:thread_util", + "//test/util:time_util", + "//test/util:timer_util", + ], +) + +cc_binary( + name = "proc_net_test", + testonly = 1, + srcs = ["proc_net.cc"], + linkstatic = 1, + deps = [ + ":socket_test_util", + "//test/util:capability_util", + "//test/util:file_descriptor", + "//test/util:fs_util", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/time", + gtest, + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "proc_pid_oomscore_test", + testonly = 1, + srcs = ["proc_pid_oomscore.cc"], + linkstatic = 1, + deps = [ + "//test/util:fs_util", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_absl//absl/strings", + ], +) + +cc_binary( + name = "proc_pid_smaps_test", + testonly = 1, + srcs = ["proc_pid_smaps.cc"], + linkstatic = 1, + deps = [ + "//test/util:file_descriptor", + "//test/util:fs_util", + "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:str_format", + "@com_google_absl//absl/types:optional", + gtest, + "//test/util:memory_util", + "//test/util:posix_error", + "//test/util:proc_util", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "proc_pid_uid_gid_map_test", + testonly = 1, + srcs = ["proc_pid_uid_gid_map.cc"], + linkstatic = 1, + deps = [ + "//test/util:capability_util", + "//test/util:cleanup", + "//test/util:file_descriptor", + "//test/util:fs_util", + "@com_google_absl//absl/strings", + gtest, + "//test/util:logging", + "//test/util:multiprocess_util", + "//test/util:posix_error", + 
"//test/util:save_util", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:time_util", + ], +) + +cc_binary( + name = "pselect_test", + testonly = 1, + srcs = ["pselect.cc"], + linkstatic = 1, + deps = [ + ":base_poll_test", + "@com_google_absl//absl/time", + gtest, + "//test/util:signal_util", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "ptrace_test", + testonly = 1, + srcs = ["ptrace.cc"], + linkstatic = 1, + deps = [ + "@com_google_absl//absl/flags:flag", + "@com_google_absl//absl/time", + gtest, + "//test/util:logging", + "//test/util:multiprocess_util", + "//test/util:platform_util", + "//test/util:signal_util", + "//test/util:test_util", + "//test/util:thread_util", + "//test/util:time_util", + ], +) + +cc_binary( + name = "pwrite64_test", + testonly = 1, + srcs = ["pwrite64.cc"], + linkstatic = 1, + deps = [ + gtest, + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "pwritev2_test", + testonly = 1, + srcs = [ + "pwritev2.cc", + ], + linkstatic = 1, + deps = [ + ":file_base", + "//test/util:file_descriptor", + "@com_google_absl//absl/strings", + gtest, + "//test/util:posix_error", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "raw_socket_hdrincl_test", + testonly = 1, + srcs = ["raw_socket_hdrincl.cc"], + linkstatic = 1, + deps = [ + ":socket_test_util", + ":unix_domain_socket_test_util", + "//test/util:capability_util", + "//test/util:file_descriptor", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/base:endian", + gtest, + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "raw_socket_test", + testonly = 1, + srcs = ["raw_socket.cc"], + linkstatic = 1, + deps = [ + ":socket_test_util", + ":unix_domain_socket_test_util", + "//test/util:capability_util", + "//test/util:file_descriptor", + 
"@com_google_absl//absl/base:core_headers", + gtest, + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "raw_socket_icmp_test", + testonly = 1, + srcs = ["raw_socket_icmp.cc"], + linkstatic = 1, + deps = [ + ":socket_test_util", + ":unix_domain_socket_test_util", + "//test/util:capability_util", + "//test/util:file_descriptor", + "@com_google_absl//absl/base:core_headers", + gtest, + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "read_test", + testonly = 1, + srcs = ["read.cc"], + linkstatic = 1, + deps = [ + "//test/util:file_descriptor", + gtest, + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "readahead_test", + testonly = 1, + srcs = ["readahead.cc"], + linkstatic = 1, + deps = [ + "//test/util:file_descriptor", + gtest, + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "readv_test", + testonly = 1, + srcs = [ + "file_base.h", + "readv.cc", + "readv_common.cc", + "readv_common.h", + ], + linkstatic = 1, + deps = [ + "//test/util:file_descriptor", + "@com_google_absl//absl/strings", + gtest, + "//test/util:posix_error", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:timer_util", + ], +) + +cc_binary( + name = "readv_socket_test", + testonly = 1, + srcs = [ + "readv_common.cc", + "readv_common.h", + "readv_socket.cc", + ], + linkstatic = 1, + deps = [ + "//test/util:file_descriptor", + "@com_google_absl//absl/strings", + gtest, + "//test/util:posix_error", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "rename_test", + testonly = 1, + srcs = ["rename.cc"], + linkstatic = 1, + deps = [ + "//test/util:capability_util", + "//test/util:cleanup", + "//test/util:file_descriptor", + "//test/util:fs_util", + "@com_google_absl//absl/strings", + gtest, + 
"//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "rlimits_test", + testonly = 1, + srcs = ["rlimits.cc"], + linkstatic = 1, + deps = [ + "//test/util:capability_util", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "rseq_test", + testonly = 1, + srcs = ["rseq.cc"], + data = ["//test/syscalls/linux/rseq"], + linkstatic = 1, + deps = [ + "//test/syscalls/linux/rseq:lib", + gtest, + "//test/util:logging", + "//test/util:multiprocess_util", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "rtsignal_test", + testonly = 1, + srcs = ["rtsignal.cc"], + linkstatic = 1, + deps = [ + "//test/util:cleanup", + gtest, + "//test/util:logging", + "//test/util:posix_error", + "//test/util:signal_util", + "//test/util:test_util", + ], +) + +cc_binary( + name = "sched_test", + testonly = 1, + srcs = ["sched.cc"], + linkstatic = 1, + deps = [ + gtest, + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "sched_yield_test", + testonly = 1, + srcs = ["sched_yield.cc"], + linkstatic = 1, + deps = [ + gtest, + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "seccomp_test", + testonly = 1, + srcs = ["seccomp.cc"], + linkstatic = 1, + deps = [ + "@com_google_absl//absl/base:core_headers", + gtest, + "//test/util:logging", + "//test/util:memory_util", + "//test/util:multiprocess_util", + "//test/util:posix_error", + "//test/util:proc_util", + "//test/util:test_util", + "//test/util:thread_util", + ], +) + +cc_binary( + name = "select_test", + testonly = 1, + srcs = ["select.cc"], + linkstatic = 1, + deps = [ + ":base_poll_test", + "//test/util:file_descriptor", + "@com_google_absl//absl/time", + gtest, + "//test/util:multiprocess_util", + "//test/util:posix_error", + "//test/util:rlimit_util", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + ], +) + 
+cc_binary( + name = "sendfile_test", + testonly = 1, + srcs = ["sendfile.cc"], + linkstatic = 1, + deps = [ + "//test/util:eventfd_util", + "//test/util:file_descriptor", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/time", + gtest, + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + ], +) + +cc_binary( + name = "sendfile_socket_test", + testonly = 1, + srcs = ["sendfile_socket.cc"], + linkstatic = 1, + deps = [ + ":socket_test_util", + "//test/util:file_descriptor", + "@com_google_absl//absl/strings", + gtest, + ":ip_socket_test_util", + ":unix_domain_socket_test_util", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + ], +) + +cc_binary( + name = "splice_test", + testonly = 1, + srcs = ["splice.cc"], + linkstatic = 1, + deps = [ + "//test/util:file_descriptor", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/time", + gtest, + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + ], +) + +cc_binary( + name = "sigaction_test", + testonly = 1, + srcs = ["sigaction.cc"], + linkstatic = 1, + deps = [ + gtest, + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "sigaltstack_test", + testonly = 1, + srcs = ["sigaltstack.cc"], + data = [ + ":sigaltstack_check", + ], + linkstatic = 1, + deps = [ + "//test/util:cleanup", + "//test/util:fs_util", + gtest, + "//test/util:multiprocess_util", + "//test/util:posix_error", + "//test/util:signal_util", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + ], +) + +cc_binary( + name = "sigiret_test", + testonly = 1, + srcs = select_arch( + amd64 = ["sigiret.cc"], + arm64 = [], + ), + linkstatic = 1, + deps = [ + gtest, + "//test/util:logging", + "//test/util:signal_util", + "//test/util:test_util", + "//test/util:timer_util", + ] + select_arch( + amd64 = [], + 
arm64 = ["//test/util:test_main"], + ), +) + +cc_binary( + name = "signalfd_test", + testonly = 1, + srcs = ["signalfd.cc"], + linkstatic = 1, + deps = [ + "//test/util:file_descriptor", + "@com_google_absl//absl/synchronization", + gtest, + "//test/util:logging", + "//test/util:posix_error", + "//test/util:signal_util", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + ], +) + +cc_binary( + name = "sigprocmask_test", + testonly = 1, + srcs = ["sigprocmask.cc"], + linkstatic = 1, + deps = [ + gtest, + "//test/util:signal_util", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "sigstop_test", + testonly = 1, + srcs = ["sigstop.cc"], + linkstatic = 1, + deps = [ + "@com_google_absl//absl/flags:flag", + "@com_google_absl//absl/time", + gtest, + "//test/util:multiprocess_util", + "//test/util:posix_error", + "//test/util:test_util", + "//test/util:thread_util", + ], +) + +cc_binary( + name = "sigtimedwait_test", + testonly = 1, + srcs = ["sigtimedwait.cc"], + linkstatic = 1, + deps = [ + "//test/util:file_descriptor", + "@com_google_absl//absl/time", + gtest, + "//test/util:logging", + "//test/util:signal_util", + "//test/util:test_util", + "//test/util:thread_util", + "//test/util:timer_util", + ], +) + +cc_library( + name = "socket_generic_test_cases", + testonly = 1, + srcs = [ + "socket_generic.cc", + ], + hdrs = [ + "socket_generic.h", + ], + deps = [ + ":socket_test_util", + ":unix_domain_socket_test_util", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:str_format", + gtest, + "//test/util:test_util", + ], + alwayslink = 1, +) + +cc_binary( + name = "socket_stress_test", + testonly = 1, + srcs = [ + "socket_generic_stress.cc", + ], + linkstatic = 1, + deps = [ + ":ip_socket_test_util", + ":socket_test_util", + gtest, + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_library( + name = "socket_unix_dgram_test_cases", + testonly = 1, + srcs = 
["socket_unix_dgram.cc"], + hdrs = ["socket_unix_dgram.h"], + deps = [ + ":socket_test_util", + ":unix_domain_socket_test_util", + gtest, + "//test/util:test_util", + ], + alwayslink = 1, +) + +cc_library( + name = "socket_unix_seqpacket_test_cases", + testonly = 1, + srcs = ["socket_unix_seqpacket.cc"], + hdrs = ["socket_unix_seqpacket.h"], + deps = [ + ":socket_test_util", + ":unix_domain_socket_test_util", + gtest, + "//test/util:test_util", + ], + alwayslink = 1, +) + +cc_library( + name = "socket_ip_tcp_generic_test_cases", + testonly = 1, + srcs = [ + "socket_ip_tcp_generic.cc", + ], + hdrs = [ + "socket_ip_tcp_generic.h", + ], + deps = [ + ":socket_test_util", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/time", + gtest, + "//test/util:test_util", + "//test/util:thread_util", + ], + alwayslink = 1, +) + +cc_library( + name = "socket_non_blocking_test_cases", + testonly = 1, + srcs = [ + "socket_non_blocking.cc", + ], + hdrs = [ + "socket_non_blocking.h", + ], + deps = [ + ":socket_test_util", + ":unix_domain_socket_test_util", + gtest, + "//test/util:test_util", + ], + alwayslink = 1, +) + +cc_library( + name = "socket_unix_non_stream_test_cases", + testonly = 1, + srcs = [ + "socket_unix_non_stream.cc", + ], + hdrs = [ + "socket_unix_non_stream.h", + ], + deps = [ + ":socket_test_util", + ":unix_domain_socket_test_util", + gtest, + "//test/util:memory_util", + "//test/util:test_util", + ], + alwayslink = 1, +) + +cc_library( + name = "socket_non_stream_test_cases", + testonly = 1, + srcs = [ + "socket_non_stream.cc", + ], + hdrs = [ + "socket_non_stream.h", + ], + deps = [ + ":ip_socket_test_util", + ":socket_test_util", + ":unix_domain_socket_test_util", + gtest, + "//test/util:test_util", + ], + alwayslink = 1, +) + +cc_library( + name = "socket_ip_udp_test_cases", + testonly = 1, + srcs = [ + "socket_ip_udp_generic.cc", + ], + hdrs = [ + "socket_ip_udp_generic.h", + ], + deps = [ + ":ip_socket_test_util", + ":socket_test_util", + gtest, + 
"//test/util:test_util", + ], + alwayslink = 1, +) + +cc_library( + name = "socket_ipv4_udp_unbound_test_cases", + testonly = 1, + srcs = [ + "socket_ipv4_udp_unbound.cc", + ], + hdrs = [ + "socket_ipv4_udp_unbound.h", + ], + deps = [ + ":ip_socket_test_util", + ":socket_test_util", + "@com_google_absl//absl/memory", + gtest, + "//test/util:test_util", + ], + alwayslink = 1, +) + +cc_library( + name = "socket_ipv4_udp_unbound_external_networking_test_cases", + testonly = 1, + srcs = [ + "socket_ipv4_udp_unbound_external_networking.cc", + ], + hdrs = [ + "socket_ipv4_udp_unbound_external_networking.h", + ], + deps = [ + ":ip_socket_test_util", + ":socket_test_util", + gtest, + "//test/util:test_util", + ], + alwayslink = 1, +) + +cc_library( + name = "socket_ipv4_tcp_unbound_external_networking_test_cases", + testonly = 1, + srcs = [ + "socket_ipv4_tcp_unbound_external_networking.cc", + ], + hdrs = [ + "socket_ipv4_tcp_unbound_external_networking.h", + ], + deps = [ + ":ip_socket_test_util", + ":socket_test_util", + gtest, + "//test/util:test_util", + ], + alwayslink = 1, +) + +cc_binary( + name = "socket_abstract_test", + testonly = 1, + srcs = [ + "socket_abstract.cc", + ], + linkstatic = 1, + deps = [ + ":socket_generic_test_cases", + ":socket_test_util", + ":socket_unix_cmsg_test_cases", + ":socket_unix_test_cases", + ":unix_domain_socket_test_util", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "socket_abstract_non_blocking_test", + testonly = 1, + srcs = [ + "socket_unix_abstract_nonblock.cc", + ], + linkstatic = 1, + deps = [ + ":socket_non_blocking_test_cases", + ":socket_test_util", + ":unix_domain_socket_test_util", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "socket_unix_dgram_local_test", + testonly = 1, + srcs = ["socket_unix_dgram_local.cc"], + linkstatic = 1, + deps = [ + ":socket_non_stream_test_cases", + ":socket_test_util", + ":socket_unix_dgram_test_cases", + 
":socket_unix_non_stream_test_cases", + ":unix_domain_socket_test_util", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "socket_unix_dgram_non_blocking_test", + testonly = 1, + srcs = ["socket_unix_dgram_non_blocking.cc"], + linkstatic = 1, + deps = [ + ":socket_test_util", + ":unix_domain_socket_test_util", + gtest, + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "socket_unix_seqpacket_local_test", + testonly = 1, + srcs = [ + "socket_unix_seqpacket_local.cc", + ], + linkstatic = 1, + deps = [ + ":socket_non_stream_test_cases", + ":socket_test_util", + ":socket_unix_non_stream_test_cases", + ":socket_unix_seqpacket_test_cases", + ":unix_domain_socket_test_util", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "socket_unix_stream_test", + testonly = 1, + srcs = ["socket_unix_stream.cc"], + linkstatic = 1, + deps = [ + ":socket_test_util", + ":unix_domain_socket_test_util", + gtest, + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "socket_ip_tcp_generic_loopback_test", + testonly = 1, + srcs = [ + "socket_ip_tcp_generic_loopback.cc", + ], + linkstatic = 1, + deps = [ + ":ip_socket_test_util", + ":socket_ip_tcp_generic_test_cases", + ":socket_test_util", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "socket_ip_tcp_udp_generic_loopback_test", + testonly = 1, + srcs = [ + "socket_ip_tcp_udp_generic.cc", + ], + linkstatic = 1, + deps = [ + ":ip_socket_test_util", + ":socket_test_util", + gtest, + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "socket_ip_tcp_loopback_test", + testonly = 1, + srcs = [ + "socket_ip_tcp_loopback.cc", + ], + linkstatic = 1, + deps = [ + ":ip_socket_test_util", + ":socket_generic_test_cases", + ":socket_test_util", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = 
"socket_ip_tcp_loopback_non_blocking_test", + testonly = 1, + srcs = [ + "socket_ip_tcp_loopback_nonblock.cc", + ], + linkstatic = 1, + deps = [ + ":ip_socket_test_util", + ":socket_non_blocking_test_cases", + ":socket_test_util", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "socket_ip_udp_loopback_test", + testonly = 1, + srcs = [ + "socket_ip_udp_loopback.cc", + ], + linkstatic = 1, + deps = [ + ":ip_socket_test_util", + ":socket_generic_test_cases", + ":socket_ip_udp_test_cases", + ":socket_non_stream_test_cases", + ":socket_test_util", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "socket_ipv4_udp_unbound_external_networking_test", + testonly = 1, + srcs = [ + "socket_ipv4_udp_unbound_external_networking_test.cc", + ], + linkstatic = 1, + deps = [ + ":ip_socket_test_util", + ":socket_ipv4_udp_unbound_external_networking_test_cases", + ":socket_test_util", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "socket_ipv4_tcp_unbound_external_networking_test", + testonly = 1, + srcs = [ + "socket_ipv4_tcp_unbound_external_networking_test.cc", + ], + linkstatic = 1, + deps = [ + ":ip_socket_test_util", + ":socket_ipv4_tcp_unbound_external_networking_test_cases", + ":socket_test_util", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "socket_bind_to_device_test", + testonly = 1, + srcs = [ + "socket_bind_to_device.cc", + ], + linkstatic = 1, + deps = [ + ":ip_socket_test_util", + ":socket_bind_to_device_util", + ":socket_test_util", + "//test/util:capability_util", + gtest, + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + ], +) + +cc_binary( + name = "socket_bind_to_device_sequence_test", + testonly = 1, + srcs = [ + "socket_bind_to_device_sequence.cc", + ], + linkstatic = 1, + deps = [ + ":ip_socket_test_util", + ":socket_bind_to_device_util", + ":socket_test_util", + 
"//test/util:capability_util", + "@com_google_absl//absl/container:node_hash_map", + gtest, + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + ], +) + +cc_binary( + name = "socket_bind_to_device_distribution_test", + testonly = 1, + srcs = [ + "socket_bind_to_device_distribution.cc", + ], + linkstatic = 1, + deps = [ + ":ip_socket_test_util", + ":socket_bind_to_device_util", + ":socket_test_util", + "//test/util:capability_util", + gtest, + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + ], +) + +cc_binary( + name = "socket_ip_udp_loopback_non_blocking_test", + testonly = 1, + srcs = [ + "socket_ip_udp_loopback_nonblock.cc", + ], + linkstatic = 1, + deps = [ + ":ip_socket_test_util", + ":socket_non_blocking_test_cases", + ":socket_test_util", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "socket_ipv4_udp_unbound_loopback_test", + testonly = 1, + srcs = [ + "socket_ipv4_udp_unbound_loopback.cc", + ], + linkstatic = 1, + deps = [ + ":ip_socket_test_util", + ":socket_ipv4_udp_unbound_test_cases", + ":socket_test_util", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "socket_ip_unbound_test", + testonly = 1, + srcs = [ + "socket_ip_unbound.cc", + ], + linkstatic = 1, + deps = [ + ":ip_socket_test_util", + ":socket_test_util", + gtest, + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "socket_domain_test", + testonly = 1, + srcs = [ + "socket_unix_domain.cc", + ], + linkstatic = 1, + deps = [ + ":socket_generic_test_cases", + ":socket_test_util", + ":unix_domain_socket_test_util", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "socket_domain_non_blocking_test", + testonly = 1, + srcs = [ + "socket_unix_pair_nonblock.cc", + ], + linkstatic = 1, + deps = [ + ":socket_non_blocking_test_cases", + ":socket_test_util", + ":unix_domain_socket_test_util", + 
"//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "socket_filesystem_test", + testonly = 1, + srcs = [ + "socket_filesystem.cc", + ], + linkstatic = 1, + deps = [ + ":socket_generic_test_cases", + ":socket_test_util", + ":socket_unix_cmsg_test_cases", + ":socket_unix_test_cases", + ":unix_domain_socket_test_util", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "socket_filesystem_non_blocking_test", + testonly = 1, + srcs = [ + "socket_unix_filesystem_nonblock.cc", + ], + linkstatic = 1, + deps = [ + ":socket_non_blocking_test_cases", + ":socket_test_util", + ":unix_domain_socket_test_util", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "socket_inet_loopback_test", + testonly = 1, + srcs = ["socket_inet_loopback.cc"], + linkstatic = 1, + deps = [ + ":ip_socket_test_util", + ":socket_test_util", + "//test/util:file_descriptor", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/time", + gtest, + "//test/util:posix_error", + "//test/util:save_util", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + ], +) + +cc_binary( + name = "socket_inet_loopback_nogotsan_test", + testonly = 1, + srcs = ["socket_inet_loopback_nogotsan.cc"], + linkstatic = 1, + deps = [ + ":ip_socket_test_util", + ":socket_test_util", + "//test/util:file_descriptor", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", + gtest, + "//test/util:posix_error", + "//test/util:save_util", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + ], +) + +cc_binary( + name = "socket_netlink_test", + testonly = 1, + srcs = ["socket_netlink.cc"], + linkstatic = 1, + deps = [ + ":socket_test_util", + "//test/util:file_descriptor", + gtest, + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "socket_netlink_route_test", + testonly = 1, + srcs = 
["socket_netlink_route.cc"], + linkstatic = 1, + deps = [ + ":socket_netlink_route_util", + ":socket_netlink_util", + ":socket_test_util", + "//test/util:capability_util", + "//test/util:cleanup", + "//test/util:file_descriptor", + "@com_google_absl//absl/strings:str_format", + gtest, + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "socket_netlink_uevent_test", + testonly = 1, + srcs = ["socket_netlink_uevent.cc"], + linkstatic = 1, + deps = [ + ":socket_netlink_util", + ":socket_test_util", + "//test/util:file_descriptor", + gtest, + "//test/util:test_main", + "//test/util:test_util", + ], +) + +# These socket tests are in a library because the test cases are shared +# across several test build targets. +cc_library( + name = "socket_stream_test_cases", + testonly = 1, + srcs = [ + "socket_stream.cc", + ], + hdrs = [ + "socket_stream.h", + ], + deps = [ + ":socket_test_util", + ":unix_domain_socket_test_util", + "@com_google_absl//absl/time", + gtest, + "//test/util:test_util", + ], + alwayslink = 1, +) + +cc_library( + name = "socket_blocking_test_cases", + testonly = 1, + srcs = [ + "socket_blocking.cc", + ], + hdrs = [ + "socket_blocking.h", + ], + deps = [ + ":socket_test_util", + ":unix_domain_socket_test_util", + "@com_google_absl//absl/time", + gtest, + "//test/util:test_util", + "//test/util:thread_util", + "//test/util:timer_util", + ], + alwayslink = 1, +) + +cc_library( + name = "socket_unix_test_cases", + testonly = 1, + srcs = [ + "socket_unix.cc", + ], + hdrs = [ + "socket_unix.h", + ], + deps = [ + ":socket_test_util", + ":unix_domain_socket_test_util", + "@com_google_absl//absl/strings", + gtest, + "//test/util:test_util", + "//test/util:thread_util", + ], + alwayslink = 1, +) + +cc_library( + name = "socket_unix_cmsg_test_cases", + testonly = 1, + srcs = [ + "socket_unix_cmsg.cc", + ], + hdrs = [ + "socket_unix_cmsg.h", + ], + deps = [ + ":socket_test_util", + ":unix_domain_socket_test_util", + 
"@com_google_absl//absl/strings", + gtest, + "//test/util:test_util", + "//test/util:thread_util", + ], + alwayslink = 1, +) + +cc_library( + name = "socket_stream_blocking_test_cases", + testonly = 1, + srcs = [ + "socket_stream_blocking.cc", + ], + hdrs = [ + "socket_stream_blocking.h", + ], + deps = [ + ":socket_test_util", + ":unix_domain_socket_test_util", + "@com_google_absl//absl/time", + gtest, + "//test/util:test_util", + "//test/util:thread_util", + "//test/util:timer_util", + ], + alwayslink = 1, +) + +cc_library( + name = "socket_stream_nonblocking_test_cases", + testonly = 1, + srcs = [ + "socket_stream_nonblock.cc", + ], + hdrs = [ + "socket_stream_nonblock.h", + ], + deps = [ + ":socket_test_util", + ":unix_domain_socket_test_util", + gtest, + "//test/util:test_util", + ], + alwayslink = 1, +) + +cc_library( + name = "socket_non_stream_blocking_test_cases", + testonly = 1, + srcs = [ + "socket_non_stream_blocking.cc", + ], + hdrs = [ + "socket_non_stream_blocking.h", + ], + deps = [ + ":socket_test_util", + ":unix_domain_socket_test_util", + "@com_google_absl//absl/time", + gtest, + "//test/util:test_util", + "//test/util:thread_util", + ], + alwayslink = 1, +) + +cc_library( + name = "socket_bind_to_device_util", + testonly = 1, + srcs = [ + "socket_bind_to_device_util.cc", + ], + hdrs = [ + "socket_bind_to_device_util.h", + ], + deps = [ + "//test/util:test_util", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", + ], + alwayslink = 1, +) + +cc_binary( + name = "socket_stream_local_test", + testonly = 1, + srcs = [ + "socket_unix_stream_local.cc", + ], + linkstatic = 1, + deps = [ + ":socket_stream_test_cases", + ":socket_test_util", + ":unix_domain_socket_test_util", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "socket_stream_blocking_local_test", + testonly = 1, + srcs = [ + "socket_unix_stream_blocking_local.cc", + ], + linkstatic = 1, + deps = [ + ":socket_stream_blocking_test_cases", 
+ ":socket_test_util", + ":unix_domain_socket_test_util", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "socket_stream_blocking_tcp_test", + testonly = 1, + srcs = [ + "socket_ip_tcp_loopback_blocking.cc", + ], + linkstatic = 1, + deps = [ + ":ip_socket_test_util", + ":socket_stream_blocking_test_cases", + ":socket_test_util", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "socket_stream_nonblock_local_test", + testonly = 1, + srcs = [ + "socket_unix_stream_nonblock_local.cc", + ], + linkstatic = 1, + deps = [ + ":socket_stream_nonblocking_test_cases", + ":socket_test_util", + ":unix_domain_socket_test_util", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "socket_unix_unbound_dgram_test", + testonly = 1, + srcs = ["socket_unix_unbound_dgram.cc"], + linkstatic = 1, + deps = [ + ":socket_test_util", + ":unix_domain_socket_test_util", + gtest, + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "socket_unix_unbound_abstract_test", + testonly = 1, + srcs = ["socket_unix_unbound_abstract.cc"], + linkstatic = 1, + deps = [ + ":socket_test_util", + ":unix_domain_socket_test_util", + gtest, + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "socket_unix_unbound_filesystem_test", + testonly = 1, + srcs = ["socket_unix_unbound_filesystem.cc"], + linkstatic = 1, + deps = [ + ":socket_test_util", + ":unix_domain_socket_test_util", + gtest, + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "socket_blocking_local_test", + testonly = 1, + srcs = [ + "socket_unix_blocking_local.cc", + ], + linkstatic = 1, + deps = [ + ":socket_blocking_test_cases", + ":socket_test_util", + ":unix_domain_socket_test_util", + gtest, + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "socket_blocking_ip_test", + testonly = 1, + srcs = [ + 
"socket_ip_loopback_blocking.cc", + ], + linkstatic = 1, + deps = [ + ":ip_socket_test_util", + ":socket_blocking_test_cases", + ":socket_test_util", + gtest, + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "socket_non_stream_blocking_local_test", + testonly = 1, + srcs = [ + "socket_unix_non_stream_blocking_local.cc", + ], + linkstatic = 1, + deps = [ + ":socket_non_stream_blocking_test_cases", + ":socket_test_util", + ":unix_domain_socket_test_util", + gtest, + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "socket_non_stream_blocking_udp_test", + testonly = 1, + srcs = [ + "socket_ip_udp_loopback_blocking.cc", + ], + linkstatic = 1, + deps = [ + ":ip_socket_test_util", + ":socket_non_stream_blocking_test_cases", + ":socket_test_util", + gtest, + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "socket_unix_pair_test", + testonly = 1, + srcs = [ + "socket_unix_pair.cc", + ], + linkstatic = 1, + deps = [ + ":socket_test_util", + ":socket_unix_cmsg_test_cases", + ":socket_unix_test_cases", + ":unix_domain_socket_test_util", + gtest, + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "socket_unix_unbound_seqpacket_test", + testonly = 1, + srcs = ["socket_unix_unbound_seqpacket.cc"], + linkstatic = 1, + deps = [ + ":socket_test_util", + ":unix_domain_socket_test_util", + gtest, + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "socket_unix_unbound_stream_test", + testonly = 1, + srcs = ["socket_unix_unbound_stream.cc"], + linkstatic = 1, + deps = [ + ":socket_test_util", + ":unix_domain_socket_test_util", + gtest, + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "socket_netdevice_test", + testonly = 1, + srcs = ["socket_netdevice.cc"], + linkstatic = 1, + deps = [ + ":socket_netlink_util", + ":socket_test_util", + "//test/util:file_descriptor", + 
"@com_google_absl//absl/base:endian", + gtest, + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "stat_test", + testonly = 1, + srcs = [ + "file_base.h", + "stat.cc", + ], + linkstatic = 1, + deps = [ + "//test/util:cleanup", + "//test/util:file_descriptor", + "//test/util:fs_util", + "@com_google_absl//absl/strings", + gtest, + "//test/util:posix_error", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "stat_times_test", + testonly = 1, + srcs = ["stat_times.cc"], + linkstatic = 1, + deps = [ + "//test/util:file_descriptor", + "@com_google_absl//absl/time", + gtest, + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "statfs_test", + testonly = 1, + srcs = [ + "file_base.h", + "statfs.cc", + ], + linkstatic = 1, + deps = [ + "//test/util:file_descriptor", + "@com_google_absl//absl/strings", + gtest, + "//test/util:posix_error", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "symlink_test", + testonly = 1, + srcs = ["symlink.cc"], + linkstatic = 1, + deps = [ + "//test/util:capability_util", + "//test/util:file_descriptor", + "//test/util:fs_util", + "@com_google_absl//absl/time", + gtest, + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "sync_test", + testonly = 1, + srcs = ["sync.cc"], + linkstatic = 1, + deps = [ + gtest, + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "sysinfo_test", + testonly = 1, + srcs = ["sysinfo.cc"], + linkstatic = 1, + deps = [ + "@com_google_absl//absl/time", + gtest, + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "syslog_test", + testonly = 1, + srcs = ["syslog.cc"], + linkstatic = 1, + deps = [ + gtest, + "//test/util:test_main", + 
"//test/util:test_util", + ], +) + +cc_binary( + name = "sysret_test", + testonly = 1, + srcs = ["sysret.cc"], + linkstatic = 1, + deps = [ + gtest, + "//test/util:logging", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "tcp_socket_test", + testonly = 1, + srcs = ["tcp_socket.cc"], + linkstatic = 1, + deps = [ + ":socket_test_util", + "//test/util:file_descriptor", + "@com_google_absl//absl/time", + gtest, + "//test/util:posix_error", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + ], +) + +cc_binary( + name = "tgkill_test", + testonly = 1, + srcs = ["tgkill.cc"], + linkstatic = 1, + deps = [ + gtest, + "//test/util:signal_util", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + ], +) + +cc_binary( + name = "time_test", + testonly = 1, + srcs = ["time.cc"], + linkstatic = 1, + deps = [ + gtest, + "//test/util:proc_util", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "timerfd_test", + testonly = 1, + srcs = ["timerfd.cc"], + linkstatic = 1, + deps = [ + "//test/util:file_descriptor", + "//test/util:posix_error", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_absl//absl/time", + ], +) + +cc_binary( + name = "timers_test", + testonly = 1, + srcs = ["timers.cc"], + linkstatic = 1, + deps = [ + "//test/util:cleanup", + "@com_google_absl//absl/flags:flag", + "@com_google_absl//absl/time", + gtest, + "//test/util:logging", + "//test/util:multiprocess_util", + "//test/util:posix_error", + "//test/util:signal_util", + "//test/util:test_util", + "//test/util:thread_util", + ], +) + +cc_binary( + name = "tkill_test", + testonly = 1, + srcs = ["tkill.cc"], + linkstatic = 1, + deps = [ + gtest, + "//test/util:logging", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + ], +) + +cc_binary( + name = "truncate_test", + testonly = 1, + srcs = ["truncate.cc"], + linkstatic = 1, + 
deps = [ + ":file_base", + "//test/util:capability_util", + "//test/util:cleanup", + "//test/util:file_descriptor", + "@com_google_absl//absl/strings", + gtest, + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "tuntap_test", + testonly = 1, + srcs = ["tuntap.cc"], + linkstatic = 1, + deps = [ + ":socket_test_util", + gtest, + ":socket_netlink_route_util", + "//test/util:capability_util", + "//test/util:file_descriptor", + "//test/util:fs_util", + "//test/util:posix_error", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_absl//absl/strings", + ], +) + +cc_binary( + name = "tuntap_hostinet_test", + testonly = 1, + srcs = ["tuntap_hostinet.cc"], + linkstatic = 1, + deps = [ + gtest, + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_library( + name = "udp_socket_test_cases", + testonly = 1, + srcs = [ + "udp_socket_errqueue_test_case.cc", + "udp_socket_test_cases.cc", + ], + hdrs = ["udp_socket_test_cases.h"], + defines = select_system(), + deps = [ + ":ip_socket_test_util", + ":socket_test_util", + ":unix_domain_socket_test_util", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/strings:str_format", + "@com_google_absl//absl/time", + gtest, + "//test/util:file_descriptor", + "//test/util:posix_error", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + ], + alwayslink = 1, +) + +cc_binary( + name = "udp_socket_test", + testonly = 1, + srcs = ["udp_socket.cc"], + linkstatic = 1, + deps = [ + ":udp_socket_test_cases", + ], +) + +cc_binary( + name = "udp_bind_test", + testonly = 1, + srcs = ["udp_bind.cc"], + linkstatic = 1, + deps = [ + ":socket_test_util", + "//test/util:file_descriptor", + gtest, + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "uidgid_test", + testonly = 1, + srcs = ["uidgid.cc"], + linkstatic = 1, + deps = [ + "//test/util:capability_util", + 
"@com_google_absl//absl/flags:flag", + "@com_google_absl//absl/strings", + gtest, + "//test/util:posix_error", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + "//test/util:uid_util", + ], +) + +cc_binary( + name = "uname_test", + testonly = 1, + srcs = ["uname.cc"], + linkstatic = 1, + deps = [ + "//test/util:capability_util", + "@com_google_absl//absl/strings", + gtest, + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + ], +) + +cc_binary( + name = "unlink_test", + testonly = 1, + srcs = ["unlink.cc"], + linkstatic = 1, + deps = [ + "//test/util:capability_util", + "//test/util:file_descriptor", + "//test/util:fs_util", + "@com_google_absl//absl/strings", + gtest, + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "unshare_test", + testonly = 1, + srcs = ["unshare.cc"], + linkstatic = 1, + deps = [ + "@com_google_absl//absl/synchronization", + gtest, + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + ], +) + +cc_binary( + name = "utimes_test", + testonly = 1, + srcs = ["utimes.cc"], + linkstatic = 1, + deps = [ + "//test/util:file_descriptor", + "//test/util:fs_util", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_absl//absl/time", + ], +) + +cc_binary( + name = "vdso_test", + testonly = 1, + srcs = ["vdso.cc"], + linkstatic = 1, + deps = [ + "//test/util:fs_util", + gtest, + "//test/util:posix_error", + "//test/util:proc_util", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "vfork_test", + testonly = 1, + srcs = ["vfork.cc"], + linkstatic = 1, + deps = [ + "@com_google_absl//absl/flags:flag", + "@com_google_absl//absl/time", + gtest, + "//test/util:logging", + "//test/util:multiprocess_util", + "//test/util:test_util", + "//test/util:time_util", + ], +) + +cc_binary( + name = "wait_test", + testonly = 1, + srcs = 
["wait.cc"], + linkstatic = 1, + deps = [ + "//test/util:cleanup", + "//test/util:file_descriptor", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/time", + gtest, + "//test/util:logging", + "//test/util:multiprocess_util", + "//test/util:posix_error", + "//test/util:signal_util", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + "//test/util:time_util", + ], +) + +cc_binary( + name = "write_test", + testonly = 1, + srcs = ["write.cc"], + linkstatic = 1, + deps = [ + "//test/util:cleanup", + gtest, + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "memory_accounting_test", + testonly = 1, + srcs = ["memory_accounting.cc"], + linkstatic = 1, + deps = [ + "//test/util:fs_util", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:str_format", + gtest, + "//test/util:posix_error", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "network_namespace_test", + testonly = 1, + srcs = ["network_namespace.cc"], + linkstatic = 1, + deps = [ + ":socket_test_util", + gtest, + "//test/util:capability_util", + "//test/util:posix_error", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + ], +) + +cc_binary( + name = "semaphore_test", + testonly = 1, + srcs = ["semaphore.cc"], + linkstatic = 1, + deps = [ + "//test/util:capability_util", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/time", + gtest, + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + ], +) + +cc_binary( + name = "shm_test", + testonly = 1, + srcs = ["shm.cc"], + linkstatic = 1, + deps = [ + "//test/util:multiprocess_util", + "//test/util:posix_error", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + 
"@com_google_absl//absl/time", + ], +) + +cc_binary( + name = "fadvise64_test", + testonly = 1, + srcs = ["fadvise64.cc"], + linkstatic = 1, + deps = [ + "//test/util:file_descriptor", + gtest, + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "vdso_clock_gettime_test", + testonly = 1, + srcs = ["vdso_clock_gettime.cc"], + linkstatic = 1, + deps = [ + "@com_google_absl//absl/strings", + "@com_google_absl//absl/time", + gtest, + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "vsyscall_test", + testonly = 1, + srcs = ["vsyscall.cc"], + linkstatic = 1, + deps = [ + gtest, + "//test/util:proc_util", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "proc_net_unix_test", + testonly = 1, + srcs = ["proc_net_unix.cc"], + linkstatic = 1, + deps = [ + ":unix_domain_socket_test_util", + "//test/util:file_descriptor", + "//test/util:fs_util", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:str_format", + gtest, + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "memfd_test", + testonly = 1, + srcs = ["memfd.cc"], + linkstatic = 1, + deps = [ + "//test/util:file_descriptor", + "//test/util:fs_util", + gtest, + "//test/util:memory_util", + "//test/util:multiprocess_util", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "proc_net_tcp_test", + testonly = 1, + srcs = ["proc_net_tcp.cc"], + linkstatic = 1, + deps = [ + ":ip_socket_test_util", + "//test/util:file_descriptor", + "@com_google_absl//absl/strings", + gtest, + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "proc_net_udp_test", + testonly = 1, + srcs = ["proc_net_udp.cc"], + linkstatic = 1, + deps = [ + ":ip_socket_test_util", + "//test/util:file_descriptor", + "@com_google_absl//absl/strings", + gtest, + "//test/util:test_main", + 
"//test/util:test_util", + ], +) + +cc_binary( + name = "xattr_test", + testonly = 1, + srcs = [ + "file_base.h", + "xattr.cc", + ], + linkstatic = 1, + deps = [ + "//test/util:capability_util", + "//test/util:file_descriptor", + "//test/util:fs_util", + "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/strings", + gtest, + "//test/util:posix_error", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + ], +) diff --git a/test/syscalls/linux/accept_bind.cc b/test/syscalls/linux/accept_bind.cc new file mode 100644 index 000000000..f65a14fb8 --- /dev/null +++ b/test/syscalls/linux/accept_bind.cc @@ -0,0 +1,641 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <stdio.h> +#include <sys/socket.h> +#include <sys/un.h> + +#include <algorithm> +#include <vector> + +#include "gtest/gtest.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +TEST_P(AllSocketPairTest, Listen) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(listen(sockets->first_fd(), /* backlog = */ 5), + SyscallSucceeds()); +} + +TEST_P(AllSocketPairTest, ListenIncreaseBacklog) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(listen(sockets->first_fd(), /* backlog = */ 5), + SyscallSucceeds()); + ASSERT_THAT(listen(sockets->first_fd(), /* backlog = */ 10), + SyscallSucceeds()); +} + +TEST_P(AllSocketPairTest, ListenDecreaseBacklog) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(listen(sockets->first_fd(), /* backlog = */ 5), + SyscallSucceeds()); + ASSERT_THAT(listen(sockets->first_fd(), /* backlog = */ 1), + SyscallSucceeds()); +} + +TEST_P(AllSocketPairTest, ListenWithoutBind) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + ASSERT_THAT(listen(sockets->first_fd(), 0), SyscallFailsWithErrno(EINVAL)); +} + +TEST_P(AllSocketPairTest, DoubleBind) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->second_addr(), + 
sockets->second_addr_size()), + SyscallFailsWithErrno(EINVAL)); +} + +TEST_P(AllSocketPairTest, BindListenBind) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->second_addr(), + sockets->second_addr_size()), + SyscallFailsWithErrno(EINVAL)); +} + +TEST_P(AllSocketPairTest, DoubleListen) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds()); + + ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds()); +} + +TEST_P(AllSocketPairTest, DoubleConnect) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds()); + + ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallFailsWithErrno(EISCONN)); +} + +TEST_P(AllSocketPairTest, Connect) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds()); + + ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); +} + +TEST_P(AllSocketPairTest, ConnectWithWrongType) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + int type; + socklen_t typelen = sizeof(type); + 
EXPECT_THAT( + getsockopt(sockets->first_fd(), SOL_SOCKET, SO_TYPE, &type, &typelen), + SyscallSucceeds()); + switch (type) { + case SOCK_STREAM: + type = SOCK_SEQPACKET; + break; + case SOCK_SEQPACKET: + type = SOCK_STREAM; + break; + } + + const FileDescriptor another_socket = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_UNIX, type, 0)); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds()); + + if (sockets->first_addr()->sa_data[0] != 0) { + ASSERT_THAT(connect(another_socket.get(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallFailsWithErrno(EPROTOTYPE)); + } else { + ASSERT_THAT(connect(another_socket.get(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallFailsWithErrno(ECONNREFUSED)); + } + + ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); +} + +TEST_P(AllSocketPairTest, ConnectNonListening) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallFailsWithErrno(ECONNREFUSED)); +} + +TEST_P(AllSocketPairTest, ConnectToFilePath) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + struct sockaddr_un addr = {}; + addr.sun_family = AF_UNIX; + constexpr char kPath[] = "/tmp"; + memcpy(addr.sun_path, kPath, sizeof(kPath)); + + ASSERT_THAT( + connect(sockets->second_fd(), + reinterpret_cast<const struct sockaddr*>(&addr), sizeof(addr)), + SyscallFailsWithErrno(ECONNREFUSED)); +} + +TEST_P(AllSocketPairTest, ConnectToInvalidAbstractPath) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + struct sockaddr_un addr = {}; + addr.sun_family = AF_UNIX; + constexpr char kPath[] = 
"\0nonexistent"; + memcpy(addr.sun_path, kPath, sizeof(kPath)); + + ASSERT_THAT( + connect(sockets->second_fd(), + reinterpret_cast<const struct sockaddr*>(&addr), sizeof(addr)), + SyscallFailsWithErrno(ECONNREFUSED)); +} + +TEST_P(AllSocketPairTest, SelfConnect) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds()); + + ASSERT_THAT(connect(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallFailsWithErrno(EINVAL)); +} + +TEST_P(AllSocketPairTest, ConnectWithoutListen) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallFailsWithErrno(ECONNREFUSED)); +} + +TEST_P(AllSocketPairTest, Accept) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds()); + + ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + int accepted = -1; + ASSERT_THAT(accepted = accept(sockets->first_fd(), nullptr, nullptr), + SyscallSucceeds()); + ASSERT_THAT(close(accepted), SyscallSucceeds()); +} + +TEST_P(AllSocketPairTest, AcceptValidAddrLen) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds()); + + ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(), + 
sockets->first_addr_size()), + SyscallSucceeds()); + + int accepted = -1; + struct sockaddr_un addr = {}; + socklen_t addr_len = sizeof(addr); + ASSERT_THAT( + accepted = accept(sockets->first_fd(), + reinterpret_cast<struct sockaddr*>(&addr), &addr_len), + SyscallSucceeds()); + ASSERT_THAT(close(accepted), SyscallSucceeds()); +} + +TEST_P(AllSocketPairTest, AcceptNegativeAddrLen) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds()); + + ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + // With a negative addr_len, accept returns EINVAL, + struct sockaddr_un addr = {}; + socklen_t addr_len = -1; + ASSERT_THAT(accept(sockets->first_fd(), + reinterpret_cast<struct sockaddr*>(&addr), &addr_len), + SyscallFailsWithErrno(EINVAL)); +} + +TEST_P(AllSocketPairTest, AcceptLargePositiveAddrLen) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds()); + + ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + // With a large (positive) addr_len, accept does not return EINVAL. + int accepted = -1; + char addr_buf[200]; + socklen_t addr_len = sizeof(addr_buf); + ASSERT_THAT(accepted = accept(sockets->first_fd(), + reinterpret_cast<struct sockaddr*>(addr_buf), + &addr_len), + SyscallSucceeds()); + // addr_len should have been updated by accept(). 
+ EXPECT_LT(addr_len, sizeof(addr_buf)); + ASSERT_THAT(close(accepted), SyscallSucceeds()); +} + +TEST_P(AllSocketPairTest, AcceptVeryLargePositiveAddrLen) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds()); + + ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + // With a large (positive) addr_len, accept does not return EINVAL. + int accepted = -1; + char addr_buf[2000]; + socklen_t addr_len = sizeof(addr_buf); + ASSERT_THAT(accepted = accept(sockets->first_fd(), + reinterpret_cast<struct sockaddr*>(addr_buf), + &addr_len), + SyscallSucceeds()); + // addr_len should have been updated by accept(). + EXPECT_LT(addr_len, sizeof(addr_buf)); + ASSERT_THAT(close(accepted), SyscallSucceeds()); +} + +TEST_P(AllSocketPairTest, AcceptWithoutBind) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(accept(sockets->first_fd(), nullptr, nullptr), + SyscallFailsWithErrno(EINVAL)); +} + +TEST_P(AllSocketPairTest, AcceptWithoutListen) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + ASSERT_THAT(accept(sockets->first_fd(), nullptr, nullptr), + SyscallFailsWithErrno(EINVAL)); +} + +TEST_P(AllSocketPairTest, GetRemoteAddress) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds()); + + ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + socklen_t addr_len = sockets->first_addr_size(); + struct 
sockaddr_storage addr = {}; + ASSERT_THAT( + getpeername(sockets->second_fd(), (struct sockaddr*)(&addr), &addr_len), + SyscallSucceeds()); + EXPECT_EQ(addr_len, sockets->first_addr_len()); + EXPECT_EQ(0, memcmp(&addr, sockets->first_addr(), sockets->first_addr_len())); +} + +TEST_P(AllSocketPairTest, UnboundGetLocalAddress) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds()); + + ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + socklen_t addr_len = sockets->first_addr_size(); + struct sockaddr_storage addr = {}; + ASSERT_THAT( + getsockname(sockets->second_fd(), (struct sockaddr*)(&addr), &addr_len), + SyscallSucceeds()); + EXPECT_EQ(addr_len, 2); + EXPECT_EQ( + memcmp(&addr, sockets->second_addr(), + std::min((size_t)addr_len, (size_t)sockets->second_addr_len())), + 0); +} + +TEST_P(AllSocketPairTest, BoundGetLocalAddress) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds()); + + ASSERT_THAT(bind(sockets->second_fd(), sockets->second_addr(), + sockets->second_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + socklen_t addr_len = sockets->first_addr_size(); + struct sockaddr_storage addr = {}; + ASSERT_THAT( + getsockname(sockets->second_fd(), (struct sockaddr*)(&addr), &addr_len), + SyscallSucceeds()); + EXPECT_EQ(addr_len, sockets->second_addr_len()); + EXPECT_EQ( + memcmp(&addr, sockets->second_addr(), + std::min((size_t)addr_len, (size_t)sockets->second_addr_len())), + 0); +} + 
+TEST_P(AllSocketPairTest, BoundConnector) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds()); + + ASSERT_THAT(bind(sockets->second_fd(), sockets->second_addr(), + sockets->second_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); +} + +TEST_P(AllSocketPairTest, UnboundSenderAddr) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds()); + + ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + int accepted = -1; + ASSERT_THAT(accepted = accept(sockets->first_fd(), nullptr, nullptr), + SyscallSucceeds()); + FileDescriptor accepted_fd(accepted); + + int i = 0; + ASSERT_THAT(RetryEINTR(send)(sockets->second_fd(), &i, sizeof(i), 0), + SyscallSucceedsWithValue(sizeof(i))); + + struct sockaddr_storage addr; + socklen_t addr_len = sizeof(addr); + ASSERT_THAT( + RetryEINTR(recvfrom)(accepted_fd.get(), &i, sizeof(i), 0, + reinterpret_cast<sockaddr*>(&addr), &addr_len), + SyscallSucceedsWithValue(sizeof(i))); + EXPECT_EQ(addr_len, 0); +} + +TEST_P(AllSocketPairTest, BoundSenderAddr) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds()); + + ASSERT_THAT(bind(sockets->second_fd(), sockets->second_addr(), + sockets->second_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(), + 
sockets->first_addr_size()), + SyscallSucceeds()); + + int accepted = -1; + ASSERT_THAT(accepted = accept(sockets->first_fd(), nullptr, nullptr), + SyscallSucceeds()); + FileDescriptor accepted_fd(accepted); + + int i = 0; + ASSERT_THAT(RetryEINTR(send)(sockets->second_fd(), &i, sizeof(i), 0), + SyscallSucceedsWithValue(sizeof(i))); + + struct sockaddr_storage addr; + socklen_t addr_len = sizeof(addr); + ASSERT_THAT( + RetryEINTR(recvfrom)(accepted_fd.get(), &i, sizeof(i), 0, + reinterpret_cast<sockaddr*>(&addr), &addr_len), + SyscallSucceedsWithValue(sizeof(i))); + EXPECT_EQ(addr_len, sockets->second_addr_len()); + EXPECT_EQ( + memcmp(&addr, sockets->second_addr(), + std::min((size_t)addr_len, (size_t)sockets->second_addr_len())), + 0); +} + +TEST_P(AllSocketPairTest, BindAfterConnectSenderAddr) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds()); + + ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(bind(sockets->second_fd(), sockets->second_addr(), + sockets->second_addr_size()), + SyscallSucceeds()); + + int accepted = -1; + ASSERT_THAT(accepted = accept(sockets->first_fd(), nullptr, nullptr), + SyscallSucceeds()); + FileDescriptor accepted_fd(accepted); + + int i = 0; + ASSERT_THAT(RetryEINTR(send)(sockets->second_fd(), &i, sizeof(i), 0), + SyscallSucceedsWithValue(sizeof(i))); + + struct sockaddr_storage addr; + socklen_t addr_len = sizeof(addr); + ASSERT_THAT( + RetryEINTR(recvfrom)(accepted_fd.get(), &i, sizeof(i), 0, + reinterpret_cast<sockaddr*>(&addr), &addr_len), + SyscallSucceedsWithValue(sizeof(i))); + EXPECT_EQ(addr_len, sockets->second_addr_len()); + EXPECT_EQ( + memcmp(&addr, sockets->second_addr(), + std::min((size_t)addr_len, (size_t)sockets->second_addr_len())), + 0); +} 
+ +TEST_P(AllSocketPairTest, BindAfterAcceptSenderAddr) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds()); + + ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + int accepted = -1; + ASSERT_THAT(accepted = accept(sockets->first_fd(), nullptr, nullptr), + SyscallSucceeds()); + FileDescriptor accepted_fd(accepted); + + ASSERT_THAT(bind(sockets->second_fd(), sockets->second_addr(), + sockets->second_addr_size()), + SyscallSucceeds()); + + int i = 0; + ASSERT_THAT(RetryEINTR(send)(sockets->second_fd(), &i, sizeof(i), 0), + SyscallSucceedsWithValue(sizeof(i))); + + struct sockaddr_storage addr; + socklen_t addr_len = sizeof(addr); + ASSERT_THAT( + RetryEINTR(recvfrom)(accepted_fd.get(), &i, sizeof(i), 0, + reinterpret_cast<sockaddr*>(&addr), &addr_len), + SyscallSucceedsWithValue(sizeof(i))); + EXPECT_EQ(addr_len, sockets->second_addr_len()); + EXPECT_EQ( + memcmp(&addr, sockets->second_addr(), + std::min((size_t)addr_len, (size_t)sockets->second_addr_len())), + 0); +} + +INSTANTIATE_TEST_SUITE_P( + AllUnixDomainSockets, AllSocketPairTest, + ::testing::ValuesIn(VecCat<SocketPairKind>( + ApplyVec<SocketPairKind>( + FilesystemUnboundUnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_STREAM, SOCK_SEQPACKET}, + List<int>{0, SOCK_NONBLOCK})), + ApplyVec<SocketPairKind>( + AbstractUnboundUnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_STREAM, SOCK_SEQPACKET}, + List<int>{0, SOCK_NONBLOCK}))))); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/accept_bind_stream.cc b/test/syscalls/linux/accept_bind_stream.cc new file mode 100644 index 000000000..4857f160b --- /dev/null +++ b/test/syscalls/linux/accept_bind_stream.cc @@ -0,0 +1,92 @@ +// 
Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <stdio.h> +#include <sys/un.h> + +#include <algorithm> +#include <vector> + +#include "gtest/gtest.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +TEST_P(AllSocketPairTest, BoundSenderAddrCoalesced) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds()); + + ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + int accepted = -1; + ASSERT_THAT(accepted = accept(sockets->first_fd(), nullptr, nullptr), + SyscallSucceeds()); + FileDescriptor closer(accepted); + + int i = 0; + ASSERT_THAT(RetryEINTR(send)(sockets->second_fd(), &i, sizeof(i), 0), + SyscallSucceedsWithValue(sizeof(i))); + + ASSERT_THAT(bind(sockets->second_fd(), sockets->second_addr(), + sockets->second_addr_size()), + SyscallSucceeds()); + + i = 0; + ASSERT_THAT(RetryEINTR(send)(sockets->second_fd(), &i, sizeof(i), 0), + SyscallSucceedsWithValue(sizeof(i))); + + int ri[2] = {0, 0}; + struct sockaddr_storage addr; + socklen_t addr_len = 
sizeof(addr); + ASSERT_THAT( + RetryEINTR(recvfrom)(accepted, ri, sizeof(ri), 0, + reinterpret_cast<sockaddr*>(&addr), &addr_len), + SyscallSucceedsWithValue(sizeof(ri))); + EXPECT_EQ(addr_len, sockets->second_addr_len()); + + EXPECT_EQ( + memcmp(&addr, sockets->second_addr(), + std::min((size_t)addr_len, (size_t)sockets->second_addr_len())), + 0); +} + +INSTANTIATE_TEST_SUITE_P( + AllUnixDomainSockets, AllSocketPairTest, + ::testing::ValuesIn(VecCat<SocketPairKind>( + ApplyVec<SocketPairKind>(FilesystemUnboundUnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_STREAM}, + List<int>{ + 0, SOCK_NONBLOCK})), + ApplyVec<SocketPairKind>( + AbstractUnboundUnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_STREAM}, + List<int>{0, SOCK_NONBLOCK}))))); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/access.cc b/test/syscalls/linux/access.cc new file mode 100644 index 000000000..bcc25cef4 --- /dev/null +++ b/test/syscalls/linux/access.cc @@ -0,0 +1,170 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <errno.h> +#include <fcntl.h> +#include <stdlib.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "test/util/capability_util.h" +#include "test/util/fs_util.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +using ::testing::Ge; + +namespace gvisor { +namespace testing { + +namespace { + +class AccessTest : public ::testing::Test { + public: + std::string CreateTempFile(int perm) { + const std::string path = NewTempAbsPath(); + const int fd = open(path.c_str(), O_CREAT | O_RDONLY, perm); + TEST_PCHECK(fd >= 0); // open() returns -1 on failure; fd 0 is valid. + TEST_PCHECK(close(fd) == 0); + return path; + } + + protected: + // SetUp creates various configurations of files. + void SetUp() override { + // Move to the temporary directory. This allows us to reason more easily + // about absolute and relative paths. + ASSERT_THAT(chdir(GetAbsoluteTestTmpdir().c_str()), SyscallSucceeds()); + + // Create an empty file, standard permissions. + relfile_ = NewTempRelPath(); + int fd; + ASSERT_THAT(fd = open(relfile_.c_str(), O_CREAT | O_TRUNC, 0644), + SyscallSucceedsWithValue(Ge(0))); + ASSERT_THAT(close(fd), SyscallSucceeds()); + absfile_ = GetAbsoluteTestTmpdir() + "/" + relfile_; + + // Create an empty directory, no writable permissions. + absdir_ = NewTempAbsPath(); + reldir_ = JoinPath(Basename(absdir_), ""); + ASSERT_THAT(mkdir(reldir_.c_str(), 0555), SyscallSucceeds()); + + // This file doesn't exist. + relnone_ = NewTempRelPath(); + absnone_ = GetAbsoluteTestTmpdir() + "/" + relnone_; + } + + // TearDown unlinks created files.
+ void TearDown() override { + ASSERT_THAT(unlink(absfile_.c_str()), SyscallSucceeds()); + ASSERT_THAT(rmdir(absdir_.c_str()), SyscallSucceeds()); + } + + std::string relfile_; + std::string reldir_; + + std::string absfile_; + std::string absdir_; + + std::string relnone_; + std::string absnone_; +}; + +TEST_F(AccessTest, RelativeFile) { + EXPECT_THAT(access(relfile_.c_str(), R_OK), SyscallSucceeds()); +} + +TEST_F(AccessTest, RelativeDir) { + EXPECT_THAT(access(reldir_.c_str(), R_OK | X_OK), SyscallSucceeds()); +} + +TEST_F(AccessTest, AbsFile) { + EXPECT_THAT(access(absfile_.c_str(), R_OK), SyscallSucceeds()); +} + +TEST_F(AccessTest, AbsDir) { + EXPECT_THAT(access(absdir_.c_str(), R_OK | X_OK), SyscallSucceeds()); +} + +TEST_F(AccessTest, RelDoesNotExist) { + EXPECT_THAT(access(relnone_.c_str(), R_OK), SyscallFailsWithErrno(ENOENT)); +} + +TEST_F(AccessTest, AbsDoesNotExist) { + EXPECT_THAT(access(absnone_.c_str(), R_OK), SyscallFailsWithErrno(ENOENT)); +} + +TEST_F(AccessTest, InvalidMode) { + EXPECT_THAT(access(relfile_.c_str(), 0xffffffff), + SyscallFailsWithErrno(EINVAL)); +} + +TEST_F(AccessTest, NoPerms) { + // Drop capabilities that allow us to override permissions. We must drop + // PERMITTED because access() checks those instead of EFFECTIVE. + ASSERT_NO_ERRNO(DropPermittedCapability(CAP_DAC_OVERRIDE)); + ASSERT_NO_ERRNO(DropPermittedCapability(CAP_DAC_READ_SEARCH)); + + EXPECT_THAT(access(absdir_.c_str(), W_OK), SyscallFailsWithErrno(EACCES)); +} + +TEST_F(AccessTest, InvalidName) { + EXPECT_THAT(access(reinterpret_cast<char*>(0x1234), W_OK), + SyscallFailsWithErrno(EFAULT)); +} + +TEST_F(AccessTest, UsrReadOnly) { + // Drop capabilities that allow us to override permissions. We must drop + // PERMITTED because access() checks those instead of EFFECTIVE. 
+ ASSERT_NO_ERRNO(DropPermittedCapability(CAP_DAC_OVERRIDE)); + ASSERT_NO_ERRNO(DropPermittedCapability(CAP_DAC_READ_SEARCH)); + + const std::string filename = CreateTempFile(0400); + EXPECT_THAT(access(filename.c_str(), R_OK), SyscallSucceeds()); + EXPECT_THAT(access(filename.c_str(), W_OK), SyscallFailsWithErrno(EACCES)); + EXPECT_THAT(access(filename.c_str(), X_OK), SyscallFailsWithErrno(EACCES)); + EXPECT_THAT(unlink(filename.c_str()), SyscallSucceeds()); +} + +TEST_F(AccessTest, UsrReadExec) { + // Drop capabilities that allow us to override permissions. We must drop + // PERMITTED because access() checks those instead of EFFECTIVE. + ASSERT_NO_ERRNO(DropPermittedCapability(CAP_DAC_OVERRIDE)); + ASSERT_NO_ERRNO(DropPermittedCapability(CAP_DAC_READ_SEARCH)); + + const std::string filename = CreateTempFile(0500); + EXPECT_THAT(access(filename.c_str(), R_OK | X_OK), SyscallSucceeds()); + EXPECT_THAT(access(filename.c_str(), W_OK), SyscallFailsWithErrno(EACCES)); + EXPECT_THAT(unlink(filename.c_str()), SyscallSucceeds()); +} + +TEST_F(AccessTest, UsrReadWrite) { + const std::string filename = CreateTempFile(0600); + EXPECT_THAT(access(filename.c_str(), R_OK | W_OK), SyscallSucceeds()); + EXPECT_THAT(access(filename.c_str(), X_OK), SyscallFailsWithErrno(EACCES)); + EXPECT_THAT(unlink(filename.c_str()), SyscallSucceeds()); +} + +TEST_F(AccessTest, UsrReadWriteExec) { + const std::string filename = CreateTempFile(0700); + EXPECT_THAT(access(filename.c_str(), R_OK | W_OK | X_OK), SyscallSucceeds()); + EXPECT_THAT(unlink(filename.c_str()), SyscallSucceeds()); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/affinity.cc b/test/syscalls/linux/affinity.cc new file mode 100644 index 000000000..128364c34 --- /dev/null +++ b/test/syscalls/linux/affinity.cc @@ -0,0 +1,242 @@ +// Copyright 2018 The gVisor Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <sched.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "absl/strings/str_split.h" +#include "test/util/cleanup.h" +#include "test/util/fs_util.h" +#include "test/util/posix_error.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { +namespace { + +// These tests are for both the sched_getaffinity(2) and sched_setaffinity(2) +// syscalls. +class AffinityTest : public ::testing::Test { + protected: + void SetUp() override { + EXPECT_THAT( + // Need to use the raw syscall to get the actual size. + cpuset_size_ = syscall(SYS_sched_getaffinity, /*pid=*/0, + sizeof(cpu_set_t), &mask_), + SyscallSucceeds()); + // Lots of tests rely on having more than 1 logical processor available. + EXPECT_GT(CPU_COUNT(&mask_), 1); + } + + static PosixError ClearLowestBit(cpu_set_t* mask, size_t cpus) { + const size_t mask_size = CPU_ALLOC_SIZE(cpus); + for (size_t n = 0; n < cpus; ++n) { + if (CPU_ISSET_S(n, mask_size, mask)) { + CPU_CLR_S(n, mask_size, mask); + return NoError(); + } + } + return PosixError(EINVAL, "No bit to clear, mask is empty"); + } + + PosixError ClearLowestBit() { return ClearLowestBit(&mask_, CPU_SETSIZE); } + + // Stores the initial cpu mask for this process. + cpu_set_t mask_ = {}; + int cpuset_size_ = 0; // Return value of the raw SYS_sched_getaffinity call made in SetUp. +}; + +// sched_getaffinity(2) is implemented.
+TEST_F(AffinityTest, SchedGetAffinityImplemented) { + EXPECT_THAT(sched_getaffinity(/*pid=*/0, sizeof(cpu_set_t), &mask_), + SyscallSucceeds()); +} + +// PID is not found. +TEST_F(AffinityTest, SchedGetAffinityInvalidPID) { + // Flaky, but it's tough to avoid a race condition when finding an unused pid + EXPECT_THAT(sched_getaffinity(/*pid=*/INT_MAX - 1, sizeof(cpu_set_t), &mask_), + SyscallFailsWithErrno(ESRCH)); +} + +// PID is not found. +TEST_F(AffinityTest, SchedSetAffinityInvalidPID) { + // Flaky, but it's tough to avoid a race condition when finding an unused pid + EXPECT_THAT(sched_setaffinity(/*pid=*/INT_MAX - 1, sizeof(cpu_set_t), &mask_), + SyscallFailsWithErrno(ESRCH)); +} + +TEST_F(AffinityTest, SchedSetAffinityZeroMask) { + CPU_ZERO(&mask_); + EXPECT_THAT(sched_setaffinity(/*pid=*/0, sizeof(cpu_set_t), &mask_), + SyscallFailsWithErrno(EINVAL)); +} + +// N.B. This test case relies on cpuset_size_ * 8 being larger than the number +// of existing CPUs. Check your machine if the test fails. +TEST_F(AffinityTest, SchedSetAffinityNonexistentCPUDropped) { + cpu_set_t mask = mask_; + // Add a nonexistent CPU. + // + // The number needs to be larger than the possible number of CPUs available, + // but smaller than the number of the CPU that the kernel claims to support -- + // it's implicitly returned by raw sched_getaffinity syscall. + CPU_SET(cpuset_size_ * 8 - 1, &mask); + EXPECT_THAT( + // Use raw syscall because it will be rejected by the libc wrapper + // otherwise.
+ syscall(SYS_sched_setaffinity, /*pid=*/0, sizeof(cpu_set_t), &mask), + SyscallSucceeds()) + << "failed with cpumask : " << CPUSetToString(mask) + << ", cpuset_size_ : " << cpuset_size_; + cpu_set_t newmask; + EXPECT_THAT(sched_getaffinity(/*pid=*/0, sizeof(cpu_set_t), &newmask), + SyscallSucceeds()); + EXPECT_TRUE(CPU_EQUAL(&mask_, &newmask)) + << "got: " << CPUSetToString(newmask) + << " != expected: " << CPUSetToString(mask_); +} + +TEST_F(AffinityTest, SchedSetAffinityOnlyNonexistentCPUFails) { + // Make an empty cpu set. + CPU_ZERO(&mask_); + // Add a nonexistent CPU. + // + // The number needs to be larger than the possible number of CPU available, + // but smaller than the number of the CPU that the kernel claims to support -- + // it's implicitly returned by raw sched_getaffinity syscall. + int cpu = cpuset_size_ * 8 - 1; + if (cpu <= NumCPUs()) { + GTEST_SKIP() << "Skipping test: cpu " << cpu << " exists"; + } + CPU_SET(cpu, &mask_); + EXPECT_THAT( + // Use raw syscall because it will be rejected by the libc wrapper + // otherwise. + syscall(SYS_sched_setaffinity, /*pid=*/0, sizeof(cpu_set_t), &mask_), + SyscallFailsWithErrno(EINVAL)); +} + +TEST_F(AffinityTest, SchedSetAffinityInvalidSize) { + EXPECT_GT(cpuset_size_, 0); + // Not big enough. + EXPECT_THAT(sched_getaffinity(/*pid=*/0, cpuset_size_ - 1, &mask_), + SyscallFailsWithErrno(EINVAL)); + // Not a multiple of word size. 
+ EXPECT_THAT(sched_getaffinity(/*pid=*/0, cpuset_size_ + 1, &mask_), + SyscallFailsWithErrno(EINVAL)); +} + +TEST_F(AffinityTest, Sanity) { + ASSERT_NO_ERRNO(ClearLowestBit()); + EXPECT_THAT(sched_setaffinity(/*pid=*/0, sizeof(cpu_set_t), &mask_), + SyscallSucceeds()); + cpu_set_t newmask; + EXPECT_THAT(sched_getaffinity(/*pid=*/0, sizeof(cpu_set_t), &newmask), + SyscallSucceeds()); + EXPECT_TRUE(CPU_EQUAL(&mask_, &newmask)) + << "got: " << CPUSetToString(newmask) + << " != expected: " << CPUSetToString(mask_); +} + +TEST_F(AffinityTest, NewThread) { + SKIP_IF(CPU_COUNT(&mask_) < 3); + ASSERT_NO_ERRNO(ClearLowestBit()); + ASSERT_NO_ERRNO(ClearLowestBit()); + EXPECT_THAT(sched_setaffinity(/*pid=*/0, sizeof(cpu_set_t), &mask_), + SyscallSucceeds()); + ScopedThread([this]() { + cpu_set_t child_mask; + ASSERT_THAT(sched_getaffinity(/*pid=*/0, sizeof(cpu_set_t), &child_mask), + SyscallSucceeds()); + ASSERT_TRUE(CPU_EQUAL(&child_mask, &mask_)) + << "child cpu mask: " << CPUSetToString(child_mask) + << " != parent cpu mask: " << CPUSetToString(mask_); + }); +} + +TEST_F(AffinityTest, ConsistentWithProcCpuInfo) { + // Count how many cpus are shown in /proc/cpuinfo. + std::string cpuinfo = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/cpuinfo")); + int count = 0; + for (auto const& line : absl::StrSplit(cpuinfo, '\n')) { + if (absl::StartsWith(line, "processor")) { + count++; + } + } + EXPECT_GE(count, CPU_COUNT(&mask_)); +} + +TEST_F(AffinityTest, ConsistentWithProcStat) { + // Count how many cpus are shown in /proc/stat. 
+ std::string stat = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/stat")); + int count = 0; + for (auto const& line : absl::StrSplit(stat, '\n')) { + if (absl::StartsWith(line, "cpu") && !absl::StartsWith(line, "cpu ")) { + count++; + } + } + EXPECT_GE(count, CPU_COUNT(&mask_)); +} + +TEST_F(AffinityTest, SmallCpuMask) { + const int num_cpus = NumCPUs(); + const size_t mask_size = CPU_ALLOC_SIZE(num_cpus); + cpu_set_t* mask = CPU_ALLOC(num_cpus); + ASSERT_NE(mask, nullptr); + const auto free_mask = Cleanup([&] { CPU_FREE(mask); }); + + CPU_ZERO_S(mask_size, mask); + ASSERT_THAT(sched_getaffinity(0, mask_size, mask), SyscallSucceeds()); +} + +TEST_F(AffinityTest, LargeCpuMask) { + // Allocate mask bigger than cpu_set_t normally allocates. + const size_t cpus = CPU_SETSIZE * 8; + const size_t mask_size = CPU_ALLOC_SIZE(cpus); + + cpu_set_t* large_mask = CPU_ALLOC(cpus); + auto free_mask = Cleanup([large_mask] { CPU_FREE(large_mask); }); + CPU_ZERO_S(mask_size, large_mask); + + // Check that get affinity with large mask works as expected. + ASSERT_THAT(sched_getaffinity(/*pid=*/0, mask_size, large_mask), + SyscallSucceeds()); + EXPECT_TRUE(CPU_EQUAL(&mask_, large_mask)) + << "got: " << CPUSetToString(*large_mask, cpus) + << " != expected: " << CPUSetToString(mask_); + + // Check that set affinity with large mask works as expected. 
+ ASSERT_NO_ERRNO(ClearLowestBit(large_mask, cpus)); + EXPECT_THAT(sched_setaffinity(/*pid=*/0, mask_size, large_mask), + SyscallSucceeds()); + + cpu_set_t* new_mask = CPU_ALLOC(cpus); + auto free_new_mask = Cleanup([new_mask] { CPU_FREE(new_mask); }); + CPU_ZERO_S(mask_size, new_mask); + EXPECT_THAT(sched_getaffinity(/*pid=*/0, mask_size, new_mask), + SyscallSucceeds()); + + EXPECT_TRUE(CPU_EQUAL_S(mask_size, large_mask, new_mask)) + << "got: " << CPUSetToString(*new_mask, cpus) + << " != expected: " << CPUSetToString(*large_mask, cpus); +} + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/aio.cc b/test/syscalls/linux/aio.cc new file mode 100644 index 000000000..806d5729e --- /dev/null +++ b/test/syscalls/linux/aio.cc @@ -0,0 +1,430 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <fcntl.h> +#include <linux/aio_abi.h> +#include <sys/mman.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <unistd.h> + +#include <algorithm> +#include <string> + +#include "gtest/gtest.h" +#include "test/syscalls/linux/file_base.h" +#include "test/util/cleanup.h" +#include "test/util/file_descriptor.h" +#include "test/util/fs_util.h" +#include "test/util/memory_util.h" +#include "test/util/posix_error.h" +#include "test/util/proc_util.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +using ::testing::_; + +namespace gvisor { +namespace testing { +namespace { + +// Returns the size of the VMA containing the given address. +PosixErrorOr<size_t> VmaSizeAt(uintptr_t addr) { + ASSIGN_OR_RETURN_ERRNO(std::string proc_self_maps, + GetContents("/proc/self/maps")); + ASSIGN_OR_RETURN_ERRNO(auto entries, ParseProcMaps(proc_self_maps)); + // Use binary search to find the first VMA that might contain addr. + ProcMapsEntry target = {}; + target.end = addr; + auto it = + std::upper_bound(entries.begin(), entries.end(), target, + [](const ProcMapsEntry& x, const ProcMapsEntry& y) { + return x.end < y.end; + }); + // Check that it actually contains addr. 
+ if (it == entries.end() || addr < it->start) { + return PosixError(ENOENT, absl::StrCat("no VMA contains address ", addr)); + } + return it->end - it->start; +} + +constexpr char kData[] = "hello world!"; + +int SubmitCtx(aio_context_t ctx, long nr, struct iocb** iocbpp) { + return syscall(__NR_io_submit, ctx, nr, iocbpp); +} + +class AIOTest : public FileTest { + public: + AIOTest() : ctx_(0) {} + + int SetupContext(unsigned int nr) { + return syscall(__NR_io_setup, nr, &ctx_); + } + + int Submit(long nr, struct iocb** iocbpp) { + return SubmitCtx(ctx_, nr, iocbpp); + } + + int GetEvents(long min, long max, struct io_event* events, + struct timespec* timeout) { + return RetryEINTR(syscall)(__NR_io_getevents, ctx_, min, max, events, + timeout); + } + + int DestroyContext() { return syscall(__NR_io_destroy, ctx_); } + + void TearDown() override { + FileTest::TearDown(); + if (ctx_ != 0) { + ASSERT_THAT(DestroyContext(), SyscallSucceeds()); + ctx_ = 0; + } + } + + struct iocb CreateCallback() { + struct iocb cb = {}; + cb.aio_data = 0x123; + cb.aio_fildes = test_file_fd_.get(); + cb.aio_lio_opcode = IOCB_CMD_PWRITE; + cb.aio_buf = reinterpret_cast<uint64_t>(kData); + cb.aio_offset = 0; + cb.aio_nbytes = strlen(kData); + return cb; + } + + protected: + aio_context_t ctx_; +}; + +TEST_F(AIOTest, BasicWrite) { + // Copied from fs/aio.c. + constexpr unsigned AIO_RING_MAGIC = 0xa10a10a1; + struct aio_ring { + unsigned id; + unsigned nr; + unsigned head; + unsigned tail; + unsigned magic; + unsigned compat_features; + unsigned incompat_features; + unsigned header_length; + struct io_event io_events[0]; + }; + + // Setup a context that is 128 entries deep. + ASSERT_THAT(SetupContext(128), SyscallSucceeds()); + + // Check that 'ctx_' points to a valid address. libaio uses it to check if + // aio implementation uses aio_ring. gVisor doesn't and returns all zeroes. + // Linux implements aio_ring, so skip the zeroes check. 
+ // + // TODO(gvisor.dev/issue/204): Remove when gVisor implements aio_ring. + auto ring = reinterpret_cast<struct aio_ring*>(ctx_); + auto magic = IsRunningOnGvisor() ? 0 : AIO_RING_MAGIC; + EXPECT_EQ(ring->magic, magic); + + struct iocb cb = CreateCallback(); + struct iocb* cbs[1] = {&cb}; + + // Submit the request. + ASSERT_THAT(Submit(1, cbs), SyscallSucceedsWithValue(1)); + + // Get the reply. + struct io_event events[1]; + ASSERT_THAT(GetEvents(1, 1, events, nullptr), SyscallSucceedsWithValue(1)); + + // Verify that it is as expected. + EXPECT_EQ(events[0].data, 0x123); + EXPECT_EQ(events[0].obj, reinterpret_cast<long>(&cb)); + EXPECT_EQ(events[0].res, strlen(kData)); + + // Verify that the file contains the contents. + char verify_buf[sizeof(kData)] = {}; + ASSERT_THAT(read(test_file_fd_.get(), verify_buf, sizeof(kData)), + SyscallSucceedsWithValue(strlen(kData))); + EXPECT_STREQ(verify_buf, kData); +} + +TEST_F(AIOTest, BadWrite) { + // Create a pipe and immediately close the read end. + int pipefd[2]; + ASSERT_THAT(pipe(pipefd), SyscallSucceeds()); + + FileDescriptor rfd(pipefd[0]); + FileDescriptor wfd(pipefd[1]); + + rfd.reset(); // Close the read end. + + // Setup a context that is 128 entries deep. + ASSERT_THAT(SetupContext(128), SyscallSucceeds()); + + struct iocb cb = CreateCallback(); + // Write to the pipe's write end; its read end is already closed. + cb.aio_fildes = wfd.get(); + struct iocb* cbs[1] = {&cb}; + + // Submit the request. + ASSERT_THAT(Submit(1, cbs), SyscallSucceedsWithValue(1)); + + // Get the reply. + struct io_event events[1]; + ASSERT_THAT(GetEvents(1, 1, events, nullptr), SyscallSucceedsWithValue(1)); + + // Verify that the write failed (the result is negative). + EXPECT_EQ(events[0].data, 0x123); + EXPECT_EQ(events[0].obj, reinterpret_cast<uint64_t>(&cb)); + EXPECT_LT(events[0].res, 0); +} + +TEST_F(AIOTest, ExitWithPendingIo) { + // Setup a context that is 100 entries deep.
+ ASSERT_THAT(SetupContext(100), SyscallSucceeds()); + + struct iocb cb = CreateCallback(); + struct iocb* cbs[] = {&cb}; + + // Submit a request but don't complete it to make it pending. + for (int i = 0; i < 100; ++i) { + EXPECT_THAT(Submit(1, cbs), SyscallSucceeds()); + } + + ASSERT_THAT(DestroyContext(), SyscallSucceeds()); + ctx_ = 0; +} + +int Submitter(void* arg) { + auto test = reinterpret_cast<AIOTest*>(arg); + + struct iocb cb = test->CreateCallback(); + struct iocb* cbs[1] = {&cb}; + + // Submit the request. + TEST_CHECK(test->Submit(1, cbs) == 1); + return 0; +} + +TEST_F(AIOTest, CloneVm) { + // Setup a context that is 128 entries deep. + ASSERT_THAT(SetupContext(128), SyscallSucceeds()); + + const size_t kStackSize = 5 * kPageSize; + std::unique_ptr<char[]> stack(new char[kStackSize]); + char* bp = stack.get() + kStackSize; + pid_t child; + ASSERT_THAT(child = clone(Submitter, bp, CLONE_VM | SIGCHLD, + reinterpret_cast<void*>(this)), + SyscallSucceeds()); + + // Get the reply. + struct io_event events[1]; + ASSERT_THAT(GetEvents(1, 1, events, nullptr), SyscallSucceedsWithValue(1)); + + // Verify that it is as expected. + EXPECT_EQ(events[0].data, 0x123); + EXPECT_EQ(events[0].res, strlen(kData)); + + // Verify that the file contains the contents. + char verify_buf[32] = {}; + ASSERT_THAT(read(test_file_fd_.get(), &verify_buf[0], strlen(kData)), + SyscallSucceeds()); + EXPECT_EQ(strcmp(kData, &verify_buf[0]), 0); + + int status; + ASSERT_THAT(RetryEINTR(waitpid)(child, &status, 0), + SyscallSucceedsWithValue(child)); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << " status " << status; +} + +// Tests that AIO context can be remapped to a different address. +TEST_F(AIOTest, Mremap) { + // Setup a context that is 128 entries deep. 
+ ASSERT_THAT(SetupContext(128), SyscallSucceeds()); + const size_t ctx_size = + ASSERT_NO_ERRNO_AND_VALUE(VmaSizeAt(reinterpret_cast<uintptr_t>(ctx_))); + + struct iocb cb = CreateCallback(); + struct iocb* cbs[1] = {&cb}; + + // Reserve address space for the mremap target so we have something safe to + // map over. + Mapping dst = + ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(ctx_size, PROT_READ, MAP_PRIVATE)); + + // Remap context 'handle' to a different address. + ASSERT_THAT(Mremap(reinterpret_cast<void*>(ctx_), ctx_size, dst.len(), + MREMAP_FIXED | MREMAP_MAYMOVE, dst.ptr()), + IsPosixErrorOkAndHolds(dst.ptr())); + aio_context_t old_ctx = ctx_; + ctx_ = reinterpret_cast<aio_context_t>(dst.addr()); + // io_destroy() will unmap dst now. + dst.release(); + + // Check that submitting the request with the old 'ctx_' fails. + ASSERT_THAT(SubmitCtx(old_ctx, 1, cbs), SyscallFailsWithErrno(EINVAL)); + + // Submit the request with the new 'ctx_'. + ASSERT_THAT(Submit(1, cbs), SyscallSucceedsWithValue(1)); + + // Remap again. + dst = ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(ctx_size, PROT_READ, MAP_PRIVATE)); + ASSERT_THAT(Mremap(reinterpret_cast<void*>(ctx_), ctx_size, dst.len(), + MREMAP_FIXED | MREMAP_MAYMOVE, dst.ptr()), + IsPosixErrorOkAndHolds(dst.ptr())); + ctx_ = reinterpret_cast<aio_context_t>(dst.addr()); + dst.release(); + + // Get the reply with yet another 'ctx_' and verify it. + struct io_event events[1]; + ASSERT_THAT(GetEvents(1, 1, events, nullptr), SyscallSucceedsWithValue(1)); + EXPECT_EQ(events[0].data, 0x123); + EXPECT_EQ(events[0].obj, reinterpret_cast<long>(&cb)); + EXPECT_EQ(events[0].res, strlen(kData)); + + // Verify that the file contains the contents. + char verify_buf[sizeof(kData)] = {}; + ASSERT_THAT(read(test_file_fd_.get(), verify_buf, sizeof(kData)), + SyscallSucceedsWithValue(strlen(kData))); + EXPECT_STREQ(verify_buf, kData); +} + +// Tests that AIO context cannot be expanded with mremap. 
+TEST_F(AIOTest, MremapExpansion) { + // Setup a context that is 128 entries deep. + ASSERT_THAT(SetupContext(128), SyscallSucceeds()); + const size_t ctx_size = + ASSERT_NO_ERRNO_AND_VALUE(VmaSizeAt(reinterpret_cast<uintptr_t>(ctx_))); + + // Reserve address space for the mremap target so we have something safe to + // map over. + Mapping dst = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(ctx_size + kPageSize, PROT_NONE, MAP_PRIVATE)); + + // Test that remapping to a larger address range fails. + ASSERT_THAT(Mremap(reinterpret_cast<void*>(ctx_), ctx_size, dst.len(), + MREMAP_FIXED | MREMAP_MAYMOVE, dst.ptr()), + PosixErrorIs(EFAULT, _)); + + // mm/mremap.c:sys_mremap() => mremap_to() does do_munmap() of the destination + // before it hits the VM_DONTEXPAND check in vma_to_resize(), so we should no + // longer munmap it (another thread may have created a mapping there). + dst.release(); +} + +// Tests that AIO calls fail if context's address is inaccessible. +TEST_F(AIOTest, Mprotect) { + // Setup a context that is 128 entries deep. + ASSERT_THAT(SetupContext(128), SyscallSucceeds()); + + struct iocb cb = CreateCallback(); + struct iocb* cbs[1] = {&cb}; + + ASSERT_THAT(Submit(1, cbs), SyscallSucceedsWithValue(1)); + + // Makes the context 'handle' inaccessible and check that all subsequent + // calls fail. + ASSERT_THAT(mprotect(reinterpret_cast<void*>(ctx_), kPageSize, PROT_NONE), + SyscallSucceeds()); + struct io_event events[1]; + EXPECT_THAT(GetEvents(1, 1, events, nullptr), SyscallFailsWithErrno(EINVAL)); + ASSERT_THAT(Submit(1, cbs), SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(DestroyContext(), SyscallFailsWithErrno(EINVAL)); + + // Prevent TearDown from attempting to destroy the context and fail. + ctx_ = 0; +} + +TEST_F(AIOTest, Timeout) { + // Setup a context that is 128 entries deep. 
+ ASSERT_THAT(SetupContext(128), SyscallSucceeds()); + + struct timespec timeout; + timeout.tv_sec = 0; + timeout.tv_nsec = 10; + struct io_event events[1]; + ASSERT_THAT(GetEvents(1, 1, events, &timeout), SyscallSucceedsWithValue(0)); +} + +class AIOReadWriteParamTest : public AIOTest, + public ::testing::WithParamInterface<int> {}; + +TEST_P(AIOReadWriteParamTest, BadOffset) { + // Setup a context that is 128 entries deep. + ASSERT_THAT(SetupContext(128), SyscallSucceeds()); + + struct iocb cb = CreateCallback(); + struct iocb* cbs[1] = {&cb}; + + // Create a buffer that we can write to. + char buf[] = "hello world!"; + cb.aio_buf = reinterpret_cast<uint64_t>(buf); + + // Set the operation on the callback and give a negative offset. + const int opcode = GetParam(); + cb.aio_lio_opcode = opcode; + + iovec iov = {}; + if (opcode == IOCB_CMD_PREADV || opcode == IOCB_CMD_PWRITEV) { + // Create a valid iovec and set it in the callback. + iov.iov_base = reinterpret_cast<void*>(buf); + iov.iov_len = 1; + cb.aio_buf = reinterpret_cast<uint64_t>(&iov); + // aio_nbytes is the number of iovecs. + cb.aio_nbytes = 1; + } + + // Pass a negative offset. + cb.aio_offset = -1; + + // Should get error on submission. + ASSERT_THAT(Submit(1, cbs), SyscallFailsWithErrno(EINVAL)); +} + +INSTANTIATE_TEST_SUITE_P(BadOffset, AIOReadWriteParamTest, + ::testing::Values(IOCB_CMD_PREAD, IOCB_CMD_PWRITE, + IOCB_CMD_PREADV, IOCB_CMD_PWRITEV)); + +class AIOVectorizedParamTest : public AIOTest, + public ::testing::WithParamInterface<int> {}; + +TEST_P(AIOVectorizedParamTest, BadIOVecs) { + // Setup a context that is 128 entries deep. + ASSERT_THAT(SetupContext(128), SyscallSucceeds()); + + struct iocb cb = CreateCallback(); + struct iocb* cbs[1] = {&cb}; + + // Modify the callback to use the operation from the param. + cb.aio_lio_opcode = GetParam(); + + // Create an iovec with address in kernel range, and pass that as the buffer. 
+ iovec iov = {}; + iov.iov_base = reinterpret_cast<void*>(0xFFFFFFFF00000000); + iov.iov_len = 1; + cb.aio_buf = reinterpret_cast<uint64_t>(&iov); + // aio_nbytes is the number of iovecs. + cb.aio_nbytes = 1; + + // Should get error on submission. + ASSERT_THAT(Submit(1, cbs), SyscallFailsWithErrno(EFAULT)); +} + +INSTANTIATE_TEST_SUITE_P(BadIOVecs, AIOVectorizedParamTest, + ::testing::Values(IOCB_CMD_PREADV, IOCB_CMD_PWRITEV)); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/alarm.cc b/test/syscalls/linux/alarm.cc new file mode 100644 index 000000000..940c97285 --- /dev/null +++ b/test/syscalls/linux/alarm.cc @@ -0,0 +1,192 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <signal.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/util/file_descriptor.h" +#include "test/util/logging.h" +#include "test/util/signal_util.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// N.B. Below, main blocks SIGALRM. Test cases must unblock it if they want +// delivery. + +void do_nothing_handler(int sig, siginfo_t* siginfo, void* arg) {} + +// No random save as the test relies on alarm timing. Cooperative save tests +// already cover the save between alarm and read. 
+TEST(AlarmTest, Interrupt_NoRandomSave) { + int pipe_fds[2]; + ASSERT_THAT(pipe(pipe_fds), SyscallSucceeds()); + + FileDescriptor read_fd(pipe_fds[0]); + FileDescriptor write_fd(pipe_fds[1]); + + // Use a signal handler that interrupts but does nothing rather than using the + // default terminate action. + struct sigaction sa; + sa.sa_sigaction = do_nothing_handler; + sigfillset(&sa.sa_mask); + sa.sa_flags = 0; + auto sa_cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGALRM, sa)); + + // Actually allow SIGALRM delivery. + auto mask_cleanup = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, SIGALRM)); + + // Alarm in 20 seconds, which should be well after read blocks below. + ASSERT_THAT(alarm(20), SyscallSucceeds()); + + char buf; + ASSERT_THAT(read(read_fd.get(), &buf, 1), SyscallFailsWithErrno(EINTR)); +} + +/* Count of the number of SIGALRM signals handled. */ +static volatile int alarms_received = 0; + +void inc_alarms_handler(int sig, siginfo_t* siginfo, void* arg) { + alarms_received++; +} + +// No random save as the test relies on alarm timing. Cooperative save tests +// already cover the save between alarm and read. +TEST(AlarmTest, Restart_NoRandomSave) { + alarms_received = 0; + + int pipe_fds[2]; + ASSERT_THAT(pipe(pipe_fds), SyscallSucceeds()); + + FileDescriptor read_fd(pipe_fds[0]); + // Write end closed by thread below. + + struct sigaction sa; + sa.sa_sigaction = inc_alarms_handler; + sigfillset(&sa.sa_mask); + sa.sa_flags = SA_RESTART; + auto sa_cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGALRM, sa)); + + // Spawn a thread to eventually unblock the read below. + ScopedThread t([pipe_fds] { + absl::SleepFor(absl::Seconds(30)); + EXPECT_THAT(close(pipe_fds[1]), SyscallSucceeds()); + }); + + // Actually allow SIGALRM delivery. + auto mask_cleanup = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, SIGALRM)); + + // Alarm in 20 seconds, which should be well after read blocks below, but + // before it returns. 
+ ASSERT_THAT(alarm(20), SyscallSucceeds()); + + // Read and eventually get an EOF from the writer closing. If SA_RESTART + // didn't work, then the alarm would not have fired and we wouldn't increment + // our alarms_received count in our signal handler, or we would not have + // restarted the syscall gracefully, which we expect below in order to be + // able to get the final EOF on the pipe. + char buf; + ASSERT_THAT(read(read_fd.get(), &buf, 1), SyscallSucceeds()); + EXPECT_EQ(alarms_received, 1); + + t.Join(); +} + +// No random save as the test relies on alarm timing. Cooperative save tests +// already cover the save between alarm and pause. +TEST(AlarmTest, SaSiginfo_NoRandomSave) { + // Use a signal handler that interrupts but does nothing rather than using the + // default terminate action. + struct sigaction sa; + sa.sa_sigaction = do_nothing_handler; + sigfillset(&sa.sa_mask); + sa.sa_flags = SA_SIGINFO; + auto sa_cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGALRM, sa)); + + // Actually allow SIGALRM delivery. + auto mask_cleanup = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, SIGALRM)); + + // Alarm in 20 seconds, which should be well after pause blocks below. + ASSERT_THAT(alarm(20), SyscallSucceeds()); + ASSERT_THAT(pause(), SyscallFailsWithErrno(EINTR)); +} + +// No random save as the test relies on alarm timing. Cooperative save tests +// already cover the save between alarm and pause. +TEST(AlarmTest, SaInterrupt_NoRandomSave) { + // Use a signal handler that interrupts but does nothing rather than using the + // default terminate action. + struct sigaction sa; + sa.sa_sigaction = do_nothing_handler; + sigfillset(&sa.sa_mask); + sa.sa_flags = SA_INTERRUPT; + auto sa_cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGALRM, sa)); + + // Actually allow SIGALRM delivery. 
+ auto mask_cleanup = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, SIGALRM)); + + // Alarm in 20 seconds, which should be well after pause blocks below. + ASSERT_THAT(alarm(20), SyscallSucceeds()); + ASSERT_THAT(pause(), SyscallFailsWithErrno(EINTR)); +} + +TEST(AlarmTest, UserModeSpinning) { + alarms_received = 0; + + struct sigaction sa = {}; + sa.sa_sigaction = inc_alarms_handler; + sigfillset(&sa.sa_mask); + sa.sa_flags = SA_SIGINFO; + auto sa_cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGALRM, sa)); + + // Actually allow SIGALRM delivery. + auto mask_cleanup = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, SIGALRM)); + + // Alarm in 20 seconds, which should be well into the loop below. + ASSERT_THAT(alarm(20), SyscallSucceeds()); + // Make sure that the signal gets delivered even if we are spinning in user + // mode when it arrives. + while (!alarms_received) { + } +} + +} // namespace + +} // namespace testing +} // namespace gvisor + +int main(int argc, char** argv) { + // These tests depend on delivering SIGALRM to the main thread. Block SIGALRM + // so that any other threads created by TestInit will also have SIGALRM + // blocked. + sigset_t set; + sigemptyset(&set); + sigaddset(&set, SIGALRM); + TEST_PCHECK(sigprocmask(SIG_BLOCK, &set, nullptr) == 0); + + gvisor::testing::TestInit(&argc, &argv); + return gvisor::testing::RunAllTests(); +} diff --git a/test/syscalls/linux/arch_prctl.cc b/test/syscalls/linux/arch_prctl.cc new file mode 100644 index 000000000..81bf5a775 --- /dev/null +++ b/test/syscalls/linux/arch_prctl.cc @@ -0,0 +1,48 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <asm/prctl.h> +#include <sys/prctl.h> + +#include "gtest/gtest.h" +#include "test/util/test_util.h" + +// glibc does not provide a prototype for arch_prctl() so declare it here. +extern "C" int arch_prctl(int code, uintptr_t addr); + +namespace gvisor { +namespace testing { + +namespace { + +TEST(ArchPrctlTest, GetSetFS) { + uintptr_t orig; + const uintptr_t kNonCanonicalFsbase = 0x4141414142424242; + + // Get the original FS.base and then set it to the same value (this is + // intentional because FS.base is the TLS pointer so we cannot change it + // arbitrarily). + ASSERT_THAT(arch_prctl(ARCH_GET_FS, reinterpret_cast<uintptr_t>(&orig)), + SyscallSucceeds()); + ASSERT_THAT(arch_prctl(ARCH_SET_FS, orig), SyscallSucceeds()); + + // Trying to set FS.base to a non-canonical value should return an error. + ASSERT_THAT(arch_prctl(ARCH_SET_FS, kNonCanonicalFsbase), + SyscallFailsWithErrno(EPERM)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/bad.cc b/test/syscalls/linux/bad.cc new file mode 100644 index 000000000..a26fc6af3 --- /dev/null +++ b/test/syscalls/linux/bad.cc @@ -0,0 +1,45 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <sys/syscall.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { +#ifdef __x86_64__ +// get_kernel_syms is not supported in Linux > 2.6, and not implemented in +// gVisor. +constexpr uint32_t kNotImplementedSyscall = SYS_get_kernel_syms; +#elif __aarch64__ +// Use the last of arch_specific_syscalls which are not implemented on arm64. +constexpr uint32_t kNotImplementedSyscall = __NR_arch_specific_syscall + 15; +#endif + +TEST(BadSyscallTest, NotImplemented) { + EXPECT_THAT(syscall(kNotImplementedSyscall), SyscallFailsWithErrno(ENOSYS)); +} + +TEST(BadSyscallTest, NegativeOne) { + EXPECT_THAT(syscall(-1), SyscallFailsWithErrno(ENOSYS)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/base_poll_test.cc b/test/syscalls/linux/base_poll_test.cc new file mode 100644 index 000000000..ab7a19dd0 --- /dev/null +++ b/test/syscalls/linux/base_poll_test.cc @@ -0,0 +1,65 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "test/syscalls/linux/base_poll_test.h" + +#include <sys/syscall.h> +#include <sys/types.h> +#include <syscall.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "absl/memory/memory.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +static volatile int timer_fired = 0; +static void SigAlarmHandler(int, siginfo_t*, void*) { timer_fired = 1; } + +BasePollTest::BasePollTest() { + // Register our SIGALRM handler, but save the original so we can restore in + // the destructor. + struct sigaction sa = {}; + sa.sa_sigaction = SigAlarmHandler; + sigfillset(&sa.sa_mask); + TEST_PCHECK(sigaction(SIGALRM, &sa, &original_alarm_sa_) == 0); +} + +BasePollTest::~BasePollTest() { + ClearTimer(); + TEST_PCHECK(sigaction(SIGALRM, &original_alarm_sa_, nullptr) == 0); +} + +void BasePollTest::SetTimer(absl::Duration duration) { + pid_t tgid = getpid(); + pid_t tid = gettid(); + ClearTimer(); + + // Create a new timer thread. + timer_ = absl::make_unique<TimerThread>(absl::Now() + duration, tgid, tid); +} + +bool BasePollTest::TimerFired() const { return timer_fired; } + +void BasePollTest::ClearTimer() { + timer_.reset(); + timer_fired = 0; +} + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/base_poll_test.h b/test/syscalls/linux/base_poll_test.h new file mode 100644 index 000000000..0d4a6701e --- /dev/null +++ b/test/syscalls/linux/base_poll_test.h @@ -0,0 +1,101 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GVISOR_TEST_SYSCALLS_BASE_POLL_TEST_H_ +#define GVISOR_TEST_SYSCALLS_BASE_POLL_TEST_H_ + +#include <signal.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <syscall.h> +#include <time.h> +#include <unistd.h> + +#include <memory> + +#include "gtest/gtest.h" +#include "absl/synchronization/mutex.h" +#include "absl/time/time.h" +#include "test/util/logging.h" +#include "test/util/signal_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +// TimerThread is a cancelable timer. +class TimerThread { + public: + TimerThread(absl::Time deadline, pid_t tgid, pid_t tid) + : thread_([=] { + mu_.Lock(); + mu_.AwaitWithDeadline(absl::Condition(&cancel_), deadline); + if (!cancel_) { + TEST_PCHECK(tgkill(tgid, tid, SIGALRM) == 0); + } + mu_.Unlock(); + }) {} + + ~TimerThread() { Cancel(); } + + void Cancel() { + absl::MutexLock ml(&mu_); + cancel_ = true; + } + + private: + mutable absl::Mutex mu_; + bool cancel_ ABSL_GUARDED_BY(mu_) = false; + + // Must be last to ensure that the destructor for the thread is run before + // any other member of the object is destroyed. + ScopedThread thread_; +}; + +// Base test fixture for poll, select, ppoll, and pselect tests. +// +// This fixture makes use of SIGALRM. The original handler is saved in the +// constructor and restored in the destructor. +class BasePollTest : public ::testing::Test { + protected: + BasePollTest(); + ~BasePollTest() override; + + // Sets a timer that will send a signal to the calling thread after + // `duration`. 
+ void SetTimer(absl::Duration duration); + + // Returns true if the timer has fired. + bool TimerFired() const; + + // Stops the pending timer (if any) and clear the "fired" state. + void ClearTimer(); + + private: + // Thread that implements the timer. If the timer is stopped, timer_ is null. + // + // We have to use a thread for this purpose because tests using this fixture + // expect to be interrupted by the timer signal, but itimers/alarm(2) send + // thread-group-directed signals, which may be handled by any thread in the + // test process. + std::unique_ptr<TimerThread> timer_; + + // The original SIGALRM handler, to restore in destructor. + struct sigaction original_alarm_sa_; +}; + +} // namespace testing +} // namespace gvisor + +#endif // GVISOR_TEST_SYSCALLS_BASE_POLL_TEST_H_ diff --git a/test/syscalls/linux/bind.cc b/test/syscalls/linux/bind.cc new file mode 100644 index 000000000..9547c4ab2 --- /dev/null +++ b/test/syscalls/linux/bind.cc @@ -0,0 +1,145 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <stdio.h> +#include <sys/socket.h> +#include <sys/un.h> + +#include "gtest/gtest.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +TEST_P(AllSocketPairTest, Bind) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); +} + +TEST_P(AllSocketPairTest, BindTooLong) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + // first_addr is a sockaddr_storage being used as a sockaddr_un. Use the full + // length which is longer than expected for a Unix socket. + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sizeof(sockaddr_storage)), + SyscallFailsWithErrno(EINVAL)); +} + +TEST_P(AllSocketPairTest, DoubleBindSocket) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + EXPECT_THAT( + bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + // Linux 4.09 returns EINVAL here, but some time before 4.19 it switched + // to EADDRINUSE. 
+ AnyOf(SyscallFailsWithErrno(EADDRINUSE), SyscallFailsWithErrno(EINVAL))); +} + +TEST_P(AllSocketPairTest, GetLocalAddr) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + socklen_t addressLength = sockets->first_addr_size(); + struct sockaddr_storage address = {}; + ASSERT_THAT(getsockname(sockets->first_fd(), (struct sockaddr*)(&address), + &addressLength), + SyscallSucceeds()); + EXPECT_EQ( + 0, memcmp(&address, sockets->first_addr(), sockets->first_addr_size())); +} + +TEST_P(AllSocketPairTest, GetLocalAddrWithoutBind) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + socklen_t addressLength = sockets->first_addr_size(); + struct sockaddr_storage received_address = {}; + ASSERT_THAT( + getsockname(sockets->first_fd(), (struct sockaddr*)(&received_address), + &addressLength), + SyscallSucceeds()); + struct sockaddr_storage want_address = {}; + want_address.ss_family = sockets->first_addr()->sa_family; + EXPECT_EQ(0, memcmp(&received_address, &want_address, addressLength)); +} + +TEST_P(AllSocketPairTest, GetRemoteAddressWithoutConnect) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + socklen_t addressLength = sockets->first_addr_size(); + struct sockaddr_storage address = {}; + ASSERT_THAT(getpeername(sockets->second_fd(), (struct sockaddr*)(&address), + &addressLength), + SyscallFailsWithErrno(ENOTCONN)); +} + +TEST_P(AllSocketPairTest, DoubleBindAddress) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + EXPECT_THAT(bind(sockets->second_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallFailsWithErrno(EADDRINUSE)); +} + +TEST_P(AllSocketPairTest, Unbind) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + 
ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + ASSERT_THAT(close(sockets->release_first_fd()), SyscallSucceeds()); + + // Filesystem Unix sockets do not release their address when closed. + if (sockets->first_addr()->sa_data[0] != 0) { + ASSERT_THAT(bind(sockets->second_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallFailsWithErrno(EADDRINUSE)); + return; + } + + ASSERT_THAT(bind(sockets->second_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + ASSERT_THAT(close(sockets->release_second_fd()), SyscallSucceeds()); +} + +INSTANTIATE_TEST_SUITE_P( + AllUnixDomainSockets, AllSocketPairTest, + ::testing::ValuesIn(VecCat<SocketPairKind>( + ApplyVec<SocketPairKind>( + FilesystemUnboundUnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_STREAM, SOCK_DGRAM, + SOCK_SEQPACKET}, + List<int>{0, SOCK_NONBLOCK})), + ApplyVec<SocketPairKind>( + AbstractUnboundUnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_STREAM, SOCK_DGRAM, + SOCK_SEQPACKET}, + List<int>{0, SOCK_NONBLOCK}))))); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/brk.cc b/test/syscalls/linux/brk.cc new file mode 100644 index 000000000..a03a44465 --- /dev/null +++ b/test/syscalls/linux/brk.cc @@ -0,0 +1,31 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <stdint.h> +#include <sys/syscall.h> +#include <unistd.h> + +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +TEST(BrkTest, BrkSyscallReturnsOldBrkOnFailure) { + auto old_brk = sbrk(0); + EXPECT_THAT(syscall(SYS_brk, reinterpret_cast<void*>(-1)), + SyscallSucceedsWithValue(reinterpret_cast<uintptr_t>(old_brk))); +} + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/chdir.cc b/test/syscalls/linux/chdir.cc new file mode 100644 index 000000000..3182c228b --- /dev/null +++ b/test/syscalls/linux/chdir.cc @@ -0,0 +1,64 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <fcntl.h> +#include <linux/limits.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/un.h> + +#include "gtest/gtest.h" +#include "test/util/capability_util.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +TEST(ChdirTest, Success) { + auto old_dir = GetAbsoluteTestTmpdir(); + auto temp_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + EXPECT_THAT(chdir(temp_dir.path().c_str()), SyscallSucceeds()); + // Temp path destructor deletes the newly created tmp dir and Sentry rejects + // saving when its current dir is still pointing to the path. Switch to a + // permanent path here. 
+ EXPECT_THAT(chdir(old_dir.c_str()), SyscallSucceeds()); +} + +TEST(ChdirTest, PermissionDenied) { + // Drop capabilities that allow us to override directory permissions. + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false)); + + auto temp_dir = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateDirWith(GetAbsoluteTestTmpdir(), 0666 /* mode */)); + EXPECT_THAT(chdir(temp_dir.path().c_str()), SyscallFailsWithErrno(EACCES)); +} + +TEST(ChdirTest, NotDir) { + auto temp_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + EXPECT_THAT(chdir(temp_file.path().c_str()), SyscallFailsWithErrno(ENOTDIR)); +} + +TEST(ChdirTest, NotExist) { + EXPECT_THAT(chdir("/foo/bar"), SyscallFailsWithErrno(ENOENT)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/chmod.cc b/test/syscalls/linux/chmod.cc new file mode 100644 index 000000000..a06b5cfd6 --- /dev/null +++ b/test/syscalls/linux/chmod.cc @@ -0,0 +1,264 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <fcntl.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include <string> + +#include "gtest/gtest.h" +#include "test/util/capability_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/fs_util.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +TEST(ChmodTest, ChmodFileSucceeds) { + // Drop capabilities that allow us to override file permissions. + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + + ASSERT_THAT(chmod(file.path().c_str(), 0466), SyscallSucceeds()); + EXPECT_THAT(open(file.path().c_str(), O_RDWR), SyscallFailsWithErrno(EACCES)); +} + +TEST(ChmodTest, ChmodDirSucceeds) { + // Drop capabilities that allow us to override file and directory permissions. + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false)); + + auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const std::string fileInDir = NewTempAbsPathInDir(dir.path()); + + ASSERT_THAT(chmod(dir.path().c_str(), 0466), SyscallSucceeds()); + EXPECT_THAT(open(fileInDir.c_str(), O_RDONLY), SyscallFailsWithErrno(EACCES)); +} + +TEST(ChmodTest, FchmodFileSucceeds_NoRandomSave) { + // Drop capabilities that allow us to override file permissions. + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileMode(0666)); + int fd; + ASSERT_THAT(fd = open(file.path().c_str(), O_RDWR), SyscallSucceeds()); + + { + const DisableSave ds; // File permissions are reduced. 
+ ASSERT_THAT(fchmod(fd, 0444), SyscallSucceeds()); + EXPECT_THAT(close(fd), SyscallSucceeds()); + } + + EXPECT_THAT(open(file.path().c_str(), O_RDWR), SyscallFailsWithErrno(EACCES)); +} + +TEST(ChmodTest, FchmodDirSucceeds_NoRandomSave) { + // Drop capabilities that allow us to override file and directory permissions. + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false)); + + auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + int fd; + ASSERT_THAT(fd = open(dir.path().c_str(), O_RDONLY | O_DIRECTORY), + SyscallSucceeds()); + + { + const DisableSave ds; // File permissions are reduced. + ASSERT_THAT(fchmod(fd, 0), SyscallSucceeds()); + EXPECT_THAT(close(fd), SyscallSucceeds()); + } + + EXPECT_THAT(open(dir.path().c_str(), O_RDONLY), + SyscallFailsWithErrno(EACCES)); +} + +TEST(ChmodTest, FchmodBadF) { + ASSERT_THAT(fchmod(-1, 0444), SyscallFailsWithErrno(EBADF)); +} + +TEST(ChmodTest, FchmodatBadF) { + ASSERT_THAT(fchmodat(-1, "foo", 0444, 0), SyscallFailsWithErrno(EBADF)); +} + +TEST(ChmodTest, FchmodatNotDir) { + ASSERT_THAT(fchmodat(-1, "", 0444, 0), SyscallFailsWithErrno(ENOENT)); +} + +TEST(ChmodTest, FchmodatFileAbsolutePath) { + // Drop capabilities that allow us to override file permissions. + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + + ASSERT_THAT(fchmodat(-1, file.path().c_str(), 0444, 0), SyscallSucceeds()); + EXPECT_THAT(open(file.path().c_str(), O_RDWR), SyscallFailsWithErrno(EACCES)); +} + +TEST(ChmodTest, FchmodatDirAbsolutePath) { + // Drop capabilities that allow us to override file and directory permissions. 
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false)); + + auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + + int fd; + ASSERT_THAT(fd = open(dir.path().c_str(), O_RDONLY | O_DIRECTORY), + SyscallSucceeds()); + EXPECT_THAT(close(fd), SyscallSucceeds()); + + ASSERT_THAT(fchmodat(-1, dir.path().c_str(), 0, 0), SyscallSucceeds()); + EXPECT_THAT(open(dir.path().c_str(), O_RDONLY), + SyscallFailsWithErrno(EACCES)); +} + +TEST(ChmodTest, FchmodatFile) { + // Drop capabilities that allow us to override file permissions. + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + + auto temp_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + + int parent_fd; + ASSERT_THAT( + parent_fd = open(GetAbsoluteTestTmpdir().c_str(), O_RDONLY | O_DIRECTORY), + SyscallSucceeds()); + + ASSERT_THAT( + fchmodat(parent_fd, std::string(Basename(temp_file.path())).c_str(), 0444, + 0), + SyscallSucceeds()); + EXPECT_THAT(close(parent_fd), SyscallSucceeds()); + + EXPECT_THAT(open(temp_file.path().c_str(), O_RDWR), + SyscallFailsWithErrno(EACCES)); +} + +TEST(ChmodTest, FchmodatDir) { + // Drop capabilities that allow us to override file and directory permissions. 
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false)); + + auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + + int parent_fd; + ASSERT_THAT( + parent_fd = open(GetAbsoluteTestTmpdir().c_str(), O_RDONLY | O_DIRECTORY), + SyscallSucceeds()); + + int fd; + ASSERT_THAT(fd = open(dir.path().c_str(), O_RDONLY | O_DIRECTORY), + SyscallSucceeds()); + EXPECT_THAT(close(fd), SyscallSucceeds()); + + ASSERT_THAT( + fchmodat(parent_fd, std::string(Basename(dir.path())).c_str(), 0, 0), + SyscallSucceeds()); + EXPECT_THAT(close(parent_fd), SyscallSucceeds()); + + EXPECT_THAT(open(dir.path().c_str(), O_RDONLY | O_DIRECTORY), + SyscallFailsWithErrno(EACCES)); +} + +TEST(ChmodTest, ChmodDowngradeWritability_NoRandomSave) { + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileMode(0666)); + + int fd; + ASSERT_THAT(fd = open(file.path().c_str(), O_RDWR), SyscallSucceeds()); + + const DisableSave ds; // Permissions are dropped. + ASSERT_THAT(chmod(file.path().c_str(), 0444), SyscallSucceeds()); + EXPECT_THAT(write(fd, "hello", 5), SyscallSucceedsWithValue(5)); + + EXPECT_THAT(close(fd), SyscallSucceeds()); +} + +TEST(ChmodTest, ChmodFileToNoPermissionsSucceeds) { + // Drop capabilities that allow us to override file permissions. + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false)); + + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileMode(0666)); + + ASSERT_THAT(chmod(file.path().c_str(), 0), SyscallSucceeds()); + + EXPECT_THAT(open(file.path().c_str(), O_RDONLY), + SyscallFailsWithErrno(EACCES)); +} + +TEST(ChmodTest, FchmodDowngradeWritability_NoRandomSave) { + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + + int fd; + ASSERT_THAT(fd = open(file.path().c_str(), O_RDWR | O_CREAT, 0666), + SyscallSucceeds()); + + const DisableSave ds; // Permissions are dropped. 
+ ASSERT_THAT(fchmod(fd, 0444), SyscallSucceeds()); + EXPECT_THAT(write(fd, "hello", 5), SyscallSucceedsWithValue(5)); + + EXPECT_THAT(close(fd), SyscallSucceeds()); +} + +TEST(ChmodTest, FchmodFileToNoPermissionsSucceeds_NoRandomSave) { + // Drop capabilities that allow us to override file permissions. + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false)); + + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileMode(0666)); + + int fd; + ASSERT_THAT(fd = open(file.path().c_str(), O_RDWR), SyscallSucceeds()); + + { + const DisableSave ds; // Permissions are dropped. + ASSERT_THAT(fchmod(fd, 0), SyscallSucceeds()); + EXPECT_THAT(close(fd), SyscallSucceeds()); + } + + EXPECT_THAT(open(file.path().c_str(), O_RDONLY), + SyscallFailsWithErrno(EACCES)); +} + +// Verify that we can get a RW FD after chmod, even if a RO fd is left open. +TEST(ChmodTest, ChmodWritableWithOpenFD) { + // FIXME(b/72455313): broken on hostfs. + if (IsRunningOnGvisor()) { + return; + } + + TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileMode(0444)); + + FileDescriptor fd1 = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY)); + + ASSERT_THAT(fchmod(fd1.get(), 0644), SyscallSucceeds()); + + // This FD is writable, even though fd1 has a read-only reference to the file. + FileDescriptor fd2 = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR)); + + // fd1 is not writable, but fd2 is. + char c = 'a'; + EXPECT_THAT(WriteFd(fd1.get(), &c, 1), SyscallFailsWithErrno(EBADF)); + EXPECT_THAT(WriteFd(fd2.get(), &c, 1), SyscallSucceedsWithValue(1)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/chown.cc b/test/syscalls/linux/chown.cc new file mode 100644 index 000000000..7a28b674d --- /dev/null +++ b/test/syscalls/linux/chown.cc @@ -0,0 +1,206 @@ +// Copyright 2018 The gVisor Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <fcntl.h> +#include <grp.h> +#include <sys/types.h> +#include <unistd.h> + +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/flags/flag.h" +#include "absl/synchronization/notification.h" +#include "test/util/capability_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/fs_util.h" +#include "test/util/posix_error.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +ABSL_FLAG(int32_t, scratch_uid1, 65534, "first scratch UID"); +ABSL_FLAG(int32_t, scratch_uid2, 65533, "second scratch UID"); +ABSL_FLAG(int32_t, scratch_gid, 65534, "first scratch GID"); + +namespace gvisor { +namespace testing { + +namespace { + +TEST(ChownTest, FchownBadF) { + ASSERT_THAT(fchown(-1, 0, 0), SyscallFailsWithErrno(EBADF)); +} + +TEST(ChownTest, FchownatBadF) { + ASSERT_THAT(fchownat(-1, "fff", 0, 0, 0), SyscallFailsWithErrno(EBADF)); +} + +TEST(ChownTest, FchownatEmptyPath) { + const auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const auto fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(dir.path(), O_DIRECTORY | O_RDONLY)); + ASSERT_THAT(fchownat(fd.get(), "", 0, 0, 0), SyscallFailsWithErrno(ENOENT)); +} + +using Chown = + std::function<PosixError(const std::string&, uid_t owner, gid_t group)>; + +class ChownParamTest : public ::testing::TestWithParam<Chown> {}; + +TEST_P(ChownParamTest, 
ChownFileSucceeds) { + if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_CHOWN))) { + ASSERT_NO_ERRNO(SetCapability(CAP_CHOWN, false)); + } + + const auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + + // At least *try* setting to a group other than the EGID. + gid_t gid; + EXPECT_THAT(gid = getegid(), SyscallSucceeds()); + int num_groups; + EXPECT_THAT(num_groups = getgroups(0, nullptr), SyscallSucceeds()); + if (num_groups > 0) { + std::vector<gid_t> list(num_groups); + EXPECT_THAT(getgroups(list.size(), list.data()), SyscallSucceeds()); + gid = list[0]; + } + + EXPECT_NO_ERRNO(GetParam()(file.path(), geteuid(), gid)); + + struct stat s = {}; + ASSERT_THAT(stat(file.path().c_str(), &s), SyscallSucceeds()); + EXPECT_EQ(s.st_uid, geteuid()); + EXPECT_EQ(s.st_gid, gid); +} + +TEST_P(ChownParamTest, ChownFilePermissionDenied) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SETUID))); + + const auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileMode(0777)); + + // Drop privileges and change IDs only in child thread, or else this parent + // thread won't be able to open some log files after the test ends. + ScopedThread([&] { + // Drop privileges. + if (HaveCapability(CAP_CHOWN).ValueOrDie()) { + EXPECT_NO_ERRNO(SetCapability(CAP_CHOWN, false)); + } + + // Change EUID and EGID. + // + // See note about POSIX below. 
+ EXPECT_THAT( + syscall(SYS_setresgid, -1, absl::GetFlag(FLAGS_scratch_gid), -1), + SyscallSucceeds()); + EXPECT_THAT( + syscall(SYS_setresuid, -1, absl::GetFlag(FLAGS_scratch_uid1), -1), + SyscallSucceeds()); + + EXPECT_THAT(GetParam()(file.path(), geteuid(), getegid()), + PosixErrorIs(EPERM, ::testing::ContainsRegex("chown"))); + }); +} + +TEST_P(ChownParamTest, ChownFileSucceedsAsRoot) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability((CAP_CHOWN)))); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability((CAP_SETUID)))); + + const std::string filename = NewTempAbsPath(); + + absl::Notification fileCreated, fileChowned; + // Change UID only in child thread, or else this parent thread won't be able + // to open some log files after the test ends. + ScopedThread t([&] { + // POSIX requires that all threads in a process share the same UIDs, so + // the NPTL setresuid wrappers use signals to make all threads execute the + // setresuid syscall. However, we want this thread to have its own set of + // credentials different from the parent process, so we use the raw + // syscall. + EXPECT_THAT( + syscall(SYS_setresuid, -1, absl::GetFlag(FLAGS_scratch_uid2), -1), + SyscallSucceeds()); + + // Create file and immediately close it. + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(filename, O_CREAT | O_RDWR, 0644)); + fd.reset(); // Close the fd. + + fileCreated.Notify(); + fileChowned.WaitForNotification(); + + EXPECT_THAT(open(filename.c_str(), O_RDWR), SyscallFailsWithErrno(EACCES)); + FileDescriptor fd2 = ASSERT_NO_ERRNO_AND_VALUE(Open(filename, O_RDONLY)); + }); + + fileCreated.WaitForNotification(); + + // Set file's owners to someone different. 
+ EXPECT_NO_ERRNO(GetParam()(filename, absl::GetFlag(FLAGS_scratch_uid1), + absl::GetFlag(FLAGS_scratch_gid))); + + struct stat s; + EXPECT_THAT(stat(filename.c_str(), &s), SyscallSucceeds()); + EXPECT_EQ(s.st_uid, absl::GetFlag(FLAGS_scratch_uid1)); + EXPECT_EQ(s.st_gid, absl::GetFlag(FLAGS_scratch_gid)); + + fileChowned.Notify(); +} + +PosixError errorFromReturn(const std::string& name, int ret) { + if (ret == -1) { + return PosixError(errno, absl::StrCat(name, " failed")); + } + return NoError(); +} + +INSTANTIATE_TEST_SUITE_P( + ChownKinds, ChownParamTest, + ::testing::Values( + [](const std::string& path, uid_t owner, gid_t group) -> PosixError { + int rc = chown(path.c_str(), owner, group); + MaybeSave(); + return errorFromReturn("chown", rc); + }, + [](const std::string& path, uid_t owner, gid_t group) -> PosixError { + int rc = lchown(path.c_str(), owner, group); + MaybeSave(); + return errorFromReturn("lchown", rc); + }, + [](const std::string& path, uid_t owner, gid_t group) -> PosixError { + ASSIGN_OR_RETURN_ERRNO(auto fd, Open(path, O_RDWR)); + int rc = fchown(fd.get(), owner, group); + MaybeSave(); + return errorFromReturn("fchown", rc); + }, + [](const std::string& path, uid_t owner, gid_t group) -> PosixError { + ASSIGN_OR_RETURN_ERRNO(auto fd, Open(path, O_RDWR)); + int rc = fchownat(fd.get(), "", owner, group, AT_EMPTY_PATH); + MaybeSave(); + return errorFromReturn("fchownat-fd", rc); + }, + [](const std::string& path, uid_t owner, gid_t group) -> PosixError { + ASSIGN_OR_RETURN_ERRNO(auto dirfd, Open(std::string(Dirname(path)), + O_DIRECTORY | O_RDONLY)); + int rc = fchownat(dirfd.get(), std::string(Basename(path)).c_str(), + owner, group, 0); + MaybeSave(); + return errorFromReturn("fchownat-dirfd", rc); + })); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/chroot.cc b/test/syscalls/linux/chroot.cc new file mode 100644 index 000000000..85ec013d5 --- /dev/null +++ b/test/syscalls/linux/chroot.cc 
@@ -0,0 +1,366 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <errno.h> +#include <fcntl.h> +#include <stddef.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <syscall.h> +#include <unistd.h> + +#include <string> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_split.h" +#include "absl/strings/string_view.h" +#include "test/util/capability_util.h" +#include "test/util/cleanup.h" +#include "test/util/file_descriptor.h" +#include "test/util/fs_util.h" +#include "test/util/mount_util.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +using ::testing::HasSubstr; +using ::testing::Not; + +namespace gvisor { +namespace testing { + +namespace { + +TEST(ChrootTest, Success) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_CHROOT))); + + auto temp_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + EXPECT_THAT(chroot(temp_dir.path().c_str()), SyscallSucceeds()); +} + +TEST(ChrootTest, PermissionDenied) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_CHROOT))); + + // CAP_DAC_READ_SEARCH and CAP_DAC_OVERRIDE may override Execute permission on + // directories. 
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false)); + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + + auto temp_dir = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateDirWith(GetAbsoluteTestTmpdir(), 0666 /* mode */)); + EXPECT_THAT(chroot(temp_dir.path().c_str()), SyscallFailsWithErrno(EACCES)); +} + +TEST(ChrootTest, NotDir) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_CHROOT))); + + auto temp_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + EXPECT_THAT(chroot(temp_file.path().c_str()), SyscallFailsWithErrno(ENOTDIR)); +} + +TEST(ChrootTest, NotExist) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_CHROOT))); + + EXPECT_THAT(chroot("/foo/bar"), SyscallFailsWithErrno(ENOENT)); +} + +TEST(ChrootTest, WithoutCapability) { + // Unset CAP_SYS_CHROOT. + ASSERT_NO_ERRNO(SetCapability(CAP_SYS_CHROOT, false)); + + auto temp_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + EXPECT_THAT(chroot(temp_dir.path().c_str()), SyscallFailsWithErrno(EPERM)); +} + +TEST(ChrootTest, CreatesNewRoot) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_CHROOT))); + + // Grab the initial cwd. + char initial_cwd[1024]; + ASSERT_THAT(syscall(__NR_getcwd, initial_cwd, sizeof(initial_cwd)), + SyscallSucceeds()); + + auto new_root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + auto file_in_new_root = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(new_root.path())); + + // chroot into new_root. + ASSERT_THAT(chroot(new_root.path().c_str()), SyscallSucceeds()); + + // getcwd should return "(unreachable)" followed by the initial_cwd. + char cwd[1024]; + ASSERT_THAT(syscall(__NR_getcwd, cwd, sizeof(cwd)), SyscallSucceeds()); + std::string expected_cwd = "(unreachable)"; + expected_cwd += initial_cwd; + EXPECT_STREQ(cwd, expected_cwd.c_str()); + + // Should not be able to stat file by its full path. 
+ struct stat statbuf; + EXPECT_THAT(stat(file_in_new_root.path().c_str(), &statbuf), + SyscallFailsWithErrno(ENOENT)); + + // Should be able to stat file at new rooted path. + auto basename = std::string(Basename(file_in_new_root.path())); + auto rootedFile = "/" + basename; + ASSERT_THAT(stat(rootedFile.c_str(), &statbuf), SyscallSucceeds()); + + // Should be able to stat cwd at '.' even though it's outside root. + ASSERT_THAT(stat(".", &statbuf), SyscallSucceeds()); + + // chdir into new root. + ASSERT_THAT(chdir("/"), SyscallSucceeds()); + + // getcwd should return "/". + EXPECT_THAT(syscall(__NR_getcwd, cwd, sizeof(cwd)), SyscallSucceeds()); + EXPECT_STREQ(cwd, "/"); + + // Statting '.', '..', '/', and '/..' all return the same dev and inode. + struct stat statbuf_dot; + ASSERT_THAT(stat(".", &statbuf_dot), SyscallSucceeds()); + struct stat statbuf_dotdot; + ASSERT_THAT(stat("..", &statbuf_dotdot), SyscallSucceeds()); + EXPECT_EQ(statbuf_dot.st_dev, statbuf_dotdot.st_dev); + EXPECT_EQ(statbuf_dot.st_ino, statbuf_dotdot.st_ino); + struct stat statbuf_slash; + ASSERT_THAT(stat("/", &statbuf_slash), SyscallSucceeds()); + EXPECT_EQ(statbuf_dot.st_dev, statbuf_slash.st_dev); + EXPECT_EQ(statbuf_dot.st_ino, statbuf_slash.st_ino); + struct stat statbuf_slashdotdot; + ASSERT_THAT(stat("/..", &statbuf_slashdotdot), SyscallSucceeds()); + EXPECT_EQ(statbuf_dot.st_dev, statbuf_slashdotdot.st_dev); + EXPECT_EQ(statbuf_dot.st_ino, statbuf_slashdotdot.st_ino); +} + +TEST(ChrootTest, DotDotFromOpenFD) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_CHROOT))); + + auto dir_outside_root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + auto fd = ASSERT_NO_ERRNO_AND_VALUE( + Open(dir_outside_root.path(), O_RDONLY | O_DIRECTORY)); + auto new_root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + + // chroot into new_root. + ASSERT_THAT(chroot(new_root.path().c_str()), SyscallSucceeds()); + + // openat on fd with path .. will succeed. 
+ int other_fd; + ASSERT_THAT(other_fd = openat(fd.get(), "..", O_RDONLY), SyscallSucceeds()); + EXPECT_THAT(close(other_fd), SyscallSucceeds()); + + // getdents on fd should not error. + char buf[1024]; + ASSERT_THAT(syscall(SYS_getdents64, fd.get(), buf, sizeof(buf)), + SyscallSucceeds()); +} + +// Test that link resolution in a chroot can escape the root by following an +// open proc fd. Regression test for b/32316719. +TEST(ChrootTest, ProcFdLinkResolutionInChroot) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_CHROOT))); + + const TempPath file_outside_chroot = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file_outside_chroot.path(), O_RDONLY)); + + const FileDescriptor proc_fd = ASSERT_NO_ERRNO_AND_VALUE( + Open("/proc", O_DIRECTORY | O_RDONLY | O_CLOEXEC)); + + auto temp_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + ASSERT_THAT(chroot(temp_dir.path().c_str()), SyscallSucceeds()); + + // Opening relative to an already open fd to a node outside the chroot works. + const FileDescriptor proc_self_fd = ASSERT_NO_ERRNO_AND_VALUE( + OpenAt(proc_fd.get(), "self/fd", O_DIRECTORY | O_RDONLY | O_CLOEXEC)); + + // Proc fd symlinks can escape the chroot if the fd the symlink refers to + // refers to an object outside the chroot. + struct stat s = {}; + EXPECT_THAT( + fstatat(proc_self_fd.get(), absl::StrCat(fd.get()).c_str(), &s, 0), + SyscallSucceeds()); + + // Try to stat the stdin fd. Internally, this is handled differently from a + // proc fd entry pointing to a file, since stdin is backed by a host fd, and + // isn't a walkable path on the filesystem inside the sandbox. + EXPECT_THAT(fstatat(proc_self_fd.get(), "0", &s, 0), SyscallSucceeds()); +} + +// This test will verify that when you hold a fd to proc before entering +// a chroot that any files inside the chroot will appear rooted to the +// base chroot when examining /proc/self/fd/{num}. 
+TEST(ChrootTest, ProcMemSelfFdsNoEscapeProcOpen) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_CHROOT))); + + // Get a FD to /proc before we enter the chroot. + const FileDescriptor proc = + ASSERT_NO_ERRNO_AND_VALUE(Open("/proc", O_RDONLY)); + + // Create and enter a chroot directory. + const auto temp_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + ASSERT_THAT(chroot(temp_dir.path().c_str()), SyscallSucceeds()); + + // Open a file inside the chroot at /foo. + const FileDescriptor foo = + ASSERT_NO_ERRNO_AND_VALUE(Open("/foo", O_CREAT | O_RDONLY, 0644)); + + // Examine /proc/self/fd/{foo_fd} to see if it exposes the fact that we're + // inside a chroot, the path should be /foo and NOT {chroot_dir}/foo. + const std::string fd_path = absl::StrCat("self/fd/", foo.get()); + char buf[1024] = {}; + size_t bytes_read = 0; + ASSERT_THAT(bytes_read = + readlinkat(proc.get(), fd_path.c_str(), buf, sizeof(buf) - 1), + SyscallSucceeds()); + + // The link should resolve to something. + ASSERT_GT(bytes_read, 0); + + // Assert that the link doesn't contain the chroot path and is only /foo. + EXPECT_STREQ(buf, "/foo"); +} + +// This test will verify that a file inside a chroot when mmapped will not +// expose the full file path via /proc/self/maps and instead honor the chroot. +TEST(ChrootTest, ProcMemSelfMapsNoEscapeProcOpen) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_CHROOT))); + + // Get a FD to /proc before we enter the chroot. + const FileDescriptor proc = + ASSERT_NO_ERRNO_AND_VALUE(Open("/proc", O_RDONLY)); + + // Create and enter a chroot directory. + const auto temp_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + ASSERT_THAT(chroot(temp_dir.path().c_str()), SyscallSucceeds()); + + // Open a file inside the chroot at /foo. + const FileDescriptor foo = + ASSERT_NO_ERRNO_AND_VALUE(Open("/foo", O_CREAT | O_RDONLY, 0644)); + + // Mmap the newly created file. 
+ void* foo_map = mmap(nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE, + foo.get(), 0); + ASSERT_THAT(reinterpret_cast<int64_t>(foo_map), SyscallSucceeds()); + + // Always unmap. + auto cleanup_map = Cleanup( + [&] { EXPECT_THAT(munmap(foo_map, kPageSize), SyscallSucceeds()); }); + + // Examine /proc/self/maps to be sure that /foo doesn't appear to be + // mapped with the full chroot path. + const FileDescriptor maps = + ASSERT_NO_ERRNO_AND_VALUE(OpenAt(proc.get(), "self/maps", O_RDONLY)); + + size_t bytes_read = 0; + char buf[8 * 1024] = {}; + ASSERT_THAT(bytes_read = ReadFd(maps.get(), buf, sizeof(buf)), + SyscallSucceeds()); + + // The maps file should have something. + ASSERT_GT(bytes_read, 0); + + // Finally we want to make sure the maps don't contain the chroot path + ASSERT_EQ(std::string(buf, bytes_read).find(temp_dir.path()), + std::string::npos); +} + +// Test that mounts outside the chroot will not appear in /proc/self/mounts or +// /proc/self/mountinfo. +TEST(ChrootTest, ProcMountsMountinfoNoEscape) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_CHROOT))); + + // We are going to create some mounts and then chroot. In order to be able to + // unmount the mounts after the test run, we must chdir to the root and use + // relative paths for all mounts. That way, as long as we never chdir into + // the new root, we can access the mounts via relative paths and unmount them. + ASSERT_THAT(chdir("/"), SyscallSucceeds()); + + // Create nested tmpfs mounts. Note the use of relative paths in Mount calls. 
+ auto const outer_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + auto const outer_mount = ASSERT_NO_ERRNO_AND_VALUE(Mount( + "none", JoinPath(".", outer_dir.path()), "tmpfs", 0, "mode=0700", 0)); + + auto const inner_dir = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(outer_dir.path())); + auto const inner_mount = ASSERT_NO_ERRNO_AND_VALUE(Mount( + "none", JoinPath(".", inner_dir.path()), "tmpfs", 0, "mode=0700", 0)); + + // Filenames that will be checked for mounts, all relative to /proc dir. + std::string paths[3] = {"mounts", "self/mounts", "self/mountinfo"}; + + for (const std::string& path : paths) { + // We should have both inner and outer mounts. + const std::string contents = + ASSERT_NO_ERRNO_AND_VALUE(GetContents(JoinPath("/proc", path))); + EXPECT_THAT(contents, AllOf(HasSubstr(outer_dir.path()), + HasSubstr(inner_dir.path()))); + // We better have at least two mounts: the mounts we created plus the root. + std::vector<absl::string_view> submounts = + absl::StrSplit(contents, '\n', absl::SkipWhitespace()); + EXPECT_GT(submounts.size(), 2); + } + + // Get a FD to /proc before we enter the chroot. + const FileDescriptor proc = + ASSERT_NO_ERRNO_AND_VALUE(Open("/proc", O_RDONLY)); + + // Chroot to outer mount. + ASSERT_THAT(chroot(outer_dir.path().c_str()), SyscallSucceeds()); + + for (const std::string& path : paths) { + const FileDescriptor proc_file = + ASSERT_NO_ERRNO_AND_VALUE(OpenAt(proc.get(), path, O_RDONLY)); + + // Only two mounts visible from this chroot: the inner and outer. Both + // paths should be relative to the new chroot. 
+ const std::string contents = + ASSERT_NO_ERRNO_AND_VALUE(GetContentsFD(proc_file.get())); + EXPECT_THAT(contents, + AllOf(HasSubstr(absl::StrCat(Basename(inner_dir.path()))), + Not(HasSubstr(outer_dir.path())), + Not(HasSubstr(inner_dir.path())))); + std::vector<absl::string_view> submounts = + absl::StrSplit(contents, '\n', absl::SkipWhitespace()); + EXPECT_EQ(submounts.size(), 2); + } + + // Chroot to inner mount. We must use an absolute path accessible to our + // chroot. + const std::string inner_dir_basename = + absl::StrCat("/", Basename(inner_dir.path())); + ASSERT_THAT(chroot(inner_dir_basename.c_str()), SyscallSucceeds()); + + for (const std::string& path : paths) { + const FileDescriptor proc_file = + ASSERT_NO_ERRNO_AND_VALUE(OpenAt(proc.get(), path, O_RDONLY)); + const std::string contents = + ASSERT_NO_ERRNO_AND_VALUE(GetContentsFD(proc_file.get())); + + // Only the inner mount visible from this chroot. + std::vector<absl::string_view> submounts = + absl::StrSplit(contents, '\n', absl::SkipWhitespace()); + EXPECT_EQ(submounts.size(), 1); + } + + // Chroot back to ".". + ASSERT_THAT(chroot("."), SyscallSucceeds()); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/clock_getres.cc b/test/syscalls/linux/clock_getres.cc new file mode 100644 index 000000000..c408b936c --- /dev/null +++ b/test/syscalls/linux/clock_getres.cc @@ -0,0 +1,37 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include <sys/time.h> +#include <time.h> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// clock_getres works regardless of whether or not a timespec is passed. +TEST(ClockGetres, Timespec) { + struct timespec ts; + EXPECT_THAT(clock_getres(CLOCK_MONOTONIC, &ts), SyscallSucceeds()); + EXPECT_THAT(clock_getres(CLOCK_MONOTONIC, nullptr), SyscallSucceeds()); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/clock_gettime.cc b/test/syscalls/linux/clock_gettime.cc new file mode 100644 index 000000000..7f6015049 --- /dev/null +++ b/test/syscalls/linux/clock_gettime.cc @@ -0,0 +1,163 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <pthread.h> +#include <sys/time.h> + +#include <cerrno> +#include <cstdint> +#include <ctime> +#include <list> +#include <memory> +#include <string> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +int64_t clock_gettime_nsecs(clockid_t id) { + struct timespec ts; + TEST_PCHECK(clock_gettime(id, &ts) == 0); + return (ts.tv_sec * 1000000000 + ts.tv_nsec); +} + +// Spin on the CPU for at least ns nanoseconds, based on +// CLOCK_THREAD_CPUTIME_ID. +void spin_ns(int64_t ns) { + int64_t start = clock_gettime_nsecs(CLOCK_THREAD_CPUTIME_ID); + int64_t end = start + ns; + + do { + constexpr int kLoopCount = 1000000; // large and arbitrary + // volatile to prevent the compiler from skipping this loop. + for (volatile int i = 0; i < kLoopCount; i++) { + } + } while (clock_gettime_nsecs(CLOCK_THREAD_CPUTIME_ID) < end); +} + +// Test that CLOCK_PROCESS_CPUTIME_ID is a superset of CLOCK_THREAD_CPUTIME_ID. +TEST(ClockGettime, CputimeId) { + constexpr int kNumThreads = 13; // arbitrary + + absl::Duration spin_time = absl::Seconds(1); + + // Start off the worker threads and compute the aggregate time spent by + // the workers. Note that we test CLOCK_PROCESS_CPUTIME_ID by having the + // workers execute in parallel and verifying that CLOCK_PROCESS_CPUTIME_ID + // accumulates the runtime of all threads. + int64_t start = clock_gettime_nsecs(CLOCK_PROCESS_CPUTIME_ID); + + // Create a kNumThreads threads. 
+ std::list<ScopedThread> threads; + for (int i = 0; i < kNumThreads; i++) { + threads.emplace_back( + [spin_time] { spin_ns(absl::ToInt64Nanoseconds(spin_time)); }); + } + for (auto& t : threads) { + t.Join(); + } + + int64_t end = clock_gettime_nsecs(CLOCK_PROCESS_CPUTIME_ID); + + // The aggregate time spent in the worker threads must be at least + // 'kNumThreads' times the time each thread spun. + ASSERT_GE(end - start, kNumThreads * absl::ToInt64Nanoseconds(spin_time)); +} + +TEST(ClockGettime, JavaThreadTime) { + clockid_t clockid; + ASSERT_EQ(0, pthread_getcpuclockid(pthread_self(), &clockid)); + struct timespec tp; + ASSERT_THAT(clock_getres(clockid, &tp), SyscallSucceeds()); + EXPECT_TRUE(tp.tv_sec > 0 || tp.tv_nsec > 0); + // A thread cputime is updated each 10msec and there is no approximation + // if a task is running. + do { + ASSERT_THAT(clock_gettime(clockid, &tp), SyscallSucceeds()); + } while (tp.tv_sec == 0 && tp.tv_nsec == 0); + EXPECT_TRUE(tp.tv_sec > 0 || tp.tv_nsec > 0); +} + +// There is not much to test here, since CLOCK_REALTIME may be discontiguous. 
+TEST(ClockGettime, RealtimeWorks) { + struct timespec tp; + EXPECT_THAT(clock_gettime(CLOCK_REALTIME, &tp), SyscallSucceeds()); +} + +class MonotonicClockTest : public ::testing::TestWithParam<clockid_t> {}; + +TEST_P(MonotonicClockTest, IsMonotonic) { + auto end = absl::Now() + absl::Seconds(5); + + struct timespec tp; + EXPECT_THAT(clock_gettime(GetParam(), &tp), SyscallSucceeds()); + + auto prev = absl::TimeFromTimespec(tp); + while (absl::Now() < end) { + EXPECT_THAT(clock_gettime(GetParam(), &tp), SyscallSucceeds()); + auto now = absl::TimeFromTimespec(tp); + EXPECT_GE(now, prev); + prev = now; + } +} + +std::string PrintClockId(::testing::TestParamInfo<clockid_t> info) { + switch (info.param) { + case CLOCK_MONOTONIC: + return "CLOCK_MONOTONIC"; + case CLOCK_MONOTONIC_COARSE: + return "CLOCK_MONOTONIC_COARSE"; + case CLOCK_MONOTONIC_RAW: + return "CLOCK_MONOTONIC_RAW"; + case CLOCK_BOOTTIME: + // CLOCK_BOOTTIME is a monotonic clock. + return "CLOCK_BOOTTIME"; + default: + return absl::StrCat(info.param); + } +} + +INSTANTIATE_TEST_SUITE_P(ClockGettime, MonotonicClockTest, + ::testing::Values(CLOCK_MONOTONIC, + CLOCK_MONOTONIC_COARSE, + CLOCK_MONOTONIC_RAW, CLOCK_BOOTTIME), + PrintClockId); + +TEST(ClockGettime, UnimplementedReturnsEINVAL) { + SKIP_IF(!IsRunningOnGvisor()); + + struct timespec tp; + EXPECT_THAT(clock_gettime(CLOCK_REALTIME_ALARM, &tp), + SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(clock_gettime(CLOCK_BOOTTIME_ALARM, &tp), + SyscallFailsWithErrno(EINVAL)); +} + +TEST(ClockGettime, InvalidClockIDReturnsEINVAL) { + struct timespec tp; + EXPECT_THAT(clock_gettime(-1, &tp), SyscallFailsWithErrno(EINVAL)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/clock_nanosleep.cc b/test/syscalls/linux/clock_nanosleep.cc new file mode 100644 index 000000000..b55cddc52 --- /dev/null +++ b/test/syscalls/linux/clock_nanosleep.cc @@ -0,0 +1,179 @@ +// Copyright 2018 The gVisor Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <time.h> + +#include <atomic> +#include <utility> + +#include "gtest/gtest.h" +#include "absl/time/time.h" +#include "test/util/cleanup.h" +#include "test/util/posix_error.h" +#include "test/util/signal_util.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" +#include "test/util/timer_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// sys_clock_nanosleep is defined because the glibc clock_nanosleep returns +// error numbers directly and does not set errno. This makes our Syscall +// matchers look a little weird when expecting failure: +// "SyscallSucceedsWithValue(ERRNO)". 
+int sys_clock_nanosleep(clockid_t clkid, int flags, + const struct timespec* request, + struct timespec* remain) { + return syscall(SYS_clock_nanosleep, clkid, flags, request, remain); +} + +PosixErrorOr<absl::Time> GetTime(clockid_t clk) { + struct timespec ts = {}; + const int rc = clock_gettime(clk, &ts); + MaybeSave(); + if (rc < 0) { + return PosixError(errno, "clock_gettime"); + } + return absl::TimeFromTimespec(ts); +} + +class WallClockNanosleepTest : public ::testing::TestWithParam<clockid_t> {}; + +TEST_P(WallClockNanosleepTest, InvalidValues) { + const struct timespec invalid[] = { + {.tv_sec = -1, .tv_nsec = -1}, {.tv_sec = 0, .tv_nsec = INT32_MIN}, + {.tv_sec = 0, .tv_nsec = INT32_MAX}, {.tv_sec = 0, .tv_nsec = -1}, + {.tv_sec = -1, .tv_nsec = 0}, + }; + + for (auto const ts : invalid) { + EXPECT_THAT(sys_clock_nanosleep(GetParam(), 0, &ts, nullptr), + SyscallFailsWithErrno(EINVAL)); + } +} + +TEST_P(WallClockNanosleepTest, SleepOneSecond) { + constexpr absl::Duration kSleepDuration = absl::Seconds(1); + struct timespec duration = absl::ToTimespec(kSleepDuration); + + const absl::Time before = ASSERT_NO_ERRNO_AND_VALUE(GetTime(GetParam())); + EXPECT_THAT( + RetryEINTR(sys_clock_nanosleep)(GetParam(), 0, &duration, &duration), + SyscallSucceeds()); + const absl::Time after = ASSERT_NO_ERRNO_AND_VALUE(GetTime(GetParam())); + + EXPECT_GE(after - before, kSleepDuration); +} + +TEST_P(WallClockNanosleepTest, InterruptedNanosleep) { + constexpr absl::Duration kSleepDuration = absl::Seconds(60); + struct timespec duration = absl::ToTimespec(kSleepDuration); + + // Install no-op signal handler for SIGALRM. + struct sigaction sa = {}; + sigfillset(&sa.sa_mask); + sa.sa_handler = +[](int signo) {}; + const auto cleanup_sa = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGALRM, sa)); + + // Measure time since setting the alarm, since the alarm will interrupt the + // sleep and hence determine how long we sleep. 
+ const absl::Time before = ASSERT_NO_ERRNO_AND_VALUE(GetTime(GetParam())); + + // Set an alarm to go off while sleeping. + struct itimerval timer = {}; + timer.it_value.tv_sec = 1; + timer.it_value.tv_usec = 0; + timer.it_interval.tv_sec = 1; + timer.it_interval.tv_usec = 0; + const auto cleanup = + ASSERT_NO_ERRNO_AND_VALUE(ScopedItimer(ITIMER_REAL, timer)); + + EXPECT_THAT(sys_clock_nanosleep(GetParam(), 0, &duration, &duration), + SyscallFailsWithErrno(EINTR)); + const absl::Time after = ASSERT_NO_ERRNO_AND_VALUE(GetTime(GetParam())); + + // Remaining time updated. + const absl::Duration remaining = absl::DurationFromTimespec(duration); + EXPECT_GE(after - before + remaining, kSleepDuration); +} + +// Remaining time is *not* updated if nanosleep completes uninterrupted. +// +// The sleep is retried until one call finishes without being interrupted; the +// sentinel values stored in `remaining` must then still be intact. +TEST_P(WallClockNanosleepTest, UninterruptedNanosleep) { + constexpr absl::Duration kSleepDuration = absl::Milliseconds(10); + const struct timespec duration = absl::ToTimespec(kSleepDuration); + + while (true) { + constexpr int kRemainingMagic = 42; + struct timespec remaining; + remaining.tv_sec = kRemainingMagic; + remaining.tv_nsec = kRemainingMagic; + + int ret = sys_clock_nanosleep(GetParam(), 0, &duration, &remaining); + // sys_clock_nanosleep goes through syscall(), so an interrupted sleep is + // reported as -1 with errno set to EINTR, never as a return value of EINTR. + if (ret == -1 && errno == EINTR) { + // Retry from beginning. We want a single uninterrupted call. 
+ continue; + } + + EXPECT_THAT(ret, SyscallSucceeds()); + EXPECT_EQ(remaining.tv_sec, kRemainingMagic); + EXPECT_EQ(remaining.tv_nsec, kRemainingMagic); + break; + } +} + +TEST_P(WallClockNanosleepTest, SleepUntil) { + const absl::Time now = ASSERT_NO_ERRNO_AND_VALUE(GetTime(GetParam())); + const absl::Time until = now + absl::Seconds(2); + const struct timespec ts = absl::ToTimespec(until); + + EXPECT_THAT( + RetryEINTR(sys_clock_nanosleep)(GetParam(), TIMER_ABSTIME, &ts, nullptr), + SyscallSucceeds()); + const absl::Time after = ASSERT_NO_ERRNO_AND_VALUE(GetTime(GetParam())); + + EXPECT_GE(after, until); +} + +INSTANTIATE_TEST_SUITE_P(Sleepers, WallClockNanosleepTest, + ::testing::Values(CLOCK_REALTIME, CLOCK_MONOTONIC)); + +TEST(ClockNanosleepProcessTest, SleepFiveSeconds) { + const absl::Duration kSleepDuration = absl::Seconds(5); + struct timespec duration = absl::ToTimespec(kSleepDuration); + + // Ensure that CLOCK_PROCESS_CPUTIME_ID advances. + std::atomic<bool> done(false); + ScopedThread t([&] { + while (!done.load()) { + } + }); + const auto cleanup_done = Cleanup([&] { done.store(true); }); + + const absl::Time before = + ASSERT_NO_ERRNO_AND_VALUE(GetTime(CLOCK_PROCESS_CPUTIME_ID)); + EXPECT_THAT(RetryEINTR(sys_clock_nanosleep)(CLOCK_PROCESS_CPUTIME_ID, 0, + &duration, &duration), + SyscallSucceeds()); + const absl::Time after = + ASSERT_NO_ERRNO_AND_VALUE(GetTime(CLOCK_PROCESS_CPUTIME_ID)); + EXPECT_GE(after - before, kSleepDuration); +} +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/concurrency.cc b/test/syscalls/linux/concurrency.cc new file mode 100644 index 000000000..7cd6a75bd --- /dev/null +++ b/test/syscalls/linux/concurrency.cc @@ -0,0 +1,127 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <signal.h> + +#include <atomic> + +#include "gtest/gtest.h" +#include "absl/strings/string_view.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/util/platform_util.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { +namespace { + +// Test that a thread that never yields to the OS does not prevent other threads +// from running. +TEST(ConcurrencyTest, SingleProcessMultithreaded) { + std::atomic<int> a(0); + + ScopedThread t([&a]() { + while (!a.load()) { + } + }); + + absl::SleepFor(absl::Seconds(1)); + + // We are still able to execute code in this thread. The other hasn't + // permanently hung execution in both threads. + a.store(1); +} + +// Test that multiple threads in this process continue to execute in parallel, +// even if an unrelated second process is spawned. Regression test for +// b/32119508. +TEST(ConcurrencyTest, MultiProcessMultithreaded) { + // In PID 1, start TIDs 1 and 2, and put both to sleep. + // + // Start PID 3, which spins for 5 seconds, then exits. + // + // TIDs 1 and 2 wake and attempt to Activate, which cannot occur until PID 3 + // exits. + // + // Both TIDs 1 and 2 should be woken. If they are not both woken, the test + // hangs. + // + // This is all fundamentally racy. If we are failing to wake all threads, the + // expectation is that this test becomes flaky, rather than consistently + // failing. 
+ // + // If additional background threads fail to block, we may never schedule the + // child, at which point this test effectively becomes + // MultiProcessConcurrency. That's not expected to occur. + + std::atomic<int> a(0); + ScopedThread t([&a]() { + // Block so that PID 3 can execute and we can wait on its exit. + absl::SleepFor(absl::Seconds(1)); + while (!a.load()) { + } + }); + + pid_t child_pid = fork(); + if (child_pid == 0) { + // Busy wait without making any blocking syscalls. + auto end = absl::Now() + absl::Seconds(5); + while (absl::Now() < end) { + } + _exit(0); + } + ASSERT_THAT(child_pid, SyscallSucceeds()); + + absl::SleepFor(absl::Seconds(1)); + + // If only TID 1 is woken, thread.Join will hang. + // If only TID 2 is woken, both will hang. + a.store(1); + t.Join(); + + int status = 0; + EXPECT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds()); + EXPECT_TRUE(WIFEXITED(status)); + EXPECT_EQ(WEXITSTATUS(status), 0); +} + +// Test that multiple processes can execute concurrently, even if one process +// never yields. +TEST(ConcurrencyTest, MultiProcessConcurrency) { + SKIP_IF(PlatformSupportMultiProcess() == PlatformSupport::NotSupported); + + pid_t child_pid = fork(); + if (child_pid == 0) { + while (true) { + } + } + ASSERT_THAT(child_pid, SyscallSucceeds()); + + absl::SleepFor(absl::Seconds(5)); + + // We are still able to execute code in this process. The other hasn't + // permanently hung execution in both processes. 
+ ASSERT_THAT(kill(child_pid, SIGKILL), SyscallSucceeds()); + int status = 0; + + ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds()); + ASSERT_TRUE(WIFSIGNALED(status)); + ASSERT_EQ(WTERMSIG(status), SIGKILL); +} + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/connect_external.cc b/test/syscalls/linux/connect_external.cc new file mode 100644 index 000000000..1edb50e47 --- /dev/null +++ b/test/syscalls/linux/connect_external.cc @@ -0,0 +1,163 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <errno.h> +#include <stdlib.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <sys/un.h> + +#include <string> +#include <tuple> + +#include "gtest/gtest.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/fs_util.h" +#include "test/util/test_util.h" + +// This file contains tests specific to connecting to host UDS managed outside +// the sandbox / test. +// +// A set of utility sockets will be created externally in $TEST_UDS_TREE and +// $TEST_UDS_ATTACH_TREE for these tests to interact with. + +namespace gvisor { +namespace testing { + +namespace { + +struct ProtocolSocket { + int protocol; + std::string name; +}; + +// Parameter is (socket root dir, ProtocolSocket). 
+using GoferStreamSeqpacketTest = + ::testing::TestWithParam<std::tuple<std::string, ProtocolSocket>>; + +// Connect to a socket and verify that write/read work. +// +// An "echo" socket doesn't work for dgram sockets because our socket is +// unnamed. The server thus has no way to reply to us. +TEST_P(GoferStreamSeqpacketTest, Echo) { + std::string env; + ProtocolSocket proto; + std::tie(env, proto) = GetParam(); + + // The first tuple element names the environment variable that holds the + // socket root directory. + char* val = getenv(env.c_str()); + ASSERT_NE(val, nullptr); + std::string root(val); + + FileDescriptor sock = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_UNIX, proto.protocol, 0)); + + std::string socket_path = JoinPath(root, proto.name, "echo"); + + struct sockaddr_un addr = {}; + addr.sun_family = AF_UNIX; + // sun_path is a fixed-size array and the root comes from the environment: + // fail cleanly instead of overflowing it. Strictly-less-than keeps the + // terminating NUL provided by the zero-initialized addr. + ASSERT_LT(socket_path.length(), sizeof(addr.sun_path)); + memcpy(addr.sun_path, socket_path.c_str(), socket_path.length()); + + ASSERT_THAT(connect(sock.get(), reinterpret_cast<struct sockaddr*>(&addr), + sizeof(addr)), + SyscallSucceeds()); + + constexpr int kBufferSize = 64; + char send_buffer[kBufferSize]; + memset(send_buffer, 'a', sizeof(send_buffer)); + + ASSERT_THAT(WriteFd(sock.get(), send_buffer, sizeof(send_buffer)), + SyscallSucceedsWithValue(sizeof(send_buffer))); + + // Whatever was written must come back unchanged. + char recv_buffer[kBufferSize]; + ASSERT_THAT(ReadFd(sock.get(), recv_buffer, sizeof(recv_buffer)), + SyscallSucceedsWithValue(sizeof(recv_buffer))); + ASSERT_EQ(0, memcmp(send_buffer, recv_buffer, sizeof(send_buffer))); +} + +// It is not possible to connect to a bound but non-listening socket. 
+TEST_P(GoferStreamSeqpacketTest, NonListening) { + std::string env; + ProtocolSocket proto; + std::tie(env, proto) = GetParam(); + + // The first tuple element names the environment variable that holds the + // socket root directory. + char* val = getenv(env.c_str()); + ASSERT_NE(val, nullptr); + std::string root(val); + + FileDescriptor sock = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_UNIX, proto.protocol, 0)); + + std::string socket_path = JoinPath(root, proto.name, "nonlistening"); + + struct sockaddr_un addr = {}; + addr.sun_family = AF_UNIX; + // sun_path is a fixed-size array and the root comes from the environment: + // fail cleanly instead of overflowing it. Strictly-less-than keeps the + // terminating NUL provided by the zero-initialized addr. + ASSERT_LT(socket_path.length(), sizeof(addr.sun_path)); + memcpy(addr.sun_path, socket_path.c_str(), socket_path.length()); + + ASSERT_THAT(connect(sock.get(), reinterpret_cast<struct sockaddr*>(&addr), + sizeof(addr)), + SyscallFailsWithErrno(ECONNREFUSED)); +} + +INSTANTIATE_TEST_SUITE_P( + StreamSeqpacket, GoferStreamSeqpacketTest, + ::testing::Combine( + // Test access via standard path and attach point. + ::testing::Values("TEST_UDS_TREE", "TEST_UDS_ATTACH_TREE"), + ::testing::Values(ProtocolSocket{SOCK_STREAM, "stream"}, + ProtocolSocket{SOCK_SEQPACKET, "seqpacket"}))); + +// Parameter is socket root dir. +using GoferDgramTest = ::testing::TestWithParam<std::string>; + +// Connect to a socket and verify that write works. +// +// An "echo" socket doesn't work for dgram sockets because our socket is +// unnamed. The server thus has no way to reply to us. 
+TEST_P(GoferDgramTest, Null) { + // The parameter names the environment variable that holds the socket root + // directory. + std::string env = GetParam(); + char* val = getenv(env.c_str()); + ASSERT_NE(val, nullptr); + std::string root(val); + + FileDescriptor sock = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_UNIX, SOCK_DGRAM, 0)); + + std::string socket_path = JoinPath(root, "dgram/null"); + + struct sockaddr_un addr = {}; + addr.sun_family = AF_UNIX; + // sun_path is a fixed-size array and the root comes from the environment: + // fail cleanly instead of overflowing it. Strictly-less-than keeps the + // terminating NUL provided by the zero-initialized addr. + ASSERT_LT(socket_path.length(), sizeof(addr.sun_path)); + memcpy(addr.sun_path, socket_path.c_str(), socket_path.length()); + + ASSERT_THAT(connect(sock.get(), reinterpret_cast<struct sockaddr*>(&addr), + sizeof(addr)), + SyscallSucceeds()); + + constexpr int kBufferSize = 64; + char send_buffer[kBufferSize]; + memset(send_buffer, 'a', sizeof(send_buffer)); + + ASSERT_THAT(WriteFd(sock.get(), send_buffer, sizeof(send_buffer)), + SyscallSucceedsWithValue(sizeof(send_buffer))); +} + +INSTANTIATE_TEST_SUITE_P(Dgram, GoferDgramTest, + // Test access via standard path and attach point. + ::testing::Values("TEST_UDS_TREE", + "TEST_UDS_ATTACH_TREE")); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/creat.cc b/test/syscalls/linux/creat.cc new file mode 100644 index 000000000..3c270d6da --- /dev/null +++ b/test/syscalls/linux/creat.cc @@ -0,0 +1,68 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <fcntl.h> +#include <sys/stat.h> +#include <sys/types.h> + +#include <string> + +#include "gtest/gtest.h" +#include "test/util/fs_util.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +constexpr int kMode = 0666; + +TEST(CreatTest, CreatCreatesNewFile) { + std::string const path = NewTempAbsPath(); + struct stat buf; + int fd; + ASSERT_THAT(stat(path.c_str(), &buf), SyscallFailsWithErrno(ENOENT)); + ASSERT_THAT(fd = creat(path.c_str(), kMode), SyscallSucceeds()); + EXPECT_THAT(close(fd), SyscallSucceeds()); + EXPECT_THAT(stat(path.c_str(), &buf), SyscallSucceeds()); +} + +TEST(CreatTest, CreatTruncatesExistingFile) { + auto temp_path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + int fd; + ASSERT_NO_ERRNO(SetContents(temp_path.path(), "non-empty")); + ASSERT_THAT(fd = creat(temp_path.path().c_str(), kMode), SyscallSucceeds()); + EXPECT_THAT(close(fd), SyscallSucceeds()); + std::string new_contents; + ASSERT_NO_ERRNO(GetContents(temp_path.path(), &new_contents)); + EXPECT_EQ("", new_contents); +} + +TEST(CreatTest, CreatWithNameTooLong) { + // Start with a unique name, and pad it to NAME_MAX + 1; + std::string name = NewTempRelPath(); + int padding = (NAME_MAX + 1) - name.size(); + name.append(padding, 'x'); + const std::string& path = JoinPath(GetAbsoluteTestTmpdir(), name); + + // Creation should return ENAMETOOLONG. + ASSERT_THAT(creat(path.c_str(), kMode), SyscallFailsWithErrno(ENAMETOOLONG)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/dev.cc b/test/syscalls/linux/dev.cc new file mode 100644 index 000000000..3c88c4cbd --- /dev/null +++ b/test/syscalls/linux/dev.cc @@ -0,0 +1,167 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <fcntl.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "test/util/file_descriptor.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +TEST(DevTest, LseekDevUrandom) { + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/urandom", O_RDONLY)); + EXPECT_THAT(lseek(fd.get(), -10, SEEK_CUR), SyscallSucceeds()); + EXPECT_THAT(lseek(fd.get(), -10, SEEK_SET), SyscallSucceeds()); + EXPECT_THAT(lseek(fd.get(), 0, SEEK_CUR), SyscallSucceeds()); +} + +TEST(DevTest, LseekDevNull) { + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/null", O_RDONLY)); + EXPECT_THAT(lseek(fd.get(), -10, SEEK_CUR), SyscallSucceeds()); + EXPECT_THAT(lseek(fd.get(), -10, SEEK_SET), SyscallSucceeds()); + EXPECT_THAT(lseek(fd.get(), 0, SEEK_CUR), SyscallSucceeds()); + EXPECT_THAT(lseek(fd.get(), 0, SEEK_END), SyscallSucceeds()); +} + +TEST(DevTest, LseekDevZero) { + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDONLY)); + EXPECT_THAT(lseek(fd.get(), 0, SEEK_CUR), SyscallSucceeds()); + EXPECT_THAT(lseek(fd.get(), 0, SEEK_END), SyscallSucceeds()); +} + +TEST(DevTest, LseekDevFull) { + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/full", O_RDONLY)); + EXPECT_THAT(lseek(fd.get(), 123, SEEK_SET), SyscallSucceedsWithValue(0)); + EXPECT_THAT(lseek(fd.get(), 123, SEEK_CUR), SyscallSucceedsWithValue(0)); + EXPECT_THAT(lseek(fd.get(), 123, SEEK_END), 
SyscallSucceedsWithValue(0)); +} + +TEST(DevTest, LseekDevNullFreshFile) { + // Seeks to /dev/null always return 0. + const FileDescriptor fd1 = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/null", O_RDONLY)); + const FileDescriptor fd2 = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/null", O_RDONLY)); + + EXPECT_THAT(lseek(fd1.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(0)); + EXPECT_THAT(lseek(fd1.get(), 1000, SEEK_CUR), SyscallSucceedsWithValue(0)); + EXPECT_THAT(lseek(fd2.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(0)); + + const FileDescriptor fd3 = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/null", O_RDONLY)); + EXPECT_THAT(lseek(fd3.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(0)); +} + +TEST(DevTest, OpenTruncate) { + // Truncation is ignored on linux and gvisor for device files. + ASSERT_NO_ERRNO_AND_VALUE( + Open("/dev/null", O_CREAT | O_TRUNC | O_WRONLY, 0644)); + ASSERT_NO_ERRNO_AND_VALUE( + Open("/dev/zero", O_CREAT | O_TRUNC | O_WRONLY, 0644)); + ASSERT_NO_ERRNO_AND_VALUE( + Open("/dev/full", O_CREAT | O_TRUNC | O_WRONLY, 0644)); +} + +TEST(DevTest, Pread64DevNull) { + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/null", O_RDONLY)); + char buf[1]; + EXPECT_THAT(pread64(fd.get(), buf, 1, 0), SyscallSucceedsWithValue(0)); +} + +TEST(DevTest, Pread64DevZero) { + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDONLY)); + char buf[1]; + EXPECT_THAT(pread64(fd.get(), buf, 1, 0), SyscallSucceedsWithValue(1)); +} + +TEST(DevTest, Pread64DevFull) { + // /dev/full behaves like /dev/zero with respect to reads. 
+ const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/full", O_RDONLY)); + char buf[1]; + EXPECT_THAT(pread64(fd.get(), buf, 1, 0), SyscallSucceedsWithValue(1)); +} + +TEST(DevTest, ReadDevNull) { + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/null", O_RDONLY)); + std::vector<char> buf(1); + EXPECT_THAT(ReadFd(fd.get(), buf.data(), 1), SyscallSucceeds()); +} + +// Do not allow random save as it could lead to partial reads. +TEST(DevTest, ReadDevZero_NoRandomSave) { + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDONLY)); + + constexpr int kReadSize = 128 * 1024; + std::vector<char> buf(kReadSize, 1); + EXPECT_THAT(ReadFd(fd.get(), buf.data(), kReadSize), + SyscallSucceedsWithValue(kReadSize)); + EXPECT_EQ(std::vector<char>(kReadSize, 0), buf); +} + +TEST(DevTest, WriteDevNull) { + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/null", O_WRONLY)); + EXPECT_THAT(WriteFd(fd.get(), "a", 1), SyscallSucceedsWithValue(1)); +} + +TEST(DevTest, WriteDevZero) { + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_WRONLY)); + EXPECT_THAT(WriteFd(fd.get(), "a", 1), SyscallSucceedsWithValue(1)); +} + +TEST(DevTest, WriteDevFull) { + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/full", O_WRONLY)); + EXPECT_THAT(WriteFd(fd.get(), "a", 1), SyscallFailsWithErrno(ENOSPC)); +} + +TEST(DevTest, TTYExists) { + struct stat statbuf = {}; + ASSERT_THAT(stat("/dev/tty", &statbuf), SyscallSucceeds()); + // Check that it's a character device with rw-rw-rw- permissions. + EXPECT_EQ(statbuf.st_mode, S_IFCHR | 0666); +} + +TEST(DevTest, OpenDevFuse) { + // Note(gvisor.dev/issue/3076) This won't work in the sentry until the new + // device registration is complete. 
+ SKIP_IF(IsRunningWithVFS1() || IsRunningOnGvisor()); + + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/fuse", O_RDONLY)); +} + +} // namespace +} // namespace testing + +} // namespace gvisor diff --git a/test/syscalls/linux/dup.cc b/test/syscalls/linux/dup.cc new file mode 100644 index 000000000..4f773bc75 --- /dev/null +++ b/test/syscalls/linux/dup.cc @@ -0,0 +1,133 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <fcntl.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "test/util/eventfd_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/posix_error.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +PosixErrorOr<FileDescriptor> Dup2(const FileDescriptor& fd, int target_fd) { + int new_fd = dup2(fd.get(), target_fd); + if (new_fd < 0) { + return PosixError(errno, "Dup2"); + } + return FileDescriptor(new_fd); +} + +PosixErrorOr<FileDescriptor> Dup3(const FileDescriptor& fd, int target_fd, + int flags) { + int new_fd = dup3(fd.get(), target_fd, flags); + if (new_fd < 0) { + return PosixError(errno, "Dup2"); + } + return FileDescriptor(new_fd); +} + +void CheckSameFile(const FileDescriptor& fd1, const FileDescriptor& fd2) { + struct stat stat_result1, stat_result2; + ASSERT_THAT(fstat(fd1.get(), &stat_result1), SyscallSucceeds()); + ASSERT_THAT(fstat(fd2.get(), &stat_result2), SyscallSucceeds()); + 
EXPECT_EQ(stat_result1.st_dev, stat_result2.st_dev); + EXPECT_EQ(stat_result1.st_ino, stat_result2.st_ino); +} + +TEST(DupTest, Dup) { + auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDONLY)); + + // Dup the descriptor and make sure it's the same file. + FileDescriptor nfd = ASSERT_NO_ERRNO_AND_VALUE(fd.Dup()); + ASSERT_NE(fd.get(), nfd.get()); + CheckSameFile(fd, nfd); +} + +TEST(DupTest, DupClearsCloExec) { + // Open an eventfd file descriptor with FD_CLOEXEC descriptor flag set. + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, EFD_CLOEXEC)); + EXPECT_THAT(fcntl(fd.get(), F_GETFD), SyscallSucceedsWithValue(FD_CLOEXEC)); + + // Duplicate the descriptor. Ensure that it doesn't have FD_CLOEXEC set. + FileDescriptor nfd = ASSERT_NO_ERRNO_AND_VALUE(fd.Dup()); + ASSERT_NE(fd.get(), nfd.get()); + CheckSameFile(fd, nfd); + EXPECT_THAT(fcntl(nfd.get(), F_GETFD), SyscallSucceedsWithValue(0)); +} + +TEST(DupTest, Dup2) { + auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDONLY)); + + // Regular dup once. + FileDescriptor nfd = ASSERT_NO_ERRNO_AND_VALUE(fd.Dup()); + + ASSERT_NE(fd.get(), nfd.get()); + CheckSameFile(fd, nfd); + + // Dup over the file above. + int target_fd = nfd.release(); + FileDescriptor nfd2 = ASSERT_NO_ERRNO_AND_VALUE(Dup2(fd, target_fd)); + EXPECT_EQ(target_fd, nfd2.get()); + CheckSameFile(fd, nfd2); +} + +TEST(DupTest, Dup2SameFD) { + auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDONLY)); + + // Should succeed. + ASSERT_THAT(dup2(fd.get(), fd.get()), SyscallSucceedsWithValue(fd.get())); +} + +TEST(DupTest, Dup3) { + auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDONLY)); + + // Regular dup once. 
+ FileDescriptor nfd = ASSERT_NO_ERRNO_AND_VALUE(fd.Dup()); + ASSERT_NE(fd.get(), nfd.get()); + CheckSameFile(fd, nfd); + + // Dup over the file above, check that it has no CLOEXEC. + nfd = ASSERT_NO_ERRNO_AND_VALUE(Dup3(fd, nfd.release(), 0)); + CheckSameFile(fd, nfd); + EXPECT_THAT(fcntl(nfd.get(), F_GETFD), SyscallSucceedsWithValue(0)); + + // Dup over the file again with O_CLOEXEC, check that it now has CLOEXEC. + nfd = ASSERT_NO_ERRNO_AND_VALUE(Dup3(fd, nfd.release(), O_CLOEXEC)); + CheckSameFile(fd, nfd); + EXPECT_THAT(fcntl(nfd.get(), F_GETFD), SyscallSucceedsWithValue(FD_CLOEXEC)); +} + +TEST(DupTest, Dup3FailsSameFD) { + auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDONLY)); + + // Only dup3 fails if the new and old fd are the same. + ASSERT_THAT(dup3(fd.get(), fd.get(), 0), SyscallFailsWithErrno(EINVAL)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/epoll.cc b/test/syscalls/linux/epoll.cc new file mode 100644 index 000000000..f57d38dc7 --- /dev/null +++ b/test/syscalls/linux/epoll.cc @@ -0,0 +1,428 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <errno.h> +#include <limits.h> +#include <pthread.h> +#include <signal.h> +#include <stdint.h> +#include <stdio.h> +#include <string.h> +#include <sys/epoll.h> +#include <sys/eventfd.h> +#include <time.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "test/util/epoll_util.h" +#include "test/util/eventfd_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/posix_error.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +constexpr int kFDsPerEpoll = 3; +constexpr uint64_t kMagicConstant = 0x0102030405060708; + +uint64_t ms_elapsed(const struct timespec* begin, const struct timespec* end) { + return (end->tv_sec - begin->tv_sec) * 1000 + + (end->tv_nsec - begin->tv_nsec) / 1000000; +} + +TEST(EpollTest, AllWritable) { + auto epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD()); + std::vector<FileDescriptor> eventfds; + for (int i = 0; i < kFDsPerEpoll; i++) { + eventfds.push_back(ASSERT_NO_ERRNO_AND_VALUE(NewEventFD())); + ASSERT_NO_ERRNO(RegisterEpollFD(epollfd.get(), eventfds[i].get(), + EPOLLIN | EPOLLOUT, kMagicConstant + i)); + } + + struct epoll_event result[kFDsPerEpoll]; + ASSERT_THAT(RetryEINTR(epoll_wait)(epollfd.get(), result, kFDsPerEpoll, -1), + SyscallSucceedsWithValue(kFDsPerEpoll)); + for (int i = 0; i < kFDsPerEpoll; i++) { + ASSERT_EQ(result[i].events, EPOLLOUT); + } +} + +TEST(EpollTest, LastReadable) { + auto epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD()); + std::vector<FileDescriptor> eventfds; + for (int i = 0; i < kFDsPerEpoll; i++) { + eventfds.push_back(ASSERT_NO_ERRNO_AND_VALUE(NewEventFD())); + ASSERT_NO_ERRNO(RegisterEpollFD(epollfd.get(), eventfds[i].get(), + EPOLLIN | EPOLLOUT, kMagicConstant + i)); + } + + uint64_t tmp = 1; + ASSERT_THAT(WriteFd(eventfds[kFDsPerEpoll - 1].get(), &tmp, sizeof(tmp)), + SyscallSucceedsWithValue(sizeof(tmp))); + + struct epoll_event result[kFDsPerEpoll]; + ASSERT_THAT(RetryEINTR(epoll_wait)(epollfd.get(), result, 
kFDsPerEpoll, -1), + SyscallSucceedsWithValue(kFDsPerEpoll)); + + int i; + for (i = 0; i < kFDsPerEpoll - 1; i++) { + EXPECT_EQ(result[i].events, EPOLLOUT); + } + EXPECT_EQ(result[i].events, EPOLLOUT | EPOLLIN); + EXPECT_EQ(result[i].data.u64, kMagicConstant + i); +} + +// Fills the last eventfd so that it is readable but not writable, checks the +// reported events, then drains it and checks that every fd is writable again. +TEST(EpollTest, LastNonWritable) { + auto epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD()); + std::vector<FileDescriptor> eventfds; + for (int i = 0; i < kFDsPerEpoll; i++) { + eventfds.push_back(ASSERT_NO_ERRNO_AND_VALUE(NewEventFD())); + ASSERT_NO_ERRNO(RegisterEpollFD(epollfd.get(), eventfds[i].get(), + EPOLLIN | EPOLLOUT, kMagicConstant + i)); + } + + // Write the maximum value to the event fd so that writing to it again would + // block. + uint64_t tmp = ULLONG_MAX - 1; + ASSERT_THAT(WriteFd(eventfds[kFDsPerEpoll - 1].get(), &tmp, sizeof(tmp)), + SyscallSucceedsWithValue(sizeof(tmp))); + + struct epoll_event result[kFDsPerEpoll]; + ASSERT_THAT(RetryEINTR(epoll_wait)(epollfd.get(), result, kFDsPerEpoll, -1), + SyscallSucceedsWithValue(kFDsPerEpoll)); + + int i; + for (i = 0; i < kFDsPerEpoll - 1; i++) { + EXPECT_EQ(result[i].events, EPOLLOUT); + } + EXPECT_EQ(result[i].events, EPOLLIN); + // Use the syscall matcher (not a bare value) so a failed read reports errno, + // consistent with the other checks in this file. + EXPECT_THAT(ReadFd(eventfds[kFDsPerEpoll - 1].get(), &tmp, sizeof(tmp)), + SyscallSucceedsWithValue(sizeof(tmp))); + EXPECT_THAT(RetryEINTR(epoll_wait)(epollfd.get(), result, kFDsPerEpoll, -1), + SyscallSucceedsWithValue(kFDsPerEpoll)); + + for (i = 0; i < kFDsPerEpoll; i++) { + EXPECT_EQ(result[i].events, EPOLLOUT); + } +} + +TEST(EpollTest, Timeout_NoRandomSave) { + auto epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD()); + std::vector<FileDescriptor> eventfds; + for (int i = 0; i < kFDsPerEpoll; i++) { + eventfds.push_back(ASSERT_NO_ERRNO_AND_VALUE(NewEventFD())); + ASSERT_NO_ERRNO(RegisterEpollFD(epollfd.get(), eventfds[i].get(), EPOLLIN, + kMagicConstant + i)); + } + + constexpr int kTimeoutMs = 200; + struct timespec begin; + struct timespec end; + struct epoll_event result[kFDsPerEpoll]; + + { + const DisableSave ds; // 
Timing-related. + EXPECT_THAT(clock_gettime(CLOCK_MONOTONIC, &begin), SyscallSucceeds()); + + ASSERT_THAT( + RetryEINTR(epoll_wait)(epollfd.get(), result, kFDsPerEpoll, kTimeoutMs), + SyscallSucceedsWithValue(0)); + EXPECT_THAT(clock_gettime(CLOCK_MONOTONIC, &end), SyscallSucceeds()); + } + + // Check the lower bound on the timeout. Checking for an upper bound is + // fragile because Linux can overrun the timeout due to scheduling delays. + EXPECT_GT(ms_elapsed(&begin, &end), kTimeoutMs - 1); +} + +void* writer(void* arg) { + int fd = *reinterpret_cast<int*>(arg); + uint64_t tmp = 1; + + usleep(200000); + if (WriteFd(fd, &tmp, sizeof(tmp)) != sizeof(tmp)) { + fprintf(stderr, "writer failed: errno %s\n", strerror(errno)); + } + + return nullptr; +} + +TEST(EpollTest, WaitThenUnblock) { + auto epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD()); + std::vector<FileDescriptor> eventfds; + for (int i = 0; i < kFDsPerEpoll; i++) { + eventfds.push_back(ASSERT_NO_ERRNO_AND_VALUE(NewEventFD())); + ASSERT_NO_ERRNO(RegisterEpollFD(epollfd.get(), eventfds[i].get(), EPOLLIN, + kMagicConstant + i)); + } + + // Fire off a thread that will make at least one of the event fds readable. + pthread_t thread; + int make_readable = eventfds[0].get(); + ASSERT_THAT(pthread_create(&thread, nullptr, writer, &make_readable), + SyscallSucceedsWithValue(0)); + + struct epoll_event result[kFDsPerEpoll]; + EXPECT_THAT(RetryEINTR(epoll_wait)(epollfd.get(), result, kFDsPerEpoll, -1), + SyscallSucceedsWithValue(1)); + EXPECT_THAT(pthread_detach(thread), SyscallSucceeds()); +} + +void sighandler(int s) {} + +void* signaler(void* arg) { + pthread_t* t = reinterpret_cast<pthread_t*>(arg); + // Repeatedly send the real-time signal until we are detached, because it's + // difficult to know exactly when epoll_wait on another thread (which this + // is intending to interrupt) has started blocking. 
+ while (1) { + usleep(200000); + pthread_kill(*t, SIGRTMIN); + } + return nullptr; +} + +TEST(EpollTest, UnblockWithSignal) { + auto epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD()); + std::vector<FileDescriptor> eventfds; + for (int i = 0; i < kFDsPerEpoll; i++) { + eventfds.push_back(ASSERT_NO_ERRNO_AND_VALUE(NewEventFD())); + ASSERT_NO_ERRNO(RegisterEpollFD(epollfd.get(), eventfds[i].get(), EPOLLIN, + kMagicConstant + i)); + } + + signal(SIGRTMIN, sighandler); + // Unblock the real time signals that InitGoogle blocks :( + sigset_t unblock; + sigemptyset(&unblock); + sigaddset(&unblock, SIGRTMIN); + ASSERT_THAT(sigprocmask(SIG_UNBLOCK, &unblock, nullptr), SyscallSucceeds()); + + pthread_t thread; + pthread_t cur = pthread_self(); + ASSERT_THAT(pthread_create(&thread, nullptr, signaler, &cur), + SyscallSucceedsWithValue(0)); + + struct epoll_event result[kFDsPerEpoll]; + EXPECT_THAT(epoll_wait(epollfd.get(), result, kFDsPerEpoll, -1), + SyscallFailsWithErrno(EINTR)); + EXPECT_THAT(pthread_cancel(thread), SyscallSucceeds()); + EXPECT_THAT(pthread_detach(thread), SyscallSucceeds()); +} + +TEST(EpollTest, TimeoutNoFds) { + auto epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD()); + struct epoll_event result[kFDsPerEpoll]; + EXPECT_THAT(RetryEINTR(epoll_wait)(epollfd.get(), result, kFDsPerEpoll, 100), + SyscallSucceedsWithValue(0)); +} + +struct addr_ctx { + int epollfd; + int eventfd; +}; + +void* fd_adder(void* arg) { + struct addr_ctx* actx = reinterpret_cast<struct addr_ctx*>(arg); + struct epoll_event event; + event.events = EPOLLIN | EPOLLOUT; + event.data.u64 = 0xdeadbeeffacefeed; + + usleep(200000); + if (epoll_ctl(actx->epollfd, EPOLL_CTL_ADD, actx->eventfd, &event) == -1) { + fprintf(stderr, "epoll_ctl failed: %s\n", strerror(errno)); + } + + return nullptr; +} + +TEST(EpollTest, UnblockWithNewFD) { + auto epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD()); + auto eventfd = ASSERT_NO_ERRNO_AND_VALUE(NewEventFD()); + + pthread_t thread; + struct addr_ctx actx 
= {epollfd.get(), eventfd.get()}; + ASSERT_THAT(pthread_create(&thread, nullptr, fd_adder, &actx), + SyscallSucceedsWithValue(0)); + + struct epoll_event result[kFDsPerEpoll]; + // Wait while no FDs are ready, but after 200ms fd_adder will add a ready FD + // to epoll which will wake us up. + EXPECT_THAT(RetryEINTR(epoll_wait)(epollfd.get(), result, kFDsPerEpoll, -1), + SyscallSucceedsWithValue(1)); + EXPECT_THAT(pthread_detach(thread), SyscallSucceeds()); + EXPECT_EQ(result[0].data.u64, 0xdeadbeeffacefeed); +} + +TEST(EpollTest, Oneshot) { + auto epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD()); + std::vector<FileDescriptor> eventfds; + for (int i = 0; i < kFDsPerEpoll; i++) { + eventfds.push_back(ASSERT_NO_ERRNO_AND_VALUE(NewEventFD())); + ASSERT_NO_ERRNO(RegisterEpollFD(epollfd.get(), eventfds[i].get(), EPOLLIN, + kMagicConstant + i)); + } + + struct epoll_event event; + event.events = EPOLLOUT | EPOLLONESHOT; + event.data.u64 = kMagicConstant; + ASSERT_THAT( + epoll_ctl(epollfd.get(), EPOLL_CTL_MOD, eventfds[0].get(), &event), + SyscallSucceeds()); + + struct epoll_event result[kFDsPerEpoll]; + // One-shot entry means that the first epoll_wait should succeed. + ASSERT_THAT(RetryEINTR(epoll_wait)(epollfd.get(), result, kFDsPerEpoll, -1), + SyscallSucceedsWithValue(1)); + EXPECT_EQ(result[0].data.u64, kMagicConstant); + + // One-shot entry means that the second epoll_wait should timeout. + EXPECT_THAT(RetryEINTR(epoll_wait)(epollfd.get(), result, kFDsPerEpoll, 100), + SyscallSucceedsWithValue(0)); +} + +TEST(EpollTest, EdgeTriggered_NoRandomSave) { + // Test edge-triggered entry: make it edge-triggered, first wait should + // return it, second one should time out, make it writable again, third wait + // should return it, fourth wait should timeout. 
+ auto epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD()); + auto eventfd = ASSERT_NO_ERRNO_AND_VALUE(NewEventFD()); + ASSERT_NO_ERRNO(RegisterEpollFD(epollfd.get(), eventfd.get(), + EPOLLOUT | EPOLLET, kMagicConstant)); + + struct epoll_event result[kFDsPerEpoll]; + + { + const DisableSave ds; // May trigger spurious event. + + // Edge-triggered entry means that the first epoll_wait should return the + // event. + ASSERT_THAT(epoll_wait(epollfd.get(), result, kFDsPerEpoll, -1), + SyscallSucceedsWithValue(1)); + EXPECT_EQ(result[0].data.u64, kMagicConstant); + + // Edge-triggered entry means that the second epoll_wait should time out. + ASSERT_THAT(epoll_wait(epollfd.get(), result, kFDsPerEpoll, 100), + SyscallSucceedsWithValue(0)); + } + + uint64_t tmp = ULLONG_MAX - 1; + + // Make an fd non-writable. + ASSERT_THAT(WriteFd(eventfd.get(), &tmp, sizeof(tmp)), + SyscallSucceedsWithValue(sizeof(tmp))); + + // Make the same fd writable again to trigger a change, which will trigger an + // edge-triggered event. + ASSERT_THAT(ReadFd(eventfd.get(), &tmp, sizeof(tmp)), + SyscallSucceedsWithValue(sizeof(tmp))); + + { + const DisableSave ds; // May trigger spurious event. + + // An edge-triggered event should now be returned. + ASSERT_THAT(epoll_wait(epollfd.get(), result, kFDsPerEpoll, -1), + SyscallSucceedsWithValue(1)); + EXPECT_EQ(result[0].data.u64, kMagicConstant); + + // The edge-triggered event had been consumed above, we don't expect to + // get it again. 
+ ASSERT_THAT(epoll_wait(epollfd.get(), result, kFDsPerEpoll, 100), + SyscallSucceedsWithValue(0)); + } +} + +TEST(EpollTest, OneshotAndEdgeTriggered) { + auto epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD()); + auto eventfd = ASSERT_NO_ERRNO_AND_VALUE(NewEventFD()); + ASSERT_NO_ERRNO(RegisterEpollFD(epollfd.get(), eventfd.get(), + EPOLLOUT | EPOLLET | EPOLLONESHOT, + kMagicConstant)); + + struct epoll_event result[kFDsPerEpoll]; + // First time one shot edge-triggered entry means that epoll_wait should + // return the event. + ASSERT_THAT(RetryEINTR(epoll_wait)(epollfd.get(), result, kFDsPerEpoll, -1), + SyscallSucceedsWithValue(1)); + EXPECT_EQ(result[0].data.u64, kMagicConstant); + + // Edge-triggered entry means that the second epoll_wait should time out. + ASSERT_THAT(RetryEINTR(epoll_wait)(epollfd.get(), result, kFDsPerEpoll, 100), + SyscallSucceedsWithValue(0)); + + uint64_t tmp = ULLONG_MAX - 1; + // Make an fd non-writable. + ASSERT_THAT(WriteFd(eventfd.get(), &tmp, sizeof(tmp)), + SyscallSucceedsWithValue(sizeof(tmp))); + // Make the same fd writable again to trigger a change, which will not trigger + // an edge-triggered event because we've also included EPOLLONESHOT. 
+ ASSERT_THAT(ReadFd(eventfd.get(), &tmp, sizeof(tmp)), + SyscallSucceedsWithValue(sizeof(tmp))); + ASSERT_THAT(RetryEINTR(epoll_wait)(epollfd.get(), result, kFDsPerEpoll, 100), + SyscallSucceedsWithValue(0)); +} + +TEST(EpollTest, CycleOfOneDisallowed) { + auto epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD()); + + struct epoll_event event; + event.events = EPOLLOUT; + event.data.u64 = kMagicConstant; + + ASSERT_THAT(epoll_ctl(epollfd.get(), EPOLL_CTL_ADD, epollfd.get(), &event), + SyscallFailsWithErrno(EINVAL)); +} + +TEST(EpollTest, CycleOfThreeDisallowed) { + auto epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD()); + auto epollfd1 = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD()); + auto epollfd2 = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD()); + + ASSERT_NO_ERRNO( + RegisterEpollFD(epollfd.get(), epollfd1.get(), EPOLLIN, kMagicConstant)); + ASSERT_NO_ERRNO( + RegisterEpollFD(epollfd1.get(), epollfd2.get(), EPOLLIN, kMagicConstant)); + + struct epoll_event event; + event.events = EPOLLIN; + event.data.u64 = kMagicConstant; + EXPECT_THAT(epoll_ctl(epollfd2.get(), EPOLL_CTL_ADD, epollfd.get(), &event), + SyscallFailsWithErrno(ELOOP)); +} + +TEST(EpollTest, CloseFile) { + auto epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD()); + auto eventfd = ASSERT_NO_ERRNO_AND_VALUE(NewEventFD()); + ASSERT_NO_ERRNO( + RegisterEpollFD(epollfd.get(), eventfd.get(), EPOLLOUT, kMagicConstant)); + + struct epoll_event result[kFDsPerEpoll]; + ASSERT_THAT(RetryEINTR(epoll_wait)(epollfd.get(), result, kFDsPerEpoll, -1), + SyscallSucceedsWithValue(1)); + EXPECT_EQ(result[0].data.u64, kMagicConstant); + + // Close the event fd early. 
+ eventfd.reset(); + + EXPECT_THAT(RetryEINTR(epoll_wait)(epollfd.get(), result, kFDsPerEpoll, 100), + SyscallSucceedsWithValue(0)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/eventfd.cc b/test/syscalls/linux/eventfd.cc new file mode 100644 index 000000000..dc794415e --- /dev/null +++ b/test/syscalls/linux/eventfd.cc @@ -0,0 +1,222 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <errno.h> +#include <pthread.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/epoll.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "test/util/epoll_util.h" +#include "test/util/eventfd_util.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +TEST(EventfdTest, Nonblock) { + FileDescriptor efd = + ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, EFD_NONBLOCK | EFD_SEMAPHORE)); + + uint64_t l; + ASSERT_THAT(read(efd.get(), &l, sizeof(l)), SyscallFailsWithErrno(EAGAIN)); + + l = 1; + ASSERT_THAT(write(efd.get(), &l, sizeof(l)), SyscallSucceeds()); + + l = 0; + ASSERT_THAT(read(efd.get(), &l, sizeof(l)), SyscallSucceeds()); + EXPECT_EQ(l, 1); + + ASSERT_THAT(read(efd.get(), &l, sizeof(l)), SyscallFailsWithErrno(EAGAIN)); +} + +void* read_three_times(void* arg) { + int efd = *reinterpret_cast<int*>(arg); + uint64_t l; + EXPECT_THAT(read(efd, &l, sizeof(l)), SyscallSucceedsWithValue(sizeof(l))); + EXPECT_THAT(read(efd, &l, sizeof(l)), SyscallSucceedsWithValue(sizeof(l))); + EXPECT_THAT(read(efd, &l, sizeof(l)), SyscallSucceedsWithValue(sizeof(l))); + return nullptr; +} + +TEST(EventfdTest, BlockingWrite) { + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, EFD_SEMAPHORE)); + int efd = fd.get(); + + pthread_t p; + ASSERT_THAT(pthread_create(&p, nullptr, read_three_times, + reinterpret_cast<void*>(&efd)), + SyscallSucceeds()); + + uint64_t l = 1; + ASSERT_THAT(write(efd, &l, sizeof(l)), SyscallSucceeds()); + EXPECT_EQ(l, 1); + + ASSERT_THAT(write(efd, &l, sizeof(l)), SyscallSucceeds()); + EXPECT_EQ(l, 1); + + ASSERT_THAT(write(efd, &l, sizeof(l)), SyscallSucceeds()); + EXPECT_EQ(l, 1); + + ASSERT_THAT(pthread_join(p, nullptr), SyscallSucceeds()); +} + +TEST(EventfdTest, SmallWrite) { + FileDescriptor efd = + ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, EFD_NONBLOCK | EFD_SEMAPHORE)); + + 
uint64_t l = 16; + ASSERT_THAT(write(efd.get(), &l, 4), SyscallFailsWithErrno(EINVAL)); +} + +TEST(EventfdTest, SmallRead) { + FileDescriptor efd = + ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, EFD_NONBLOCK | EFD_SEMAPHORE)); + + uint64_t l = 1; + ASSERT_THAT(write(efd.get(), &l, sizeof(l)), SyscallSucceeds()); + + l = 0; + ASSERT_THAT(read(efd.get(), &l, 4), SyscallFailsWithErrno(EINVAL)); +} + +TEST(EventfdTest, IllegalSeek) { + FileDescriptor efd = ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, 0)); + EXPECT_THAT(lseek(efd.get(), 0, SEEK_SET), SyscallFailsWithErrno(ESPIPE)); +} + +TEST(EventfdTest, IllegalPread) { + FileDescriptor efd = ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, 0)); + int l; + EXPECT_THAT(pread(efd.get(), &l, sizeof(l), 0), + SyscallFailsWithErrno(ESPIPE)); +} + +TEST(EventfdTest, IllegalPwrite) { + FileDescriptor efd = ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, 0)); + EXPECT_THAT(pwrite(efd.get(), "x", 1, 0), SyscallFailsWithErrno(ESPIPE)); +} + +TEST(EventfdTest, BigWrite) { + FileDescriptor efd = + ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, EFD_NONBLOCK | EFD_SEMAPHORE)); + + uint64_t big[16]; + big[0] = 16; + ASSERT_THAT(write(efd.get(), big, sizeof(big)), SyscallSucceeds()); +} + +TEST(EventfdTest, BigRead) { + FileDescriptor efd = + ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, EFD_NONBLOCK | EFD_SEMAPHORE)); + + uint64_t l = 1; + ASSERT_THAT(write(efd.get(), &l, sizeof(l)), SyscallSucceeds()); + + uint64_t big[16]; + ASSERT_THAT(read(efd.get(), big, sizeof(big)), SyscallSucceeds()); + EXPECT_EQ(big[0], 1); +} + +TEST(EventfdTest, BigWriteBigRead) { + FileDescriptor efd = + ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, EFD_NONBLOCK | EFD_SEMAPHORE)); + + uint64_t l[16]; + l[0] = 16; + ASSERT_THAT(write(efd.get(), l, sizeof(l)), SyscallSucceeds()); + ASSERT_THAT(read(efd.get(), l, sizeof(l)), SyscallSucceeds()); + EXPECT_EQ(l[0], 1); +} + +TEST(EventfdTest, SpliceFromPipePartialSucceeds) { + int pipes[2]; + ASSERT_THAT(pipe2(pipes, O_NONBLOCK), SyscallSucceeds()); + 
const FileDescriptor pipe_rfd(pipes[0]); + const FileDescriptor pipe_wfd(pipes[1]); + constexpr uint64_t kVal{1}; + + FileDescriptor efd = ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, EFD_NONBLOCK)); + + uint64_t event_array[2]; + event_array[0] = kVal; + event_array[1] = kVal; + ASSERT_THAT(write(pipe_wfd.get(), event_array, sizeof(event_array)), + SyscallSucceedsWithValue(sizeof(event_array))); + EXPECT_THAT(splice(pipe_rfd.get(), /*__offin=*/nullptr, efd.get(), + /*__offout=*/nullptr, sizeof(event_array[0]) + 1, + SPLICE_F_NONBLOCK), + SyscallSucceedsWithValue(sizeof(event_array[0]))); + + uint64_t val; + ASSERT_THAT(read(efd.get(), &val, sizeof(val)), + SyscallSucceedsWithValue(sizeof(val))); + EXPECT_EQ(val, kVal); +} + +// NotifyNonZero is inherently racy, so random save is disabled. +TEST(EventfdTest, NotifyNonZero_NoRandomSave) { + // Waits will time out at 10 seconds. + constexpr int kEpollTimeoutMs = 10000; + // Create an eventfd descriptor. + FileDescriptor efd = + ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(7, EFD_NONBLOCK | EFD_SEMAPHORE)); + // Create an epoll fd to listen to efd. + FileDescriptor epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD()); + // Add efd to epoll. + ASSERT_NO_ERRNO( + RegisterEpollFD(epollfd.get(), efd.get(), EPOLLIN | EPOLLET, efd.get())); + + // Use epoll to get a value from efd. + struct epoll_event out_ev; + int wait_out = epoll_wait(epollfd.get(), &out_ev, 1, kEpollTimeoutMs); + EXPECT_EQ(wait_out, 1); + EXPECT_EQ(efd.get(), out_ev.data.fd); + uint64_t val = 0; + ASSERT_THAT(read(efd.get(), &val, sizeof(val)), SyscallSucceeds()); + EXPECT_EQ(val, 1); + + // Start a thread that, after this thread blocks on epoll_wait, will write to + // efd. This is racy -- it's possible that this write will happen after + // epoll_wait times out. 
+ ScopedThread t([&efd] { + sleep(5); + uint64_t val = 1; + EXPECT_THAT(write(efd.get(), &val, sizeof(val)), + SyscallSucceedsWithValue(sizeof(val))); + }); + + // epoll_wait should return once the thread writes. + wait_out = epoll_wait(epollfd.get(), &out_ev, 1, kEpollTimeoutMs); + EXPECT_EQ(wait_out, 1); + EXPECT_EQ(efd.get(), out_ev.data.fd); + + val = 0; + ASSERT_THAT(read(efd.get(), &val, sizeof(val)), SyscallSucceeds()); + EXPECT_EQ(val, 1); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/exceptions.cc b/test/syscalls/linux/exceptions.cc new file mode 100644 index 000000000..420b9543f --- /dev/null +++ b/test/syscalls/linux/exceptions.cc @@ -0,0 +1,367 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <signal.h> + +#include "gtest/gtest.h" +#include "test/util/logging.h" +#include "test/util/platform_util.h" +#include "test/util/signal_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +// Default value for the x87 FPU control word. See Intel SDM Vol 1, Ch 8.1.5 +// "x87 FPU Control Word". +constexpr uint16_t kX87ControlWordDefault = 0x37f; + +// Mask for the divide-by-zero exception. +constexpr uint16_t kX87ControlWordDiv0Mask = 1 << 2; + +// Default value for the SSE control register (MXCSR). See Intel SDM Vol 1, Ch +// 11.6.4 "Initialization of SSE/SSE3 Extensions". 
+constexpr uint32_t kMXCSRDefault = 0x1f80; + +// Mask for the divide-by-zero exception. +constexpr uint32_t kMXCSRDiv0Mask = 1 << 9; + +// Flag for a pending divide-by-zero exception. +constexpr uint32_t kMXCSRDiv0Flag = 1 << 2; + +void inline Halt() { asm("hlt\r\n"); } + +void inline SetAlignmentCheck() { + asm("subq $128, %%rsp\r\n" // Avoid potential red zone clobber + "pushf\r\n" + "pop %%rax\r\n" + "or $0x40000, %%rax\r\n" + "push %%rax\r\n" + "popf\r\n" + "addq $128, %%rsp\r\n" + : + : + : "ax"); +} + +void inline ClearAlignmentCheck() { + asm("subq $128, %%rsp\r\n" // Avoid potential red zone clobber + "pushf\r\n" + "pop %%rax\r\n" + "mov $0x40000, %%rbx\r\n" + "not %%rbx\r\n" + "and %%rbx, %%rax\r\n" + "push %%rax\r\n" + "popf\r\n" + "addq $128, %%rsp\r\n" + : + : + : "ax", "bx"); +} + +void inline Int3Normal() { asm(".byte 0xcd, 0x03\r\n"); } + +void inline Int3Compact() { asm(".byte 0xcc\r\n"); } + +void InIOHelper(int width, int value) { + EXPECT_EXIT( + { + switch (width) { + case 1: + asm volatile("inb %%dx, %%al" ::"d"(value) : "%eax"); + break; + case 2: + asm volatile("inw %%dx, %%ax" ::"d"(value) : "%eax"); + break; + case 4: + asm volatile("inl %%dx, %%eax" ::"d"(value) : "%eax"); + break; + default: + FAIL() << "invalid input width, only 1, 2 or 4 is allowed"; + } + }, + ::testing::KilledBySignal(SIGSEGV), ""); +} + +TEST(ExceptionTest, Halt) { + // In order to prevent the regular handler from messing with things (and + // perhaps refaulting until some other signal occurs), we reset the handler to + // the default action here and ensure that it dies correctly. + struct sigaction sa = {}; + sa.sa_handler = SIG_DFL; + auto const cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGSEGV, sa)); + + EXPECT_EXIT(Halt(), ::testing::KilledBySignal(SIGSEGV), ""); +} + +TEST(ExceptionTest, DivideByZero) { + // See above. 
+ struct sigaction sa = {}; + sa.sa_handler = SIG_DFL; + auto const cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGFPE, sa)); + + EXPECT_EXIT( + { + uint32_t remainder; + uint32_t quotient; + uint32_t divisor = 0; + uint64_t value = 1; + asm("divl 0(%2)\r\n" + : "=d"(remainder), "=a"(quotient) + : "r"(&divisor), "d"(value >> 32), "a"(value)); + TEST_CHECK(quotient > 0); // Force dependency. + }, + ::testing::KilledBySignal(SIGFPE), ""); +} + +// By default, x87 exceptions are masked and simply return a default value. +TEST(ExceptionTest, X87DivideByZeroMasked) { + int32_t quotient; + int32_t value = 1; + int32_t divisor = 0; + asm("fildl %[value]\r\n" + "fidivl %[divisor]\r\n" + "fistpl %[quotient]\r\n" + : [ quotient ] "=m"(quotient) + : [ value ] "m"(value), [ divisor ] "m"(divisor)); + + EXPECT_EQ(quotient, INT32_MIN); +} + +// When unmasked, division by zero raises SIGFPE. +TEST(ExceptionTest, X87DivideByZeroUnmasked) { + // See above. + struct sigaction sa = {}; + sa.sa_handler = SIG_DFL; + auto const cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGFPE, sa)); + + EXPECT_EXIT( + { + // Clear the divide by zero exception mask. + constexpr uint16_t kControlWord = + kX87ControlWordDefault & ~kX87ControlWordDiv0Mask; + + int32_t quotient; + int32_t value = 1; + int32_t divisor = 0; + asm volatile( + "fldcw %[cw]\r\n" + "fildl %[value]\r\n" + "fidivl %[divisor]\r\n" + "fistpl %[quotient]\r\n" + : [ quotient ] "=m"(quotient) + : [ cw ] "m"(kControlWord), [ value ] "m"(value), + [ divisor ] "m"(divisor)); + }, + ::testing::KilledBySignal(SIGFPE), ""); +} + +// Pending exceptions in the x87 status register are not clobbered by syscalls. +TEST(ExceptionTest, X87StatusClobber) { + // See above. + struct sigaction sa = {}; + sa.sa_handler = SIG_DFL; + auto const cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGFPE, sa)); + + EXPECT_EXIT( + { + // Clear the divide by zero exception mask. 
+ constexpr uint16_t kControlWord = + kX87ControlWordDefault & ~kX87ControlWordDiv0Mask; + + int32_t quotient; + int32_t value = 1; + int32_t divisor = 0; + asm volatile( + "fildl %[value]\r\n" + "fidivl %[divisor]\r\n" + // Exception is masked, so it does not occur here. + "fistpl %[quotient]\r\n" + + // SYS_getpid placed in rax by constraint. + "syscall\r\n" + + // Unmask exception. The syscall didn't clobber the pending + // exception, so now it can be raised. + // + // N.B. "a floating-point exception will be generated upon execution + // of the *next* floating-point instruction". + "fldcw %[cw]\r\n" + "fwait\r\n" + : [ quotient ] "=m"(quotient) + : [ value ] "m"(value), [ divisor ] "m"(divisor), "a"(SYS_getpid), + [ cw ] "m"(kControlWord) + : "rcx", "r11"); + }, + ::testing::KilledBySignal(SIGFPE), ""); +} + +// By default, SSE exceptions are masked and simply return a default value. +TEST(ExceptionTest, SSEDivideByZeroMasked) { + uint32_t status; + int32_t quotient; + int32_t value = 1; + int32_t divisor = 0; + asm("cvtsi2ssl %[value], %%xmm0\r\n" + "cvtsi2ssl %[divisor], %%xmm1\r\n" + "divss %%xmm1, %%xmm0\r\n" + "cvtss2sil %%xmm0, %[quotient]\r\n" + : [ quotient ] "=r"(quotient), [ status ] "=r"(status) + : [ value ] "r"(value), [ divisor ] "r"(divisor) + : "xmm0", "xmm1"); + + EXPECT_EQ(quotient, INT32_MIN); +} + +// When unmasked, division by zero raises SIGFPE. +TEST(ExceptionTest, SSEDivideByZeroUnmasked) { + // See above. + struct sigaction sa = {}; + sa.sa_handler = SIG_DFL; + auto const cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGFPE, sa)); + + EXPECT_EXIT( + { + // Clear the divide by zero exception mask. 
+ constexpr uint32_t kMXCSR = kMXCSRDefault & ~kMXCSRDiv0Mask; + + int32_t quotient; + int32_t value = 1; + int32_t divisor = 0; + asm volatile( + "ldmxcsr %[mxcsr]\r\n" + "cvtsi2ssl %[value], %%xmm0\r\n" + "cvtsi2ssl %[divisor], %%xmm1\r\n" + "divss %%xmm1, %%xmm0\r\n" + "cvtss2sil %%xmm0, %[quotient]\r\n" + : [ quotient ] "=r"(quotient) + : [ mxcsr ] "m"(kMXCSR), [ value ] "r"(value), + [ divisor ] "r"(divisor) + : "xmm0", "xmm1"); + }, + ::testing::KilledBySignal(SIGFPE), ""); +} + +// Pending exceptions in the SSE status register are not clobbered by syscalls. +TEST(ExceptionTest, SSEStatusClobber) { + uint32_t mxcsr; + int32_t quotient; + int32_t value = 1; + int32_t divisor = 0; + asm("cvtsi2ssl %[value], %%xmm0\r\n" + "cvtsi2ssl %[divisor], %%xmm1\r\n" + "divss %%xmm1, %%xmm0\r\n" + // Exception is masked, so it does not occur here. + "cvtss2sil %%xmm0, %[quotient]\r\n" + + // SYS_getpid placed in rax by constraint. + "syscall\r\n" + + // Intel SDM Vol 1, Ch 10.2.3.1 "SIMD Floating-Point Mask and Flag Bits": + // "If LDMXCSR or FXRSTOR clears a mask bit and sets the corresponding + // exception flag bit, a SIMD floating-point exception will not be + // generated as a result of this change. The unmasked exception will be + // generated only upon the execution of the next SSE/SSE2/SSE3 instruction + // that detects the unmasked exception condition." + // + // Though ambiguous, empirical evidence indicates that this means that + // exception flags set in the status register will never cause an + // exception to be raised; only a new exception condition will do so. + // + // Thus here we just check for the flag itself rather than trying to raise + // the exception. + "stmxcsr %[mxcsr]\r\n" + : [ quotient ] "=r"(quotient), [ mxcsr ] "+m"(mxcsr) + : [ value ] "r"(value), [ divisor ] "r"(divisor), "a"(SYS_getpid) + : "xmm0", "xmm1", "rcx", "r11"); + + EXPECT_TRUE(mxcsr & kMXCSRDiv0Flag); +} + +TEST(ExceptionTest, IOAccessFault) { + // See above. 
+ struct sigaction sa = {}; + sa.sa_handler = SIG_DFL; + auto const cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGSEGV, sa)); + + InIOHelper(1, 0x0); + InIOHelper(2, 0x7); + InIOHelper(4, 0x6); + InIOHelper(1, 0xffff); + InIOHelper(2, 0xffff); + InIOHelper(4, 0xfffd); +} + +TEST(ExceptionTest, Alignment) { + SetAlignmentCheck(); + ClearAlignmentCheck(); +} + +TEST(ExceptionTest, AlignmentHalt) { + // See above. + struct sigaction sa = {}; + sa.sa_handler = SIG_DFL; + auto const cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGSEGV, sa)); + + // Reported upstream. We need to ensure that bad flags are cleared even in + // fault paths. Set the alignment flag and then generate an exception. + EXPECT_EXIT( + { + SetAlignmentCheck(); + Halt(); + }, + ::testing::KilledBySignal(SIGSEGV), ""); +} + +TEST(ExceptionTest, AlignmentCheck) { + SKIP_IF(PlatformSupportAlignmentCheck() != PlatformSupport::Allowed); + + // See above. + struct sigaction sa = {}; + sa.sa_handler = SIG_DFL; + auto const cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGBUS, sa)); + + EXPECT_EXIT( + { + char array[16]; + SetAlignmentCheck(); + for (int i = 0; i < 8; i++) { + // At least 7/8 offsets will be unaligned here. + uint64_t* ptr = reinterpret_cast<uint64_t*>(&array[i]); + asm("mov %0, 0(%0)\r\n" : : "r"(ptr) : "ax"); + } + }, + ::testing::KilledBySignal(SIGBUS), ""); +} + +TEST(ExceptionTest, Int3Normal) { + // See above. + struct sigaction sa = {}; + sa.sa_handler = SIG_DFL; + auto const cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGTRAP, sa)); + + EXPECT_EXIT(Int3Normal(), ::testing::KilledBySignal(SIGTRAP), ""); +} + +TEST(ExceptionTest, Int3Compact) { + // See above. 
+ struct sigaction sa = {}; + sa.sa_handler = SIG_DFL; + auto const cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGTRAP, sa)); + + EXPECT_EXIT(Int3Compact(), ::testing::KilledBySignal(SIGTRAP), ""); +} + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/exec.cc b/test/syscalls/linux/exec.cc new file mode 100644 index 000000000..c5acfc794 --- /dev/null +++ b/test/syscalls/linux/exec.cc @@ -0,0 +1,904 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "test/syscalls/linux/exec.h" + +#include <errno.h> +#include <fcntl.h> +#include <sys/eventfd.h> +#include <sys/resource.h> +#include <sys/time.h> +#include <unistd.h> + +#include <iostream> +#include <memory> +#include <string> +#include <vector> + +#include "gtest/gtest.h" +#include "absl/strings/match.h" +#include "absl/strings/numbers.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_split.h" +#include "absl/strings/string_view.h" +#include "absl/synchronization/mutex.h" +#include "absl/types/optional.h" +#include "test/util/file_descriptor.h" +#include "test/util/fs_util.h" +#include "test/util/multiprocess_util.h" +#include "test/util/posix_error.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +constexpr char kBasicWorkload[] = "test/syscalls/linux/exec_basic_workload"; +constexpr char kExitScript[] = "test/syscalls/linux/exit_script"; +constexpr char kStateWorkload[] = "test/syscalls/linux/exec_state_workload"; +constexpr char kProcExeWorkload[] = + "test/syscalls/linux/exec_proc_exe_workload"; +constexpr char kAssertClosedWorkload[] = + "test/syscalls/linux/exec_assert_closed_workload"; +constexpr char kPriorityWorkload[] = "test/syscalls/linux/priority_execve"; + +constexpr char kExit42[] = "--exec_exit_42"; +constexpr char kExecWithThread[] = "--exec_exec_with_thread"; +constexpr char kExecFromThread[] = "--exec_exec_from_thread"; + +// Runs file specified by dirfd and pathname with argv and checks that the exit +// status is expect_status and that stderr contains expect_stderr. 
+void CheckExecHelper(const absl::optional<int32_t> dirfd, + const std::string& pathname, const ExecveArray& argv, + const ExecveArray& envv, const int flags, + int expect_status, const std::string& expect_stderr) { + int pipe_fds[2]; + ASSERT_THAT(pipe2(pipe_fds, O_CLOEXEC), SyscallSucceeds()); + + FileDescriptor read_fd(pipe_fds[0]); + FileDescriptor write_fd(pipe_fds[1]); + + pid_t child; + int execve_errno; + + const auto remap_stderr = [pipe_fds] { + // Remap stdin and stdout to /dev/null. + int fd = open("/dev/null", O_RDWR | O_CLOEXEC); + if (fd < 0) { + _exit(errno); + } + + int ret = dup2(fd, 0); + if (ret < 0) { + _exit(errno); + } + + ret = dup2(fd, 1); + if (ret < 0) { + _exit(errno); + } + + // And stderr to the pipe. + ret = dup2(pipe_fds[1], 2); + if (ret < 0) { + _exit(errno); + } + + // Here, we'd ideally close all other FDs inherited from the parent. + // However, that's not worth the effort and CloexecNormalFile and + // CloexecEventfd depend on that not happening. + }; + + Cleanup kill; + if (dirfd.has_value()) { + kill = ASSERT_NO_ERRNO_AND_VALUE(ForkAndExecveat(*dirfd, pathname, argv, + envv, flags, remap_stderr, + &child, &execve_errno)); + } else { + kill = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec(pathname, argv, envv, remap_stderr, &child, &execve_errno)); + } + + ASSERT_EQ(0, execve_errno); + + // Not needed anymore. + write_fd.reset(); + + // Read stderr until the child exits. + std::string output; + constexpr int kSize = 128; + char buf[kSize]; + int n; + do { + ASSERT_THAT(n = ReadFd(read_fd.get(), buf, kSize), SyscallSucceeds()); + if (n > 0) { + output.append(buf, n); + } + } while (n > 0); + + int status; + ASSERT_THAT(RetryEINTR(waitpid)(child, &status, 0), SyscallSucceeds()); + EXPECT_EQ(status, expect_status); + + // Process cleanup no longer needed. 
+ kill.Release(); + + EXPECT_TRUE(absl::StrContains(output, expect_stderr)) << output; +} + +void CheckExec(const std::string& filename, const ExecveArray& argv, + const ExecveArray& envv, int expect_status, + const std::string& expect_stderr) { + CheckExecHelper(/*dirfd=*/absl::optional<int32_t>(), filename, argv, envv, + /*flags=*/0, expect_status, expect_stderr); +} + +void CheckExecveat(const int32_t dirfd, const std::string& pathname, + const ExecveArray& argv, const ExecveArray& envv, + const int flags, int expect_status, + const std::string& expect_stderr) { + CheckExecHelper(absl::optional<int32_t>(dirfd), pathname, argv, envv, flags, + expect_status, expect_stderr); +} + +TEST(ExecTest, EmptyPath) { + int execve_errno; + ASSERT_NO_ERRNO_AND_VALUE(ForkAndExec("", {}, {}, nullptr, &execve_errno)); + EXPECT_EQ(execve_errno, ENOENT); +} + +TEST(ExecTest, Basic) { + CheckExec(RunfilePath(kBasicWorkload), {RunfilePath(kBasicWorkload)}, {}, + ArgEnvExitStatus(0, 0), + absl::StrCat(RunfilePath(kBasicWorkload), "\n")); +} + +TEST(ExecTest, OneArg) { + CheckExec(RunfilePath(kBasicWorkload), {RunfilePath(kBasicWorkload), "1"}, {}, + ArgEnvExitStatus(1, 0), + absl::StrCat(RunfilePath(kBasicWorkload), "\n1\n")); +} + +TEST(ExecTest, FiveArg) { + CheckExec(RunfilePath(kBasicWorkload), + {RunfilePath(kBasicWorkload), "1", "2", "3", "4", "5"}, {}, + ArgEnvExitStatus(5, 0), + absl::StrCat(RunfilePath(kBasicWorkload), "\n1\n2\n3\n4\n5\n")); +} + +TEST(ExecTest, OneEnv) { + CheckExec(RunfilePath(kBasicWorkload), {RunfilePath(kBasicWorkload)}, {"1"}, + ArgEnvExitStatus(0, 1), + absl::StrCat(RunfilePath(kBasicWorkload), "\n1\n")); +} + +TEST(ExecTest, FiveEnv) { + CheckExec(RunfilePath(kBasicWorkload), {RunfilePath(kBasicWorkload)}, + {"1", "2", "3", "4", "5"}, ArgEnvExitStatus(0, 5), + absl::StrCat(RunfilePath(kBasicWorkload), "\n1\n2\n3\n4\n5\n")); +} + +TEST(ExecTest, OneArgOneEnv) { + CheckExec(RunfilePath(kBasicWorkload), {RunfilePath(kBasicWorkload), "arg"}, + {"env"}, 
ArgEnvExitStatus(1, 1), + absl::StrCat(RunfilePath(kBasicWorkload), "\narg\nenv\n")); +} + +TEST(ExecTest, InterpreterScript) { + CheckExec(RunfilePath(kExitScript), {RunfilePath(kExitScript), "25"}, {}, + ArgEnvExitStatus(25, 0), ""); +} + +// Everything after the path in the interpreter script is a single argument. +TEST(ExecTest, InterpreterScriptArgSplit) { + // Symlink through /tmp to ensure the path is short enough. + TempPath link = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateSymlinkTo("/tmp", RunfilePath(kBasicWorkload))); + + TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), absl::StrCat("#!", link.path(), " foo bar"), + 0755)); + + CheckExec(script.path(), {script.path()}, {}, ArgEnvExitStatus(2, 0), + absl::StrCat(link.path(), "\nfoo bar\n", script.path(), "\n")); +} + +// Original argv[0] is replaced with the script path. +TEST(ExecTest, InterpreterScriptArgvZero) { + // Symlink through /tmp to ensure the path is short enough. + TempPath link = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateSymlinkTo("/tmp", RunfilePath(kBasicWorkload))); + + TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), absl::StrCat("#!", link.path()), 0755)); + + CheckExec(script.path(), {"REPLACED"}, {}, ArgEnvExitStatus(1, 0), + absl::StrCat(link.path(), "\n", script.path(), "\n")); +} + +// Original argv[0] is replaced with the script path, exactly as passed to +// execve. +TEST(ExecTest, InterpreterScriptArgvZeroRelative) { + // Symlink through /tmp to ensure the path is short enough. 
+ TempPath link = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateSymlinkTo("/tmp", RunfilePath(kBasicWorkload))); + + TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), absl::StrCat("#!", link.path()), 0755)); + + auto cwd = ASSERT_NO_ERRNO_AND_VALUE(GetCWD()); + auto script_relative = + ASSERT_NO_ERRNO_AND_VALUE(GetRelativePath(cwd, script.path())); + + CheckExec(script_relative, {"REPLACED"}, {}, ArgEnvExitStatus(1, 0), + absl::StrCat(link.path(), "\n", script_relative, "\n")); +} + +// argv[0] is added as the script path, even if there was none. +TEST(ExecTest, InterpreterScriptArgvZeroAdded) { + // Symlink through /tmp to ensure the path is short enough. + TempPath link = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateSymlinkTo("/tmp", RunfilePath(kBasicWorkload))); + + TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), absl::StrCat("#!", link.path()), 0755)); + + CheckExec(script.path(), {}, {}, ArgEnvExitStatus(1, 0), + absl::StrCat(link.path(), "\n", script.path(), "\n")); +} + +// A NUL byte in the script line ends parsing. +TEST(ExecTest, InterpreterScriptArgNUL) { + // Symlink through /tmp to ensure the path is short enough. + TempPath link = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateSymlinkTo("/tmp", RunfilePath(kBasicWorkload))); + + TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), + absl::StrCat("#!", link.path(), " foo", std::string(1, '\0'), "bar"), + 0755)); + + CheckExec(script.path(), {script.path()}, {}, ArgEnvExitStatus(2, 0), + absl::StrCat(link.path(), "\nfoo\n", script.path(), "\n")); +} + +// Trailing whitespace following interpreter path is ignored. +TEST(ExecTest, InterpreterScriptTrailingWhitespace) { + // Symlink through /tmp to ensure the path is short enough. 
+ TempPath link = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateSymlinkTo("/tmp", RunfilePath(kBasicWorkload))); + + TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), absl::StrCat("#!", link.path(), " "), 0755)); + + CheckExec(script.path(), {script.path()}, {}, ArgEnvExitStatus(1, 0), + absl::StrCat(link.path(), "\n", script.path(), "\n")); +} + +// Multiple whitespace characters between interpreter and arg allowed. +TEST(ExecTest, InterpreterScriptArgWhitespace) { + // Symlink through /tmp to ensure the path is short enough. + TempPath link = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateSymlinkTo("/tmp", RunfilePath(kBasicWorkload))); + + TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), absl::StrCat("#!", link.path(), " foo"), 0755)); + + CheckExec(script.path(), {script.path()}, {}, ArgEnvExitStatus(2, 0), + absl::StrCat(link.path(), "\nfoo\n", script.path(), "\n")); +} + +TEST(ExecTest, InterpreterScriptNoPath) { + TempPath script = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateFileWith(GetAbsoluteTestTmpdir(), "#!", 0755)); + + int execve_errno; + ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec(script.path(), {script.path()}, {}, nullptr, &execve_errno)); + EXPECT_EQ(execve_errno, ENOEXEC); +} + +// AT_EXECFN is the path passed to execve. +TEST(ExecTest, ExecFn) { + // Symlink through /tmp to ensure the path is short enough. + TempPath link = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateSymlinkTo("/tmp", RunfilePath(kStateWorkload))); + + TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), absl::StrCat("#!", link.path(), " PrintExecFn"), + 0755)); + + // Pass the script as a relative path and assert that is what appears in + // AT_EXECFN. 
+ auto cwd = ASSERT_NO_ERRNO_AND_VALUE(GetCWD()); + auto script_relative = + ASSERT_NO_ERRNO_AND_VALUE(GetRelativePath(cwd, script.path())); + + CheckExec(script_relative, {script_relative}, {}, ArgEnvExitStatus(0, 0), + absl::StrCat(script_relative, "\n")); +} + +TEST(ExecTest, ExecName) { + std::string path = RunfilePath(kStateWorkload); + + CheckExec(path, {path, "PrintExecName"}, {}, ArgEnvExitStatus(0, 0), + absl::StrCat(Basename(path).substr(0, 15), "\n")); +} + +TEST(ExecTest, ExecNameScript) { + // Symlink through /tmp to ensure the path is short enough. + TempPath link = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateSymlinkTo("/tmp", RunfilePath(kStateWorkload))); + + TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), + absl::StrCat("#!", link.path(), " PrintExecName"), 0755)); + + std::string script_path = script.path(); + + CheckExec(script_path, {script_path}, {}, ArgEnvExitStatus(0, 0), + absl::StrCat(Basename(script_path).substr(0, 15), "\n")); +} + +// execve may be called by a multithreaded process. +TEST(ExecTest, WithSiblingThread) { + CheckExec("/proc/self/exe", {"/proc/self/exe", kExecWithThread}, {}, + W_EXITCODE(42, 0), ""); +} + +// execve may be called from a thread other than the leader of a multithreaded +// process. +TEST(ExecTest, FromSiblingThread) { + CheckExec("/proc/self/exe", {"/proc/self/exe", kExecFromThread}, {}, + W_EXITCODE(42, 0), ""); +} + +TEST(ExecTest, NotFound) { + char* const argv[] = {nullptr}; + char* const envp[] = {nullptr}; + EXPECT_THAT(execve("/file/does/not/exist", argv, envp), + SyscallFailsWithErrno(ENOENT)); +} + +TEST(ExecTest, NoExecPerm) { + char* const argv[] = {nullptr}; + char* const envp[] = {nullptr}; + auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + EXPECT_THAT(execve(f.path().c_str(), argv, envp), + SyscallFailsWithErrno(EACCES)); +} + +// A signal handler we never expect to be called. 
+void SignalHandler(int signo) { + std::cerr << "Signal " << signo << " raised." << std::endl; + exit(1); +} + +// Signal handlers are reset on execve(2), unless they have default or ignored +// disposition. +TEST(ExecStateTest, HandlerReset) { + struct sigaction sa; + sa.sa_handler = SignalHandler; + ASSERT_THAT(sigaction(SIGUSR1, &sa, nullptr), SyscallSucceeds()); + + ExecveArray args = { + RunfilePath(kStateWorkload), + "CheckSigHandler", + absl::StrCat(SIGUSR1), + absl::StrCat(absl::Hex(reinterpret_cast<uintptr_t>(SIG_DFL))), + }; + + CheckExec(RunfilePath(kStateWorkload), args, {}, W_EXITCODE(0, 0), ""); +} + +// Ignored signal dispositions are not reset. +TEST(ExecStateTest, IgnorePreserved) { + struct sigaction sa; + sa.sa_handler = SIG_IGN; + ASSERT_THAT(sigaction(SIGUSR1, &sa, nullptr), SyscallSucceeds()); + + ExecveArray args = { + RunfilePath(kStateWorkload), + "CheckSigHandler", + absl::StrCat(SIGUSR1), + absl::StrCat(absl::Hex(reinterpret_cast<uintptr_t>(SIG_IGN))), + }; + + CheckExec(RunfilePath(kStateWorkload), args, {}, W_EXITCODE(0, 0), ""); +} + +// Signal masks are not reset on exec +TEST(ExecStateTest, SignalMask) { + sigset_t s; + sigemptyset(&s); + sigaddset(&s, SIGUSR1); + ASSERT_THAT(sigprocmask(SIG_BLOCK, &s, nullptr), SyscallSucceeds()); + + ExecveArray args = { + RunfilePath(kStateWorkload), + "CheckSigBlocked", + absl::StrCat(SIGUSR1), + }; + + CheckExec(RunfilePath(kStateWorkload), args, {}, W_EXITCODE(0, 0), ""); +} + +// itimers persist across execve. +// N.B. Timers created with timer_create(2) should not be preserved! +TEST(ExecStateTest, ItimerPreserved) { + // The fork in ForkAndExec clears itimers, so only set them up after fork. + auto setup_itimer = [] { + // Ignore SIGALRM, as we don't actually care about timer + // expirations. 
+ struct sigaction sa; + sa.sa_handler = SIG_IGN; + int ret = sigaction(SIGALRM, &sa, nullptr); + if (ret < 0) { + _exit(errno); + } + + struct itimerval itv; + itv.it_interval.tv_sec = 1; + itv.it_interval.tv_usec = 0; + itv.it_value.tv_sec = 1; + itv.it_value.tv_usec = 0; + ret = setitimer(ITIMER_REAL, &itv, nullptr); + if (ret < 0) { + _exit(errno); + } + }; + + std::string filename = RunfilePath(kStateWorkload); + ExecveArray argv = { + filename, + "CheckItimerEnabled", + absl::StrCat(ITIMER_REAL), + }; + + pid_t child; + int execve_errno; + auto kill = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec(filename, argv, {}, setup_itimer, &child, &execve_errno)); + ASSERT_EQ(0, execve_errno); + + int status; + ASSERT_THAT(RetryEINTR(waitpid)(child, &status, 0), SyscallSucceeds()); + EXPECT_EQ(0, status); + + // Process cleanup no longer needed. + kill.Release(); +} + +TEST(ProcSelfExe, ChangesAcrossExecve) { + // See exec_proc_exe_workload for more details. We simply + // assert that the /proc/self/exe link changes across execve. + CheckExec(RunfilePath(kProcExeWorkload), + {RunfilePath(kProcExeWorkload), + ASSERT_NO_ERRNO_AND_VALUE(ProcessExePath(getpid()))}, + {}, W_EXITCODE(0, 0), ""); +} + +TEST(ExecTest, CloexecNormalFile) { + TempPath tempFile = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateFileWith(GetAbsoluteTestTmpdir(), "bar", 0755)); + const FileDescriptor fd_closed_on_exec = + ASSERT_NO_ERRNO_AND_VALUE(Open(tempFile.path(), O_RDONLY | O_CLOEXEC)); + + CheckExec(RunfilePath(kAssertClosedWorkload), + {RunfilePath(kAssertClosedWorkload), + absl::StrCat(fd_closed_on_exec.get())}, + {}, W_EXITCODE(0, 0), ""); + + // The assert closed workload exits with code 2 if the file still exists. We + // can use this to do a negative test. 
+ const FileDescriptor fd_open_on_exec = + ASSERT_NO_ERRNO_AND_VALUE(Open(tempFile.path(), O_RDONLY)); + + CheckExec( + RunfilePath(kAssertClosedWorkload), + {RunfilePath(kAssertClosedWorkload), absl::StrCat(fd_open_on_exec.get())}, + {}, W_EXITCODE(2, 0), ""); +} + +TEST(ExecTest, CloexecEventfd) { + int efd; + ASSERT_THAT(efd = eventfd(0, EFD_CLOEXEC), SyscallSucceeds()); + FileDescriptor fd(efd); + + CheckExec(RunfilePath(kAssertClosedWorkload), + {RunfilePath(kAssertClosedWorkload), absl::StrCat(fd.get())}, {}, + W_EXITCODE(0, 0), ""); +} + +constexpr int kLinuxMaxSymlinks = 40; + +TEST(ExecTest, SymlinkLimitExceeded) { + std::string path = RunfilePath(kBasicWorkload); + + // Hold onto TempPath objects so they are not destructed prematurely. + std::vector<TempPath> symlinks; + for (int i = 0; i < kLinuxMaxSymlinks + 1; i++) { + symlinks.push_back( + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateSymlinkTo("/tmp", path))); + path = symlinks[i].path(); + } + + int execve_errno; + ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec(path, {path}, {}, /*child=*/nullptr, &execve_errno)); + EXPECT_EQ(execve_errno, ELOOP); +} + +TEST(ExecTest, SymlinkLimitRefreshedForInterpreter) { + std::string tmp_dir = "/tmp"; + std::string interpreter_path = "/bin/echo"; + TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + tmp_dir, absl::StrCat("#!", interpreter_path), 0755)); + std::string script_path = script.path(); + + // Hold onto TempPath objects so they are not destructed prematurely. + std::vector<TempPath> interpreter_symlinks; + std::vector<TempPath> script_symlinks; + // Replace both the interpreter and script paths with symlink chains of just + // over half the symlink limit each; this is the minimum required to test that + // the symlink limit applies separately to each traversal, while tolerating + // some symlinks in the resolution of (the original) interpreter_path and + // script_path. 
+ for (int i = 0; i < (kLinuxMaxSymlinks / 2) + 1; i++) { + interpreter_symlinks.push_back(ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateSymlinkTo(tmp_dir, interpreter_path))); + interpreter_path = interpreter_symlinks[i].path(); + script_symlinks.push_back(ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateSymlinkTo(tmp_dir, script_path))); + script_path = script_symlinks[i].path(); + } + + CheckExec(script_path, {script_path}, {}, ArgEnvExitStatus(0, 0), ""); +} + +TEST(ExecveatTest, BasicWithFDCWD) { + std::string path = RunfilePath(kBasicWorkload); + CheckExecveat(AT_FDCWD, path, {path}, {}, /*flags=*/0, ArgEnvExitStatus(0, 0), + absl::StrCat(path, "\n")); +} + +TEST(ExecveatTest, Basic) { + std::string absolute_path = RunfilePath(kBasicWorkload); + std::string parent_dir = std::string(Dirname(absolute_path)); + std::string base = std::string(Basename(absolute_path)); + const FileDescriptor dirfd = + ASSERT_NO_ERRNO_AND_VALUE(Open(parent_dir, O_DIRECTORY)); + + CheckExecveat(dirfd.get(), base, {absolute_path}, {}, /*flags=*/0, + ArgEnvExitStatus(0, 0), absl::StrCat(absolute_path, "\n")); +} + +TEST(ExecveatTest, FDNotADirectory) { + std::string absolute_path = RunfilePath(kBasicWorkload); + std::string base = std::string(Basename(absolute_path)); + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(absolute_path, 0)); + + int execve_errno; + ASSERT_NO_ERRNO_AND_VALUE(ForkAndExecveat(fd.get(), base, {absolute_path}, {}, + /*flags=*/0, /*child=*/nullptr, + &execve_errno)); + EXPECT_EQ(execve_errno, ENOTDIR); +} + +TEST(ExecveatTest, AbsolutePathWithFDCWD) { + std::string path = RunfilePath(kBasicWorkload); + CheckExecveat(AT_FDCWD, path, {path}, {}, ArgEnvExitStatus(0, 0), 0, + absl::StrCat(path, "\n")); +} + +TEST(ExecveatTest, AbsolutePath) { + std::string path = RunfilePath(kBasicWorkload); + // File descriptor should be ignored when an absolute path is given. 
+ const int32_t badFD = -1; + CheckExecveat(badFD, path, {path}, {}, ArgEnvExitStatus(0, 0), 0, + absl::StrCat(path, "\n")); +} + +TEST(ExecveatTest, EmptyPathBasic) { + std::string path = RunfilePath(kBasicWorkload); + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(path, O_PATH)); + + CheckExecveat(fd.get(), "", {path}, {}, AT_EMPTY_PATH, ArgEnvExitStatus(0, 0), + absl::StrCat(path, "\n")); +} + +TEST(ExecveatTest, EmptyPathWithDirFD) { + std::string path = RunfilePath(kBasicWorkload); + std::string parent_dir = std::string(Dirname(path)); + const FileDescriptor dirfd = + ASSERT_NO_ERRNO_AND_VALUE(Open(parent_dir, O_DIRECTORY)); + + int execve_errno; + ASSERT_NO_ERRNO_AND_VALUE(ForkAndExecveat(dirfd.get(), "", {path}, {}, + AT_EMPTY_PATH, + /*child=*/nullptr, &execve_errno)); + EXPECT_EQ(execve_errno, EACCES); +} + +TEST(ExecveatTest, EmptyPathWithoutEmptyPathFlag) { + std::string path = RunfilePath(kBasicWorkload); + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(path, O_PATH)); + + int execve_errno; + ASSERT_NO_ERRNO_AND_VALUE(ForkAndExecveat( + fd.get(), "", {path}, {}, /*flags=*/0, /*child=*/nullptr, &execve_errno)); + EXPECT_EQ(execve_errno, ENOENT); +} + +TEST(ExecveatTest, AbsolutePathWithEmptyPathFlag) { + std::string path = RunfilePath(kBasicWorkload); + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(path, O_PATH)); + + CheckExecveat(fd.get(), path, {path}, {}, AT_EMPTY_PATH, + ArgEnvExitStatus(0, 0), absl::StrCat(path, "\n")); +} + +TEST(ExecveatTest, RelativePathWithEmptyPathFlag) { + std::string absolute_path = RunfilePath(kBasicWorkload); + std::string parent_dir = std::string(Dirname(absolute_path)); + std::string base = std::string(Basename(absolute_path)); + const FileDescriptor dirfd = + ASSERT_NO_ERRNO_AND_VALUE(Open(parent_dir, O_DIRECTORY)); + + CheckExecveat(dirfd.get(), base, {absolute_path}, {}, AT_EMPTY_PATH, + ArgEnvExitStatus(0, 0), absl::StrCat(absolute_path, "\n")); +} + +TEST(ExecveatTest, 
SymlinkNoFollowWithRelativePath) { + std::string parent_dir = "/tmp"; + TempPath link = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateSymlinkTo(parent_dir, RunfilePath(kBasicWorkload))); + const FileDescriptor dirfd = + ASSERT_NO_ERRNO_AND_VALUE(Open(parent_dir, O_DIRECTORY)); + std::string base = std::string(Basename(link.path())); + + int execve_errno; + ASSERT_NO_ERRNO_AND_VALUE(ForkAndExecveat(dirfd.get(), base, {base}, {}, + AT_SYMLINK_NOFOLLOW, + /*child=*/nullptr, &execve_errno)); + EXPECT_EQ(execve_errno, ELOOP); +} + +TEST(ExecveatTest, UnshareFiles) { + TempPath tempFile = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateFileWith(GetAbsoluteTestTmpdir(), "bar", 0755)); + const FileDescriptor fd_closed_on_exec = + ASSERT_NO_ERRNO_AND_VALUE(Open(tempFile.path(), O_RDONLY | O_CLOEXEC)); + + ExecveArray argv = {"test"}; + ExecveArray envp; + std::string child_path = RunfilePath(kBasicWorkload); + pid_t child = + syscall(__NR_clone, SIGCHLD | CLONE_VFORK | CLONE_FILES, 0, 0, 0, 0); + if (child == 0) { + execve(child_path.c_str(), argv.get(), envp.get()); + _exit(1); + } + ASSERT_THAT(child, SyscallSucceeds()); + + int status; + ASSERT_THAT(RetryEINTR(waitpid)(child, &status, 0), SyscallSucceeds()); + EXPECT_EQ(status, 0); + + struct stat st; + EXPECT_THAT(fstat(fd_closed_on_exec.get(), &st), SyscallSucceeds()); +} + +TEST(ExecveatTest, SymlinkNoFollowWithAbsolutePath) { + std::string parent_dir = "/tmp"; + TempPath link = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateSymlinkTo(parent_dir, RunfilePath(kBasicWorkload))); + std::string path = link.path(); + + int execve_errno; + ASSERT_NO_ERRNO_AND_VALUE(ForkAndExecveat(AT_FDCWD, path, {path}, {}, + AT_SYMLINK_NOFOLLOW, + /*child=*/nullptr, &execve_errno)); + EXPECT_EQ(execve_errno, ELOOP); +} + +TEST(ExecveatTest, SymlinkNoFollowAndEmptyPath) { + TempPath link = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateSymlinkTo("/tmp", RunfilePath(kBasicWorkload))); + std::string path = link.path(); + const FileDescriptor fd = 
ASSERT_NO_ERRNO_AND_VALUE(Open(path, 0)); + + CheckExecveat(fd.get(), "", {path}, {}, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW, + ArgEnvExitStatus(0, 0), absl::StrCat(path, "\n")); +} + +TEST(ExecveatTest, SymlinkNoFollowIgnoreSymlinkAncestor) { + TempPath parent_link = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateSymlinkTo("/tmp", "/bin")); + std::string path_with_symlink = JoinPath(parent_link.path(), "echo"); + + CheckExecveat(AT_FDCWD, path_with_symlink, {path_with_symlink}, {}, + AT_SYMLINK_NOFOLLOW, ArgEnvExitStatus(0, 0), ""); +} + +TEST(ExecveatTest, SymlinkNoFollowWithNormalFile) { + const FileDescriptor dirfd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/bin", O_DIRECTORY)); + + CheckExecveat(dirfd.get(), "echo", {"echo"}, {}, AT_SYMLINK_NOFOLLOW, + ArgEnvExitStatus(0, 0), ""); +} + +TEST(ExecveatTest, BasicWithCloexecFD) { + std::string path = RunfilePath(kBasicWorkload); + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(path, O_CLOEXEC)); + + CheckExecveat(fd.get(), "", {path}, {}, AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH, + ArgEnvExitStatus(0, 0), absl::StrCat(path, "\n")); +} + +TEST(ExecveatTest, InterpreterScriptWithCloexecFD) { + std::string path = RunfilePath(kExitScript); + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(path, O_CLOEXEC)); + + int execve_errno; + ASSERT_NO_ERRNO_AND_VALUE(ForkAndExecveat(fd.get(), "", {path}, {}, + AT_EMPTY_PATH, /*child=*/nullptr, + &execve_errno)); + EXPECT_EQ(execve_errno, ENOENT); +} + +TEST(ExecveatTest, InterpreterScriptWithCloexecDirFD) { + std::string absolute_path = RunfilePath(kExitScript); + std::string parent_dir = std::string(Dirname(absolute_path)); + std::string base = std::string(Basename(absolute_path)); + const FileDescriptor dirfd = + ASSERT_NO_ERRNO_AND_VALUE(Open(parent_dir, O_CLOEXEC | O_DIRECTORY)); + + int execve_errno; + ASSERT_NO_ERRNO_AND_VALUE(ForkAndExecveat(dirfd.get(), base, {base}, {}, + /*flags=*/0, /*child=*/nullptr, + &execve_errno)); + EXPECT_EQ(execve_errno, ENOENT); +} + 
+TEST(ExecveatTest, InvalidFlags) { + int execve_errno; + ASSERT_NO_ERRNO_AND_VALUE(ForkAndExecveat( + /*dirfd=*/-1, "", {}, {}, /*flags=*/0xFFFF, /*child=*/nullptr, + &execve_errno)); + EXPECT_EQ(execve_errno, EINVAL); +} + +// Priority consistent across calls to execve() +TEST(GetpriorityTest, ExecveMaintainsPriority) { + int prio = 16; + ASSERT_THAT(setpriority(PRIO_PROCESS, getpid(), prio), SyscallSucceeds()); + + // To avoid trying to use negative exit values, check for + // 20 - prio. Since prio should always be in the range [-20, 19], + // this leave expected_exit_code in the range [1, 40]. + int expected_exit_code = 20 - prio; + + // Program run (priority_execve) will exit(X) where + // X=getpriority(PRIO_PROCESS,0). Check that this exit value is prio. + CheckExec(RunfilePath(kPriorityWorkload), {RunfilePath(kPriorityWorkload)}, + {}, W_EXITCODE(expected_exit_code, 0), ""); +} + +void ExecWithThread() { + // Used to ensure that the thread has actually started. + absl::Mutex mu; + bool started = false; + + ScopedThread t([&] { + mu.Lock(); + started = true; + mu.Unlock(); + + while (true) { + pause(); + } + }); + + mu.LockWhen(absl::Condition(&started)); + mu.Unlock(); + + const ExecveArray argv = {"/proc/self/exe", kExit42}; + const ExecveArray envv; + + execve("/proc/self/exe", argv.get(), envv.get()); + exit(errno); +} + +void ExecFromThread() { + ScopedThread t([] { + const ExecveArray argv = {"/proc/self/exe", kExit42}; + const ExecveArray envv; + + execve("/proc/self/exe", argv.get(), envv.get()); + exit(errno); + }); + + while (true) { + pause(); + } +} + +bool ValidateProcCmdlineVsArgv(const int argc, const char* const* argv) { + auto contents_or = GetContents("/proc/self/cmdline"); + if (!contents_or.ok()) { + std::cerr << "Unable to get /proc/self/cmdline: " << contents_or.error() + << std::endl; + return false; + } + auto contents = contents_or.ValueOrDie(); + if (contents.back() != '\0') { + std::cerr << "Non-null terminated /proc/self/cmdline!" 
<< std::endl; + return false; + } + contents.pop_back(); + std::vector<std::string> procfs_cmdline = absl::StrSplit(contents, '\0'); + + if (static_cast<int>(procfs_cmdline.size()) != argc) { + std::cerr << "argc = " << argc << " != " << procfs_cmdline.size() + << std::endl; + return false; + } + + for (int i = 0; i < argc; ++i) { + if (procfs_cmdline[i] != argv[i]) { + std::cerr << "Procfs command line argument " << i << " mismatch " + << procfs_cmdline[i] << " != " << argv[i] << std::endl; + return false; + } + } + return true; +} + +} // namespace + +} // namespace testing +} // namespace gvisor + +int main(int argc, char** argv) { + // Start by validating that the stack argv is consistent with procfs. + if (!gvisor::testing::ValidateProcCmdlineVsArgv(argc, argv)) { + return 1; + } + + // Some of these tests require no background threads, so check for them before + // TestInit. + for (int i = 0; i < argc; i++) { + absl::string_view arg(argv[i]); + + if (arg == gvisor::testing::kExit42) { + return 42; + } + if (arg == gvisor::testing::kExecWithThread) { + gvisor::testing::ExecWithThread(); + return 1; + } + if (arg == gvisor::testing::kExecFromThread) { + gvisor::testing::ExecFromThread(); + return 1; + } + } + + gvisor::testing::TestInit(&argc, &argv); + return gvisor::testing::RunAllTests(); +} diff --git a/test/syscalls/linux/exec.h b/test/syscalls/linux/exec.h new file mode 100644 index 000000000..5c0f7e654 --- /dev/null +++ b/test/syscalls/linux/exec.h @@ -0,0 +1,34 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef GVISOR_TEST_SYSCALLS_EXEC_H_
#define GVISOR_TEST_SYSCALLS_EXEC_H_

#include <sys/wait.h>

namespace gvisor {
namespace testing {

// Exit code used by exec_basic_workload: `args` plus ten times `envs`.
inline int ArgEnvExitCode(int args, int envs) {
  const int env_component = envs * 10;
  return args + env_component;
}

// Exit status used by exec_basic_workload: the code from ArgEnvExitCode
// packed with W_EXITCODE and a signal field of 0.
inline int ArgEnvExitStatus(int args, int envs) {
  const int exit_code = ArgEnvExitCode(args, envs);
  return W_EXITCODE(exit_code, 0);
}

}  // namespace testing
}  // namespace gvisor

#endif  // GVISOR_TEST_SYSCALLS_EXEC_H_
// diff --git a/test/syscalls/linux/exec_assert_closed_workload.cc b/test/syscalls/linux/exec_assert_closed_workload.cc new file mode 100644 index 000000000..95643618d --- /dev/null +++ b/test/syscalls/linux/exec_assert_closed_workload.cc @@ -0,0 +1,45 @@
// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
+ +#include <errno.h> +#include <stdlib.h> +#include <sys/stat.h> +#include <unistd.h> + +#include <iostream> + +#include "absl/strings/numbers.h" + +int main(int argc, char** argv) { + if (argc != 2) { + std::cerr << "need two arguments, got " << argc; + exit(1); + } + int fd; + if (!absl::SimpleAtoi(argv[1], &fd)) { + std::cerr << "fd: " << argv[1] << " could not be parsed" << std::endl; + exit(1); + } + struct stat s; + if (fstat(fd, &s) == 0) { + std::cerr << "fd: " << argv[1] << " should not be valid" << std::endl; + exit(2); + } + if (errno != EBADF) { + std::cerr << "fstat fd: " << argv[1] << " got errno: " << errno + << " wanted: " << EBADF << std::endl; + exit(1); + } + return 0; +} diff --git a/test/syscalls/linux/exec_basic_workload.cc b/test/syscalls/linux/exec_basic_workload.cc new file mode 100644 index 000000000..1bbd6437e --- /dev/null +++ b/test/syscalls/linux/exec_basic_workload.cc @@ -0,0 +1,31 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <stdlib.h> + +#include <iostream> + +#include "test/syscalls/linux/exec.h" + +int main(int argc, char** argv, char** envp) { + int i; + for (i = 0; i < argc; i++) { + std::cerr << argv[i] << std::endl; + } + for (i = 0; envp[i] != nullptr; i++) { + std::cerr << envp[i] << std::endl; + } + exit(gvisor::testing::ArgEnvExitCode(argc - 1, i)); + return 0; +} diff --git a/test/syscalls/linux/exec_binary.cc b/test/syscalls/linux/exec_binary.cc new file mode 100644 index 000000000..18d2f22c1 --- /dev/null +++ b/test/syscalls/linux/exec_binary.cc @@ -0,0 +1,1646 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <elf.h> +#include <errno.h> +#include <signal.h> +#include <sys/ptrace.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <sys/user.h> +#include <unistd.h> + +#include <algorithm> +#include <functional> +#include <iterator> +#include <tuple> +#include <utility> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" +#include "test/util/cleanup.h" +#include "test/util/file_descriptor.h" +#include "test/util/fs_util.h" +#include "test/util/multiprocess_util.h" +#include "test/util/posix_error.h" +#include "test/util/proc_util.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { +namespace { + +using ::testing::AnyOf; +using ::testing::Eq; + +#if !defined(__x86_64__) && !defined(__aarch64__) +// The assembly stub and ELF internal details must be ported to other arches. +#error "Test only supported on x86-64/arm64" +#endif // __x86_64__ || __aarch64__ + +#if defined(__x86_64__) +#define EM_TYPE EM_X86_64 +#define IP_REG(p) ((p).rip) +#define RAX_REG(p) ((p).rax) +#define RDI_REG(p) ((p).rdi) +#define RETURN_REG(p) ((p).rax) + +// amd64 stub that calls PTRACE_TRACEME and sends itself SIGSTOP. 
+const char kPtraceCode[] = { + // movq $101, %rax /* ptrace */ + '\x48', + '\xc7', + '\xc0', + '\x65', + '\x00', + '\x00', + '\x00', + // movq $0, %rsi /* PTRACE_TRACEME */ + '\x48', + '\xc7', + '\xc6', + '\x00', + '\x00', + '\x00', + '\x00', + // movq $0, %rdi + '\x48', + '\xc7', + '\xc7', + '\x00', + '\x00', + '\x00', + '\x00', + // movq $0, %rdx + '\x48', + '\xc7', + '\xc2', + '\x00', + '\x00', + '\x00', + '\x00', + // movq $0, %r10 + '\x49', + '\xc7', + '\xc2', + '\x00', + '\x00', + '\x00', + '\x00', + // syscall + '\x0f', + '\x05', + + // movq $39, %rax /* getpid */ + '\x48', + '\xc7', + '\xc0', + '\x27', + '\x00', + '\x00', + '\x00', + // syscall + '\x0f', + '\x05', + + // movq %rax, %rdi /* pid */ + '\x48', + '\x89', + '\xc7', + // movq $62, %rax /* kill */ + '\x48', + '\xc7', + '\xc0', + '\x3e', + '\x00', + '\x00', + '\x00', + // movq $19, %rsi /* SIGSTOP */ + '\x48', + '\xc7', + '\xc6', + '\x13', + '\x00', + '\x00', + '\x00', + // syscall + '\x0f', + '\x05', +}; + +// Size of a syscall instruction. 
+constexpr int kSyscallSize = 2; + +#elif defined(__aarch64__) +#define EM_TYPE EM_AARCH64 +#define IP_REG(p) ((p).pc) +#define RAX_REG(p) ((p).regs[8]) +#define RDI_REG(p) ((p).regs[0]) +#define RETURN_REG(p) ((p).regs[0]) + +const char kPtraceCode[] = { + // MOVD $117, R8 /* ptrace */ + '\xa8', + '\x0e', + '\x80', + '\xd2', + // MOVD $0, R0 /* PTRACE_TRACEME */ + '\x00', + '\x00', + '\x80', + '\xd2', + // MOVD $0, R1 /* pid */ + '\x01', + '\x00', + '\x80', + '\xd2', + // MOVD $0, R2 /* addr */ + '\x02', + '\x00', + '\x80', + '\xd2', + // MOVD $0, R3 /* data */ + '\x03', + '\x00', + '\x80', + '\xd2', + // SVC + '\x01', + '\x00', + '\x00', + '\xd4', + // MOVD $172, R8 /* getpid */ + '\x88', + '\x15', + '\x80', + '\xd2', + // SVC + '\x01', + '\x00', + '\x00', + '\xd4', + // MOVD $129, R8 /* kill, R0=pid */ + '\x28', + '\x10', + '\x80', + '\xd2', + // MOVD $19, R1 /* SIGSTOP */ + '\x61', + '\x02', + '\x80', + '\xd2', + // SVC + '\x01', + '\x00', + '\x00', + '\xd4', +}; +// Size of a syscall instruction. +constexpr int kSyscallSize = 4; +#else +#error "Unknown architecture" +#endif + +// This test suite tests executable loading in the kernel (ELF and interpreter +// scripts). + +// Parameterized ELF types for 64 and 32 bit. +template <int Size> +struct ElfTypes; + +template <> +struct ElfTypes<64> { + typedef Elf64_Ehdr ElfEhdr; + typedef Elf64_Phdr ElfPhdr; +}; + +template <> +struct ElfTypes<32> { + typedef Elf32_Ehdr ElfEhdr; + typedef Elf32_Phdr ElfPhdr; +}; + +template <int Size> +struct ElfBinary { + using ElfEhdr = typename ElfTypes<Size>::ElfEhdr; + using ElfPhdr = typename ElfTypes<Size>::ElfPhdr; + + ElfEhdr header = {}; + std::vector<ElfPhdr> phdrs; + std::vector<char> data; + + // UpdateOffsets updates p_offset, p_vaddr in all phdrs to account for the + // space taken by the header and phdrs. + // + // It also updates header.e_phnum and adds the offset to header.e_entry to + // account for the headers residing in the first PT_LOAD segment. 
+ // + // Before calling UpdateOffsets each of those fields should be the appropriate + // offset into data. + void UpdateOffsets() { + size_t offset = sizeof(header) + phdrs.size() * sizeof(ElfPhdr); + header.e_entry += offset; + header.e_phnum = phdrs.size(); + for (auto& p : phdrs) { + p.p_offset += offset; + p.p_vaddr += offset; + } + } + + // AddInterpreter adds a PT_INTERP segment with the passed contents. + // + // A later call to UpdateOffsets is required to make the new phdr valid. + void AddInterpreter(std::vector<char> contents) { + const int start = data.size(); + data.insert(data.end(), contents.begin(), contents.end()); + const int size = data.size() - start; + + ElfPhdr phdr = {}; + phdr.p_type = PT_INTERP; + phdr.p_offset = start; + phdr.p_filesz = size; + phdr.p_memsz = size; + // "If [PT_INTERP] is present, it must precede any loadable segment entry." + phdrs.insert(phdrs.begin(), phdr); + } + + // Writes the header, phdrs, and data to fd. + PosixError Write(int fd) const { + int ret = WriteFd(fd, &header, sizeof(header)); + if (ret < 0) { + return PosixError(errno, "failed to write header"); + } else if (ret != sizeof(header)) { + return PosixError(EIO, absl::StrCat("short write of header: ", ret)); + } + + for (auto const& p : phdrs) { + ret = WriteFd(fd, &p, sizeof(p)); + if (ret < 0) { + return PosixError(errno, "failed to write phdr"); + } else if (ret != sizeof(p)) { + return PosixError(EIO, absl::StrCat("short write of phdr: ", ret)); + } + } + + ret = WriteFd(fd, data.data(), data.size()); + if (ret < 0) { + return PosixError(errno, "failed to write data"); + } else if (ret != static_cast<int>(data.size())) { + return PosixError(EIO, absl::StrCat("short write of data: ", ret)); + } + + return NoError(); + } +}; + +// Creates a new temporary executable ELF file in parent with elf as the +// contents. 
+template <int Size> +PosixErrorOr<TempPath> CreateElfWith(absl::string_view parent, + ElfBinary<Size> const& elf) { + ASSIGN_OR_RETURN_ERRNO( + auto file, TempPath::CreateFileWith(parent, absl::string_view(), 0755)); + ASSIGN_OR_RETURN_ERRNO(auto fd, Open(file.path(), O_RDWR)); + RETURN_IF_ERRNO(elf.Write(fd.get())); + return std::move(file); +} + +// Creates a new temporary executable ELF file with elf as the contents. +template <int Size> +PosixErrorOr<TempPath> CreateElfWith(ElfBinary<Size> const& elf) { + return CreateElfWith(GetAbsoluteTestTmpdir(), elf); +} + +// Wait for pid to stop, and assert that it stopped via SIGSTOP. +PosixError WaitStopped(pid_t pid) { + int status; + int ret = RetryEINTR(waitpid)(pid, &status, 0); + MaybeSave(); + if (ret < 0) { + return PosixError(errno, "wait failed"); + } else if (ret != pid) { + return PosixError(ESRCH, absl::StrCat("wait got ", ret, " want ", pid)); + } + + if (!WIFSTOPPED(status) || WSTOPSIG(status) != SIGSTOP) { + return PosixError(EINVAL, + absl::StrCat("pid did not SIGSTOP; status = ", status)); + } + + return NoError(); +} + +// Returns a valid ELF that PTRACE_TRACEME and SIGSTOPs itself. +// +// UpdateOffsets must be called before writing this ELF. +ElfBinary<64> StandardElf() { + ElfBinary<64> elf; + elf.header.e_ident[EI_MAG0] = ELFMAG0; + elf.header.e_ident[EI_MAG1] = ELFMAG1; + elf.header.e_ident[EI_MAG2] = ELFMAG2; + elf.header.e_ident[EI_MAG3] = ELFMAG3; + elf.header.e_ident[EI_CLASS] = ELFCLASS64; + elf.header.e_ident[EI_DATA] = ELFDATA2LSB; + elf.header.e_ident[EI_VERSION] = EV_CURRENT; + elf.header.e_type = ET_EXEC; + elf.header.e_machine = EM_TYPE; + elf.header.e_version = EV_CURRENT; + elf.header.e_phoff = sizeof(elf.header); + elf.header.e_phentsize = sizeof(decltype(elf)::ElfPhdr); + + // TODO(gvisor.dev/issue/153): Always include a PT_GNU_STACK segment to + // disable executable stacks. 
With this omitted the stack (and all PROT_READ) + // mappings should be executable, but gVisor doesn't support that. + decltype(elf)::ElfPhdr phdr = {}; + phdr.p_type = PT_GNU_STACK; + phdr.p_flags = PF_R | PF_W; + elf.phdrs.push_back(phdr); + + phdr = {}; + phdr.p_type = PT_LOAD; + phdr.p_flags = PF_R | PF_X; + phdr.p_offset = 0; + phdr.p_vaddr = 0x40000; + phdr.p_filesz = sizeof(kPtraceCode); + phdr.p_memsz = phdr.p_filesz; + elf.phdrs.push_back(phdr); + + elf.header.e_entry = phdr.p_vaddr; + + elf.data.assign(kPtraceCode, kPtraceCode + sizeof(kPtraceCode)); + + return elf; +} + +// Test that a trivial binary executes. +TEST(ElfTest, Execute) { + ElfBinary<64> elf = StandardElf(); + elf.UpdateOffsets(); + + TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf)); + + pid_t child; + int execve_errno; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno)); + ASSERT_EQ(execve_errno, 0); + + // Ensure it made it to SIGSTOP. + ASSERT_NO_ERRNO(WaitStopped(child)); + + struct user_regs_struct regs; + struct iovec iov; + iov.iov_base = ®s; + iov.iov_len = sizeof(regs); + EXPECT_THAT(ptrace(PTRACE_GETREGSET, child, NT_PRSTATUS, &iov), + SyscallSucceeds()); + // Read exactly the full register set. + EXPECT_EQ(iov.iov_len, sizeof(regs)); + // RIP/PC is just beyond the final syscall instruction. + EXPECT_EQ(IP_REG(regs), elf.header.e_entry + sizeof(kPtraceCode)); + + EXPECT_THAT(child, ContainsMappings(std::vector<ProcMapsEntry>({ + {0x40000, 0x41000, true, false, true, true, 0, 0, 0, 0, + file.path().c_str()}, + }))); +} + +// StandardElf without data completes execve, but faults once running. 
TEST(ElfTest, MissingText) {
  ElfBinary<64> elf = StandardElf();
  elf.data.clear();
  elf.UpdateOffsets();

  TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf));

  pid_t child;
  int execve_errno;
  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
      ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno));
  ASSERT_EQ(execve_errno, 0);

  int status;
  ASSERT_THAT(RetryEINTR(waitpid)(child, &status, 0),
              SyscallSucceedsWithValue(child));
  // It runs off the end of the zeroes filling the end of the page.
#if defined(__x86_64__)
  EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGSEGV) << status;
#elif defined(__aarch64__)
  // 0 is an invalid instruction opcode on arm64.
  EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGILL) << status;
#endif
}

// Typical ELF with a data + bss segment
TEST(ElfTest, DataSegment) {
  ElfBinary<64> elf = StandardElf();

  // Create a standard ELF, but extend to 1.5 pages. The second page will be the
  // beginning of a multi-page data + bss segment.
  elf.data.resize(kPageSize + kPageSize / 2);

  decltype(elf)::ElfPhdr phdr = {};
  phdr.p_type = PT_LOAD;
  phdr.p_flags = PF_R | PF_W;
  phdr.p_offset = kPageSize;
  phdr.p_vaddr = 0x41000;
  phdr.p_filesz = kPageSize / 2;
  // The header is going to push vaddr up by a few hundred bytes. Keep p_memsz a
  // bit less than 2 pages so this mapping doesn't extend beyond 0x43000.
  phdr.p_memsz = 2 * kPageSize - kPageSize / 2;
  elf.phdrs.push_back(phdr);

  elf.UpdateOffsets();

  TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf));

  pid_t child;
  int execve_errno;
  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
      ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno));
  ASSERT_EQ(execve_errno, 0);

  ASSERT_NO_ERRNO(WaitStopped(child));

  EXPECT_THAT(
      child, ContainsMappings(std::vector<ProcMapsEntry>({
                 // text page.
                 {0x40000, 0x41000, true, false, true, true, 0, 0, 0, 0,
                  file.path().c_str()},
                 // data + bss page from file.
                 {0x41000, 0x42000, true, true, false, true, kPageSize, 0, 0, 0,
                  file.path().c_str()},
                 // bss page from anon.
                 {0x42000, 0x43000, true, true, false, true, 0, 0, 0, 0, ""},
             })));
}

// Additional pages beyond filesz honor (only) execute protections.
//
// N.B. Linux changed this in 4.11 (16e72e9b30986 "powerpc: do not make the
// entire heap executable"). Previously, extra pages were always RW.
TEST(ElfTest, ExtraMemPages) {
  // gVisor has the newer behavior.
  if (!IsRunningOnGvisor()) {
    auto version = ASSERT_NO_ERRNO_AND_VALUE(GetKernelVersion());
    SKIP_IF(version.major < 4 || (version.major == 4 && version.minor < 11));
  }

  ElfBinary<64> elf = StandardElf();

  // Create a standard ELF, but extend to 1.5 pages. The second page will be the
  // beginning of a multi-page data + bss segment.
  elf.data.resize(kPageSize + kPageSize / 2);

  decltype(elf)::ElfPhdr phdr = {};
  phdr.p_type = PT_LOAD;
  // RWX segment. The extra anon page will also be RWX.
  //
  // N.B. Linux uses clear_user to clear the end of the file-mapped page, which
  // respects the mapping protections. Thus if we map this RO with memsz >
  // (unaligned) filesz, then execve will fail with EFAULT. See padzero(elf_bss)
  // in fs/binfmt_elf.c:load_elf_binary.
  //
  // N.N.B.B. The above only applies to the last segment. For earlier segments,
  // the clear_user error is ignored.
  phdr.p_flags = PF_R | PF_W | PF_X;
  phdr.p_offset = kPageSize;
  phdr.p_vaddr = 0x41000;
  phdr.p_filesz = kPageSize / 2;
  // The header is going to push vaddr up by a few hundred bytes. Keep p_memsz a
  // bit less than 2 pages so this mapping doesn't extend beyond 0x43000.
  phdr.p_memsz = 2 * kPageSize - kPageSize / 2;
  elf.phdrs.push_back(phdr);

  elf.UpdateOffsets();

  TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf));

  pid_t child;
  int execve_errno;
  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
      ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno));
  ASSERT_EQ(execve_errno, 0);

  ASSERT_NO_ERRNO(WaitStopped(child));

  EXPECT_THAT(child,
              ContainsMappings(std::vector<ProcMapsEntry>({
                  // text page.
                  {0x40000, 0x41000, true, false, true, true, 0, 0, 0, 0,
                   file.path().c_str()},
                  // data + bss page from file.
                  {0x41000, 0x42000, true, true, true, true, kPageSize, 0, 0, 0,
                   file.path().c_str()},
                  // extra page from anon.
                  {0x42000, 0x43000, true, true, true, true, 0, 0, 0, 0, ""},
              })));
}

// An aligned segment with filesz == 0, memsz > 0 is anon-only.
TEST(ElfTest, AnonOnlySegment) {
  ElfBinary<64> elf = StandardElf();

  decltype(elf)::ElfPhdr phdr = {};
  phdr.p_type = PT_LOAD;
  // RO segment. The extra anon page will be RW anyways.
  phdr.p_flags = PF_R;
  phdr.p_offset = 0;
  phdr.p_vaddr = 0x41000;
  phdr.p_filesz = 0;
  phdr.p_memsz = kPageSize;
  elf.phdrs.push_back(phdr);

  elf.UpdateOffsets();

  // UpdateOffsets adjusts p_vaddr and p_offset by the header size, but we need
  // a page-aligned p_vaddr to get a truly anon-only page.
  //
  // phdrs[2] is the segment pushed above (after PT_GNU_STACK and the text
  // PT_LOAD that StandardElf provides).
  elf.phdrs[2].p_vaddr = 0x41000;
  // N.B. p_offset is now unaligned, but Linux doesn't care since this is
  // anon-only.

  TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf));

  pid_t child;
  int execve_errno;
  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
      ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno));
  ASSERT_EQ(execve_errno, 0);

  ASSERT_NO_ERRNO(WaitStopped(child));

  EXPECT_THAT(child,
              ContainsMappings(std::vector<ProcMapsEntry>({
                  // text page.
                  {0x40000, 0x41000, true, false, true, true, 0, 0, 0, 0,
                   file.path().c_str()},
                  // anon page.
                  {0x41000, 0x42000, true, true, false, true, 0, 0, 0, 0, ""},
              })));
}

// p_offset must have the same alignment as p_vaddr.
TEST(ElfTest, UnalignedOffset) {
  ElfBinary<64> elf = StandardElf();

  // Unaligned offset.
  elf.phdrs[1].p_offset += 1;

  elf.UpdateOffsets();

  TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf));

  pid_t child;
  int execve_errno;
  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
      ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno));

  // execve(2) returns EINVAL, but behavior varies between Linux and gVisor.
  //
  // On Linux, the new mm is committed before attempting to map into it. By the
  // time we hit EINVAL in the segment mmap, the old mm is gone. Linux returns
  // to an empty mm, which immediately segfaults.
  //
  // OTOH, gVisor maps into the new mm before committing it. Thus when it hits
  // failure, the caller is still intact to receive the error.
  if (IsRunningOnGvisor()) {
    ASSERT_EQ(execve_errno, EINVAL);
  } else {
    ASSERT_EQ(execve_errno, 0);

    int status;
    ASSERT_THAT(RetryEINTR(waitpid)(child, &status, 0),
                SyscallSucceedsWithValue(child));
    EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGSEGV) << status;
  }
}

// Linux will allow PT_LOAD segments to overlap.
TEST(ElfTest, DirectlyOverlappingSegments) {
  // NOTE(b/37289926): see PIEOutOfOrderSegments.
  SKIP_IF(IsRunningOnGvisor());

  ElfBinary<64> elf = StandardElf();

  // Same as the StandardElf mapping.
  decltype(elf)::ElfPhdr phdr = {};
  phdr.p_type = PT_LOAD;
  // Add PF_W so we can differentiate this mapping from the first.
  phdr.p_flags = PF_R | PF_W | PF_X;
  phdr.p_offset = 0;
  phdr.p_vaddr = 0x40000;
  phdr.p_filesz = sizeof(kPtraceCode);
  phdr.p_memsz = phdr.p_filesz;
  elf.phdrs.push_back(phdr);

  elf.UpdateOffsets();

  TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf));

  pid_t child;
  int execve_errno;
  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
      ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno));
  ASSERT_EQ(execve_errno, 0);

  ASSERT_NO_ERRNO(WaitStopped(child));

  // Only the writable (second) mapping is expected at this address.
  EXPECT_THAT(child, ContainsMappings(std::vector<ProcMapsEntry>({
                         {0x40000, 0x41000, true, true, true, true, 0, 0, 0, 0,
                          file.path().c_str()},
                     })));
}

// Linux allows out-of-order PT_LOAD segments.
TEST(ElfTest, OutOfOrderSegments) {
  // NOTE(b/37289926): see PIEOutOfOrderSegments.
  SKIP_IF(IsRunningOnGvisor());

  ElfBinary<64> elf = StandardElf();

  // This segment's vaddr is lower than that of the StandardElf segment that
  // precedes it in the phdr table.
  decltype(elf)::ElfPhdr phdr = {};
  phdr.p_type = PT_LOAD;
  phdr.p_flags = PF_R | PF_X;
  phdr.p_offset = 0;
  phdr.p_vaddr = 0x20000;
  phdr.p_filesz = sizeof(kPtraceCode);
  phdr.p_memsz = phdr.p_filesz;
  elf.phdrs.push_back(phdr);

  elf.UpdateOffsets();

  TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf));

  pid_t child;
  int execve_errno;
  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
      ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno));
  ASSERT_EQ(execve_errno, 0);

  ASSERT_NO_ERRNO(WaitStopped(child));

  EXPECT_THAT(child, ContainsMappings(std::vector<ProcMapsEntry>({
                         {0x20000, 0x21000, true, false, true, true, 0, 0, 0, 0,
                          file.path().c_str()},
                         {0x40000, 0x41000, true, false, true, true, 0, 0, 0, 0,
                          file.path().c_str()},
                     })));
}

// header.e_phoff is bounded by the end of the file.
TEST(ElfTest, OutOfBoundsPhdrs) {
  ElfBinary<64> elf = StandardElf();
  // Point the phdr table well past the end of the file that gets written.
  elf.header.e_phoff = 0x100000;
  elf.UpdateOffsets();

  TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf));

  pid_t child;
  int execve_errno;
  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
      ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno));
  // On Linux 3.11, this caused EIO. On newer Linux, it causes ENOEXEC.
  EXPECT_THAT(execve_errno, AnyOf(Eq(ENOEXEC), Eq(EIO)));
}

// Claim there is a phdr beyond the end of the file, but don't include it.
TEST(ElfTest, MissingPhdr) {
  ElfBinary<64> elf = StandardElf();

  // Clear data so the file ends immediately after the phdrs.
  // N.B. Per ElfTest.MissingText, StandardElf without data completes execve
  // without error.
  elf.data.clear();
  elf.UpdateOffsets();

  // Claim that there is another phdr just beyond the end of the file. Of
  // course, it isn't accessible.
  elf.header.e_phnum++;

  TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf));

  pid_t child;
  int execve_errno;
  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
      ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno));
  // On Linux 3.11, this caused EIO. On newer Linux, it causes ENOEXEC.
  EXPECT_THAT(execve_errno, AnyOf(Eq(ENOEXEC), Eq(EIO)));
}

// No headers at all, just the ELF magic.
TEST(ElfTest, MissingHeader) {
  TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileMode(0755));
  FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR));

  const char kElfMagic[] = {0x7f, 'E', 'L', 'F'};

  ASSERT_THAT(WriteFd(fd.get(), &kElfMagic, sizeof(kElfMagic)),
              SyscallSucceeds());
  // Release the descriptor before attempting to execute the file.
  fd.reset();

  pid_t child;
  int execve_errno;
  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
      ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno));
  EXPECT_EQ(execve_errno, ENOEXEC);
}

// Load a PIE ELF with a data + bss segment.
+TEST(ElfTest, PIE) { + ElfBinary<64> elf = StandardElf(); + + elf.header.e_type = ET_DYN; + + // Create a standard ELF, but extend to 1.5 pages. The second page will be the + // beginning of a multi-page data + bss segment. + elf.data.resize(kPageSize + kPageSize / 2); + + elf.header.e_entry = 0x0; + + decltype(elf)::ElfPhdr phdr = {}; + phdr.p_type = PT_LOAD; + phdr.p_flags = PF_R | PF_W; + phdr.p_offset = kPageSize; + // Put the data segment at a bit of an offset. + phdr.p_vaddr = 0x20000; + phdr.p_filesz = kPageSize / 2; + // The header is going to push vaddr up by a few hundred bytes. Keep p_memsz a + // bit less than 2 pages so this mapping doesn't extend beyond 0x43000. + phdr.p_memsz = 2 * kPageSize - kPageSize / 2; + elf.phdrs.push_back(phdr); + + elf.UpdateOffsets(); + + // The first segment really needs to start at 0 for a normal PIE binary, and + // thus includes the headers. + const uint64_t offset = elf.phdrs[1].p_offset; + elf.phdrs[1].p_offset = 0x0; + elf.phdrs[1].p_vaddr = 0x0; + elf.phdrs[1].p_filesz += offset; + elf.phdrs[1].p_memsz += offset; + + TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf)); + + pid_t child; + int execve_errno; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno)); + ASSERT_EQ(execve_errno, 0); + + ASSERT_NO_ERRNO(WaitStopped(child)); + + // RIP tells us which page the first segment was loaded into. + struct user_regs_struct regs; + struct iovec iov; + iov.iov_base = ®s; + iov.iov_len = sizeof(regs); + + EXPECT_THAT(ptrace(PTRACE_GETREGSET, child, NT_PRSTATUS, &iov), + SyscallSucceeds()); + // Read exactly the full register set. + EXPECT_EQ(iov.iov_len, sizeof(regs)); + + const uint64_t load_addr = IP_REG(regs) & ~(kPageSize - 1); + + EXPECT_THAT(child, ContainsMappings(std::vector<ProcMapsEntry>({ + // text page. + {load_addr, load_addr + 0x1000, true, false, true, + true, 0, 0, 0, 0, file.path().c_str()}, + // data + bss page from file. 
+ {load_addr + 0x20000, load_addr + 0x21000, true, true, + false, true, kPageSize, 0, 0, 0, file.path().c_str()}, + // bss page from anon. + {load_addr + 0x21000, load_addr + 0x22000, true, true, + false, true, 0, 0, 0, 0, ""}, + }))); +} + +// PIE binary with a non-zero start address. +// +// This is non-standard for a PIE binary, but valid. The binary is still loaded +// at an arbitrary address, not the first PT_LOAD vaddr. +// +// N.B. Linux changed this behavior in d1fd836dcf00d2028c700c7e44d2c23404062c90. +// Previously, with "randomization" enabled, PIE binaries with a non-zero start +// address would be be loaded at the address they specified because mmap was +// passed the load address, which wasn't 0 as expected. +// +// This change is present in kernel v4.1+. +TEST(ElfTest, PIENonZeroStart) { + // gVisor has the newer behavior. + if (!IsRunningOnGvisor()) { + auto version = ASSERT_NO_ERRNO_AND_VALUE(GetKernelVersion()); + SKIP_IF(version.major < 4 || (version.major == 4 && version.minor < 1)); + } + + ElfBinary<64> elf = StandardElf(); + + elf.header.e_type = ET_DYN; + + // Create a standard ELF, but extend to 1.5 pages. The second page will be the + // beginning of a multi-page data + bss segment. + elf.data.resize(kPageSize + kPageSize / 2); + + decltype(elf)::ElfPhdr phdr = {}; + phdr.p_type = PT_LOAD; + phdr.p_flags = PF_R | PF_W; + phdr.p_offset = kPageSize; + // Put the data segment at a bit of an offset. + phdr.p_vaddr = 0x60000; + phdr.p_filesz = kPageSize / 2; + // The header is going to push vaddr up by a few hundred bytes. Keep p_memsz a + // bit less than 2 pages so this mapping doesn't extend beyond 0x43000. 
+ phdr.p_memsz = 2 * kPageSize - kPageSize / 2; + elf.phdrs.push_back(phdr); + + elf.UpdateOffsets(); + + TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf)); + + pid_t child; + int execve_errno; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno)); + ASSERT_EQ(execve_errno, 0); + + ASSERT_NO_ERRNO(WaitStopped(child)); + + // RIP tells us which page the first segment was loaded into. + struct user_regs_struct regs; + struct iovec iov; + iov.iov_base = ®s; + iov.iov_len = sizeof(regs); + EXPECT_THAT(ptrace(PTRACE_GETREGSET, child, NT_PRSTATUS, &iov), + SyscallSucceeds()); + // Read exactly the full register set. + EXPECT_EQ(iov.iov_len, sizeof(regs)); + + const uint64_t load_addr = IP_REG(regs) & ~(kPageSize - 1); + + // The ELF is loaded at an arbitrary address, not the first PT_LOAD vaddr. + // + // N.B. this is technically flaky, but Linux is *extremely* unlikely to pick + // this as the start address, as it searches from the top down. + EXPECT_NE(load_addr, 0x40000); + + EXPECT_THAT(child, ContainsMappings(std::vector<ProcMapsEntry>({ + // text page. + {load_addr, load_addr + 0x1000, true, false, true, + true, 0, 0, 0, 0, file.path().c_str()}, + // data + bss page from file. + {load_addr + 0x20000, load_addr + 0x21000, true, true, + false, true, kPageSize, 0, 0, 0, file.path().c_str()}, + // bss page from anon. + {load_addr + 0x21000, load_addr + 0x22000, true, true, + false, true, 0, 0, 0, 0, ""}, + }))); +} + +TEST(ElfTest, PIEOutOfOrderSegments) { + // TODO(b/37289926): This triggers a bug in Linux where it computes the size + // of the binary as 0x20000 - 0x40000 = 0xfffffffffffe0000, which obviously + // fails to map. + // + // We test gVisor's behavior (of rejecting the binary) because I assert that + // Linux is wrong and needs to be fixed. 
+ SKIP_IF(!IsRunningOnGvisor()); + + ElfBinary<64> elf = StandardElf(); + + elf.header.e_type = ET_DYN; + + // Create a standard ELF, but extend to 1.5 pages. The second page will be the + // beginning of a multi-page data + bss segment. + elf.data.resize(kPageSize + kPageSize / 2); + + decltype(elf)::ElfPhdr phdr = {}; + phdr.p_type = PT_LOAD; + phdr.p_flags = PF_R | PF_W; + phdr.p_offset = kPageSize; + // Put the data segment *before* the first segment. + phdr.p_vaddr = 0x20000; + phdr.p_filesz = kPageSize / 2; + // The header is going to push vaddr up by a few hundred bytes. Keep p_memsz a + // bit less than 2 pages so this mapping doesn't extend beyond 0x43000. + phdr.p_memsz = 2 * kPageSize - kPageSize / 2; + elf.phdrs.push_back(phdr); + + elf.UpdateOffsets(); + + TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf)); + + pid_t child; + int execve_errno; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno)); + EXPECT_EQ(execve_errno, ENOEXEC); +} + +// Standard dynamically linked binary with an ELF interpreter. +TEST(ElfTest, ELFInterpreter) { + ElfBinary<64> interpreter = StandardElf(); + interpreter.header.e_type = ET_DYN; + interpreter.header.e_entry = 0x0; + interpreter.UpdateOffsets(); + + // The first segment really needs to start at 0 for a normal PIE binary, and + // thus includes the headers. + uint64_t const offset = interpreter.phdrs[1].p_offset; + // N.B. Since Linux 4.10 (0036d1f7eb95b "binfmt_elf: fix calculations for bss + // padding"), Linux unconditionally zeroes the remainder of the highest mapped + // page in an interpreter, failing if the protections don't allow write. Thus + // we must mark this writeable. 
+ interpreter.phdrs[1].p_flags = PF_R | PF_W | PF_X; + interpreter.phdrs[1].p_offset = 0x0; + interpreter.phdrs[1].p_vaddr = 0x0; + interpreter.phdrs[1].p_filesz += offset; + interpreter.phdrs[1].p_memsz += offset; + + TempPath interpreter_file = + ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(interpreter)); + + ElfBinary<64> binary = StandardElf(); + + // Append the interpreter path. + int const interp_data_start = binary.data.size(); + for (char const c : interpreter_file.path()) { + binary.data.push_back(c); + } + // NUL-terminate. + binary.data.push_back(0); + int const interp_data_size = binary.data.size() - interp_data_start; + + decltype(binary)::ElfPhdr phdr = {}; + phdr.p_type = PT_INTERP; + phdr.p_offset = interp_data_start; + phdr.p_filesz = interp_data_size; + phdr.p_memsz = interp_data_size; + // "If [PT_INTERP] is present, it must precede any loadable segment entry." + // + // However, Linux allows it anywhere, so we just stick it at the end to make + // sure out-of-order PT_INTERP is OK. + binary.phdrs.push_back(phdr); + + binary.UpdateOffsets(); + + TempPath binary_file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(binary)); + + pid_t child; + int execve_errno; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(ForkAndExec( + binary_file.path(), {binary_file.path()}, {}, &child, &execve_errno)); + ASSERT_EQ(execve_errno, 0); + + ASSERT_NO_ERRNO(WaitStopped(child)); + + // RIP tells us which page the first segment of the interpreter was loaded + // into. + struct user_regs_struct regs; + struct iovec iov; + iov.iov_base = ®s; + iov.iov_len = sizeof(regs); + EXPECT_THAT(ptrace(PTRACE_GETREGSET, child, NT_PRSTATUS, &iov), + SyscallSucceeds()); + // Read exactly the full register set. 
+ EXPECT_EQ(iov.iov_len, sizeof(regs)); + + const uint64_t interp_load_addr = IP_REG(regs) & ~(kPageSize - 1); + + EXPECT_THAT( + child, ContainsMappings(std::vector<ProcMapsEntry>({ + // Main binary + {0x40000, 0x41000, true, false, true, true, 0, 0, 0, 0, + binary_file.path().c_str()}, + // Interpreter + {interp_load_addr, interp_load_addr + 0x1000, true, true, true, + true, 0, 0, 0, 0, interpreter_file.path().c_str()}, + }))); +} + +// Test parameter to ElfInterpterStaticTest cases. The first item is a suffix to +// add to the end of the interpreter path in the PT_INTERP segment and the +// second is the expected execve(2) errno. +using ElfInterpreterStaticParam = std::tuple<std::vector<char>, int>; + +class ElfInterpreterStaticTest + : public ::testing::TestWithParam<ElfInterpreterStaticParam> {}; + +// Statically linked ELF with a statically linked ELF interpreter. +TEST_P(ElfInterpreterStaticTest, Test) { + const std::vector<char> segment_suffix = std::get<0>(GetParam()); + const int expected_errno = std::get<1>(GetParam()); + + ElfBinary<64> interpreter = StandardElf(); + // See comment in ElfTest.ELFInterpreter. + interpreter.phdrs[1].p_flags = PF_R | PF_W | PF_X; + interpreter.UpdateOffsets(); + TempPath interpreter_file = + ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(interpreter)); + + ElfBinary<64> binary = StandardElf(); + // The PT_LOAD segment conflicts with the interpreter's PT_LOAD segment. The + // interpreter's will be mapped directly over the binary's. + + // Interpreter path plus the parameterized suffix in the PT_INTERP segment. 
+ const std::string path = interpreter_file.path(); + std::vector<char> segment(path.begin(), path.end()); + segment.insert(segment.end(), segment_suffix.begin(), segment_suffix.end()); + binary.AddInterpreter(segment); + + binary.UpdateOffsets(); + + TempPath binary_file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(binary)); + + pid_t child; + int execve_errno; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(ForkAndExec( + binary_file.path(), {binary_file.path()}, {}, &child, &execve_errno)); + ASSERT_EQ(execve_errno, expected_errno); + + if (expected_errno == 0) { + ASSERT_NO_ERRNO(WaitStopped(child)); + + EXPECT_THAT(child, ContainsMappings(std::vector<ProcMapsEntry>({ + // Interpreter. + {0x40000, 0x41000, true, true, true, true, 0, 0, 0, + 0, interpreter_file.path().c_str()}, + }))); + } +} + +INSTANTIATE_TEST_SUITE_P( + Cases, ElfInterpreterStaticTest, + ::testing::ValuesIn({ + // Simple NUL-terminator to run the interpreter as normal. + std::make_tuple(std::vector<char>({'\0'}), 0), + // Add some garbage to the segment followed by a NUL-terminator. This is + // ignored. + std::make_tuple(std::vector<char>({'\0', 'b', '\0'}), 0), + // Add some garbage to the segment without a NUL-terminator. Linux will + // reject + // this. + std::make_tuple(std::vector<char>({'\0', 'b'}), ENOEXEC), + })); + +// Test parameter to ElfInterpterBadPathTest cases. The first item is the +// contents of the PT_INTERP segment and the second is the expected execve(2) +// errno. 
using ElfInterpreterBadPathParam = std::tuple<std::vector<char>, int>;

class ElfInterpreterBadPathTest
    : public ::testing::TestWithParam<ElfInterpreterBadPathParam> {};

// Executes a StandardElf whose PT_INTERP segment holds exactly the
// parameterized bytes and checks the errno reported for execve.
TEST_P(ElfInterpreterBadPathTest, Test) {
  const std::vector<char> segment = std::get<0>(GetParam());
  const int expected_errno = std::get<1>(GetParam());

  ElfBinary<64> binary = StandardElf();
  binary.AddInterpreter(segment);
  binary.UpdateOffsets();

  TempPath binary_file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(binary));

  // No child pid is requested here; only the execve errno is checked.
  int execve_errno;
  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(ForkAndExec(
      binary_file.path(), {binary_file.path()}, {}, nullptr, &execve_errno));
  EXPECT_EQ(execve_errno, expected_errno);
}

INSTANTIATE_TEST_SUITE_P(
    Cases, ElfInterpreterBadPathTest,
    ::testing::ValuesIn({
        // NUL-terminated fake path in the PT_INTERP segment.
        std::make_tuple(std::vector<char>({'/', 'f', '/', 'b', '\0'}), ENOENT),
        // ELF interpreter not NUL-terminated.
        std::make_tuple(std::vector<char>({'/', 'f', '/', 'b'}), ENOEXEC),
        // ELF interpreter path omitted entirely.
        //
        // fs/binfmt_elf.c:load_elf_binary returns ENOEXEC if p_filesz is < 2
        // bytes.
        std::make_tuple(std::vector<char>({'\0'}), ENOEXEC),
        // ELF interpreter path = "\0".
        //
        // fs/binfmt_elf.c:load_elf_binary returns ENOEXEC if p_filesz is < 2
        // bytes, so add an extra byte to pass that check.
        //
        // load_elf_binary -> open_exec -> do_open_execat fails to check that
        // name != '\0' before calling do_filp_open, which thus opens the
        // working directory. do_open_execat returns EACCES because the
        // directory is not a regular file.
        std::make_tuple(std::vector<char>({'\0', '\0'}), EACCES),
    }));

// Relative path to ELF interpreter.
+TEST(ElfTest, ELFInterpreterRelative) { + ElfBinary<64> interpreter = StandardElf(); + interpreter.header.e_type = ET_DYN; + interpreter.header.e_entry = 0x0; + interpreter.UpdateOffsets(); + + // The first segment really needs to start at 0 for a normal PIE binary, and + // thus includes the headers. + uint64_t const offset = interpreter.phdrs[1].p_offset; + // See comment in ElfTest.ELFInterpreter. + interpreter.phdrs[1].p_flags = PF_R | PF_W | PF_X; + interpreter.phdrs[1].p_offset = 0x0; + interpreter.phdrs[1].p_vaddr = 0x0; + interpreter.phdrs[1].p_filesz += offset; + interpreter.phdrs[1].p_memsz += offset; + + TempPath interpreter_file = + ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(interpreter)); + auto cwd = ASSERT_NO_ERRNO_AND_VALUE(GetCWD()); + auto interpreter_relative = + ASSERT_NO_ERRNO_AND_VALUE(GetRelativePath(cwd, interpreter_file.path())); + + ElfBinary<64> binary = StandardElf(); + + // NUL-terminated path in the PT_INTERP segment. + std::vector<char> segment(interpreter_relative.begin(), + interpreter_relative.end()); + segment.push_back(0); + binary.AddInterpreter(segment); + + binary.UpdateOffsets(); + + TempPath binary_file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(binary)); + + pid_t child; + int execve_errno; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(ForkAndExec( + binary_file.path(), {binary_file.path()}, {}, &child, &execve_errno)); + ASSERT_EQ(execve_errno, 0); + + ASSERT_NO_ERRNO(WaitStopped(child)); + + // RIP tells us which page the first segment of the interpreter was loaded + // into. + struct user_regs_struct regs; + struct iovec iov; + iov.iov_base = ®s; + iov.iov_len = sizeof(regs); + EXPECT_THAT(ptrace(PTRACE_GETREGSET, child, NT_PRSTATUS, &iov), + SyscallSucceeds()); + // Read exactly the full register set. 
+ EXPECT_EQ(iov.iov_len, sizeof(regs)); + + const uint64_t interp_load_addr = IP_REG(regs) & ~(kPageSize - 1); + + EXPECT_THAT( + child, ContainsMappings(std::vector<ProcMapsEntry>({ + // Main binary + {0x40000, 0x41000, true, false, true, true, 0, 0, 0, 0, + binary_file.path().c_str()}, + // Interpreter + {interp_load_addr, interp_load_addr + 0x1000, true, true, true, + true, 0, 0, 0, 0, interpreter_file.path().c_str()}, + }))); +} + +// ELF interpreter architecture doesn't match the binary. +TEST(ElfTest, ELFInterpreterWrongArch) { + ElfBinary<64> interpreter = StandardElf(); + interpreter.header.e_machine = EM_PPC64; + interpreter.header.e_type = ET_DYN; + interpreter.header.e_entry = 0x0; + interpreter.UpdateOffsets(); + + // The first segment really needs to start at 0 for a normal PIE binary, and + // thus includes the headers. + uint64_t const offset = interpreter.phdrs[1].p_offset; + // See comment in ElfTest.ELFInterpreter. + interpreter.phdrs[1].p_flags = PF_R | PF_W | PF_X; + interpreter.phdrs[1].p_offset = 0x0; + interpreter.phdrs[1].p_vaddr = 0x0; + interpreter.phdrs[1].p_filesz += offset; + interpreter.phdrs[1].p_memsz += offset; + + TempPath interpreter_file = + ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(interpreter)); + + ElfBinary<64> binary = StandardElf(); + + // NUL-terminated path in the PT_INTERP segment. + const std::string path = interpreter_file.path(); + std::vector<char> segment(path.begin(), path.end()); + segment.push_back(0); + binary.AddInterpreter(segment); + + binary.UpdateOffsets(); + + TempPath binary_file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(binary)); + + pid_t child; + int execve_errno; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(ForkAndExec( + binary_file.path(), {binary_file.path()}, {}, &child, &execve_errno)); + ASSERT_EQ(execve_errno, ELIBBAD); +} + +// No execute permissions on the binary. 
+TEST(ElfTest, NoExecute) { + ElfBinary<64> elf = StandardElf(); + elf.UpdateOffsets(); + + TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf)); + + ASSERT_THAT(chmod(file.path().c_str(), 0644), SyscallSucceeds()); + + pid_t child; + int execve_errno; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno)); + EXPECT_EQ(execve_errno, EACCES); +} + +// Execute, but no read permissions on the binary works just fine. +TEST(ElfTest, NoRead) { + // TODO(gvisor.dev/issue/160): gVisor's backing filesystem may prevent the + // sentry from reading the executable. + SKIP_IF(IsRunningOnGvisor()); + + ElfBinary<64> elf = StandardElf(); + elf.UpdateOffsets(); + + TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf)); + + ASSERT_THAT(chmod(file.path().c_str(), 0111), SyscallSucceeds()); + + pid_t child; + int execve_errno; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno)); + ASSERT_EQ(execve_errno, 0); + + ASSERT_NO_ERRNO(WaitStopped(child)); + + // TODO(gvisor.dev/issue/160): A task with a non-readable executable is marked + // non-dumpable, preventing access to proc files. gVisor does not implement + // this behavior. +} + +// No execute permissions on the ELF interpreter. +TEST(ElfTest, ElfInterpreterNoExecute) { + ElfBinary<64> interpreter = StandardElf(); + interpreter.header.e_type = ET_DYN; + interpreter.header.e_entry = 0x0; + interpreter.UpdateOffsets(); + + // The first segment really needs to start at 0 for a normal PIE binary, and + // thus includes the headers. + uint64_t const offset = interpreter.phdrs[1].p_offset; + // See comment in ElfTest.ELFInterpreter. 
+ interpreter.phdrs[1].p_flags = PF_R | PF_W | PF_X; + interpreter.phdrs[1].p_offset = 0x0; + interpreter.phdrs[1].p_vaddr = 0x0; + interpreter.phdrs[1].p_filesz += offset; + interpreter.phdrs[1].p_memsz += offset; + + TempPath interpreter_file = + ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(interpreter)); + + ElfBinary<64> binary = StandardElf(); + + // NUL-terminated path in the PT_INTERP segment. + const std::string path = interpreter_file.path(); + std::vector<char> segment(path.begin(), path.end()); + segment.push_back(0); + binary.AddInterpreter(segment); + + binary.UpdateOffsets(); + + TempPath binary_file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(binary)); + + ASSERT_THAT(chmod(interpreter_file.path().c_str(), 0644), SyscallSucceeds()); + + pid_t child; + int execve_errno; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec(interpreter_file.path(), {interpreter_file.path()}, {}, + &child, &execve_errno)); + EXPECT_EQ(execve_errno, EACCES); +} + +// Execute a basic interpreter script. +TEST(InterpreterScriptTest, Execute) { + ElfBinary<64> elf = StandardElf(); + elf.UpdateOffsets(); + // Use /tmp explicitly to ensure the path is short enough. + TempPath binary = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith("/tmp", elf)); + + TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), absl::StrCat("#!", binary.path()), 0755)); + + pid_t child; + int execve_errno; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec(script.path(), {script.path()}, {}, &child, &execve_errno)); + ASSERT_EQ(execve_errno, 0); + + EXPECT_NO_ERRNO(WaitStopped(child)); +} + +// Whitespace after #!. +TEST(InterpreterScriptTest, Whitespace) { + ElfBinary<64> elf = StandardElf(); + elf.UpdateOffsets(); + // Use /tmp explicitly to ensure the path is short enough. 
+ TempPath binary = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith("/tmp", elf)); + + TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), absl::StrCat("#! \t \t", binary.path()), 0755)); + + pid_t child; + int execve_errno; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec(script.path(), {script.path()}, {}, &child, &execve_errno)); + ASSERT_EQ(execve_errno, 0); + + EXPECT_NO_ERRNO(WaitStopped(child)); +} + +// Interpreter script is missing execute permission. +TEST(InterpreterScriptTest, InterpreterScriptNoExecute) { + ElfBinary<64> elf = StandardElf(); + elf.UpdateOffsets(); + // Use /tmp explicitly to ensure the path is short enough. + TempPath binary = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith("/tmp", elf)); + + TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), absl::StrCat("#!", binary.path()), 0644)); + + pid_t child; + int execve_errno; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec(script.path(), {script.path()}, {}, &child, &execve_errno)); + ASSERT_EQ(execve_errno, EACCES); +} + +// Binary interpreter script refers to is missing execute permission. +TEST(InterpreterScriptTest, BinaryNoExecute) { + ElfBinary<64> elf = StandardElf(); + elf.UpdateOffsets(); + // Use /tmp explicitly to ensure the path is short enough. + TempPath binary = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith("/tmp", elf)); + + ASSERT_THAT(chmod(binary.path().c_str(), 0644), SyscallSucceeds()); + + TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), absl::StrCat("#!", binary.path()), 0755)); + + pid_t child; + int execve_errno; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec(script.path(), {script.path()}, {}, &child, &execve_errno)); + ASSERT_EQ(execve_errno, EACCES); +} + +// Linux will load interpreter scripts five levels deep, but no more. 
+TEST(InterpreterScriptTest, MaxRecursion) { + ElfBinary<64> elf = StandardElf(); + elf.UpdateOffsets(); + // Use /tmp explicitly to ensure the path is short enough. + TempPath binary = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith("/tmp", elf)); + + TempPath script1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + "/tmp", absl::StrCat("#!", binary.path()), 0755)); + TempPath script2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + "/tmp", absl::StrCat("#!", script1.path()), 0755)); + TempPath script3 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + "/tmp", absl::StrCat("#!", script2.path()), 0755)); + TempPath script4 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + "/tmp", absl::StrCat("#!", script3.path()), 0755)); + TempPath script5 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + "/tmp", absl::StrCat("#!", script4.path()), 0755)); + TempPath script6 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + "/tmp", absl::StrCat("#!", script5.path()), 0755)); + + pid_t child; + int execve_errno; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec(script6.path(), {script6.path()}, {}, &child, &execve_errno)); + // Too many levels of recursion. + EXPECT_EQ(execve_errno, ELOOP); + + // The next level up is OK. + auto cleanup2 = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec(script5.path(), {script5.path()}, {}, &child, &execve_errno)); + ASSERT_EQ(execve_errno, 0); + + EXPECT_NO_ERRNO(WaitStopped(child)); +} + +// Interpreter script with a relative path. 
+TEST(InterpreterScriptTest, RelativePath) { + ElfBinary<64> elf = StandardElf(); + elf.UpdateOffsets(); + TempPath binary = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith("/tmp", elf)); + + auto cwd = ASSERT_NO_ERRNO_AND_VALUE(GetCWD()); + auto binary_relative = + ASSERT_NO_ERRNO_AND_VALUE(GetRelativePath(cwd, binary.path())); + + TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), absl::StrCat("#!", binary_relative), 0755)); + + pid_t child; + int execve_errno; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec(script.path(), {script.path()}, {}, &child, &execve_errno)); + ASSERT_EQ(execve_errno, 0); + + EXPECT_NO_ERRNO(WaitStopped(child)); +} + +// Interpreter script with .. in a path component. +TEST(InterpreterScriptTest, UncleanPath) { + ElfBinary<64> elf = StandardElf(); + elf.UpdateOffsets(); + // Use /tmp explicitly to ensure the path is short enough. + TempPath binary = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith("/tmp", elf)); + + TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), absl::StrCat("#!/tmp/../", binary.path()), + 0755)); + + pid_t child; + int execve_errno; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec(script.path(), {script.path()}, {}, &child, &execve_errno)); + ASSERT_EQ(execve_errno, 0); + + EXPECT_NO_ERRNO(WaitStopped(child)); +} + +// Passed interpreter script is a symlink. +TEST(InterpreterScriptTest, Symlink) { + ElfBinary<64> elf = StandardElf(); + elf.UpdateOffsets(); + // Use /tmp explicitly to ensure the path is short enough. 
+ TempPath binary = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith("/tmp", elf)); + + TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), absl::StrCat("#!", binary.path()), 0755)); + + TempPath link = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateSymlinkTo(GetAbsoluteTestTmpdir(), script.path())); + + pid_t child; + int execve_errno; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec(link.path(), {link.path()}, {}, &child, &execve_errno)); + ASSERT_EQ(execve_errno, 0); + + EXPECT_NO_ERRNO(WaitStopped(child)); +} + +// Interpreter script points to a symlink loop. +TEST(InterpreterScriptTest, SymlinkLoop) { + std::string const link1 = NewTempAbsPathInDir("/tmp"); + std::string const link2 = NewTempAbsPathInDir("/tmp"); + + ASSERT_THAT(symlink(link2.c_str(), link1.c_str()), SyscallSucceeds()); + auto remove_link1 = Cleanup( + [&link1] { EXPECT_THAT(unlink(link1.c_str()), SyscallSucceeds()); }); + + ASSERT_THAT(symlink(link1.c_str(), link2.c_str()), SyscallSucceeds()); + auto remove_link2 = Cleanup( + [&link2] { EXPECT_THAT(unlink(link2.c_str()), SyscallSucceeds()); }); + + TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), absl::StrCat("#!", link1), 0755)); + + pid_t child; + int execve_errno; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec(script.path(), {script.path()}, {}, &child, &execve_errno)); + EXPECT_EQ(execve_errno, ELOOP); +} + +// Binary is a symlink loop. 
+TEST(ExecveTest, SymlinkLoop) { + std::string const link1 = NewTempAbsPathInDir("/tmp"); + std::string const link2 = NewTempAbsPathInDir("/tmp"); + + ASSERT_THAT(symlink(link2.c_str(), link1.c_str()), SyscallSucceeds()); + auto remove_link = Cleanup( + [&link1] { EXPECT_THAT(unlink(link1.c_str()), SyscallSucceeds()); }); + + ASSERT_THAT(symlink(link1.c_str(), link2.c_str()), SyscallSucceeds()); + auto remove_link2 = Cleanup( + [&link2] { EXPECT_THAT(unlink(link2.c_str()), SyscallSucceeds()); }); + + pid_t child; + int execve_errno; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec(link1, {link1}, {}, &child, &execve_errno)); + EXPECT_EQ(execve_errno, ELOOP); +} + +// Binary is a directory. +TEST(ExecveTest, Directory) { + pid_t child; + int execve_errno; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec("/tmp", {"/tmp"}, {}, &child, &execve_errno)); + EXPECT_EQ(execve_errno, EACCES); +} + +// Pass a valid binary as a directory (extra / on the end). +TEST(ExecveTest, BinaryAsDirectory) { + ElfBinary<64> elf = StandardElf(); + elf.UpdateOffsets(); + TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf)); + + std::string const path = absl::StrCat(file.path(), "/"); + + pid_t child; + int execve_errno; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec(path, {path}, {}, &child, &execve_errno)); + EXPECT_EQ(execve_errno, ENOTDIR); +} + +// The initial brk value is after the page at the end of the binary. +TEST(ExecveTest, BrkAfterBinary) { + ElfBinary<64> elf = StandardElf(); + elf.UpdateOffsets(); + + TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf)); + + pid_t child; + int execve_errno; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno)); + ASSERT_EQ(execve_errno, 0); + + // Ensure it made it to SIGSTOP. 
+ ASSERT_NO_ERRNO(WaitStopped(child)); + + struct user_regs_struct regs; + struct iovec iov; + iov.iov_base = ®s; + iov.iov_len = sizeof(regs); + EXPECT_THAT(ptrace(PTRACE_GETREGSET, child, NT_PRSTATUS, &iov), + SyscallSucceeds()); + // Read exactly the full register set. + EXPECT_EQ(iov.iov_len, sizeof(regs)); + + // RIP is just beyond the final syscall instruction. Rewind to execute a brk + // syscall. + IP_REG(regs) -= kSyscallSize; + RAX_REG(regs) = __NR_brk; + RDI_REG(regs) = 0; + ASSERT_THAT(ptrace(PTRACE_SETREGSET, child, NT_PRSTATUS, &iov), + SyscallSucceeds()); + + // Resume the child, waiting for syscall entry. + ASSERT_THAT(ptrace(PTRACE_SYSCALL, child, 0, 0), SyscallSucceeds()); + int status; + ASSERT_THAT(RetryEINTR(waitpid)(child, &status, 0), + SyscallSucceedsWithValue(child)); + ASSERT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) + << "status = " << status; + + // Execute the syscall. + ASSERT_THAT(ptrace(PTRACE_SYSCALL, child, 0, 0), SyscallSucceeds()); + ASSERT_THAT(RetryEINTR(waitpid)(child, &status, 0), + SyscallSucceedsWithValue(child)); + ASSERT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) + << "status = " << status; + + iov.iov_base = ®s; + iov.iov_len = sizeof(regs); + EXPECT_THAT(ptrace(PTRACE_GETREGSET, child, NT_PRSTATUS, &iov), + SyscallSucceeds()); + // Read exactly the full register set. + EXPECT_EQ(iov.iov_len, sizeof(regs)); + + // brk is after the text page. + // + // The kernel does brk randomization, so we can't be sure what the exact + // address will be, but it is always beyond the final page in the binary. + // i.e., it does not start immediately after memsz in the middle of a page. + // Userspace may expect to use that space. 
+ EXPECT_GE(RETURN_REG(regs), 0x41000); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/exec_proc_exe_workload.cc b/test/syscalls/linux/exec_proc_exe_workload.cc new file mode 100644 index 000000000..2989379b7 --- /dev/null +++ b/test/syscalls/linux/exec_proc_exe_workload.cc @@ -0,0 +1,42 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <stdlib.h> +#include <unistd.h> + +#include <iostream> + +#include "test/util/fs_util.h" +#include "test/util/posix_error.h" + +int main(int argc, char** argv, char** envp) { + // This is annoying. Because remote build systems may put these binaries + // in a content-addressable-store, you may wind up with /proc/self/exe + // pointing to some random path (but with a sensible argv[0]). + // + // Therefore, this test simply checks that the /proc/self/exe + // is absolute and *doesn't* match argv[1]. 
+ std::string exe = + gvisor::testing::ProcessExePath(getpid()).ValueOrDie(); + if (exe[0] != '/') { + std::cerr << "relative path: " << exe << std::endl; + exit(1); + } + if (exe.find(argv[1]) != std::string::npos) { + std::cerr << "matching path: " << exe << std::endl; + exit(1); + } + + return 0; +} diff --git a/test/syscalls/linux/exec_state_workload.cc b/test/syscalls/linux/exec_state_workload.cc new file mode 100644 index 000000000..028902b14 --- /dev/null +++ b/test/syscalls/linux/exec_state_workload.cc @@ -0,0 +1,202 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <signal.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/auxv.h> +#include <sys/prctl.h> +#include <sys/time.h> + +#include <iostream> +#include <ostream> +#include <string> + +#include "absl/strings/numbers.h" + +// Pretty-print a sigset_t. +std::ostream& operator<<(std::ostream& out, const sigset_t& s) { + out << "{ "; + + for (int i = 0; i < NSIG; i++) { + if (sigismember(&s, i)) { + out << i << " "; + } + } + + out << "}"; + return out; +} + +// Verify that the signo handler is handler. 
+int CheckSigHandler(uint32_t signo, uintptr_t handler) { + struct sigaction sa; + int ret = sigaction(signo, nullptr, &sa); + if (ret < 0) { + perror("sigaction"); + return 1; + } + + if (reinterpret_cast<void (*)(int)>(handler) != sa.sa_handler) { + std::cerr << "signo " << signo << " handler got: " << sa.sa_handler + << " expected: " << std::hex << handler; + return 1; + } + return 0; +} + +// Verify that the signo is blocked. +int CheckSigBlocked(uint32_t signo) { + sigset_t s; + int ret = sigprocmask(SIG_SETMASK, nullptr, &s); + if (ret < 0) { + perror("sigprocmask"); + return 1; + } + + if (!sigismember(&s, signo)) { + std::cerr << "signal " << signo << " not blocked in signal mask: " << s + << std::endl; + return 1; + } + return 0; +} + +// Verify that the itimer is enabled. +int CheckItimerEnabled(uint32_t timer) { + struct itimerval itv; + int ret = getitimer(timer, &itv); + if (ret < 0) { + perror("getitimer"); + return 1; + } + + if (!itv.it_value.tv_sec && !itv.it_value.tv_usec && + !itv.it_interval.tv_sec && !itv.it_interval.tv_usec) { + std::cerr << "timer " << timer + << " not enabled. 
value sec: " << itv.it_value.tv_sec + << " usec: " << itv.it_value.tv_usec + << " interval sec: " << itv.it_interval.tv_sec + << " usec: " << itv.it_interval.tv_usec << std::endl; + return 1; + } + return 0; +} + +int PrintExecFn() { + unsigned long execfn = getauxval(AT_EXECFN); + if (!execfn) { + std::cerr << "AT_EXECFN missing" << std::endl; + return 1; + } + + std::cerr << reinterpret_cast<const char*>(execfn) << std::endl; + return 0; +} + +int PrintExecName() { + const size_t name_length = 20; + char name[name_length] = {0}; + if (prctl(PR_GET_NAME, name) < 0) { + std::cerr << "prctl(PR_GET_NAME) failed" << std::endl; + return 1; + } + + std::cerr << name << std::endl; + return 0; +} + +void usage(const std::string& prog) { + std::cerr << "usage:\n" + << "\t" << prog << " CheckSigHandler <signo> <handler addr (hex)>\n" + << "\t" << prog << " CheckSigBlocked <signo>\n" + << "\t" << prog << " CheckTimerDisabled <timer>\n" + << "\t" << prog << " PrintExecFn\n" + << "\t" << prog << " PrintExecName" << std::endl; +} + +int main(int argc, char** argv) { + if (argc < 2) { + usage(argv[0]); + return 1; + } + + std::string func(argv[1]); + + if (func == "CheckSigHandler") { + if (argc != 4) { + usage(argv[0]); + return 1; + } + + uint32_t signo; + if (!absl::SimpleAtoi(argv[2], &signo)) { + std::cerr << "invalid signo: " << argv[2] << std::endl; + return 1; + } + + uintptr_t handler; + if (!absl::numbers_internal::safe_strtoi_base(argv[3], &handler, 16)) { + std::cerr << "invalid handler: " << std::hex << argv[3] << std::endl; + return 1; + } + + return CheckSigHandler(signo, handler); + } + + if (func == "CheckSigBlocked") { + if (argc != 3) { + usage(argv[0]); + return 1; + } + + uint32_t signo; + if (!absl::SimpleAtoi(argv[2], &signo)) { + std::cerr << "invalid signo: " << argv[2] << std::endl; + return 1; + } + + return CheckSigBlocked(signo); + } + + if (func == "CheckItimerEnabled") { + if (argc != 3) { + usage(argv[0]); + return 1; + } + + uint32_t timer; + if 
(!absl::SimpleAtoi(argv[2], &timer)) { + std::cerr << "invalid signo: " << argv[2] << std::endl; + return 1; + } + + return CheckItimerEnabled(timer); + } + + if (func == "PrintExecFn") { + // N.B. This will be called as an interpreter script, with the script passed + // as the third argument. We don't care about that script. + return PrintExecFn(); + } + + if (func == "PrintExecName") { + // N.B. This may be called as an interpreter script like PrintExecFn. + return PrintExecName(); + } + + std::cerr << "Invalid function: " << func << std::endl; + return 1; +} diff --git a/test/syscalls/linux/exit.cc b/test/syscalls/linux/exit.cc new file mode 100644 index 000000000..d52ea786b --- /dev/null +++ b/test/syscalls/linux/exit.cc @@ -0,0 +1,78 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <sys/wait.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "absl/time/time.h" +#include "test/util/file_descriptor.h" +#include "test/util/test_util.h" +#include "test/util/time_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +void TestExit(int code) { + pid_t pid = fork(); + if (pid == 0) { + _exit(code); + } + + ASSERT_THAT(pid, SyscallSucceeds()); + + int status; + EXPECT_THAT(RetryEINTR(waitpid)(pid, &status, 0), SyscallSucceeds()); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == code) << status; +} + +TEST(ExitTest, Success) { TestExit(0); } + +TEST(ExitTest, Failure) { TestExit(1); } + +// This test ensures that a process's file descriptors are closed when it calls +// exit(). In order to test this, the parent tries to read from a pipe whose +// write end is held by the child. While the read is blocking, the child exits, +// which should cause the parent to read 0 bytes due to EOF. +TEST(ExitTest, CloseFds) { + int pipe_fds[2]; + ASSERT_THAT(pipe(pipe_fds), SyscallSucceeds()); + + FileDescriptor read_fd(pipe_fds[0]); + FileDescriptor write_fd(pipe_fds[1]); + + pid_t pid = fork(); + if (pid == 0) { + read_fd.reset(); + + SleepSafe(absl::Seconds(10)); + + _exit(0); + } + + EXPECT_THAT(pid, SyscallSucceeds()); + + write_fd.reset(); + + char buf[10]; + EXPECT_THAT(ReadFd(read_fd.get(), buf, sizeof(buf)), + SyscallSucceedsWithValue(0)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/exit_script.sh b/test/syscalls/linux/exit_script.sh new file mode 100755 index 000000000..527518e06 --- /dev/null +++ b/test/syscalls/linux/exit_script.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +# Copyright 2018 The gVisor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +if [ $# -ne 1 ]; then + echo "Usage: $0 exit_code" + exit 255 +fi + +exit $1 diff --git a/test/syscalls/linux/fadvise64.cc b/test/syscalls/linux/fadvise64.cc new file mode 100644 index 000000000..2af7aa6d9 --- /dev/null +++ b/test/syscalls/linux/fadvise64.cc @@ -0,0 +1,72 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <errno.h> +#include <syscall.h> +#include <unistd.h> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "test/util/file_descriptor.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { +namespace { + +TEST(FAdvise64Test, Basic) { + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const auto fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY)); + + // fadvise64 is noop in gVisor, so just test that it succeeds. 
+ ASSERT_THAT(syscall(__NR_fadvise64, fd.get(), 0, 10, POSIX_FADV_NORMAL), + SyscallSucceeds()); + ASSERT_THAT(syscall(__NR_fadvise64, fd.get(), 0, 10, POSIX_FADV_RANDOM), + SyscallSucceeds()); + ASSERT_THAT(syscall(__NR_fadvise64, fd.get(), 0, 10, POSIX_FADV_SEQUENTIAL), + SyscallSucceeds()); + ASSERT_THAT(syscall(__NR_fadvise64, fd.get(), 0, 10, POSIX_FADV_WILLNEED), + SyscallSucceeds()); + ASSERT_THAT(syscall(__NR_fadvise64, fd.get(), 0, 10, POSIX_FADV_DONTNEED), + SyscallSucceeds()); + ASSERT_THAT(syscall(__NR_fadvise64, fd.get(), 0, 10, POSIX_FADV_NOREUSE), + SyscallSucceeds()); +} + +TEST(FAdvise64Test, InvalidArgs) { + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const auto fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY)); + + // Note: offset is allowed to be negative. + ASSERT_THAT(syscall(__NR_fadvise64, fd.get(), 0, static_cast<off_t>(-1), + POSIX_FADV_NORMAL), + SyscallFailsWithErrno(EINVAL)); + ASSERT_THAT(syscall(__NR_fadvise64, fd.get(), 0, 10, 12345), + SyscallFailsWithErrno(EINVAL)); +} + +TEST(FAdvise64Test, NoPipes) { + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + const FileDescriptor read(fds[0]); + const FileDescriptor write(fds[1]); + + ASSERT_THAT(syscall(__NR_fadvise64, read.get(), 0, 10, POSIX_FADV_NORMAL), + SyscallFailsWithErrno(ESPIPE)); +} + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/fallocate.cc b/test/syscalls/linux/fallocate.cc new file mode 100644 index 000000000..cabc2b751 --- /dev/null +++ b/test/syscalls/linux/fallocate.cc @@ -0,0 +1,186 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <errno.h> +#include <fcntl.h> +#include <signal.h> +#include <sys/eventfd.h> +#include <sys/resource.h> +#include <sys/signalfd.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/timerfd.h> +#include <syscall.h> +#include <time.h> +#include <unistd.h> + +#include <ctime> + +#include "gtest/gtest.h" +#include "absl/strings/str_cat.h" +#include "absl/time/time.h" +#include "test/syscalls/linux/file_base.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/cleanup.h" +#include "test/util/eventfd_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/posix_error.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { +namespace { + +int fallocate(int fd, int mode, off_t offset, off_t len) { + return RetryEINTR(syscall)(__NR_fallocate, fd, mode, offset, len); +} + +class AllocateTest : public FileTest { + void SetUp() override { FileTest::SetUp(); } +}; + +TEST_F(AllocateTest, Fallocate) { + // Check that it starts at size zero. + struct stat buf; + ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds()); + EXPECT_EQ(buf.st_size, 0); + + // Grow to ten bytes. + ASSERT_THAT(fallocate(test_file_fd_.get(), 0, 0, 10), SyscallSucceeds()); + ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds()); + EXPECT_EQ(buf.st_size, 10); + + // Allocate to a smaller size should be noop. 
+ ASSERT_THAT(fallocate(test_file_fd_.get(), 0, 0, 5), SyscallSucceeds()); + ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds()); + EXPECT_EQ(buf.st_size, 10); + + // Grow again. + ASSERT_THAT(fallocate(test_file_fd_.get(), 0, 0, 20), SyscallSucceeds()); + ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds()); + EXPECT_EQ(buf.st_size, 20); + + // Grow with offset. + ASSERT_THAT(fallocate(test_file_fd_.get(), 0, 10, 20), SyscallSucceeds()); + ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds()); + EXPECT_EQ(buf.st_size, 30); + + // Grow with offset beyond EOF. + ASSERT_THAT(fallocate(test_file_fd_.get(), 0, 39, 1), SyscallSucceeds()); + ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds()); + EXPECT_EQ(buf.st_size, 40); + + // Given length 0 should fail with EINVAL. + ASSERT_THAT(fallocate(test_file_fd_.get(), 0, 50, 0), + SyscallFailsWithErrno(EINVAL)); + ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds()); + EXPECT_EQ(buf.st_size, 40); +} + +TEST_F(AllocateTest, FallocateInvalid) { + // Invalid FD + EXPECT_THAT(fallocate(-1, 0, 0, 10), SyscallFailsWithErrno(EBADF)); + + // Negative offset and size. 
+ EXPECT_THAT(fallocate(test_file_fd_.get(), 0, -1, 10), + SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(fallocate(test_file_fd_.get(), 0, 0, -1), + SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(fallocate(test_file_fd_.get(), 0, -1, -1), + SyscallFailsWithErrno(EINVAL)); +} + +TEST_F(AllocateTest, FallocateReadonly) { + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY)); + EXPECT_THAT(fallocate(fd.get(), 0, 0, 10), SyscallFailsWithErrno(EBADF)); +} + +TEST_F(AllocateTest, FallocatePipe) { + int pipes[2]; + EXPECT_THAT(pipe(pipes), SyscallSucceeds()); + auto cleanup = Cleanup([&pipes] { + EXPECT_THAT(close(pipes[0]), SyscallSucceeds()); + EXPECT_THAT(close(pipes[1]), SyscallSucceeds()); + }); + + EXPECT_THAT(fallocate(pipes[1], 0, 0, 10), SyscallFailsWithErrno(ESPIPE)); +} + +TEST_F(AllocateTest, FallocateChar) { + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/null", O_RDWR)); + EXPECT_THAT(fallocate(fd.get(), 0, 0, 10), SyscallFailsWithErrno(ENODEV)); +} + +TEST_F(AllocateTest, FallocateRlimit) { + // Get the current rlimit and restore after test run. + struct rlimit initial_lim; + ASSERT_THAT(getrlimit(RLIMIT_FSIZE, &initial_lim), SyscallSucceeds()); + auto cleanup = Cleanup([&initial_lim] { + EXPECT_THAT(setrlimit(RLIMIT_FSIZE, &initial_lim), SyscallSucceeds()); + }); + + // Try growing past the file size limit. 
+ sigset_t new_mask; + sigemptyset(&new_mask); + sigaddset(&new_mask, SIGXFSZ); + sigprocmask(SIG_BLOCK, &new_mask, nullptr); + + struct rlimit setlim = {}; + setlim.rlim_cur = 1024; + setlim.rlim_max = RLIM_INFINITY; + ASSERT_THAT(setrlimit(RLIMIT_FSIZE, &setlim), SyscallSucceeds()); + + EXPECT_THAT(fallocate(test_file_fd_.get(), 0, 0, 1025), + SyscallFailsWithErrno(EFBIG)); + + struct timespec timelimit = {}; + timelimit.tv_sec = 10; + EXPECT_EQ(sigtimedwait(&new_mask, nullptr, &timelimit), SIGXFSZ); + ASSERT_THAT(sigprocmask(SIG_UNBLOCK, &new_mask, nullptr), SyscallSucceeds()); +} + +TEST_F(AllocateTest, FallocateOtherFDs) { + int fd; + ASSERT_THAT(fd = timerfd_create(CLOCK_MONOTONIC, 0), SyscallSucceeds()); + auto timer_fd = FileDescriptor(fd); + EXPECT_THAT(fallocate(timer_fd.get(), 0, 0, 10), + SyscallFailsWithErrno(ENODEV)); + + sigset_t mask; + sigemptyset(&mask); + ASSERT_THAT(fd = signalfd(-1, &mask, 0), SyscallSucceeds()); + auto sfd = FileDescriptor(fd); + EXPECT_THAT(fallocate(sfd.get(), 0, 0, 10), SyscallFailsWithErrno(ENODEV)); + + auto efd = + ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, EFD_NONBLOCK | EFD_SEMAPHORE)); + EXPECT_THAT(fallocate(efd.get(), 0, 0, 10), SyscallFailsWithErrno(ENODEV)); + + auto sockfd = ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, 0)); + EXPECT_THAT(fallocate(sockfd.get(), 0, 0, 10), SyscallFailsWithErrno(ENODEV)); + + int socks[2]; + ASSERT_THAT(socketpair(AF_UNIX, SOCK_STREAM, PF_UNIX, socks), + SyscallSucceeds()); + auto sock0 = FileDescriptor(socks[0]); + auto sock1 = FileDescriptor(socks[1]); + EXPECT_THAT(fallocate(sock0.get(), 0, 0, 10), SyscallFailsWithErrno(ENODEV)); +} + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/fault.cc b/test/syscalls/linux/fault.cc new file mode 100644 index 000000000..a85750382 --- /dev/null +++ b/test/syscalls/linux/fault.cc @@ -0,0 +1,74 @@ +// Copyright 2018 The gVisor Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#define _GNU_SOURCE 1 +#include <signal.h> +#include <ucontext.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// Writes through a null pointer. Marked noinline so that the function has its +// own address range, which sigact_handler compares the faulting PC against. +__attribute__((noinline)) void Fault(void) { + volatile int* foo = nullptr; + *foo = 0; +} + +// Stores the program counter saved in uc into *pc. Returns 1 on the +// architectures handled below and 0 (leaving *pc untouched) on any other. +int GetPcFromUcontext(ucontext_t* uc, uintptr_t* pc) { +#if defined(__x86_64__) + *pc = uc->uc_mcontext.gregs[REG_RIP]; + return 1; +#elif defined(__i386__) + *pc = uc->uc_mcontext.gregs[REG_EIP]; + return 1; +#elif defined(__aarch64__) + *pc = uc->uc_mcontext.pc; + return 1; +#else + return 0; +#endif +} + +// SA_SIGINFO-style signal handler: checks that the interrupted PC lies within +// the first 64 bytes of Fault() and then exits the process with status 0. If +// the PC cannot be read it simply returns. +void sigact_handler(int sig, siginfo_t* siginfo, void* context) { + uintptr_t pc; + if (GetPcFromUcontext(reinterpret_cast<ucontext_t*>(context), &pc)) { + /* Expect Fault() to be at most 64 bytes in size. */ + uintptr_t fault_addr = reinterpret_cast<uintptr_t>(&Fault); + EXPECT_GE(pc, fault_addr); + EXPECT_LT(pc, fault_addr + 64); + exit(0); + } +} + +TEST(FaultTest, InRange) { + // Install sigact_handler for SIGSEGV so that the fault triggered below is + // checked by the handler instead of killing the test.
+ struct sigaction sa = {}; + sa.sa_sigaction = sigact_handler; + // Block every other signal while the handler runs. + sigfillset(&sa.sa_mask); + sa.sa_flags = SA_SIGINFO; + ASSERT_THAT(sigaction(SIGSEGV, &sa, nullptr), SyscallSucceeds()); + + // Trigger the fault; sigact_handler is expected to exit the process. + Fault(); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/fchdir.cc b/test/syscalls/linux/fchdir.cc new file mode 100644 index 000000000..08bcae1e8 --- /dev/null +++ b/test/syscalls/linux/fchdir.cc @@ -0,0 +1,77 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <fcntl.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/un.h> + +#include "gtest/gtest.h" +#include "test/util/capability_util.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// fchdir succeeds on a descriptor that refers to a directory. +TEST(FchdirTest, Success) { + auto temp_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + int fd; + ASSERT_THAT(fd = open(temp_dir.path().c_str(), O_DIRECTORY | O_RDONLY), + SyscallSucceeds()); + + EXPECT_THAT(fchdir(fd), SyscallSucceeds()); + EXPECT_THAT(close(fd), SyscallSucceeds()); + // Change CWD to a permanent location as temp dirs will be cleaned up. + EXPECT_THAT(chdir("/"), SyscallSucceeds()); +} + +// fchdir on an invalid descriptor is expected to fail with EBADF. +TEST(FchdirTest, InvalidFD) { + EXPECT_THAT(fchdir(-1), SyscallFailsWithErrno(EBADF)); +} + +TEST(FchdirTest, PermissionDenied) { + // Drop capabilities that allow us to override directory permissions.
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false)); + + // Mode 0666 carries no execute (search) bit, so the directory can be opened + // for reading but fchdir into it is expected to fail with EACCES. + auto temp_dir = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateDirWith(GetAbsoluteTestTmpdir(), 0666 /* mode */)); + + int fd; + ASSERT_THAT(fd = open(temp_dir.path().c_str(), O_DIRECTORY | O_RDONLY), + SyscallSucceeds()); + + EXPECT_THAT(fchdir(fd), SyscallFailsWithErrno(EACCES)); + EXPECT_THAT(close(fd), SyscallSucceeds()); +} + +// fchdir on a descriptor for a regular file is expected to fail with ENOTDIR. +TEST(FchdirTest, NotDir) { + auto temp_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + + int fd; + ASSERT_THAT(fd = open(temp_file.path().c_str(), O_CREAT | O_RDONLY, 0777), + SyscallSucceeds()); + + EXPECT_THAT(fchdir(fd), SyscallFailsWithErrno(ENOTDIR)); + EXPECT_THAT(close(fd), SyscallSucceeds()); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/fcntl.cc b/test/syscalls/linux/fcntl.cc new file mode 100644 index 000000000..5467fa2c8 --- /dev/null +++ b/test/syscalls/linux/fcntl.cc @@ -0,0 +1,1353 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+ +#include <fcntl.h> +#include <signal.h> +#include <sys/types.h> +#include <syscall.h> +#include <unistd.h> + +#include <iostream> +#include <list> +#include <string> +#include <vector> + +#include "gtest/gtest.h" +#include "absl/base/macros.h" +#include "absl/base/port.h" +#include "absl/flags/flag.h" +#include "absl/memory/memory.h" +#include "absl/strings/str_cat.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/cleanup.h" +#include "test/util/eventfd_util.h" +#include "test/util/fs_util.h" +#include "test/util/multiprocess_util.h" +#include "test/util/posix_error.h" +#include "test/util/save_util.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" +#include "test/util/timer_util.h" + +// Flags passed on the command line when this binary is re-executed as a +// lock-taking child process (see SubprocessLock). +ABSL_FLAG(std::string, child_setlock_on, "", + "Contains the path to try to set a file lock on."); +ABSL_FLAG(bool, child_setlock_write, false, + "Whether to set a writable lock (otherwise readable)"); +ABSL_FLAG(bool, blocking, false, + "Whether to set a blocking lock (otherwise non-blocking)."); +ABSL_FLAG(bool, retry_eintr, false, + "Whether to retry in the subprocess on EINTR."); +ABSL_FLAG(uint64_t, child_setlock_start, 0, "The value of struct flock start"); +ABSL_FLAG(uint64_t, child_setlock_len, 0, "The value of struct flock len"); +ABSL_FLAG(int32_t, socket_fd, -1, + "A socket to use for communicating more state back " + "to the parent."); + +namespace gvisor { +namespace testing { + +// Fixture that owns an AF_UNIX socket pair (fds_), created in SetUp and +// closed in TearDown. +class FcntlLockTest : public ::testing::Test { + public: + void SetUp() override { + // Let's make a socket pair.
+ ASSERT_THAT(socketpair(AF_UNIX, SOCK_STREAM, 0, fds_), SyscallSucceeds()); + } + + void TearDown() override { + EXPECT_THAT(close(fds_[0]), SyscallSucceeds()); + EXPECT_THAT(close(fds_[1]), SyscallSucceeds()); + } + + // Reads one int64_t from the parent's end of the socket pair (fds_[0]) and + // returns it. + int64_t GetSubprocessFcntlTimeInUsec() { + int64_t ret = 0; + EXPECT_THAT(ReadFd(fds_[0], reinterpret_cast<void*>(&ret), sizeof(ret)), + SyscallSucceedsWithValue(sizeof(ret))); + return ret; + } + + // The first fd will remain with the process creating the subprocess + // and the second will go to the subprocess. + int fds_[2] = {}; +}; + +namespace { + +// Forks and re-executes this binary (/proc/self/exe) with flags describing the +// lock to attempt: path, [start, start+length) range, read or write +// (for_write), F_SETLKW vs F_SETLK (blocking), whether to retry on EINTR, and +// the socket fd to report back on. The child's pid is stored in *child. +// Returns an error if ForkAndExec fails or reports a non-zero execve errno. +PosixErrorOr<Cleanup> SubprocessLock(std::string const& path, bool for_write, + bool blocking, bool retry_eintr, int fd, + off_t start, off_t length, pid_t* child) { + std::vector<std::string> args = { + "/proc/self/exe", "--child_setlock_on", path, + "--child_setlock_start", absl::StrCat(start), "--child_setlock_len", + absl::StrCat(length), "--socket_fd", absl::StrCat(fd)}; + + // Boolean flags are only passed when set; their default is false. + if (for_write) { + args.push_back("--child_setlock_write"); + } + + if (blocking) { + args.push_back("--blocking"); + } + + if (retry_eintr) { + args.push_back("--retry_eintr"); + } + + int execve_errno = 0; + ASSIGN_OR_RETURN_ERRNO( + auto cleanup, + ForkAndExec("/proc/self/exe", ExecveArray(args.begin(), args.end()), {}, + nullptr, child, &execve_errno)); + + if (execve_errno != 0) { + return PosixError(execve_errno, "execve"); + } + + return std::move(cleanup); +} + +// F_GETFD and F_SETFD on a closed descriptor number are expected to fail with +// EBADF. +TEST(FcntlTest, SetCloExecBadFD) { + // Open an eventfd file descriptor with FD_CLOEXEC descriptor flag not set. + FileDescriptor f = ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, 0)); + // Remember the raw descriptor number, then release it so that fd is stale. + auto fd = f.get(); + f.reset(); + ASSERT_THAT(fcntl(fd, F_GETFD), SyscallFailsWithErrno(EBADF)); + ASSERT_THAT(fcntl(fd, F_SETFD, FD_CLOEXEC), SyscallFailsWithErrno(EBADF)); +} + +// F_SETFD with FD_CLOEXEC sets the descriptor flag reported by F_GETFD. +TEST(FcntlTest, SetCloExec) { + // Open an eventfd file descriptor with FD_CLOEXEC descriptor flag not set.
+ FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, 0)); + ASSERT_THAT(fcntl(fd.get(), F_GETFD), SyscallSucceedsWithValue(0)); + + // Set the FD_CLOEXEC flag. + ASSERT_THAT(fcntl(fd.get(), F_SETFD, FD_CLOEXEC), SyscallSucceeds()); + ASSERT_THAT(fcntl(fd.get(), F_GETFD), SyscallSucceedsWithValue(FD_CLOEXEC)); +} + +// F_SETFD with 0 clears an FD_CLOEXEC flag that was set at creation time. +TEST(FcntlTest, ClearCloExec) { + // Open an eventfd file descriptor with FD_CLOEXEC descriptor flag set. + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, EFD_CLOEXEC)); + ASSERT_THAT(fcntl(fd.get(), F_GETFD), SyscallSucceedsWithValue(FD_CLOEXEC)); + + // Clear the FD_CLOEXEC flag. + ASSERT_THAT(fcntl(fd.get(), F_SETFD, 0), SyscallSucceeds()); + ASSERT_THAT(fcntl(fd.get(), F_GETFD), SyscallSucceedsWithValue(0)); +} + +// Descriptor flags belong to the descriptor, not the open file: setting +// FD_CLOEXEC on one descriptor must not change its duplicate. +TEST(FcntlTest, IndependentDescriptorFlags) { + // Open an eventfd file descriptor with FD_CLOEXEC descriptor flag not set. + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, 0)); + ASSERT_THAT(fcntl(fd.get(), F_GETFD), SyscallSucceedsWithValue(0)); + + // Duplicate the descriptor. Ensure that it also doesn't have FD_CLOEXEC. + FileDescriptor newfd = ASSERT_NO_ERRNO_AND_VALUE(fd.Dup()); + ASSERT_THAT(fcntl(newfd.get(), F_GETFD), SyscallSucceedsWithValue(0)); + + // Set FD_CLOEXEC on the first FD. + ASSERT_THAT(fcntl(fd.get(), F_SETFD, FD_CLOEXEC), SyscallSucceeds()); + ASSERT_THAT(fcntl(fd.get(), F_GETFD), SyscallSucceedsWithValue(FD_CLOEXEC)); + + // Ensure that the second FD is unaffected by the change on the first. + ASSERT_THAT(fcntl(newfd.get(), F_GETFD), SyscallSucceedsWithValue(0)); +} + +// All file description flags passed to open appear in F_GETFL. +TEST(FcntlTest, GetAllFlags) { + TempPath path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + int flags = O_RDWR | O_DIRECT | O_SYNC | O_NONBLOCK | O_APPEND; + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(path.path(), flags)); + + // Linux forces O_LARGEFILE on all 64-bit kernels and gVisor's is 64-bit.
+ int expected = flags | kOLargeFile; + + int rflags; + EXPECT_THAT(rflags = fcntl(fd.get(), F_GETFL), SyscallSucceeds()); + EXPECT_EQ(rflags, expected); +} + +// F_SETFL changes only the status flags that can be modified after open; the +// access mode and O_SYNC are ignored. +TEST(FcntlTest, SetFlags) { + TempPath path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(path.path(), 0)); + + int const flags = O_RDWR | O_DIRECT | O_SYNC | O_NONBLOCK | O_APPEND; + EXPECT_THAT(fcntl(fd.get(), F_SETFL, flags), SyscallSucceeds()); + + // Can't set O_RDWR or O_SYNC. + // Linux forces O_LARGEFILE on all 64-bit kernels and gVisor's is 64-bit. + int expected = O_DIRECT | O_NONBLOCK | O_APPEND | kOLargeFile; + + int rflags; + EXPECT_THAT(rflags = fcntl(fd.get(), F_GETFL), SyscallSucceeds()); + EXPECT_EQ(rflags, expected); +} + +// Expects that a non-blocking (F_SETLK) lock of type lock_type covering the +// whole file can be taken on fd. +void TestLock(int fd, short lock_type = F_RDLCK) { // NOLINT, type in flock + struct flock fl; + fl.l_type = lock_type; + fl.l_whence = SEEK_SET; + fl.l_start = 0; + // len 0 locks all bytes despite how large the file grows. + fl.l_len = 0; + EXPECT_THAT(fcntl(fd, F_SETLK, &fl), SyscallSucceeds()); +} + +// Expects that the same whole-file F_SETLK request as in TestLock fails on fd +// with EBADF. +void TestLockBadFD(int fd, + short lock_type = F_RDLCK) { // NOLINT, type in flock + struct flock fl; + fl.l_type = lock_type; + fl.l_whence = SEEK_SET; + fl.l_start = 0; + // len 0 locks all bytes despite how large the file grows. + fl.l_len = 0; + EXPECT_THAT(fcntl(fd, F_SETLK, &fl), SyscallFailsWithErrno(EBADF)); +} + +TEST_F(FcntlLockTest, SetLockBadFd) { TestLockBadFD(-1); } + +// A read lock can be taken on a directory opened O_RDONLY. +TEST_F(FcntlLockTest, SetLockDir) { + auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + auto fd = ASSERT_NO_ERRNO_AND_VALUE(Open(dir.path(), O_RDONLY, 0000)); + TestLock(fd.get()); +} + +// Locking through an O_PATH descriptor is expected to fail with EBADF. +TEST_F(FcntlLockTest, SetLockSymlink) { + // TODO(gvisor.dev/issue/2782): Replace with IsRunningWithVFS1() when O_PATH + // is supported.
+ SKIP_IF(IsRunningOnGvisor()); + + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + auto symlink = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateSymlinkTo(GetAbsoluteTestTmpdir(), file.path())); + + auto fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(symlink.path(), O_RDONLY | O_PATH, 0000)); + TestLockBadFD(fd.get()); +} + +// A read lock can be taken on a procfs file. +TEST_F(FcntlLockTest, SetLockProc) { + auto fd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/self/status", O_RDONLY, 0000)); + TestLock(fd.get()); +} + +// On a pipe, each end only accepts the lock type matching its access mode: +// read locks on the read end, write locks on the write end. +TEST_F(FcntlLockTest, SetLockPipe) { + SKIP_IF(IsRunningWithVFS1()); + + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + + // Read end. + TestLock(fds[0]); + TestLockBadFD(fds[0], F_WRLCK); + + // Write end. + TestLock(fds[1], F_WRLCK); + TestLockBadFD(fds[1]); + + EXPECT_THAT(close(fds[0]), SyscallSucceeds()); + EXPECT_THAT(close(fds[1]), SyscallSucceeds()); +} + +// A read lock can be taken on a bound AF_UNIX socket. +TEST_F(FcntlLockTest, SetLockSocket) { + SKIP_IF(IsRunningWithVFS1()); + + int sock = socket(AF_UNIX, SOCK_STREAM, 0); + ASSERT_THAT(sock, SyscallSucceeds()); + + struct sockaddr_un addr = + ASSERT_NO_ERRNO_AND_VALUE(UniqueUnixAddr(true /* abstract */, AF_UNIX)); + ASSERT_THAT( + bind(sock, reinterpret_cast<struct sockaddr*>(&addr), sizeof(addr)), + SyscallSucceeds()); + + TestLock(sock); + EXPECT_THAT(close(sock), SyscallSucceeds()); +} + +TEST_F(FcntlLockTest, SetLockBadOpenFlagsWrite) { + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY, 0666)); + + struct flock fl0; + fl0.l_type = F_WRLCK; + fl0.l_whence = SEEK_SET; + fl0.l_start = 0; + fl0.l_len = 0; // Lock all file + + // Expect that setting a write lock using a read only file descriptor + // won't work.
+ EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl0), SyscallFailsWithErrno(EBADF)); +} + +// Setting a read lock using a write-only file descriptor is expected to fail +// with EBADF. +TEST_F(FcntlLockTest, SetLockBadOpenFlagsRead) { + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_WRONLY, 0666)); + + struct flock fl1; + fl1.l_type = F_RDLCK; + fl1.l_whence = SEEK_SET; + fl1.l_start = 0; + // len 0 locks all bytes despite how large the file grows. + fl1.l_len = 0; + + EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl1), SyscallFailsWithErrno(EBADF)); +} + +// Unlocking a file on which no lock is held succeeds. +TEST_F(FcntlLockTest, SetLockUnlockOnNothing) { + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666)); + + struct flock fl; + fl.l_type = F_UNLCK; + fl.l_whence = SEEK_SET; + fl.l_start = 0; + // len 0 covers all bytes despite how large the file grows. + fl.l_len = 0; + + EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl), SyscallSucceeds()); +} + +// Within a single process the same write lock can be taken repeatedly, even +// through different descriptors. +TEST_F(FcntlLockTest, SetWriteLockSingleProc) { + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + FileDescriptor fd0 = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666)); + + struct flock fl; + fl.l_type = F_WRLCK; + fl.l_whence = SEEK_SET; + fl.l_start = 0; + // len 0 locks all bytes despite how large the file grows. + fl.l_len = 0; + + EXPECT_THAT(fcntl(fd0.get(), F_SETLK, &fl), SyscallSucceeds()); + // Expect to be able to take the same lock on the same fd no problem. + EXPECT_THAT(fcntl(fd0.get(), F_SETLK, &fl), SyscallSucceeds()); + + FileDescriptor fd1 = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666)); + + // Expect to be able to take the same lock from a different fd but for + // the same process.
+ EXPECT_THAT(fcntl(fd1.get(), F_SETLK, &fl), SyscallSucceeds()); +} + +// Two processes can hold a read lock on the same file at the same time. +TEST_F(FcntlLockTest, SetReadLockMultiProc) { + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666)); + + struct flock fl; + fl.l_type = F_RDLCK; + fl.l_whence = SEEK_SET; + fl.l_start = 0; + // len 0 locks all bytes despite how large the file grows. + fl.l_len = 0; + EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl), SyscallSucceeds()); + + // Spawn a child process to take a read lock on the same file. + pid_t child_pid = 0; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + SubprocessLock(file.path(), false /* read lock */, + false /* nonblocking */, false /* no eintr retry */, + -1 /* no socket fd */, fl.l_start, fl.l_len, &child_pid)); + + int status = 0; + ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds()); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << "Exited with code: " << status; +} + +// A read lock held by this process makes another process's write lock attempt +// fail with EAGAIN until the lock is dropped. +TEST_F(FcntlLockTest, SetReadThenWriteLockMultiProc) { + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666)); + + struct flock fl; + fl.l_type = F_RDLCK; + fl.l_whence = SEEK_SET; + fl.l_start = 0; + // len 0 locks all bytes despite how large the file grows. + fl.l_len = 0; + EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl), SyscallSucceeds()); + + // Assert that another process trying to lock on the same file will fail + // with EAGAIN. It's important that we keep the fd above open so that + // the other process will contend with the lock.
+ pid_t child_pid = 0; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + SubprocessLock(file.path(), true /* write lock */, + false /* nonblocking */, false /* no eintr retry */, + -1 /* no socket fd */, fl.l_start, fl.l_len, &child_pid)); + + int status = 0; + ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds()); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == EAGAIN) + << "Exited with code: " << status; + + // Close the fd: we want to test that another process can acquire the + // lock after this point. + fd.reset(); + // Assert that another process can now acquire the lock. + + child_pid = 0; + auto cleanup2 = ASSERT_NO_ERRNO_AND_VALUE( + SubprocessLock(file.path(), true /* write lock */, + false /* nonblocking */, false /* no eintr retry */, + -1 /* no socket fd */, fl.l_start, fl.l_len, &child_pid)); + ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds()); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << "Exited with code: " << status; +} + +// A write lock held by this process makes another process's read lock attempt +// fail with EAGAIN until the lock is dropped. +TEST_F(FcntlLockTest, SetWriteThenReadLockMultiProc) { + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666)); + // fd must stay open while the first child runs so that the lock stays held. + + struct flock fl; + fl.l_type = F_WRLCK; + fl.l_whence = SEEK_SET; + fl.l_start = 0; + // len 0 locks all bytes despite how large the file grows. + fl.l_len = 0; + + // Take the write lock on the whole file. + EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl), SyscallSucceeds()); + + // Assert that another process trying to take a read lock on the same file + // will fail with EAGAIN.
+ pid_t child_pid = 0; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + SubprocessLock(file.path(), false /* read lock */, + false /* nonblocking */, false /* no eintr retry */, + -1 /* no socket fd */, fl.l_start, fl.l_len, &child_pid)); + + int status = 0; + ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds()); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == EAGAIN) + << "Exited with code: " << status; + + // Close the fd: we want to test that another process can acquire the + // lock after this point. + fd.reset(); // Close the fd. + + // Assert that another process can now acquire the read lock. + child_pid = 0; + auto cleanup2 = ASSERT_NO_ERRNO_AND_VALUE( + SubprocessLock(file.path(), false /* read lock */, + false /* nonblocking */, false /* no eintr retry */, + -1 /* no socket fd */, fl.l_start, fl.l_len, &child_pid)); + ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds()); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << "Exited with code: " << status; +} + +// A write lock held by this process makes another process's write lock +// attempt fail with EAGAIN until the lock is dropped. +TEST_F(FcntlLockTest, SetWriteLockMultiProc) { + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666)); + // fd must stay open while the first child runs so that the lock stays held. + + struct flock fl; + fl.l_type = F_WRLCK; + fl.l_whence = SEEK_SET; + fl.l_start = 0; + // len 0 locks all bytes despite how large the file grows. + fl.l_len = 0; + + // Take the write lock on the whole file. + EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl), SyscallSucceeds()); + + // Assert that another process trying to take the same write lock will fail + // with EAGAIN. + pid_t child_pid = 0; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + SubprocessLock(file.path(), true /* write lock */, + false /* nonblocking */, false /* no eintr retry */, + -1 /* no socket fd */, fl.l_start, fl.l_len, &child_pid)); + int status = 0; + ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds()); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == EAGAIN) + << "Exited with code: " << status; + + fd.reset(); // Close the FD.
+ // Assert that another process can now acquire the write lock. + child_pid = 0; + auto cleanup2 = ASSERT_NO_ERRNO_AND_VALUE( + SubprocessLock(file.path(), true /* write lock */, + false /* nonblocking */, false /* no eintr retry */, + -1 /* no socket fd */, fl.l_start, fl.l_len, &child_pid)); + ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds()); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << "Exited with code: " << status; +} + +// Locks apply to byte ranges: a write lock on the first 4096 bytes does not +// conflict with another process's write lock that starts at offset 4096. +TEST_F(FcntlLockTest, SetLockIsRegional) { + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666)); + + struct flock fl; + fl.l_type = F_WRLCK; + fl.l_whence = SEEK_SET; + fl.l_start = 0; + fl.l_len = 4096; + + // Take the write lock on bytes [0, 4096). + EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl), SyscallSucceeds()); + + // The child locks from offset 4096 (passed as fl.l_len) to the end of the + // file (length 0), which does not overlap our region, so it must succeed. + pid_t child_pid = 0; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + SubprocessLock(file.path(), true /* write lock */, + false /* nonblocking */, false /* no eintr retry */, + -1 /* no socket fd */, fl.l_len, 0, &child_pid)); + int status = 0; + ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds()); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << "Exited with code: " << status; +} + +// A held lock can be upgraded from read to write and downgraded again; other +// processes see the lock type currently in effect. +TEST_F(FcntlLockTest, SetLockUpgradeDowngrade) { + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666)); + + struct flock fl; + fl.l_type = F_RDLCK; + fl.l_whence = SEEK_SET; + fl.l_start = 0; + // len 0 locks all bytes despite how large the file grows. + fl.l_len = 0; + + // Take the read lock on the whole file. + EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl), SyscallSucceeds()); + + // Upgrade to a write lock. This will prevent anyone else from taking + // the lock.
+ fl.l_type = F_WRLCK; + EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl), SyscallSucceeds()); + + // Assert that another process trying to take a read lock on the same file + // will fail with EAGAIN. + pid_t child_pid = 0; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + SubprocessLock(file.path(), false /* read lock */, + false /* nonblocking */, false /* no eintr retry */, + -1 /* no socket fd */, fl.l_start, fl.l_len, &child_pid)); + + int status = 0; + ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds()); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == EAGAIN) + << "Exited with code: " << status; + + // Downgrade back to a read lock. + fl.l_type = F_RDLCK; + EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl), SyscallSucceeds()); + + // Do the same stint as before, but this time it should succeed. + child_pid = 0; + auto cleanup2 = ASSERT_NO_ERRNO_AND_VALUE( + SubprocessLock(file.path(), false /* read lock */, + false /* nonblocking */, false /* no eintr retry */, + -1 /* no socket fd */, fl.l_start, fl.l_len, &child_pid)); + + ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds()); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << "Exited with code: " << status; +} + +TEST_F(FcntlLockTest, SetLockDroppedOnClose) { + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666)); + + // While somewhat surprising, obtaining another fd to the same file and + // then closing it in this process drops *all* locks. + FileDescriptor other_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666)); + // other_fd is closed below, after the lock has been taken through fd. + + struct flock fl; + fl.l_type = F_WRLCK; + fl.l_whence = SEEK_SET; + fl.l_start = 0; + // len 0 locks all bytes despite how large the file grows. + fl.l_len = 0; + + // Take the write lock on the whole file through fd. + EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl), SyscallSucceeds()); + + other_fd.reset(); // Close. + + // Expect to be able to get the lock, given that the close above dropped it.
+ pid_t child_pid = 0; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + SubprocessLock(file.path(), true /* write lock */, + false /* nonblocking */, false /* no eintr retry */, + -1 /* no socket fd */, fl.l_start, fl.l_len, &child_pid)); + + int status = 0; + ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds()); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << "Exited with code: " << status; +} + +// Unlocking one region leaves locks on other regions of the same file in +// place. +TEST_F(FcntlLockTest, SetLockUnlock) { + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666)); + + // Setup two regional locks with different permissions. + struct flock fl0; + fl0.l_type = F_WRLCK; + fl0.l_whence = SEEK_SET; + fl0.l_start = 0; + fl0.l_len = 4096; + + struct flock fl1; + fl1.l_type = F_RDLCK; + fl1.l_whence = SEEK_SET; + fl1.l_start = 4096; + // len 0 locks all bytes from l_start on, despite how large the file grows. + fl1.l_len = 0; + + // Set both region locks. + EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl0), SyscallSucceeds()); + EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl1), SyscallSucceeds()); + + // Another process should fail to take a read lock on the entire file + // due to the regional write lock. + pid_t child_pid = 0; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(SubprocessLock( + file.path(), false /* read lock */, false /* nonblocking */, + false /* no eintr retry */, -1 /* no socket fd */, 0, 0, &child_pid)); + + int status = 0; + ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds()); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == EAGAIN) + << "Exited with code: " << status; + + // Then only unlock the writable one. This should ensure that other + // processes can take any read lock that they want. + fl0.l_type = F_UNLCK; + EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl0), SyscallSucceeds()); + + // Another process should now succeed to get a read lock on the entire file.
+ child_pid = 0; + auto cleanup2 = ASSERT_NO_ERRNO_AND_VALUE(SubprocessLock( + file.path(), false /* read lock */, false /* nonblocking */, + false /* no eintr retry */, -1 /* no socket fd */, 0, 0, &child_pid)); + ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds()); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << "Exited with code: " << status; +} + +// A lock follows the file, not its path: it is still in effect after the file +// has been renamed. +TEST_F(FcntlLockTest, SetLockAcrossRename) { + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666)); + + // Set up a single write lock covering the whole file. + struct flock fl; + fl.l_type = F_WRLCK; + fl.l_whence = SEEK_SET; + fl.l_start = 0; + // len 0 locks all bytes despite how large the file grows. + fl.l_len = 0; + + // Set the region lock. + EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl), SyscallSucceeds()); + + // Rename the file to someplace nearby. + std::string const newpath = NewTempAbsPath(); + EXPECT_THAT(rename(file.path().c_str(), newpath.c_str()), SyscallSucceeds()); + + // Another process should fail to take a read lock on the renamed file + // since we still have an open handle to the inode. + pid_t child_pid = 0; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + SubprocessLock(newpath, false /* read lock */, false /* nonblocking */, + false /* no eintr retry */, -1 /* no socket fd */, + fl.l_start, fl.l_len, &child_pid)); + + int status = 0; + ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds()); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == EAGAIN) + << "Exited with code: " << status; +} + +// NOTE: The blocking tests below aren't perfect. It's hard to assert exactly +// what the kernel did while handling a syscall. These tests are timing based +// because there really isn't any other reasonable way to assert that correct +// blocking behavior happened.
+ +// This test will verify that blocking works as expected when another process +// holds a write lock when obtaining a write lock. This test will hold the lock +// for some amount of time and then wait for the second process to send over the +// socket_fd the amount of time it was blocked for before the lock succeeded. +TEST_F(FcntlLockTest, SetWriteLockThenBlockingWriteLock) { + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666)); + + struct flock fl; + fl.l_type = F_WRLCK; + fl.l_whence = SEEK_SET; + fl.l_start = 0; + fl.l_len = 0; + + // Take the write lock. + ASSERT_THAT(fcntl(fd.get(), F_SETLKW, &fl), SyscallSucceeds()); + + // Attempt to take the write lock in a sub process. This will immediately + // block so we will release our lock after some amount of time and then assert + // the amount of time the other process was blocked for. + pid_t child_pid = 0; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(SubprocessLock( + file.path(), true /* write lock */, true /* Blocking Lock */, + true /* Retry on EINTR */, fds_[1] /* Socket fd for timing information */, + fl.l_start, fl.l_len, &child_pid)); + + // We will wait kHoldLockFor before we release our lock allowing the + // subprocess to obtain it. + constexpr absl::Duration kHoldLockFor = absl::Seconds(5); + const int64_t kMinBlockTimeUsec = absl::ToInt64Microseconds(absl::Seconds(1)); + + absl::SleepFor(kHoldLockFor); + + // Unlock our write lock. + fl.l_type = F_UNLCK; + ASSERT_THAT(fcntl(fd.get(), F_SETLKW, &fl), SyscallSucceeds()); + + // Read the blocked time from the subprocess socket. + int64_t subprocess_blocked_time_usec = GetSubprocessFcntlTimeInUsec(); + + // The subprocess must have been blocked for longer than kMinBlockTimeUsec. + EXPECT_GT(subprocess_blocked_time_usec, kMinBlockTimeUsec); + + // The FCNTL write lock must always succeed as it will simply block until it + // can obtain the lock.
+ int status = 0; + ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds()); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << "Exited with code: " << status; +} + +// This test will verify that blocking works as expected when another process +// holds a read lock when obtaining a write lock. This test will hold the lock +// for some amount of time and then wait for the second process to send over the +// socket_fd the amount of time it was blocked for before the lock succeeded. +TEST_F(FcntlLockTest, SetReadLockThenBlockingWriteLock) { + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666)); + + struct flock fl; + fl.l_type = F_RDLCK; + fl.l_whence = SEEK_SET; + fl.l_start = 0; + fl.l_len = 0; + + // Take the read lock. + ASSERT_THAT(fcntl(fd.get(), F_SETLKW, &fl), SyscallSucceeds()); + + // Attempt to take the write lock in a sub process. This will immediately + // block so we will release our lock after some amount of time and then assert + // the amount of time the other process was blocked for. + pid_t child_pid = 0; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(SubprocessLock( + file.path(), true /* write lock */, true /* Blocking Lock */, + true /* Retry on EINTR */, fds_[1] /* Socket fd for timing information */, + fl.l_start, fl.l_len, &child_pid)); + + // We will wait kHoldLockFor before we release our lock allowing the + // subprocess to obtain it. + constexpr absl::Duration kHoldLockFor = absl::Seconds(5); + + const int64_t kMinBlockTimeUsec = absl::ToInt64Microseconds(absl::Seconds(1)); + + absl::SleepFor(kHoldLockFor); + + // Unlock our READ lock. + fl.l_type = F_UNLCK; + ASSERT_THAT(fcntl(fd.get(), F_SETLKW, &fl), SyscallSucceeds()); + + // Read the blocked time from the subprocess socket. + int64_t subprocess_blocked_time_usec = GetSubprocessFcntlTimeInUsec(); + + // The subprocess must have been blocked for longer than kMinBlockTimeUsec.
+ EXPECT_GT(subprocess_blocked_time_usec, kMinBlockTimeUsec); + + // The FCNTL write lock must always succeed as it will simply block until it + // can obtain the lock. + int status = 0; + ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds()); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << "Exited with code: " << status; +} + +// This test will verify that blocking works as expected when another process +// holds a write lock when obtaining a read lock. This test will hold the lock +// for some amount of time and then wait for the second process to send over the +// socket_fd the amount of time it was blocked for before the lock succeeded. +TEST_F(FcntlLockTest, SetWriteLockThenBlockingReadLock) { + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666)); + + struct flock fl; + fl.l_type = F_WRLCK; + fl.l_whence = SEEK_SET; + fl.l_start = 0; + fl.l_len = 0; + + // Take the write lock. + ASSERT_THAT(fcntl(fd.get(), F_SETLKW, &fl), SyscallSucceeds()); + + // Attempt to take the read lock in a sub process. This will immediately block + // so we will release our lock after some amount of time and then assert the + // amount of time the other process was blocked for. + pid_t child_pid = 0; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(SubprocessLock( + file.path(), false /* read lock */, true /* Blocking Lock */, + true /* Retry on EINTR */, fds_[1] /* Socket fd for timing information */, + fl.l_start, fl.l_len, &child_pid)); + + // We will wait kHoldLockFor before we release our lock allowing the + // subprocess to obtain it. + constexpr absl::Duration kHoldLockFor = absl::Seconds(5); + + const int64_t kMinBlockTimeUsec = absl::ToInt64Microseconds(absl::Seconds(1)); + + absl::SleepFor(kHoldLockFor); + + // Unlock our write lock.
+ fl.l_type = F_UNLCK; + ASSERT_THAT(fcntl(fd.get(), F_SETLKW, &fl), SyscallSucceeds()); + + // Read the blocked time from the subprocess socket. + int64_t subprocess_blocked_time_usec = GetSubprocessFcntlTimeInUsec(); + + // We must have been waiting at least kMinBlockTime. + EXPECT_GT(subprocess_blocked_time_usec, kMinBlockTimeUsec); + + // The FCNTL read lock must always succeed as it will simply block until it + // can obtain the lock. + int status = 0; + ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds()); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << "Exited with code: " << status; +} + +// This test will verify that when one process only holds a read lock that +// another will not block while obtaining a read lock when F_SETLKW is used. +TEST_F(FcntlLockTest, SetReadLockThenBlockingReadLock) { + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666)); + + struct flock fl; + fl.l_type = F_RDLCK; + fl.l_whence = SEEK_SET; + fl.l_start = 0; + fl.l_len = 0; + + // Take the READ lock. + ASSERT_THAT(fcntl(fd.get(), F_SETLKW, &fl), SyscallSucceeds()); + + // Attempt to take the read lock in a sub process. Since multiple processes + // can hold a read lock this should immediately return without blocking + // even though we used F_SETLKW in the subprocess. + pid_t child_pid = 0; + auto sp = ASSERT_NO_ERRNO_AND_VALUE(SubprocessLock( + file.path(), false /* read lock */, true /* Blocking Lock */, + true /* Retry on EINTR */, -1 /* No fd, should not block */, fl.l_start, + fl.l_len, &child_pid)); + + // We never release the lock and the subprocess should still obtain it without + // blocking for any period of time. 
+ int status = 0; + ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds()); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << "Exited with code: " << status; +} + +TEST(FcntlTest, GetO_ASYNC) { + FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( + Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); + + int flag_fl = -1; + ASSERT_THAT(flag_fl = fcntl(s.get(), F_GETFL), SyscallSucceeds()); + EXPECT_EQ(flag_fl & O_ASYNC, 0); + + int flag_fd = -1; + ASSERT_THAT(flag_fd = fcntl(s.get(), F_GETFD), SyscallSucceeds()); + EXPECT_EQ(flag_fd & O_ASYNC, 0); +} + +TEST(FcntlTest, SetFlO_ASYNC) { + FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( + Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); + + int before_fl = -1; + ASSERT_THAT(before_fl = fcntl(s.get(), F_GETFL), SyscallSucceeds()); + + int before_fd = -1; + ASSERT_THAT(before_fd = fcntl(s.get(), F_GETFD), SyscallSucceeds()); + + ASSERT_THAT(fcntl(s.get(), F_SETFL, before_fl | O_ASYNC), SyscallSucceeds()); + + int after_fl = -1; + ASSERT_THAT(after_fl = fcntl(s.get(), F_GETFL), SyscallSucceeds()); + EXPECT_EQ(after_fl, before_fl | O_ASYNC); + + int after_fd = -1; + ASSERT_THAT(after_fd = fcntl(s.get(), F_GETFD), SyscallSucceeds()); + EXPECT_EQ(after_fd, before_fd); +} + +TEST(FcntlTest, SetFdO_ASYNC) { + FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( + Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); + + int before_fl = -1; + ASSERT_THAT(before_fl = fcntl(s.get(), F_GETFL), SyscallSucceeds()); + + int before_fd = -1; + ASSERT_THAT(before_fd = fcntl(s.get(), F_GETFD), SyscallSucceeds()); + + ASSERT_THAT(fcntl(s.get(), F_SETFD, before_fd | O_ASYNC), SyscallSucceeds()); + + int after_fl = -1; + ASSERT_THAT(after_fl = fcntl(s.get(), F_GETFL), SyscallSucceeds()); + EXPECT_EQ(after_fl, before_fl); + + int after_fd = -1; + ASSERT_THAT(after_fd = fcntl(s.get(), F_GETFD), SyscallSucceeds()); + EXPECT_EQ(after_fd, before_fd); +} + +TEST(FcntlTest, 
DupAfterO_ASYNC) { + FileDescriptor s1 = ASSERT_NO_ERRNO_AND_VALUE( + Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); + + int before = -1; + ASSERT_THAT(before = fcntl(s1.get(), F_GETFL), SyscallSucceeds()); + + ASSERT_THAT(fcntl(s1.get(), F_SETFL, before | O_ASYNC), SyscallSucceeds()); + + FileDescriptor fd2 = ASSERT_NO_ERRNO_AND_VALUE(s1.Dup()); + + int after = -1; + ASSERT_THAT(after = fcntl(fd2.get(), F_GETFL), SyscallSucceeds()); + EXPECT_EQ(after & O_ASYNC, O_ASYNC); +} + +TEST(FcntlTest, GetOwnNone) { + FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( + Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); + + // Use the raw syscall because the glibc wrapper may convert F_{GET,SET}OWN + // into F_{GET,SET}OWN_EX. + EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN), + SyscallSucceedsWithValue(0)); + MaybeSave(); +} + +TEST(FcntlTest, GetOwnExNone) { + FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( + Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); + + f_owner_ex owner = {}; + EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN_EX, &owner), + SyscallSucceedsWithValue(0)); +} + +TEST(FcntlTest, SetOwnInvalidPid) { + SKIP_IF(IsRunningWithVFS1()); + + FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( + Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); + + EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN, 12345678), + SyscallFailsWithErrno(ESRCH)); +} + +TEST(FcntlTest, SetOwnInvalidPgrp) { + SKIP_IF(IsRunningWithVFS1()); + + FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( + Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); + + EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN, -12345678), + SyscallFailsWithErrno(ESRCH)); +} + +TEST(FcntlTest, SetOwnPid) { + FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( + Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); + + pid_t pid; + EXPECT_THAT(pid = getpid(), SyscallSucceeds()); + + ASSERT_THAT(syscall(__NR_fcntl, s.get(), 
F_SETOWN, pid), SyscallSucceeds()); + + EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN), + SyscallSucceedsWithValue(pid)); + MaybeSave(); +} + +TEST(FcntlTest, SetOwnPgrp) { + FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( + Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); + + pid_t pgid; + EXPECT_THAT(pgid = getpgrp(), SyscallSucceeds()); + + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN, -pgid), SyscallSucceeds()); + + // Verify with F_GETOWN_EX; using F_GETOWN on Linux may incorrectly treat the + // negative return value as an error, converting the return value to -1 and + // setting errno accordingly. + f_owner_ex got_owner = {}; + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN_EX, &got_owner), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(got_owner.type, F_OWNER_PGRP); + EXPECT_EQ(got_owner.pid, pgid); + MaybeSave(); +} + +TEST(FcntlTest, SetOwnUnset) { + FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( + Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); + + // Set and unset pid. + pid_t pid; + EXPECT_THAT(pid = getpid(), SyscallSucceeds()); + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN, pid), SyscallSucceeds()); + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN, 0), SyscallSucceeds()); + + EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN), + SyscallSucceedsWithValue(0)); + + // Set and unset pgid. + pid_t pgid; + EXPECT_THAT(pgid = getpgrp(), SyscallSucceeds()); + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN, -pgid), SyscallSucceeds()); + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN, 0), SyscallSucceeds()); + + EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN), + SyscallSucceedsWithValue(0)); + MaybeSave(); +} + +// F_SETOWN flips the sign of negative values, an operation that is guarded +// against overflow. 
+TEST(FcntlTest, SetOwnOverflow) { + FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( + Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); + + EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN, INT_MIN), + SyscallFailsWithErrno(EINVAL)); +} + +TEST(FcntlTest, SetOwnExInvalidType) { + FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( + Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); + + f_owner_ex owner = {}; + owner.type = __pid_type(-1); + EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &owner), + SyscallFailsWithErrno(EINVAL)); +} + +TEST(FcntlTest, SetOwnExInvalidTid) { + FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( + Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); + + f_owner_ex owner = {}; + owner.type = F_OWNER_TID; + owner.pid = -1; + + EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &owner), + SyscallFailsWithErrno(ESRCH)); +} + +TEST(FcntlTest, SetOwnExInvalidPid) { + FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( + Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); + + f_owner_ex owner = {}; + owner.type = F_OWNER_PID; + owner.pid = -1; + + EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &owner), + SyscallFailsWithErrno(ESRCH)); +} + +TEST(FcntlTest, SetOwnExInvalidPgrp) { + FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( + Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); + + f_owner_ex owner = {}; + owner.type = F_OWNER_PGRP; + owner.pid = -1; + + EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &owner), + SyscallFailsWithErrno(ESRCH)); +} + +TEST(FcntlTest, SetOwnExTid) { + FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( + Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); + + f_owner_ex owner = {}; + owner.type = F_OWNER_TID; + EXPECT_THAT(owner.pid = syscall(__NR_gettid), SyscallSucceeds()); + + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &owner), + SyscallSucceeds()); + + EXPECT_THAT(syscall(__NR_fcntl, s.get(), 
F_GETOWN), + SyscallSucceedsWithValue(owner.pid)); + MaybeSave(); +} + +TEST(FcntlTest, SetOwnExPid) { + FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( + Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); + + f_owner_ex owner = {}; + owner.type = F_OWNER_PID; + EXPECT_THAT(owner.pid = getpid(), SyscallSucceeds()); + + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &owner), + SyscallSucceeds()); + + EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN), + SyscallSucceedsWithValue(owner.pid)); + MaybeSave(); +} + +TEST(FcntlTest, SetOwnExPgrp) { + FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( + Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); + + f_owner_ex set_owner = {}; + set_owner.type = F_OWNER_PGRP; + EXPECT_THAT(set_owner.pid = getpgrp(), SyscallSucceeds()); + + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &set_owner), + SyscallSucceeds()); + + // Verify with F_GETOWN_EX; using F_GETOWN on Linux may incorrectly treat the + // negative return value as an error, converting the return value to -1 and + // setting errno accordingly. + f_owner_ex got_owner = {}; + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN_EX, &got_owner), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(got_owner.type, set_owner.type); + EXPECT_EQ(got_owner.pid, set_owner.pid); + MaybeSave(); +} + +TEST(FcntlTest, SetOwnExUnset) { + SKIP_IF(IsRunningWithVFS1()); + + FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( + Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); + + // Set and unset pid. + f_owner_ex owner = {}; + owner.type = F_OWNER_PID; + EXPECT_THAT(owner.pid = getpid(), SyscallSucceeds()); + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &owner), + SyscallSucceeds()); + owner.pid = 0; + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &owner), + SyscallSucceeds()); + + EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN), + SyscallSucceedsWithValue(0)); + + // Set and unset pgid. 
+ owner.type = F_OWNER_PGRP; + EXPECT_THAT(owner.pid = getpgrp(), SyscallSucceeds()); + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &owner), + SyscallSucceeds()); + owner.pid = 0; + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &owner), + SyscallSucceeds()); + + EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN), + SyscallSucceedsWithValue(0)); + MaybeSave(); +} + +TEST(FcntlTest, GetOwnExTid) { + FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( + Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); + + f_owner_ex set_owner = {}; + set_owner.type = F_OWNER_TID; + EXPECT_THAT(set_owner.pid = syscall(__NR_gettid), SyscallSucceeds()); + + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &set_owner), + SyscallSucceeds()); + + f_owner_ex got_owner = {}; + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN_EX, &got_owner), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(got_owner.type, set_owner.type); + EXPECT_EQ(got_owner.pid, set_owner.pid); +} + +TEST(FcntlTest, GetOwnExPid) { + FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( + Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); + + f_owner_ex set_owner = {}; + set_owner.type = F_OWNER_PID; + EXPECT_THAT(set_owner.pid = getpid(), SyscallSucceeds()); + + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &set_owner), + SyscallSucceeds()); + + f_owner_ex got_owner = {}; + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN_EX, &got_owner), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(got_owner.type, set_owner.type); + EXPECT_EQ(got_owner.pid, set_owner.pid); +} + +TEST(FcntlTest, GetOwnExPgrp) { + FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( + Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); + + f_owner_ex set_owner = {}; + set_owner.type = F_OWNER_PGRP; + EXPECT_THAT(set_owner.pid = getpgrp(), SyscallSucceeds()); + + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &set_owner), + SyscallSucceeds()); + + f_owner_ex got_owner = {}; + 
ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN_EX, &got_owner), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(got_owner.type, set_owner.type); + EXPECT_EQ(got_owner.pid, set_owner.pid); +} + +// Make sure that making multiple concurrent changes to async signal generation +// does not cause any race issues. +TEST(FcntlTest, SetFlSetOwnDoNotRace) { + FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( + Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); + + pid_t pid; + EXPECT_THAT(pid = getpid(), SyscallSucceeds()); + + constexpr absl::Duration runtime = absl::Milliseconds(300); + auto setAsync = [&s, &runtime] { + for (auto start = absl::Now(); absl::Now() - start < runtime;) { + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETFL, O_ASYNC), + SyscallSucceeds()); + sched_yield(); + } + }; + auto resetAsync = [&s, &runtime] { + for (auto start = absl::Now(); absl::Now() - start < runtime;) { + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETFL, 0), SyscallSucceeds()); + sched_yield(); + } + }; + auto setOwn = [&s, &pid, &runtime] { + for (auto start = absl::Now(); absl::Now() - start < runtime;) { + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN, pid), + SyscallSucceeds()); + sched_yield(); + } + }; + + std::list<ScopedThread> threads; + for (int i = 0; i < 10; i++) { + threads.emplace_back(setAsync); + threads.emplace_back(resetAsync); + threads.emplace_back(setOwn); + } +} + +} // namespace + +} // namespace testing +} // namespace gvisor + +int main(int argc, char** argv) { + gvisor::testing::TestInit(&argc, &argv); + + const std::string setlock_on = absl::GetFlag(FLAGS_child_setlock_on); + if (!setlock_on.empty()) { + int socket_fd = absl::GetFlag(FLAGS_socket_fd); + int fd = open(setlock_on.c_str(), O_RDWR, 0666); + if (fd == -1 && errno != 0) { + int err = errno; + std::cerr << "CHILD open " << setlock_on << " failed " << err + << std::endl; + exit(err); + } + + struct flock fl; + if (absl::GetFlag(FLAGS_child_setlock_write)) { + fl.l_type = F_WRLCK; 
+ } else { + fl.l_type = F_RDLCK; + } + fl.l_whence = SEEK_SET; + fl.l_start = absl::GetFlag(FLAGS_child_setlock_start); + fl.l_len = absl::GetFlag(FLAGS_child_setlock_len); + + // Test the fcntl. + int err = 0; + int ret = 0; + + gvisor::testing::MonotonicTimer timer; + timer.Start(); + do { + ret = fcntl(fd, absl::GetFlag(FLAGS_blocking) ? F_SETLKW : F_SETLK, &fl); + } while (absl::GetFlag(FLAGS_retry_eintr) && ret == -1 && errno == EINTR); + auto usec = absl::ToInt64Microseconds(timer.Duration()); + + if (ret == -1 && errno != 0) { + err = errno; + std::cerr << "CHILD lock " << setlock_on << " failed " << err + << std::endl; + } + + // If there is a socket fd let's send back the time in microseconds it took + // to execute this syscall. + if (socket_fd != -1) { + gvisor::testing::WriteFd(socket_fd, reinterpret_cast<void*>(&usec), + sizeof(usec)); + close(socket_fd); + } + + close(fd); + exit(err); + } + + return gvisor::testing::RunAllTests(); +} diff --git a/test/syscalls/linux/file_base.h b/test/syscalls/linux/file_base.h new file mode 100644 index 000000000..fb418e052 --- /dev/null +++ b/test/syscalls/linux/file_base.h @@ -0,0 +1,100 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef GVISOR_TEST_SYSCALLS_FILE_BASE_H_ +#define GVISOR_TEST_SYSCALLS_FILE_BASE_H_ + +#include <arpa/inet.h> +#include <errno.h> +#include <fcntl.h> +#include <netinet/in.h> +#include <stddef.h> +#include <stdio.h> +#include <string.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/uio.h> +#include <unistd.h> + +#include <cstring> +#include <string> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/strings/string_view.h" +#include "test/util/file_descriptor.h" +#include "test/util/posix_error.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +class FileTest : public ::testing::Test { + public: + void SetUp() override { + test_pipe_[0] = -1; + test_pipe_[1] = -1; + + test_file_name_ = NewTempAbsPath(); + test_file_fd_ = ASSERT_NO_ERRNO_AND_VALUE( + Open(test_file_name_, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR)); + + ASSERT_THAT(pipe(test_pipe_), SyscallSucceeds()); + ASSERT_THAT(fcntl(test_pipe_[0], F_SETFL, O_NONBLOCK), SyscallSucceeds()); + } + + // CloseFile will allow the test to manually close the file descriptor. + void CloseFile() { test_file_fd_.reset(); } + + // UnlinkFile will allow the test to manually unlink the file. + void UnlinkFile() { + if (!test_file_name_.empty()) { + EXPECT_THAT(unlink(test_file_name_.c_str()), SyscallSucceeds()); + test_file_name_.clear(); + } + } + + // ClosePipes will allow the test to manually close the pipes. 
+ void ClosePipes() { + if (test_pipe_[0] >= 0) { + EXPECT_THAT(close(test_pipe_[0]), SyscallSucceeds()); + } + + if (test_pipe_[1] >= 0) { + EXPECT_THAT(close(test_pipe_[1]), SyscallSucceeds()); + } + + test_pipe_[0] = -1; + test_pipe_[1] = -1; + } + + void TearDown() override { + CloseFile(); + UnlinkFile(); + ClosePipes(); + } + + protected: + std::string test_file_name_; + FileDescriptor test_file_fd_; + + int test_pipe_[2]; +}; + +} // namespace testing +} // namespace gvisor + +#endif // GVISOR_TEST_SYSCALLS_FILE_BASE_H_ diff --git a/test/syscalls/linux/flock.cc b/test/syscalls/linux/flock.cc new file mode 100644 index 000000000..638a93979 --- /dev/null +++ b/test/syscalls/linux/flock.cc @@ -0,0 +1,636 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <errno.h> +#include <sys/file.h> + +#include <string> + +#include "gtest/gtest.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/syscalls/linux/file_base.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" +#include "test/util/timer_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +class FlockTest : public FileTest {}; + +TEST_F(FlockTest, InvalidOpCombinations) { + // The operation cannot be both exclusive and shared.
+ EXPECT_THAT(flock(test_file_fd_.get(), LOCK_EX | LOCK_SH | LOCK_NB), + SyscallFailsWithErrno(EINVAL)); + + // Locking and unlocking in the same call doesn't make sense. + EXPECT_THAT(flock(test_file_fd_.get(), LOCK_EX | LOCK_UN | LOCK_NB), + SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(flock(test_file_fd_.get(), LOCK_SH | LOCK_UN | LOCK_NB), + SyscallFailsWithErrno(EINVAL)); +} + +TEST_F(FlockTest, NoOperationSpecified) { + // Not specifying an operation is invalid. + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_NB), + SyscallFailsWithErrno(EINVAL)); +} + +TEST_F(FlockTest, TestSimpleExLock) { + // Test that we can obtain an exclusive lock (no other holders) + // and that we can unlock it. + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_EX | LOCK_NB), + SyscallSucceedsWithValue(0)); + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceedsWithValue(0)); +} + +TEST_F(FlockTest, TestSimpleShLock) { + // Test that we can obtain a shared lock (no other holders) + // and that we can unlock it. + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_SH | LOCK_NB), + SyscallSucceedsWithValue(0)); + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceedsWithValue(0)); +} + +TEST_F(FlockTest, TestLockableAnyMode) { + // flock(2): A shared or exclusive lock can be placed on a file + // regardless of the mode in which the file was opened. + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE( + Open(test_file_name_, O_RDONLY)); // open read only to test + + // Mode shouldn't prevent us from taking an exclusive lock. + ASSERT_THAT(flock(fd.get(), LOCK_EX | LOCK_NB), SyscallSucceedsWithValue(0)); + + // Unlock + ASSERT_THAT(flock(fd.get(), LOCK_UN), SyscallSucceedsWithValue(0)); +} + +TEST_F(FlockTest, TestUnlockWithNoHolders) { + // Test that unlocking when no one holds a lock succeeds.
+ ASSERT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceedsWithValue(0)); +} + +TEST_F(FlockTest, TestRepeatedExLockingBySameHolder) { + // Test that repeated locking by the same holder for the + // same type of lock works correctly. + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_NB | LOCK_EX), + SyscallSucceedsWithValue(0)); + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_NB | LOCK_EX), + SyscallSucceedsWithValue(0)); + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceedsWithValue(0)); +} + +TEST_F(FlockTest, TestRepeatedExLockingSingleUnlock) { + // Test that repeated locking by the same holder for the + // same type of lock works correctly and that a single unlock is required. + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_NB | LOCK_EX), + SyscallSucceedsWithValue(0)); + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_NB | LOCK_EX), + SyscallSucceedsWithValue(0)); + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceedsWithValue(0)); + + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDONLY)); + + // Should be unlocked at this point + ASSERT_THAT(flock(fd.get(), LOCK_NB | LOCK_EX), SyscallSucceedsWithValue(0)); + + ASSERT_THAT(flock(fd.get(), LOCK_UN), SyscallSucceedsWithValue(0)); +} + +TEST_F(FlockTest, TestRepeatedShLockingBySameHolder) { + // Test that repeated locking by the same holder for the + // same type of lock works correctly. + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_NB | LOCK_SH), + SyscallSucceedsWithValue(0)); + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_NB | LOCK_SH), + SyscallSucceedsWithValue(0)); + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceedsWithValue(0)); +} + +TEST_F(FlockTest, TestSingleHolderUpgrade) { + // Test that a shared lock is upgradable when no one else holds a lock. 
+ ASSERT_THAT(flock(test_file_fd_.get(), LOCK_NB | LOCK_SH), + SyscallSucceedsWithValue(0)); + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_NB | LOCK_EX), + SyscallSucceedsWithValue(0)); + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceedsWithValue(0)); +} + +TEST_F(FlockTest, TestSingleHolderDowngrade) { + // Test single holder lock downgrade case. + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_EX | LOCK_NB), + SyscallSucceedsWithValue(0)); + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_SH | LOCK_NB), + SyscallSucceedsWithValue(0)); + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceedsWithValue(0)); +} + +TEST_F(FlockTest, TestMultipleShared) { + // This is a simple test to verify that multiple independent shared + // locks will be granted. + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_SH | LOCK_NB), + SyscallSucceedsWithValue(0)); + + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR)); + + // A shared lock should be granted as there only exists other shared locks. + ASSERT_THAT(flock(fd.get(), LOCK_SH | LOCK_NB), SyscallSucceedsWithValue(0)); + + // Unlock both. + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceedsWithValue(0)); + ASSERT_THAT(flock(fd.get(), LOCK_UN), SyscallSucceedsWithValue(0)); +} + +/* + * flock(2): If a process uses open(2) (or similar) to obtain more than one + * descriptor for the same file, these descriptors are treated + * independently by flock(). An attempt to lock the file using one of + * these file descriptors may be denied by a lock that the calling process + * has already placed via another descriptor. + */ +TEST_F(FlockTest, TestMultipleHolderSharedExclusive) { + // This test will verify that an exclusive lock will not be granted + // while a shared is held. 
+ ASSERT_THAT(flock(test_file_fd_.get(), LOCK_SH | LOCK_NB), + SyscallSucceedsWithValue(0)); + + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR)); + + // Verify we're unable to get an exclusive lock via the second FD + // because someone is holding a shared lock. + ASSERT_THAT(flock(fd.get(), LOCK_EX | LOCK_NB), + SyscallFailsWithErrno(EWOULDBLOCK)); + + // Unlock + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceedsWithValue(0)); +} + +TEST_F(FlockTest, TestSharedLockFailExclusiveHolder) { + // This test will verify that a shared lock is denied while + // someone holds an exclusive lock. + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_EX | LOCK_NB), + SyscallSucceedsWithValue(0)); + + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR)); + + // Verify we're unable to get a shared lock via the second FD + // because someone is holding an exclusive lock. + ASSERT_THAT(flock(fd.get(), LOCK_SH | LOCK_NB), + SyscallFailsWithErrno(EWOULDBLOCK)); + + // Unlock + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceedsWithValue(0)); +} + +TEST_F(FlockTest, TestExclusiveLockFailExclusiveHolder) { + // This test will verify that an exclusive lock is denied while + // someone already holds an exclusive lock. + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_EX | LOCK_NB), + SyscallSucceedsWithValue(0)); + + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR)); + + // Verify we're unable to get an exclusive lock via the second FD + // because someone is already holding an exclusive lock.
+ ASSERT_THAT(flock(fd.get(), LOCK_EX | LOCK_NB), + SyscallFailsWithErrno(EWOULDBLOCK)); + + // Unlock + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceedsWithValue(0)); +} + +TEST_F(FlockTest, TestMultipleHolderSharedExclusiveUpgrade) { + // This test will verify that we cannot obtain an exclusive lock while + // a shared lock is held by another descriptor, then verify that an upgrade + // is possible on a shared lock once all other shared locks have closed. + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_SH | LOCK_NB), + SyscallSucceedsWithValue(0)); + + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR)); + + // Verify we're unable to get an exclusive lock via the second FD because + // a shared lock is held. + ASSERT_THAT(flock(fd.get(), LOCK_EX | LOCK_NB), + SyscallFailsWithErrno(EWOULDBLOCK)); + + // Verify that we can get a shared lock via the second descriptor instead + ASSERT_THAT(flock(fd.get(), LOCK_SH | LOCK_NB), SyscallSucceedsWithValue(0)); + + // Unlock the first and there will only be one shared lock remaining. + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceedsWithValue(0)); + + // Upgrade 2nd fd. + ASSERT_THAT(flock(fd.get(), LOCK_EX | LOCK_NB), SyscallSucceedsWithValue(0)); + + // Finally unlock the second + ASSERT_THAT(flock(fd.get(), LOCK_UN), SyscallSucceedsWithValue(0)); +} + +TEST_F(FlockTest, TestMultipleHolderSharedExclusiveDowngrade) { + // This test will verify that a shared lock is not obtainable while an + // exclusive lock is held but that once the first is downgraded that + // the second independent file descriptor can also get a shared lock. + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_EX | LOCK_NB), + SyscallSucceedsWithValue(0)); + + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR)); + + // Verify We're unable to get a shared lock via the second FD because + // an exclusive lock is held. 
+ ASSERT_THAT(flock(fd.get(), LOCK_SH | LOCK_NB), + SyscallFailsWithErrno(EWOULDBLOCK)); + + // Verify that we can downgrade the first. + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_SH | LOCK_NB), + SyscallSucceedsWithValue(0)); + + // Now verify that we can obtain a shared lock since the first was downgraded. + ASSERT_THAT(flock(fd.get(), LOCK_SH | LOCK_NB), SyscallSucceedsWithValue(0)); + + // Finally unlock both. + ASSERT_THAT(flock(fd.get(), LOCK_UN), SyscallSucceedsWithValue(0)); + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceedsWithValue(0)); +} + +/* + * flock(2): Locks created by flock() are associated with an open file table + * entry. This means that duplicate file descriptors (created by, for example, + * fork(2) or dup(2)) refer to the same lock, and this lock may be modified or + * released using any of these descriptors. Furthermore, the lock is released + * either by an explicit LOCK_UN operation on any of these duplicate descriptors + * or when all such descriptors have been closed. + */ +TEST_F(FlockTest, TestDupFdUpgrade) { + // This test will verify that a shared lock is upgradeable via a dupped + // file descriptor, if the FD wasn't dupped this would fail. + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_SH | LOCK_NB), + SyscallSucceedsWithValue(0)); + + const FileDescriptor dup_fd = ASSERT_NO_ERRNO_AND_VALUE(test_file_fd_.Dup()); + + // Now we should be able to upgrade via the dupped fd. + ASSERT_THAT(flock(dup_fd.get(), LOCK_EX | LOCK_NB), + SyscallSucceedsWithValue(0)); + + // Validate unlock via dupped fd. + ASSERT_THAT(flock(dup_fd.get(), LOCK_UN), SyscallSucceedsWithValue(0)); +} + +TEST_F(FlockTest, TestDupFdDowngrade) { + // This test will verify that a exclusive lock is downgradable via a dupped + // file descriptor, if the FD wasn't dupped this would fail. 
+ ASSERT_THAT(flock(test_file_fd_.get(), LOCK_EX | LOCK_NB), + SyscallSucceedsWithValue(0)); + + const FileDescriptor dup_fd = ASSERT_NO_ERRNO_AND_VALUE(test_file_fd_.Dup()); + + // Now we should be able to downgrade via the dupped fd. + ASSERT_THAT(flock(dup_fd.get(), LOCK_SH | LOCK_NB), + SyscallSucceedsWithValue(0)); + + // Validate unlock via dupped fd + ASSERT_THAT(flock(dup_fd.get(), LOCK_UN), SyscallSucceedsWithValue(0)); +} + +TEST_F(FlockTest, TestDupFdCloseRelease) { + // flock(2): Furthermore, the lock is released either by an explicit LOCK_UN + // operation on any of these duplicate descriptors, or when all such + // descriptors have been closed. + // + // This test will verify that a dupped fd closing will not release the + // underlying lock until all such dupped fds have closed. + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_EX | LOCK_NB), + SyscallSucceedsWithValue(0)); + + FileDescriptor dup_fd = ASSERT_NO_ERRNO_AND_VALUE(test_file_fd_.Dup()); + + // At this point we have ONE exclusive locked referenced by two different fds. + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR)); + + // Validate that we cannot get a lock on a new unrelated FD. + ASSERT_THAT(flock(fd.get(), LOCK_EX | LOCK_NB), + SyscallFailsWithErrno(EWOULDBLOCK)); + + // Closing the dupped fd shouldn't affect the lock until all are closed. + dup_fd.reset(); // Closed the duped fd. + + // Validate that we still cannot get a lock on a new unrelated FD. + ASSERT_THAT(flock(fd.get(), LOCK_EX | LOCK_NB), + SyscallFailsWithErrno(EWOULDBLOCK)); + + // Closing the first fd + CloseFile(); // Will validate the syscall succeeds. + + // Now we should actually be able to get a lock since all fds related to + // the first lock are closed. + ASSERT_THAT(flock(fd.get(), LOCK_EX | LOCK_NB), SyscallSucceedsWithValue(0)); + + // Unlock. 
+ ASSERT_THAT(flock(fd.get(), LOCK_UN), SyscallSucceedsWithValue(0)); +} + +TEST_F(FlockTest, TestDupFdUnlockRelease) { + /* flock(2): Furthermore, the lock is released either by an explicit LOCK_UN + * operation on any of these duplicate descriptors, or when all such + * descriptors have been closed. + */ + // This test will verify that an explict unlock on a dupped FD will release + // the underlying lock unlike the previous case where close on a dup was + // not enough to release the lock. + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_EX | LOCK_NB), + SyscallSucceedsWithValue(0)); + + const FileDescriptor dup_fd = ASSERT_NO_ERRNO_AND_VALUE(test_file_fd_.Dup()); + + // At this point we have ONE exclusive locked referenced by two different fds. + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR)); + + // Validate that we cannot get a lock on a new unrelated FD. + ASSERT_THAT(flock(fd.get(), LOCK_EX | LOCK_NB), + SyscallFailsWithErrno(EWOULDBLOCK)); + + // Explicitly unlock via the dupped descriptor. + ASSERT_THAT(flock(dup_fd.get(), LOCK_UN), SyscallSucceedsWithValue(0)); + + // Validate that we can now get the lock since we explicitly unlocked. + ASSERT_THAT(flock(fd.get(), LOCK_EX | LOCK_NB), SyscallSucceedsWithValue(0)); + + // Unlock + ASSERT_THAT(flock(fd.get(), LOCK_UN), SyscallSucceedsWithValue(0)); +} + +TEST_F(FlockTest, TestDupFdFollowedByLock) { + // This test will verify that taking a lock on a file descriptor that has + // already been dupped means that the lock is shared between both. This is + // slightly different than than duping on an already locked FD. + FileDescriptor dup_fd = ASSERT_NO_ERRNO_AND_VALUE(test_file_fd_.Dup()); + + // Take a lock. + ASSERT_THAT(flock(dup_fd.get(), LOCK_EX | LOCK_NB), SyscallSucceeds()); + + // Now dup_fd and test_file_ should both reference the same lock. + // We shouldn't be able to obtain a lock until both are closed. 
+ const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR)); + + // Closing the first fd + dup_fd.reset(); // Close the duped fd. + + // Validate that we cannot get a lock yet because the dupped descriptor. + ASSERT_THAT(flock(fd.get(), LOCK_EX | LOCK_NB), + SyscallFailsWithErrno(EWOULDBLOCK)); + + // Closing the second fd. + CloseFile(); // CloseFile() will validate the syscall succeeds. + + // Now we should be able to get the lock. + ASSERT_THAT(flock(fd.get(), LOCK_EX | LOCK_NB), SyscallSucceeds()); + + // Unlock. + ASSERT_THAT(flock(fd.get(), LOCK_UN), SyscallSucceedsWithValue(0)); +} + +// NOTE: These blocking tests are not perfect. Unfortunately it's very hard to +// determine if a thread was actually blocked in the kernel so we're forced +// to use timing. +TEST_F(FlockTest, BlockingLockNoBlockingForSharedLocks_NoRandomSave) { + // This test will verify that although LOCK_NB isn't specified + // two different fds can obtain shared locks without blocking. + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_SH), SyscallSucceeds()); + + // kHoldLockTime is the amount of time we will hold the lock before releasing. + constexpr absl::Duration kHoldLockTime = absl::Seconds(30); + + const DisableSave ds; // Timing-related. + + // We do this in another thread so we can determine if it was actually + // blocked by timing the amount of time it took for the syscall to complete. + ScopedThread t([&] { + MonotonicTimer timer; + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR)); + + // Only a single shared lock is held, the lock will be granted immediately. + // This should be granted without any blocking. Don't save here to avoid + // wild discrepencies on timing. + timer.Start(); + ASSERT_THAT(flock(fd.get(), LOCK_SH), SyscallSucceeds()); + + // We held the lock for 30 seconds but this thread should not have + // blocked at all so we expect a very small duration on syscall completion. 
+ ASSERT_LT(timer.Duration(), + absl::Seconds(1)); // 1000ms is much less than 30s. + + // We can release our second shared lock + ASSERT_THAT(flock(fd.get(), LOCK_UN), SyscallSucceeds()); + }); + + // Sleep before unlocking. + absl::SleepFor(kHoldLockTime); + + // Release the first shared lock. Don't save in this situation to avoid + // discrepencies in timing. + EXPECT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceeds()); +} + +TEST_F(FlockTest, BlockingLockFirstSharedSecondExclusive_NoRandomSave) { + // This test will verify that if someone holds a shared lock any attempt to + // obtain an exclusive lock will result in blocking. + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_SH), SyscallSucceeds()); + + // kHoldLockTime is the amount of time we will hold the lock before releasing. + constexpr absl::Duration kHoldLockTime = absl::Seconds(2); + + const DisableSave ds; // Timing-related. + + // We do this in another thread so we can determine if it was actually + // blocked by timing the amount of time it took for the syscall to complete. + ScopedThread t([&] { + MonotonicTimer timer; + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR)); + + // This exclusive lock should block because someone is already holding a + // shared lock. We don't save here to avoid wild discrepencies on timing. + timer.Start(); + ASSERT_THAT(RetryEINTR(flock)(fd.get(), LOCK_EX), SyscallSucceeds()); + + // We should be blocked, we will expect to be blocked for more than 1.0s. + ASSERT_GT(timer.Duration(), absl::Seconds(1)); + + // We can release our exclusive lock. + ASSERT_THAT(flock(fd.get(), LOCK_UN), SyscallSucceeds()); + }); + + // Sleep before unlocking. + absl::SleepFor(kHoldLockTime); + + // Release the shared lock allowing the thread to proceed. + // We don't save here to avoid wild discrepencies in timing. 
+ EXPECT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceeds()); +} + +TEST_F(FlockTest, BlockingLockFirstExclusiveSecondShared_NoRandomSave) { + // This test will verify that if someone holds an exclusive lock any attempt + // to obtain a shared lock will result in blocking. + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_EX), SyscallSucceeds()); + + // kHoldLockTime is the amount of time we will hold the lock before releasing. + constexpr absl::Duration kHoldLockTime = absl::Seconds(2); + + const DisableSave ds; // Timing-related. + + // We do this in another thread so we can determine if it was actually + // blocked by timing the amount of time it took for the syscall to complete. + ScopedThread t([&] { + MonotonicTimer timer; + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR)); + + // This shared lock should block because someone is already holding an + // exclusive lock. We don't save here to avoid wild discrepencies on timing. + timer.Start(); + ASSERT_THAT(RetryEINTR(flock)(fd.get(), LOCK_SH), SyscallSucceeds()); + + // We should be blocked, we will expect to be blocked for more than 1.0s. + ASSERT_GT(timer.Duration(), absl::Seconds(1)); + + // We can release our shared lock. + ASSERT_THAT(flock(fd.get(), LOCK_UN), SyscallSucceeds()); + }); + + // Sleep before unlocking. + absl::SleepFor(kHoldLockTime); + + // Release the exclusive lock allowing the blocked thread to proceed. + // We don't save here to avoid wild discrepencies in timing. + EXPECT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceeds()); +} + +TEST_F(FlockTest, BlockingLockFirstExclusiveSecondExclusive_NoRandomSave) { + // This test will verify that if someone holds an exclusive lock any attempt + // to obtain another exclusive lock will result in blocking. + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_EX), SyscallSucceeds()); + + // kHoldLockTime is the amount of time we will hold the lock before releasing. 
+ constexpr absl::Duration kHoldLockTime = absl::Seconds(2); + + const DisableSave ds; // Timing-related. + + // We do this in another thread so we can determine if it was actually + // blocked by timing the amount of time it took for the syscall to complete. + ScopedThread t([&] { + MonotonicTimer timer; + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR)); + + // This exclusive lock should block because someone is already holding an + // exclusive lock. + timer.Start(); + ASSERT_THAT(RetryEINTR(flock)(fd.get(), LOCK_EX), SyscallSucceeds()); + + // We should be blocked, we will expect to be blocked for more than 1.0s. + ASSERT_GT(timer.Duration(), absl::Seconds(1)); + + // We can release our exclusive lock. + ASSERT_THAT(flock(fd.get(), LOCK_UN), SyscallSucceeds()); + }); + + // Sleep before unlocking. + absl::SleepFor(kHoldLockTime); + + // Release the exclusive lock allowing the blocked thread to proceed. + // We don't save to avoid wild discrepencies in timing. + EXPECT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceeds()); +} + +TEST(FlockTestNoFixture, BadFD) { + // EBADF: fd is not an open file descriptor. + ASSERT_THAT(flock(-1, 0), SyscallFailsWithErrno(EBADF)); +} + +TEST(FlockTestNoFixture, FlockDir) { + auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + auto fd = ASSERT_NO_ERRNO_AND_VALUE(Open(dir.path(), O_RDONLY, 0000)); + EXPECT_THAT(flock(fd.get(), LOCK_EX | LOCK_NB), SyscallSucceeds()); +} + +TEST(FlockTestNoFixture, FlockSymlink) { + // TODO(gvisor.dev/issue/2782): Replace with IsRunningWithVFS1() when O_PATH + // is supported. 
+ SKIP_IF(IsRunningOnGvisor()); + + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + auto symlink = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateSymlinkTo(GetAbsoluteTestTmpdir(), file.path())); + + auto fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(symlink.path(), O_RDONLY | O_PATH, 0000)); + EXPECT_THAT(flock(fd.get(), LOCK_EX | LOCK_NB), SyscallFailsWithErrno(EBADF)); +} + +TEST(FlockTestNoFixture, FlockProc) { + auto fd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/self/status", O_RDONLY, 0000)); + EXPECT_THAT(flock(fd.get(), LOCK_EX | LOCK_NB), SyscallSucceeds()); +} + +TEST(FlockTestNoFixture, FlockPipe) { + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + + EXPECT_THAT(flock(fds[0], LOCK_EX | LOCK_NB), SyscallSucceeds()); + // Check that the pipe was locked above. + EXPECT_THAT(flock(fds[1], LOCK_EX | LOCK_NB), SyscallFailsWithErrno(EAGAIN)); + + EXPECT_THAT(flock(fds[0], LOCK_UN), SyscallSucceeds()); + EXPECT_THAT(flock(fds[1], LOCK_EX | LOCK_NB), SyscallSucceeds()); + + EXPECT_THAT(close(fds[0]), SyscallSucceeds()); + EXPECT_THAT(close(fds[1]), SyscallSucceeds()); +} + +TEST(FlockTestNoFixture, FlockSocket) { + int sock = socket(AF_UNIX, SOCK_STREAM, 0); + ASSERT_THAT(sock, SyscallSucceeds()); + + struct sockaddr_un addr = + ASSERT_NO_ERRNO_AND_VALUE(UniqueUnixAddr(true /* abstract */, AF_UNIX)); + ASSERT_THAT( + bind(sock, reinterpret_cast<struct sockaddr*>(&addr), sizeof(addr)), + SyscallSucceeds()); + + EXPECT_THAT(flock(sock, LOCK_EX | LOCK_NB), SyscallSucceeds()); + EXPECT_THAT(close(sock), SyscallSucceeds()); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/fork.cc b/test/syscalls/linux/fork.cc new file mode 100644 index 000000000..853f6231a --- /dev/null +++ b/test/syscalls/linux/fork.cc @@ -0,0 +1,464 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <errno.h> +#include <fcntl.h> +#include <sched.h> +#include <stdlib.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include <atomic> +#include <cstdlib> + +#include "gtest/gtest.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/util/capability_util.h" +#include "test/util/logging.h" +#include "test/util/memory_util.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +using ::testing::Ge; + +class ForkTest : public ::testing::Test { + protected: + // SetUp creates a populated, open file. + void SetUp() override { + // Make a shared mapping. + shared_ = reinterpret_cast<char*>(mmap(0, kPageSize, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0)); + ASSERT_NE(reinterpret_cast<void*>(shared_), MAP_FAILED); + + // Make a private mapping. + private_ = + reinterpret_cast<char*>(mmap(0, kPageSize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)); + ASSERT_NE(reinterpret_cast<void*>(private_), MAP_FAILED); + + // Make a pipe. + ASSERT_THAT(pipe(pipes_), SyscallSucceeds()); + } + + // TearDown frees associated resources. + void TearDown() override { + EXPECT_THAT(munmap(shared_, kPageSize), SyscallSucceeds()); + EXPECT_THAT(munmap(private_, kPageSize), SyscallSucceeds()); + EXPECT_THAT(close(pipes_[0]), SyscallSucceeds()); + EXPECT_THAT(close(pipes_[1]), SyscallSucceeds()); + } + + // Fork executes a clone system call. 
+ pid_t Fork() { + pid_t pid = fork(); + MaybeSave(); + TEST_PCHECK_MSG(pid >= 0, "fork failed"); + return pid; + } + + // Wait waits for the given pid and returns the exit status. If the child was + // killed by a signal or an error occurs, then 256+signal is returned. + int Wait(pid_t pid) { + int status; + while (true) { + int rval = wait4(pid, &status, 0, NULL); + if (rval < 0) { + return rval; + } + if (rval != pid) { + continue; + } + if (WIFEXITED(status)) { + return WEXITSTATUS(status); + } + if (WIFSIGNALED(status)) { + return 256 + WTERMSIG(status); + } + } + } + + // Exit exits the proccess. + void Exit(int code) { + _exit(code); + + // Should never reach here. Since the exit above failed, we really don't + // have much in the way of options to indicate failure. So we just try to + // log an assertion failure to the logs. The parent process will likely + // fail anyways if exit is not working. + TEST_CHECK_MSG(false, "_exit returned"); + } + + // ReadByte reads a byte from the shared pipe. + char ReadByte() { + char val = -1; + TEST_PCHECK(ReadFd(pipes_[0], &val, 1) == 1); + MaybeSave(); + return val; + } + + // WriteByte writes a byte from the shared pipe. + void WriteByte(char val) { + TEST_PCHECK(WriteFd(pipes_[1], &val, 1) == 1); + MaybeSave(); + } + + // Shared pipe. + int pipes_[2]; + + // Shared mapping (one page). + char* shared_; + + // Private mapping (one page). 
+ char* private_; +}; + +TEST_F(ForkTest, Simple) { + pid_t child = Fork(); + if (child == 0) { + Exit(0); + } + EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0)); +} + +TEST_F(ForkTest, ExitCode) { + pid_t child = Fork(); + if (child == 0) { + Exit(123); + } + EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(123)); + child = Fork(); + if (child == 0) { + Exit(1); + } + EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(1)); +} + +TEST_F(ForkTest, Multi) { + pid_t child1 = Fork(); + if (child1 == 0) { + Exit(0); + } + pid_t child2 = Fork(); + if (child2 == 0) { + Exit(1); + } + EXPECT_THAT(Wait(child1), SyscallSucceedsWithValue(0)); + EXPECT_THAT(Wait(child2), SyscallSucceedsWithValue(1)); +} + +TEST_F(ForkTest, Pipe) { + pid_t child = Fork(); + if (child == 0) { + WriteByte(1); + Exit(0); + } + EXPECT_EQ(ReadByte(), 1); + EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0)); +} + +TEST_F(ForkTest, SharedMapping) { + pid_t child = Fork(); + if (child == 0) { + // Wait for the parent. + ReadByte(); + if (shared_[0] == 1) { + Exit(0); + } + // Failed. + Exit(1); + } + // Change the mapping. + ASSERT_EQ(shared_[0], 0); + shared_[0] = 1; + // Unblock the child. + WriteByte(0); + // Did it work? + EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0)); +} + +TEST_F(ForkTest, PrivateMapping) { + pid_t child = Fork(); + if (child == 0) { + // Wait for the parent. + ReadByte(); + if (private_[0] == 0) { + Exit(0); + } + // Failed. + Exit(1); + } + // Change the mapping. + ASSERT_EQ(private_[0], 0); + private_[0] = 1; + // Unblock the child. + WriteByte(0); + // Did it work? + EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0)); +} + +// CPUID is x86 specific. +#ifdef __x86_64__ +// Test that cpuid works after a fork. +TEST_F(ForkTest, Cpuid) { + pid_t child = Fork(); + + // We should be able to determine the CPU vendor. 
+ ASSERT_NE(GetCPUVendor(), CPUVendor::kUnknownVendor); + + if (child == 0) { + Exit(0); + } + EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0)); +} +#endif + +TEST_F(ForkTest, Mmap) { + pid_t child = Fork(); + + if (child == 0) { + void* addr = + mmap(0, kPageSize, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + MaybeSave(); + Exit(addr == MAP_FAILED); + } + + EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0)); +} + +static volatile int alarmed = 0; + +void AlarmHandler(int sig, siginfo_t* info, void* context) { alarmed = 1; } + +TEST_F(ForkTest, Alarm) { + // Setup an alarm handler. + struct sigaction sa; + sa.sa_sigaction = AlarmHandler; + sigfillset(&sa.sa_mask); + sa.sa_flags = SA_SIGINFO; + EXPECT_THAT(sigaction(SIGALRM, &sa, nullptr), SyscallSucceeds()); + + pid_t child = Fork(); + + if (child == 0) { + alarm(1); + sleep(3); + if (!alarmed) { + Exit(1); + } + Exit(0); + } + + EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0)); + EXPECT_EQ(0, alarmed); +} + +// Child cannot affect parent private memory. Regression test for b/24137240. +TEST_F(ForkTest, PrivateMemory) { + std::atomic<uint32_t> local(0); + + pid_t child1 = Fork(); + if (child1 == 0) { + local++; + + pid_t child2 = Fork(); + if (child2 == 0) { + local++; + + TEST_CHECK(local.load() == 2); + + Exit(0); + } + + TEST_PCHECK(Wait(child2) == 0); + TEST_CHECK(local.load() == 1); + Exit(0); + } + + EXPECT_THAT(Wait(child1), SyscallSucceedsWithValue(0)); + EXPECT_EQ(0, local.load()); +} + +// Kernel-accessed buffers should remain coherent across COW. +// +// The buffer must be >= usermem.ZeroCopyMinBytes, as UnsafeAccess operates +// differently. Regression test for b/33811887. +TEST_F(ForkTest, COWSegment) { + constexpr int kBufSize = 1024; + char* read_buf = private_; + char* touch = private_ + kPageSize / 2; + + std::string contents(kBufSize, 'a'); + + ScopedThread t([&] { + // Wait to be sure the parent is blocked in read. 
+ absl::SleepFor(absl::Seconds(3)); + + // Fork to mark private pages for COW. + // + // Use fork directly rather than the Fork wrapper to skip the multi-threaded + // check, and limit the child to async-signal-safe functions: + // + // "After a fork() in a multithreaded program, the child can safely call + // only async-signal-safe functions (see signal(7)) until such time as it + // calls execve(2)." + // + // Skip ASSERT in the child, as it isn't async-signal-safe. + pid_t child = fork(); + if (child == 0) { + // Wait to be sure parent touched memory. + sleep(3); + Exit(0); + } + + // Check success only in the parent. + ASSERT_THAT(child, SyscallSucceedsWithValue(Ge(0))); + + // Trigger COW on private page. + *touch = 42; + + // Write to pipe. Parent should still be able to read this. + EXPECT_THAT(WriteFd(pipes_[1], contents.c_str(), kBufSize), + SyscallSucceedsWithValue(kBufSize)); + + EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0)); + }); + + EXPECT_THAT(ReadFd(pipes_[0], read_buf, kBufSize), + SyscallSucceedsWithValue(kBufSize)); + EXPECT_STREQ(contents.c_str(), read_buf); +} + +TEST_F(ForkTest, SigAltStack) { + std::vector<char> stack_mem(SIGSTKSZ); + stack_t stack = {}; + stack.ss_size = SIGSTKSZ; + stack.ss_sp = stack_mem.data(); + ASSERT_THAT(sigaltstack(&stack, nullptr), SyscallSucceeds()); + + pid_t child = Fork(); + + if (child == 0) { + stack_t oss = {}; + TEST_PCHECK(sigaltstack(nullptr, &oss) == 0); + MaybeSave(); + + TEST_CHECK((oss.ss_flags & SS_DISABLE) == 0); + TEST_CHECK(oss.ss_size == SIGSTKSZ); + TEST_CHECK(oss.ss_sp == stack.ss_sp); + + Exit(0); + } + EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0)); +} + +TEST_F(ForkTest, Affinity) { + // Make a non-default cpumask. + cpu_set_t parent_mask; + EXPECT_THAT(sched_getaffinity(/*pid=*/0, sizeof(cpu_set_t), &parent_mask), + SyscallSucceeds()); + // Knock out the lowest bit. 
+ for (unsigned int n = 0; n < CPU_SETSIZE; n++) { + if (CPU_ISSET(n, &parent_mask)) { + CPU_CLR(n, &parent_mask); + break; + } + } + EXPECT_THAT(sched_setaffinity(/*pid=*/0, sizeof(cpu_set_t), &parent_mask), + SyscallSucceeds()); + + pid_t child = Fork(); + if (child == 0) { + cpu_set_t child_mask; + + int ret = sched_getaffinity(/*pid=*/0, sizeof(cpu_set_t), &child_mask); + MaybeSave(); + if (ret < 0) { + Exit(-ret); + } + + TEST_CHECK(CPU_EQUAL(&child_mask, &parent_mask)); + + Exit(0); + } + + EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0)); +} + +TEST(CloneTest, NewUserNamespacePermitsAllOtherNamespaces) { + // "If CLONE_NEWUSER is specified along with other CLONE_NEW* flags in a + // single clone(2) or unshare(2) call, the user namespace is guaranteed to be + // created first, giving the child (clone(2)) or caller (unshare(2)) + // privileges over the remaining namespaces created by the call. Thus, it is + // possible for an unprivileged caller to specify this combination of flags." + // - user_namespaces(7) + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanCreateUserNamespace())); + Mapping child_stack = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE)); + int child_pid; + // We only test with CLONE_NEWIPC, CLONE_NEWNET, and CLONE_NEWUTS since these + // namespaces were implemented in Linux before user namespaces. + ASSERT_THAT( + child_pid = clone( + +[](void*) { return 0; }, + reinterpret_cast<void*>(child_stack.addr() + kPageSize), + CLONE_NEWUSER | CLONE_NEWIPC | CLONE_NEWNET | CLONE_NEWUTS | SIGCHLD, + /* arg = */ nullptr), + SyscallSucceeds()); + + int status; + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << "status = " << status; +} + +// Clone with CLONE_SETTLS and a non-canonical TLS address is rejected. 
+TEST(CloneTest, NonCanonicalTLS) { + constexpr uintptr_t kNonCanonical = 1ull << 48; + + // We need a valid address for the stack pointer. We'll never actually execute + // on this. + char stack; + + // The raw system call interface on x86-64 is: + // long clone(unsigned long flags, void *stack, + // int *parent_tid, int *child_tid, + // unsigned long tls); + // + // While on arm64, the order of the last two arguments is reversed: + // long clone(unsigned long flags, void *stack, + // int *parent_tid, unsigned long tls, + // int *child_tid); +#if defined(__x86_64__) + EXPECT_THAT(syscall(__NR_clone, SIGCHLD | CLONE_SETTLS, &stack, nullptr, + nullptr, kNonCanonical), + SyscallFailsWithErrno(EPERM)); +#elif defined(__aarch64__) + EXPECT_THAT(syscall(__NR_clone, SIGCHLD | CLONE_SETTLS, &stack, nullptr, + kNonCanonical, nullptr), + SyscallFailsWithErrno(EPERM)); +#endif +} + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/fpsig_fork.cc b/test/syscalls/linux/fpsig_fork.cc new file mode 100644 index 000000000..c47567b4e --- /dev/null +++ b/test/syscalls/linux/fpsig_fork.cc @@ -0,0 +1,131 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This test verifies that fork(2) in a signal handler will correctly +// restore floating point state after the signal handler returns in both +// the child and parent. 
+#include <sys/time.h> + +#include "gtest/gtest.h" +#include "test/util/logging.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +#ifdef __x86_64__ +#define GET_XMM(__var, __xmm) \ + asm volatile("movq %%" #__xmm ", %0" : "=r"(__var)) +#define SET_XMM(__var, __xmm) asm volatile("movq %0, %%" #__xmm : : "r"(__var)) +#define GET_FP0(__var) GET_XMM(__var, xmm0) +#define SET_FP0(__var) SET_XMM(__var, xmm0) +#elif __aarch64__ +#define __stringify_1(x...) #x +#define __stringify(x...) __stringify_1(x) +#define GET_FPREG(var, regname) \ + asm volatile("str " __stringify(regname) ", %0" : "=m"(var)) +#define SET_FPREG(var, regname) \ + asm volatile("ldr " __stringify(regname) ", %0" : "=m"(var)) +#define GET_FP0(var) GET_FPREG(var, d0) +#define SET_FP0(var) SET_FPREG(var, d0) +#endif + +int parent, child; + +void sigusr1(int s, siginfo_t* siginfo, void* _uc) { + // Fork and clobber %xmm0. The fpstate should be restored by sigreturn(2) + // in both parent and child. + child = fork(); + TEST_CHECK_MSG(child >= 0, "fork failed"); + + uint64_t val = SIGUSR1; + SET_FP0(val); + uint64_t got; + GET_FP0(got); + TEST_CHECK_MSG(val == got, "Basic FP check failed in sigusr1()"); +} + +TEST(FPSigTest, Fork) { + parent = getpid(); + pid_t parent_tid = gettid(); + + struct sigaction sa = {}; + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_SIGINFO; + sa.sa_sigaction = sigusr1; + ASSERT_THAT(sigaction(SIGUSR1, &sa, nullptr), SyscallSucceeds()); + + // The amd64 ABI specifies that the XMM register set is caller-saved. This + // implies that if there is any function call between SET_XMM and GET_XMM the + // compiler might save/restore xmm0 implicitly. This defeats the entire + // purpose of the test which is to verify that fpstate is restored by + // sigreturn(2). + // + // This is the reason why 'tgkill(getpid(), gettid(), SIGUSR1)' is implemented + // in inline assembly below. 
+ // + // If the OS is broken and registers are clobbered by the child, using tgkill + // to signal the current thread increases the likelihood that this thread will + // be the one clobbered. + + uint64_t expected = 0xdeadbeeffacefeed; + SET_FP0(expected); + +#ifdef __x86_64__ + asm volatile( + "movl %[killnr], %%eax;" + "movl %[parent], %%edi;" + "movl %[tid], %%esi;" + "movl %[sig], %%edx;" + "syscall;" + : + : [ killnr ] "i"(__NR_tgkill), [ parent ] "rm"(parent), + [ tid ] "rm"(parent_tid), [ sig ] "i"(SIGUSR1) + : "rax", "rdi", "rsi", "rdx", + // Clobbered by syscall. + "rcx", "r11"); +#elif __aarch64__ + asm volatile( + "mov x8, %0\n" + "mov x0, %1\n" + "mov x1, %2\n" + "mov x2, %3\n" + "svc #0\n" ::"r"(__NR_tgkill), + "r"(parent), "r"(parent_tid), "r"(SIGUSR1)); +#endif + + uint64_t got; + GET_FP0(got); + + if (getpid() == parent) { // Parent. + int status; + ASSERT_THAT(waitpid(child, &status, 0), SyscallSucceedsWithValue(child)); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0); + } + + // TEST_CHECK_MSG since this may run in the child. + TEST_CHECK_MSG(expected == got, "Bad xmm0 value"); + + if (getpid() != parent) { // Child. + _exit(0); + } +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/fpsig_nested.cc b/test/syscalls/linux/fpsig_nested.cc new file mode 100644 index 000000000..302d928d1 --- /dev/null +++ b/test/syscalls/linux/fpsig_nested.cc @@ -0,0 +1,167 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +// This program verifies that application floating point state is restored +// correctly after a signal handler returns. It also verifies that this works +// with nested signals. +#include <sys/time.h> + +#include "gtest/gtest.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +#ifdef __x86_64__ +#define GET_XMM(__var, __xmm) \ + asm volatile("movq %%" #__xmm ", %0" : "=r"(__var)) +#define SET_XMM(__var, __xmm) asm volatile("movq %0, %%" #__xmm : : "r"(__var)) +#define GET_FP0(__var) GET_XMM(__var, xmm0) +#define SET_FP0(__var) SET_XMM(__var, xmm0) +#elif __aarch64__ +#define __stringify_1(x...) #x +#define __stringify(x...) __stringify_1(x) +#define GET_FPREG(var, regname) \ + asm volatile("str " __stringify(regname) ", %0" : "=m"(var)) +#define SET_FPREG(var, regname) \ + asm volatile("ldr " __stringify(regname) ", %0" : "=m"(var)) +#define GET_FP0(var) GET_FPREG(var, d0) +#define SET_FP0(var) SET_FPREG(var, d0) +#endif + +int pid; +int tid; + +volatile uint64_t entryxmm[2] = {~0UL, ~0UL}; +volatile uint64_t exitxmm[2]; + +void sigusr2(int s, siginfo_t* siginfo, void* _uc) { + uint64_t val = SIGUSR2; + + // Record the value of %xmm0 on entry and then clobber it. + GET_FP0(entryxmm[1]); + SET_FP0(val); + GET_FP0(exitxmm[1]); +} + +void sigusr1(int s, siginfo_t* siginfo, void* _uc) { + uint64_t val = SIGUSR1; + + // Record the value of %xmm0 on entry and then clobber it. + GET_FP0(entryxmm[0]); + SET_FP0(val); + + // Send a SIGUSR2 to ourself. The signal mask is configured such that + // the SIGUSR2 handler will run before this handler returns. 
+#ifdef __x86_64__ + asm volatile( + "movl %[killnr], %%eax;" + "movl %[pid], %%edi;" + "movl %[tid], %%esi;" + "movl %[sig], %%edx;" + "syscall;" + : + : [ killnr ] "i"(__NR_tgkill), [ pid ] "rm"(pid), [ tid ] "rm"(tid), + [ sig ] "i"(SIGUSR2) + : "rax", "rdi", "rsi", "rdx", + // Clobbered by syscall. + "rcx", "r11"); +#elif __aarch64__ + asm volatile( + "mov x8, %0\n" + "mov x0, %1\n" + "mov x1, %2\n" + "mov x2, %3\n" + "svc #0\n" ::"r"(__NR_tgkill), + "r"(pid), "r"(tid), "r"(SIGUSR2)); +#endif + + // Record value of %xmm0 again to verify that the nested signal handler + // does not clobber it. + GET_FP0(exitxmm[0]); +} + +TEST(FPSigTest, NestedSignals) { + pid = getpid(); + tid = gettid(); + + struct sigaction sa = {}; + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_SIGINFO; + sa.sa_sigaction = sigusr1; + ASSERT_THAT(sigaction(SIGUSR1, &sa, nullptr), SyscallSucceeds()); + + sa.sa_sigaction = sigusr2; + ASSERT_THAT(sigaction(SIGUSR2, &sa, nullptr), SyscallSucceeds()); + + // The amd64 ABI specifies that the XMM register set is caller-saved. This + // implies that if there is any function call between SET_XMM and GET_XMM the + // compiler might save/restore xmm0 implicitly. This defeats the entire + // purpose of the test which is to verify that fpstate is restored by + // sigreturn(2). + // + // This is the reason why 'tgkill(getpid(), gettid(), SIGUSR1)' is implemented + // in inline assembly below. + // + // If the OS is broken and registers are clobbered by the signal, using tgkill + // to signal the current thread ensures that this is the clobbered thread. + + uint64_t expected = 0xdeadbeeffacefeed; + SET_FP0(expected); + +#ifdef __x86_64__ + asm volatile( + "movl %[killnr], %%eax;" + "movl %[pid], %%edi;" + "movl %[tid], %%esi;" + "movl %[sig], %%edx;" + "syscall;" + : + : [ killnr ] "i"(__NR_tgkill), [ pid ] "rm"(pid), [ tid ] "rm"(tid), + [ sig ] "i"(SIGUSR1) + : "rax", "rdi", "rsi", "rdx", + // Clobbered by syscall. 
+ "rcx", "r11"); +#elif __aarch64__ + asm volatile( + "mov x8, %0\n" + "mov x0, %1\n" + "mov x1, %2\n" + "mov x2, %3\n" + "svc #0\n" ::"r"(__NR_tgkill), + "r"(pid), "r"(tid), "r"(SIGUSR1)); +#endif + + uint64_t got; + GET_FP0(got); + + // + // The checks below verifies the following: + // - signal handlers must called with a clean fpu state. + // - sigreturn(2) must restore fpstate of the interrupted context. + // + EXPECT_EQ(expected, got); + EXPECT_EQ(entryxmm[0], 0); + EXPECT_EQ(entryxmm[1], 0); + EXPECT_EQ(exitxmm[0], SIGUSR1); + EXPECT_EQ(exitxmm[1], SIGUSR2); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/fsync.cc b/test/syscalls/linux/fsync.cc new file mode 100644 index 000000000..e7e057f06 --- /dev/null +++ b/test/syscalls/linux/fsync.cc @@ -0,0 +1,58 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <fcntl.h> +#include <stdio.h> +#include <unistd.h> + +#include <string> + +#include "gtest/gtest.h" +#include "test/util/file_descriptor.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +TEST(FsyncTest, TempFileSucceeds) { + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + auto fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666)); + const std::string data = "some data to sync"; + EXPECT_THAT(write(fd.get(), data.c_str(), data.size()), + SyscallSucceedsWithValue(data.size())); + EXPECT_THAT(fsync(fd.get()), SyscallSucceeds()); +} + +TEST(FsyncTest, TempDirSucceeds) { + auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + auto fd = ASSERT_NO_ERRNO_AND_VALUE(Open(dir.path(), O_RDONLY | O_DIRECTORY)); + EXPECT_THAT(fsync(fd.get()), SyscallSucceeds()); +} + +TEST(FsyncTest, CannotFsyncOnUnopenedFd) { + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + int fd; + ASSERT_THAT(fd = open(file.path().c_str(), O_RDONLY), SyscallSucceeds()); + ASSERT_THAT(close(fd), SyscallSucceeds()); + + // fd is now invalid. + EXPECT_THAT(fsync(fd), SyscallFailsWithErrno(EBADF)); +} +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/futex.cc b/test/syscalls/linux/futex.cc new file mode 100644 index 000000000..40c80a6e1 --- /dev/null +++ b/test/syscalls/linux/futex.cc @@ -0,0 +1,742 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include <errno.h> +#include <linux/futex.h> +#include <linux/types.h> +#include <sys/syscall.h> +#include <sys/time.h> +#include <sys/types.h> +#include <unistd.h> + +#include <algorithm> +#include <atomic> +#include <memory> +#include <vector> + +#include "gtest/gtest.h" +#include "absl/memory/memory.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/util/cleanup.h" +#include "test/util/file_descriptor.h" +#include "test/util/memory_util.h" +#include "test/util/save_util.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" +#include "test/util/time_util.h" +#include "test/util/timer_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// Amount of time we wait for threads doing futex_wait to start running before +// doing futex_wake. +constexpr auto kWaiterStartupDelay = absl::Seconds(3); + +// Default timeout for waiters in tests where we expect a futex_wake to be +// ineffective. +constexpr auto kIneffectiveWakeTimeout = absl::Seconds(6); + +static_assert(kWaiterStartupDelay < kIneffectiveWakeTimeout, + "futex_wait will time out before futex_wake is called"); + +int futex_wait(bool priv, std::atomic<int>* uaddr, int val, + absl::Duration timeout = absl::InfiniteDuration()) { + int op = FUTEX_WAIT; + if (priv) { + op |= FUTEX_PRIVATE_FLAG; + } + + if (timeout == absl::InfiniteDuration()) { + return RetryEINTR(syscall)(SYS_futex, uaddr, op, val, nullptr); + } + + // FUTEX_WAIT doesn't adjust the timeout if it returns EINTR, so we have to do + // so. 
+ while (true) { + auto const timeout_ts = absl::ToTimespec(timeout); + MonotonicTimer timer; + timer.Start(); + int const ret = syscall(SYS_futex, uaddr, op, val, &timeout_ts); + if (ret != -1 || errno != EINTR) { + return ret; + } + timeout = std::max(timeout - timer.Duration(), absl::ZeroDuration()); + } +} + +int futex_wait_bitset(bool priv, std::atomic<int>* uaddr, int val, int bitset, + absl::Time deadline = absl::InfiniteFuture()) { + int op = FUTEX_WAIT_BITSET | FUTEX_CLOCK_REALTIME; + if (priv) { + op |= FUTEX_PRIVATE_FLAG; + } + + auto const deadline_ts = absl::ToTimespec(deadline); + return RetryEINTR(syscall)( + SYS_futex, uaddr, op, val, + deadline == absl::InfiniteFuture() ? nullptr : &deadline_ts, nullptr, + bitset); +} + +int futex_wake(bool priv, std::atomic<int>* uaddr, int count) { + int op = FUTEX_WAKE; + if (priv) { + op |= FUTEX_PRIVATE_FLAG; + } + return syscall(SYS_futex, uaddr, op, count); +} + +int futex_wake_bitset(bool priv, std::atomic<int>* uaddr, int count, + int bitset) { + int op = FUTEX_WAKE_BITSET; + if (priv) { + op |= FUTEX_PRIVATE_FLAG; + } + return syscall(SYS_futex, uaddr, op, count, nullptr, nullptr, bitset); +} + +int futex_wake_op(bool priv, std::atomic<int>* uaddr1, std::atomic<int>* uaddr2, + int nwake1, int nwake2, uint32_t sub_op) { + int op = FUTEX_WAKE_OP; + if (priv) { + op |= FUTEX_PRIVATE_FLAG; + } + return syscall(SYS_futex, uaddr1, op, nwake1, nwake2, uaddr2, sub_op); +} + +int futex_lock_pi(bool priv, std::atomic<int>* uaddr) { + int op = FUTEX_LOCK_PI; + if (priv) { + op |= FUTEX_PRIVATE_FLAG; + } + int zero = 0; + if (uaddr->compare_exchange_strong(zero, gettid())) { + return 0; + } + return RetryEINTR(syscall)(SYS_futex, uaddr, op, nullptr, nullptr); +} + +int futex_trylock_pi(bool priv, std::atomic<int>* uaddr) { + int op = FUTEX_TRYLOCK_PI; + if (priv) { + op |= FUTEX_PRIVATE_FLAG; + } + int zero = 0; + if (uaddr->compare_exchange_strong(zero, gettid())) { + return 0; + } + return 
RetryEINTR(syscall)(SYS_futex, uaddr, op, nullptr, nullptr); +} + +int futex_unlock_pi(bool priv, std::atomic<int>* uaddr) { + int op = FUTEX_UNLOCK_PI; + if (priv) { + op |= FUTEX_PRIVATE_FLAG; + } + int tid = gettid(); + if (uaddr->compare_exchange_strong(tid, 0)) { + return 0; + } + return RetryEINTR(syscall)(SYS_futex, uaddr, op, nullptr, nullptr); +} + +// Fixture for futex tests parameterized by whether to use private or shared +// futexes. +class PrivateAndSharedFutexTest : public ::testing::TestWithParam<bool> { + protected: + bool IsPrivate() const { return GetParam(); } + int PrivateFlag() const { return IsPrivate() ? FUTEX_PRIVATE_FLAG : 0; } +}; + +// FUTEX_WAIT with 0 timeout does not block. +TEST_P(PrivateAndSharedFutexTest, Wait_ZeroTimeout) { + struct timespec timeout = {}; + + // Don't use the futex_wait helper because it adjusts timeout. + int a = 1; + EXPECT_THAT(syscall(SYS_futex, &a, FUTEX_WAIT | PrivateFlag(), a, &timeout), + SyscallFailsWithErrno(ETIMEDOUT)); +} + +TEST_P(PrivateAndSharedFutexTest, Wait_Timeout) { + std::atomic<int> a = ATOMIC_VAR_INIT(1); + + MonotonicTimer timer; + timer.Start(); + constexpr absl::Duration kTimeout = absl::Seconds(1); + EXPECT_THAT(futex_wait(IsPrivate(), &a, a, kTimeout), + SyscallFailsWithErrno(ETIMEDOUT)); + EXPECT_GE(timer.Duration(), kTimeout); +} + +TEST_P(PrivateAndSharedFutexTest, Wait_BitsetTimeout) { + std::atomic<int> a = ATOMIC_VAR_INIT(1); + + MonotonicTimer timer; + timer.Start(); + constexpr absl::Duration kTimeout = absl::Seconds(1); + EXPECT_THAT( + futex_wait_bitset(IsPrivate(), &a, a, 0xffffffff, absl::Now() + kTimeout), + SyscallFailsWithErrno(ETIMEDOUT)); + EXPECT_GE(timer.Duration(), kTimeout); +} + +TEST_P(PrivateAndSharedFutexTest, WaitBitset_NegativeTimeout) { + std::atomic<int> a = ATOMIC_VAR_INIT(1); + + MonotonicTimer timer; + timer.Start(); + EXPECT_THAT(futex_wait_bitset(IsPrivate(), &a, a, 0xffffffff, + absl::Now() - absl::Seconds(1)), + SyscallFailsWithErrno(ETIMEDOUT)); +} + 
+TEST_P(PrivateAndSharedFutexTest, Wait_WrongVal) { + std::atomic<int> a = ATOMIC_VAR_INIT(1); + EXPECT_THAT(futex_wait(IsPrivate(), &a, a + 1), + SyscallFailsWithErrno(EAGAIN)); +} + +TEST_P(PrivateAndSharedFutexTest, Wait_ZeroBitset) { + std::atomic<int> a = ATOMIC_VAR_INIT(1); + EXPECT_THAT(futex_wait_bitset(IsPrivate(), &a, a, 0), + SyscallFailsWithErrno(EINVAL)); +} + +TEST_P(PrivateAndSharedFutexTest, Wake1_NoRandomSave) { + constexpr int kInitialValue = 1; + std::atomic<int> a = ATOMIC_VAR_INIT(kInitialValue); + + // Prevent save/restore from interrupting futex_wait, which will cause it to + // return EAGAIN instead of the expected result if futex_wait is restarted + // after we change the value of a below. + DisableSave ds; + ScopedThread thread([&] { + EXPECT_THAT(futex_wait(IsPrivate(), &a, kInitialValue), + SyscallSucceedsWithValue(0)); + }); + absl::SleepFor(kWaiterStartupDelay); + + // Change a so that if futex_wake happens before futex_wait, the latter + // returns EAGAIN instead of hanging the test. + a.fetch_add(1); + EXPECT_THAT(futex_wake(IsPrivate(), &a, 1), SyscallSucceedsWithValue(1)); +} + +TEST_P(PrivateAndSharedFutexTest, Wake0_NoRandomSave) { + constexpr int kInitialValue = 1; + std::atomic<int> a = ATOMIC_VAR_INIT(kInitialValue); + + // Prevent save/restore from interrupting futex_wait, which will cause it to + // return EAGAIN instead of the expected result if futex_wait is restarted + // after we change the value of a below. + DisableSave ds; + ScopedThread thread([&] { + EXPECT_THAT(futex_wait(IsPrivate(), &a, kInitialValue), + SyscallSucceedsWithValue(0)); + }); + absl::SleepFor(kWaiterStartupDelay); + + // Change a so that if futex_wake happens before futex_wait, the latter + // returns EAGAIN instead of hanging the test. + a.fetch_add(1); + // The Linux kernel wakes one waiter even if val is 0 or negative. 
+ EXPECT_THAT(futex_wake(IsPrivate(), &a, 0), SyscallSucceedsWithValue(1)); +} + +TEST_P(PrivateAndSharedFutexTest, WakeAll_NoRandomSave) { + constexpr int kInitialValue = 1; + std::atomic<int> a = ATOMIC_VAR_INIT(kInitialValue); + + DisableSave ds; + constexpr int kThreads = 5; + std::vector<std::unique_ptr<ScopedThread>> threads; + threads.reserve(kThreads); + for (int i = 0; i < kThreads; i++) { + threads.push_back(absl::make_unique<ScopedThread>([&] { + EXPECT_THAT(futex_wait(IsPrivate(), &a, kInitialValue), + SyscallSucceeds()); + })); + } + absl::SleepFor(kWaiterStartupDelay); + + a.fetch_add(1); + EXPECT_THAT(futex_wake(IsPrivate(), &a, kThreads), + SyscallSucceedsWithValue(kThreads)); +} + +TEST_P(PrivateAndSharedFutexTest, WakeSome_NoRandomSave) { + constexpr int kInitialValue = 1; + std::atomic<int> a = ATOMIC_VAR_INIT(kInitialValue); + + DisableSave ds; + constexpr int kThreads = 5; + constexpr int kWokenThreads = 3; + static_assert(kWokenThreads < kThreads, + "can't wake more threads than are created"); + std::vector<std::unique_ptr<ScopedThread>> threads; + threads.reserve(kThreads); + std::vector<int> rets; + rets.reserve(kThreads); + std::vector<int> errs; + errs.reserve(kThreads); + for (int i = 0; i < kThreads; i++) { + rets.push_back(-1); + errs.push_back(0); + } + for (int i = 0; i < kThreads; i++) { + threads.push_back(absl::make_unique<ScopedThread>([&, i] { + rets[i] = + futex_wait(IsPrivate(), &a, kInitialValue, kIneffectiveWakeTimeout); + errs[i] = errno; + })); + } + absl::SleepFor(kWaiterStartupDelay); + + a.fetch_add(1); + EXPECT_THAT(futex_wake(IsPrivate(), &a, kWokenThreads), + SyscallSucceedsWithValue(kWokenThreads)); + + int woken = 0; + int timedout = 0; + for (int i = 0; i < kThreads; i++) { + threads[i]->Join(); + if (rets[i] == 0) { + woken++; + } else if (errs[i] == ETIMEDOUT) { + timedout++; + } else { + ADD_FAILURE() << " thread " << i << ": returned " << rets[i] << ", errno " + << errs[i]; + } + } + EXPECT_EQ(woken, 
kWokenThreads); + EXPECT_EQ(timedout, kThreads - kWokenThreads); +} + +TEST_P(PrivateAndSharedFutexTest, WaitBitset_Wake_NoRandomSave) { + constexpr int kInitialValue = 1; + std::atomic<int> a = ATOMIC_VAR_INIT(kInitialValue); + + DisableSave ds; + ScopedThread thread([&] { + EXPECT_THAT(futex_wait_bitset(IsPrivate(), &a, kInitialValue, 0b01001000), + SyscallSucceeds()); + }); + absl::SleepFor(kWaiterStartupDelay); + + a.fetch_add(1); + EXPECT_THAT(futex_wake(IsPrivate(), &a, 1), SyscallSucceedsWithValue(1)); +} + +TEST_P(PrivateAndSharedFutexTest, Wait_WakeBitset_NoRandomSave) { + constexpr int kInitialValue = 1; + std::atomic<int> a = ATOMIC_VAR_INIT(kInitialValue); + + DisableSave ds; + ScopedThread thread([&] { + EXPECT_THAT(futex_wait(IsPrivate(), &a, kInitialValue), SyscallSucceeds()); + }); + absl::SleepFor(kWaiterStartupDelay); + + a.fetch_add(1); + EXPECT_THAT(futex_wake_bitset(IsPrivate(), &a, 1, 0b01001000), + SyscallSucceedsWithValue(1)); +} + +TEST_P(PrivateAndSharedFutexTest, WaitBitset_WakeBitsetMatch_NoRandomSave) { + constexpr int kInitialValue = 1; + std::atomic<int> a = ATOMIC_VAR_INIT(kInitialValue); + + constexpr int kBitset = 0b01001000; + + DisableSave ds; + ScopedThread thread([&] { + EXPECT_THAT(futex_wait_bitset(IsPrivate(), &a, kInitialValue, kBitset), + SyscallSucceeds()); + }); + absl::SleepFor(kWaiterStartupDelay); + + a.fetch_add(1); + EXPECT_THAT(futex_wake_bitset(IsPrivate(), &a, 1, kBitset), + SyscallSucceedsWithValue(1)); +} + +TEST_P(PrivateAndSharedFutexTest, WaitBitset_WakeBitsetNoMatch_NoRandomSave) { + constexpr int kInitialValue = 1; + std::atomic<int> a = ATOMIC_VAR_INIT(kInitialValue); + + constexpr int kWaitBitset = 0b01000001; + constexpr int kWakeBitset = 0b00101000; + static_assert((kWaitBitset & kWakeBitset) == 0, + "futex_wake_bitset will wake waiter"); + + DisableSave ds; + ScopedThread thread([&] { + EXPECT_THAT(futex_wait_bitset(IsPrivate(), &a, kInitialValue, kWaitBitset, + absl::Now() + kIneffectiveWakeTimeout), 
+ SyscallFailsWithErrno(ETIMEDOUT)); + }); + absl::SleepFor(kWaiterStartupDelay); + + a.fetch_add(1); + EXPECT_THAT(futex_wake_bitset(IsPrivate(), &a, 1, kWakeBitset), + SyscallSucceedsWithValue(0)); +} + +TEST_P(PrivateAndSharedFutexTest, WakeOpCondSuccess_NoRandomSave) { + constexpr int kInitialValue = 1; + std::atomic<int> a = ATOMIC_VAR_INIT(kInitialValue); + std::atomic<int> b = ATOMIC_VAR_INIT(kInitialValue); + + DisableSave ds; + ScopedThread thread_a([&] { + EXPECT_THAT(futex_wait(IsPrivate(), &a, kInitialValue), SyscallSucceeds()); + }); + ScopedThread thread_b([&] { + EXPECT_THAT(futex_wait(IsPrivate(), &b, kInitialValue), SyscallSucceeds()); + }); + absl::SleepFor(kWaiterStartupDelay); + + a.fetch_add(1); + b.fetch_add(1); + // This futex_wake_op should: + // - Wake 1 waiter on a unconditionally. + // - Wake 1 waiter on b if b == kInitialValue + 1, which it is. + // - Do "b += 1". + EXPECT_THAT(futex_wake_op(IsPrivate(), &a, &b, 1, 1, + FUTEX_OP(FUTEX_OP_ADD, 1, FUTEX_OP_CMP_EQ, + (kInitialValue + 1))), + SyscallSucceedsWithValue(2)); + EXPECT_EQ(b, kInitialValue + 2); +} + +TEST_P(PrivateAndSharedFutexTest, WakeOpCondFailure_NoRandomSave) { + constexpr int kInitialValue = 1; + std::atomic<int> a = ATOMIC_VAR_INIT(kInitialValue); + std::atomic<int> b = ATOMIC_VAR_INIT(kInitialValue); + + DisableSave ds; + ScopedThread thread_a([&] { + EXPECT_THAT(futex_wait(IsPrivate(), &a, kInitialValue), SyscallSucceeds()); + }); + ScopedThread thread_b([&] { + EXPECT_THAT( + futex_wait(IsPrivate(), &b, kInitialValue, kIneffectiveWakeTimeout), + SyscallFailsWithErrno(ETIMEDOUT)); + }); + absl::SleepFor(kWaiterStartupDelay); + + a.fetch_add(1); + b.fetch_add(1); + // This futex_wake_op should: + // - Wake 1 waiter on a unconditionally. + // - Wake 1 waiter on b if b == kInitialValue - 1, which it isn't. + // - Do "b += 1". 
+ EXPECT_THAT(futex_wake_op(IsPrivate(), &a, &b, 1, 1, + FUTEX_OP(FUTEX_OP_ADD, 1, FUTEX_OP_CMP_EQ, + (kInitialValue - 1))), + SyscallSucceedsWithValue(1)); + EXPECT_EQ(b, kInitialValue + 2); +} + +TEST_P(PrivateAndSharedFutexTest, NoWakeInterprocessPrivateAnon_NoRandomSave) { + auto const mapping = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE)); + auto const ptr = static_cast<std::atomic<int>*>(mapping.ptr()); + constexpr int kInitialValue = 1; + ptr->store(kInitialValue); + + DisableSave ds; + pid_t const child_pid = fork(); + if (child_pid == 0) { + TEST_PCHECK(futex_wait(IsPrivate(), ptr, kInitialValue, + kIneffectiveWakeTimeout) == -1 && + errno == ETIMEDOUT); + _exit(0); + } + ASSERT_THAT(child_pid, SyscallSucceeds()); + absl::SleepFor(kWaiterStartupDelay); + + EXPECT_THAT(futex_wake(IsPrivate(), ptr, 1), SyscallSucceedsWithValue(0)); + + int status; + ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << " status " << status; +} + +TEST_P(PrivateAndSharedFutexTest, WakeAfterCOWBreak_NoRandomSave) { + // Use a futex on a non-stack mapping so we can be sure that the child process + // below isn't the one that breaks copy-on-write. + auto const mapping = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE)); + auto const ptr = static_cast<std::atomic<int>*>(mapping.ptr()); + constexpr int kInitialValue = 1; + ptr->store(kInitialValue); + + DisableSave ds; + ScopedThread thread([&] { + EXPECT_THAT(futex_wait(IsPrivate(), ptr, kInitialValue), SyscallSucceeds()); + }); + absl::SleepFor(kWaiterStartupDelay); + + pid_t const child_pid = fork(); + if (child_pid == 0) { + // Wait to be killed by the parent. 
+ while (true) pause(); + } + ASSERT_THAT(child_pid, SyscallSucceeds()); + auto cleanup_child = Cleanup([&] { + EXPECT_THAT(kill(child_pid, SIGKILL), SyscallSucceeds()); + int status; + ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL) + << " status " << status; + }); + + // In addition to preventing a late futex_wait from sleeping, this breaks + // copy-on-write on the mapped page. + ptr->fetch_add(1); + EXPECT_THAT(futex_wake(IsPrivate(), ptr, 1), SyscallSucceedsWithValue(1)); +} + +TEST_P(PrivateAndSharedFutexTest, WakeWrongKind_NoRandomSave) { + constexpr int kInitialValue = 1; + std::atomic<int> a = ATOMIC_VAR_INIT(kInitialValue); + + DisableSave ds; + ScopedThread thread([&] { + EXPECT_THAT( + futex_wait(IsPrivate(), &a, kInitialValue, kIneffectiveWakeTimeout), + SyscallFailsWithErrno(ETIMEDOUT)); + }); + absl::SleepFor(kWaiterStartupDelay); + + a.fetch_add(1); + // The value of priv passed to futex_wake is the opposite of that passed to + // the futex_waiter; we expect this not to wake the waiter. + EXPECT_THAT(futex_wake(!IsPrivate(), &a, 1), SyscallSucceedsWithValue(0)); +} + +INSTANTIATE_TEST_SUITE_P(SharedPrivate, PrivateAndSharedFutexTest, + ::testing::Bool()); + +// Passing null as the address only works for private futexes. 
+ +TEST(PrivateFutexTest, WakeOp0Set) { + std::atomic<int> a = ATOMIC_VAR_INIT(1); + + int futex_op = FUTEX_OP(FUTEX_OP_SET, 2, 0, 0); + EXPECT_THAT(futex_wake_op(true, nullptr, &a, 0, 0, futex_op), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(a, 2); +} + +TEST(PrivateFutexTest, WakeOp0Add) { + std::atomic<int> a = ATOMIC_VAR_INIT(1); + int futex_op = FUTEX_OP(FUTEX_OP_ADD, 1, 0, 0); + EXPECT_THAT(futex_wake_op(true, nullptr, &a, 0, 0, futex_op), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(a, 2); +} + +TEST(PrivateFutexTest, WakeOp0Or) { + std::atomic<int> a = ATOMIC_VAR_INIT(0b01); + int futex_op = FUTEX_OP(FUTEX_OP_OR, 0b10, 0, 0); + EXPECT_THAT(futex_wake_op(true, nullptr, &a, 0, 0, futex_op), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(a, 0b11); +} + +TEST(PrivateFutexTest, WakeOp0Andn) { + std::atomic<int> a = ATOMIC_VAR_INIT(0b11); + int futex_op = FUTEX_OP(FUTEX_OP_ANDN, 0b10, 0, 0); + EXPECT_THAT(futex_wake_op(true, nullptr, &a, 0, 0, futex_op), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(a, 0b01); +} + +TEST(PrivateFutexTest, WakeOp0Xor) { + std::atomic<int> a = ATOMIC_VAR_INIT(0b1010); + int futex_op = FUTEX_OP(FUTEX_OP_XOR, 0b1100, 0, 0); + EXPECT_THAT(futex_wake_op(true, nullptr, &a, 0, 0, futex_op), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(a, 0b0110); +} + +TEST(SharedFutexTest, WakeInterprocessSharedAnon_NoRandomSave) { + auto const mapping = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED)); + auto const ptr = static_cast<std::atomic<int>*>(mapping.ptr()); + constexpr int kInitialValue = 1; + ptr->store(kInitialValue); + + DisableSave ds; + pid_t const child_pid = fork(); + if (child_pid == 0) { + TEST_PCHECK(futex_wait(false, ptr, kInitialValue) == 0); + _exit(0); + } + ASSERT_THAT(child_pid, SyscallSucceeds()); + auto kill_child = Cleanup( + [&] { EXPECT_THAT(kill(child_pid, SIGKILL), SyscallSucceeds()); }); + absl::SleepFor(kWaiterStartupDelay); + + ptr->fetch_add(1); + // This is an ASSERT so that if it 
fails, we immediately abort the test (and + // kill the subprocess). + ASSERT_THAT(futex_wake(false, ptr, 1), SyscallSucceedsWithValue(1)); + + kill_child.Release(); + int status; + ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << " status " << status; +} + +TEST(SharedFutexTest, WakeInterprocessFile_NoRandomSave) { + auto const file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + ASSERT_THAT(truncate(file.path().c_str(), kPageSize), SyscallSucceeds()); + auto const fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR)); + auto const mapping = ASSERT_NO_ERRNO_AND_VALUE(Mmap( + nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd.get(), 0)); + auto const ptr = static_cast<std::atomic<int>*>(mapping.ptr()); + constexpr int kInitialValue = 1; + ptr->store(kInitialValue); + + DisableSave ds; + pid_t const child_pid = fork(); + if (child_pid == 0) { + TEST_PCHECK(futex_wait(false, ptr, kInitialValue) == 0); + _exit(0); + } + ASSERT_THAT(child_pid, SyscallSucceeds()); + auto kill_child = Cleanup( + [&] { EXPECT_THAT(kill(child_pid, SIGKILL), SyscallSucceeds()); }); + absl::SleepFor(kWaiterStartupDelay); + + ptr->fetch_add(1); + // This is an ASSERT so that if it fails, we immediately abort the test (and + // kill the subprocess). 
+ ASSERT_THAT(futex_wake(false, ptr, 1), SyscallSucceedsWithValue(1)); + + kill_child.Release(); + int status; + ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << " status " << status; +} + +TEST_P(PrivateAndSharedFutexTest, PIBasic) { + std::atomic<int> a = ATOMIC_VAR_INIT(0); + + ASSERT_THAT(futex_lock_pi(IsPrivate(), &a), SyscallSucceeds()); + EXPECT_EQ(a.load(), gettid()); + EXPECT_THAT(futex_lock_pi(IsPrivate(), &a), SyscallFailsWithErrno(EDEADLK)); + + ASSERT_THAT(futex_unlock_pi(IsPrivate(), &a), SyscallSucceeds()); + EXPECT_EQ(a.load(), 0); + EXPECT_THAT(futex_unlock_pi(IsPrivate(), &a), SyscallFailsWithErrno(EPERM)); +} + +TEST_P(PrivateAndSharedFutexTest, PIConcurrency_NoRandomSave) { + DisableSave ds; // Too many syscalls. + + std::atomic<int> a = ATOMIC_VAR_INIT(0); + const bool is_priv = IsPrivate(); + + std::unique_ptr<ScopedThread> threads[100]; + for (size_t i = 0; i < ABSL_ARRAYSIZE(threads); ++i) { + threads[i] = absl::make_unique<ScopedThread>([is_priv, &a] { + for (size_t j = 0; j < 10; ++j) { + ASSERT_THAT(futex_lock_pi(is_priv, &a), SyscallSucceeds()); + EXPECT_EQ(a.load() & FUTEX_TID_MASK, gettid()); + SleepSafe(absl::Milliseconds(5)); + ASSERT_THAT(futex_unlock_pi(is_priv, &a), SyscallSucceeds()); + } + }); + } +} + +TEST_P(PrivateAndSharedFutexTest, PIWaiters) { + std::atomic<int> a = ATOMIC_VAR_INIT(0); + const bool is_priv = IsPrivate(); + + ASSERT_THAT(futex_lock_pi(is_priv, &a), SyscallSucceeds()); + EXPECT_EQ(a.load(), gettid()); + + ScopedThread th([is_priv, &a] { + ASSERT_THAT(futex_lock_pi(is_priv, &a), SyscallSucceeds()); + ASSERT_THAT(futex_unlock_pi(is_priv, &a), SyscallSucceeds()); + }); + + // Wait until the thread blocks on the futex, setting the waiters bit. 
+ auto start = absl::Now(); + while (a.load() != (FUTEX_WAITERS | gettid())) { + ASSERT_LT(absl::Now() - start, absl::Seconds(5)); + absl::SleepFor(absl::Milliseconds(100)); + } + ASSERT_THAT(futex_unlock_pi(is_priv, &a), SyscallSucceeds()); +} + +TEST_P(PrivateAndSharedFutexTest, PITryLock) { + std::atomic<int> a = ATOMIC_VAR_INIT(0); + const bool is_priv = IsPrivate(); + + ASSERT_THAT(futex_trylock_pi(IsPrivate(), &a), SyscallSucceeds()); + EXPECT_EQ(a.load(), gettid()); + + EXPECT_THAT(futex_trylock_pi(is_priv, &a), SyscallFailsWithErrno(EDEADLK)); + ScopedThread th([is_priv, &a] { + EXPECT_THAT(futex_trylock_pi(is_priv, &a), SyscallFailsWithErrno(EAGAIN)); + }); + th.Join(); + + ASSERT_THAT(futex_unlock_pi(IsPrivate(), &a), SyscallSucceeds()); +} + +TEST_P(PrivateAndSharedFutexTest, PITryLockConcurrency_NoRandomSave) { + DisableSave ds; // Too many syscalls. + + std::atomic<int> a = ATOMIC_VAR_INIT(0); + const bool is_priv = IsPrivate(); + + std::unique_ptr<ScopedThread> threads[10]; + for (size_t i = 0; i < ABSL_ARRAYSIZE(threads); ++i) { + threads[i] = absl::make_unique<ScopedThread>([is_priv, &a] { + for (size_t j = 0; j < 10;) { + if (futex_trylock_pi(is_priv, &a) == 0) { + ++j; + EXPECT_EQ(a.load() & FUTEX_TID_MASK, gettid()); + SleepSafe(absl::Milliseconds(5)); + ASSERT_THAT(futex_unlock_pi(is_priv, &a), SyscallSucceeds()); + } + } + }); + } +} + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/getcpu.cc b/test/syscalls/linux/getcpu.cc new file mode 100644 index 000000000..f4d94bd6a --- /dev/null +++ b/test/syscalls/linux/getcpu.cc @@ -0,0 +1,40 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <sched.h> + +#include "gtest/gtest.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +TEST(GetcpuTest, IsValidCpuStress) { + const int num_cpus = NumCPUs(); + absl::Time deadline = absl::Now() + absl::Seconds(10); + while (absl::Now() < deadline) { + int cpu; + ASSERT_THAT(cpu = sched_getcpu(), SyscallSucceeds()); + ASSERT_LT(cpu, num_cpus); + } +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/getdents.cc b/test/syscalls/linux/getdents.cc new file mode 100644 index 000000000..b147d6181 --- /dev/null +++ b/test/syscalls/linux/getdents.cc @@ -0,0 +1,539 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <dirent.h> +#include <errno.h> +#include <fcntl.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/types.h> +#include <syscall.h> +#include <unistd.h> + +#include <map> +#include <string> +#include <unordered_map> +#include <unordered_set> +#include <utility> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/strings/numbers.h" +#include "absl/strings/str_cat.h" +#include "test/util/eventfd_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/fs_util.h" +#include "test/util/posix_error.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +using ::testing::Contains; +using ::testing::IsEmpty; +using ::testing::IsSupersetOf; +using ::testing::Not; +using ::testing::NotNull; + +namespace gvisor { +namespace testing { + +namespace { + +// New Linux dirent format. +struct linux_dirent64 { + uint64_t d_ino; // Inode number + int64_t d_off; // Offset to next linux_dirent64 + unsigned short d_reclen; // NOLINT, Length of this linux_dirent64 + unsigned char d_type; // NOLINT, File type + char d_name[0]; // Filename (null-terminated) +}; + +// Old Linux dirent format. +struct linux_dirent { + unsigned long d_ino; // NOLINT + unsigned long d_off; // NOLINT + unsigned short d_reclen; // NOLINT + char d_name[0]; +}; + +// Wraps a buffer to provide a set of dirents. +// T is the underlying dirent type. +template <typename T> +class DirentBuffer { + public: + // DirentBuffer manages the buffer. + explicit DirentBuffer(size_t size) + : managed_(true), actual_size_(size), reported_size_(size) { + data_ = new char[actual_size_]; + } + + // The buffer is managed externally. 
+ DirentBuffer(char* data, size_t actual_size, size_t reported_size) + : managed_(false), + data_(data), + actual_size_(actual_size), + reported_size_(reported_size) {} + + ~DirentBuffer() { + if (managed_) { + delete[] data_; + } + } + + T* Data() { return reinterpret_cast<T*>(data_); } + + T* Start(size_t read) { + read_ = read; + if (read_) { + return Data(); + } else { + return nullptr; + } + } + + T* Current() { return reinterpret_cast<T*>(&data_[off_]); } + + T* Next() { + size_t new_off = off_ + Current()->d_reclen; + if (new_off >= read_ || new_off >= actual_size_) { + return nullptr; + } + + off_ = new_off; + return Current(); + } + + size_t Size() { return reported_size_; } + + void Reset() { + off_ = 0; + read_ = 0; + memset(data_, 0, actual_size_); + } + + private: + bool managed_; + char* data_; + size_t actual_size_; + size_t reported_size_; + + size_t off_ = 0; + + size_t read_ = 0; +}; + +// Test for getdents/getdents64. +// T is the Linux dirent type. +template <typename T> +class GetdentsTest : public ::testing::Test { + public: + using LinuxDirentType = T; + using DirentBufferType = DirentBuffer<T>; + + protected: + void SetUp() override { + dir_ = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + fd_ = ASSERT_NO_ERRNO_AND_VALUE(Open(dir_.path(), O_RDONLY | O_DIRECTORY)); + } + + // Must be overridden with explicit specialization. See below. + int SyscallNum(); + + int Getdents(LinuxDirentType* dirp, unsigned int count) { + return RetryEINTR(syscall)(SyscallNum(), fd_.get(), dirp, count); + } + + // Fill directory with num files, named by number starting at 0. + void FillDirectory(size_t num) { + for (size_t i = 0; i < num; i++) { + auto name = JoinPath(dir_.path(), absl::StrCat(i)); + TEST_CHECK(CreateWithContents(name, "").ok()); + } + } + + // Fill directory with a given list of filenames. 
+ void FillDirectoryWithFiles(const std::vector<std::string>& filenames) { + for (const auto& filename : filenames) { + auto name = JoinPath(dir_.path(), filename); + TEST_CHECK(CreateWithContents(name, "").ok()); + } + } + + // Seek to the start of the directory. + PosixError SeekStart() { + constexpr off_t kStartOfFile = 0; + off_t offset = lseek(fd_.get(), kStartOfFile, SEEK_SET); + if (offset < 0) { + return PosixError(errno, absl::StrCat("error seeking to ", kStartOfFile)); + } + if (offset != kStartOfFile) { + return PosixError(EINVAL, absl::StrCat("tried to seek to ", kStartOfFile, + " but got ", offset)); + } + return NoError(); + } + + // Call getdents multiple times, reading all dirents and calling f on each. + // f has the type signature PosixError f(T*). + // If f returns a non-OK error, so does ReadDirents. + template <typename F> + PosixError ReadDirents(DirentBufferType* dirents, F const& f) { + int n; + do { + dirents->Reset(); + + n = Getdents(dirents->Data(), dirents->Size()); + MaybeSave(); + if (n < 0) { + return PosixError(errno, "getdents"); + } + + for (auto d = dirents->Start(n); d; d = dirents->Next()) { + RETURN_IF_ERRNO(f(d)); + } + } while (n > 0); + + return NoError(); + } + + // Call Getdents successively and count all entries. + int ReadAndCountAllEntries(DirentBufferType* dirents) { + int found = 0; + + EXPECT_NO_ERRNO(ReadDirents(dirents, [&](LinuxDirentType* d) { + found++; + return NoError(); + })); + + return found; + } + + private: + TempPath dir_; + FileDescriptor fd_; +}; + +// Multiple template parameters are not allowed, so we must use explicit +// template specialization to set the syscall number. + +// SYS_getdents isn't defined on arm64. 
+#ifdef __x86_64__ +template <> +int GetdentsTest<struct linux_dirent>::SyscallNum() { + return SYS_getdents; +} +#endif + +template <> +int GetdentsTest<struct linux_dirent64>::SyscallNum() { + return SYS_getdents64; +} + +#ifdef __x86_64__ +// Test both legacy getdents and getdents64 on x86_64. +typedef ::testing::Types<struct linux_dirent, struct linux_dirent64> + GetdentsTypes; +#elif __aarch64__ +// Test only getdents64 on arm64. +typedef ::testing::Types<struct linux_dirent64> GetdentsTypes; +#endif +TYPED_TEST_SUITE(GetdentsTest, GetdentsTypes); + +// N.B. TYPED_TESTs require explicitly using this-> to access members of +// GetdentsTest, since we are inside of a derived class template. + +TYPED_TEST(GetdentsTest, VerifyEntries) { + typename TestFixture::DirentBufferType dirents(1024); + + this->FillDirectory(2); + + // Map of all the entries we expect to find. + std::map<std::string, bool> found; + found["."] = false; + found[".."] = false; + found["0"] = false; + found["1"] = false; + + EXPECT_NO_ERRNO(this->ReadDirents( + &dirents, [&](typename TestFixture::LinuxDirentType* d) { + auto kv = found.find(d->d_name); + EXPECT_NE(kv, found.end()) << "Unexpected file: " << d->d_name; + if (kv != found.end()) { + EXPECT_FALSE(kv->second); + } + found[d->d_name] = true; + return NoError(); + })); + + for (auto& kv : found) { + EXPECT_TRUE(kv.second) << "File not found: " << kv.first; + } +} + +TYPED_TEST(GetdentsTest, VerifyPadding) { + typename TestFixture::DirentBufferType dirents(1024); + + // Create files with names of length 1 through 16. + std::vector<std::string> files; + std::string filename; + for (int i = 0; i < 16; ++i) { + absl::StrAppend(&filename, "a"); + files.push_back(filename); + } + this->FillDirectoryWithFiles(files); + + // We expect to find all the files, plus '.' and '..'. 
+ const int expect_found = 2 + files.size(); + int found = 0; + + EXPECT_NO_ERRNO(this->ReadDirents( + &dirents, [&](typename TestFixture::LinuxDirentType* d) { + EXPECT_EQ(d->d_reclen % 8, 0) + << "Dirent " << d->d_name + << " had reclen that was not byte aligned: " << d->d_name; + found++; + return NoError(); + })); + + // Make sure we found all the files. + EXPECT_EQ(found, expect_found); +} + +// For a small directory, the provided buffer should be large enough +// for all entries. +TYPED_TEST(GetdentsTest, SmallDir) { + // . and .. should be in an otherwise empty directory. + int expect = 2; + + // Add some actual files. + this->FillDirectory(2); + expect += 2; + + typename TestFixture::DirentBufferType dirents(256); + + EXPECT_EQ(expect, this->ReadAndCountAllEntries(&dirents)); +} + +// A directory with lots of files requires calling getdents multiple times. +TYPED_TEST(GetdentsTest, LargeDir) { + // . and .. should be in an otherwise empty directory. + int expect = 2; + + // Add some actual files. + this->FillDirectory(100); + expect += 100; + + typename TestFixture::DirentBufferType dirents(256); + + EXPECT_EQ(expect, this->ReadAndCountAllEntries(&dirents)); +} + +// If we lie about the size of the buffer, we should still be able to read the +// entries with the available space. +TYPED_TEST(GetdentsTest, PartialBuffer) { + // . and .. should be in an otherwise empty directory. + int expect = 2; + + // Add some actual files. + this->FillDirectory(100); + expect += 100; + + void* addr = mmap(0, 2 * kPageSize, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + ASSERT_NE(addr, MAP_FAILED); + + char* buf = reinterpret_cast<char*>(addr); + + // Guard page + EXPECT_THAT( + mprotect(reinterpret_cast<void*>(buf + kPageSize), kPageSize, PROT_NONE), + SyscallSucceeds()); + + // Limit space in buf to 256 bytes. + buf += kPageSize - 256; + + // Lie about the buffer. 
Even though we claim the buffer is 1 page, + // we should still get all of the dirents in the first 256 bytes. + typename TestFixture::DirentBufferType dirents(buf, 256, kPageSize); + + EXPECT_EQ(expect, this->ReadAndCountAllEntries(&dirents)); + + EXPECT_THAT(munmap(addr, 2 * kPageSize), SyscallSucceeds()); +} + +// Open many file descriptors, then scan through /proc/self/fd to find and close +// them all. (The latter is commonly used to handle races between fork/execve +// and the creation of unwanted non-O_CLOEXEC file descriptors.) This tests that +// getdents iterates correctly despite mutation of /proc/self/fd. +TYPED_TEST(GetdentsTest, ProcSelfFd) { + constexpr size_t kNfds = 10; + std::unordered_map<int, FileDescriptor> fds; + fds.reserve(kNfds); + for (size_t i = 0; i < kNfds; i++) { + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NewEventFD()); + fds.emplace(fd.get(), std::move(fd)); + } + + const FileDescriptor proc_self_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/self/fd", O_RDONLY | O_DIRECTORY)); + + // Make the buffer very small since we want to iterate. + typename TestFixture::DirentBufferType dirents( + 2 * sizeof(typename TestFixture::LinuxDirentType)); + std::unordered_set<int> prev_fds; + while (true) { + dirents.Reset(); + int rv; + ASSERT_THAT(rv = RetryEINTR(syscall)(this->SyscallNum(), proc_self_fd.get(), + dirents.Data(), dirents.Size()), + SyscallSucceeds()); + if (rv == 0) { + break; + } + for (auto* d = dirents.Start(rv); d; d = dirents.Next()) { + int dfd; + if (!absl::SimpleAtoi(d->d_name, &dfd)) continue; + EXPECT_TRUE(prev_fds.insert(dfd).second) + << "Repeated observation of /proc/self/fd/" << dfd; + fds.erase(dfd); + } + } + + // Check that we closed every fd. + EXPECT_THAT(fds, ::testing::IsEmpty()); +} + +// Test that getdents returns ENOTDIR when called on a file. 
+TYPED_TEST(GetdentsTest, NotDir) { + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + auto fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY)); + + typename TestFixture::DirentBufferType dirents(256); + EXPECT_THAT(RetryEINTR(syscall)(this->SyscallNum(), fd.get(), dirents.Data(), + dirents.Size()), + SyscallFailsWithErrno(ENOTDIR)); +} + +// Test that SEEK_SET to 0 causes getdents to re-read the entries. +TYPED_TEST(GetdentsTest, SeekResetsCursor) { + // . and .. should be in an otherwise empty directory. + int expect = 2; + + // Add some files to the directory. + this->FillDirectory(10); + expect += 10; + + typename TestFixture::DirentBufferType dirents(256); + + // We should get all the expected entries. + EXPECT_EQ(expect, this->ReadAndCountAllEntries(&dirents)); + + // Seek back to 0. + ASSERT_NO_ERRNO(this->SeekStart()); + + // We should get all the expected entries again. + EXPECT_EQ(expect, this->ReadAndCountAllEntries(&dirents)); +} + +// Test that getdents() after SEEK_END succeeds. +// This is a regression test for #128. +TYPED_TEST(GetdentsTest, Issue128ProcSeekEnd) { + auto fd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/self", O_RDONLY | O_DIRECTORY)); + typename TestFixture::DirentBufferType dirents(256); + + ASSERT_THAT(lseek(fd.get(), 0, SEEK_END), SyscallSucceeds()); + ASSERT_THAT(RetryEINTR(syscall)(this->SyscallNum(), fd.get(), dirents.Data(), + dirents.Size()), + SyscallSucceeds()); +} + +// Some tests using the glibc readdir interface. 
+TEST(ReaddirTest, OpenDir) { + DIR* dev; + ASSERT_THAT(dev = opendir("/dev"), NotNull()); + EXPECT_THAT(closedir(dev), SyscallSucceeds()); +} + +TEST(ReaddirTest, RootContainsBasicDirectories) { + EXPECT_THAT(ListDir("/", true), + IsPosixErrorOkAndHolds(IsSupersetOf( + {"bin", "dev", "etc", "lib", "proc", "sbin", "usr"}))); +} + +TEST(ReaddirTest, Bug24096713Dev) { + auto contents = ASSERT_NO_ERRNO_AND_VALUE(ListDir("/dev", true)); + EXPECT_THAT(contents, Not(IsEmpty())); +} + +TEST(ReaddirTest, Bug24096713ProcTid) { + auto contents = ASSERT_NO_ERRNO_AND_VALUE( + ListDir(absl::StrCat("/proc/", syscall(SYS_gettid), "/"), true)); + EXPECT_THAT(contents, Not(IsEmpty())); +} + +TEST(ReaddirTest, Bug33429925Proc) { + auto contents = ASSERT_NO_ERRNO_AND_VALUE(ListDir("/proc", true)); + EXPECT_THAT(contents, Not(IsEmpty())); +} + +TEST(ReaddirTest, Bug35110122Root) { + auto contents = ASSERT_NO_ERRNO_AND_VALUE(ListDir("/", true)); + EXPECT_THAT(contents, Not(IsEmpty())); +} + +// Unlink should invalidate getdents cache. +TEST(ReaddirTest, GoneAfterRemoveCache) { + TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir.path())); + std::string name = std::string(Basename(file.path())); + + auto contents = ASSERT_NO_ERRNO_AND_VALUE(ListDir(dir.path(), true)); + EXPECT_THAT(contents, Contains(name)); + + file.reset(); + + contents = ASSERT_NO_ERRNO_AND_VALUE(ListDir(dir.path(), true)); + EXPECT_THAT(contents, Not(Contains(name))); +} + +// Regression test for b/137398511. Rename should invalidate getdents cache. 
+TEST(ReaddirTest, GoneAfterRenameCache) { + TempPath src = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + TempPath dst = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + + TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(src.path())); + std::string name = std::string(Basename(file.path())); + + auto contents = ASSERT_NO_ERRNO_AND_VALUE(ListDir(src.path(), true)); + EXPECT_THAT(contents, Contains(name)); + + ASSERT_THAT(rename(file.path().c_str(), JoinPath(dst.path(), name).c_str()), + SyscallSucceeds()); + // Release file since it was renamed. dst cleanup will ultimately delete it. + file.release(); + + contents = ASSERT_NO_ERRNO_AND_VALUE(ListDir(src.path(), true)); + EXPECT_THAT(contents, Not(Contains(name))); + + contents = ASSERT_NO_ERRNO_AND_VALUE(ListDir(dst.path(), true)); + EXPECT_THAT(contents, Contains(name)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/getrandom.cc b/test/syscalls/linux/getrandom.cc new file mode 100644 index 000000000..f87cdd7a1 --- /dev/null +++ b/test/syscalls/linux/getrandom.cc @@ -0,0 +1,63 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <sys/syscall.h> +#include <sys/types.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +#ifndef SYS_getrandom +#if defined(__x86_64__) +#define SYS_getrandom 318 +#elif defined(__i386__) +#define SYS_getrandom 355 +#elif defined(__aarch64__) +#define SYS_getrandom 278 +#else +#error "Unknown architecture" +#endif +#endif // SYS_getrandom + +bool SomeByteIsNonZero(char* random_bytes, int length) { + for (int i = 0; i < length; i++) { + if (random_bytes[i] != 0) { + return true; + } + } + return false; +} + +TEST(GetrandomTest, IsRandom) { + // This test calls get_random and makes sure that the array is filled in with + // something that is non-zero. Perhaps we get back \x00\x00\x00\x00\x00.... as + // a random result, but it's so unlikely that we'll just ignore this. + char random_bytes[64] = {}; + int n = syscall(SYS_getrandom, random_bytes, 64, 0); + SKIP_IF(!IsRunningOnGvisor() && n < 0 && errno == ENOSYS); + EXPECT_THAT(n, SyscallSucceeds()); + EXPECT_GT(n, 0); // Some bytes should be returned. + EXPECT_TRUE(SomeByteIsNonZero(random_bytes, n)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/getrusage.cc b/test/syscalls/linux/getrusage.cc new file mode 100644 index 000000000..0e51d42a8 --- /dev/null +++ b/test/syscalls/linux/getrusage.cc @@ -0,0 +1,177 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include <signal.h> +#include <sys/mman.h> +#include <sys/resource.h> +#include <sys/types.h> +#include <sys/wait.h> + +#include "gtest/gtest.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/util/logging.h" +#include "test/util/memory_util.h" +#include "test/util/signal_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +TEST(GetrusageTest, BasicFork) { + pid_t pid = fork(); + if (pid == 0) { + struct rusage rusage_self; + TEST_PCHECK(getrusage(RUSAGE_SELF, &rusage_self) == 0); + struct rusage rusage_children; + TEST_PCHECK(getrusage(RUSAGE_CHILDREN, &rusage_children) == 0); + // The child has consumed some memory. + TEST_CHECK(rusage_self.ru_maxrss != 0); + // The child has no children of its own. + TEST_CHECK(rusage_children.ru_maxrss == 0); + _exit(0); + } + ASSERT_THAT(pid, SyscallSucceeds()); + int status; + ASSERT_THAT(RetryEINTR(waitpid)(pid, &status, 0), SyscallSucceeds()); + struct rusage rusage_self; + ASSERT_THAT(getrusage(RUSAGE_SELF, &rusage_self), SyscallSucceeds()); + struct rusage rusage_children; + ASSERT_THAT(getrusage(RUSAGE_CHILDREN, &rusage_children), SyscallSucceeds()); + // The parent has consumed some memory. + EXPECT_GT(rusage_self.ru_maxrss, 0); + // The child has consumed some memory, and because it has exited we can get + // its max RSS. + EXPECT_GT(rusage_children.ru_maxrss, 0); +} + +// Verifies that a process can get the max resident set size of its grandchild, +// i.e. that maxrss propagates correctly from children to waiting parents. 
+TEST(GetrusageTest, Grandchild) { + constexpr int kGrandchildSizeKb = 1024; + pid_t pid = fork(); + if (pid == 0) { + pid = fork(); + if (pid == 0) { + int flags = MAP_ANONYMOUS | MAP_POPULATE | MAP_PRIVATE; + void* addr = + mmap(nullptr, kGrandchildSizeKb * 1024, PROT_WRITE, flags, -1, 0); + TEST_PCHECK(addr != MAP_FAILED); + } else { + int status; + TEST_PCHECK(RetryEINTR(waitpid)(pid, &status, 0) == pid); + } + _exit(0); + } + ASSERT_THAT(pid, SyscallSucceeds()); + int status; + ASSERT_THAT(RetryEINTR(waitpid)(pid, &status, 0), SyscallSucceeds()); + struct rusage rusage_self; + ASSERT_THAT(getrusage(RUSAGE_SELF, &rusage_self), SyscallSucceeds()); + struct rusage rusage_children; + ASSERT_THAT(getrusage(RUSAGE_CHILDREN, &rusage_children), SyscallSucceeds()); + // The parent has consumed some memory. + EXPECT_GT(rusage_self.ru_maxrss, 0); + // The child should consume next to no memory, but the grandchild will + // consume at least 1MB. Verify that usage bubbles up to the grandparent. + EXPECT_GT(rusage_children.ru_maxrss, kGrandchildSizeKb); +} + +// Verifies that processes ignoring SIGCHLD do not have updated child maxrss +// updated. +TEST(GetrusageTest, IgnoreSIGCHLD) { + struct sigaction sa; + sa.sa_handler = SIG_IGN; + sa.sa_flags = 0; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGCHLD, sa)); + pid_t pid = fork(); + if (pid == 0) { + struct rusage rusage_self; + TEST_PCHECK(getrusage(RUSAGE_SELF, &rusage_self) == 0); + // The child has consumed some memory. + TEST_CHECK(rusage_self.ru_maxrss != 0); + _exit(0); + } + ASSERT_THAT(pid, SyscallSucceeds()); + int status; + ASSERT_THAT(RetryEINTR(waitpid)(pid, &status, 0), + SyscallFailsWithErrno(ECHILD)); + struct rusage rusage_self; + ASSERT_THAT(getrusage(RUSAGE_SELF, &rusage_self), SyscallSucceeds()); + struct rusage rusage_children; + ASSERT_THAT(getrusage(RUSAGE_CHILDREN, &rusage_children), SyscallSucceeds()); + // The parent has consumed some memory. 
+ EXPECT_GT(rusage_self.ru_maxrss, 0); + // The child's maxrss should not have propagated up. + EXPECT_EQ(rusage_children.ru_maxrss, 0); +} + +// Verifies that zombie processes do not update their parent's maxrss. Only +// reaped processes should do this. +TEST(GetrusageTest, IgnoreZombie) { + pid_t pid = fork(); + if (pid == 0) { + struct rusage rusage_self; + TEST_PCHECK(getrusage(RUSAGE_SELF, &rusage_self) == 0); + struct rusage rusage_children; + TEST_PCHECK(getrusage(RUSAGE_CHILDREN, &rusage_children) == 0); + // The child has consumed some memory. + TEST_CHECK(rusage_self.ru_maxrss != 0); + // The child has no children of its own. + TEST_CHECK(rusage_children.ru_maxrss == 0); + _exit(0); + } + ASSERT_THAT(pid, SyscallSucceeds()); + // Give the child time to exit. Because we don't call wait, the child should + // remain a zombie. + absl::SleepFor(absl::Seconds(5)); + struct rusage rusage_self; + ASSERT_THAT(getrusage(RUSAGE_SELF, &rusage_self), SyscallSucceeds()); + struct rusage rusage_children; + ASSERT_THAT(getrusage(RUSAGE_CHILDREN, &rusage_children), SyscallSucceeds()); + // The parent has consumed some memory. + EXPECT_GT(rusage_self.ru_maxrss, 0); + // The child has consumed some memory, but hasn't been reaped. + EXPECT_EQ(rusage_children.ru_maxrss, 0); +} + +TEST(GetrusageTest, Wait4) { + pid_t pid = fork(); + if (pid == 0) { + struct rusage rusage_self; + TEST_PCHECK(getrusage(RUSAGE_SELF, &rusage_self) == 0); + struct rusage rusage_children; + TEST_PCHECK(getrusage(RUSAGE_CHILDREN, &rusage_children) == 0); + // The child has consumed some memory. + TEST_CHECK(rusage_self.ru_maxrss != 0); + // The child has no children of its own. 
+ TEST_CHECK(rusage_children.ru_maxrss == 0); + _exit(0); + } + ASSERT_THAT(pid, SyscallSucceeds()); + struct rusage rusage_children; + int status; + ASSERT_THAT(RetryEINTR(wait4)(pid, &status, 0, &rusage_children), + SyscallSucceeds()); + // The child has consumed some memory, and because it has exited we can get + // its max RSS. + EXPECT_GT(rusage_children.ru_maxrss, 0); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/inotify.cc b/test/syscalls/linux/inotify.cc new file mode 100644 index 000000000..220874aeb --- /dev/null +++ b/test/syscalls/linux/inotify.cc @@ -0,0 +1,2380 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <fcntl.h> +#include <libgen.h> +#include <sched.h> +#include <sys/epoll.h> +#include <sys/inotify.h> +#include <sys/ioctl.h> +#include <sys/time.h> +#include <sys/xattr.h> + +#include <atomic> +#include <list> +#include <string> +#include <vector> + +#include "absl/strings/str_cat.h" +#include "absl/strings/str_format.h" +#include "absl/strings/str_join.h" +#include "absl/synchronization/mutex.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/util/epoll_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/fs_util.h" +#include "test/util/posix_error.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { +namespace { + +using ::absl::StreamFormat; +using ::absl::StrFormat; + +constexpr int kBufSize = 1024; + +// C++-friendly version of struct inotify_event. +struct Event { + int32_t wd; + uint32_t mask; + uint32_t cookie; + uint32_t len; + std::string name; + + Event(uint32_t mask, int32_t wd, absl::string_view name, uint32_t cookie) + : wd(wd), + mask(mask), + cookie(cookie), + len(name.size()), + name(std::string(name)) {} + Event(uint32_t mask, int32_t wd, absl::string_view name) + : Event(mask, wd, name, 0) {} + Event(uint32_t mask, int32_t wd) : Event(mask, wd, "", 0) {} + Event() : Event(0, 0, "", 0) {} +}; + +// Prints the symbolic name for a struct inotify_event's 'mask' field. 
+std::string FlagString(uint32_t flags) { + std::vector<std::string> names; + +#define EMIT(target) \ + if (flags & target) { \ + names.push_back(#target); \ + flags &= ~target; \ + } + + EMIT(IN_ACCESS); + EMIT(IN_ATTRIB); + EMIT(IN_CLOSE_WRITE); + EMIT(IN_CLOSE_NOWRITE); + EMIT(IN_CREATE); + EMIT(IN_DELETE); + EMIT(IN_DELETE_SELF); + EMIT(IN_MODIFY); + EMIT(IN_MOVE_SELF); + EMIT(IN_MOVED_FROM); + EMIT(IN_MOVED_TO); + EMIT(IN_OPEN); + + EMIT(IN_DONT_FOLLOW); + EMIT(IN_EXCL_UNLINK); + EMIT(IN_ONESHOT); + EMIT(IN_ONLYDIR); + + EMIT(IN_IGNORED); + EMIT(IN_ISDIR); + EMIT(IN_Q_OVERFLOW); + EMIT(IN_UNMOUNT); + +#undef EMIT + + // If we have anything left over at the end, print it as a hex value. + if (flags) { + names.push_back(absl::StrCat("0x", absl::Hex(flags))); + } + + return absl::StrJoin(names, "|"); +} + +std::string DumpEvent(const Event& event) { + return StrFormat( + "%s, wd=%d%s%s", FlagString(event.mask), event.wd, + (event.len > 0) ? StrFormat(", name=%s", event.name) : "", + (event.cookie > 0) ? StrFormat(", cookie=%ud", event.cookie) : ""); +} + +std::string DumpEvents(const std::vector<Event>& events, int indent_level) { + std::stringstream ss; + ss << StreamFormat("%d event%s:\n", events.size(), + (events.size() > 1) ? "s" : ""); + int i = 0; + for (const Event& ev : events) { + ss << StreamFormat("%sevents[%d]: %s\n", std::string(indent_level, '\t'), + i++, DumpEvent(ev)); + } + return ss.str(); +} + +// A matcher which takes an expected list of events to match against another +// list of inotify events, in order. This is similar to the ElementsAre matcher, +// but displays more informative messages on mismatch. 
+class EventsAreMatcher + : public ::testing::MatcherInterface<std::vector<Event>> { + public: + explicit EventsAreMatcher(std::vector<Event> references) + : references_(std::move(references)) {} + + bool MatchAndExplain( + std::vector<Event> events, + ::testing::MatchResultListener* const listener) const override { + if (references_.size() != events.size()) { + *listener << StreamFormat("\n\tCount mismatch, got %s", + DumpEvents(events, 2)); + return false; + } + + bool success = true; + for (unsigned int i = 0; i < references_.size(); ++i) { + const Event& reference = references_[i]; + const Event& target = events[i]; + + if (target.mask != reference.mask || target.wd != reference.wd || + target.name != reference.name || target.cookie != reference.cookie) { + *listener << StreamFormat("\n\tMismatch at index %d, want %s, got %s,", + i, DumpEvent(reference), DumpEvent(target)); + success = false; + } + } + + if (!success) { + *listener << StreamFormat("\n\tIn total of %s", DumpEvents(events, 2)); + } + return success; + } + + void DescribeTo(::std::ostream* const os) const override { + *os << StreamFormat("%s", DumpEvents(references_, 1)); + } + + void DescribeNegationTo(::std::ostream* const os) const override { + *os << StreamFormat("mismatch from %s", DumpEvents(references_, 1)); + } + + private: + std::vector<Event> references_; +}; + +::testing::Matcher<std::vector<Event>> Are(std::vector<Event> events) { + return MakeMatcher(new EventsAreMatcher(std::move(events))); +} + +// Similar to the EventsAre matcher, but the order of events are ignored. 
+class UnorderedEventsAreMatcher + : public ::testing::MatcherInterface<std::vector<Event>> { + public: + explicit UnorderedEventsAreMatcher(std::vector<Event> references) + : references_(std::move(references)) {} + + bool MatchAndExplain( + std::vector<Event> events, + ::testing::MatchResultListener* const listener) const override { + if (references_.size() != events.size()) { + *listener << StreamFormat("\n\tCount mismatch, got %s", + DumpEvents(events, 2)); + return false; + } + + std::vector<Event> unmatched(references_); + + for (const Event& candidate : events) { + for (auto it = unmatched.begin(); it != unmatched.end();) { + const Event& reference = *it; + if (candidate.mask == reference.mask && candidate.wd == reference.wd && + candidate.name == reference.name && + candidate.cookie == reference.cookie) { + it = unmatched.erase(it); + break; + } else { + ++it; + } + } + } + + // Anything left unmatched? If so, the matcher fails. + if (!unmatched.empty()) { + *listener << StreamFormat("\n\tFailed to match %s", + DumpEvents(unmatched, 2)); + *listener << StreamFormat("\n\tIn total of %s", DumpEvents(events, 2)); + return false; + } + + return true; + } + + void DescribeTo(::std::ostream* const os) const override { + *os << StreamFormat("unordered %s", DumpEvents(references_, 1)); + } + + void DescribeNegationTo(::std::ostream* const os) const override { + *os << StreamFormat("mismatch from unordered %s", + DumpEvents(references_, 1)); + } + + private: + std::vector<Event> references_; +}; + +::testing::Matcher<std::vector<Event>> AreUnordered(std::vector<Event> events) { + return MakeMatcher(new UnorderedEventsAreMatcher(std::move(events))); +} + +// Reads events from an inotify fd until either EOF, or read returns EAGAIN. 
+PosixErrorOr<std::vector<Event>> DrainEvents(int fd) { + std::vector<Event> events; + while (true) { + int events_size = 0; + if (ioctl(fd, FIONREAD, &events_size) < 0) { + return PosixError(errno, "ioctl(FIONREAD) failed on inotify fd"); + } + // Deliberately use a buffer that is larger than necessary, expecting to + // only read events_size bytes. + std::vector<char> buf(events_size + kBufSize, 0); + const ssize_t readlen = read(fd, buf.data(), buf.size()); + MaybeSave(); + // Read error? + if (readlen < 0) { + if (errno == EAGAIN) { + // If EAGAIN, no more events at the moment. Return what we have so far. + return events; + } + // Some other read error. Return an error. Right now if we encounter this + // after already reading some events, they get lost. However, we don't + // expect to see any error, and the calling test will fail immediately if + // we signal an error anyways, so this is acceptable. + return PosixError(errno, "read() failed on inotify fd"); + } + if (readlen < static_cast<int>(sizeof(struct inotify_event))) { + // Impossibly short read. + return PosixError( + EIO, + "read() didn't return enough data represent even a single event"); + } + if (readlen != events_size) { + return PosixError(EINVAL, absl::StrCat("read ", readlen, + " bytes, expected ", events_size)); + } + if (readlen == 0) { + // EOF. + return events; + } + + // Normal read. + const char* cursor = buf.data(); + while (cursor < (buf.data() + readlen)) { + struct inotify_event event = {}; + memcpy(&event, cursor, sizeof(struct inotify_event)); + + Event ev; + ev.wd = event.wd; + ev.mask = event.mask; + ev.cookie = event.cookie; + ev.len = event.len; + if (event.len > 0) { + TEST_CHECK(static_cast<int>(sizeof(struct inotify_event) + event.len) <= + readlen); + ev.name = std::string(cursor + + offsetof(struct inotify_event, name)); // NOLINT + // Name field should always be smaller than event.len, otherwise we have + // a buffer overflow. 
The two sizes aren't equal because the string + // constructor will stop at the first null byte, while event.name may be + // padded up to event.len using multiple null bytes. + TEST_CHECK(ev.name.size() <= event.len); + } + + events.push_back(ev); + cursor += sizeof(struct inotify_event) + event.len; + } + } +} + +PosixErrorOr<FileDescriptor> InotifyInit1(int flags) { + int fd; + EXPECT_THAT(fd = inotify_init1(flags), SyscallSucceeds()); + if (fd < 0) { + return PosixError(errno, "inotify_init1() failed"); + } + return FileDescriptor(fd); +} + +PosixErrorOr<int> InotifyAddWatch(int fd, const std::string& path, + uint32_t mask) { + int wd; + EXPECT_THAT(wd = inotify_add_watch(fd, path.c_str(), mask), + SyscallSucceeds()); + if (wd < 0) { + return PosixError(errno, "inotify_add_watch() failed"); + } + return wd; +} + +TEST(Inotify, IllegalSeek) { + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(0)); + EXPECT_THAT(lseek(fd.get(), 0, SEEK_SET), SyscallFailsWithErrno(ESPIPE)); +} + +TEST(Inotify, IllegalPread) { + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(0)); + int val; + EXPECT_THAT(pread(fd.get(), &val, sizeof(val), 0), + SyscallFailsWithErrno(ESPIPE)); +} + +TEST(Inotify, IllegalPwrite) { + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(0)); + EXPECT_THAT(pwrite(fd.get(), "x", 1, 0), SyscallFailsWithErrno(ESPIPE)); +} + +TEST(Inotify, IllegalWrite) { + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(0)); + int val = 0; + EXPECT_THAT(write(fd.get(), &val, sizeof(val)), SyscallFailsWithErrno(EBADF)); +} + +TEST(Inotify, InitFlags) { + EXPECT_THAT(inotify_init1(IN_NONBLOCK | IN_CLOEXEC), SyscallSucceeds()); + EXPECT_THAT(inotify_init1(12345), SyscallFailsWithErrno(EINVAL)); +} + +TEST(Inotify, NonBlockingReadReturnsEagain) { + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + std::vector<char> buf(kBufSize, 0); + + // The read below should return fail with 
EAGAIN because there is no data to + // read and we've specified IN_NONBLOCK. We're guaranteed that there is no + // data to read because we haven't registered any watches yet. + EXPECT_THAT(read(fd.get(), buf.data(), buf.size()), + SyscallFailsWithErrno(EAGAIN)); +} + +TEST(Inotify, AddWatchOnInvalidFdFails) { + // Garbage fd. + EXPECT_THAT(inotify_add_watch(-1, "/tmp", IN_ALL_EVENTS), + SyscallFailsWithErrno(EBADF)); + EXPECT_THAT(inotify_add_watch(1337, "/tmp", IN_ALL_EVENTS), + SyscallFailsWithErrno(EBADF)); + + // Non-inotify fds. + EXPECT_THAT(inotify_add_watch(0, "/tmp", IN_ALL_EVENTS), + SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(inotify_add_watch(1, "/tmp", IN_ALL_EVENTS), + SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(inotify_add_watch(2, "/tmp", IN_ALL_EVENTS), + SyscallFailsWithErrno(EINVAL)); + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open("/tmp", O_RDONLY)); + EXPECT_THAT(inotify_add_watch(fd.get(), "/tmp", IN_ALL_EVENTS), + SyscallFailsWithErrno(EINVAL)); +} + +TEST(Inotify, RemovingWatchGeneratesEvent) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + + const int wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS)); + EXPECT_THAT(inotify_rm_watch(fd.get(), wd), SyscallSucceeds()); + + // Read events, ensure the first event is IN_IGNORED. 
+ const std::vector<Event> events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + EXPECT_THAT(events, Are({Event(IN_IGNORED, wd)})); +} + +TEST(Inotify, CanDeleteFileAfterRemovingWatch) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + TempPath file1 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path())); + + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + const int wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS)); + + EXPECT_THAT(inotify_rm_watch(fd.get(), wd), SyscallSucceeds()); + file1.reset(); +} + +TEST(Inotify, RemoveWatchAfterDeletingFileFails) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + TempPath file1 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path())); + + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + const int wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS)); + + file1.reset(); + const std::vector<Event> events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + EXPECT_THAT(events, Are({Event(IN_ATTRIB, wd), Event(IN_DELETE_SELF, wd), + Event(IN_IGNORED, wd)})); + + EXPECT_THAT(inotify_rm_watch(fd.get(), wd), SyscallFailsWithErrno(EINVAL)); +} + +TEST(Inotify, DuplicateWatchRemovalFails) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + const int wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS)); + + EXPECT_THAT(inotify_rm_watch(fd.get(), wd), SyscallSucceeds()); + EXPECT_THAT(inotify_rm_watch(fd.get(), wd), SyscallFailsWithErrno(EINVAL)); +} + +TEST(Inotify, ConcurrentFileDeletionAndWatchRemoval) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + + const FileDescriptor fd = + 
ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + const std::string filename = NewTempAbsPathInDir(root.path()); + + auto file_create_delete = [filename]() { + const DisableSave ds; // Too expensive. + for (int i = 0; i < 100; ++i) { + FileDescriptor file_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(filename, O_CREAT, S_IRUSR | S_IWUSR)); + file_fd.reset(); // Close before unlinking (although save is disabled). + EXPECT_THAT(unlink(filename.c_str()), SyscallSucceeds()); + } + }; + + const int shared_fd = fd.get(); // We need to pass it to the thread. + auto add_remove_watch = [shared_fd, filename]() { + for (int i = 0; i < 100; ++i) { + int wd = inotify_add_watch(shared_fd, filename.c_str(), IN_ALL_EVENTS); + MaybeSave(); + if (wd != -1) { + // Watch added successfully, try removal. + if (inotify_rm_watch(shared_fd, wd)) { + // If removal fails, the only acceptable reason is if the wd + // is invalid, which will be the case if we try to remove + // the watch after the file has been deleted. + EXPECT_EQ(errno, EINVAL); + } + } else { + // Add watch failed, this should only fail if the target file doesn't + // exist. 
+ EXPECT_EQ(errno, ENOENT); + } + } + }; + + ScopedThread t1(file_create_delete); + ScopedThread t2(add_remove_watch); +} + +TEST(Inotify, DeletingChildGeneratesEvents) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + TempPath file1 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path())); + + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + const int root_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS)); + const int file1_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS)); + + const std::string file1_path = file1.reset(); + + const std::vector<Event> events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT( + events, + AreUnordered({Event(IN_ATTRIB, file1_wd), Event(IN_DELETE_SELF, file1_wd), + Event(IN_IGNORED, file1_wd), + Event(IN_DELETE, root_wd, Basename(file1_path))})); +} + +// Creating a file in "parent/child" should generate events for child, but not +// parent. +TEST(Inotify, CreatingFileGeneratesEvents) { + const TempPath parent = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const TempPath child = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(parent.path())); + + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), parent.path(), IN_ALL_EVENTS)); + const int wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), child.path(), IN_ALL_EVENTS)); + + // Create a new file in the directory. + const TempPath file1 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(child.path())); + const std::vector<Event> events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + + // The library function we use to create the new file opens it for writing to + // create it and sets permissions on it, so we expect the three extra events. 
+ ASSERT_THAT(events, Are({Event(IN_CREATE, wd, Basename(file1.path())), + Event(IN_OPEN, wd, Basename(file1.path())), + Event(IN_CLOSE_WRITE, wd, Basename(file1.path())), + Event(IN_ATTRIB, wd, Basename(file1.path()))})); +} + +TEST(Inotify, ReadingFileGeneratesAccessEvent) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + const TempPath file1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + root.path(), "some content", TempPath::kDefaultFileMode)); + + const FileDescriptor file1_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDONLY)); + const int wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS)); + + char buf; + EXPECT_THAT(read(file1_fd.get(), &buf, 1), SyscallSucceeds()); + + const std::vector<Event> events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT(events, Are({Event(IN_ACCESS, wd, Basename(file1.path()))})); +} + +TEST(Inotify, WritingFileGeneratesModifyEvent) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + const TempPath file1 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path())); + + const FileDescriptor file1_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_WRONLY)); + const int wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS)); + + const std::string data = "some content"; + EXPECT_THAT(write(file1_fd.get(), data.c_str(), data.length()), + SyscallSucceeds()); + + const std::vector<Event> events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT(events, Are({Event(IN_MODIFY, wd, Basename(file1.path()))})); +} + +TEST(Inotify, SizeZeroReadWriteGeneratesNothing) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const FileDescriptor fd = + 
ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + const TempPath file1 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path())); + + const FileDescriptor file1_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDWR)); + ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS)); + + // Read from the empty file. + int val; + ASSERT_THAT(read(file1_fd.get(), &val, sizeof(val)), + SyscallSucceedsWithValue(0)); + + // Write zero bytes. + ASSERT_THAT(write(file1_fd.get(), "", 0), SyscallSucceedsWithValue(0)); + + const std::vector<Event> events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT(events, Are({})); +} + +TEST(Inotify, FailedFileCreationGeneratesNoEvents) { + const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const std::string dir_path = dir.path(); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + ASSERT_NO_ERRNO_AND_VALUE(InotifyAddWatch(fd.get(), dir_path, IN_ALL_EVENTS)); + + const char* p = dir_path.c_str(); + ASSERT_THAT(mkdir(p, 0777), SyscallFails()); + ASSERT_THAT(mknod(p, S_IFIFO, 0777), SyscallFails()); + ASSERT_THAT(symlink(p, p), SyscallFails()); + ASSERT_THAT(link(p, p), SyscallFails()); + std::vector<Event> events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT(events, Are({})); +} + +TEST(Inotify, WatchSetAfterOpenReportsCloseFdEvent) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + const TempPath file1 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path())); + + FileDescriptor file1_fd_writable = + ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_WRONLY)); + FileDescriptor file1_fd_not_writable = + ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDONLY)); + const int wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS)); + + file1_fd_writable.reset(); 
// Close file1_fd_writable. + std::vector<Event> events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT(events, Are({Event(IN_CLOSE_WRITE, wd, Basename(file1.path()))})); + + file1_fd_not_writable.reset(); // Close file1_fd_not_writable. + events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT(events, + Are({Event(IN_CLOSE_NOWRITE, wd, Basename(file1.path()))})); +} + +TEST(Inotify, ChildrenDeletionInWatchedDirGeneratesEvent) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + + TempPath file1 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path())); + TempPath dir1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(root.path())); + + const int wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS)); + + const std::string file1_path = file1.reset(); + const std::string dir1_path = dir1.release(); + EXPECT_THAT(rmdir(dir1_path.c_str()), SyscallSucceeds()); + + const std::vector<Event> events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + + ASSERT_THAT(events, + Are({Event(IN_DELETE, wd, Basename(file1_path)), + Event(IN_DELETE | IN_ISDIR, wd, Basename(dir1_path))})); +} + +TEST(Inotify, RmdirOnWatchedTargetGeneratesEvent) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + + const int wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS)); + + EXPECT_THAT(rmdir(root.path().c_str()), SyscallSucceeds()); + const std::vector<Event> events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT(events, Are({Event(IN_DELETE_SELF, wd), Event(IN_IGNORED, wd)})); +} + +TEST(Inotify, MoveGeneratesEvents) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const FileDescriptor fd = + 
ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + + TempPath file1 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path())); + + const TempPath dir1 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(root.path())); + const TempPath dir2 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(root.path())); + + const int root_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS)); + const int dir1_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), dir1.path(), IN_ALL_EVENTS)); + const int dir2_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), dir2.path(), IN_ALL_EVENTS)); + // Test move from root -> root. + std::string newpath = NewTempAbsPathInDir(root.path()); + std::string oldpath = file1.release(); + EXPECT_THAT(rename(oldpath.c_str(), newpath.c_str()), SyscallSucceeds()); + file1.reset(newpath); + std::vector<Event> events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT( + events, + Are({Event(IN_MOVED_FROM, root_wd, Basename(oldpath), events[0].cookie), + Event(IN_MOVED_TO, root_wd, Basename(newpath), events[1].cookie)})); + EXPECT_NE(events[0].cookie, 0); + EXPECT_EQ(events[0].cookie, events[1].cookie); + uint32_t last_cookie = events[0].cookie; + + // Test move from root -> root/dir1. + newpath = NewTempAbsPathInDir(dir1.path()); + oldpath = file1.release(); + EXPECT_THAT(rename(oldpath.c_str(), newpath.c_str()), SyscallSucceeds()); + file1.reset(newpath); + events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT( + events, + Are({Event(IN_MOVED_FROM, root_wd, Basename(oldpath), events[0].cookie), + Event(IN_MOVED_TO, dir1_wd, Basename(newpath), events[1].cookie)})); + // Cookies should be distinct between distinct rename events. + EXPECT_NE(events[0].cookie, last_cookie); + EXPECT_EQ(events[0].cookie, events[1].cookie); + last_cookie = events[0].cookie; + + // Test move from root/dir1 -> root/dir2. 
+ newpath = NewTempAbsPathInDir(dir2.path()); + oldpath = file1.release(); + EXPECT_THAT(rename(oldpath.c_str(), newpath.c_str()), SyscallSucceeds()); + file1.reset(newpath); + events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT( + events, + Are({Event(IN_MOVED_FROM, dir1_wd, Basename(oldpath), events[0].cookie), + Event(IN_MOVED_TO, dir2_wd, Basename(newpath), events[1].cookie)})); + EXPECT_NE(events[0].cookie, last_cookie); + EXPECT_EQ(events[0].cookie, events[1].cookie); + last_cookie = events[0].cookie; +} + +TEST(Inotify, MoveWatchedTargetGeneratesEvents) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + + TempPath file1 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path())); + + const int root_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS)); + const int file1_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS)); + + const std::string newpath = NewTempAbsPathInDir(root.path()); + const std::string oldpath = file1.release(); + EXPECT_THAT(rename(oldpath.c_str(), newpath.c_str()), SyscallSucceeds()); + file1.reset(newpath); + const std::vector<Event> events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT( + events, + Are({Event(IN_MOVED_FROM, root_wd, Basename(oldpath), events[0].cookie), + Event(IN_MOVED_TO, root_wd, Basename(newpath), events[1].cookie), + // Self move events do not have a cookie. 
+ Event(IN_MOVE_SELF, file1_wd)})); + EXPECT_NE(events[0].cookie, 0); + EXPECT_EQ(events[0].cookie, events[1].cookie); +} + +TEST(Inotify, CoalesceEvents) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + + const TempPath file1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + root.path(), "some content", TempPath::kDefaultFileMode)); + + FileDescriptor file1_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDONLY)); + const int wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS)); + + // Read the file a few times. This would generate multiple IN_ACCESS + // events but they should get coalesced to a single event. + char buf; + EXPECT_THAT(read(file1_fd.get(), &buf, 1), SyscallSucceeds()); + EXPECT_THAT(read(file1_fd.get(), &buf, 1), SyscallSucceeds()); + EXPECT_THAT(read(file1_fd.get(), &buf, 1), SyscallSucceeds()); + EXPECT_THAT(read(file1_fd.get(), &buf, 1), SyscallSucceeds()); + + // Use the close event to verify that we haven't simply left the additional + // IN_ACCESS events unread. + file1_fd.reset(); // Close file1_fd. + + const std::string file1_name = std::string(Basename(file1.path())); + std::vector<Event> events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT(events, Are({Event(IN_ACCESS, wd, file1_name), + Event(IN_CLOSE_NOWRITE, wd, file1_name)})); + + // Now let's try interleaving other events into a stream of repeated events.
+ file1_fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDWR)); + + EXPECT_THAT(read(file1_fd.get(), &buf, 1), SyscallSucceeds()); + EXPECT_THAT(read(file1_fd.get(), &buf, 1), SyscallSucceeds()); + EXPECT_THAT(write(file1_fd.get(), "x", 1), SyscallSucceeds()); + EXPECT_THAT(write(file1_fd.get(), "x", 1), SyscallSucceeds()); + EXPECT_THAT(write(file1_fd.get(), "x", 1), SyscallSucceeds()); + EXPECT_THAT(read(file1_fd.get(), &buf, 1), SyscallSucceeds()); + EXPECT_THAT(read(file1_fd.get(), &buf, 1), SyscallSucceeds()); + + file1_fd.reset(); // Close the file. + + events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT( + events, + Are({Event(IN_OPEN, wd, file1_name), Event(IN_ACCESS, wd, file1_name), + Event(IN_MODIFY, wd, file1_name), Event(IN_ACCESS, wd, file1_name), + Event(IN_CLOSE_WRITE, wd, file1_name)})); + + // Ensure events aren't coalesced if they are from different files. + const TempPath file2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + root.path(), "some content", TempPath::kDefaultFileMode)); + // Discard events resulting from creation of file2. + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + + file1_fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDONLY)); + FileDescriptor file2_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file2.path(), O_RDONLY)); + + EXPECT_THAT(read(file1_fd.get(), &buf, 1), SyscallSucceeds()); + EXPECT_THAT(read(file2_fd.get(), &buf, 1), SyscallSucceeds()); + EXPECT_THAT(read(file1_fd.get(), &buf, 1), SyscallSucceeds()); + EXPECT_THAT(read(file1_fd.get(), &buf, 1), SyscallSucceeds()); + + // Close both files. 
+ file1_fd.reset(); + file2_fd.reset(); + + events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + const std::string file2_name = std::string(Basename(file2.path())); + ASSERT_THAT( + events, + Are({Event(IN_OPEN, wd, file1_name), Event(IN_OPEN, wd, file2_name), + Event(IN_ACCESS, wd, file1_name), Event(IN_ACCESS, wd, file2_name), + Event(IN_ACCESS, wd, file1_name), + Event(IN_CLOSE_NOWRITE, wd, file1_name), + Event(IN_CLOSE_NOWRITE, wd, file2_name)})); +} + +TEST(Inotify, ClosingInotifyFdWithoutRemovingWatchesWorks) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + + const TempPath file1 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path())); + const FileDescriptor file1_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDONLY)); + + ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS)); + // Note: The check on close will happen in FileDescriptor::~FileDescriptor(). +} + +TEST(Inotify, NestedWatches) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + + const TempPath file1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + root.path(), "some content", TempPath::kDefaultFileMode)); + const FileDescriptor file1_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDONLY)); + + const int root_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS)); + const int file1_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS)); + + // Read from file1. This should generate an event for both watches. 
+ char buf; + EXPECT_THAT(read(file1_fd.get(), &buf, 1), SyscallSucceeds()); + + const std::vector<Event> events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT(events, Are({Event(IN_ACCESS, root_wd, Basename(file1.path())), + Event(IN_ACCESS, file1_wd)})); +} + +TEST(Inotify, ConcurrentThreadsGeneratingEvents) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + + std::vector<TempPath> files; + files.reserve(10); + for (int i = 0; i < 10; i++) { + files.emplace_back(ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + root.path(), "some content", TempPath::kDefaultFileMode))); + } + + auto test_thread = [&files]() { + uint32_t seed = time(nullptr); + for (int i = 0; i < 20; i++) { + const TempPath& file = files[rand_r(&seed) % files.size()]; + const FileDescriptor file_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_WRONLY)); + TEST_PCHECK(write(file_fd.get(), "x", 1) == 1); + } + }; + + ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS)); + + std::list<ScopedThread> threads; + for (int i = 0; i < 3; i++) { + threads.emplace_back(test_thread); + } + for (auto& t : threads) { + t.Join(); + } + + const std::vector<Event> events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + // 3 threads doing 20 iterations, 3 events per iteration (open, write, + // close). However, some events may be coalesced, and we can't reliably + // predict how they'll be coalesced since the test threads aren't + // synchronized. We can only check that we aren't getting unexpected events. 
+ for (const Event& ev : events) { + EXPECT_NE(ev.mask & (IN_OPEN | IN_MODIFY | IN_CLOSE_WRITE), 0); + } +} + +TEST(Inotify, ReadWithTooSmallBufferFails) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const TempPath file1 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path())); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + + const int wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS)); + + // Open the file to queue an event. This event will not have a filename, so + // reading from the inotify fd should return sizeof(struct inotify_event) + // bytes of data. + FileDescriptor file1_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDONLY)); + std::vector<char> buf(kBufSize, 0); + ssize_t readlen; + + // Try a buffer too small to hold any potential event. This is rejected + // outright without the event being dequeued. + EXPECT_THAT(read(fd.get(), buf.data(), sizeof(struct inotify_event) - 1), + SyscallFailsWithErrno(EINVAL)); + // Try a buffer just large enough. This should succeed. + EXPECT_THAT( + readlen = read(fd.get(), buf.data(), sizeof(struct inotify_event)), + SyscallSucceeds()); + EXPECT_EQ(readlen, sizeof(struct inotify_event)); + // Event queue is now empty, the next read should return EAGAIN. + EXPECT_THAT(read(fd.get(), buf.data(), sizeof(struct inotify_event)), + SyscallFailsWithErrno(EAGAIN)); + + // Now put a watch on the directory, so that generated events contain a name. + EXPECT_THAT(inotify_rm_watch(fd.get(), wd), SyscallSucceeds()); + + // Drain the event generated from the watch removal. + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + + ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS)); + + file1_fd.reset(); // Close file to generate an event. + + // Try a buffer too small to hold any event and one too small to hold an event + // with a name.
These should both fail without consuming the event. + EXPECT_THAT(read(fd.get(), buf.data(), sizeof(struct inotify_event) - 1), + SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(read(fd.get(), buf.data(), sizeof(struct inotify_event)), + SyscallFailsWithErrno(EINVAL)); + // Now try with a large enough buffer. This should return the one event. + EXPECT_THAT(readlen = read(fd.get(), buf.data(), buf.size()), + SyscallSucceeds()); + EXPECT_GE(readlen, + sizeof(struct inotify_event) + Basename(file1.path()).size()); + // With the single event read, the queue should once again be empty. + EXPECT_THAT(read(fd.get(), buf.data(), sizeof(struct inotify_event)), + SyscallFailsWithErrno(EAGAIN)); +} + +TEST(Inotify, BlockingReadOnInotifyFd) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(0)); + const TempPath file1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + root.path(), "some content", TempPath::kDefaultFileMode)); + + const FileDescriptor file1_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDONLY)); + + ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS)); + + // Spawn a thread performing a blocking read for new events on the inotify fd. + std::vector<char> buf(kBufSize, 0); + const int shared_fd = fd.get(); // The thread needs it. + ScopedThread t([shared_fd, &buf]() { + ssize_t readlen; + EXPECT_THAT(readlen = read(shared_fd, buf.data(), buf.size()), + SyscallSucceeds()); + }); + + // Perform a read on the watched file, which should generate an IN_ACCESS + // event, unblocking the event_reader thread. + char c; + EXPECT_THAT(read(file1_fd.get(), &c, 1), SyscallSucceeds()); + + // Wait for the thread to read the event and exit. + t.Join(); + + // Make sure the event we got back is sane. 
+ uint32_t event_mask; + memcpy(&event_mask, buf.data() + offsetof(struct inotify_event, mask), + sizeof(event_mask)); + EXPECT_EQ(event_mask, IN_ACCESS); +} + +TEST(Inotify, WatchOnRelativePath) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + const TempPath file1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + root.path(), "some content", TempPath::kDefaultFileMode)); + + const FileDescriptor file1_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDONLY)); + + // Change working directory to root. + const FileDescriptor cwd = ASSERT_NO_ERRNO_AND_VALUE(Open(".", O_PATH)); + EXPECT_THAT(chdir(root.path().c_str()), SyscallSucceeds()); + + // Add a watch on file1 with a relative path. + const int wd = ASSERT_NO_ERRNO_AND_VALUE(InotifyAddWatch( + fd.get(), std::string(Basename(file1.path())), IN_ALL_EVENTS)); + + // Perform a read on file1, this should generate an IN_ACCESS event. + char c; + EXPECT_THAT(read(file1_fd.get(), &c, 1), SyscallSucceeds()); + + const std::vector<Event> events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + EXPECT_THAT(events, Are({Event(IN_ACCESS, wd)})); + + // Explicitly reset the working directory so that we don't continue to + // reference "root". Once the test ends, "root" will get unlinked. If we + // continue to hold a reference, random save/restore tests can fail if a save + // is triggered after "root" is unlinked; we can't save deleted fs objects + // with active references. 
+ EXPECT_THAT(fchdir(cwd.get()), SyscallSucceeds()); +} + +TEST(Inotify, ZeroLengthReadWriteDoesNotGenerateEvent) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + + const char kContent[] = "some content"; + TempPath file1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + root.path(), kContent, TempPath::kDefaultFileMode)); + const int kContentSize = sizeof(kContent) - 1; + + const FileDescriptor file1_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDWR)); + + const int wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS)); + + std::vector<char> buf(kContentSize, 0); + // Read all available data. + ssize_t readlen; + EXPECT_THAT(readlen = read(file1_fd.get(), buf.data(), kContentSize), + SyscallSucceeds()); + EXPECT_EQ(readlen, kContentSize); + // Drain all events and make sure we got the IN_ACCESS for the read. + std::vector<Event> events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + EXPECT_THAT(events, Are({Event(IN_ACCESS, wd, Basename(file1.path()))})); + + // Now try read again. This should be a 0-length read, since we're at EOF. + char c; + EXPECT_THAT(readlen = read(file1_fd.get(), &c, 1), SyscallSucceeds()); + EXPECT_EQ(readlen, 0); + // We should have no new events. + events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + EXPECT_TRUE(events.empty()); + + // Try issuing a zero-length read. + EXPECT_THAT(readlen = read(file1_fd.get(), &c, 0), SyscallSucceeds()); + EXPECT_EQ(readlen, 0); + // We should have no new events. + events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + EXPECT_TRUE(events.empty()); + + // Try issuing a zero-length write. + ssize_t writelen; + EXPECT_THAT(writelen = write(file1_fd.get(), &c, 0), SyscallSucceeds()); + EXPECT_EQ(writelen, 0); + // We should have no new events. 
+ events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + EXPECT_TRUE(events.empty()); +} + +TEST(Inotify, ChmodGeneratesAttribEvent_NoRandomSave) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const TempPath file1 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path())); + + FileDescriptor root_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(root.path(), O_RDONLY)); + FileDescriptor file1_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDWR)); + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + + const int root_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS)); + const int file1_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS)); + + auto verify_chmod_events = [&]() { + std::vector<Event> events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT(events, Are({Event(IN_ATTRIB, root_wd, Basename(file1.path())), + Event(IN_ATTRIB, file1_wd)})); + }; + + // Don't do cooperative S/R tests for any of the {f}chmod* syscalls below, the + // test will always fail because nodes cannot be saved when they have stricter + // permissions than the original host node. + const DisableSave ds; + + // Chmod. + ASSERT_THAT(chmod(file1.path().c_str(), S_IWGRP), SyscallSucceeds()); + verify_chmod_events(); + + // Fchmod. + ASSERT_THAT(fchmod(file1_fd.get(), S_IRGRP | S_IWGRP), SyscallSucceeds()); + verify_chmod_events(); + + // Fchmodat. + const std::string file1_basename = std::string(Basename(file1.path())); + ASSERT_THAT(fchmodat(root_fd.get(), file1_basename.c_str(), S_IWGRP, 0), + SyscallSucceeds()); + verify_chmod_events(); + + // Make sure the chmod'ed file descriptors are destroyed before DisableSave + // is destructed. 
+ root_fd.reset(); + file1_fd.reset(); +} + +TEST(Inotify, TruncateGeneratesModifyEvent) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const TempPath file1 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path())); + const FileDescriptor file1_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDWR)); + + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + const int root_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS)); + const int file1_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS)); + + auto verify_truncate_events = [&]() { + std::vector<Event> events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT(events, Are({Event(IN_MODIFY, root_wd, Basename(file1.path())), + Event(IN_MODIFY, file1_wd)})); + }; + + // Truncate. + EXPECT_THAT(truncate(file1.path().c_str(), 4096), SyscallSucceeds()); + verify_truncate_events(); + + // Ftruncate. + EXPECT_THAT(ftruncate(file1_fd.get(), 8192), SyscallSucceeds()); + verify_truncate_events(); + + // No events if truncate fails. + EXPECT_THAT(ftruncate(file1_fd.get(), -1), SyscallFailsWithErrno(EINVAL)); + const std::vector<Event> events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT(events, Are({})); +} + +TEST(Inotify, GetdentsGeneratesAccessEvent) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const TempPath file1 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path())); + + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + + ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS)); + ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS)); + + // This internally calls getdents(2). We also expect to see an open/close + // event for the dirfd. 
+ ASSERT_NO_ERRNO_AND_VALUE(ListDir(root.path(), false)); + const std::vector<Event> events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + + // Linux only seems to generate access events on getdents() on some + // calls. Allow the test to pass even if it isn't generated. gVisor will + // always generate the IN_ACCESS event so the test will at least ensure gVisor + // behaves reasonably. + int i = 0; + EXPECT_EQ(events[i].mask, IN_OPEN | IN_ISDIR); + ++i; + if (IsRunningOnGvisor()) { + EXPECT_EQ(events[i].mask, IN_ACCESS | IN_ISDIR); + ++i; + } else { + if (events[i].mask == (IN_ACCESS | IN_ISDIR)) { + // Skip over the IN_ACCESS event on Linux, it only shows up some of the + // time so we can't assert its existence. + ++i; + } + } + EXPECT_EQ(events[i].mask, IN_CLOSE_NOWRITE | IN_ISDIR); +} + +TEST(Inotify, MknodGeneratesCreateEvent) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + + const int wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS)); + + const TempPath file1(root.path() + "/file1"); + const int rc = mknod(file1.path().c_str(), S_IFREG, 0); + // mknod(2) is only supported on tmpfs in the sandbox. 
+ SKIP_IF(IsRunningOnGvisor() && rc != 0); + ASSERT_THAT(rc, SyscallSucceeds()); + + const std::vector<Event> events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT(events, Are({Event(IN_CREATE, wd, Basename(file1.path()))})); +} + +TEST(Inotify, SymlinkGeneratesCreateEvent) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const TempPath file1 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path())); + const TempPath link1(NewTempAbsPathInDir(root.path())); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + + const int root_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS)); + ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS)); + + ASSERT_THAT(symlink(file1.path().c_str(), link1.path().c_str()), + SyscallSucceeds()); + + const std::vector<Event> events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + + ASSERT_THAT(events, Are({Event(IN_CREATE, root_wd, Basename(link1.path()))})); +} + +TEST(Inotify, LinkGeneratesAttribAndCreateEvents) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const TempPath file1 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path())); + const TempPath link1(root.path() + "/link1"); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + + const int root_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS)); + const int file1_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS)); + + const int rc = link(file1.path().c_str(), link1.path().c_str()); + // NOTE(b/34861058): link(2) is only supported on tmpfs in the sandbox. 
+ SKIP_IF(IsRunningOnGvisor() && rc != 0 && + (errno == EPERM || errno == ENOENT)); + ASSERT_THAT(rc, SyscallSucceeds()); + + const std::vector<Event> events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT(events, Are({Event(IN_ATTRIB, file1_wd), + Event(IN_CREATE, root_wd, Basename(link1.path()))})); +} + +TEST(Inotify, UtimesGeneratesAttribEvent) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + const TempPath file1 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path())); + + const FileDescriptor file1_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDWR)); + const int wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS)); + + const struct timeval times[2] = {{1, 0}, {2, 0}}; + EXPECT_THAT(futimes(file1_fd.get(), times), SyscallSucceeds()); + + const std::vector<Event> events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT(events, Are({Event(IN_ATTRIB, wd, Basename(file1.path()))})); +} + +TEST(Inotify, HardlinksReuseSameWatch) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + TempPath file1 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path())); + TempPath link1(root.path() + "/link1"); + const int rc = link(file1.path().c_str(), link1.path().c_str()); + // link(2) is only supported on tmpfs in the sandbox. 
+ SKIP_IF(IsRunningOnGvisor() && rc != 0 && + (errno == EPERM || errno == ENOENT)); + ASSERT_THAT(rc, SyscallSucceeds()); + + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + + const int root_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS)); + const int file1_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS)); + const int link1_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), link1.path(), IN_ALL_EVENTS)); + + // The watch descriptors for watches on different links to the same file + // should be identical. + EXPECT_NE(root_wd, file1_wd); + EXPECT_EQ(file1_wd, link1_wd); + + FileDescriptor file1_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_WRONLY)); + + std::vector<Event> events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT(events, + AreUnordered({Event(IN_OPEN, root_wd, Basename(file1.path())), + Event(IN_OPEN, file1_wd)})); + + // For the next step, we want to ensure all fds to the file are closed. Do + // that now and drain the resulting events. + file1_fd.reset(); + events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT(events, + Are({Event(IN_CLOSE_WRITE, root_wd, Basename(file1.path())), + Event(IN_CLOSE_WRITE, file1_wd)})); + + // Try removing the link and let's see what events show up. Note that after + // this, we still have a link to the file so the watch shouldn't be + // automatically removed. + const std::string link1_path = link1.reset(); + + events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT(events, Are({Event(IN_ATTRIB, link1_wd), + Event(IN_DELETE, root_wd, Basename(link1_path))})); + + // Now remove the other link. Since this is the last link to the file, the + // watch should be automatically removed. 
+ const std::string file1_path = file1.reset(); + + events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT( + events, + AreUnordered({Event(IN_ATTRIB, file1_wd), Event(IN_DELETE_SELF, file1_wd), + Event(IN_IGNORED, file1_wd), + Event(IN_DELETE, root_wd, Basename(file1_path))})); +} + +// Calling mkdir within "parent/child" should generate an event for child, but +// not parent. +TEST(Inotify, MkdirGeneratesCreateEventWithDirFlag) { + const TempPath parent = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const TempPath child = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(parent.path())); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), parent.path(), IN_ALL_EVENTS)); + const int child_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), child.path(), IN_ALL_EVENTS)); + + const TempPath dir1(NewTempAbsPathInDir(child.path())); + ASSERT_THAT(mkdir(dir1.path().c_str(), 0777), SyscallSucceeds()); + + const std::vector<Event> events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT( + events, + Are({Event(IN_CREATE | IN_ISDIR, child_wd, Basename(dir1.path()))})); +} + +TEST(Inotify, MultipleInotifyInstancesAndWatchesAllGetEvents) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const TempPath file1 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path())); + + const FileDescriptor file1_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_WRONLY)); + constexpr int kNumFds = 30; + std::vector<FileDescriptor> inotify_fds; + + for (int i = 0; i < kNumFds; ++i) { + const DisableSave ds; // Too expensive. + inotify_fds.emplace_back( + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK))); + const FileDescriptor& fd = inotify_fds[inotify_fds.size() - 1]; // Back. 
+ ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS)); + ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS)); + } + + const std::string data = "some content"; + EXPECT_THAT(write(file1_fd.get(), data.c_str(), data.length()), + SyscallSucceeds()); + + for (const FileDescriptor& fd : inotify_fds) { + const DisableSave ds; // Too expensive. + const std::vector<Event> events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + if (events.size() >= 2) { + EXPECT_EQ(events[0].mask, IN_MODIFY); + EXPECT_EQ(events[0].wd, 1); + EXPECT_EQ(events[0].name, Basename(file1.path())); + EXPECT_EQ(events[1].mask, IN_MODIFY); + EXPECT_EQ(events[1].wd, 2); + EXPECT_EQ(events[1].name, ""); + } + } +} + +TEST(Inotify, EventsGoUpAtMostOneLevel) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const TempPath dir1 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(root.path())); + TempPath file1 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir1.path())); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + + ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS)); + const int dir1_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), dir1.path(), IN_ALL_EVENTS)); + + const std::string file1_path = file1.reset(); + + const std::vector<Event> events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT(events, Are({Event(IN_DELETE, dir1_wd, Basename(file1_path))})); +} + +TEST(Inotify, DuplicateWatchReturnsSameWatchDescriptor) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const TempPath file1 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path())); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + + const int wd1 = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS)); + const int wd2 = 
ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS)); + + EXPECT_EQ(wd1, wd2); + + const FileDescriptor file1_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_WRONLY)); + const std::vector<Event> events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + // The watch shouldn't be duplicated, we only expect one event. + ASSERT_THAT(events, Are({Event(IN_OPEN, wd1)})); +} + +TEST(Inotify, UnmatchedEventsAreDiscarded) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + TempPath file1 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path())); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + + const int wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), file1.path(), IN_ACCESS)); + + FileDescriptor file1_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_WRONLY)); + + std::vector<Event> events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + // We only asked for access events, the open event should be discarded. + ASSERT_THAT(events, Are({})); + + // IN_IGNORED events are always generated, regardless of the mask. + file1_fd.reset(); + file1.reset(); + events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT(events, Are({Event(IN_IGNORED, wd)})); +} + +TEST(Inotify, AddWatchWithInvalidEventMaskFails) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + + EXPECT_THAT(inotify_add_watch(fd.get(), root.path().c_str(), 0), + SyscallFailsWithErrno(EINVAL)); +} + +TEST(Inotify, AddWatchOnInvalidPathFails) { + const TempPath nonexistent(NewTempAbsPath()); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + + // Non-existent path. + EXPECT_THAT( + inotify_add_watch(fd.get(), nonexistent.path().c_str(), IN_CREATE), + SyscallFailsWithErrno(ENOENT)); + + // Garbage path pointer. 
+ EXPECT_THAT(inotify_add_watch(fd.get(), nullptr, IN_CREATE), + SyscallFailsWithErrno(EFAULT)); +} + +TEST(Inotify, InOnlyDirFlagRespected) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const TempPath file1 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path())); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + + EXPECT_THAT( + inotify_add_watch(fd.get(), root.path().c_str(), IN_ACCESS | IN_ONLYDIR), + SyscallSucceeds()); + + EXPECT_THAT( + inotify_add_watch(fd.get(), file1.path().c_str(), IN_ACCESS | IN_ONLYDIR), + SyscallFailsWithErrno(ENOTDIR)); +} + +TEST(Inotify, MaskAddMergesWithExistingEventMask) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const TempPath file1 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path())); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + + FileDescriptor file1_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_WRONLY)); + + const int wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), file1.path(), IN_OPEN | IN_CLOSE_WRITE)); + + const std::string data = "some content"; + EXPECT_THAT(write(file1_fd.get(), data.c_str(), data.length()), + SyscallSucceeds()); + + // We shouldn't get any events, since IN_MODIFY wasn't in the event mask. + std::vector<Event> events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT(events, Are({})); + + // Add IN_MODIFY to event mask. + ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), file1.path(), IN_MODIFY | IN_MASK_ADD)); + + EXPECT_THAT(write(file1_fd.get(), data.c_str(), data.length()), + SyscallSucceeds()); + + // This time we should get the modify event. + events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT(events, Are({Event(IN_MODIFY, wd)})); + + // Now close the fd. 
If the modify event was added to the event mask rather + // than replacing the event mask we won't get the close event. + file1_fd.reset(); + events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT(events, Are({Event(IN_CLOSE_WRITE, wd)})); +} + +// Test that control events bits are not considered when checking event mask. +TEST(Inotify, ControlEvents) { + const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + + const int wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), dir.path(), IN_ACCESS)); + + // Check that events in the mask are dispatched and that control bits are + // part of the event mask. + std::vector<std::string> files = + ASSERT_NO_ERRNO_AND_VALUE(ListDir(dir.path(), false)); + ASSERT_EQ(files.size(), 2); + + const std::vector<Event> events1 = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT(events1, Are({Event(IN_ACCESS | IN_ISDIR, wd)})); + + // Check that events not in the mask are discarded. + const FileDescriptor dir_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(dir.path(), O_RDONLY | O_DIRECTORY)); + + const std::vector<Event> events2 = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT(events2, Are({})); +} + +// Regression test to ensure epoll and directory access doesn't deadlock. +TEST(Inotify, EpollNoDeadlock) { + const DisableSave ds; // Too many syscalls. + + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + + // Create lots of directories and watch all of them. 
+ const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + std::vector<TempPath> children; + for (size_t i = 0; i < 1000; ++i) { + auto child = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(root.path())); + ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), child.path(), IN_ACCESS)); + children.emplace_back(std::move(child)); + } + + // Run epoll_wait constantly in a separate thread. + std::atomic<bool> done(false); + ScopedThread th([&fd, &done] { + for (auto start = absl::Now(); absl::Now() - start < absl::Seconds(5);) { + FileDescriptor epoll_fd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD()); + ASSERT_NO_ERRNO(RegisterEpollFD(epoll_fd.get(), fd.get(), + EPOLLIN | EPOLLOUT | EPOLLET, 0)); + struct epoll_event result[1]; + EXPECT_THAT(RetryEINTR(epoll_wait)(epoll_fd.get(), result, 1, -1), + SyscallSucceedsWithValue(1)); + + sched_yield(); + } + done = true; + }); + + // While epoll thread is running, constantly access all directories to + // generate inotify events. + while (!done) { + std::vector<std::string> files = + ASSERT_NO_ERRNO_AND_VALUE(ListDir(root.path(), false)); + ASSERT_EQ(files.size(), 1002); + for (const auto& child : files) { + if (child == "." || child == "..") { + continue; + } + ASSERT_NO_ERRNO_AND_VALUE(ListDir(JoinPath(root.path(), child), false)); + } + sched_yield(); + } +} + +// On Linux, inotify behavior is not very consistent with splice(2). We try our +// best to emulate Linux for very basic calls to splice. 
+TEST(Inotify, SpliceOnWatchTarget) { + int pipes[2]; + ASSERT_THAT(pipe2(pipes, O_NONBLOCK), SyscallSucceeds()); + + const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const FileDescriptor inotify_fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + dir.path(), "some content", TempPath::kDefaultFileMode)); + + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR)); + const int dir_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(inotify_fd.get(), dir.path(), IN_ALL_EVENTS)); + const int file_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(inotify_fd.get(), file.path(), IN_ALL_EVENTS)); + + EXPECT_THAT(splice(fd.get(), nullptr, pipes[1], nullptr, 1, /*flags=*/0), + SyscallSucceedsWithValue(1)); + + // Surprisingly, events are not generated in Linux if we read from a file. + std::vector<Event> events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); + ASSERT_THAT(events, Are({})); + + EXPECT_THAT(splice(pipes[0], nullptr, fd.get(), nullptr, 1, /*flags=*/0), + SyscallSucceedsWithValue(1)); + + events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); + ASSERT_THAT(events, Are({ + Event(IN_MODIFY, dir_wd, Basename(file.path())), + Event(IN_MODIFY, file_wd), + })); +} + +TEST(Inotify, SpliceOnInotifyFD) { + int pipes[2]; + ASSERT_THAT(pipe2(pipes, O_NONBLOCK), SyscallSucceeds()); + + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + const TempPath file1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + root.path(), "some content", TempPath::kDefaultFileMode)); + + const FileDescriptor file1_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDONLY)); + const int watcher = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS)); + + char buf; + 
EXPECT_THAT(read(file1_fd.get(), &buf, 1), SyscallSucceeds()); + + EXPECT_THAT(splice(fd.get(), nullptr, pipes[1], nullptr, + sizeof(struct inotify_event) + 1, SPLICE_F_NONBLOCK), + SyscallSucceedsWithValue(sizeof(struct inotify_event))); + + const FileDescriptor read_fd(pipes[0]); + const std::vector<Event> events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(read_fd.get())); + ASSERT_THAT(events, Are({Event(IN_ACCESS, watcher)})); +} + +// Watches on a parent should not be triggered by actions on a hard link to one +// of its children that has a different parent. +TEST(Inotify, LinkOnOtherParent) { + const TempPath dir1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const TempPath dir2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const TempPath file = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir1.path())); + std::string link_path = NewTempAbsPathInDir(dir2.path()); + + const int rc = link(file.path().c_str(), link_path.c_str()); + // NOTE(b/34861058): link(2) is only supported on tmpfs in the sandbox. + SKIP_IF(IsRunningOnGvisor() && rc != 0 && + (errno == EPERM || errno == ENOENT)); + ASSERT_THAT(rc, SyscallSucceeds()); + + const FileDescriptor inotify_fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(inotify_fd.get(), dir1.path(), IN_ALL_EVENTS)); + + // Perform various actions on the link outside of dir1, which should trigger + // no inotify events. 
+ const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(link_path.c_str(), O_RDWR)); + int val = 0; + ASSERT_THAT(write(fd.get(), &val, sizeof(val)), SyscallSucceeds()); + ASSERT_THAT(read(fd.get(), &val, sizeof(val)), SyscallSucceeds()); + ASSERT_THAT(ftruncate(fd.get(), 12345), SyscallSucceeds()); + ASSERT_THAT(unlink(link_path.c_str()), SyscallSucceeds()); + const std::vector<Event> events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); + EXPECT_THAT(events, Are({})); +} + +TEST(Inotify, Xattr) { + // TODO(gvisor.dev/issue/1636): Support extended attributes in runsc gofer. + SKIP_IF(IsRunningOnGvisor()); + + const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const std::string path = file.path(); + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(path, O_RDWR)); + const FileDescriptor inotify_fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + const int wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(inotify_fd.get(), path, IN_ALL_EVENTS)); + + const char* cpath = path.c_str(); + const char* name = "user.test"; + int val = 123; + ASSERT_THAT(setxattr(cpath, name, &val, sizeof(val), /*flags=*/0), + SyscallSucceeds()); + std::vector<Event> events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); + EXPECT_THAT(events, Are({Event(IN_ATTRIB, wd)})); + + ASSERT_THAT(getxattr(cpath, name, &val, sizeof(val)), SyscallSucceeds()); + events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); + EXPECT_THAT(events, Are({})); + + char list[100]; + ASSERT_THAT(listxattr(cpath, list, sizeof(list)), SyscallSucceeds()); + events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); + EXPECT_THAT(events, Are({})); + + ASSERT_THAT(removexattr(cpath, name), SyscallSucceeds()); + events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); + EXPECT_THAT(events, Are({Event(IN_ATTRIB, wd)})); + + ASSERT_THAT(fsetxattr(fd.get(), name, &val, sizeof(val), /*flags=*/0), + SyscallSucceeds()); + 
events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); + EXPECT_THAT(events, Are({Event(IN_ATTRIB, wd)})); + + ASSERT_THAT(fgetxattr(fd.get(), name, &val, sizeof(val)), SyscallSucceeds()); + events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); + EXPECT_THAT(events, Are({})); + + ASSERT_THAT(flistxattr(fd.get(), list, sizeof(list)), SyscallSucceeds()); + events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); + EXPECT_THAT(events, Are({})); + + ASSERT_THAT(fremovexattr(fd.get(), name), SyscallSucceeds()); + events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); + EXPECT_THAT(events, Are({Event(IN_ATTRIB, wd)})); +} + +TEST(Inotify, Exec) { + const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const TempPath bin = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateSymlinkTo(dir.path(), "/bin/true")); + + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + const int wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), bin.path(), IN_ALL_EVENTS)); + + // Perform exec. + ScopedThread t([&bin]() { + ASSERT_THAT(execl(bin.path().c_str(), bin.path().c_str(), (char*)nullptr), + SyscallSucceeds()); + }); + t.Join(); + + std::vector<Event> events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + EXPECT_THAT(events, Are({Event(IN_OPEN, wd), Event(IN_ACCESS, wd)})); +} + +// Watches without IN_EXCL_UNLINK, should continue to emit events for file +// descriptors after their corresponding files have been unlinked. +// +// We need to disable S/R because there are filesystems where we cannot re-open +// fds to an unlinked file across S/R, e.g. gofer-backed filesytems. 
+TEST(Inotify, IncludeUnlinkedFile_NoRandomSave) { + const DisableSave ds; + + const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const TempPath file = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateFileWith(dir.path(), "123", TempPath::kDefaultFileMode)); + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR)); + + const FileDescriptor inotify_fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + const int dir_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(inotify_fd.get(), dir.path(), IN_ALL_EVENTS)); + const int file_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(inotify_fd.get(), file.path(), IN_ALL_EVENTS)); + + ASSERT_THAT(unlink(file.path().c_str()), SyscallSucceeds()); + int val = 0; + ASSERT_THAT(read(fd.get(), &val, sizeof(val)), SyscallSucceeds()); + ASSERT_THAT(write(fd.get(), &val, sizeof(val)), SyscallSucceeds()); + std::vector<Event> events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); + EXPECT_THAT(events, Are({ + Event(IN_ATTRIB, file_wd), + Event(IN_DELETE, dir_wd, Basename(file.path())), + Event(IN_ACCESS, dir_wd, Basename(file.path())), + Event(IN_ACCESS, file_wd), + Event(IN_MODIFY, dir_wd, Basename(file.path())), + Event(IN_MODIFY, file_wd), + })); + + fd.reset(); + events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); + EXPECT_THAT(events, Are({ + Event(IN_CLOSE_WRITE, dir_wd, Basename(file.path())), + Event(IN_CLOSE_WRITE, file_wd), + Event(IN_DELETE_SELF, file_wd), + Event(IN_IGNORED, file_wd), + })); +} + +// Watches created with IN_EXCL_UNLINK will stop emitting events on fds for +// children that have already been unlinked. +// +// We need to disable S/R because there are filesystems where we cannot re-open +// fds to an unlinked file across S/R, e.g. gofer-backed filesytems. +TEST(Inotify, ExcludeUnlink_NoRandomSave) { + const DisableSave ds; + // TODO(gvisor.dev/issue/1624): This test fails on VFS1. 
+ SKIP_IF(IsRunningWithVFS1()); + + const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const TempPath file = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir.path())); + + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR)); + + const FileDescriptor inotify_fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + const int dir_wd = ASSERT_NO_ERRNO_AND_VALUE(InotifyAddWatch( + inotify_fd.get(), dir.path(), IN_ALL_EVENTS | IN_EXCL_UNLINK)); + const int file_wd = ASSERT_NO_ERRNO_AND_VALUE(InotifyAddWatch( + inotify_fd.get(), file.path(), IN_ALL_EVENTS | IN_EXCL_UNLINK)); + + // Unlink the child, which should cause further operations on the open file + // descriptor to be ignored. + ASSERT_THAT(unlink(file.path().c_str()), SyscallSucceeds()); + int val = 0; + ASSERT_THAT(write(fd.get(), &val, sizeof(val)), SyscallSucceeds()); + ASSERT_THAT(read(fd.get(), &val, sizeof(val)), SyscallSucceeds()); + std::vector<Event> events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); + EXPECT_THAT(events, Are({ + Event(IN_ATTRIB, file_wd), + Event(IN_DELETE, dir_wd, Basename(file.path())), + })); + + fd.reset(); + events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); + ASSERT_THAT(events, Are({ + Event(IN_DELETE_SELF, file_wd), + Event(IN_IGNORED, file_wd), + })); +} + +// We need to disable S/R because there are filesystems where we cannot re-open +// fds to an unlinked file across S/R, e.g. gofer-backed filesytems. +TEST(Inotify, ExcludeUnlinkDirectory_NoRandomSave) { + // TODO(gvisor.dev/issue/1624): This test fails on VFS1. Remove once VFS1 is + // deleted. 
+ SKIP_IF(IsRunningWithVFS1()); + + const DisableSave ds; + + const TempPath parent = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + TempPath dir = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(parent.path())); + std::string dirPath = dir.path(); + const FileDescriptor inotify_fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(dirPath.c_str(), O_RDONLY | O_DIRECTORY)); + const int parent_wd = ASSERT_NO_ERRNO_AND_VALUE(InotifyAddWatch( + inotify_fd.get(), parent.path(), IN_ALL_EVENTS | IN_EXCL_UNLINK)); + const int self_wd = ASSERT_NO_ERRNO_AND_VALUE(InotifyAddWatch( + inotify_fd.get(), dir.path(), IN_ALL_EVENTS | IN_EXCL_UNLINK)); + + // Unlink the dir, and then close the open fd. + ASSERT_THAT(rmdir(dirPath.c_str()), SyscallSucceeds()); + dir.reset(); + + std::vector<Event> events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); + // No close event should appear. + ASSERT_THAT(events, + Are({Event(IN_DELETE | IN_ISDIR, parent_wd, Basename(dirPath))})); + + fd.reset(); + events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); + ASSERT_THAT(events, Are({ + Event(IN_DELETE_SELF, self_wd), + Event(IN_IGNORED, self_wd), + })); +} + +// If "dir/child" and "dir/child2" are links to the same file, and "dir/child" +// is unlinked, a watch on "dir" with IN_EXCL_UNLINK will exclude future events +// for fds on "dir/child" but not "dir/child2". +// +// We need to disable S/R because there are filesystems where we cannot re-open +// fds to an unlinked file across S/R, e.g. gofer-backed filesytems. +TEST(Inotify, ExcludeUnlinkMultipleChildren_NoRandomSave) { + const DisableSave ds; + // TODO(gvisor.dev/issue/1624): This test fails on VFS1. 
+ SKIP_IF(IsRunningWithVFS1()); + + const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const TempPath file = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir.path())); + std::string path1 = file.path(); + std::string path2 = NewTempAbsPathInDir(dir.path()); + + const int rc = link(path1.c_str(), path2.c_str()); + // NOTE(b/34861058): link(2) is only supported on tmpfs in the sandbox. + SKIP_IF(IsRunningOnGvisor() && rc != 0 && + (errno == EPERM || errno == ENOENT)); + ASSERT_THAT(rc, SyscallSucceeds()); + const FileDescriptor fd1 = + ASSERT_NO_ERRNO_AND_VALUE(Open(path1.c_str(), O_RDWR)); + const FileDescriptor fd2 = + ASSERT_NO_ERRNO_AND_VALUE(Open(path2.c_str(), O_RDWR)); + + const FileDescriptor inotify_fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + const int wd = ASSERT_NO_ERRNO_AND_VALUE(InotifyAddWatch( + inotify_fd.get(), dir.path(), IN_ALL_EVENTS | IN_EXCL_UNLINK)); + + // After unlinking path1, only events on the fd for path2 should be generated. + ASSERT_THAT(unlink(path1.c_str()), SyscallSucceeds()); + ASSERT_THAT(write(fd1.get(), "x", 1), SyscallSucceeds()); + ASSERT_THAT(write(fd2.get(), "x", 1), SyscallSucceeds()); + + const std::vector<Event> events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); + EXPECT_THAT(events, Are({ + Event(IN_DELETE, wd, Basename(path1)), + Event(IN_MODIFY, wd, Basename(path2)), + })); +} + +// On native Linux, actions of data type FSNOTIFY_EVENT_INODE are not affected +// by IN_EXCL_UNLINK (see +// fs/notify/inotify/inotify_fsnotify.c:inotify_handle_event). Inode-level +// events include changes to metadata and extended attributes. +// +// We need to disable S/R because there are filesystems where we cannot re-open +// fds to an unlinked file across S/R, e.g. gofer-backed filesytems. 
+TEST(Inotify, ExcludeUnlinkInodeEvents_NoRandomSave) { + const DisableSave ds; + + const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const TempPath file = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir.path())); + + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path().c_str(), O_RDWR)); + + // NOTE(b/157163751): Create another link before unlinking. This is needed for + // the gofer filesystem in gVisor, where open fds will not work once the link + // count hits zero. In VFS2, we end up skipping the gofer test anyway, because + // hard links are not supported for gofer fs. + if (IsRunningOnGvisor()) { + std::string link_path = NewTempAbsPath(); + const int rc = link(file.path().c_str(), link_path.c_str()); + // NOTE(b/34861058): link(2) is only supported on tmpfs in the sandbox. + SKIP_IF(rc != 0 && (errno == EPERM || errno == ENOENT)); + ASSERT_THAT(rc, SyscallSucceeds()); + } + + const FileDescriptor inotify_fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + const int dir_wd = ASSERT_NO_ERRNO_AND_VALUE(InotifyAddWatch( + inotify_fd.get(), dir.path(), IN_ALL_EVENTS | IN_EXCL_UNLINK)); + const int file_wd = ASSERT_NO_ERRNO_AND_VALUE(InotifyAddWatch( + inotify_fd.get(), file.path(), IN_ALL_EVENTS | IN_EXCL_UNLINK)); + + // Even after unlinking, inode-level operations will trigger events regardless + // of IN_EXCL_UNLINK. + ASSERT_THAT(unlink(file.path().c_str()), SyscallSucceeds()); + + // Perform various actions on fd. 
+ ASSERT_THAT(ftruncate(fd.get(), 12345), SyscallSucceeds()); + std::vector<Event> events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); + EXPECT_THAT(events, Are({ + Event(IN_ATTRIB, file_wd), + Event(IN_DELETE, dir_wd, Basename(file.path())), + Event(IN_MODIFY, dir_wd, Basename(file.path())), + Event(IN_MODIFY, file_wd), + })); + + const struct timeval times[2] = {{1, 0}, {2, 0}}; + ASSERT_THAT(futimes(fd.get(), times), SyscallSucceeds()); + events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); + EXPECT_THAT(events, Are({ + Event(IN_ATTRIB, dir_wd, Basename(file.path())), + Event(IN_ATTRIB, file_wd), + })); + + // S/R is disabled on this entire test due to behavior with unlink; it must + // also be disabled after this point because of fchmod. + ASSERT_THAT(fchmod(fd.get(), 0777), SyscallSucceeds()); + events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); + EXPECT_THAT(events, Are({ + Event(IN_ATTRIB, dir_wd, Basename(file.path())), + Event(IN_ATTRIB, file_wd), + })); +} + +TEST(Inotify, OneShot) { + // TODO(gvisor.dev/issue/1624): IN_ONESHOT not supported in VFS1. + SKIP_IF(IsRunningWithVFS1()); + + const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const FileDescriptor inotify_fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + + const int wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(inotify_fd.get(), file.path(), IN_MODIFY | IN_ONESHOT)); + + // Open an fd, write to it, and then close it. + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_WRONLY)); + ASSERT_THAT(write(fd.get(), "x", 1), SyscallSucceedsWithValue(1)); + fd.reset(); + + // We should get a single event followed by IN_IGNORED indicating removal + // of the one-shot watch. Prior activity (i.e. open) that is not in the mask + // should not trigger removal, and activity after removal (i.e. close) should + // not generate events. 
+ std::vector<Event> events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); + EXPECT_THAT(events, Are({ + Event(IN_MODIFY, wd), + Event(IN_IGNORED, wd), + })); + + // The watch should already have been removed. + EXPECT_THAT(inotify_rm_watch(inotify_fd.get(), wd), + SyscallFailsWithErrno(EINVAL)); +} + +// This test helps verify that the lock order of filesystem and inotify locks +// is respected when inotify instances and watch targets are concurrently being +// destroyed. +TEST(InotifyTest, InotifyAndTargetDestructionDoNotDeadlock_NoRandomSave) { + const DisableSave ds; // Too many syscalls. + + // A file descriptor protected by a mutex. This ensures that while a + // descriptor is in use, it cannot be closed and reused for a different file + // description. + struct atomic_fd { + int fd; + absl::Mutex mu; + }; + + // Set up initial inotify instances. + constexpr int num_fds = 3; + std::vector<atomic_fd> fds(num_fds); + for (int i = 0; i < num_fds; i++) { + int fd; + ASSERT_THAT(fd = inotify_init1(IN_NONBLOCK), SyscallSucceeds()); + fds[i].fd = fd; + } + + // Set up initial watch targets. + std::vector<std::string> paths; + for (int i = 0; i < 3; i++) { + paths.push_back(NewTempAbsPath()); + ASSERT_THAT(mknod(paths[i].c_str(), S_IFREG | 0600, 0), SyscallSucceeds()); + } + + constexpr absl::Duration runtime = absl::Seconds(4); + + // Constantly replace each inotify instance with a new one. + auto replace_fds = [&] { + for (auto start = absl::Now(); absl::Now() - start < runtime;) { + for (auto& afd : fds) { + int new_fd; + ASSERT_THAT(new_fd = inotify_init1(IN_NONBLOCK), SyscallSucceeds()); + absl::MutexLock l(&afd.mu); + ASSERT_THAT(close(afd.fd), SyscallSucceeds()); + afd.fd = new_fd; + for (auto& p : paths) { + // inotify_add_watch may fail if the file at p was deleted. 
+ ASSERT_THAT(inotify_add_watch(afd.fd, p.c_str(), IN_ALL_EVENTS), + AnyOf(SyscallSucceeds(), SyscallFailsWithErrno(ENOENT))); + } + } + sched_yield(); + } + }; + + std::list<ScopedThread> ts; + for (int i = 0; i < 3; i++) { + ts.emplace_back(replace_fds); + } + + // Constantly replace each watch target with a new one. + for (auto start = absl::Now(); absl::Now() - start < runtime;) { + for (auto& p : paths) { + ASSERT_THAT(unlink(p.c_str()), SyscallSucceeds()); + ASSERT_THAT(mknod(p.c_str(), S_IFREG | 0600, 0), SyscallSucceeds()); + } + sched_yield(); + } +} + +// This test helps verify that the lock order of filesystem and inotify locks +// is respected when adding/removing watches occurs concurrently with the +// removal of their targets. +TEST(InotifyTest, AddRemoveUnlinkDoNotDeadlock_NoRandomSave) { + const DisableSave ds; // Too many syscalls. + + // Set up inotify instances. + constexpr int num_fds = 3; + std::vector<int> fds(num_fds); + for (int i = 0; i < num_fds; i++) { + ASSERT_THAT(fds[i] = inotify_init1(IN_NONBLOCK), SyscallSucceeds()); + } + + // Set up initial watch targets. + std::vector<std::string> paths; + for (int i = 0; i < 3; i++) { + paths.push_back(NewTempAbsPath()); + ASSERT_THAT(mknod(paths[i].c_str(), S_IFREG | 0600, 0), SyscallSucceeds()); + } + + constexpr absl::Duration runtime = absl::Seconds(1); + + // Constantly add/remove watches for each inotify instance/watch target pair. + auto add_remove_watches = [&] { + for (auto start = absl::Now(); absl::Now() - start < runtime;) { + for (int fd : fds) { + for (auto& p : paths) { + // Do not assert on inotify_add_watch and inotify_rm_watch. They may + // fail if the file at p was deleted. inotify_add_watch may also fail + // if another thread beat us to adding a watch. 
+ const int wd = inotify_add_watch(fd, p.c_str(), IN_ALL_EVENTS); + if (wd > 0) { + inotify_rm_watch(fd, wd); + } + } + } + sched_yield(); + } + }; + + std::list<ScopedThread> ts; + for (int i = 0; i < 15; i++) { + ts.emplace_back(add_remove_watches); + } + + // Constantly replace each watch target with a new one. + for (auto start = absl::Now(); absl::Now() - start < runtime;) { + for (auto& p : paths) { + ASSERT_THAT(unlink(p.c_str()), SyscallSucceeds()); + ASSERT_THAT(mknod(p.c_str(), S_IFREG | 0600, 0), SyscallSucceeds()); + } + sched_yield(); + } +} + +// This test helps verify that the lock order of filesystem and inotify locks +// is respected when many inotify events and filesystem operations occur +// simultaneously. +TEST(InotifyTest, NotifyNoDeadlock_NoRandomSave) { + const DisableSave ds; // Too many syscalls. + + const TempPath parent = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const std::string dir = parent.path(); + + // mu protects file, which will change on rename. + absl::Mutex mu; + std::string file = NewTempAbsPathInDir(dir); + ASSERT_THAT(mknod(file.c_str(), 0644 | S_IFREG, 0), SyscallSucceeds()); + + const absl::Duration runtime = absl::Milliseconds(300); + + // Add/remove watches on dir and file. 
+ ScopedThread add_remove_watches([&] { + const FileDescriptor ifd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + int dir_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(ifd.get(), dir, IN_ALL_EVENTS)); + int file_wd; + { + absl::ReaderMutexLock l(&mu); + file_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(ifd.get(), file, IN_ALL_EVENTS)); + } + for (auto start = absl::Now(); absl::Now() - start < runtime;) { + ASSERT_THAT(inotify_rm_watch(ifd.get(), file_wd), SyscallSucceeds()); + ASSERT_THAT(inotify_rm_watch(ifd.get(), dir_wd), SyscallSucceeds()); + dir_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(ifd.get(), dir, IN_ALL_EVENTS)); + { + absl::ReaderMutexLock l(&mu); + file_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(ifd.get(), file, IN_ALL_EVENTS)); + } + sched_yield(); + } + }); + + // Modify attributes on dir and file. + ScopedThread stats([&] { + int fd, dir_fd; + { + absl::ReaderMutexLock l(&mu); + ASSERT_THAT(fd = open(file.c_str(), O_RDONLY), SyscallSucceeds()); + } + ASSERT_THAT(dir_fd = open(dir.c_str(), O_RDONLY | O_DIRECTORY), + SyscallSucceeds()); + const struct timeval times[2] = {{1, 0}, {2, 0}}; + + for (auto start = absl::Now(); absl::Now() - start < runtime;) { + { + absl::ReaderMutexLock l(&mu); + EXPECT_THAT(utimes(file.c_str(), times), SyscallSucceeds()); + } + EXPECT_THAT(futimes(fd, times), SyscallSucceeds()); + EXPECT_THAT(utimes(dir.c_str(), times), SyscallSucceeds()); + EXPECT_THAT(futimes(dir_fd, times), SyscallSucceeds()); + sched_yield(); + } + }); + + // Modify extended attributes on dir and file. + ScopedThread xattrs([&] { + // TODO(gvisor.dev/issue/1636): Support extended attributes in runsc gofer. 
+ if (!IsRunningOnGvisor()) { + int fd; + { + absl::ReaderMutexLock l(&mu); + ASSERT_THAT(fd = open(file.c_str(), O_RDONLY), SyscallSucceeds()); + } + + const char* name = "user.test"; + int val = 123; + for (auto start = absl::Now(); absl::Now() - start < runtime;) { + { + absl::ReaderMutexLock l(&mu); + ASSERT_THAT( + setxattr(file.c_str(), name, &val, sizeof(val), /*flags=*/0), + SyscallSucceeds()); + ASSERT_THAT(removexattr(file.c_str(), name), SyscallSucceeds()); + } + + ASSERT_THAT(fsetxattr(fd, name, &val, sizeof(val), /*flags=*/0), + SyscallSucceeds()); + ASSERT_THAT(fremovexattr(fd, name), SyscallSucceeds()); + sched_yield(); + } + } + }); + + // Read and write file's contents. Read and write dir's entries. + ScopedThread read_write([&] { + int fd; + { + absl::ReaderMutexLock l(&mu); + ASSERT_THAT(fd = open(file.c_str(), O_RDWR), SyscallSucceeds()); + } + for (auto start = absl::Now(); absl::Now() - start < runtime;) { + int val = 123; + ASSERT_THAT(write(fd, &val, sizeof(val)), SyscallSucceeds()); + ASSERT_THAT(read(fd, &val, sizeof(val)), SyscallSucceeds()); + TempPath new_file = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir)); + ASSERT_NO_ERRNO(ListDir(dir, false)); + new_file.reset(); + sched_yield(); + } + }); + + // Rename file. + for (auto start = absl::Now(); absl::Now() - start < runtime;) { + const std::string new_path = NewTempAbsPathInDir(dir); + { + absl::WriterMutexLock l(&mu); + ASSERT_THAT(rename(file.c_str(), new_path.c_str()), SyscallSucceeds()); + file = new_path; + } + sched_yield(); + } +} + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/ioctl.cc b/test/syscalls/linux/ioctl.cc new file mode 100644 index 000000000..b0a07a064 --- /dev/null +++ b/test/syscalls/linux/ioctl.cc @@ -0,0 +1,406 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <arpa/inet.h> +#include <errno.h> +#include <fcntl.h> +#include <net/if.h> +#include <netdb.h> +#include <signal.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <unistd.h> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "test/syscalls/linux/ip_socket_test_util.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/signal_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +bool CheckNonBlocking(int fd) { + int ret = fcntl(fd, F_GETFL, 0); + TEST_CHECK(ret != -1); + return (ret & O_NONBLOCK) == O_NONBLOCK; +} + +bool CheckCloExec(int fd) { + int ret = fcntl(fd, F_GETFD, 0); + TEST_CHECK(ret != -1); + return (ret & FD_CLOEXEC) == FD_CLOEXEC; +} + +class IoctlTest : public ::testing::Test { + protected: + void SetUp() override { + ASSERT_THAT(fd_ = open("/dev/null", O_RDONLY), SyscallSucceeds()); + } + + void TearDown() override { + if (fd_ >= 0) { + ASSERT_THAT(close(fd_), SyscallSucceeds()); + fd_ = -1; + } + } + + int fd() const { return fd_; } + + private: + int fd_ = -1; +}; + +TEST_F(IoctlTest, BadFileDescriptor) { + EXPECT_THAT(ioctl(-1 /* fd */, 0), SyscallFailsWithErrno(EBADF)); +} + +TEST_F(IoctlTest, InvalidControlNumber) { + EXPECT_THAT(ioctl(STDOUT_FILENO, 0), SyscallFailsWithErrno(ENOTTY)); +} + +TEST_F(IoctlTest, FIONBIOSucceeds) { + EXPECT_FALSE(CheckNonBlocking(fd())); + int set = 1; + 
EXPECT_THAT(ioctl(fd(), FIONBIO, &set), SyscallSucceeds()); + EXPECT_TRUE(CheckNonBlocking(fd())); + set = 0; + EXPECT_THAT(ioctl(fd(), FIONBIO, &set), SyscallSucceeds()); + EXPECT_FALSE(CheckNonBlocking(fd())); +} + +TEST_F(IoctlTest, FIONBIOFails) { + EXPECT_THAT(ioctl(fd(), FIONBIO, nullptr), SyscallFailsWithErrno(EFAULT)); +} + +TEST_F(IoctlTest, FIONCLEXSucceeds) { + EXPECT_THAT(ioctl(fd(), FIONCLEX), SyscallSucceeds()); + EXPECT_FALSE(CheckCloExec(fd())); +} + +TEST_F(IoctlTest, FIOCLEXSucceeds) { + EXPECT_THAT(ioctl(fd(), FIOCLEX), SyscallSucceeds()); + EXPECT_TRUE(CheckCloExec(fd())); +} + +TEST_F(IoctlTest, FIOASYNCFails) { + EXPECT_THAT(ioctl(fd(), FIOASYNC, nullptr), SyscallFailsWithErrno(EFAULT)); +} + +TEST_F(IoctlTest, FIOASYNCSucceeds) { + // Not all FDs support FIOASYNC. + const FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( + Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); + + int before = -1; + ASSERT_THAT(before = fcntl(s.get(), F_GETFL), SyscallSucceeds()); + + int set = 1; + EXPECT_THAT(ioctl(s.get(), FIOASYNC, &set), SyscallSucceeds()); + + int after_set = -1; + ASSERT_THAT(after_set = fcntl(s.get(), F_GETFL), SyscallSucceeds()); + EXPECT_EQ(after_set, before | O_ASYNC) << "before was " << before; + + set = 0; + EXPECT_THAT(ioctl(s.get(), FIOASYNC, &set), SyscallSucceeds()); + + ASSERT_THAT(fcntl(s.get(), F_GETFL), SyscallSucceedsWithValue(before)); +} + +/* Count of the number of SIGIOs handled. */ +static volatile int io_received = 0; + +void inc_io_handler(int sig, siginfo_t* siginfo, void* arg) { io_received++; } + +TEST_F(IoctlTest, FIOASYNCNoTarget) { + auto pair = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + // Count SIGIOs received. + io_received = 0; + struct sigaction sa; + sa.sa_sigaction = inc_io_handler; + sigfillset(&sa.sa_mask); + sa.sa_flags = SA_RESTART; + auto sa_cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGIO, sa)); + + // Actually allow SIGIO delivery. 
+ auto mask_cleanup = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, SIGIO)); + + int set = 1; + EXPECT_THAT(ioctl(pair->second_fd(), FIOASYNC, &set), SyscallSucceeds()); + + constexpr char kData[] = "abc"; + ASSERT_THAT(WriteFd(pair->first_fd(), kData, sizeof(kData)), + SyscallSucceedsWithValue(sizeof(kData))); + + EXPECT_EQ(io_received, 0); +} + +TEST_F(IoctlTest, FIOASYNCSelfTarget) { + // FIXME(b/120624367): gVisor erroneously sends SIGIO on close(2), which would + // kill the test when pair goes out of scope. Temporarily ignore SIGIO so that + // the close signal is ignored. + struct sigaction sa; + sa.sa_handler = SIG_IGN; + auto early_sa_cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGIO, sa)); + + auto pair = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + // Count SIGIOs received. + io_received = 0; + sa.sa_sigaction = inc_io_handler; + sigfillset(&sa.sa_mask); + sa.sa_flags = SA_RESTART; + auto sa_cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGIO, sa)); + + // Actually allow SIGIO delivery. + auto mask_cleanup = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, SIGIO)); + + int set = 1; + EXPECT_THAT(ioctl(pair->second_fd(), FIOASYNC, &set), SyscallSucceeds()); + + pid_t pid = getpid(); + EXPECT_THAT(ioctl(pair->second_fd(), FIOSETOWN, &pid), SyscallSucceeds()); + + constexpr char kData[] = "abc"; + ASSERT_THAT(WriteFd(pair->first_fd(), kData, sizeof(kData)), + SyscallSucceedsWithValue(sizeof(kData))); + + EXPECT_EQ(io_received, 1); +} + +// Equivalent to FIOASYNCSelfTarget except that FIOSETOWN is called before +// FIOASYNC. +TEST_F(IoctlTest, FIOASYNCSelfTarget2) { + // FIXME(b/120624367): gVisor erroneously sends SIGIO on close(2), which would + // kill the test when pair goes out of scope. Temporarily ignore SIGIO so that + // the close signal is ignored. 
+ struct sigaction sa; + sa.sa_handler = SIG_IGN; + auto early_sa_cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGIO, sa)); + + auto pair = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + // Count SIGIOs received. + io_received = 0; + sa.sa_sigaction = inc_io_handler; + sigfillset(&sa.sa_mask); + sa.sa_flags = SA_RESTART; + auto sa_cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGIO, sa)); + + // Actually allow SIGIO delivery. + auto mask_cleanup = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, SIGIO)); + + pid_t pid = -1; + EXPECT_THAT(pid = getpid(), SyscallSucceeds()); + EXPECT_THAT(ioctl(pair->second_fd(), FIOSETOWN, &pid), SyscallSucceeds()); + + int set = 1; + EXPECT_THAT(ioctl(pair->second_fd(), FIOASYNC, &set), SyscallSucceeds()); + + constexpr char kData[] = "abc"; + ASSERT_THAT(WriteFd(pair->first_fd(), kData, sizeof(kData)), + SyscallSucceedsWithValue(sizeof(kData))); + + EXPECT_EQ(io_received, 1); +} + +// Check that closing an FD does not result in an event. +TEST_F(IoctlTest, FIOASYNCSelfTargetClose) { + // Count SIGIOs received. + struct sigaction sa; + io_received = 0; + sa.sa_sigaction = inc_io_handler; + sigfillset(&sa.sa_mask); + sa.sa_flags = SA_RESTART; + auto sa_cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGIO, sa)); + + // Actually allow SIGIO delivery. + auto mask_cleanup = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, SIGIO)); + + for (int i = 0; i < 2; i++) { + auto pair = ASSERT_NO_ERRNO_AND_VALUE( + UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + pid_t pid = getpid(); + EXPECT_THAT(ioctl(pair->second_fd(), FIOSETOWN, &pid), SyscallSucceeds()); + + int set = 1; + EXPECT_THAT(ioctl(pair->second_fd(), FIOASYNC, &set), SyscallSucceeds()); + } + + // FIXME(b/120624367): gVisor erroneously sends SIGIO on close. 
+ SKIP_IF(IsRunningOnGvisor()); + + EXPECT_EQ(io_received, 0); +} + +TEST_F(IoctlTest, FIOASYNCInvalidPID) { + auto pair = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + int set = 1; + ASSERT_THAT(ioctl(pair->second_fd(), FIOASYNC, &set), SyscallSucceeds()); + pid_t pid = INT_MAX; + // This succeeds (with behavior equivalent to a pid of 0) in Linux prior to + // f73127356f34 "fs/fcntl: return -ESRCH in f_setown when pid/pgid can't be + // found", and fails with ESRCH after that commit. + EXPECT_THAT(ioctl(pair->second_fd(), FIOSETOWN, &pid), + AnyOf(SyscallSucceeds(), SyscallFailsWithErrno(ESRCH))); +} + +TEST_F(IoctlTest, FIOASYNCUnsetTarget) { + auto pair = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + // Count SIGIOs received. + io_received = 0; + struct sigaction sa; + sa.sa_sigaction = inc_io_handler; + sigfillset(&sa.sa_mask); + sa.sa_flags = SA_RESTART; + auto sa_cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGIO, sa)); + + // Actually allow SIGIO delivery. + auto mask_cleanup = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, SIGIO)); + + int set = 1; + EXPECT_THAT(ioctl(pair->second_fd(), FIOASYNC, &set), SyscallSucceeds()); + + pid_t pid = getpid(); + EXPECT_THAT(ioctl(pair->second_fd(), FIOSETOWN, &pid), SyscallSucceeds()); + + // Passing a PID of 0 unsets the target. + pid = 0; + EXPECT_THAT(ioctl(pair->second_fd(), FIOSETOWN, &pid), SyscallSucceeds()); + + constexpr char kData[] = "abc"; + ASSERT_THAT(WriteFd(pair->first_fd(), kData, sizeof(kData)), + SyscallSucceedsWithValue(sizeof(kData))); + + EXPECT_EQ(io_received, 0); +} + +using IoctlTestSIOCGIFCONF = SimpleSocketTest; + +TEST_P(IoctlTestSIOCGIFCONF, ValidateNoArrayGetsLength) { + auto fd = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + // Validate that no array can be used to get the length required. 
+ struct ifconf ifconf = {}; + ASSERT_THAT(ioctl(fd->get(), SIOCGIFCONF, &ifconf), SyscallSucceeds()); + ASSERT_GT(ifconf.ifc_len, 0); +} + +// This test validates that we will only return a partial array list and not +// partial ifreq structs. +TEST_P(IoctlTestSIOCGIFCONF, ValidateNoPartialIfrsReturned) { + auto fd = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + struct ifreq ifr = {}; + struct ifconf ifconf = {}; + ifconf.ifc_len = sizeof(ifr) - 1; // One byte too few. + ifconf.ifc_ifcu.ifcu_req = &ifr; + + ASSERT_THAT(ioctl(fd->get(), SIOCGIFCONF, &ifconf), SyscallSucceeds()); + ASSERT_EQ(ifconf.ifc_len, 0); + ASSERT_EQ(ifr.ifr_name[0], '\0'); // Nothing is returned. + + ifconf.ifc_len = sizeof(ifreq); + ASSERT_THAT(ioctl(fd->get(), SIOCGIFCONF, &ifconf), SyscallSucceeds()); + ASSERT_GT(ifconf.ifc_len, 0); + ASSERT_NE(ifr.ifr_name[0], '\0'); // An interface can now be returned. +} + +TEST_P(IoctlTestSIOCGIFCONF, ValidateLoopbackIsPresent) { + auto fd = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + struct ifconf ifconf = {}; + struct ifreq ifr[10] = {}; // Storage for up to 10 interfaces. + + ifconf.ifc_req = ifr; + ifconf.ifc_len = sizeof(ifr); + + ASSERT_THAT(ioctl(fd->get(), SIOCGIFCONF, &ifconf), SyscallSucceeds()); + size_t num_if = ifconf.ifc_len / sizeof(struct ifreq); + + // We should have at least one interface. + ASSERT_GE(num_if, 1); + + // One of the interfaces should be a loopback. + bool found_loopback = false; + for (size_t i = 0; i < num_if; ++i) { + if (strcmp(ifr[i].ifr_name, "lo") == 0) { + // SIOCGIFCONF returns the ipv4 address of the interface, let's check it. + ASSERT_EQ(ifr[i].ifr_addr.sa_family, AF_INET); + + // Validate the address is correct for loopback. 
+ // s_addr is in network byte order; convert to host order before comparing. + sockaddr_in* sin = reinterpret_cast<sockaddr_in*>(&ifr[i].ifr_addr); + ASSERT_EQ(ntohl(sin->sin_addr.s_addr), INADDR_LOOPBACK); + + found_loopback = true; + break; + } + } + ASSERT_TRUE(found_loopback); +} + +std::vector<SocketKind> IoctlSocketTypes() { + return {SimpleSocket(AF_UNIX, SOCK_STREAM, 0), + SimpleSocket(AF_UNIX, SOCK_DGRAM, 0), + SimpleSocket(AF_INET, SOCK_STREAM, 0), + SimpleSocket(AF_INET6, SOCK_STREAM, 0), + SimpleSocket(AF_INET, SOCK_DGRAM, 0), + SimpleSocket(AF_INET6, SOCK_DGRAM, 0)}; +} + +INSTANTIATE_TEST_SUITE_P(IoctlTest, IoctlTestSIOCGIFCONF, + ::testing::ValuesIn(IoctlSocketTypes())); + +} // namespace + +TEST_F(IoctlTest, FIOGETOWNSucceeds) { + const FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( + Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); + + int get = -1; + ASSERT_THAT(ioctl(s.get(), FIOGETOWN, &get), SyscallSucceeds()); + EXPECT_EQ(get, 0); +} + +TEST_F(IoctlTest, SIOCGPGRPSucceeds) { + const FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( + Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); + + int get = -1; + ASSERT_THAT(ioctl(s.get(), SIOCGPGRP, &get), SyscallSucceeds()); + EXPECT_EQ(get, 0); +} + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/ip_socket_test_util.cc b/test/syscalls/linux/ip_socket_test_util.cc new file mode 100644 index 000000000..98d07ae85 --- /dev/null +++ b/test/syscalls/linux/ip_socket_test_util.cc @@ -0,0 +1,239 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "test/syscalls/linux/ip_socket_test_util.h" + +#include <net/if.h> +#include <netinet/in.h> +#include <sys/socket.h> + +#include <cstring> + +namespace gvisor { +namespace testing { + +uint32_t IPFromInetSockaddr(const struct sockaddr* addr) { + auto* in_addr = reinterpret_cast<const struct sockaddr_in*>(addr); + return in_addr->sin_addr.s_addr; +} + +uint16_t PortFromInetSockaddr(const struct sockaddr* addr) { + auto* in_addr = reinterpret_cast<const struct sockaddr_in*>(addr); + return ntohs(in_addr->sin_port); +} + +PosixErrorOr<int> InterfaceIndex(std::string name) { + int index = if_nametoindex(name.c_str()); + if (index) { + return index; + } + return PosixError(errno); +} + +namespace { + +std::string DescribeSocketType(int type) { + return absl::StrCat(((type & SOCK_NONBLOCK) != 0) ? "non-blocking " : "", + ((type & SOCK_CLOEXEC) != 0) ? "close-on-exec " : ""); +} + +} // namespace + +SocketPairKind IPv6TCPAcceptBindSocketPair(int type) { + std::string description = + absl::StrCat(DescribeSocketType(type), "connected IPv6 TCP socket"); + return SocketPairKind{ + description, AF_INET6, type | SOCK_STREAM, IPPROTO_TCP, + TCPAcceptBindSocketPairCreator(AF_INET6, type | SOCK_STREAM, 0, + /* dual_stack = */ false)}; +} + +SocketPairKind IPv4TCPAcceptBindSocketPair(int type) { + std::string description = + absl::StrCat(DescribeSocketType(type), "connected IPv4 TCP socket"); + return SocketPairKind{ + description, AF_INET, type | SOCK_STREAM, IPPROTO_TCP, + TCPAcceptBindSocketPairCreator(AF_INET, type | SOCK_STREAM, 0, + /* dual_stack = */ false)}; +} + +SocketPairKind DualStackTCPAcceptBindSocketPair(int type) { + std::string description = + absl::StrCat(DescribeSocketType(type), "connected dual stack TCP socket"); + return SocketPairKind{ + description, AF_INET6, type | SOCK_STREAM, IPPROTO_TCP, + TCPAcceptBindSocketPairCreator(AF_INET6, type | 
SOCK_STREAM, 0, + /* dual_stack = */ true)}; +} + +SocketPairKind IPv6TCPAcceptBindPersistentListenerSocketPair(int type) { + std::string description = + absl::StrCat(DescribeSocketType(type), "connected IPv6 TCP socket"); + return SocketPairKind{description, AF_INET6, type | SOCK_STREAM, IPPROTO_TCP, + TCPAcceptBindPersistentListenerSocketPairCreator( + AF_INET6, type | SOCK_STREAM, 0, + /* dual_stack = */ false)}; +} + +SocketPairKind IPv4TCPAcceptBindPersistentListenerSocketPair(int type) { + std::string description = + absl::StrCat(DescribeSocketType(type), "connected IPv4 TCP socket"); + return SocketPairKind{description, AF_INET, type | SOCK_STREAM, IPPROTO_TCP, + TCPAcceptBindPersistentListenerSocketPairCreator( + AF_INET, type | SOCK_STREAM, 0, + /* dual_stack = */ false)}; +} + +SocketPairKind DualStackTCPAcceptBindPersistentListenerSocketPair(int type) { + std::string description = + absl::StrCat(DescribeSocketType(type), "connected dual stack TCP socket"); + return SocketPairKind{description, AF_INET6, type | SOCK_STREAM, IPPROTO_TCP, + TCPAcceptBindPersistentListenerSocketPairCreator( + AF_INET6, type | SOCK_STREAM, 0, + /* dual_stack = */ true)}; +} + +SocketPairKind IPv6UDPBidirectionalBindSocketPair(int type) { + std::string description = + absl::StrCat(DescribeSocketType(type), "connected IPv6 UDP socket"); + return SocketPairKind{ + description, AF_INET6, type | SOCK_DGRAM, IPPROTO_UDP, + UDPBidirectionalBindSocketPairCreator(AF_INET6, type | SOCK_DGRAM, 0, + /* dual_stack = */ false)}; +} + +SocketPairKind IPv4UDPBidirectionalBindSocketPair(int type) { + std::string description = + absl::StrCat(DescribeSocketType(type), "connected IPv4 UDP socket"); + return SocketPairKind{ + description, AF_INET, type | SOCK_DGRAM, IPPROTO_UDP, + UDPBidirectionalBindSocketPairCreator(AF_INET, type | SOCK_DGRAM, 0, + /* dual_stack = */ false)}; +} + +SocketPairKind DualStackUDPBidirectionalBindSocketPair(int type) { + std::string description = + 
absl::StrCat(DescribeSocketType(type), "connected dual stack UDP socket"); + return SocketPairKind{ + description, AF_INET6, type | SOCK_DGRAM, IPPROTO_UDP, + UDPBidirectionalBindSocketPairCreator(AF_INET6, type | SOCK_DGRAM, 0, + /* dual_stack = */ true)}; +} + +SocketPairKind IPv4UDPUnboundSocketPair(int type) { + std::string description = + absl::StrCat(DescribeSocketType(type), "IPv4 UDP socket"); + return SocketPairKind{ + description, AF_INET, type | SOCK_DGRAM, IPPROTO_UDP, + UDPUnboundSocketPairCreator(AF_INET, type | SOCK_DGRAM, 0, + /* dual_stack = */ false)}; +} + +SocketKind IPv4UDPUnboundSocket(int type) { + std::string description = + absl::StrCat(DescribeSocketType(type), "IPv4 UDP socket"); + return SocketKind{ + description, AF_INET, type | SOCK_DGRAM, IPPROTO_UDP, + UnboundSocketCreator(AF_INET, type | SOCK_DGRAM, IPPROTO_UDP)}; +} + +SocketKind IPv6UDPUnboundSocket(int type) { + std::string description = + absl::StrCat(DescribeSocketType(type), "IPv6 UDP socket"); + return SocketKind{ + description, AF_INET6, type | SOCK_DGRAM, IPPROTO_UDP, + UnboundSocketCreator(AF_INET6, type | SOCK_DGRAM, IPPROTO_UDP)}; +} + +SocketKind IPv4TCPUnboundSocket(int type) { + std::string description = + absl::StrCat(DescribeSocketType(type), "IPv4 TCP socket"); + return SocketKind{ + description, AF_INET, type | SOCK_STREAM, IPPROTO_TCP, + UnboundSocketCreator(AF_INET, type | SOCK_STREAM, IPPROTO_TCP)}; +} + +SocketKind IPv6TCPUnboundSocket(int type) { + std::string description = + absl::StrCat(DescribeSocketType(type), "IPv6 TCP socket"); + return SocketKind{ + description, AF_INET6, type | SOCK_STREAM, IPPROTO_TCP, + UnboundSocketCreator(AF_INET6, type | SOCK_STREAM, IPPROTO_TCP)}; +} + +PosixError IfAddrHelper::Load() { + Release(); + RETURN_ERROR_IF_SYSCALL_FAIL(getifaddrs(&ifaddr_)); + return NoError(); +} + +void IfAddrHelper::Release() { + if (ifaddr_) { + freeifaddrs(ifaddr_); + ifaddr_ = nullptr; + } +} + +std::vector<std::string> 
IfAddrHelper::InterfaceList(int family) const { + std::vector<std::string> names; + for (auto ifa = ifaddr_; ifa != NULL; ifa = ifa->ifa_next) { + if (ifa->ifa_addr == NULL || ifa->ifa_addr->sa_family != family) { + continue; + } + names.emplace(names.end(), ifa->ifa_name); + } + return names; +} + +const sockaddr* IfAddrHelper::GetAddr(int family, std::string name) const { + for (auto ifa = ifaddr_; ifa != NULL; ifa = ifa->ifa_next) { + if (ifa->ifa_addr == NULL || ifa->ifa_addr->sa_family != family) { + continue; + } + if (name == ifa->ifa_name) { + return ifa->ifa_addr; + } + } + return nullptr; +} + +PosixErrorOr<int> IfAddrHelper::GetIndex(std::string name) const { + return InterfaceIndex(name); +} + +std::string GetAddr4Str(const in_addr* a) { + char str[INET_ADDRSTRLEN]; + inet_ntop(AF_INET, a, str, sizeof(str)); + return std::string(str); +} + +std::string GetAddr6Str(const in6_addr* a) { + char str[INET6_ADDRSTRLEN]; + inet_ntop(AF_INET6, a, str, sizeof(str)); + return std::string(str); +} + +std::string GetAddrStr(const sockaddr* a) { + if (a->sa_family == AF_INET) { + auto src = &(reinterpret_cast<const sockaddr_in*>(a)->sin_addr); + return GetAddr4Str(src); + } else if (a->sa_family == AF_INET6) { + auto src = &(reinterpret_cast<const sockaddr_in6*>(a)->sin6_addr); + return GetAddr6Str(src); + } + return std::string("<invalid>"); +} + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/ip_socket_test_util.h b/test/syscalls/linux/ip_socket_test_util.h new file mode 100644 index 000000000..9c3859fcd --- /dev/null +++ b/test/syscalls/linux/ip_socket_test_util.h @@ -0,0 +1,135 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GVISOR_TEST_SYSCALLS_IP_SOCKET_TEST_UTIL_H_ +#define GVISOR_TEST_SYSCALLS_IP_SOCKET_TEST_UTIL_H_ + +#include <arpa/inet.h> +#include <ifaddrs.h> +#include <sys/types.h> + +#include <string> + +#include "test/syscalls/linux/socket_test_util.h" + +namespace gvisor { +namespace testing { + +// Extracts the IP address from an inet sockaddr in network byte order. +uint32_t IPFromInetSockaddr(const struct sockaddr* addr); + +// Extracts the port from an inet sockaddr in host byte order. +uint16_t PortFromInetSockaddr(const struct sockaddr* addr); + +// InterfaceIndex returns the index of the named interface. +PosixErrorOr<int> InterfaceIndex(std::string name); + +// IPv6TCPAcceptBindSocketPair returns a SocketPairKind that represents +// SocketPairs created with bind() and accept() syscalls with AF_INET6 and the +// given type bound to the IPv6 loopback. +SocketPairKind IPv6TCPAcceptBindSocketPair(int type); + +// IPv4TCPAcceptBindSocketPair returns a SocketPairKind that represents +// SocketPairs created with bind() and accept() syscalls with AF_INET and the +// given type bound to the IPv4 loopback. +SocketPairKind IPv4TCPAcceptBindSocketPair(int type); + +// DualStackTCPAcceptBindSocketPair returns a SocketPairKind that represents +// SocketPairs created with bind() and accept() syscalls with AF_INET6 and the +// given type bound to the IPv4 loopback. 
+SocketPairKind DualStackTCPAcceptBindSocketPair(int type); + +// IPv6TCPAcceptBindPersistentListenerSocketPair is like +// IPv6TCPAcceptBindSocketPair except it uses a persistent listening socket to +// create all socket pairs. +SocketPairKind IPv6TCPAcceptBindPersistentListenerSocketPair(int type); + +// IPv4TCPAcceptBindPersistentListenerSocketPair is like +// IPv4TCPAcceptBindSocketPair except it uses a persistent listening socket to +// create all socket pairs. +SocketPairKind IPv4TCPAcceptBindPersistentListenerSocketPair(int type); + +// DualStackTCPAcceptBindPersistentListenerSocketPair is like +// DualStackTCPAcceptBindSocketPair except it uses a persistent listening socket +// to create all socket pairs. +SocketPairKind DualStackTCPAcceptBindPersistentListenerSocketPair(int type); + +// IPv6UDPBidirectionalBindSocketPair returns a SocketPairKind that represents +// SocketPairs created with bind() and connect() syscalls with AF_INET6 and the +// given type bound to the IPv6 loopback. +SocketPairKind IPv6UDPBidirectionalBindSocketPair(int type); + +// IPv4UDPBidirectionalBindSocketPair returns a SocketPairKind that represents +// SocketPairs created with bind() and connect() syscalls with AF_INET and the +// given type bound to the IPv4 loopback. +SocketPairKind IPv4UDPBidirectionalBindSocketPair(int type); + +// DualStackUDPBidirectionalBindSocketPair returns a SocketPairKind that +// represents SocketPairs created with bind() and connect() syscalls with +// AF_INET6 and the given type bound to the IPv4 loopback. +SocketPairKind DualStackUDPBidirectionalBindSocketPair(int type); + +// IPv4UDPUnboundSocketPair returns a SocketPairKind that represents +// SocketPairs created with AF_INET and the given type. +SocketPairKind IPv4UDPUnboundSocketPair(int type); + +// IPv4UDPUnboundSocket returns a SocketKind that represents a SimpleSocket +// created with AF_INET, SOCK_DGRAM, and the given type. 
+SocketKind IPv4UDPUnboundSocket(int type); + +// IPv6UDPUnboundSocket returns a SocketKind that represents a SimpleSocket +// created with AF_INET6, SOCK_DGRAM, and the given type. +SocketKind IPv6UDPUnboundSocket(int type); + +// IPv4TCPUnboundSocket returns a SocketKind that represents a SimpleSocket +// created with AF_INET, SOCK_STREAM and the given type. +SocketKind IPv4TCPUnboundSocket(int type); + +// IPv6TCPUnboundSocket returns a SocketKind that represents a SimpleSocket +// created with AF_INET6, SOCK_STREAM and the given type. +SocketKind IPv6TCPUnboundSocket(int type); + +// IfAddrHelper is a helper class that determines the local interfaces present +// and provides functions to obtain their names, index numbers, and IP address. +class IfAddrHelper { + public: + IfAddrHelper() : ifaddr_(nullptr) {} + ~IfAddrHelper() { Release(); } + + PosixError Load(); + void Release(); + + std::vector<std::string> InterfaceList(int family) const; + + const sockaddr* GetAddr(int family, std::string name) const; + PosixErrorOr<int> GetIndex(std::string name) const; + + private: + struct ifaddrs* ifaddr_; +}; + +// GetAddr4Str returns the given IPv4 network address structure as a string. +std::string GetAddr4Str(const in_addr* a); + +// GetAddr6Str returns the given IPv6 network address structure as a string. +std::string GetAddr6Str(const in6_addr* a); + +// GetAddrStr returns the given IPv4 or IPv6 network address structure as a +// string. +std::string GetAddrStr(const sockaddr* a); + +} // namespace testing +} // namespace gvisor + +#endif // GVISOR_TEST_SYSCALLS_IP_SOCKET_TEST_UTIL_H_ diff --git a/test/syscalls/linux/iptables.cc b/test/syscalls/linux/iptables.cc new file mode 100644 index 000000000..b8e4ece64 --- /dev/null +++ b/test/syscalls/linux/iptables.cc @@ -0,0 +1,204 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "test/syscalls/linux/iptables.h" + +#include <arpa/inet.h> +#include <linux/capability.h> +#include <linux/netfilter/x_tables.h> +#include <net/if.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/ip_icmp.h> +#include <stdio.h> +#include <sys/poll.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <unistd.h> + +#include <algorithm> + +#include "gtest/gtest.h" +#include "test/util/capability_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +constexpr char kNatTablename[] = "nat"; +constexpr char kErrorTarget[] = "ERROR"; +constexpr size_t kEmptyStandardEntrySize = + sizeof(struct ipt_entry) + sizeof(struct ipt_standard_target); +constexpr size_t kEmptyErrorEntrySize = + sizeof(struct ipt_entry) + sizeof(struct ipt_error_target); + +TEST(IPTablesBasic, CreateSocket) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + int sock; + ASSERT_THAT(sock = socket(AF_INET, SOCK_RAW, IPPROTO_ICMP), + SyscallSucceeds()); + + ASSERT_THAT(close(sock), SyscallSucceeds()); +} + +TEST(IPTablesBasic, FailSockoptNonRaw) { + // Even if the user has CAP_NET_RAW, they shouldn't be able to use the + // iptables sockopts with a non-raw socket. 
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + int sock; + ASSERT_THAT(sock = socket(AF_INET, SOCK_DGRAM, 0), SyscallSucceeds()); + + struct ipt_getinfo info = {}; + snprintf(info.name, XT_TABLE_MAXNAMELEN, "%s", kNatTablename); + socklen_t info_size = sizeof(info); + EXPECT_THAT(getsockopt(sock, IPPROTO_IP, SO_GET_INFO, &info, &info_size), + SyscallFailsWithErrno(ENOPROTOOPT)); + + ASSERT_THAT(close(sock), SyscallSucceeds()); +} + +// Fixture for iptables tests. +class IPTablesTest : public ::testing::Test { + protected: + // Creates a socket to be used in tests. + void SetUp() override; + + // Closes the socket created by SetUp(). + void TearDown() override; + + // The socket via which to manipulate iptables. + int s_; +}; + +void IPTablesTest::SetUp() { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + ASSERT_THAT(s_ = socket(AF_INET, SOCK_RAW, IPPROTO_ICMP), SyscallSucceeds()); +} + +void IPTablesTest::TearDown() { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + EXPECT_THAT(close(s_), SyscallSucceeds()); +} + +// This tests the initial state of a machine with empty iptables. We don't have +// a guarantee that the iptables are empty when running in native, but we can +// test that gVisor has the same initial state that a newly-booted Linux machine +// would have. +TEST_F(IPTablesTest, InitialState) { + SKIP_IF(!IsRunningOnGvisor()); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + // + // Get info via sockopt. + // + struct ipt_getinfo info = {}; + snprintf(info.name, XT_TABLE_MAXNAMELEN, "%s", kNatTablename); + socklen_t info_size = sizeof(info); + ASSERT_THAT(getsockopt(s_, IPPROTO_IP, SO_GET_INFO, &info, &info_size), + SyscallSucceeds()); + + // The nat table supports PREROUTING, and OUTPUT. 
+ unsigned int valid_hooks = (1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_OUT) | + (1 << NF_IP_POST_ROUTING) | (1 << NF_IP_LOCAL_IN); + + EXPECT_EQ(info.valid_hooks, valid_hooks); + + // Each chain consists of an empty entry with a standard target.. + EXPECT_EQ(info.hook_entry[NF_IP_PRE_ROUTING], 0); + EXPECT_EQ(info.hook_entry[NF_IP_LOCAL_IN], kEmptyStandardEntrySize); + EXPECT_EQ(info.hook_entry[NF_IP_LOCAL_OUT], kEmptyStandardEntrySize * 2); + EXPECT_EQ(info.hook_entry[NF_IP_POST_ROUTING], kEmptyStandardEntrySize * 3); + + // The underflow points are the same as the entry points. + EXPECT_EQ(info.underflow[NF_IP_PRE_ROUTING], 0); + EXPECT_EQ(info.underflow[NF_IP_LOCAL_IN], kEmptyStandardEntrySize); + EXPECT_EQ(info.underflow[NF_IP_LOCAL_OUT], kEmptyStandardEntrySize * 2); + EXPECT_EQ(info.underflow[NF_IP_POST_ROUTING], kEmptyStandardEntrySize * 3); + + // One entry for each chain, plus an error entry at the end. + EXPECT_EQ(info.num_entries, 5); + + EXPECT_EQ(info.size, 4 * kEmptyStandardEntrySize + kEmptyErrorEntrySize); + EXPECT_EQ(strcmp(info.name, kNatTablename), 0); + + // + // Use info to get entries. + // + socklen_t entries_size = sizeof(struct ipt_get_entries) + info.size; + struct ipt_get_entries* entries = + static_cast<struct ipt_get_entries*>(malloc(entries_size)); + snprintf(entries->name, XT_TABLE_MAXNAMELEN, "%s", kNatTablename); + entries->size = info.size; + ASSERT_THAT( + getsockopt(s_, IPPROTO_IP, SO_GET_ENTRIES, entries, &entries_size), + SyscallSucceeds()); + + // Verify the name and size. + ASSERT_EQ(info.size, entries->size); + ASSERT_EQ(strcmp(entries->name, kNatTablename), 0); + + // Verify that the entrytable is 4 entries with accept targets and no matches + // followed by a single error target. + size_t entry_offset = 0; + while (entry_offset < entries->size) { + struct ipt_entry* entry = reinterpret_cast<struct ipt_entry*>( + reinterpret_cast<char*>(entries->entrytable) + entry_offset); + + // ip should be zeroes. 
+ struct ipt_ip zeroed = {}; + EXPECT_EQ(memcmp(static_cast<void*>(&zeroed), + static_cast<void*>(&entry->ip), sizeof(zeroed)), + 0); + + // target_offset should be zero. + EXPECT_EQ(entry->target_offset, sizeof(ipt_entry)); + + if (entry_offset < kEmptyStandardEntrySize * 4) { + // The first 4 entries are standard targets + struct ipt_standard_target* target = + reinterpret_cast<struct ipt_standard_target*>(entry->elems); + EXPECT_EQ(entry->next_offset, kEmptyStandardEntrySize); + EXPECT_EQ(target->target.u.user.target_size, sizeof(*target)); + EXPECT_EQ(strcmp(target->target.u.user.name, ""), 0); + EXPECT_EQ(target->target.u.user.revision, 0); + // This is what's returned for an accept verdict. I don't know why. + EXPECT_EQ(target->verdict, -NF_ACCEPT - 1); + } else { + // The last entry is an error target + struct ipt_error_target* target = + reinterpret_cast<struct ipt_error_target*>(entry->elems); + EXPECT_EQ(entry->next_offset, kEmptyErrorEntrySize); + EXPECT_EQ(target->target.u.user.target_size, sizeof(*target)); + EXPECT_EQ(strcmp(target->target.u.user.name, kErrorTarget), 0); + EXPECT_EQ(target->target.u.user.revision, 0); + EXPECT_EQ(strcmp(target->errorname, kErrorTarget), 0); + } + + entry_offset += entry->next_offset; + } + + free(entries); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/iptables.h b/test/syscalls/linux/iptables.h new file mode 100644 index 000000000..0719c60a4 --- /dev/null +++ b/test/syscalls/linux/iptables.h @@ -0,0 +1,198 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// There are a number of structs and values that we can't #include because of a
// difference between C and C++ (C++ won't let you implicitly cast from void* to
// struct something*). We re-define them here.

#ifndef GVISOR_TEST_SYSCALLS_IPTABLES_TYPES_H_
#define GVISOR_TEST_SYSCALLS_IPTABLES_TYPES_H_

// Netfilter headers require some headers to precede them.
// clang-format off
#include <netinet/in.h>
#include <stddef.h>
// clang-format on

#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_ipv4.h>
#include <net/if.h>
#include <netinet/ip.h>
#include <stdint.h>

// The ipt_* target names are aliases for the xt_* types.
#define ipt_standard_target xt_standard_target
#define ipt_entry_target xt_entry_target
#define ipt_error_target xt_error_target

// Option names passed to setsockopt/getsockopt on the iptables socket. The
// set and get ranges both start at BASE_CTL, so values overlap between them.
enum SockOpts {
  // For setsockopt.
  BASE_CTL = 64,
  SO_SET_REPLACE = BASE_CTL,
  SO_SET_ADD_COUNTERS,
  SO_SET_MAX = SO_SET_ADD_COUNTERS,

  // For getsockopt.
  SO_GET_INFO = BASE_CTL,
  SO_GET_ENTRIES,
  SO_GET_REVISION_MATCH,
  SO_GET_REVISION_TARGET,
  SO_GET_MAX = SO_GET_REVISION_TARGET
};

// ipt_ip specifies basic matching criteria that can be applied by examining
// only the IP header of a packet.
struct ipt_ip {
  // Source IP address.
  struct in_addr src;

  // Destination IP address.
  struct in_addr dst;

  // Source IP address mask.
  struct in_addr smsk;

  // Destination IP address mask.
  struct in_addr dmsk;

  // Input interface.
  char iniface[IFNAMSIZ];

  // Output interface.
  char outiface[IFNAMSIZ];

  // Input interface mask.
  unsigned char iniface_mask[IFNAMSIZ];

  // Output interface mask.
  unsigned char outiface_mask[IFNAMSIZ];

  // Transport protocol.
  uint16_t proto;

  // Flags.
  uint8_t flags;

  // Inverse flags.
  uint8_t invflags;
};

// ipt_entry is an iptables rule. It contains information about what packets the
// rule matches and what action (target) to perform for matching packets.
struct ipt_entry {
  // Basic matching information used to match a packet's IP header.
  struct ipt_ip ip;

  // A caching field that isn't used by userspace.
  unsigned int nfcache;

  // The number of bytes between the start of this ipt_entry struct and the
  // rule's target.
  uint16_t target_offset;

  // The total size of this rule, from the beginning of the entry to the end of
  // the target.
  uint16_t next_offset;

  // A return pointer not used by userspace.
  unsigned int comefrom;

  // Counters for packets and bytes, which we don't yet implement.
  struct xt_counters counters;

  // The data for all of this rule's matches, followed by the target. This runs
  // beyond the value of sizeof(struct ipt_entry).
  unsigned char elems[0];
};

// Passed to getsockopt(SO_GET_INFO).
struct ipt_getinfo {
  // The name of the table. The user only fills this in, the rest is filled in
  // when returning from getsockopt. Currently "nat" and "mangle" are supported.
  char name[XT_TABLE_MAXNAMELEN];

  // A bitmap of which hooks apply to the table. For example, a table with hooks
  // PREROUTING and FORWARD has the value
  // (1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_FORWARD).
  unsigned int valid_hooks;

  // The offset into the entry table for each valid hook. The entry table is
  // returned by getsockopt(SO_GET_ENTRIES).
  unsigned int hook_entry[NF_IP_NUMHOOKS];

  // For each valid hook, the underflow is the offset into the entry table to
  // jump to in case traversing the table yields no verdict (although I have no
  // clue how that could happen - builtin chains always end with a policy, and
  // user-defined chains always end with a RETURN).
  //
  // The entry referred to must be an "unconditional" entry, meaning it has no
  // matches, specifies no IP criteria, and either DROPs or ACCEPTs packets. It
  // basically has to be capable of making a definitive decision no matter what
  // it's passed.
  unsigned int underflow[NF_IP_NUMHOOKS];

  // The number of entries in the entry table returned by
  // getsockopt(SO_GET_ENTRIES).
  unsigned int num_entries;

  // The size of the entry table returned by getsockopt(SO_GET_ENTRIES).
  unsigned int size;
};

// Passed to getsockopt(SO_GET_ENTRIES).
struct ipt_get_entries {
  // The name of the table. The user fills this in. Currently "nat" and "mangle"
  // are supported.
  char name[XT_TABLE_MAXNAMELEN];

  // The size of the entry table in bytes. The user fills this in with the value
  // from struct ipt_getinfo.size.
  unsigned int size;

  // The entries for the given table. This will run past the size defined by
  // sizeof(struct ipt_get_entries).
  struct ipt_entry entrytable[0];
};

// Passed to setsockopt(SO_SET_REPLACE).
struct ipt_replace {
  // The name of the table.
  char name[XT_TABLE_MAXNAMELEN];

  // The same as struct ipt_getinfo.valid_hooks. Users don't change this.
  unsigned int valid_hooks;

  // The same as struct ipt_getinfo.num_entries.
  unsigned int num_entries;

  // The same as struct ipt_getinfo.size.
  unsigned int size;

  // The same as struct ipt_getinfo.hook_entry.
  unsigned int hook_entry[NF_IP_NUMHOOKS];

  // The same as struct ipt_getinfo.underflow.
  unsigned int underflow[NF_IP_NUMHOOKS];

  // The number of counters, which should equal the number of entries.
  unsigned int num_counters;

  // The unchanged values from each ipt_entry's counters.
  struct xt_counters* counters;

  // The entries to write to the table. This will run past the size defined by
  // sizeof(struct ipt_replace).
  struct ipt_entry entries[0];
};

#endif  // GVISOR_TEST_SYSCALLS_IPTABLES_TYPES_H_
diff --git a/test/syscalls/linux/itimer.cc b/test/syscalls/linux/itimer.cc new file mode 100644 index 000000000..e397d5f57 --- /dev/null +++ b/test/syscalls/linux/itimer.cc @@ -0,0 +1,366 @@
// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
+ +#include <signal.h> +#include <sys/socket.h> +#include <sys/time.h> +#include <sys/types.h> +#include <time.h> + +#include <atomic> +#include <functional> +#include <iostream> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/strings/string_view.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/util/file_descriptor.h" +#include "test/util/logging.h" +#include "test/util/multiprocess_util.h" +#include "test/util/posix_error.h" +#include "test/util/signal_util.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" +#include "test/util/timer_util.h" + +namespace gvisor { +namespace testing { +namespace { + +constexpr char kSIGALRMToMainThread[] = "--itimer_sigarlm_to_main_thread"; +constexpr char kSIGPROFFairnessActive[] = "--itimer_sigprof_fairness_active"; +constexpr char kSIGPROFFairnessIdle[] = "--itimer_sigprof_fairness_idle"; + +// Time period to be set for the itimers. +constexpr absl::Duration kPeriod = absl::Milliseconds(25); +// Total amount of time to spend per thread. +constexpr absl::Duration kTestDuration = absl::Seconds(20); +// Amount of spin iterations to perform as the minimum work item per thread. +// Chosen to be sub-millisecond range. +constexpr int kIterations = 10000000; +// Allow deviation in the number of samples. +constexpr double kNumSamplesDeviationRatio = 0.2; + +TEST(ItimerTest, ItimervalUpdatedBeforeExpiration) { + constexpr int kSleepSecs = 10; + constexpr int kAlarmSecs = 15; + static_assert( + kSleepSecs < kAlarmSecs, + "kSleepSecs must be less than kAlarmSecs for the test to be meaningful"); + constexpr int kMaxRemainingSecs = kAlarmSecs - kSleepSecs; + + // Install a no-op handler for SIGALRM. + struct sigaction sa = {}; + sigfillset(&sa.sa_mask); + sa.sa_handler = +[](int signo) {}; + auto const cleanup_sa = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGALRM, sa)); + + // Set an itimer-based alarm for kAlarmSecs from now. 
+ struct itimerval itv = {}; + itv.it_value.tv_sec = kAlarmSecs; + auto const cleanup_itimer = + ASSERT_NO_ERRNO_AND_VALUE(ScopedItimer(ITIMER_REAL, itv)); + + // After sleeping for kSleepSecs, the itimer value should reflect the elapsed + // time even if it hasn't expired. + absl::SleepFor(absl::Seconds(kSleepSecs)); + ASSERT_THAT(getitimer(ITIMER_REAL, &itv), SyscallSucceeds()); + EXPECT_TRUE( + itv.it_value.tv_sec < kMaxRemainingSecs || + (itv.it_value.tv_sec == kMaxRemainingSecs && itv.it_value.tv_usec == 0)) + << "Remaining time: " << itv.it_value.tv_sec << " seconds + " + << itv.it_value.tv_usec << " microseconds"; +} + +ABSL_CONST_INIT static thread_local std::atomic_int signal_test_num_samples = + ATOMIC_VAR_INIT(0); + +void SignalTestSignalHandler(int /*signum*/) { signal_test_num_samples++; } + +struct SignalTestResult { + int expected_total; + int main_thread_samples; + std::vector<int> worker_samples; +}; + +std::ostream& operator<<(std::ostream& os, const SignalTestResult& r) { + os << "{expected_total: " << r.expected_total + << ", main_thread_samples: " << r.main_thread_samples + << ", worker_samples: ["; + bool first = true; + for (int sample : r.worker_samples) { + if (!first) { + os << ", "; + } + os << sample; + first = false; + } + os << "]}"; + return os; +} + +// Starts two worker threads and itimer id and measures the number of signal +// delivered to each thread. +SignalTestResult ItimerSignalTest(int id, clock_t main_clock, + clock_t worker_clock, int signal, + absl::Duration sleep) { + signal_test_num_samples = 0; + + struct sigaction sa = {}; + sa.sa_handler = &SignalTestSignalHandler; + sa.sa_flags = SA_RESTART; + sigemptyset(&sa.sa_mask); + auto sigaction_cleanup = ScopedSigaction(signal, sa).ValueOrDie(); + + int socketfds[2]; + TEST_PCHECK(socketpair(AF_UNIX, SOCK_STREAM, 0, socketfds) == 0); + + // Do the spinning in the workers. 
+ std::function<void*(int)> work = [&](int socket_fd) { + FileDescriptor fd(socket_fd); + + absl::Time finish = Now(worker_clock) + kTestDuration; + while (Now(worker_clock) < finish) { + // Blocked on read. + char c; + RetryEINTR(read)(fd.get(), &c, 1); + for (int i = 0; i < kIterations; i++) { + // Ensure compiler won't optimize this loop away. + asm(""); + } + + if (sleep != absl::ZeroDuration()) { + // Sleep so that the entire process is idle for a while. + absl::SleepFor(sleep); + } + + // Unblock the other thread. + RetryEINTR(write)(fd.get(), &c, 1); + } + + return reinterpret_cast<void*>(signal_test_num_samples.load()); + }; + + ScopedThread th1( + static_cast<std::function<void*()>>(std::bind(work, socketfds[0]))); + ScopedThread th2( + static_cast<std::function<void*()>>(std::bind(work, socketfds[1]))); + + absl::Time start = Now(main_clock); + // Start the timer. + struct itimerval timer = {}; + timer.it_value = absl::ToTimeval(kPeriod); + timer.it_interval = absl::ToTimeval(kPeriod); + auto cleanup_itimer = ScopedItimer(id, timer).ValueOrDie(); + + // Unblock th1. + // + // N.B. th2 owns socketfds[1] but can't close it until it unblocks. + char c = 0; + TEST_CHECK(write(socketfds[1], &c, 1) == 1); + + SignalTestResult result; + + // Wait for the workers to be done and collect their sample counts. 
+ result.worker_samples.push_back(reinterpret_cast<int64_t>(th1.Join())); + result.worker_samples.push_back(reinterpret_cast<int64_t>(th2.Join())); + cleanup_itimer.Release()(); + result.expected_total = (Now(main_clock) - start) / kPeriod; + result.main_thread_samples = signal_test_num_samples.load(); + + return result; +} + +int TestSIGALRMToMainThread() { + SignalTestResult result = + ItimerSignalTest(ITIMER_REAL, CLOCK_REALTIME, CLOCK_REALTIME, SIGALRM, + absl::ZeroDuration()); + + std::cerr << "result: " << result << std::endl; + + // ITIMER_REAL-generated SIGALRMs prefer to deliver to the thread group leader + // (but don't guarantee it), so we expect to see most samples on the main + // thread. + // + // The number of SIGALRMs delivered to a worker should not exceed 20% + // of the number of total signals expected (this is somewhat arbitrary). + const int worker_threshold = result.expected_total / 5; + + // + // Linux only guarantees timers will never expire before the requested time. + // Thus, we only check the upper bound and also it at least have one sample. + TEST_CHECK(result.main_thread_samples <= result.expected_total); + TEST_CHECK(result.main_thread_samples > 0); + for (int num : result.worker_samples) { + TEST_CHECK_MSG(num <= worker_threshold, "worker received too many samples"); + } + + return 0; +} + +// Random save/restore is disabled as it introduces additional latency and +// unpredictable distribution patterns. +TEST(ItimerTest, DeliversSIGALRMToMainThread_NoRandomSave) { + pid_t child; + int execve_errno; + auto kill = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec("/proc/self/exe", {"/proc/self/exe", kSIGALRMToMainThread}, + {}, &child, &execve_errno)); + EXPECT_EQ(0, execve_errno); + + int status; + EXPECT_THAT(RetryEINTR(waitpid)(child, &status, 0), + SyscallSucceedsWithValue(child)); + + // Not required anymore. 
+ kill.Release(); + + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) << status; +} + +// Signals are delivered to threads fairly. +// +// sleep indicates how long to sleep worker threads each iteration to make the +// entire process idle. +int TestSIGPROFFairness(absl::Duration sleep) { + SignalTestResult result = + ItimerSignalTest(ITIMER_PROF, CLOCK_PROCESS_CPUTIME_ID, + CLOCK_THREAD_CPUTIME_ID, SIGPROF, sleep); + + std::cerr << "result: " << result << std::endl; + + // The number of samples on the main thread should be very low as it did + // nothing. + TEST_CHECK(result.main_thread_samples < 80); + + // Both workers should get roughly equal number of samples. + TEST_CHECK(result.worker_samples.size() == 2); + + TEST_CHECK(result.expected_total > 0); + + // In an ideal world each thread would get exactly 50% of the signals, + // but since that's unlikely to happen we allow for them to get no less than + // kNumSamplesDeviationRatio of the total observed samples. + TEST_CHECK_MSG(std::abs(result.worker_samples[0] - result.worker_samples[1]) < + ((result.worker_samples[0] + result.worker_samples[1]) * + kNumSamplesDeviationRatio), + "one worker received disproportionate share of samples"); + + return 0; +} + +// Random save/restore is disabled as it introduces additional latency and +// unpredictable distribution patterns. +TEST(ItimerTest, DeliversSIGPROFToThreadsRoughlyFairlyActive_NoRandomSave) { + // On the KVM and ptrace platforms, switches between sentry and application + // context are sometimes extremely slow, causing the itimer to send SIGPROF to + // a thread that either already has one pending or has had SIGPROF delivered, + // but hasn't handled it yet (and thus therefore still has SIGPROF masked). In + // either case, since itimer signals are group-directed, signal sending falls + // back to notifying the thread group leader. 
ItimerSignalTest() fails if "too + // many" signals are delivered to the thread group leader, so these tests are + // flaky on these platforms. + // + // TODO(b/143247272): Clarify why context switches are so slow on KVM. + const auto gvisor_platform = GvisorPlatform(); + SKIP_IF(gvisor_platform == Platform::kKVM || + gvisor_platform == Platform::kPtrace); + + pid_t child; + int execve_errno; + auto kill = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec("/proc/self/exe", {"/proc/self/exe", kSIGPROFFairnessActive}, + {}, &child, &execve_errno)); + EXPECT_EQ(0, execve_errno); + + int status; + EXPECT_THAT(RetryEINTR(waitpid)(child, &status, 0), + SyscallSucceedsWithValue(child)); + + // Not required anymore. + kill.Release(); + + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << "Exited with code: " << status; +} + +// Random save/restore is disabled as it introduces additional latency and +// unpredictable distribution patterns. +TEST(ItimerTest, DeliversSIGPROFToThreadsRoughlyFairlyIdle_NoRandomSave) { + // See comment in DeliversSIGPROFToThreadsRoughlyFairlyActive. + const auto gvisor_platform = GvisorPlatform(); + SKIP_IF(gvisor_platform == Platform::kKVM || + gvisor_platform == Platform::kPtrace); + + pid_t child; + int execve_errno; + auto kill = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec("/proc/self/exe", {"/proc/self/exe", kSIGPROFFairnessIdle}, + {}, &child, &execve_errno)); + EXPECT_EQ(0, execve_errno); + + int status; + EXPECT_THAT(RetryEINTR(waitpid)(child, &status, 0), + SyscallSucceedsWithValue(child)); + + // Not required anymore. + kill.Release(); + + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << "Exited with code: " << status; +} + +} // namespace +} // namespace testing +} // namespace gvisor + +namespace { +void MaskSIGPIPE() { + // Always mask SIGPIPE as it's common and tests aren't expected to handle it. + // We don't take the TestInit() path so we must do this manually. 
+ struct sigaction sa = {}; + sa.sa_handler = SIG_IGN; + TEST_CHECK(sigaction(SIGPIPE, &sa, nullptr) == 0); +} +} // namespace + +int main(int argc, char** argv) { + // These tests require no background threads, so check for them before + // TestInit. + for (int i = 0; i < argc; i++) { + absl::string_view arg(argv[i]); + + if (arg == gvisor::testing::kSIGALRMToMainThread) { + MaskSIGPIPE(); + return gvisor::testing::TestSIGALRMToMainThread(); + } + if (arg == gvisor::testing::kSIGPROFFairnessActive) { + MaskSIGPIPE(); + return gvisor::testing::TestSIGPROFFairness(absl::ZeroDuration()); + } + if (arg == gvisor::testing::kSIGPROFFairnessIdle) { + MaskSIGPIPE(); + // Sleep time > ClockTick (10ms) exercises sleeping gVisor's + // kernel.cpuClockTicker. + return gvisor::testing::TestSIGPROFFairness(absl::Milliseconds(25)); + } + } + + gvisor::testing::TestInit(&argc, &argv); + return gvisor::testing::RunAllTests(); +} diff --git a/test/syscalls/linux/kill.cc b/test/syscalls/linux/kill.cc new file mode 100644 index 000000000..db29bd59c --- /dev/null +++ b/test/syscalls/linux/kill.cc @@ -0,0 +1,383 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <errno.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <unistd.h> + +#include <cerrno> +#include <csignal> + +#include "gtest/gtest.h" +#include "absl/flags/flag.h" +#include "absl/synchronization/mutex.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/util/capability_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/logging.h" +#include "test/util/signal_util.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +ABSL_FLAG(int32_t, scratch_uid, 65534, "scratch UID"); +ABSL_FLAG(int32_t, scratch_gid, 65534, "scratch GID"); + +using ::testing::Ge; + +namespace gvisor { +namespace testing { + +namespace { + +TEST(KillTest, CanKillValidPid) { + // If pid is positive, then signal sig is sent to the process with the ID + // specified by pid. + EXPECT_THAT(kill(getpid(), 0), SyscallSucceeds()); + // If pid equals 0, then sig is sent to every process in the process group of + // the calling process. + EXPECT_THAT(kill(0, 0), SyscallSucceeds()); + + ScopedThread([] { EXPECT_THAT(kill(gettid(), 0), SyscallSucceeds()); }); +} + +void SigHandler(int sig, siginfo_t* info, void* context) { _exit(0); } + +// If pid equals -1, then sig is sent to every process for which the calling +// process has permission to send signals, except for process 1 (init). +TEST(KillTest, CanKillAllPIDs) { + int pipe_fds[2]; + ASSERT_THAT(pipe(pipe_fds), SyscallSucceeds()); + FileDescriptor read_fd(pipe_fds[0]); + FileDescriptor write_fd(pipe_fds[1]); + + pid_t pid = fork(); + if (pid == 0) { + read_fd.reset(); + + struct sigaction sa; + sa.sa_sigaction = SigHandler; + sigfillset(&sa.sa_mask); + sa.sa_flags = SA_SIGINFO; + TEST_PCHECK(sigaction(SIGWINCH, &sa, nullptr) == 0); + MaybeSave(); + + // Indicate to the parent that we're ready. + write_fd.reset(); + + // Wait until we get the signal from the parent. 
+ while (true) { + pause(); + } + } + + ASSERT_THAT(pid, SyscallSucceeds()); + + write_fd.reset(); + + // Wait for the child to indicate that it's unmasked the signal by closing + // the write end. + char buf; + ASSERT_THAT(ReadFd(read_fd.get(), &buf, 1), SyscallSucceedsWithValue(0)); + + // Signal the child and wait for it to die with status 0, indicating that + // it got the expected signal. + EXPECT_THAT(kill(-1, SIGWINCH), SyscallSucceeds()); + + int status; + ASSERT_THAT(RetryEINTR(waitpid)(pid, &status, 0), + SyscallSucceedsWithValue(pid)); + EXPECT_TRUE(WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); +} + +TEST(KillTest, CannotKillInvalidPID) { + // We need an unused pid to verify that kill fails when given one. + // + // There is no way to guarantee that a PID is unused, but the PID of a + // recently exited process likely won't be reused soon. + pid_t fake_pid = fork(); + if (fake_pid == 0) { + _exit(0); + } + + ASSERT_THAT(fake_pid, SyscallSucceeds()); + + int status; + ASSERT_THAT(RetryEINTR(waitpid)(fake_pid, &status, 0), + SyscallSucceedsWithValue(fake_pid)); + EXPECT_TRUE(WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); + + EXPECT_THAT(kill(fake_pid, 0), SyscallFailsWithErrno(ESRCH)); +} + +TEST(KillTest, CannotUseInvalidSignal) { + EXPECT_THAT(kill(getpid(), 200), SyscallFailsWithErrno(EINVAL)); +} + +TEST(KillTest, CanKillRemoteProcess) { + pid_t pid = fork(); + if (pid == 0) { + while (true) { + pause(); + } + } + + ASSERT_THAT(pid, SyscallSucceeds()); + + EXPECT_THAT(kill(pid, SIGKILL), SyscallSucceeds()); + + int status; + ASSERT_THAT(RetryEINTR(waitpid)(pid, &status, 0), + SyscallSucceedsWithValue(pid)); + EXPECT_TRUE(WIFSIGNALED(status)); + EXPECT_EQ(SIGKILL, WTERMSIG(status)); +} + +TEST(KillTest, CanKillOwnProcess) { + EXPECT_THAT(kill(getpid(), 0), SyscallSucceeds()); +} + +// Verify that you can kill a process even using a tid from a thread other than +// the group leader. 
+TEST(KillTest, CannotKillTid) { + pid_t tid; + bool tid_available = false; + bool finished = false; + absl::Mutex mu; + ScopedThread t([&] { + mu.Lock(); + tid = gettid(); + tid_available = true; + mu.Await(absl::Condition(&finished)); + mu.Unlock(); + }); + mu.LockWhen(absl::Condition(&tid_available)); + EXPECT_THAT(kill(tid, 0), SyscallSucceeds()); + finished = true; + mu.Unlock(); +} + +TEST(KillTest, SetPgid) { + for (int i = 0; i < 10; i++) { + // The following is the normal pattern for creating a new process group. + // Both the parent and child process will call setpgid in order to avoid any + // race conditions. We do this ten times to catch races. + pid_t pid = fork(); + if (pid == 0) { + setpgid(0, 0); + while (true) { + pause(); + } + } + + ASSERT_THAT(pid, SyscallSucceeds()); + + // Set the child's group and exit. + ASSERT_THAT(setpgid(pid, pid), SyscallSucceeds()); + EXPECT_THAT(kill(pid, SIGKILL), SyscallSucceeds()); + + int status; + EXPECT_THAT(RetryEINTR(waitpid)(-pid, &status, 0), + SyscallSucceedsWithValue(pid)); + EXPECT_TRUE(WIFSIGNALED(status)); + EXPECT_EQ(SIGKILL, WTERMSIG(status)); + } +} + +TEST(KillTest, ProcessGroups) { + // Fork a new child. + // + // other_child is used as a placeholder process. We use this PID as our "does + // not exist" process group to ensure some amount of safety. (It is still + // possible to violate this assumption, but extremely unlikely.) + pid_t child = fork(); + if (child == 0) { + while (true) { + pause(); + } + } + ASSERT_THAT(child, SyscallSucceeds()); + + pid_t other_child = fork(); + if (other_child == 0) { + while (true) { + pause(); + } + } + ASSERT_THAT(other_child, SyscallSucceeds()); + + // Ensure the kill does not succeed without the new group. + EXPECT_THAT(kill(-child, SIGKILL), SyscallFailsWithErrno(ESRCH)); + + // Put the child in its own process group. 
+ ASSERT_THAT(setpgid(child, child), SyscallSucceeds()); + + // This should be not allowed: you can only create a new group with the same + // id or join an existing one. The other_child group should not exist. + ASSERT_THAT(setpgid(child, other_child), SyscallFailsWithErrno(EPERM)); + + // Done with other_child; kill it. + EXPECT_THAT(kill(other_child, SIGKILL), SyscallSucceeds()); + int status; + EXPECT_THAT(RetryEINTR(waitpid)(other_child, &status, 0), SyscallSucceeds()); + + // Linux returns success for the no-op call. + ASSERT_THAT(setpgid(child, child), SyscallSucceeds()); + + // Kill the child's process group. + ASSERT_THAT(kill(-child, SIGKILL), SyscallSucceeds()); + + // Wait on the process group; ensure that the signal was as expected. + EXPECT_THAT(RetryEINTR(waitpid)(-child, &status, 0), + SyscallSucceedsWithValue(child)); + EXPECT_TRUE(WIFSIGNALED(status)); + EXPECT_EQ(SIGKILL, WTERMSIG(status)); + + // Try to kill the process group again; ensure that the wait fails. + EXPECT_THAT(kill(-child, SIGKILL), SyscallFailsWithErrno(ESRCH)); + EXPECT_THAT(RetryEINTR(waitpid)(-child, &status, 0), + SyscallFailsWithErrno(ECHILD)); +} + +TEST(KillTest, ChildDropsPrivsCannotKill) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SETUID))); + + const int uid = absl::GetFlag(FLAGS_scratch_uid); + const int gid = absl::GetFlag(FLAGS_scratch_gid); + + // Create the child that drops privileges and tries to kill the parent. + pid_t pid = fork(); + if (pid == 0) { + TEST_PCHECK(setresgid(gid, gid, gid) == 0); + MaybeSave(); + + TEST_PCHECK(setresuid(uid, uid, uid) == 0); + MaybeSave(); + + // setresuid should have dropped CAP_KILL. Make sure. + TEST_CHECK(!HaveCapability(CAP_KILL).ValueOrDie()); + + // Try to kill parent with every signal-sending syscall possible. 
+ pid_t parent = getppid(); + + TEST_CHECK(kill(parent, SIGKILL) < 0); + TEST_PCHECK_MSG(errno == EPERM, "kill failed with wrong errno"); + MaybeSave(); + + TEST_CHECK(tgkill(parent, parent, SIGKILL) < 0); + TEST_PCHECK_MSG(errno == EPERM, "tgkill failed with wrong errno"); + MaybeSave(); + + TEST_CHECK(syscall(SYS_tkill, parent, SIGKILL) < 0); + TEST_PCHECK_MSG(errno == EPERM, "tkill failed with wrong errno"); + MaybeSave(); + + siginfo_t uinfo = {}; + uinfo.si_code = -1; // SI_QUEUE (allowed). + + TEST_CHECK(syscall(SYS_rt_sigqueueinfo, parent, SIGKILL, &uinfo) < 0); + TEST_PCHECK_MSG(errno == EPERM, "rt_sigqueueinfo failed with wrong errno"); + MaybeSave(); + + TEST_CHECK(syscall(SYS_rt_tgsigqueueinfo, parent, parent, SIGKILL, &uinfo) < + 0); + TEST_PCHECK_MSG(errno == EPERM, "rt_tgsigqueueinfo failed with wrong errno"); + MaybeSave(); + + _exit(0); + } + + ASSERT_THAT(pid, SyscallSucceeds()); + + int status; + EXPECT_THAT(RetryEINTR(waitpid)(pid, &status, 0), + SyscallSucceedsWithValue(pid)); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << "status = " << status; +} + +TEST(KillTest, CanSIGCONTSameSession) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SETUID))); + + pid_t stopped_child = fork(); + if (stopped_child == 0) { + raise(SIGSTOP); + _exit(0); + } + + ASSERT_THAT(stopped_child, SyscallSucceeds()); + + // Put the child in its own process group. The child and parent process + // groups also share a session. + ASSERT_THAT(setpgid(stopped_child, stopped_child), SyscallSucceeds()); + + // Make sure child stopped. 
+ int status; + EXPECT_THAT(RetryEINTR(waitpid)(stopped_child, &status, WUNTRACED), + SyscallSucceedsWithValue(stopped_child)); + EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP) + << "status " << status; + + const int uid = absl::GetFlag(FLAGS_scratch_uid); + const int gid = absl::GetFlag(FLAGS_scratch_gid); + + // Drop privileges only in child process, or else this parent process won't be + // able to open some log files after the test ends. + pid_t other_child = fork(); + if (other_child == 0) { + // Drop privileges. + TEST_PCHECK(setresgid(gid, gid, gid) == 0); + MaybeSave(); + + TEST_PCHECK(setresuid(uid, uid, uid) == 0); + MaybeSave(); + + // setresuid should have dropped CAP_KILL. + TEST_CHECK(!HaveCapability(CAP_KILL).ValueOrDie()); + + // Child 2 and child should now not share a thread group and any UIDs. + // Child 2 should have no privileges. That means any signal other than + // SIGCONT should fail. + TEST_CHECK(kill(stopped_child, SIGKILL) < 0); + TEST_PCHECK_MSG(errno == EPERM, "kill failed with wrong errno"); + MaybeSave(); + + TEST_PCHECK(kill(stopped_child, SIGCONT) == 0); + MaybeSave(); + + _exit(0); + } + + ASSERT_THAT(other_child, SyscallSucceeds()); + + // Make sure child exited normally. + EXPECT_THAT(RetryEINTR(waitpid)(stopped_child, &status, 0), + SyscallSucceedsWithValue(stopped_child)); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << "status " << status; + + // Make sure other_child exited normally. + EXPECT_THAT(RetryEINTR(waitpid)(other_child, &status, 0), + SyscallSucceedsWithValue(other_child)); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << "status " << status; +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/link.cc b/test/syscalls/linux/link.cc new file mode 100644 index 000000000..544681168 --- /dev/null +++ b/test/syscalls/linux/link.cc @@ -0,0 +1,305 @@ +// Copyright 2018 The gVisor Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <errno.h> +#include <fcntl.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include <string> + +#include "gtest/gtest.h" +#include "absl/flags/flag.h" +#include "absl/strings/str_cat.h" +#include "test/util/capability_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/fs_util.h" +#include "test/util/posix_error.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +ABSL_FLAG(int32_t, scratch_uid, 65534, "scratch UID"); + +namespace gvisor { +namespace testing { + +namespace { + +// IsSameFile returns true if both filenames have the same device and inode. +bool IsSameFile(const std::string& f1, const std::string& f2) { + // Use lstat rather than stat, so that symlinks are not followed. + struct stat stat1 = {}; + EXPECT_THAT(lstat(f1.c_str(), &stat1), SyscallSucceeds()); + struct stat stat2 = {}; + EXPECT_THAT(lstat(f2.c_str(), &stat2), SyscallSucceeds()); + + return stat1.st_dev == stat2.st_dev && stat1.st_ino == stat2.st_ino; +} + +TEST(LinkTest, CanCreateLinkFile) { + auto oldfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const std::string newname = NewTempAbsPath(); + + // Get the initial link count. 
+ uint64_t initial_link_count = + ASSERT_NO_ERRNO_AND_VALUE(Links(oldfile.path())); + + EXPECT_THAT(link(oldfile.path().c_str(), newname.c_str()), SyscallSucceeds()); + + EXPECT_TRUE(IsSameFile(oldfile.path(), newname)); + + // Link count should be incremented. + EXPECT_THAT(Links(oldfile.path()), + IsPosixErrorOkAndHolds(initial_link_count + 1)); + + // Delete the link. + EXPECT_THAT(unlink(newname.c_str()), SyscallSucceeds()); + + // Link count should be back to initial. + EXPECT_THAT(Links(oldfile.path()), + IsPosixErrorOkAndHolds(initial_link_count)); +} + +TEST(LinkTest, PermissionDenied) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_FOWNER))); + + // Make the file "unsafe" to link by making it only readable, but not + // writable. + const auto unwriteable_file = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileMode(0400)); + const std::string special_path = NewTempAbsPath(); + ASSERT_THAT(mkfifo(special_path.c_str(), 0666), SyscallSucceeds()); + const auto setuid_file = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileMode(0666 | S_ISUID)); + + const std::string newname = NewTempAbsPath(); + + // Do setuid in a separate thread so that after finishing this test, the + // process can still open files the test harness created before starting this + // test. Otherwise, the files are created by root (UID before the test), but + // cannot be opened by the `uid` set below after the test. After calling + // setuid(non-zero-UID), there is no way to get root privileges back. + ScopedThread([&] { + // Use syscall instead of glibc setuid wrapper because we want this setuid + // call to only apply to this task. POSIX threads, however, require that all + // threads have the same UIDs, so using the setuid wrapper sets all threads' + // real UID. + // Also drops capabilities. 
+ EXPECT_THAT(syscall(SYS_setuid, absl::GetFlag(FLAGS_scratch_uid)), + SyscallSucceeds()); + + EXPECT_THAT(link(unwriteable_file.path().c_str(), newname.c_str()), + SyscallFailsWithErrno(EPERM)); + EXPECT_THAT(link(special_path.c_str(), newname.c_str()), + SyscallFailsWithErrno(EPERM)); + if (!IsRunningWithVFS1()) { + EXPECT_THAT(link(setuid_file.path().c_str(), newname.c_str()), + SyscallFailsWithErrno(EPERM)); + } + }); +} + +TEST(LinkTest, CannotLinkDirectory) { + auto olddir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const std::string newdir = NewTempAbsPath(); + + EXPECT_THAT(link(olddir.path().c_str(), newdir.c_str()), + SyscallFailsWithErrno(EPERM)); + + EXPECT_THAT(rmdir(olddir.path().c_str()), SyscallSucceeds()); +} + +TEST(LinkTest, CannotLinkWithSlash) { + auto oldfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + // Put a final "/" on newname. + const std::string newname = absl::StrCat(NewTempAbsPath(), "/"); + + EXPECT_THAT(link(oldfile.path().c_str(), newname.c_str()), + SyscallFailsWithErrno(ENOENT)); +} + +TEST(LinkTest, OldnameIsEmpty) { + const std::string newname = NewTempAbsPath(); + EXPECT_THAT(link("", newname.c_str()), SyscallFailsWithErrno(ENOENT)); +} + +TEST(LinkTest, OldnameDoesNotExist) { + const std::string oldname = NewTempAbsPath(); + const std::string newname = NewTempAbsPath(); + EXPECT_THAT(link(oldname.c_str(), newname.c_str()), SyscallFailsWithErrno(ENOENT)); +} + +TEST(LinkTest, NewnameCannotExist) { + const std::string newname = + JoinPath(GetAbsoluteTestTmpdir(), "thisdoesnotexist", "foo"); + EXPECT_THAT(link("/thisdoesnotmatter", newname.c_str()), + SyscallFailsWithErrno(ENOENT)); +} + +TEST(LinkTest, WithOldDirFD) { + const std::string oldname_parent = NewTempAbsPath(); + const std::string oldname_base = "child"; + const std::string oldname = JoinPath(oldname_parent, oldname_base); + const std::string newname = NewTempAbsPath(); + + // Create oldname_parent directory, and get an FD. 
+ ASSERT_THAT(mkdir(oldname_parent.c_str(), 0777), SyscallSucceeds()); + const FileDescriptor oldname_parent_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(oldname_parent, O_DIRECTORY | O_RDONLY)); + + // Create oldname file. + const FileDescriptor oldname_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(oldname, O_CREAT | O_RDWR, 0666)); + + // Link oldname to newname, using oldname_parent_fd. + EXPECT_THAT(linkat(oldname_parent_fd.get(), oldname_base.c_str(), AT_FDCWD, + newname.c_str(), 0), + SyscallSucceeds()); + + EXPECT_TRUE(IsSameFile(oldname, newname)); + + EXPECT_THAT(unlink(newname.c_str()), SyscallSucceeds()); + EXPECT_THAT(unlink(oldname.c_str()), SyscallSucceeds()); + EXPECT_THAT(rmdir(oldname_parent.c_str()), SyscallSucceeds()); +} + +TEST(LinkTest, BogusFlags) { + ASSERT_THAT(linkat(1, "foo", 2, "bar", 3), SyscallFailsWithErrno(EINVAL)); +} + +TEST(LinkTest, WithNewDirFD) { + auto oldfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const std::string newname_parent = NewTempAbsPath(); + const std::string newname_base = "child"; + const std::string newname = JoinPath(newname_parent, newname_base); + + // Create newname_parent directory, and get an FD. + EXPECT_THAT(mkdir(newname_parent.c_str(), 0777), SyscallSucceeds()); + const FileDescriptor newname_parent_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(newname_parent, O_DIRECTORY | O_RDONLY)); + + // Link newname to oldfile, using newname_parent_fd. + EXPECT_THAT(linkat(AT_FDCWD, oldfile.path().c_str(), newname_parent_fd.get(), + newname.c_str(), 0), + SyscallSucceeds()); + + EXPECT_TRUE(IsSameFile(oldfile.path(), newname)); + + EXPECT_THAT(unlink(newname.c_str()), SyscallSucceeds()); + EXPECT_THAT(rmdir(newname_parent.c_str()), SyscallSucceeds()); +} + +TEST(LinkTest, RelPathsWithNonDirFDs) { + auto oldfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + + // Create a file that will be passed as the directory fd for old/new names. 
+ const std::string filename = NewTempAbsPath(); + const FileDescriptor file_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(filename, O_CREAT | O_RDWR, 0666)); + + // Using file_fd as olddirfd will fail. + EXPECT_THAT(linkat(file_fd.get(), "foo", AT_FDCWD, "bar", 0), + SyscallFailsWithErrno(ENOTDIR)); + + // Using file_fd as newdirfd will fail. + EXPECT_THAT(linkat(AT_FDCWD, oldfile.path().c_str(), file_fd.get(), "bar", 0), + SyscallFailsWithErrno(ENOTDIR)); +} + +TEST(LinkTest, AbsPathsWithNonDirFDs) { + auto oldfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const std::string newname = NewTempAbsPath(); + + // Create a file that will be passed as the directory fd for old/new names. + const std::string filename = NewTempAbsPath(); + const FileDescriptor file_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(filename, O_CREAT | O_RDWR, 0666)); + + // Using file_fd as the dirfds is OK as long as paths are absolute. + EXPECT_THAT(linkat(file_fd.get(), oldfile.path().c_str(), file_fd.get(), + newname.c_str(), 0), + SyscallSucceeds()); +} + +TEST(LinkTest, LinkDoesNotFollowSymlinks) { + // Create oldfile, and oldsymlink which points to it. + auto oldfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const std::string oldsymlink = NewTempAbsPath(); + EXPECT_THAT(symlink(oldfile.path().c_str(), oldsymlink.c_str()), + SyscallSucceeds()); + + // Now hard link newname to oldsymlink. + const std::string newname = NewTempAbsPath(); + EXPECT_THAT(link(oldsymlink.c_str(), newname.c_str()), SyscallSucceeds()); + + // The link should not have resolved the symlink, so newname and oldsymlink + // are the same. + EXPECT_TRUE(IsSameFile(oldsymlink, newname)); + EXPECT_FALSE(IsSameFile(oldfile.path(), newname)); + + EXPECT_THAT(unlink(oldsymlink.c_str()), SyscallSucceeds()); + EXPECT_THAT(unlink(newname.c_str()), SyscallSucceeds()); +} + +TEST(LinkTest, LinkatDoesNotFollowSymlinkByDefault) { + // Create oldfile, and oldsymlink which points to it. 
+ auto oldfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const std::string oldsymlink = NewTempAbsPath(); + EXPECT_THAT(symlink(oldfile.path().c_str(), oldsymlink.c_str()), + SyscallSucceeds()); + + // Now hard link newname to oldsymlink. + const std::string newname = NewTempAbsPath(); + EXPECT_THAT( + linkat(AT_FDCWD, oldsymlink.c_str(), AT_FDCWD, newname.c_str(), 0), + SyscallSucceeds()); + + // The link should not have resolved the symlink, so newname and oldsymlink + // are the same. + EXPECT_TRUE(IsSameFile(oldsymlink, newname)); + EXPECT_FALSE(IsSameFile(oldfile.path(), newname)); + + EXPECT_THAT(unlink(oldsymlink.c_str()), SyscallSucceeds()); + EXPECT_THAT(unlink(newname.c_str()), SyscallSucceeds()); +} + +TEST(LinkTest, LinkatWithSymlinkFollow) { + // Create oldfile, and oldsymlink which points to it. + auto oldfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const std::string oldsymlink = NewTempAbsPath(); + ASSERT_THAT(symlink(oldfile.path().c_str(), oldsymlink.c_str()), + SyscallSucceeds()); + + // Now hard link newname to oldsymlink, and pass AT_SYMLINK_FOLLOW flag. + const std::string newname = NewTempAbsPath(); + ASSERT_THAT(linkat(AT_FDCWD, oldsymlink.c_str(), AT_FDCWD, newname.c_str(), + AT_SYMLINK_FOLLOW), + SyscallSucceeds()); + + // The link should have resolved the symlink, so oldfile and newname are the + // same. + EXPECT_TRUE(IsSameFile(oldfile.path(), newname)); + EXPECT_FALSE(IsSameFile(oldsymlink, newname)); + + EXPECT_THAT(unlink(oldsymlink.c_str()), SyscallSucceeds()); + EXPECT_THAT(unlink(newname.c_str()), SyscallSucceeds()); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/lseek.cc b/test/syscalls/linux/lseek.cc new file mode 100644 index 000000000..6ce1e6cc3 --- /dev/null +++ b/test/syscalls/linux/lseek.cc @@ -0,0 +1,202 @@ +// Copyright 2018 The gVisor Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <errno.h> +#include <fcntl.h> +#include <stdlib.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "test/util/file_descriptor.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +TEST(LseekTest, InvalidWhence) { + const std::string kFileData = "hello world\n"; + const TempPath path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), kFileData, TempPath::kDefaultFileMode)); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(path.path(), O_RDWR, 0644)); + + ASSERT_THAT(lseek(fd.get(), 0, -1), SyscallFailsWithErrno(EINVAL)); +} + +TEST(LseekTest, NegativeOffset) { + const std::string kFileData = "hello world\n"; + const TempPath path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), kFileData, TempPath::kDefaultFileMode)); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(path.path(), O_RDWR, 0644)); + + EXPECT_THAT(lseek(fd.get(), -(kFileData.length() + 1), SEEK_CUR), + SyscallFailsWithErrno(EINVAL)); +} + +// A 32-bit off_t is not large enough to represent an offset larger than +// maximum file size on standard file systems, so it isn't possible to cause +// overflow. +#if defined(__x86_64__) || defined(__aarch64__) +TEST(LseekTest, Overflow) { + // HA! Classic Linux. 
We really should have an EOVERFLOW + // here, since we're seeking to something that cannot be + // represented.. but instead we are given an EINVAL. + const std::string kFileData = "hello world\n"; + const TempPath path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), kFileData, TempPath::kDefaultFileMode)); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(path.path(), O_RDWR, 0644)); + EXPECT_THAT(lseek(fd.get(), 0x7fffffffffffffff, SEEK_END), + SyscallFailsWithErrno(EINVAL)); +} +#endif + +TEST(LseekTest, Set) { + const std::string kFileData = "hello world\n"; + const TempPath path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), kFileData, TempPath::kDefaultFileMode)); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(path.path(), O_RDWR, 0644)); + + char buf = '\0'; + EXPECT_THAT(lseek(fd.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0)); + ASSERT_THAT(read(fd.get(), &buf, 1), SyscallSucceedsWithValue(1)); + EXPECT_EQ(buf, kFileData.c_str()[0]); + EXPECT_THAT(lseek(fd.get(), 6, SEEK_SET), SyscallSucceedsWithValue(6)); + ASSERT_THAT(read(fd.get(), &buf, 1), SyscallSucceedsWithValue(1)); + EXPECT_EQ(buf, kFileData.c_str()[6]); +} + +TEST(LseekTest, Cur) { + const std::string kFileData = "hello world\n"; + const TempPath path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), kFileData, TempPath::kDefaultFileMode)); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(path.path(), O_RDWR, 0644)); + + char buf = '\0'; + EXPECT_THAT(lseek(fd.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0)); + ASSERT_THAT(read(fd.get(), &buf, 1), SyscallSucceedsWithValue(1)); + EXPECT_EQ(buf, kFileData.c_str()[0]); + EXPECT_THAT(lseek(fd.get(), 3, SEEK_CUR), SyscallSucceedsWithValue(4)); + ASSERT_THAT(read(fd.get(), &buf, 1), SyscallSucceedsWithValue(1)); + EXPECT_EQ(buf, kFileData.c_str()[4]); +} + +TEST(LseekTest, End) { + const std::string kFileData = 
"hello world\n"; + const TempPath path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), kFileData, TempPath::kDefaultFileMode)); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(path.path(), O_RDWR, 0644)); + + char buf = '\0'; + EXPECT_THAT(lseek(fd.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0)); + ASSERT_THAT(read(fd.get(), &buf, 1), SyscallSucceedsWithValue(1)); + EXPECT_EQ(buf, kFileData.c_str()[0]); + EXPECT_THAT(lseek(fd.get(), -2, SEEK_END), SyscallSucceedsWithValue(10)); + ASSERT_THAT(read(fd.get(), &buf, 1), SyscallSucceedsWithValue(1)); + EXPECT_EQ(buf, kFileData.c_str()[kFileData.length() - 2]); +} + +TEST(LseekTest, InvalidFD) { + EXPECT_THAT(lseek(-1, 0, SEEK_SET), SyscallFailsWithErrno(EBADF)); +} + +TEST(LseekTest, DirCurEnd) { + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open("/tmp", O_RDONLY)); + ASSERT_THAT(lseek(fd.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(0)); +} + +TEST(LseekTest, ProcDir) { + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/self", O_RDONLY)); + ASSERT_THAT(lseek(fd.get(), 0, SEEK_CUR), SyscallSucceeds()); + ASSERT_THAT(lseek(fd.get(), 0, SEEK_END), SyscallSucceeds()); +} + +TEST(LseekTest, ProcFile) { + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/meminfo", O_RDONLY)); + ASSERT_THAT(lseek(fd.get(), 0, SEEK_CUR), SyscallSucceeds()); + ASSERT_THAT(lseek(fd.get(), 0, SEEK_END), SyscallFailsWithErrno(EINVAL)); +} + +TEST(LseekTest, SysDir) { + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/sys/devices", O_RDONLY)); + ASSERT_THAT(lseek(fd.get(), 0, SEEK_CUR), SyscallSucceeds()); + ASSERT_THAT(lseek(fd.get(), 0, SEEK_END), SyscallSucceeds()); +} + +TEST(LseekTest, SeekCurrentDir) { + // From include/linux/fs.h. 
+ constexpr loff_t MAX_LFS_FILESIZE = 0x7fffffffffffffff; + + char* dir = get_current_dir_name(); + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(dir, O_RDONLY)); + + ASSERT_THAT(lseek(fd.get(), 0, SEEK_CUR), SyscallSucceeds()); + ASSERT_THAT(lseek(fd.get(), 0, SEEK_END), + // Some filesystems (like ext4) allow lseek(SEEK_END) on a + // directory and return MAX_LFS_FILESIZE, others return EINVAL. + AnyOf(SyscallSucceedsWithValue(MAX_LFS_FILESIZE), + SyscallFailsWithErrno(EINVAL))); + free(dir); +} + +TEST(LseekTest, ProcStatTwice) { + const FileDescriptor fd1 = + ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/stat", O_RDONLY)); + const FileDescriptor fd2 = + ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/stat", O_RDONLY)); + + ASSERT_THAT(lseek(fd1.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(0)); + ASSERT_THAT(lseek(fd1.get(), 0, SEEK_END), SyscallFailsWithErrno(EINVAL)); + ASSERT_THAT(lseek(fd1.get(), 1000, SEEK_CUR), SyscallSucceeds()); + // Check that just because we moved fd1, fd2 doesn't move. + ASSERT_THAT(lseek(fd2.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(0)); + + const FileDescriptor fd3 = + ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/stat", O_RDONLY)); + ASSERT_THAT(lseek(fd3.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(0)); +} + +TEST(LseekTest, EtcPasswdDup) { + const FileDescriptor fd1 = + ASSERT_NO_ERRNO_AND_VALUE(Open("/etc/passwd", O_RDONLY)); + const FileDescriptor fd2 = ASSERT_NO_ERRNO_AND_VALUE(fd1.Dup()); + + ASSERT_THAT(lseek(fd1.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(0)); + ASSERT_THAT(lseek(fd2.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(0)); + ASSERT_THAT(lseek(fd1.get(), 1000, SEEK_CUR), SyscallSucceeds()); + // fd2 is a dup of fd1, so they share a file offset: moving fd1 moves fd2. 
+ ASSERT_THAT(lseek(fd2.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(1000)); + + const FileDescriptor fd3 = ASSERT_NO_ERRNO_AND_VALUE(fd1.Dup()); + ASSERT_THAT(lseek(fd3.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(1000)); +} + +// TODO(magi): Add tests where we have donated in sockets. + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/madvise.cc b/test/syscalls/linux/madvise.cc new file mode 100644 index 000000000..5a1973f60 --- /dev/null +++ b/test/syscalls/linux/madvise.cc @@ -0,0 +1,251 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <fcntl.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#include <string> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "test/util/file_descriptor.h" +#include "test/util/logging.h" +#include "test/util/memory_util.h" +#include "test/util/multiprocess_util.h" +#include "test/util/posix_error.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +void ExpectAllMappingBytes(Mapping const& m, char c) { + auto const v = m.view(); + for (size_t i = 0; i < kPageSize; i++) { + ASSERT_EQ(v[i], c) << "at offset " << i; + } +} + +// Equivalent to ExpectAllMappingBytes but async-signal-safe and with less +// helpful failure messages. +void CheckAllMappingBytes(Mapping const& m, char c) { + auto const v = m.view(); + for (size_t i = 0; i < kPageSize; i++) { + TEST_CHECK_MSG(v[i] == c, "mapping contains wrong value"); + } +} + +TEST(MadviseDontneedTest, ZerosPrivateAnonPage) { + auto m = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE)); + ExpectAllMappingBytes(m, 0); + memset(m.ptr(), 1, m.len()); + ExpectAllMappingBytes(m, 1); + ASSERT_THAT(madvise(m.ptr(), m.len(), MADV_DONTNEED), SyscallSucceeds()); + ExpectAllMappingBytes(m, 0); +} + +TEST(MadviseDontneedTest, ZerosCOWAnonPageInCallerOnly) { + auto m = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE)); + ExpectAllMappingBytes(m, 0); + memset(m.ptr(), 2, m.len()); + ExpectAllMappingBytes(m, 2); + + // Do madvise in a child process. 
+ pid_t pid = fork(); + CheckAllMappingBytes(m, 2); + if (pid == 0) { + TEST_PCHECK(madvise(m.ptr(), m.len(), MADV_DONTNEED) == 0); + CheckAllMappingBytes(m, 0); + _exit(0); + } + + ASSERT_THAT(pid, SyscallSucceeds()); + + int status = 0; + ASSERT_THAT(waitpid(-1, &status, 0), SyscallSucceedsWithValue(pid)); + EXPECT_TRUE(WIFEXITED(status)); + EXPECT_EQ(WEXITSTATUS(status), 0); + // The child's madvise should not have affected the parent. + ExpectAllMappingBytes(m, 2); +} + +TEST(MadviseDontneedTest, DoesNotModifySharedAnonPage) { + auto m = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED)); + ExpectAllMappingBytes(m, 0); + memset(m.ptr(), 3, m.len()); + ExpectAllMappingBytes(m, 3); + ASSERT_THAT(madvise(m.ptr(), m.len(), MADV_DONTNEED), SyscallSucceeds()); + ExpectAllMappingBytes(m, 3); +} + +TEST(MadviseDontneedTest, CleansPrivateFilePage) { + TempPath f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + /* parent = */ GetAbsoluteTestTmpdir(), + /* content = */ std::string(kPageSize, 4), TempPath::kDefaultFileMode)); + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDWR)); + + Mapping m = ASSERT_NO_ERRNO_AND_VALUE(Mmap( + nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd.get(), 0)); + ExpectAllMappingBytes(m, 4); + memset(m.ptr(), 5, m.len()); + ExpectAllMappingBytes(m, 5); + ASSERT_THAT(madvise(m.ptr(), m.len(), MADV_DONTNEED), SyscallSucceeds()); + ExpectAllMappingBytes(m, 4); +} + +TEST(MadviseDontneedTest, DoesNotModifySharedFilePage) { + TempPath f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + /* parent = */ GetAbsoluteTestTmpdir(), + /* content = */ std::string(kPageSize, 6), TempPath::kDefaultFileMode)); + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDWR)); + + Mapping m = ASSERT_NO_ERRNO_AND_VALUE(Mmap( + nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd.get(), 0)); + ExpectAllMappingBytes(m, 6); + memset(m.ptr(), 7, m.len()); + 
ExpectAllMappingBytes(m, 7); + ASSERT_THAT(madvise(m.ptr(), m.len(), MADV_DONTNEED), SyscallSucceeds()); + ExpectAllMappingBytes(m, 7); +} + +TEST(MadviseDontneedTest, IgnoresPermissions) { + auto m = + ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_NONE, MAP_PRIVATE)); + EXPECT_THAT(madvise(m.ptr(), m.len(), MADV_DONTNEED), SyscallSucceeds()); +} + +TEST(MadviseDontforkTest, AddressLength) { + auto m = + ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_NONE, MAP_PRIVATE)); + char* addr = static_cast<char*>(m.ptr()); + + // Address must be page aligned. + EXPECT_THAT(madvise(addr + 1, kPageSize, MADV_DONTFORK), + SyscallFailsWithErrno(EINVAL)); + + // Zero length madvise always succeeds. + EXPECT_THAT(madvise(addr, 0, MADV_DONTFORK), SyscallSucceeds()); + + // Length must not roll over after rounding up. + size_t badlen = std::numeric_limits<std::size_t>::max() - (kPageSize / 2); + EXPECT_THAT(madvise(0, badlen, MADV_DONTFORK), SyscallFailsWithErrno(EINVAL)); + + // Length need not be page aligned - it is implicitly rounded up. + EXPECT_THAT(madvise(addr, 1, MADV_DONTFORK), SyscallSucceeds()); + EXPECT_THAT(madvise(addr, kPageSize, MADV_DONTFORK), SyscallSucceeds()); +} + +TEST(MadviseDontforkTest, DontforkShared) { + // Mmap two shared file-backed pages and MADV_DONTFORK the second page. 
+ TempPath f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + /* parent = */ GetAbsoluteTestTmpdir(), + /* content = */ std::string(kPageSize * 2, 2), + TempPath::kDefaultFileMode)); + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDWR)); + + Mapping m = ASSERT_NO_ERRNO_AND_VALUE(Mmap( + nullptr, kPageSize * 2, PROT_READ | PROT_WRITE, MAP_SHARED, fd.get(), 0)); + + const Mapping ms1 = Mapping(reinterpret_cast<void*>(m.addr()), kPageSize); + const Mapping ms2 = + Mapping(reinterpret_cast<void*>(m.addr() + kPageSize), kPageSize); + m.release(); + + ASSERT_THAT(madvise(ms2.ptr(), kPageSize, MADV_DONTFORK), SyscallSucceeds()); + + const auto rest = [&] { + // First page is mapped in child and modifications are visible to parent + // via the shared mapping. + TEST_CHECK(IsMapped(ms1.addr())); + ExpectAllMappingBytes(ms1, 2); + memset(ms1.ptr(), 1, kPageSize); + ExpectAllMappingBytes(ms1, 1); + + // Second page must not be mapped in child. + TEST_CHECK(!IsMapped(ms2.addr())); + }; + + EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0)); + + ExpectAllMappingBytes(ms1, 1); // page contents modified by child. + ExpectAllMappingBytes(ms2, 2); // page contents unchanged. +} + +TEST(MadviseDontforkTest, DontforkAnonPrivate) { + // Mmap three anonymous pages and MADV_DONTFORK the middle page. + Mapping m = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kPageSize * 3, PROT_READ | PROT_WRITE, MAP_PRIVATE)); + const Mapping mp1 = Mapping(reinterpret_cast<void*>(m.addr()), kPageSize); + const Mapping mp2 = + Mapping(reinterpret_cast<void*>(m.addr() + kPageSize), kPageSize); + const Mapping mp3 = + Mapping(reinterpret_cast<void*>(m.addr() + 2 * kPageSize), kPageSize); + m.release(); + + ASSERT_THAT(madvise(mp2.ptr(), kPageSize, MADV_DONTFORK), SyscallSucceeds()); + + // Verify that all pages are zeroed and memset the first, second and third + // pages to 1, 2, and 3 respectively. 
+ ExpectAllMappingBytes(mp1, 0); + memset(mp1.ptr(), 1, kPageSize); + + ExpectAllMappingBytes(mp2, 0); + memset(mp2.ptr(), 2, kPageSize); + + ExpectAllMappingBytes(mp3, 0); + memset(mp3.ptr(), 3, kPageSize); + + const auto rest = [&] { + // Verify first page is mapped, verify its contents and then modify the + // page. The mapping is private so the modifications are not visible to + // the parent. + TEST_CHECK(IsMapped(mp1.addr())); + ExpectAllMappingBytes(mp1, 1); + memset(mp1.ptr(), 11, kPageSize); + ExpectAllMappingBytes(mp1, 11); + + // Verify second page is not mapped. + TEST_CHECK(!IsMapped(mp2.addr())); + + // Verify third page is mapped, verify its contents and then modify the + // page. The mapping is private so the modifications are not visible to + // the parent. + TEST_CHECK(IsMapped(mp3.addr())); + ExpectAllMappingBytes(mp3, 3); + memset(mp3.ptr(), 13, kPageSize); + ExpectAllMappingBytes(mp3, 13); + }; + EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0)); + + // The fork and COW by child should not affect the parent mappings. + ExpectAllMappingBytes(mp1, 1); + ExpectAllMappingBytes(mp2, 2); + ExpectAllMappingBytes(mp3, 3); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/memfd.cc b/test/syscalls/linux/memfd.cc new file mode 100644 index 000000000..f8b7f7938 --- /dev/null +++ b/test/syscalls/linux/memfd.cc @@ -0,0 +1,557 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include <errno.h> +#include <fcntl.h> +#include <linux/magic.h> +#include <linux/memfd.h> +#include <linux/unistd.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/statfs.h> +#include <sys/syscall.h> + +#include <vector> + +#include "gtest/gtest.h" +#include "test/util/file_descriptor.h" +#include "test/util/fs_util.h" +#include "test/util/memory_util.h" +#include "test/util/multiprocess_util.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { +namespace { + +// The header sys/memfd.h isn't available on all systems, so redefining some of +// the constants here. +#define F_LINUX_SPECIFIC_BASE 1024 + +#ifndef F_ADD_SEALS +#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9) +#endif /* F_ADD_SEALS */ + +#ifndef F_GET_SEALS +#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10) +#endif /* F_GET_SEALS */ + +#define F_SEAL_SEAL 0x0001 +#define F_SEAL_SHRINK 0x0002 +#define F_SEAL_GROW 0x0004 +#define F_SEAL_WRITE 0x0008 + +using ::testing::StartsWith; + +const std::string kMemfdName = "some-memfd"; + +int memfd_create(const std::string& name, unsigned int flags) { + return syscall(__NR_memfd_create, name.c_str(), flags); +} + +PosixErrorOr<FileDescriptor> MemfdCreate(const std::string& name, + uint32_t flags) { + int fd = memfd_create(name, flags); + if (fd < 0) { + return PosixError( + errno, absl::StrFormat("memfd_create(\"%s\", %#x)", name, flags)); + } + MaybeSave(); + return FileDescriptor(fd); +} + +// Procfs entries for memfds display the appropriate name. +TEST(MemfdTest, Name) { + const FileDescriptor memfd = + ASSERT_NO_ERRNO_AND_VALUE(MemfdCreate(kMemfdName, 0)); + const std::string proc_name = ASSERT_NO_ERRNO_AND_VALUE( + ReadLink(absl::StrFormat("/proc/self/fd/%d", memfd.get()))); + EXPECT_THAT(proc_name, StartsWith("/memfd:" + kMemfdName)); +} + +// Memfds support read/write syscalls. 
TEST(MemfdTest, WriteRead) {
  const FileDescriptor memfd =
      ASSERT_NO_ERRNO_AND_VALUE(MemfdCreate(kMemfdName, 0));

  // Write a random page of data to the memfd via write(2).
  std::vector<char> buf(kPageSize);
  RandomizeBuffer(buf.data(), buf.size());
  ASSERT_THAT(write(memfd.get(), buf.data(), buf.size()),
              SyscallSucceedsWithValue(kPageSize));

  // Read back the same data and verify. The write above advanced the file
  // offset, so rewind to the start first.
  std::vector<char> buf2(kPageSize);
  ASSERT_THAT(lseek(memfd.get(), 0, SEEK_SET), SyscallSucceeds());
  EXPECT_THAT(read(memfd.get(), buf2.data(), buf2.size()),
              SyscallSucceedsWithValue(kPageSize));
  EXPECT_EQ(buf, buf2);
}

// Memfds can be mapped and used as usual.
TEST(MemfdTest, Mmap) {
  const FileDescriptor memfd =
      ASSERT_NO_ERRNO_AND_VALUE(MemfdCreate(kMemfdName, 0));
  // The mapping is created before the file is given a size; the file is
  // extended with ftruncate below before the mapping is touched.
  const Mapping m1 = ASSERT_NO_ERRNO_AND_VALUE(Mmap(
      nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, memfd.get(), 0));

  // Write a random page of data to the memfd via mmap m1.
  std::vector<char> buf(kPageSize);
  RandomizeBuffer(buf.data(), buf.size());
  ASSERT_THAT(ftruncate(memfd.get(), kPageSize), SyscallSucceeds());
  memcpy(m1.ptr(), buf.data(), buf.size());

  // Read the data back via a read syscall on the memfd.
  std::vector<char> buf2(kPageSize);
  EXPECT_THAT(read(memfd.get(), buf2.data(), buf2.size()),
              SyscallSucceedsWithValue(kPageSize));
  EXPECT_EQ(buf, buf2);

  // The same data should be accessible via a new mapping m2.
  const Mapping m2 = ASSERT_NO_ERRNO_AND_VALUE(Mmap(
      nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, memfd.get(), 0));
  EXPECT_EQ(0, memcmp(m1.ptr(), m2.ptr(), kPageSize));
}

// Data written through a mapping of one fd is readable through a dup of it.
TEST(MemfdTest, DuplicateFDsShareContent) {
  const FileDescriptor memfd =
      ASSERT_NO_ERRNO_AND_VALUE(MemfdCreate(kMemfdName, 0));
  const Mapping m1 = ASSERT_NO_ERRNO_AND_VALUE(Mmap(
      nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, memfd.get(), 0));
  const FileDescriptor memfd2 = ASSERT_NO_ERRNO_AND_VALUE(memfd.Dup());

  // Write a random page of data to the memfd via mmap m1.
  std::vector<char> buf(kPageSize);
  RandomizeBuffer(buf.data(), buf.size());
  ASSERT_THAT(ftruncate(memfd.get(), kPageSize), SyscallSucceeds());
  memcpy(m1.ptr(), buf.data(), buf.size());

  // Read the data back via a read syscall on a duplicate fd.
  std::vector<char> buf2(kPageSize);
  EXPECT_THAT(read(memfd2.get(), buf2.data(), buf2.size()),
              SyscallSucceedsWithValue(kPageSize));
  EXPECT_EQ(buf, buf2);
}

// File seals are disabled by default on memfds.
TEST(MemfdTest, SealingDisabledByDefault) {
  const FileDescriptor memfd =
      ASSERT_NO_ERRNO_AND_VALUE(MemfdCreate(kMemfdName, 0));
  // Without MFD_ALLOW_SEALING the memfd reports F_SEAL_SEAL as already set.
  EXPECT_THAT(fcntl(memfd.get(), F_GET_SEALS),
              SyscallSucceedsWithValue(F_SEAL_SEAL));
  // Attempting to set any seal should fail.
  EXPECT_THAT(fcntl(memfd.get(), F_ADD_SEALS, F_SEAL_WRITE),
              SyscallFailsWithErrno(EPERM));
}

// Seals can be retrieved and updated for memfds.
TEST(MemfdTest, SealsGetSet) {
  const FileDescriptor memfd =
      ASSERT_NO_ERRNO_AND_VALUE(MemfdCreate(kMemfdName, MFD_ALLOW_SEALING));
  int seals;
  ASSERT_THAT(seals = fcntl(memfd.get(), F_GET_SEALS), SyscallSucceeds());
  // No seals are set yet.
  EXPECT_EQ(0, seals);

  // Set a seal and check that we can get it back.
  ASSERT_THAT(fcntl(memfd.get(), F_ADD_SEALS, F_SEAL_WRITE), SyscallSucceeds());
  EXPECT_THAT(fcntl(memfd.get(), F_GET_SEALS),
              SyscallSucceedsWithValue(F_SEAL_WRITE));

  // Set some more seals and verify.
  ASSERT_THAT(fcntl(memfd.get(), F_ADD_SEALS, F_SEAL_GROW | F_SEAL_SHRINK),
              SyscallSucceeds());
  EXPECT_THAT(
      fcntl(memfd.get(), F_GET_SEALS),
      SyscallSucceedsWithValue(F_SEAL_WRITE | F_SEAL_GROW | F_SEAL_SHRINK));

  // Attempting to set a seal that is already set is a no-op.
  ASSERT_THAT(fcntl(memfd.get(), F_ADD_SEALS, F_SEAL_WRITE), SyscallSucceeds());
  EXPECT_THAT(
      fcntl(memfd.get(), F_GET_SEALS),
      SyscallSucceedsWithValue(F_SEAL_WRITE | F_SEAL_GROW | F_SEAL_SHRINK));

  // Add remaining seals and verify.
  ASSERT_THAT(fcntl(memfd.get(), F_ADD_SEALS, F_SEAL_SEAL), SyscallSucceeds());
  EXPECT_THAT(fcntl(memfd.get(), F_GET_SEALS),
              SyscallSucceedsWithValue(F_SEAL_WRITE | F_SEAL_GROW |
                                       F_SEAL_SHRINK | F_SEAL_SEAL));
}

// F_SEAL_GROW prevents a memfd from being grown using ftruncate.
TEST(MemfdTest, SealGrowWithTruncate) {
  const FileDescriptor memfd =
      ASSERT_NO_ERRNO_AND_VALUE(MemfdCreate(kMemfdName, MFD_ALLOW_SEALING));
  ASSERT_THAT(ftruncate(memfd.get(), kPageSize), SyscallSucceeds());
  ASSERT_THAT(fcntl(memfd.get(), F_ADD_SEALS, F_SEAL_GROW), SyscallSucceeds());

  // Try to grow the memfd by 1 page.
  ASSERT_THAT(ftruncate(memfd.get(), kPageSize * 2),
              SyscallFailsWithErrno(EPERM));

  // Ftruncate calls that don't actually grow the memfd are allowed.
  ASSERT_THAT(ftruncate(memfd.get(), kPageSize), SyscallSucceeds());
  ASSERT_THAT(ftruncate(memfd.get(), kPageSize / 2), SyscallSucceeds());

  // After shrinking, growing back is not allowed.
  ASSERT_THAT(ftruncate(memfd.get(), kPageSize), SyscallFailsWithErrno(EPERM));
}

// F_SEAL_GROW prevents a memfd from being grown using the write syscall.
TEST(MemfdTest, SealGrowWithWrite) {
  const FileDescriptor memfd =
      ASSERT_NO_ERRNO_AND_VALUE(MemfdCreate(kMemfdName, MFD_ALLOW_SEALING));

  // Initially, writing to the memfd succeeds.
  const std::vector<char> buf(kPageSize);
  EXPECT_THAT(write(memfd.get(), buf.data(), buf.size()),
              SyscallSucceedsWithValue(kPageSize));

  // Apply F_SEAL_GROW, subsequent writes which extend the memfd should fail.
  ASSERT_THAT(fcntl(memfd.get(), F_ADD_SEALS, F_SEAL_GROW), SyscallSucceeds());
  EXPECT_THAT(write(memfd.get(), buf.data(), buf.size()),
              SyscallFailsWithErrno(EPERM));

  // However, zero-length writes are ok since they don't grow the memfd.
  EXPECT_THAT(write(memfd.get(), buf.data(), 0), SyscallSucceeds());

  // Writing to existing parts of the memfd is also ok.
  ASSERT_THAT(lseek(memfd.get(), 0, SEEK_SET), SyscallSucceeds());
  EXPECT_THAT(write(memfd.get(), buf.data(), buf.size()),
              SyscallSucceedsWithValue(kPageSize));

  // The offset is now back at the end of the file, so writing again is still
  // not allowed.
  EXPECT_THAT(write(memfd.get(), buf.data(), buf.size()),
              SyscallFailsWithErrno(EPERM));
}

// F_SEAL_GROW causes writes which partially extend off the current EOF to
// partially succeed, up to the page containing the EOF.
TEST(MemfdTest, SealGrowPartialWriteTruncated) {
  const FileDescriptor memfd =
      ASSERT_NO_ERRNO_AND_VALUE(MemfdCreate(kMemfdName, MFD_ALLOW_SEALING));
  ASSERT_THAT(ftruncate(memfd.get(), kPageSize), SyscallSucceeds());
  ASSERT_THAT(fcntl(memfd.get(), F_ADD_SEALS, F_SEAL_GROW), SyscallSucceeds());

  // FD offset: 0 (ftruncate does not move the file offset), EOF: 1 page.

  ASSERT_THAT(lseek(memfd.get(), kPageSize * 3 / 4, SEEK_SET),
              SyscallSucceeds());

  // FD offset: 3/4 page. Writing a full page now should only write 1/4 page
  // worth of data. This partially succeeds because the first page is entirely
  // within the file and requires no growth, but attempting to write the final
  // 3/4 page would require growing the file.
  const std::vector<char> buf(kPageSize);
  EXPECT_THAT(write(memfd.get(), buf.data(), buf.size()),
              SyscallSucceedsWithValue(kPageSize / 4));
}

// F_SEAL_GROW causes writes which partially extend off the current EOF to fail
// in their entirety if the only data written would be to the page containing
// the EOF.
TEST(MemfdTest, SealGrowPartialWriteTruncatedSamePage) {
  const FileDescriptor memfd =
      ASSERT_NO_ERRNO_AND_VALUE(MemfdCreate(kMemfdName, MFD_ALLOW_SEALING));
  ASSERT_THAT(ftruncate(memfd.get(), kPageSize * 3 / 4), SyscallSucceeds());
  ASSERT_THAT(fcntl(memfd.get(), F_ADD_SEALS, F_SEAL_GROW), SyscallSucceeds());

  // EOF: 3/4 page, writing 1/2 page starting at 1/2 page would cause the file
  // to grow. Since this would require only the page containing the EOF to be
  // modified, the write is rejected entirely.
  const std::vector<char> buf(kPageSize / 2);
  EXPECT_THAT(pwrite(memfd.get(), buf.data(), buf.size(), kPageSize / 2),
              SyscallFailsWithErrno(EPERM));

  // However, writing up to EOF is fine.
  EXPECT_THAT(pwrite(memfd.get(), buf.data(), buf.size() / 2, kPageSize / 2),
              SyscallSucceedsWithValue(kPageSize / 4));
}

// F_SEAL_SHRINK prevents a memfd from being shrunk using ftruncate.
TEST(MemfdTest, SealShrink) {
  const FileDescriptor memfd =
      ASSERT_NO_ERRNO_AND_VALUE(MemfdCreate(kMemfdName, MFD_ALLOW_SEALING));
  ASSERT_THAT(ftruncate(memfd.get(), kPageSize), SyscallSucceeds());
  ASSERT_THAT(fcntl(memfd.get(), F_ADD_SEALS, F_SEAL_SHRINK),
              SyscallSucceeds());

  // Shrink by half a page.
  ASSERT_THAT(ftruncate(memfd.get(), kPageSize / 2),
              SyscallFailsWithErrno(EPERM));

  // Ftruncate calls that don't actually shrink the file are allowed.
  ASSERT_THAT(ftruncate(memfd.get(), kPageSize), SyscallSucceeds());
  ASSERT_THAT(ftruncate(memfd.get(), kPageSize * 2), SyscallSucceeds());

  // After growing, shrinking is still not allowed.
  ASSERT_THAT(ftruncate(memfd.get(), kPageSize), SyscallFailsWithErrno(EPERM));
}

// F_SEAL_WRITE prevents a memfd from being written to through a write
// syscall.
TEST(MemfdTest, SealWriteWithWrite) {
  const FileDescriptor memfd =
      ASSERT_NO_ERRNO_AND_VALUE(MemfdCreate(kMemfdName, MFD_ALLOW_SEALING));
  const std::vector<char> buf(kPageSize);
  ASSERT_THAT(write(memfd.get(), buf.data(), buf.size()),
              SyscallSucceedsWithValue(kPageSize));
  ASSERT_THAT(fcntl(memfd.get(), F_ADD_SEALS, F_SEAL_WRITE), SyscallSucceeds());

  // Attempting to write at the end of the file fails.
  EXPECT_THAT(write(memfd.get(), buf.data(), 1), SyscallFailsWithErrno(EPERM));

  // Attempting to overwrite an existing part of the memfd fails.
  EXPECT_THAT(pwrite(memfd.get(), buf.data(), 1, 0),
              SyscallFailsWithErrno(EPERM));
  EXPECT_THAT(pwrite(memfd.get(), buf.data(), buf.size() / 2, kPageSize / 2),
              SyscallFailsWithErrno(EPERM));
  EXPECT_THAT(pwrite(memfd.get(), buf.data(), buf.size(), kPageSize / 2),
              SyscallFailsWithErrno(EPERM));

  // Zero-length writes however do not fail.
  EXPECT_THAT(write(memfd.get(), buf.data(), 0), SyscallSucceeds());
}

// F_SEAL_WRITE prevents a memfd from being written to through an mmap.
TEST(MemfdTest, SealWriteWithMmap) {
  const FileDescriptor memfd =
      ASSERT_NO_ERRNO_AND_VALUE(MemfdCreate(kMemfdName, MFD_ALLOW_SEALING));
  const std::vector<char> buf(kPageSize);
  ASSERT_THAT(write(memfd.get(), buf.data(), buf.size()),
              SyscallSucceedsWithValue(kPageSize));
  ASSERT_THAT(fcntl(memfd.get(), F_ADD_SEALS, F_SEAL_WRITE), SyscallSucceeds());

  // Can't create a shared mapping with writes sealed. The raw mmap call is
  // used here (rather than the Mmap helper) so errno can be inspected
  // directly; this test expects EPERM even for a read-only shared mapping.
  void* ret = mmap(nullptr, kPageSize, PROT_WRITE, MAP_SHARED, memfd.get(), 0);
  EXPECT_EQ(ret, MAP_FAILED);
  EXPECT_EQ(errno, EPERM);
  ret = mmap(nullptr, kPageSize, PROT_READ, MAP_SHARED, memfd.get(), 0);
  EXPECT_EQ(ret, MAP_FAILED);
  EXPECT_EQ(errno, EPERM);

  // However, private mappings are ok.
  EXPECT_NO_ERRNO(Mmap(nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE,
                       memfd.get(), 0));
}

// Adding F_SEAL_WRITE fails when there are outstanding writable mappings to a
// memfd.
TEST(MemfdTest, SealWriteWithOutstandingWritbleMapping) {
  const FileDescriptor memfd =
      ASSERT_NO_ERRNO_AND_VALUE(MemfdCreate(kMemfdName, MFD_ALLOW_SEALING));
  const std::vector<char> buf(kPageSize);
  ASSERT_THAT(write(memfd.get(), buf.data(), buf.size()),
              SyscallSucceedsWithValue(kPageSize));

  // Attempting to add F_SEAL_WRITE while a shared mapping is active fails,
  // whatever permissions the mapping has. Each mapping below lives only for
  // its enclosing scope, so it is gone before the next case runs.

  // Read-only shared mapping.
  {
    const Mapping m = ASSERT_NO_ERRNO_AND_VALUE(
        Mmap(nullptr, kPageSize, PROT_READ, MAP_SHARED, memfd.get(), 0));
    EXPECT_THAT(fcntl(memfd.get(), F_ADD_SEALS, F_SEAL_WRITE),
                SyscallFailsWithErrno(EBUSY));
  }

  // Write-only shared mapping.
  {
    const Mapping m = ASSERT_NO_ERRNO_AND_VALUE(
        Mmap(nullptr, kPageSize, PROT_WRITE, MAP_SHARED, memfd.get(), 0));
    EXPECT_THAT(fcntl(memfd.get(), F_ADD_SEALS, F_SEAL_WRITE),
                SyscallFailsWithErrno(EBUSY));
  }

  // Read-write shared mapping.
  {
    const Mapping m = ASSERT_NO_ERRNO_AND_VALUE(
        Mmap(nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED,
             memfd.get(), 0));
    EXPECT_THAT(fcntl(memfd.get(), F_ADD_SEALS, F_SEAL_WRITE),
                SyscallFailsWithErrno(EBUSY));
  }

  // F_SEAL_WRITE can be set with private mappings with any permissions.
  {
    const Mapping m = ASSERT_NO_ERRNO_AND_VALUE(
        Mmap(nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE,
             memfd.get(), 0));
    EXPECT_THAT(fcntl(memfd.get(), F_ADD_SEALS, F_SEAL_WRITE),
                SyscallSucceeds());
  }
}

// When applying F_SEAL_WRITE fails due to outstanding writable mappings, any
// additional seals passed to the same add seal call are also rejected.
TEST(MemfdTest, NoPartialSealApplicationWhenWriteSealRejected) {
  const FileDescriptor memfd =
      ASSERT_NO_ERRNO_AND_VALUE(MemfdCreate(kMemfdName, MFD_ALLOW_SEALING));
  const Mapping m = ASSERT_NO_ERRNO_AND_VALUE(Mmap(
      nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, memfd.get(), 0));

  // Try to add some seals along with F_SEAL_WRITE. The seal application should
  // fail since there exists an active shared mapping.
  EXPECT_THAT(fcntl(memfd.get(), F_ADD_SEALS, F_SEAL_WRITE | F_SEAL_GROW),
              SyscallFailsWithErrno(EBUSY));

  // None of the seals should be applied.
  EXPECT_THAT(fcntl(memfd.get(), F_GET_SEALS), SyscallSucceedsWithValue(0));
}

// Seals are inode level properties, and apply to all file descriptors referring
// to a memfd.
TEST(MemfdTest, SealsAreInodeLevelProperties) {
  const FileDescriptor memfd =
      ASSERT_NO_ERRNO_AND_VALUE(MemfdCreate(kMemfdName, MFD_ALLOW_SEALING));
  const FileDescriptor memfd2 = ASSERT_NO_ERRNO_AND_VALUE(memfd.Dup());

  // Add seal through the original memfd, and verify that it appears on the
  // dupped fd.
  ASSERT_THAT(fcntl(memfd.get(), F_ADD_SEALS, F_SEAL_WRITE), SyscallSucceeds());
  EXPECT_THAT(fcntl(memfd2.get(), F_GET_SEALS),
              SyscallSucceedsWithValue(F_SEAL_WRITE));

  // Verify the seal actually applies to both fds.
  std::vector<char> buf(kPageSize);
  EXPECT_THAT(write(memfd.get(), buf.data(), buf.size()),
              SyscallFailsWithErrno(EPERM));
  EXPECT_THAT(write(memfd2.get(), buf.data(), buf.size()),
              SyscallFailsWithErrno(EPERM));

  // Seals are enforced on new FDs that are dupped after the seal is already
  // applied.
  const FileDescriptor memfd3 = ASSERT_NO_ERRNO_AND_VALUE(memfd2.Dup());
  EXPECT_THAT(write(memfd3.get(), buf.data(), buf.size()),
              SyscallFailsWithErrno(EPERM));

  // Try a new seal applied to one of the dupped fds. No write above succeeded,
  // so the memfd is still empty and every ftruncate below would grow it.
  ASSERT_THAT(fcntl(memfd3.get(), F_ADD_SEALS, F_SEAL_GROW), SyscallSucceeds());
  EXPECT_THAT(ftruncate(memfd.get(), kPageSize), SyscallFailsWithErrno(EPERM));
  EXPECT_THAT(ftruncate(memfd2.get(), kPageSize), SyscallFailsWithErrno(EPERM));
  EXPECT_THAT(ftruncate(memfd3.get(), kPageSize), SyscallFailsWithErrno(EPERM));
}

// Reports whether the filesystem containing path is a tmpfs, judged by the
// f_type that statfs(2) returns. A missing path yields false rather than an
// error; any other statfs failure is returned as a PosixError.
PosixErrorOr<bool> IsTmpfs(const std::string& path) {
  struct statfs stat;
  if (statfs(path.c_str(), &stat)) {
    if (errno == ENOENT) {
      // Nothing at path, don't raise this as an error. Instead, just report no
      // tmpfs at path.
      return false;
    }
    return PosixError(errno,
                      absl::StrFormat("statfs(\"%s\", %#p)", path, &stat));
  }
  return stat.f_type == TMPFS_MAGIC;
}

// Tmpfs files also support seals, but are created with F_SEAL_SEAL.
TEST(MemfdTest, TmpfsFilesHaveSealSeal) {
  // Only meaningful when /tmp is actually backed by tmpfs.
  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsTmpfs("/tmp")));
  const TempPath tmpfs_file =
      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn("/tmp"));
  const FileDescriptor fd =
      ASSERT_NO_ERRNO_AND_VALUE(Open(tmpfs_file.path(), O_RDWR, 0644));
  EXPECT_THAT(fcntl(fd.get(), F_GET_SEALS),
              SyscallSucceedsWithValue(F_SEAL_SEAL));
}

// Can open a memfd from procfs and use as normal.
TEST(MemfdTest, CanOpenFromProcfs) {
  const FileDescriptor memfd =
      ASSERT_NO_ERRNO_AND_VALUE(MemfdCreate(kMemfdName, MFD_ALLOW_SEALING));

  // Write a random page of data to the memfd via write(2).
  std::vector<char> buf(kPageSize);
  RandomizeBuffer(buf.data(), buf.size());
  ASSERT_THAT(write(memfd.get(), buf.data(), buf.size()),
              SyscallSucceedsWithValue(kPageSize));

  // Read back the same data from the fd obtained from procfs and verify.
  // pread is given an explicit offset of 0, so no seek is needed.
  const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(
      Open(absl::StrFormat("/proc/self/fd/%d", memfd.get()), O_RDWR));
  std::vector<char> buf2(kPageSize);
  EXPECT_THAT(pread(fd.get(), buf2.data(), buf2.size(), 0),
              SyscallSucceedsWithValue(kPageSize));
  EXPECT_EQ(buf, buf2);
}

// Test that memfd permissions are set up correctly to allow another process to
// open it from procfs.
TEST(MemfdTest, OtherProcessCanOpenFromProcfs) {
  const FileDescriptor memfd =
      ASSERT_NO_ERRNO_AND_VALUE(MemfdCreate(kMemfdName, MFD_ALLOW_SEALING));
  // The path is built in the parent using the parent's pid, so the child
  // opens the parent's fd entry rather than its own.
  const auto memfd_path =
      absl::StrFormat("/proc/%d/fd/%d", getpid(), memfd.get());
  const auto rest = [&] {
    int fd = open(memfd_path.c_str(), O_RDWR);
    TEST_PCHECK(fd >= 0);
    TEST_PCHECK(close(fd) >= 0);
  };
  EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0));
}

// Test that only files opened as writable can have seals applied to them.
// Normally there's no way to specify file permissions on memfds, but we can
// obtain a read-only memfd by opening the corresponding procfs fd entry as
// read-only.
TEST(MemfdTest, MemfdMustBeWritableToModifySeals) {
  const FileDescriptor memfd =
      ASSERT_NO_ERRNO_AND_VALUE(MemfdCreate(kMemfdName, MFD_ALLOW_SEALING));

  // Initially adding a seal works.
  EXPECT_THAT(fcntl(memfd.get(), F_ADD_SEALS, F_SEAL_WRITE), SyscallSucceeds());

  // Re-open the memfd as read-only from procfs.
  const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(
      Open(absl::StrFormat("/proc/self/fd/%d", memfd.get()), O_RDONLY));

  // Can't add seals through an unwritable fd.
  EXPECT_THAT(fcntl(fd.get(), F_ADD_SEALS, F_SEAL_GROW),
              SyscallFailsWithErrno(EPERM));
}

// Test that the memfd implementation internally tracks potentially writable
// maps correctly.
TEST(MemfdTest, MultipleWritableAndNonWritableRefsToSameFileRegion) {
  const FileDescriptor memfd =
      ASSERT_NO_ERRNO_AND_VALUE(MemfdCreate(kMemfdName, 0));

  // Populate with a random page of data.
  std::vector<char> buf(kPageSize);
  RandomizeBuffer(buf.data(), buf.size());
  ASSERT_THAT(write(memfd.get(), buf.data(), buf.size()),
              SyscallSucceedsWithValue(kPageSize));

  // Read-only map to the page. This should cause an initial mapping to be
  // created.
  Mapping m1 = ASSERT_NO_ERRNO_AND_VALUE(
      Mmap(nullptr, kPageSize, PROT_READ, MAP_PRIVATE, memfd.get(), 0));

  // Create a shared writable map to the page. This should cause the internal
  // mapping to become potentially writable.
  Mapping m2 = ASSERT_NO_ERRNO_AND_VALUE(Mmap(
      nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, memfd.get(), 0));

  // Drop the read-only mapping first. If writable-ness isn't tracked correctly,
  // this can cause some misaccounting, which can trigger asserts internally.
  m1.reset();
  m2.reset();
}

}  // namespace
}  // namespace testing
}  // namespace gvisor
diff --git a/test/syscalls/linux/memory_accounting.cc b/test/syscalls/linux/memory_accounting.cc
new file mode 100644
index 000000000..94aea4077
--- /dev/null
+++ b/test/syscalls/linux/memory_accounting.cc
@@ -0,0 +1,99 @@
// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
+ +#include <sys/mman.h> + +#include <map> + +#include "gtest/gtest.h" +#include "absl/strings/match.h" +#include "absl/strings/numbers.h" +#include "absl/strings/str_format.h" +#include "absl/strings/str_split.h" +#include "test/util/fs_util.h" +#include "test/util/posix_error.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { +namespace { + +using ::absl::StrFormat; + +// AnonUsageFromMeminfo scrapes the current anonymous memory usage from +// /proc/meminfo and returns it in bytes. +PosixErrorOr<uint64_t> AnonUsageFromMeminfo() { + ASSIGN_OR_RETURN_ERRNO(auto meminfo, GetContents("/proc/meminfo")); + std::vector<std::string> lines(absl::StrSplit(meminfo, '\n')); + + // Try to find AnonPages line, the format is AnonPages:\\s+(\\d+) kB\n. + for (const auto& line : lines) { + if (!absl::StartsWith(line, "AnonPages:")) { + continue; + } + + std::vector<std::string> parts( + absl::StrSplit(line, ' ', absl::SkipEmpty())); + if (parts.size() == 3) { + // The size is the second field, let's try to parse it as a number. + ASSIGN_OR_RETURN_ERRNO(auto anon_kb, Atoi<uint64_t>(parts[1])); + return anon_kb * 1024; + } + + return PosixError(EINVAL, "AnonPages field in /proc/meminfo was malformed"); + } + + return PosixError(EINVAL, "AnonPages field not found in /proc/meminfo"); +} + +TEST(MemoryAccounting, AnonAccountingPreservedOnSaveRestore) { + // This test isn't meaningful on Linux. /proc/meminfo reports system-wide + // memory usage, which can change arbitrarily in Linux from other activity on + // the machine. In gvisor, this test is the only thing running on the + // "machine", so values in /proc/meminfo accurately reflect the memory used by + // the test. + SKIP_IF(!IsRunningOnGvisor()); + + uint64_t anon_initial = ASSERT_NO_ERRNO_AND_VALUE(AnonUsageFromMeminfo()); + + // Cause some anonymous memory usage. 
+ uint64_t map_bytes = Megabytes(512); + char* mem = + static_cast<char*>(mmap(nullptr, map_bytes, PROT_READ | PROT_WRITE, + MAP_POPULATE | MAP_ANON | MAP_PRIVATE, -1, 0)); + ASSERT_NE(mem, MAP_FAILED) + << "Map failed, errno: " << errno << " (" << strerror(errno) << ")."; + + // Write something to each page to prevent them from being decommited on + // S/R. Zero pages are dropped on save. + for (uint64_t i = 0; i < map_bytes; i += kPageSize) { + mem[i] = 'a'; + } + + uint64_t anon_after_alloc = ASSERT_NO_ERRNO_AND_VALUE(AnonUsageFromMeminfo()); + EXPECT_THAT(anon_after_alloc, + EquivalentWithin(anon_initial + map_bytes, 0.03)); + + // We have many implicit S/R cycles from scraping /proc/meminfo throughout the + // test, but throw an explicit S/R in here as well. + MaybeSave(); + + // Usage should remain the same across S/R. + uint64_t anon_after_sr = ASSERT_NO_ERRNO_AND_VALUE(AnonUsageFromMeminfo()); + EXPECT_THAT(anon_after_sr, EquivalentWithin(anon_after_alloc, 0.03)); +} + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/mempolicy.cc b/test/syscalls/linux/mempolicy.cc new file mode 100644 index 000000000..059fad598 --- /dev/null +++ b/test/syscalls/linux/mempolicy.cc @@ -0,0 +1,289 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <errno.h> +#include <sys/syscall.h> + +#include "gtest/gtest.h" +#include "absl/memory/memory.h" +#include "test/util/cleanup.h" +#include "test/util/memory_util.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +#define BITS_PER_BYTE 8 + +#define MPOL_F_STATIC_NODES (1 << 15) +#define MPOL_F_RELATIVE_NODES (1 << 14) +#define MPOL_DEFAULT 0 +#define MPOL_PREFERRED 1 +#define MPOL_BIND 2 +#define MPOL_INTERLEAVE 3 +#define MPOL_LOCAL 4 +#define MPOL_F_NODE (1 << 0) +#define MPOL_F_ADDR (1 << 1) +#define MPOL_F_MEMS_ALLOWED (1 << 2) +#define MPOL_MF_STRICT (1 << 0) +#define MPOL_MF_MOVE (1 << 1) +#define MPOL_MF_MOVE_ALL (1 << 2) + +int get_mempolicy(int* policy, uint64_t* nmask, uint64_t maxnode, void* addr, + int flags) { + return syscall(SYS_get_mempolicy, policy, nmask, maxnode, addr, flags); +} + +int set_mempolicy(int mode, uint64_t* nmask, uint64_t maxnode) { + return syscall(SYS_set_mempolicy, mode, nmask, maxnode); +} + +int mbind(void* addr, unsigned long len, int mode, + const unsigned long* nodemask, unsigned long maxnode, + unsigned flags) { + return syscall(SYS_mbind, addr, len, mode, nodemask, maxnode, flags); +} + +// Creates a cleanup object that resets the calling thread's mempolicy to the +// system default when the calling scope ends. +Cleanup ScopedMempolicy() { + return Cleanup([] { + EXPECT_THAT(set_mempolicy(MPOL_DEFAULT, nullptr, 0), SyscallSucceeds()); + }); +} + +// Temporarily change the memory policy for the calling thread within the +// caller's scope. 
+PosixErrorOr<Cleanup> ScopedSetMempolicy(int mode, uint64_t* nmask, + uint64_t maxnode) { + if (set_mempolicy(mode, nmask, maxnode)) { + return PosixError(errno, "set_mempolicy"); + } + return ScopedMempolicy(); +} + +TEST(MempolicyTest, CheckDefaultPolicy) { + int mode = 0; + uint64_t nodemask = 0; + ASSERT_THAT(get_mempolicy(&mode, &nodemask, sizeof(nodemask) * BITS_PER_BYTE, + nullptr, 0), + SyscallSucceeds()); + + EXPECT_EQ(MPOL_DEFAULT, mode); + EXPECT_EQ(0x0, nodemask); +} + +TEST(MempolicyTest, PolicyPreservedAfterSetMempolicy) { + uint64_t nodemask = 0x1; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSetMempolicy( + MPOL_BIND, &nodemask, sizeof(nodemask) * BITS_PER_BYTE)); + + int mode = 0; + uint64_t nodemask_after = 0x0; + ASSERT_THAT(get_mempolicy(&mode, &nodemask_after, + sizeof(nodemask_after) * BITS_PER_BYTE, nullptr, 0), + SyscallSucceeds()); + EXPECT_EQ(MPOL_BIND, mode); + EXPECT_EQ(0x1, nodemask_after); + + // Try throw in some mode flags. + for (auto mode_flag : {MPOL_F_STATIC_NODES, MPOL_F_RELATIVE_NODES}) { + auto cleanup2 = ASSERT_NO_ERRNO_AND_VALUE( + ScopedSetMempolicy(MPOL_INTERLEAVE | mode_flag, &nodemask, + sizeof(nodemask) * BITS_PER_BYTE)); + mode = 0; + nodemask_after = 0x0; + ASSERT_THAT( + get_mempolicy(&mode, &nodemask_after, + sizeof(nodemask_after) * BITS_PER_BYTE, nullptr, 0), + SyscallSucceeds()); + EXPECT_EQ(MPOL_INTERLEAVE | mode_flag, mode); + EXPECT_EQ(0x1, nodemask_after); + } +} + +TEST(MempolicyTest, SetMempolicyRejectsInvalidInputs) { + auto cleanup = ScopedMempolicy(); + uint64_t nodemask; + + if (IsRunningOnGvisor()) { + // Invalid nodemask, we only support a single node on gvisor. + nodemask = 0x4; + ASSERT_THAT(set_mempolicy(MPOL_DEFAULT, &nodemask, + sizeof(nodemask) * BITS_PER_BYTE), + SyscallFailsWithErrno(EINVAL)); + } + + nodemask = 0x1; + + // Invalid mode. + ASSERT_THAT(set_mempolicy(7439, &nodemask, sizeof(nodemask) * BITS_PER_BYTE), + SyscallFailsWithErrno(EINVAL)); + + // Invalid nodemask size. 
+ ASSERT_THAT(set_mempolicy(MPOL_DEFAULT, &nodemask, 0), + SyscallFailsWithErrno(EINVAL)); + + // Invalid mode flag. + ASSERT_THAT( + set_mempolicy(MPOL_DEFAULT | MPOL_F_STATIC_NODES | MPOL_F_RELATIVE_NODES, + &nodemask, sizeof(nodemask) * BITS_PER_BYTE), + SyscallFailsWithErrno(EINVAL)); + + // MPOL_INTERLEAVE with empty nodemask. + nodemask = 0x0; + ASSERT_THAT(set_mempolicy(MPOL_INTERLEAVE, &nodemask, + sizeof(nodemask) * BITS_PER_BYTE), + SyscallFailsWithErrno(EINVAL)); +} + +// The manpages specify that the nodemask provided to set_mempolicy are +// considered empty if the nodemask pointer is null, or if the nodemask size is +// 0. We use a policy which accepts both empty and non-empty nodemasks +// (MPOL_PREFERRED), a policy which requires a non-empty nodemask (MPOL_BIND), +// and a policy which completely ignores the nodemask (MPOL_DEFAULT) to verify +// argument checking around nodemasks. +TEST(MempolicyTest, EmptyNodemaskOnSet) { + auto cleanup = ScopedMempolicy(); + + EXPECT_THAT(set_mempolicy(MPOL_DEFAULT, nullptr, 1), SyscallSucceeds()); + EXPECT_THAT(set_mempolicy(MPOL_BIND, nullptr, 1), + SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(set_mempolicy(MPOL_PREFERRED, nullptr, 1), SyscallSucceeds()); + + uint64_t nodemask = 0x1; + EXPECT_THAT(set_mempolicy(MPOL_DEFAULT, &nodemask, 0), + SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(set_mempolicy(MPOL_BIND, &nodemask, 0), + SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(set_mempolicy(MPOL_PREFERRED, &nodemask, 0), + SyscallFailsWithErrno(EINVAL)); +} + +TEST(MempolicyTest, QueryAvailableNodes) { + uint64_t nodemask = 0; + ASSERT_THAT( + get_mempolicy(nullptr, &nodemask, sizeof(nodemask) * BITS_PER_BYTE, + nullptr, MPOL_F_MEMS_ALLOWED), + SyscallSucceeds()); + // We can only be sure there is a single node if running on gvisor. + if (IsRunningOnGvisor()) { + EXPECT_EQ(0x1, nodemask); + } + + // MPOL_F_ADDR and MPOL_F_NODE flags may not be combined with + // MPOL_F_MEMS_ALLLOWED. 
+ for (auto flags : + {MPOL_F_MEMS_ALLOWED | MPOL_F_ADDR, MPOL_F_MEMS_ALLOWED | MPOL_F_NODE, + MPOL_F_MEMS_ALLOWED | MPOL_F_ADDR | MPOL_F_NODE}) { + ASSERT_THAT(get_mempolicy(nullptr, &nodemask, + sizeof(nodemask) * BITS_PER_BYTE, nullptr, flags), + SyscallFailsWithErrno(EINVAL)); + } +} + +TEST(MempolicyTest, GetMempolicyQueryNodeForAddress) { + uint64_t dummy_stack_address; + auto dummy_heap_address = absl::make_unique<uint64_t>(); + int mode; + + for (auto ptr : {&dummy_stack_address, dummy_heap_address.get()}) { + mode = -1; + ASSERT_THAT( + get_mempolicy(&mode, nullptr, 0, ptr, MPOL_F_ADDR | MPOL_F_NODE), + SyscallSucceeds()); + // If we're not running on gvisor, the address may be allocated on a + // different numa node. + if (IsRunningOnGvisor()) { + EXPECT_EQ(0, mode); + } + } + + void* invalid_address = reinterpret_cast<void*>(-1); + + // Invalid address. + ASSERT_THAT(get_mempolicy(&mode, nullptr, 0, invalid_address, + MPOL_F_ADDR | MPOL_F_NODE), + SyscallFailsWithErrno(EFAULT)); + + // Invalid mode pointer. + ASSERT_THAT(get_mempolicy(reinterpret_cast<int*>(invalid_address), nullptr, 0, + &dummy_stack_address, MPOL_F_ADDR | MPOL_F_NODE), + SyscallFailsWithErrno(EFAULT)); +} + +TEST(MempolicyTest, GetMempolicyCanOmitPointers) { + int mode; + uint64_t nodemask; + + // Omit nodemask pointer. + ASSERT_THAT(get_mempolicy(&mode, nullptr, 0, nullptr, 0), SyscallSucceeds()); + // Omit mode pointer. + ASSERT_THAT(get_mempolicy(nullptr, &nodemask, + sizeof(nodemask) * BITS_PER_BYTE, nullptr, 0), + SyscallSucceeds()); + // Omit both pointers. + ASSERT_THAT(get_mempolicy(nullptr, nullptr, 0, nullptr, 0), + SyscallSucceeds()); +} + +TEST(MempolicyTest, GetMempolicyNextInterleaveNode) { + int mode; + // Policy for thread not yet set to MPOL_INTERLEAVE, can't query for + // the next node which will be used for allocation. 
+ ASSERT_THAT(get_mempolicy(&mode, nullptr, 0, nullptr, MPOL_F_NODE), + SyscallFailsWithErrno(EINVAL)); + + // Set default policy for thread to MPOL_INTERLEAVE. + uint64_t nodemask = 0x1; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSetMempolicy( + MPOL_INTERLEAVE, &nodemask, sizeof(nodemask) * BITS_PER_BYTE)); + + mode = -1; + ASSERT_THAT(get_mempolicy(&mode, nullptr, 0, nullptr, MPOL_F_NODE), + SyscallSucceeds()); + EXPECT_EQ(0, mode); +} + +TEST(MempolicyTest, Mbind) { + // Temporarily set the thread policy to MPOL_PREFERRED. + const auto cleanup_thread_policy = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSetMempolicy(MPOL_PREFERRED, nullptr, 0)); + + const auto mapping = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS)); + + // vmas default to MPOL_DEFAULT irrespective of the thread policy (currently + // MPOL_PREFERRED). + int mode; + ASSERT_THAT(get_mempolicy(&mode, nullptr, 0, mapping.ptr(), MPOL_F_ADDR), + SyscallSucceeds()); + EXPECT_EQ(mode, MPOL_DEFAULT); + + // Set MPOL_PREFERRED for the vma and read it back. + ASSERT_THAT( + mbind(mapping.ptr(), mapping.len(), MPOL_PREFERRED, nullptr, 0, 0), + SyscallSucceeds()); + ASSERT_THAT(get_mempolicy(&mode, nullptr, 0, mapping.ptr(), MPOL_F_ADDR), + SyscallSucceeds()); + EXPECT_EQ(mode, MPOL_PREFERRED); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/mincore.cc b/test/syscalls/linux/mincore.cc new file mode 100644 index 000000000..5c1240c89 --- /dev/null +++ b/test/syscalls/linux/mincore.cc @@ -0,0 +1,96 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <errno.h> +#include <stdint.h> +#include <string.h> +#include <sys/mman.h> +#include <unistd.h> + +#include <algorithm> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "test/util/memory_util.h" +#include "test/util/posix_error.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +size_t CountSetLSBs(std::vector<unsigned char> const& vec) { + return std::count_if(begin(vec), end(vec), + [](unsigned char c) { return (c & 1) != 0; }); +} + +TEST(MincoreTest, DirtyAnonPagesAreResident) { + constexpr size_t kTestPageCount = 10; + auto const kTestMappingBytes = kTestPageCount * kPageSize; + auto m = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kTestMappingBytes, PROT_READ | PROT_WRITE, MAP_PRIVATE)); + memset(m.ptr(), 0, m.len()); + + std::vector<unsigned char> vec(kTestPageCount, 0); + ASSERT_THAT(mincore(m.ptr(), kTestMappingBytes, vec.data()), + SyscallSucceeds()); + EXPECT_EQ(kTestPageCount, CountSetLSBs(vec)); +} + +TEST(MincoreTest, UnalignedAddressFails) { + // Map and touch two pages, then try to mincore the second half of the first + // page + the first half of the second page. Both pages are mapped, but + // mincore should return EINVAL due to the misaligned start address. 
+ constexpr size_t kTestPageCount = 2; + auto const kTestMappingBytes = kTestPageCount * kPageSize; + auto m = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kTestMappingBytes, PROT_READ | PROT_WRITE, MAP_PRIVATE)); + memset(m.ptr(), 0, m.len()); + + std::vector<unsigned char> vec(kTestPageCount, 0); + EXPECT_THAT(mincore(reinterpret_cast<void*>(m.addr() + kPageSize / 2), + kPageSize, vec.data()), + SyscallFailsWithErrno(EINVAL)); +} + +TEST(MincoreTest, UnalignedLengthSucceedsAndIsRoundedUp) { + // Map and touch two pages, then try to mincore the first page + the first + // half of the second page. mincore should silently round up the length to + // include both pages. + constexpr size_t kTestPageCount = 2; + auto const kTestMappingBytes = kTestPageCount * kPageSize; + auto m = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kTestMappingBytes, PROT_READ | PROT_WRITE, MAP_PRIVATE)); + memset(m.ptr(), 0, m.len()); + + std::vector<unsigned char> vec(kTestPageCount, 0); + ASSERT_THAT(mincore(m.ptr(), kPageSize + kPageSize / 2, vec.data()), + SyscallSucceeds()); + EXPECT_EQ(kTestPageCount, CountSetLSBs(vec)); +} + +TEST(MincoreTest, ZeroLengthSucceedsAndAllowsAnyVecBelowTaskSize) { + EXPECT_THAT(mincore(nullptr, 0, nullptr), SyscallSucceeds()); +} + +TEST(MincoreTest, InvalidLengthFails) { + EXPECT_THAT(mincore(nullptr, -1, nullptr), SyscallFailsWithErrno(ENOMEM)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/mkdir.cc b/test/syscalls/linux/mkdir.cc new file mode 100644 index 000000000..4036a9275 --- /dev/null +++ b/test/syscalls/linux/mkdir.cc @@ -0,0 +1,88 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <fcntl.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "test/util/capability_util.h" +#include "test/util/fs_util.h" +#include "test/util/temp_path.h" +#include "test/util/temp_umask.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +class MkdirTest : public ::testing::Test { + protected: + // SetUp creates various configurations of files. + void SetUp() override { dirname_ = NewTempAbsPath(); } + + // TearDown unlinks created files. + void TearDown() override { + EXPECT_THAT(rmdir(dirname_.c_str()), SyscallSucceeds()); + } + + std::string dirname_; +}; + +TEST_F(MkdirTest, CanCreateWritableDir) { + ASSERT_THAT(mkdir(dirname_.c_str(), 0777), SyscallSucceeds()); + std::string filename = JoinPath(dirname_, "anything"); + int fd; + ASSERT_THAT(fd = open(filename.c_str(), O_RDWR | O_CREAT, 0666), + SyscallSucceeds()); + EXPECT_THAT(close(fd), SyscallSucceeds()); + ASSERT_THAT(unlink(filename.c_str()), SyscallSucceeds()); +} + +TEST_F(MkdirTest, HonorsUmask) { + constexpr mode_t kMask = 0111; + TempUmask mask(kMask); + ASSERT_THAT(mkdir(dirname_.c_str(), 0777), SyscallSucceeds()); + struct stat statbuf; + ASSERT_THAT(stat(dirname_.c_str(), &statbuf), SyscallSucceeds()); + EXPECT_EQ(0777 & ~kMask, statbuf.st_mode & 0777); +} + +TEST_F(MkdirTest, HonorsUmask2) { + constexpr mode_t kMask = 0142; + TempUmask mask(kMask); + ASSERT_THAT(mkdir(dirname_.c_str(), 0777), SyscallSucceeds()); + struct stat statbuf; + 
ASSERT_THAT(stat(dirname_.c_str(), &statbuf), SyscallSucceeds()); + EXPECT_EQ(0777 & ~kMask, statbuf.st_mode & 0777); +} + +TEST_F(MkdirTest, FailsOnDirWithoutWritePerms) { + // Drop capabilities that allow us to override file and directory permissions. + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false)); + + ASSERT_THAT(mkdir(dirname_.c_str(), 0555), SyscallSucceeds()); + auto dir = JoinPath(dirname_.c_str(), "foo"); + EXPECT_THAT(mkdir(dir.c_str(), 0777), SyscallFailsWithErrno(EACCES)); + EXPECT_THAT(open(JoinPath(dirname_, "file").c_str(), O_RDWR | O_CREAT, 0666), + SyscallFailsWithErrno(EACCES)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/mknod.cc b/test/syscalls/linux/mknod.cc new file mode 100644 index 000000000..05dfb375a --- /dev/null +++ b/test/syscalls/linux/mknod.cc @@ -0,0 +1,190 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <errno.h> +#include <fcntl.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/un.h> +#include <unistd.h> + +#include <vector> + +#include "gtest/gtest.h" +#include "test/util/file_descriptor.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +TEST(MknodTest, RegularFile) { + const std::string node0 = NewTempAbsPath(); + EXPECT_THAT(mknod(node0.c_str(), S_IFREG, 0), SyscallSucceeds()); + + const std::string node1 = NewTempAbsPath(); + EXPECT_THAT(mknod(node1.c_str(), 0, 0), SyscallSucceeds()); +} + +TEST(MknodTest, RegularFilePermissions) { + const std::string node = NewTempAbsPath(); + mode_t newUmask = 0077; + umask(newUmask); + + // Attempt to open file with mode 0777. Not specifying file type should create + // a regualar file. + mode_t perms = S_IRWXU | S_IRWXG | S_IRWXO; + EXPECT_THAT(mknod(node.c_str(), perms, 0), SyscallSucceeds()); + + // In the absence of a default ACL, the permissions of the created node are + // (mode & ~umask). -- mknod(2) + mode_t wantPerms = perms & ~newUmask; + struct stat st; + ASSERT_THAT(stat(node.c_str(), &st), SyscallSucceeds()); + ASSERT_EQ(st.st_mode & 0777, wantPerms); + + // "Zero file type is equivalent to type S_IFREG." 
- mknod(2) + ASSERT_EQ(st.st_mode & S_IFMT, S_IFREG); +} + +TEST(MknodTest, MknodAtFIFO) { + const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const std::string fifo_relpath = NewTempRelPath(); + const std::string fifo = JoinPath(dir.path(), fifo_relpath); + + const FileDescriptor dirfd = + ASSERT_NO_ERRNO_AND_VALUE(Open(dir.path().c_str(), O_RDONLY)); + ASSERT_THAT(mknodat(dirfd.get(), fifo_relpath.c_str(), S_IFIFO | S_IRUSR, 0), + SyscallSucceeds()); + + struct stat st; + ASSERT_THAT(stat(fifo.c_str(), &st), SyscallSucceeds()); + EXPECT_TRUE(S_ISFIFO(st.st_mode)); +} + +TEST(MknodTest, MknodOnExistingPathFails) { + const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const TempPath slink = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateSymlinkTo(GetAbsoluteTestTmpdir(), file.path())); + + EXPECT_THAT(mknod(file.path().c_str(), S_IFREG, 0), + SyscallFailsWithErrno(EEXIST)); + EXPECT_THAT(mknod(file.path().c_str(), S_IFIFO, 0), + SyscallFailsWithErrno(EEXIST)); + EXPECT_THAT(mknod(slink.path().c_str(), S_IFREG, 0), + SyscallFailsWithErrno(EEXIST)); + EXPECT_THAT(mknod(slink.path().c_str(), S_IFIFO, 0), + SyscallFailsWithErrno(EEXIST)); +} + +TEST(MknodTest, UnimplementedTypesReturnError) { + const std::string path = NewTempAbsPath(); + + if (IsRunningWithVFS1()) { + ASSERT_THAT(mknod(path.c_str(), S_IFSOCK, 0), + SyscallFailsWithErrno(EOPNOTSUPP)); + } + // These will fail on linux as well since we don't have CAP_MKNOD. 
+ ASSERT_THAT(mknod(path.c_str(), S_IFCHR, 0), SyscallFailsWithErrno(EPERM)); + ASSERT_THAT(mknod(path.c_str(), S_IFBLK, 0), SyscallFailsWithErrno(EPERM)); +} + +TEST(MknodTest, Fifo) { + const std::string fifo = NewTempAbsPath(); + ASSERT_THAT(mknod(fifo.c_str(), S_IFIFO | S_IRUSR | S_IWUSR, 0), + SyscallSucceeds()); + + struct stat st; + ASSERT_THAT(stat(fifo.c_str(), &st), SyscallSucceeds()); + EXPECT_TRUE(S_ISFIFO(st.st_mode)); + + std::string msg = "some std::string"; + std::vector<char> buf(512); + + // Read-end of the pipe. + ScopedThread t([&fifo, &buf, &msg]() { + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(fifo.c_str(), O_RDONLY)); + EXPECT_THAT(ReadFd(fd.get(), buf.data(), buf.size()), + SyscallSucceedsWithValue(msg.length())); + EXPECT_EQ(msg, std::string(buf.data())); + }); + + // Write-end of the pipe. + FileDescriptor wfd = ASSERT_NO_ERRNO_AND_VALUE(Open(fifo.c_str(), O_WRONLY)); + EXPECT_THAT(WriteFd(wfd.get(), msg.c_str(), msg.length()), + SyscallSucceedsWithValue(msg.length())); +} + +TEST(MknodTest, FifoOtrunc) { + const std::string fifo = NewTempAbsPath(); + ASSERT_THAT(mknod(fifo.c_str(), S_IFIFO | S_IRUSR | S_IWUSR, 0), + SyscallSucceeds()); + + struct stat st = {}; + ASSERT_THAT(stat(fifo.c_str(), &st), SyscallSucceeds()); + EXPECT_TRUE(S_ISFIFO(st.st_mode)); + + std::string msg = "some std::string"; + std::vector<char> buf(512); + // Read-end of the pipe. + ScopedThread t([&fifo, &buf, &msg]() { + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(fifo.c_str(), O_RDONLY)); + EXPECT_THAT(ReadFd(fd.get(), buf.data(), buf.size()), + SyscallSucceedsWithValue(msg.length())); + EXPECT_EQ(msg, std::string(buf.data())); + }); + + // Write-end of the pipe. 
+ FileDescriptor wfd = + ASSERT_NO_ERRNO_AND_VALUE(Open(fifo.c_str(), O_WRONLY | O_TRUNC)); + EXPECT_THAT(WriteFd(wfd.get(), msg.c_str(), msg.length()), + SyscallSucceedsWithValue(msg.length())); +} + +TEST(MknodTest, FifoTruncNoOp) { + const std::string fifo = NewTempAbsPath(); + ASSERT_THAT(mknod(fifo.c_str(), S_IFIFO | S_IRUSR | S_IWUSR, 0), + SyscallSucceeds()); + + EXPECT_THAT(truncate(fifo.c_str(), 0), SyscallFailsWithErrno(EINVAL)); + + struct stat st = {}; + ASSERT_THAT(stat(fifo.c_str(), &st), SyscallSucceeds()); + EXPECT_TRUE(S_ISFIFO(st.st_mode)); + + std::string msg = "some std::string"; + std::vector<char> buf(512); + // Read-end of the pipe. + ScopedThread t([&fifo, &buf, &msg]() { + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(fifo.c_str(), O_RDONLY)); + EXPECT_THAT(ReadFd(fd.get(), buf.data(), buf.size()), + SyscallSucceedsWithValue(msg.length())); + EXPECT_EQ(msg, std::string(buf.data())); + }); + + FileDescriptor wfd = + ASSERT_NO_ERRNO_AND_VALUE(Open(fifo.c_str(), O_WRONLY | O_TRUNC)); + EXPECT_THAT(ftruncate(wfd.get(), 0), SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(WriteFd(wfd.get(), msg.c_str(), msg.length()), + SyscallSucceedsWithValue(msg.length())); + EXPECT_THAT(ftruncate(wfd.get(), 0), SyscallFailsWithErrno(EINVAL)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/mlock.cc b/test/syscalls/linux/mlock.cc new file mode 100644 index 000000000..78ac96bed --- /dev/null +++ b/test/syscalls/linux/mlock.cc @@ -0,0 +1,332 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <sys/mman.h> +#include <sys/resource.h> +#include <sys/syscall.h> +#include <unistd.h> + +#include <cerrno> +#include <cstring> + +#include "gmock/gmock.h" +#include "test/util/capability_util.h" +#include "test/util/cleanup.h" +#include "test/util/memory_util.h" +#include "test/util/multiprocess_util.h" +#include "test/util/rlimit_util.h" +#include "test/util/test_util.h" + +using ::testing::_; + +namespace gvisor { +namespace testing { + +namespace { + +PosixErrorOr<bool> CanMlock() { + struct rlimit rlim; + if (getrlimit(RLIMIT_MEMLOCK, &rlim) < 0) { + return PosixError(errno, "getrlimit(RLIMIT_MEMLOCK)"); + } + if (rlim.rlim_cur != 0) { + return true; + } + return HaveCapability(CAP_IPC_LOCK); +} + +// Returns true if the page containing addr is mlocked. +bool IsPageMlocked(uintptr_t addr) { + // This relies on msync(MS_INVALIDATE) interacting correctly with mlocked + // pages, which is tested for by the MsyncInvalidate case below. + int const rv = msync(reinterpret_cast<void*>(addr & ~(kPageSize - 1)), + kPageSize, MS_ASYNC | MS_INVALIDATE); + if (rv == 0) { + return false; + } + // This uses TEST_PCHECK_MSG since it's used in subprocesses. 
+ TEST_PCHECK_MSG(errno == EBUSY, "msync failed with unexpected errno"); + return true; +} + +TEST(MlockTest, Basic) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock())); + auto const mapping = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE)); + EXPECT_FALSE(IsPageMlocked(mapping.addr())); + ASSERT_THAT(mlock(mapping.ptr(), mapping.len()), SyscallSucceeds()); + EXPECT_TRUE(IsPageMlocked(mapping.addr())); +} + +TEST(MlockTest, ProtNone) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock())); + auto const mapping = + ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_NONE, MAP_PRIVATE)); + EXPECT_FALSE(IsPageMlocked(mapping.addr())); + ASSERT_THAT(mlock(mapping.ptr(), mapping.len()), + SyscallFailsWithErrno(ENOMEM)); + // ENOMEM is returned because mlock can't populate the page, but it's still + // considered locked. + EXPECT_TRUE(IsPageMlocked(mapping.addr())); +} + +TEST(MlockTest, MadviseDontneed) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock())); + auto const mapping = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE)); + ASSERT_THAT(mlock(mapping.ptr(), mapping.len()), SyscallSucceeds()); + EXPECT_THAT(madvise(mapping.ptr(), mapping.len(), MADV_DONTNEED), + SyscallFailsWithErrno(EINVAL)); +} + +TEST(MlockTest, MsyncInvalidate) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock())); + auto const mapping = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE)); + ASSERT_THAT(mlock(mapping.ptr(), mapping.len()), SyscallSucceeds()); + EXPECT_THAT(msync(mapping.ptr(), mapping.len(), MS_ASYNC | MS_INVALIDATE), + SyscallFailsWithErrno(EBUSY)); + EXPECT_THAT(msync(mapping.ptr(), mapping.len(), MS_SYNC | MS_INVALIDATE), + SyscallFailsWithErrno(EBUSY)); +} + +TEST(MlockTest, Fork) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock())); + auto const mapping = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE)); + 
EXPECT_FALSE(IsPageMlocked(mapping.addr())); + ASSERT_THAT(mlock(mapping.ptr(), mapping.len()), SyscallSucceeds()); + EXPECT_TRUE(IsPageMlocked(mapping.addr())); + EXPECT_THAT( + InForkedProcess([&] { TEST_CHECK(!IsPageMlocked(mapping.addr())); }), + IsPosixErrorOkAndHolds(0)); +} + +TEST(MlockTest, RlimitMemlockZero) { + if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_IPC_LOCK))) { + ASSERT_NO_ERRNO(SetCapability(CAP_IPC_LOCK, false)); + } + Cleanup reset_rlimit = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSetSoftRlimit(RLIMIT_MEMLOCK, 0)); + auto const mapping = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE)); + EXPECT_FALSE(IsPageMlocked(mapping.addr())); + ASSERT_THAT(mlock(mapping.ptr(), mapping.len()), + SyscallFailsWithErrno(EPERM)); +} + +TEST(MlockTest, RlimitMemlockInsufficient) { + if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_IPC_LOCK))) { + ASSERT_NO_ERRNO(SetCapability(CAP_IPC_LOCK, false)); + } + Cleanup reset_rlimit = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSetSoftRlimit(RLIMIT_MEMLOCK, kPageSize)); + auto const mapping = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(2 * kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE)); + EXPECT_FALSE(IsPageMlocked(mapping.addr())); + ASSERT_THAT(mlock(mapping.ptr(), mapping.len()), + SyscallFailsWithErrno(ENOMEM)); +} + +TEST(MunlockTest, Basic) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock())); + auto const mapping = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE)); + EXPECT_FALSE(IsPageMlocked(mapping.addr())); + ASSERT_THAT(mlock(mapping.ptr(), mapping.len()), SyscallSucceeds()); + EXPECT_TRUE(IsPageMlocked(mapping.addr())); + ASSERT_THAT(munlock(mapping.ptr(), mapping.len()), SyscallSucceeds()); + EXPECT_FALSE(IsPageMlocked(mapping.addr())); +} + +TEST(MunlockTest, NotLocked) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock())); + auto const mapping = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE)); + 
EXPECT_FALSE(IsPageMlocked(mapping.addr())); + EXPECT_THAT(munlock(mapping.ptr(), mapping.len()), SyscallSucceeds()); + EXPECT_FALSE(IsPageMlocked(mapping.addr())); +} + +// There is currently no test for mlockall(MCL_CURRENT) because the default +// RLIMIT_MEMLOCK of 64 KB is insufficient to actually invoke +// mlockall(MCL_CURRENT). + +TEST(MlockallTest, Future) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock())); + + // Run this test in a separate (single-threaded) subprocess to ensure that a + // background thread doesn't try to mmap a large amount of memory, fail due + // to hitting RLIMIT_MEMLOCK, and explode the process violently. + auto const do_test = [] { + auto const mapping = + MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE).ValueOrDie(); + TEST_CHECK(!IsPageMlocked(mapping.addr())); + TEST_PCHECK(mlockall(MCL_FUTURE) == 0); + // Ensure that mlockall(MCL_FUTURE) is turned off before the end of the + // test, as otherwise mmaps may fail unexpectedly. + Cleanup do_munlockall([] { TEST_PCHECK(munlockall() == 0); }); + auto const mapping2 = + MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE).ValueOrDie(); + TEST_CHECK(IsPageMlocked(mapping2.addr())); + // Fire munlockall() and check that it disables mlockall(MCL_FUTURE). 
+ do_munlockall.Release()(); + auto const mapping3 = + MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE).ValueOrDie(); + TEST_CHECK(!IsPageMlocked(mapping2.addr())); + }; + EXPECT_THAT(InForkedProcess(do_test), IsPosixErrorOkAndHolds(0)); +} + +TEST(MunlockallTest, Basic) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock())); + auto const mapping = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_LOCKED)); + EXPECT_TRUE(IsPageMlocked(mapping.addr())); + ASSERT_THAT(munlockall(), SyscallSucceeds()); + EXPECT_FALSE(IsPageMlocked(mapping.addr())); +} + +#ifndef SYS_mlock2 +#if defined(__x86_64__) +#define SYS_mlock2 325 +#elif defined(__aarch64__) +#define SYS_mlock2 284 +#endif +#endif + +#ifndef MLOCK_ONFAULT +#define MLOCK_ONFAULT 0x01 // Linux: include/uapi/asm-generic/mman-common.h +#endif + +#ifdef SYS_mlock2 + +int mlock2(void const* addr, size_t len, int flags) { + return syscall(SYS_mlock2, addr, len, flags); +} + +TEST(Mlock2Test, NoFlags) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock())); + auto const mapping = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE)); + EXPECT_FALSE(IsPageMlocked(mapping.addr())); + ASSERT_THAT(mlock2(mapping.ptr(), mapping.len(), 0), SyscallSucceeds()); + EXPECT_TRUE(IsPageMlocked(mapping.addr())); +} + +TEST(Mlock2Test, MlockOnfault) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock())); + auto const mapping = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE)); + EXPECT_FALSE(IsPageMlocked(mapping.addr())); + ASSERT_THAT(mlock2(mapping.ptr(), mapping.len(), MLOCK_ONFAULT), + SyscallSucceeds()); + EXPECT_TRUE(IsPageMlocked(mapping.addr())); +} + +TEST(Mlock2Test, UnknownFlags) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock())); + auto const mapping = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE)); + EXPECT_THAT(mlock2(mapping.ptr(), mapping.len(), ~0), + 
SyscallFailsWithErrno(EINVAL)); +} + +#endif // defined(SYS_mlock2) + +TEST(MapLockedTest, Basic) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock())); + auto const mapping = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_LOCKED)); + EXPECT_TRUE(IsPageMlocked(mapping.addr())); + EXPECT_THAT(munlock(mapping.ptr(), mapping.len()), SyscallSucceeds()); + EXPECT_FALSE(IsPageMlocked(mapping.addr())); +} + +TEST(MapLockedTest, RlimitMemlockZero) { + if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_IPC_LOCK))) { + ASSERT_NO_ERRNO(SetCapability(CAP_IPC_LOCK, false)); + } + Cleanup reset_rlimit = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSetSoftRlimit(RLIMIT_MEMLOCK, 0)); + EXPECT_THAT( + MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_LOCKED), + PosixErrorIs(EPERM, _)); +} + +TEST(MapLockedTest, RlimitMemlockInsufficient) { + if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_IPC_LOCK))) { + ASSERT_NO_ERRNO(SetCapability(CAP_IPC_LOCK, false)); + } + Cleanup reset_rlimit = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSetSoftRlimit(RLIMIT_MEMLOCK, kPageSize)); + EXPECT_THAT( + MmapAnon(2 * kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_LOCKED), + PosixErrorIs(EAGAIN, _)); +} + +TEST(MremapLockedTest, Basic) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock())); + auto mapping = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_LOCKED)); + EXPECT_TRUE(IsPageMlocked(mapping.addr())); + + void* addr = mremap(mapping.ptr(), mapping.len(), 2 * mapping.len(), + MREMAP_MAYMOVE, nullptr); + if (addr == MAP_FAILED) { + FAIL() << "mremap failed: " << errno << " (" << strerror(errno) << ")"; + } + mapping.release(); + mapping.reset(addr, 2 * mapping.len()); + EXPECT_TRUE(IsPageMlocked(reinterpret_cast<uintptr_t>(addr))); +} + +TEST(MremapLockedTest, RlimitMemlockZero) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock())); + auto mapping = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kPageSize, PROT_READ | 
PROT_WRITE, MAP_PRIVATE | MAP_LOCKED)); + EXPECT_TRUE(IsPageMlocked(mapping.addr())); + + if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_IPC_LOCK))) { + ASSERT_NO_ERRNO(SetCapability(CAP_IPC_LOCK, false)); + } + Cleanup reset_rlimit = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSetSoftRlimit(RLIMIT_MEMLOCK, 0)); + void* addr = mremap(mapping.ptr(), mapping.len(), 2 * mapping.len(), + MREMAP_MAYMOVE, nullptr); + EXPECT_TRUE(addr == MAP_FAILED && errno == EAGAIN) + << "addr = " << addr << ", errno = " << errno; +} + +TEST(MremapLockedTest, RlimitMemlockInsufficient) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock())); + auto mapping = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_LOCKED)); + EXPECT_TRUE(IsPageMlocked(mapping.addr())); + + if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_IPC_LOCK))) { + ASSERT_NO_ERRNO(SetCapability(CAP_IPC_LOCK, false)); + } + Cleanup reset_rlimit = ASSERT_NO_ERRNO_AND_VALUE( + ScopedSetSoftRlimit(RLIMIT_MEMLOCK, mapping.len())); + void* addr = mremap(mapping.ptr(), mapping.len(), 2 * mapping.len(), + MREMAP_MAYMOVE, nullptr); + EXPECT_TRUE(addr == MAP_FAILED && errno == EAGAIN) + << "addr = " << addr << ", errno = " << errno; +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/mmap.cc b/test/syscalls/linux/mmap.cc new file mode 100644 index 000000000..6d3227ab6 --- /dev/null +++ b/test/syscalls/linux/mmap.cc @@ -0,0 +1,1676 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include <errno.h> +#include <fcntl.h> +#include <linux/magic.h> +#include <linux/unistd.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/resource.h> +#include <sys/statfs.h> +#include <sys/syscall.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/strings/escaping.h" +#include "absl/strings/str_split.h" +#include "test/util/cleanup.h" +#include "test/util/file_descriptor.h" +#include "test/util/fs_util.h" +#include "test/util/memory_util.h" +#include "test/util/multiprocess_util.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +using ::testing::Gt; + +namespace gvisor { +namespace testing { + +namespace { + +PosixErrorOr<int64_t> VirtualMemorySize() { + ASSIGN_OR_RETURN_ERRNO(auto contents, GetContents("/proc/self/statm")); + std::vector<std::string> parts = absl::StrSplit(contents, ' '); + if (parts.empty()) { + return PosixError(EINVAL, "Unable to parse /proc/self/statm"); + } + ASSIGN_OR_RETURN_ERRNO(auto pages, Atoi<int64_t>(parts[0])); + return pages * getpagesize(); +} + +class MMapTest : public ::testing::Test { + protected: + // Unmap mapping, if one was made. + void TearDown() override { + if (addr_) { + EXPECT_THAT(Unmap(), SyscallSucceeds()); + } + } + + // Remembers mapping, so it can be automatically unmapped. 
+ uintptr_t Map(uintptr_t addr, size_t length, int prot, int flags, int fd, + off_t offset) { + void* ret = + mmap(reinterpret_cast<void*>(addr), length, prot, flags, fd, offset); + + if (ret != MAP_FAILED) { + addr_ = ret; + length_ = length; + } + + return reinterpret_cast<uintptr_t>(ret); + } + + // Unmap previous mapping + int Unmap() { + if (!addr_) { + return -1; + } + + int ret = munmap(addr_, length_); + + addr_ = nullptr; + length_ = 0; + + return ret; + } + + // Msync the mapping. + int Msync() { return msync(addr_, length_, MS_SYNC); } + + // Mlock the mapping. + int Mlock() { return mlock(addr_, length_); } + + // Munlock the mapping. + int Munlock() { return munlock(addr_, length_); } + + int Protect(uintptr_t addr, size_t length, int prot) { + return mprotect(reinterpret_cast<void*>(addr), length, prot); + } + + void* addr_ = nullptr; + size_t length_ = 0; +}; + +// Matches if arg contains the same contents as string str. +MATCHER_P(EqualsMemory, str, "") { + if (0 == memcmp(arg, str.c_str(), str.size())) { + return true; + } + + *result_listener << "Memory did not match. Got:\n" + << absl::BytesToHexString( + std::string(static_cast<char*>(arg), str.size())) + << "Want:\n" + << absl::BytesToHexString(str); + return false; +} + +// We can't map pipes, but for different reasons. +TEST_F(MMapTest, MapPipe) { + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + EXPECT_THAT(Map(0, kPageSize, PROT_READ, MAP_PRIVATE, fds[0], 0), + SyscallFailsWithErrno(ENODEV)); + EXPECT_THAT(Map(0, kPageSize, PROT_READ, MAP_PRIVATE, fds[1], 0), + SyscallFailsWithErrno(EACCES)); + ASSERT_THAT(close(fds[0]), SyscallSucceeds()); + ASSERT_THAT(close(fds[1]), SyscallSucceeds()); +} + +// It's very common to mmap /dev/zero because anonymous mappings aren't part +// of POSIX although they are widely supported. So a zero initialized memory +// region would actually come from a "file backed" /dev/zero mapping. 
+TEST_F(MMapTest, MapDevZeroShared) { + // This test will verify that we're able to map a page backed by /dev/zero + // as MAP_SHARED. + const FileDescriptor dev_zero = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR)); + + // Test that we can create a RW SHARED mapping of /dev/zero. + ASSERT_THAT( + Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, dev_zero.get(), 0), + SyscallSucceeds()); +} + +TEST_F(MMapTest, MapDevZeroPrivate) { + // This test will verify that we're able to map a page backed by /dev/zero + // as MAP_PRIVATE. + const FileDescriptor dev_zero = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR)); + + // Test that we can create a RW SHARED mapping of /dev/zero. + ASSERT_THAT( + Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE, dev_zero.get(), 0), + SyscallSucceeds()); +} + +TEST_F(MMapTest, MapDevZeroNoPersistence) { + // This test will verify that two independent mappings of /dev/zero do not + // appear to reference the same "backed file." + + const FileDescriptor dev_zero1 = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR)); + const FileDescriptor dev_zero2 = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR)); + + ASSERT_THAT( + Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, dev_zero1.get(), 0), + SyscallSucceeds()); + + // Create a second mapping via the second /dev/zero fd. + void* psec_map = mmap(nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, + dev_zero2.get(), 0); + ASSERT_THAT(reinterpret_cast<intptr_t>(psec_map), SyscallSucceeds()); + + // Always unmap. + auto cleanup_psec_map = Cleanup( + [&] { EXPECT_THAT(munmap(psec_map, kPageSize), SyscallSucceeds()); }); + + // Verify that we have independently addressed pages. + ASSERT_NE(psec_map, addr_); + + std::string buf_zero(kPageSize, 0x00); + std::string buf_ones(kPageSize, 0xFF); + + // Verify the first is actually all zeros after mmap. + EXPECT_THAT(addr_, EqualsMemory(buf_zero)); + + // Let's fill in the first mapping with 0xFF. 
+ memcpy(addr_, buf_ones.data(), kPageSize); + + // Verify that the memcpy actually stuck in the page. + EXPECT_THAT(addr_, EqualsMemory(buf_ones)); + + // Verify that it didn't affect the second page which should be all zeros. + EXPECT_THAT(psec_map, EqualsMemory(buf_zero)); +} + +TEST_F(MMapTest, MapDevZeroSharedMultiplePages) { + // This will test that we're able to map /dev/zero over multiple pages. + const FileDescriptor dev_zero = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR)); + + // Test that we can create a RW SHARED mapping of /dev/zero. + ASSERT_THAT(Map(0, kPageSize * 2, PROT_READ | PROT_WRITE, MAP_PRIVATE, + dev_zero.get(), 0), + SyscallSucceeds()); + + std::string buf_zero(kPageSize * 2, 0x00); + std::string buf_ones(kPageSize * 2, 0xFF); + + // Verify the two pages are actually all zeros after mmap. + EXPECT_THAT(addr_, EqualsMemory(buf_zero)); + + // Fill out the pages with all ones. + memcpy(addr_, buf_ones.data(), kPageSize * 2); + + // Verify that the memcpy actually stuck in the pages. + EXPECT_THAT(addr_, EqualsMemory(buf_ones)); +} + +TEST_F(MMapTest, MapDevZeroSharedFdNoPersistence) { + // This test will verify that two independent mappings of /dev/zero do not + // appear to reference the same "backed file" even when mapped from the + // same initial fd. + const FileDescriptor dev_zero = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR)); + + ASSERT_THAT( + Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, dev_zero.get(), 0), + SyscallSucceeds()); + + // Create a second mapping via the same fd. + void* psec_map = mmap(nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, + dev_zero.get(), 0); + ASSERT_THAT(reinterpret_cast<int64_t>(psec_map), SyscallSucceeds()); + + // Always unmap. + auto cleanup_psec_map = Cleanup( + [&] { ASSERT_THAT(munmap(psec_map, kPageSize), SyscallSucceeds()); }); + + // Verify that we have independently addressed pages. 
+ ASSERT_NE(psec_map, addr_); + + std::string buf_zero(kPageSize, 0x00); + std::string buf_ones(kPageSize, 0xFF); + + // Verify the first is actually all zeros after mmap. + EXPECT_THAT(addr_, EqualsMemory(buf_zero)); + + // Let's fill in the first mapping with 0xFF. + memcpy(addr_, buf_ones.data(), kPageSize); + + // Verify that the memcpy actually stuck in the page. + EXPECT_THAT(addr_, EqualsMemory(buf_ones)); + + // Verify that it didn't affect the second page which should be all zeros. + EXPECT_THAT(psec_map, EqualsMemory(buf_zero)); +} + +TEST_F(MMapTest, MapDevZeroSegfaultAfterUnmap) { + SetupGvisorDeathTest(); + + // This test will verify that we're able to map a page backed by /dev/zero + // as MAP_SHARED and after it's unmapped any access results in a SIGSEGV. + // This test is redundant but given the special nature of /dev/zero mappings + // it doesn't hurt. + const FileDescriptor dev_zero = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR)); + + const auto rest = [&] { + // Test that we can create a RW SHARED mapping of /dev/zero. + TEST_PCHECK(Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, + dev_zero.get(), + 0) != reinterpret_cast<uintptr_t>(MAP_FAILED)); + + // Confirm that accesses after the unmap result in a SIGSEGV. + // + // N.B. We depend on this process being single-threaded to ensure there + // can't be another mmap to map addr before the dereference below. + void* addr_saved = addr_; // Unmap resets addr_. 
+ TEST_PCHECK(Unmap() == 0); + *reinterpret_cast<volatile int*>(addr_saved) = 0xFF; + }; + + EXPECT_THAT(InForkedProcess(rest), + IsPosixErrorOkAndHolds(W_EXITCODE(0, SIGSEGV))); +} + +TEST_F(MMapTest, MapDevZeroUnaligned) { + const FileDescriptor dev_zero = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR)); + const size_t size = kPageSize + kPageSize / 2; + const std::string buf_zero(size, 0x00); + + ASSERT_THAT( + Map(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, dev_zero.get(), 0), + SyscallSucceeds()); + EXPECT_THAT(addr_, EqualsMemory(buf_zero)); + ASSERT_THAT(Unmap(), SyscallSucceeds()); + + ASSERT_THAT( + Map(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE, dev_zero.get(), 0), + SyscallSucceeds()); + EXPECT_THAT(addr_, EqualsMemory(buf_zero)); +} + +// We can't map _some_ character devices. +TEST_F(MMapTest, MapCharDevice) { + const FileDescriptor cdevfd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/random", 0, 0)); + EXPECT_THAT(Map(0, kPageSize, PROT_READ, MAP_PRIVATE, cdevfd.get(), 0), + SyscallFailsWithErrno(ENODEV)); +} + +// We can't map directories. +TEST_F(MMapTest, MapDirectory) { + const FileDescriptor dirfd = + ASSERT_NO_ERRNO_AND_VALUE(Open(GetAbsoluteTestTmpdir(), 0, 0)); + EXPECT_THAT(Map(0, kPageSize, PROT_READ, MAP_PRIVATE, dirfd.get(), 0), + SyscallFailsWithErrno(ENODEV)); +} + +// We can map *something* +TEST_F(MMapTest, MapAnything) { + EXPECT_THAT(Map(0, kPageSize, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallSucceedsWithValue(Gt(0))); +} + +// Map length < PageSize allowed +TEST_F(MMapTest, SmallMap) { + EXPECT_THAT(Map(0, 128, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallSucceeds()); +} + +// Hint address doesn't break anything. 
+// Note: there is no requirement we actually get the hint address +TEST_F(MMapTest, HintAddress) { + EXPECT_THAT( + Map(0x30000000, kPageSize, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallSucceeds()); +} + +// MAP_FIXED gives us exactly the requested address +TEST_F(MMapTest, MapFixed) { + EXPECT_THAT(Map(0x30000000, kPageSize, PROT_NONE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0), + SyscallSucceedsWithValue(0x30000000)); +} + +// 64-bit addresses work too +#if defined(__x86_64__) || defined(__aarch64__) +TEST_F(MMapTest, MapFixed64) { + EXPECT_THAT(Map(0x300000000000, kPageSize, PROT_NONE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0), + SyscallSucceedsWithValue(0x300000000000)); +} +#endif + +// MAP_STACK allowed. +// There isn't a good way to verify it did anything. +TEST_F(MMapTest, MapStack) { + EXPECT_THAT(Map(0, kPageSize, PROT_NONE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0), + SyscallSucceeds()); +} + +// MAP_LOCKED allowed. +// There isn't a good way to verify it did anything. 
+TEST_F(MMapTest, MapLocked) { + EXPECT_THAT(Map(0, kPageSize, PROT_NONE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_LOCKED, -1, 0), + SyscallSucceeds()); +} + +// MAP_PRIVATE or MAP_SHARED must be passed +TEST_F(MMapTest, NotPrivateOrShared) { + EXPECT_THAT(Map(0, kPageSize, PROT_NONE, MAP_ANONYMOUS, -1, 0), + SyscallFailsWithErrno(EINVAL)); +} + +// Only one of MAP_PRIVATE or MAP_SHARED may be passed +TEST_F(MMapTest, PrivateAndShared) { + EXPECT_THAT(Map(0, kPageSize, PROT_NONE, + MAP_PRIVATE | MAP_SHARED | MAP_ANONYMOUS, -1, 0), + SyscallFailsWithErrno(EINVAL)); +} + +TEST_F(MMapTest, FixedAlignment) { + // Addr must be page aligned (MAP_FIXED) + EXPECT_THAT(Map(0x30000001, kPageSize, PROT_NONE, + MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS, -1, 0), + SyscallFailsWithErrno(EINVAL)); +} + +// Non-MAP_FIXED address does not need to be page aligned +TEST_F(MMapTest, NonFixedAlignment) { + EXPECT_THAT( + Map(0x30000001, kPageSize, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallSucceeds()); +} + +// Length = 0 results in EINVAL. +TEST_F(MMapTest, InvalidLength) { + EXPECT_THAT(Map(0, 0, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallFailsWithErrno(EINVAL)); +} + +// Bad fd not allowed. +TEST_F(MMapTest, BadFd) { + EXPECT_THAT(Map(0, kPageSize, PROT_NONE, MAP_PRIVATE, 999, 0), + SyscallFailsWithErrno(EBADF)); +} + +// Mappings are writable. +TEST_F(MMapTest, ProtWrite) { + uint64_t addr; + constexpr uint8_t kFirstWord[] = {42, 42, 42, 42}; + + EXPECT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallSucceeds()); + + // This shouldn't cause a SIGSEGV. + memset(reinterpret_cast<void*>(addr), 42, kPageSize); + + // The written data should actually be there. + EXPECT_EQ( + 0, memcmp(reinterpret_cast<void*>(addr), kFirstWord, sizeof(kFirstWord))); +} + +// "Write-only" mappings are writable *and* readable. 
+TEST_F(MMapTest, ProtWriteOnly) { + uint64_t addr; + constexpr uint8_t kFirstWord[] = {42, 42, 42, 42}; + + EXPECT_THAT( + addr = Map(0, kPageSize, PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallSucceeds()); + + // This shouldn't cause a SIGSEGV. + memset(reinterpret_cast<void*>(addr), 42, kPageSize); + + // The written data should actually be there. + EXPECT_EQ( + 0, memcmp(reinterpret_cast<void*>(addr), kFirstWord, sizeof(kFirstWord))); +} + +// "Write-only" mappings are readable. +// +// This is distinct from above to ensure the page is accessible even if the +// initial fault is a write fault. +TEST_F(MMapTest, ProtWriteOnlyReadable) { + uint64_t addr; + constexpr uint64_t kFirstWord = 0; + + EXPECT_THAT( + addr = Map(0, kPageSize, PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallSucceeds()); + + EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), &kFirstWord, + sizeof(kFirstWord))); +} + +// Mappings are writable after mprotect from PROT_NONE to PROT_READ|PROT_WRITE. +TEST_F(MMapTest, ProtectProtWrite) { + uint64_t addr; + constexpr uint8_t kFirstWord[] = {42, 42, 42, 42}; + + EXPECT_THAT( + addr = Map(0, kPageSize, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallSucceeds()); + + ASSERT_THAT(Protect(addr, kPageSize, PROT_READ | PROT_WRITE), + SyscallSucceeds()); + + // This shouldn't cause a SIGSEGV. + memset(reinterpret_cast<void*>(addr), 42, kPageSize); + + // The written data should actually be there. 
+ EXPECT_EQ( + 0, memcmp(reinterpret_cast<void*>(addr), kFirstWord, sizeof(kFirstWord))); +} + +// SIGSEGV raised when reading PROT_NONE memory +TEST_F(MMapTest, ProtNoneDeath) { + SetupGvisorDeathTest(); + + uintptr_t addr; + + ASSERT_THAT( + addr = Map(0, kPageSize, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallSucceeds()); + + EXPECT_EXIT(*reinterpret_cast<volatile int*>(addr), + ::testing::KilledBySignal(SIGSEGV), ""); +} + +// SIGSEGV raised when writing PROT_READ only memory +TEST_F(MMapTest, ReadOnlyDeath) { + SetupGvisorDeathTest(); + + uintptr_t addr; + + ASSERT_THAT( + addr = Map(0, kPageSize, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallSucceeds()); + + EXPECT_EXIT(*reinterpret_cast<volatile int*>(addr) = 42, + ::testing::KilledBySignal(SIGSEGV), ""); +} + +// Writable mapping mprotect'd to read-only should not be writable. +TEST_F(MMapTest, MprotectReadOnlyDeath) { + SetupGvisorDeathTest(); + + uintptr_t addr; + + ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallSucceeds()); + + volatile int* val = reinterpret_cast<int*>(addr); + + // Copy to ensure page is mapped in. + *val = 42; + + ASSERT_THAT(Protect(addr, kPageSize, PROT_READ), SyscallSucceeds()); + + // Now it shouldn't be writable. + EXPECT_EXIT(*val = 0, ::testing::KilledBySignal(SIGSEGV), ""); +} + +// Verify that calling mprotect an address that's not page aligned fails. +TEST_F(MMapTest, MprotectNotPageAligned) { + uintptr_t addr; + + ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallSucceeds()); + ASSERT_THAT(Protect(addr + 1, kPageSize - 1, PROT_READ), + SyscallFailsWithErrno(EINVAL)); +} + +// Verify that calling mprotect with an absurdly huge length fails. 
+TEST_F(MMapTest, MprotectHugeLength) { + uintptr_t addr; + + ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallSucceeds()); + ASSERT_THAT(Protect(addr, static_cast<size_t>(-1), PROT_READ), + SyscallFailsWithErrno(ENOMEM)); +} + +#if defined(__x86_64__) || defined(__i386__) +// This code is equivalent in 32 and 64-bit mode +const uint8_t machine_code[] = { + 0xb8, 0x2a, 0x00, 0x00, 0x00, // movl $42, %eax + 0xc3, // retq +}; +#elif defined(__aarch64__) +const uint8_t machine_code[] = { + 0x40, 0x05, 0x80, 0x52, // mov w0, #42 + 0xc0, 0x03, 0x5f, 0xd6, // ret +}; +#endif + +// PROT_EXEC allows code execution +TEST_F(MMapTest, ProtExec) { + uintptr_t addr; + uint32_t (*func)(void); + + EXPECT_THAT(addr = Map(0, kPageSize, PROT_EXEC | PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallSucceeds()); + + memcpy(reinterpret_cast<void*>(addr), machine_code, sizeof(machine_code)); + + func = reinterpret_cast<uint32_t (*)(void)>(addr); + + EXPECT_EQ(42, func()); +} + +// No PROT_EXEC disallows code execution +TEST_F(MMapTest, NoProtExecDeath) { + SetupGvisorDeathTest(); + + uintptr_t addr; + uint32_t (*func)(void); + + EXPECT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallSucceeds()); + + memcpy(reinterpret_cast<void*>(addr), machine_code, sizeof(machine_code)); + + func = reinterpret_cast<uint32_t (*)(void)>(addr); + + EXPECT_EXIT(func(), ::testing::KilledBySignal(SIGSEGV), ""); +} + +TEST_F(MMapTest, NoExceedLimitData) { + void* prevbrk; + void* target_brk; + struct rlimit setlim; + + prevbrk = sbrk(0); + ASSERT_NE(-1, reinterpret_cast<intptr_t>(prevbrk)); + target_brk = reinterpret_cast<char*>(prevbrk) + 1; + + setlim.rlim_cur = RLIM_INFINITY; + setlim.rlim_max = RLIM_INFINITY; + ASSERT_THAT(setrlimit(RLIMIT_DATA, &setlim), SyscallSucceeds()); + EXPECT_THAT(brk(target_brk), SyscallSucceedsWithValue(0)); +} + +TEST_F(MMapTest, 
ExceedLimitData) { + // To unit test this more precisely, we'd need access to the mm's start_brk + // and end_brk, which we don't have direct access to :/ + void* prevbrk; + void* target_brk; + struct rlimit setlim; + + prevbrk = sbrk(0); + ASSERT_NE(-1, reinterpret_cast<intptr_t>(prevbrk)); + target_brk = reinterpret_cast<char*>(prevbrk) + 8192; + + setlim.rlim_cur = 0; + setlim.rlim_max = RLIM_INFINITY; + // Set RLIMIT_DATA very low so any subsequent brk() calls fail. + // Reset RLIMIT_DATA during teardown step. + ASSERT_THAT(setrlimit(RLIMIT_DATA, &setlim), SyscallSucceeds()); + EXPECT_THAT(brk(target_brk), SyscallFailsWithErrno(ENOMEM)); + // Teardown step... + setlim.rlim_cur = RLIM_INFINITY; + ASSERT_THAT(setrlimit(RLIMIT_DATA, &setlim), SyscallSucceeds()); +} + +TEST_F(MMapTest, ExceedLimitDataPrlimit) { + // To unit test this more precisely, we'd need access to the mm's start_brk + // and end_brk, which we don't have direct access to :/ + void* prevbrk; + void* target_brk; + struct rlimit setlim; + + prevbrk = sbrk(0); + ASSERT_NE(-1, reinterpret_cast<intptr_t>(prevbrk)); + target_brk = reinterpret_cast<char*>(prevbrk) + 8192; + + setlim.rlim_cur = 0; + setlim.rlim_max = RLIM_INFINITY; + // Set RLIMIT_DATA very low so any subsequent brk() calls fail. + // Reset RLIMIT_DATA during teardown step. + ASSERT_THAT(prlimit(0, RLIMIT_DATA, &setlim, nullptr), SyscallSucceeds()); + EXPECT_THAT(brk(target_brk), SyscallFailsWithErrno(ENOMEM)); + // Teardown step... 
+ setlim.rlim_cur = RLIM_INFINITY; + ASSERT_THAT(setrlimit(RLIMIT_DATA, &setlim), SyscallSucceeds()); +} + +TEST_F(MMapTest, ExceedLimitDataPrlimitPID) { + // To unit test this more precisely, we'd need access to the mm's start_brk + // and end_brk, which we don't have direct access to :/ + void* prevbrk; + void* target_brk; + struct rlimit setlim; + + prevbrk = sbrk(0); + ASSERT_NE(-1, reinterpret_cast<intptr_t>(prevbrk)); + target_brk = reinterpret_cast<char*>(prevbrk) + 8192; + + setlim.rlim_cur = 0; + setlim.rlim_max = RLIM_INFINITY; + // Set RLIMIT_DATA very low so any subsequent brk() calls fail. + // Reset RLIMIT_DATA during teardown step. + ASSERT_THAT(prlimit(syscall(__NR_gettid), RLIMIT_DATA, &setlim, nullptr), + SyscallSucceeds()); + EXPECT_THAT(brk(target_brk), SyscallFailsWithErrno(ENOMEM)); + // Teardown step... + setlim.rlim_cur = RLIM_INFINITY; + ASSERT_THAT(setrlimit(RLIMIT_DATA, &setlim), SyscallSucceeds()); +} + +TEST_F(MMapTest, NoExceedLimitAS) { + constexpr uint64_t kAllocBytes = 200 << 20; + // Add some headroom to the AS limit in case of e.g. unexpected stack + // expansion. + constexpr uint64_t kExtraASBytes = kAllocBytes + (20 << 20); + static_assert(kAllocBytes < kExtraASBytes, + "test depends on allocation not exceeding AS limit"); + + auto vss = ASSERT_NO_ERRNO_AND_VALUE(VirtualMemorySize()); + struct rlimit setlim; + setlim.rlim_cur = vss + kExtraASBytes; + setlim.rlim_max = RLIM_INFINITY; + ASSERT_THAT(setrlimit(RLIMIT_AS, &setlim), SyscallSucceeds()); + EXPECT_THAT( + Map(0, kAllocBytes, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallSucceedsWithValue(Gt(0))); +} + +TEST_F(MMapTest, ExceedLimitAS) { + constexpr uint64_t kAllocBytes = 200 << 20; + // Add some headroom to the AS limit in case of e.g. unexpected stack + // expansion. 
+ constexpr uint64_t kExtraASBytes = 20 << 20; + static_assert(kAllocBytes > kExtraASBytes, + "test depends on allocation exceeding AS limit"); + + auto vss = ASSERT_NO_ERRNO_AND_VALUE(VirtualMemorySize()); + struct rlimit setlim; + setlim.rlim_cur = vss + kExtraASBytes; + setlim.rlim_max = RLIM_INFINITY; + ASSERT_THAT(setrlimit(RLIMIT_AS, &setlim), SyscallSucceeds()); + EXPECT_THAT( + Map(0, kAllocBytes, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallFailsWithErrno(ENOMEM)); +} + +// Tests that setting an anonymous mmap to PROT_NONE doesn't free the memory. +TEST_F(MMapTest, SettingProtNoneDoesntFreeMemory) { + uintptr_t addr; + constexpr uint8_t kFirstWord[] = {42, 42, 42, 42}; + + EXPECT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallSucceedsWithValue(Gt(0))); + + memset(reinterpret_cast<void*>(addr), 42, kPageSize); + + ASSERT_THAT(Protect(addr, kPageSize, PROT_NONE), SyscallSucceeds()); + ASSERT_THAT(Protect(addr, kPageSize, PROT_READ | PROT_WRITE), + SyscallSucceeds()); + + // The written data should still be there. + EXPECT_EQ( + 0, memcmp(reinterpret_cast<void*>(addr), kFirstWord, sizeof(kFirstWord))); +} + +constexpr char kFileContents[] = "Hello World!"; + +class MMapFileTest : public MMapTest { + protected: + FileDescriptor fd_; + std::string filename_; + + // Open a file for read/write + void SetUp() override { + MMapTest::SetUp(); + + filename_ = NewTempAbsPath(); + fd_ = ASSERT_NO_ERRNO_AND_VALUE(Open(filename_, O_CREAT | O_RDWR, 0644)); + + // Extend file so it can be written once mapped. Deliberately make the file + // only half a page in size, so we can test what happens when we access the + // second half. + // Use ftruncate(2) once the sentry supports it. + char zero = 0; + size_t count = 0; + do { + const DisableSave ds; // saving 2048 times is slow and useless. 
+ Write(&zero, 1), SyscallSucceedsWithValue(1); + } while (++count < (kPageSize / 2)); + ASSERT_THAT(lseek(fd_.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0)); + } + + // Close and delete file + void TearDown() override { + MMapTest::TearDown(); + fd_.reset(); // Make sure the files is closed before we unlink it. + ASSERT_THAT(unlink(filename_.c_str()), SyscallSucceeds()); + } + + ssize_t Read(char* buf, size_t count) { + ssize_t len = 0; + do { + ssize_t ret = read(fd_.get(), buf, count); + if (ret < 0) { + return ret; + } else if (ret == 0) { + return len; + } + + len += ret; + buf += ret; + } while (len < static_cast<ssize_t>(count)); + + return len; + } + + ssize_t Write(const char* buf, size_t count) { + ssize_t len = 0; + do { + ssize_t ret = write(fd_.get(), buf, count); + if (ret < 0) { + return ret; + } else if (ret == 0) { + return len; + } + + len += ret; + buf += ret; + } while (len < static_cast<ssize_t>(count)); + + return len; + } +}; + +class MMapFileParamTest + : public MMapFileTest, + public ::testing::WithParamInterface<std::tuple<int, int>> { + protected: + int prot() const { return std::get<0>(GetParam()); } + + int flags() const { return std::get<1>(GetParam()); } +}; + +// MAP_POPULATE allowed. +// There isn't a good way to verify it actually did anything. +TEST_P(MMapFileParamTest, MapPopulate) { + ASSERT_THAT(Map(0, kPageSize, prot(), flags() | MAP_POPULATE, fd_.get(), 0), + SyscallSucceeds()); +} + +// MAP_POPULATE on a short file. +TEST_P(MMapFileParamTest, MapPopulateShort) { + ASSERT_THAT( + Map(0, 2 * kPageSize, prot(), flags() | MAP_POPULATE, fd_.get(), 0), + SyscallSucceeds()); +} + +// Read contents from mapped file. 
+TEST_F(MMapFileTest, Read) { + size_t len = strlen(kFileContents); + ASSERT_EQ(len, Write(kFileContents, len)); + + uintptr_t addr; + ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ, MAP_PRIVATE, fd_.get(), 0), + SyscallSucceeds()); + + EXPECT_THAT(reinterpret_cast<char*>(addr), + EqualsMemory(std::string(kFileContents))); +} + +// Map at an offset. +TEST_F(MMapFileTest, MapOffset) { + ASSERT_THAT(lseek(fd_.get(), kPageSize, SEEK_SET), SyscallSucceeds()); + + size_t len = strlen(kFileContents); + ASSERT_EQ(len, Write(kFileContents, len)); + + uintptr_t addr; + ASSERT_THAT( + addr = Map(0, kPageSize, PROT_READ, MAP_PRIVATE, fd_.get(), kPageSize), + SyscallSucceeds()); + + EXPECT_THAT(reinterpret_cast<char*>(addr), + EqualsMemory(std::string(kFileContents))); +} + +TEST_F(MMapFileTest, MapOffsetBeyondEnd) { + SetupGvisorDeathTest(); + + uintptr_t addr; + ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE, + fd_.get(), 10 * kPageSize), + SyscallSucceeds()); + + // Touching the memory causes SIGBUS. + size_t len = strlen(kFileContents); + EXPECT_EXIT(std::copy(kFileContents, kFileContents + len, + reinterpret_cast<volatile char*>(addr)), + ::testing::KilledBySignal(SIGBUS), ""); +} + +// Verify mmap fails when sum of length and offset overflows. +TEST_F(MMapFileTest, MapLengthPlusOffsetOverflows) { + const size_t length = static_cast<size_t>(-kPageSize); + const off_t offset = kPageSize; + ASSERT_THAT(Map(0, length, PROT_READ, MAP_PRIVATE, fd_.get(), offset), + SyscallFailsWithErrno(ENOMEM)); +} + +// MAP_PRIVATE PROT_WRITE is allowed on read-only FDs. +TEST_F(MMapFileTest, WritePrivateOnReadOnlyFd) { + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(filename_, O_RDONLY)); + + uintptr_t addr; + EXPECT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE, + fd.get(), 0), + SyscallSucceeds()); + + // Touch the page to ensure the kernel didn't lie about writability. 
+ size_t len = strlen(kFileContents); + std::copy(kFileContents, kFileContents + len, + reinterpret_cast<volatile char*>(addr)); +} + +// MAP_SHARED PROT_WRITE not allowed on read-only FDs. +TEST_F(MMapFileTest, WriteSharedOnReadOnlyFd) { + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(filename_, O_RDONLY)); + + uintptr_t addr; + EXPECT_THAT( + addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd.get(), 0), + SyscallFailsWithErrno(EACCES)); +} + +// The FD must be readable. +TEST_P(MMapFileParamTest, WriteOnlyFd) { + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(filename_, O_WRONLY)); + + uintptr_t addr; + EXPECT_THAT(addr = Map(0, kPageSize, prot(), flags(), fd.get(), 0), + SyscallFailsWithErrno(EACCES)); +} + +// Overwriting the contents of a file mapped MAP_SHARED PROT_READ +// should cause the new data to be reflected in the mapping. +TEST_F(MMapFileTest, ReadSharedConsistentWithOverwrite) { + // Start from scratch. + EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds()); + + // Expand the file to two pages and dirty them. + std::string bufA(kPageSize, 'a'); + ASSERT_THAT(Write(bufA.c_str(), bufA.size()), + SyscallSucceedsWithValue(bufA.size())); + std::string bufB(kPageSize, 'b'); + ASSERT_THAT(Write(bufB.c_str(), bufB.size()), + SyscallSucceedsWithValue(bufB.size())); + + // Map the page. + uintptr_t addr; + ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0), + SyscallSucceeds()); + + // Check that the mapping contains the right file data. + EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), bufA.c_str(), kPageSize)); + EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr + kPageSize), bufB.c_str(), + kPageSize)); + + // Start at the beginning of the file. + ASSERT_THAT(lseek(fd_.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0)); + + // Swap the write pattern. 
+ ASSERT_THAT(Write(bufB.c_str(), bufB.size()), + SyscallSucceedsWithValue(bufB.size())); + ASSERT_THAT(Write(bufA.c_str(), bufA.size()), + SyscallSucceedsWithValue(bufA.size())); + + // Check that the mapping got updated. + EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), bufB.c_str(), kPageSize)); + EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr + kPageSize), bufA.c_str(), + kPageSize)); +} + +// Partially overwriting a file mapped MAP_SHARED PROT_READ should be reflected +// in the mapping. +TEST_F(MMapFileTest, ReadSharedConsistentWithPartialOverwrite) { + // Start from scratch. + EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds()); + + // Expand the file to two pages and dirty them. + std::string bufA(kPageSize, 'a'); + ASSERT_THAT(Write(bufA.c_str(), bufA.size()), + SyscallSucceedsWithValue(bufA.size())); + std::string bufB(kPageSize, 'b'); + ASSERT_THAT(Write(bufB.c_str(), bufB.size()), + SyscallSucceedsWithValue(bufB.size())); + + // Map the page. + uintptr_t addr; + ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0), + SyscallSucceeds()); + + // Check that the mapping contains the right file data. + EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), bufA.c_str(), kPageSize)); + EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr + kPageSize), bufB.c_str(), + kPageSize)); + + // Start at the beginning of the file. + ASSERT_THAT(lseek(fd_.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0)); + + // Do a partial overwrite, spanning both pages. + std::string bufC(kPageSize + (kPageSize / 2), 'c'); + ASSERT_THAT(Write(bufC.c_str(), bufC.size()), + SyscallSucceedsWithValue(bufC.size())); + + // Check that the mapping got updated. 
+ EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), bufC.c_str(), + kPageSize + (kPageSize / 2))); + EXPECT_EQ(0, + memcmp(reinterpret_cast<void*>(addr + kPageSize + (kPageSize / 2)), + bufB.c_str(), kPageSize / 2)); +} + +// Overwriting a file mapped MAP_SHARED PROT_READ should be reflected in the +// mapping and the file. +TEST_F(MMapFileTest, ReadSharedConsistentWithWriteAndFile) { + // Start from scratch. + EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds()); + + // Expand the file to two full pages and dirty it. + std::string bufA(2 * kPageSize, 'a'); + ASSERT_THAT(Write(bufA.c_str(), bufA.size()), + SyscallSucceedsWithValue(bufA.size())); + + // Map only the first page. + uintptr_t addr; + ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0), + SyscallSucceeds()); + + // Prepare to overwrite the file contents. + ASSERT_THAT(lseek(fd_.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0)); + + // Overwrite everything, beyond the mapped portion. + std::string bufB(2 * kPageSize, 'b'); + ASSERT_THAT(Write(bufB.c_str(), bufB.size()), + SyscallSucceedsWithValue(bufB.size())); + + // What the mapped portion should now look like. + std::string bufMapped(kPageSize, 'b'); + + // Expect that the mapped portion is consistent. + EXPECT_EQ( + 0, memcmp(reinterpret_cast<void*>(addr), bufMapped.c_str(), kPageSize)); + + // Prepare to read the entire file contents. + ASSERT_THAT(lseek(fd_.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0)); + + // Expect that the file was fully updated. + std::vector<char> bufFile(2 * kPageSize); + ASSERT_THAT(Read(bufFile.data(), bufFile.size()), + SyscallSucceedsWithValue(bufFile.size())); + // Cast to void* to avoid EXPECT_THAT assuming bufFile.data() is a + // NUL-terminated C std::string. EXPECT_THAT will try to print a char* as a C + // std::string, possibly overruning the buffer. + EXPECT_THAT(reinterpret_cast<void*>(bufFile.data()), EqualsMemory(bufB)); +} + +// Write data to mapped file. 
+TEST_F(MMapFileTest, WriteShared) { + uintptr_t addr; + ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, + fd_.get(), 0), + SyscallSucceeds()); + + size_t len = strlen(kFileContents); + memcpy(reinterpret_cast<void*>(addr), kFileContents, len); + + // The file may not actually be updated until munmap is called. + ASSERT_THAT(Unmap(), SyscallSucceeds()); + + std::vector<char> buf(len); + ASSERT_THAT(Read(buf.data(), buf.size()), + SyscallSucceedsWithValue(buf.size())); + // Cast to void* to avoid EXPECT_THAT assuming buf.data() is a + // NUL-terminated C string. EXPECT_THAT will try to print a char* as a C + // string, possibly overruning the buffer. + EXPECT_THAT(reinterpret_cast<void*>(buf.data()), + EqualsMemory(std::string(kFileContents))); +} + +// Write data to portion of mapped page beyond the end of the file. +// These writes are not reflected in the file. +TEST_F(MMapFileTest, WriteSharedBeyondEnd) { + // The file is only half of a page. We map an entire page. Writes to the + // end of the mapping must not be reflected in the file. + uintptr_t addr; + ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, + fd_.get(), 0), + SyscallSucceeds()); + + // First half; this is reflected in the file. + std::string first(kPageSize / 2, 'A'); + memcpy(reinterpret_cast<void*>(addr), first.c_str(), first.size()); + + // Second half; this is not reflected in the file. + std::string second(kPageSize / 2, 'B'); + memcpy(reinterpret_cast<void*>(addr + kPageSize / 2), second.c_str(), + second.size()); + + // The file may not actually be updated until munmap is called. + ASSERT_THAT(Unmap(), SyscallSucceeds()); + + // Big enough to fit the entire page, if the writes are mistakenly written to + // the file. + std::vector<char> buf(kPageSize); + + // Only the first half is in the file. 
+ ASSERT_THAT(Read(buf.data(), buf.size()), + SyscallSucceedsWithValue(first.size())); + // Cast to void* to avoid EXPECT_THAT assuming buf.data() is a + // NUL-terminated C string. EXPECT_THAT will try to print a char* as a C + // NUL-terminated C std::string. EXPECT_THAT will try to print a char* as a C + // std::string, possibly overruning the buffer. + EXPECT_THAT(reinterpret_cast<void*>(buf.data()), EqualsMemory(first)); +} + +// The portion of a mapped page that becomes part of the file after a truncate +// is reflected in the file. +TEST_F(MMapFileTest, WriteSharedTruncateUp) { + // The file is only half of a page. We map an entire page. Writes to the + // end of the mapping must not be reflected in the file. + uintptr_t addr; + ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, + fd_.get(), 0), + SyscallSucceeds()); + + // First half; this is reflected in the file. + std::string first(kPageSize / 2, 'A'); + memcpy(reinterpret_cast<void*>(addr), first.c_str(), first.size()); + + // Second half; this is not reflected in the file now (see + // WriteSharedBeyondEnd), but will be after the truncate. + std::string second(kPageSize / 2, 'B'); + memcpy(reinterpret_cast<void*>(addr + kPageSize / 2), second.c_str(), + second.size()); + + // Extend the file to a full page. The second half of the page will be + // reflected in the file. + EXPECT_THAT(ftruncate(fd_.get(), kPageSize), SyscallSucceeds()); + + // The file may not actually be updated until munmap is called. + ASSERT_THAT(Unmap(), SyscallSucceeds()); + + // The whole page is in the file. + std::vector<char> buf(kPageSize); + ASSERT_THAT(Read(buf.data(), buf.size()), + SyscallSucceedsWithValue(buf.size())); + // Cast to void* to avoid EXPECT_THAT assuming buf.data() is a + // NUL-terminated C string. EXPECT_THAT will try to print a char* as a C + // string, possibly overruning the buffer. 
+ EXPECT_THAT(reinterpret_cast<void*>(buf.data()), EqualsMemory(first)); + EXPECT_THAT(reinterpret_cast<void*>(buf.data() + kPageSize / 2), + EqualsMemory(second)); +} + +TEST_F(MMapFileTest, ReadSharedTruncateDownThenUp) { + // Start from scratch. + EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds()); + + // Expand the file to a full page and dirty it. + std::string buf(kPageSize, 'a'); + ASSERT_THAT(Write(buf.c_str(), buf.size()), + SyscallSucceedsWithValue(buf.size())); + + // Map the page. + uintptr_t addr; + ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0), + SyscallSucceeds()); + + // Check that the memory contains the file data. + EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), buf.c_str(), kPageSize)); + + // Truncate down, then up. + EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds()); + EXPECT_THAT(ftruncate(fd_.get(), kPageSize), SyscallSucceeds()); + + // Check that the memory was zeroed. + std::string zeroed(kPageSize, '\0'); + EXPECT_EQ(0, + memcmp(reinterpret_cast<void*>(addr), zeroed.c_str(), kPageSize)); + + // The file may not actually be updated until msync is called. + ASSERT_THAT(Msync(), SyscallSucceeds()); + + // Prepare to read the entire file contents. + ASSERT_THAT(lseek(fd_.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0)); + + // Expect that the file is fully updated. + std::vector<char> bufFile(kPageSize); + ASSERT_THAT(Read(bufFile.data(), bufFile.size()), + SyscallSucceedsWithValue(bufFile.size())); + EXPECT_EQ(0, memcmp(bufFile.data(), zeroed.c_str(), kPageSize)); +} + +TEST_F(MMapFileTest, WriteSharedTruncateDownThenUp) { + // The file is only half of a page. We map an entire page. Writes to the + // end of the mapping must not be reflected in the file. + uintptr_t addr; + ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, + fd_.get(), 0), + SyscallSucceeds()); + + // First half; this will be deleted by truncate(0). 
+ std::string first(kPageSize / 2, 'A'); + memcpy(reinterpret_cast<void*>(addr), first.c_str(), first.size()); + + // Truncate down, then up. + EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds()); + EXPECT_THAT(ftruncate(fd_.get(), kPageSize), SyscallSucceeds()); + + // The whole page is zeroed in memory. + std::string zeroed(kPageSize, '\0'); + EXPECT_EQ(0, + memcmp(reinterpret_cast<void*>(addr), zeroed.c_str(), kPageSize)); + + // The file may not actually be updated until munmap is called. + ASSERT_THAT(Unmap(), SyscallSucceeds()); + + // The whole file is also zeroed. + std::vector<char> buf(kPageSize); + ASSERT_THAT(Read(buf.data(), buf.size()), + SyscallSucceedsWithValue(buf.size())); + // Cast to void* to avoid EXPECT_THAT assuming buf.data() is a + // NUL-terminated C string. EXPECT_THAT will try to print a char* as a C + // string, possibly overruning the buffer. + EXPECT_THAT(reinterpret_cast<void*>(buf.data()), EqualsMemory(zeroed)); +} + +TEST_F(MMapFileTest, ReadSharedTruncateSIGBUS) { + SetupGvisorDeathTest(); + + // Start from scratch. + EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds()); + + // Expand the file to a full page and dirty it. + std::string buf(kPageSize, 'a'); + ASSERT_THAT(Write(buf.c_str(), buf.size()), + SyscallSucceedsWithValue(buf.size())); + + // Map the page. + uintptr_t addr; + ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0), + SyscallSucceeds()); + + // Check that the mapping contains the file data. + EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), buf.c_str(), kPageSize)); + + // Truncate down. + EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds()); + + // Accessing the truncated region should cause a SIGBUS. 
+ std::vector<char> in(kPageSize); + EXPECT_EXIT( + std::copy(reinterpret_cast<volatile char*>(addr), + reinterpret_cast<volatile char*>(addr) + kPageSize, in.data()), + ::testing::KilledBySignal(SIGBUS), ""); +} + +TEST_F(MMapFileTest, WriteSharedTruncateSIGBUS) { + SetupGvisorDeathTest(); + + uintptr_t addr; + ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, + fd_.get(), 0), + SyscallSucceeds()); + + // Touch the memory to be sure it really is mapped. + size_t len = strlen(kFileContents); + memcpy(reinterpret_cast<void*>(addr), kFileContents, len); + + // Truncate down. + EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds()); + + // Accessing the truncated file should cause a SIGBUS. + EXPECT_EXIT(std::copy(kFileContents, kFileContents + len, + reinterpret_cast<volatile char*>(addr)), + ::testing::KilledBySignal(SIGBUS), ""); +} + +TEST_F(MMapFileTest, ReadSharedTruncatePartialPage) { + // Start from scratch. + EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds()); + + // Dirty the file. + std::string buf(kPageSize, 'a'); + ASSERT_THAT(Write(buf.c_str(), buf.size()), + SyscallSucceedsWithValue(buf.size())); + + // Map a page. + uintptr_t addr; + ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0), + SyscallSucceeds()); + + // Truncate to half of the page. + EXPECT_THAT(ftruncate(fd_.get(), kPageSize / 2), SyscallSucceeds()); + + // First half of the page untouched. + EXPECT_EQ(0, + memcmp(reinterpret_cast<void*>(addr), buf.data(), kPageSize / 2)); + + // Second half is zeroed. + std::string zeroed(kPageSize / 2, '\0'); + EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr + kPageSize / 2), + zeroed.c_str(), kPageSize / 2)); +} + +// Page can still be accessed and contents are intact after truncating a partial +// page. +TEST_F(MMapFileTest, WriteSharedTruncatePartialPage) { + // Expand the file to a full page. 
+ EXPECT_THAT(ftruncate(fd_.get(), kPageSize), SyscallSucceeds()); + + uintptr_t addr; + ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, + fd_.get(), 0), + SyscallSucceeds()); + + // Fill the entire page. + std::string contents(kPageSize, 'A'); + memcpy(reinterpret_cast<void*>(addr), contents.c_str(), contents.size()); + + // Truncate half of the page. + EXPECT_THAT(ftruncate(fd_.get(), kPageSize / 2), SyscallSucceeds()); + + // First half of the page untouched. + EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), contents.c_str(), + kPageSize / 2)); + + // Second half zeroed. + std::string zeroed(kPageSize / 2, '\0'); + EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr + kPageSize / 2), + zeroed.c_str(), kPageSize / 2)); +} + +// MAP_PRIVATE writes are not carried through to the underlying file. +TEST_F(MMapFileTest, WritePrivate) { + uintptr_t addr; + ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE, + fd_.get(), 0), + SyscallSucceeds()); + + size_t len = strlen(kFileContents); + memcpy(reinterpret_cast<void*>(addr), kFileContents, len); + + // The file should not be updated, but if it mistakenly is, it may not be + // until after munmap is called. + ASSERT_THAT(Unmap(), SyscallSucceeds()); + + std::vector<char> buf(len); + ASSERT_THAT(Read(buf.data(), buf.size()), + SyscallSucceedsWithValue(buf.size())); + // Cast to void* to avoid EXPECT_THAT assuming buf.data() is a + // NUL-terminated C string. EXPECT_THAT will try to print a char* as a C + // string, possibly overruning the buffer. + EXPECT_THAT(reinterpret_cast<void*>(buf.data()), + EqualsMemory(std::string(len, '\0'))); +} + +// SIGBUS raised when reading or writing past end of a mapped file. 
+TEST_P(MMapFileParamTest, SigBusDeath) { + SetupGvisorDeathTest(); + + uintptr_t addr; + ASSERT_THAT(addr = Map(0, 2 * kPageSize, prot(), flags(), fd_.get(), 0), + SyscallSucceeds()); + + auto* start = reinterpret_cast<volatile char*>(addr + kPageSize); + + // MMapFileTest makes a file kPageSize/2 long. The entire first page should be + // accessible, but anything beyond it should not. + if (prot() & PROT_WRITE) { + // Write beyond first page. + size_t len = strlen(kFileContents); + EXPECT_EXIT(std::copy(kFileContents, kFileContents + len, start), + ::testing::KilledBySignal(SIGBUS), ""); + } else { + // Read beyond first page. + std::vector<char> in(kPageSize); + EXPECT_EXIT(std::copy(start, start + kPageSize, in.data()), + ::testing::KilledBySignal(SIGBUS), ""); + } +} + +// Tests that SIGBUS is not raised when reading or writing to a file-mapped +// page before EOF, even if part of the mapping extends beyond EOF. +// +// See b/27877699. +TEST_P(MMapFileParamTest, NoSigBusOnPagesBeforeEOF) { + uintptr_t addr; + ASSERT_THAT(addr = Map(0, 2 * kPageSize, prot(), flags(), fd_.get(), 0), + SyscallSucceeds()); + + // The test passes if this survives. + auto* start = reinterpret_cast<volatile char*>(addr + (kPageSize / 2) + 1); + size_t len = strlen(kFileContents); + if (prot() & PROT_WRITE) { + std::copy(kFileContents, kFileContents + len, start); + } else { + std::vector<char> in(len); + std::copy(start, start + len, in.data()); + } +} + +// Tests that SIGBUS is not raised when reading or writing from a file-mapped +// page containing EOF, *after* the EOF. +TEST_P(MMapFileParamTest, NoSigBusOnPageContainingEOF) { + uintptr_t addr; + ASSERT_THAT(addr = Map(0, 2 * kPageSize, prot(), flags(), fd_.get(), 0), + SyscallSucceeds()); + + // The test passes if this survives. (Technically addr+kPageSize/2 is already + // beyond EOF, but +1 to check for fencepost errors.) 
+ auto* start = reinterpret_cast<volatile char*>(addr + (kPageSize / 2) + 1); + size_t len = strlen(kFileContents); + if (prot() & PROT_WRITE) { + std::copy(kFileContents, kFileContents + len, start); + } else { + std::vector<char> in(len); + std::copy(start, start + len, in.data()); + } +} + +// Tests that reading from writable shared file-mapped pages succeeds. +// +// On most platforms this is trivial, but when the file is mapped via the sentry +// page cache (which does not yet support writing to shared mappings), a bug +// caused reads to fail unnecessarily on such mappings. See b/28913513. +TEST_F(MMapFileTest, ReadingWritableSharedFilePageSucceeds) { + uintptr_t addr; + size_t len = strlen(kFileContents); + + ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, + fd_.get(), 0), + SyscallSucceeds()); + + std::vector<char> buf(kPageSize); + // The test passes if this survives. + std::copy(reinterpret_cast<volatile char*>(addr), + reinterpret_cast<volatile char*>(addr) + len, buf.data()); +} + +// Tests that EFAULT is returned when invoking a syscall that requires the OS to +// read past end of file (resulting in a fault in sentry context in the gVisor +// case). See b/28913513. +TEST_F(MMapFileTest, InternalSigBus) { + uintptr_t addr; + ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE, + fd_.get(), 0), + SyscallSucceeds()); + + // This depends on the fact that gVisor implements pipes internally. + int pipefd[2]; + ASSERT_THAT(pipe(pipefd), SyscallSucceeds()); + EXPECT_THAT( + write(pipefd[1], reinterpret_cast<void*>(addr + kPageSize), kPageSize), + SyscallFailsWithErrno(EFAULT)); + + EXPECT_THAT(close(pipefd[0]), SyscallSucceeds()); + EXPECT_THAT(close(pipefd[1]), SyscallSucceeds()); +} + +// Like InternalSigBus, but test the WriteZerosAt path by reading from +// /dev/zero to a shared mapping (so that the SIGBUS isn't caught during +// copy-on-write breaking). 
+TEST_F(MMapFileTest, InternalSigBusZeroing) { + uintptr_t addr; + ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, + fd_.get(), 0), + SyscallSucceeds()); + + const FileDescriptor dev_zero = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDONLY)); + EXPECT_THAT(read(dev_zero.get(), reinterpret_cast<void*>(addr + kPageSize), + kPageSize), + SyscallFailsWithErrno(EFAULT)); +} + +// Checks that mmaps with a length of uint64_t(-PAGE_SIZE + 1) or greater do not +// induce a sentry panic (due to "rounding up" to 0). +TEST_F(MMapTest, HugeLength) { + EXPECT_THAT(Map(0, static_cast<uint64_t>(-kPageSize + 1), PROT_NONE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallFailsWithErrno(ENOMEM)); +} + +// Tests for a specific gVisor MM caching bug. +TEST_F(MMapTest, AccessCOWInvalidatesCachedSegments) { + auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + auto fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDWR)); + auto zero_fd = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDONLY)); + + // Get a two-page private mapping and fill it with 1s. + uintptr_t addr; + ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallSucceeds()); + memset(addr_, 1, 2 * kPageSize); + MaybeSave(); + + // Fork to make the mapping copy-on-write. + pid_t const pid = fork(); + if (pid == 0) { + // The child process waits for the parent to SIGKILL it. + while (true) { + pause(); + } + } + ASSERT_THAT(pid, SyscallSucceeds()); + auto cleanup_child = Cleanup([&] { + EXPECT_THAT(kill(pid, SIGKILL), SyscallSucceeds()); + int status; + EXPECT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid)); + }); + + // Induce a read-only Access of the first page of the mapping, which will not + // cause a copy. The usermem.Segment should be cached. + ASSERT_THAT(PwriteFd(fd.get(), addr_, kPageSize, 0), + SyscallSucceedsWithValue(kPageSize)); + + // Induce a writable Access of both pages of the mapping. 
This should + // invalidate the cached Segment. + ASSERT_THAT(PreadFd(zero_fd.get(), addr_, 2 * kPageSize, 0), + SyscallSucceedsWithValue(2 * kPageSize)); + + // Induce a read-only Access of the first page of the mapping again. It should + // read the 0s that were stored in the mapping by the read from /dev/zero. If + // the read failed to invalidate the cached Segment, it will instead read the + // 1s in the stale page. + ASSERT_THAT(PwriteFd(fd.get(), addr_, kPageSize, 0), + SyscallSucceedsWithValue(kPageSize)); + std::vector<char> buf(kPageSize); + ASSERT_THAT(PreadFd(fd.get(), buf.data(), kPageSize, 0), + SyscallSucceedsWithValue(kPageSize)); + for (size_t i = 0; i < kPageSize; i++) { + ASSERT_EQ(0, buf[i]) << "at offset " << i; + } +} + +TEST_F(MMapTest, NoReserve) { + const size_t kSize = 10 * 1 << 20; // 10M + uintptr_t addr; + ASSERT_THAT(addr = Map(0, kSize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0), + SyscallSucceeds()); + EXPECT_GT(addr, 0); + + // Check that every page can be read/written. Technically, writing to memory + // could SIGSEGV in case there is no more memory available. In gVisor it + // would never happen though because NORESERVE is ignored. In Linux, it's + // possible to fail, but allocation is small enough that it's highly likely + // to succeed. + for (size_t j = 0; j < kSize; j += kPageSize) { + EXPECT_EQ(0, reinterpret_cast<char*>(addr)[j]); + reinterpret_cast<char*>(addr)[j] = j; + } +} + +// Map more than the gVisor page-cache map unit (64k) and ensure that +// it is consistent with reading from the file. +TEST_F(MMapFileTest, Bug38498194) { + // Choose a sufficiently large map unit. + constexpr int kSize = 4 * 1024 * 1024; + EXPECT_THAT(ftruncate(fd_.get(), kSize), SyscallSucceeds()); + + // Map a large enough region so that multiple internal segments + // are created to back the mapping. 
+ uintptr_t addr; + ASSERT_THAT( + addr = Map(0, kSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd_.get(), 0), + SyscallSucceeds()); + + std::vector<char> expect(kSize, 'a'); + std::copy(expect.data(), expect.data() + expect.size(), + reinterpret_cast<volatile char*>(addr)); + + // Trigger writeback for gVisor. In Linux pages stay cached until + // it can't hold onto them anymore. + ASSERT_THAT(Unmap(), SyscallSucceeds()); + + std::vector<char> buf(kSize); + ASSERT_THAT(Read(buf.data(), buf.size()), + SyscallSucceedsWithValue(buf.size())); + EXPECT_EQ(buf, expect) << std::string(buf.data(), buf.size()); +} + +// Tests that reading from a file to a memory mapping of the same file does not +// deadlock. See b/34813270. +TEST_F(MMapFileTest, SelfRead) { + uintptr_t addr; + ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, + fd_.get(), 0), + SyscallSucceeds()); + EXPECT_THAT(Read(reinterpret_cast<char*>(addr), kPageSize / 2), + SyscallSucceedsWithValue(kPageSize / 2)); + // The resulting file contents are poorly-specified and irrelevant. +} + +// Tests that writing to a file from a memory mapping of the same file does not +// deadlock. Regression test for b/34813270. +TEST_F(MMapFileTest, SelfWrite) { + uintptr_t addr; + ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0), + SyscallSucceeds()); + EXPECT_THAT(Write(reinterpret_cast<char*>(addr), kPageSize / 2), + SyscallSucceedsWithValue(kPageSize / 2)); + // The resulting file contents are poorly-specified and irrelevant. +} + +TEST(MMapDeathTest, TruncateAfterCOWBreak) { + SetupGvisorDeathTest(); + + // Create and map a single-page file. 
+ auto const temp_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + auto const fd = ASSERT_NO_ERRNO_AND_VALUE(Open(temp_file.path(), O_RDWR)); + ASSERT_THAT(ftruncate(fd.get(), kPageSize), SyscallSucceeds()); + auto const mapping = ASSERT_NO_ERRNO_AND_VALUE(Mmap( + nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd.get(), 0)); + + // Write to this mapping, causing the page to be copied for write. + memset(mapping.ptr(), 'a', mapping.len()); + MaybeSave(); // Trigger a co-operative save cycle. + + // Truncate the file and expect it to invalidate the copied page. + ASSERT_THAT(ftruncate(fd.get(), 0), SyscallSucceeds()); + EXPECT_EXIT(*reinterpret_cast<volatile char*>(mapping.ptr()), + ::testing::KilledBySignal(SIGBUS), ""); +} + +// Regression test for #147. +TEST(MMapNoFixtureTest, MapReadOnlyAfterCreateWriteOnly) { + std::string filename = NewTempAbsPath(); + + // We have to create the file O_RDONLY to reproduce the bug because + // fsgofer.localFile.Create() silently upgrades O_WRONLY to O_RDWR, causing + // the cached "write-only" FD to be read/write and therefore usable by mmap(). + auto const ro_fd = ASSERT_NO_ERRNO_AND_VALUE( + Open(filename, O_RDONLY | O_CREAT | O_EXCL, 0666)); + + // Get a write-only FD for the same file, which should be ignored by mmap() + // (but isn't in #147). + auto const wo_fd = ASSERT_NO_ERRNO_AND_VALUE(Open(filename, O_WRONLY)); + ASSERT_THAT(ftruncate(wo_fd.get(), kPageSize), SyscallSucceeds()); + + auto const mapping = ASSERT_NO_ERRNO_AND_VALUE( + Mmap(nullptr, kPageSize, PROT_READ, MAP_SHARED, ro_fd.get(), 0)); + std::vector<char> buf(kPageSize); + // The test passes if this survives. + std::copy(static_cast<char*>(mapping.ptr()), + static_cast<char*>(mapping.endptr()), buf.data()); +} + +// Conditional on MAP_32BIT. +// This flag is supported only on x86-64, for 64-bit programs. 
+#ifdef __x86_64__ + +TEST(MMapNoFixtureTest, Map32Bit) { + auto const mapping = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kPageSize, PROT_NONE, MAP_PRIVATE | MAP_32BIT)); + EXPECT_LT(mapping.addr(), static_cast<uintptr_t>(1) << 32); + EXPECT_LE(mapping.endaddr(), static_cast<uintptr_t>(1) << 32); +} + +#endif // defined(__x86_64__) + +// Instantiates MMapFileParamTest over the cross product of the protection +// values and the sharing flags listed below. +INSTANTIATE_TEST_SUITE_P( + ReadWriteSharedPrivate, MMapFileParamTest, + ::testing::Combine(::testing::ValuesIn({ + PROT_READ, + PROT_WRITE, + PROT_READ | PROT_WRITE, + }), + ::testing::ValuesIn({MAP_SHARED, MAP_PRIVATE}))); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/mount.cc b/test/syscalls/linux/mount.cc new file mode 100644 index 000000000..a3e9745cf --- /dev/null +++ b/test/syscalls/linux/mount.cc @@ -0,0 +1,327 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <sys/mount.h> +#include <sys/stat.h> +#include <unistd.h> + +#include <functional> +#include <memory> +#include <string> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/strings/string_view.h" +#include "absl/time/time.h" +#include "test/util/capability_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/fs_util.h" +#include "test/util/mount_util.h" +#include "test/util/multiprocess_util.h" +#include "test/util/posix_error.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +TEST(MountTest, MountBadFilesystem) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))); + + // Linux expects a valid target before it checks the file system name. + auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + EXPECT_THAT(mount("", dir.path().c_str(), "foobar", 0, ""), + SyscallFailsWithErrno(ENODEV)); +} + +TEST(MountTest, MountInvalidTarget) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))); + + auto const dir = NewTempAbsPath(); + EXPECT_THAT(mount("", dir.c_str(), "tmpfs", 0, ""), + SyscallFailsWithErrno(ENOENT)); +} + +TEST(MountTest, MountPermDenied) { + // Clear CAP_SYS_ADMIN. + if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))) { + EXPECT_NO_ERRNO(SetCapability(CAP_SYS_ADMIN, false)); + } + + // Linux expects a valid target before checking capability. 
+ auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + EXPECT_THAT(mount("", dir.path().c_str(), "", 0, ""), + SyscallFailsWithErrno(EPERM)); +} + +TEST(MountTest, UmountPermDenied) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))); + + auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + auto const mount = + ASSERT_NO_ERRNO_AND_VALUE(Mount("", dir.path(), "tmpfs", 0, "", 0)); + + // Drop privileges in another thread, so we can still unmount the mounted + // directory. + ScopedThread([&]() { + EXPECT_NO_ERRNO(SetCapability(CAP_SYS_ADMIN, false)); + EXPECT_THAT(umount(dir.path().c_str()), SyscallFailsWithErrno(EPERM)); + }); +} + +TEST(MountTest, MountOverBusy) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))); + + auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + auto const fd = ASSERT_NO_ERRNO_AND_VALUE( + Open(JoinPath(dir.path(), "foo"), O_CREAT | O_RDWR, 0777)); + + // Should be able to mount over a busy directory. + ASSERT_NO_ERRNO_AND_VALUE(Mount("", dir.path(), "tmpfs", 0, "", 0)); +} + +TEST(MountTest, OpenFileBusy) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))); + + auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + auto const mount = ASSERT_NO_ERRNO_AND_VALUE( + Mount("", dir.path(), "tmpfs", 0, "mode=0700", 0)); + auto const fd = ASSERT_NO_ERRNO_AND_VALUE( + Open(JoinPath(dir.path(), "foo"), O_CREAT | O_RDWR, 0777)); + + // An open file should prevent unmounting. + EXPECT_THAT(umount(dir.path().c_str()), SyscallFailsWithErrno(EBUSY)); +} + +TEST(MountTest, UmountDetach) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))); + + // structure: + // + // dir (mount point) + // subdir + // file + // + // We show that we can walk around in the mount after detach-unmount dir. + // + // We show that even though dir is unreachable from outside the mount, we can + // still reach dir's (former) parent! 
+ auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + + const struct stat before = ASSERT_NO_ERRNO_AND_VALUE(Stat(dir.path())); + auto mount = + ASSERT_NO_ERRNO_AND_VALUE(Mount("", dir.path(), "tmpfs", 0, "mode=0700", + /* umountflags= */ MNT_DETACH)); + const struct stat after = ASSERT_NO_ERRNO_AND_VALUE(Stat(dir.path())); + EXPECT_NE(before.st_ino, after.st_ino); + + // Create files in the new mount. + constexpr char kContents[] = "no no no"; + auto const subdir = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(dir.path())); + auto const file = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateFileWith(dir.path(), kContents, 0777)); + + auto const dir_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(subdir.path(), O_RDONLY | O_DIRECTORY)); + auto const fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY)); + + // Unmount the tmpfs. + mount.Release()(); + + const struct stat after2 = ASSERT_NO_ERRNO_AND_VALUE(Stat(dir.path())); + EXPECT_EQ(before.st_ino, after2.st_ino); + + // Can still read file after unmounting. + std::vector<char> buf(sizeof(kContents)); + EXPECT_THAT(ReadFd(fd.get(), buf.data(), buf.size()), SyscallSucceeds()); + + // Walk to dir. + auto const mounted_dir = ASSERT_NO_ERRNO_AND_VALUE( + OpenAt(dir_fd.get(), "..", O_DIRECTORY | O_RDONLY)); + // Walk to dir/file. + auto const fd_again = ASSERT_NO_ERRNO_AND_VALUE( + OpenAt(mounted_dir.get(), std::string(Basename(file.path())), O_RDONLY)); + + std::vector<char> buf2(sizeof(kContents)); + EXPECT_THAT(ReadFd(fd_again.get(), buf2.data(), buf2.size()), + SyscallSucceeds()); + EXPECT_EQ(buf, buf2); + + // Walking outside the unmounted realm should still work, too! 
+ auto const dir_parent = ASSERT_NO_ERRNO_AND_VALUE( + OpenAt(mounted_dir.get(), "..", O_DIRECTORY | O_RDONLY)); +} + +TEST(MountTest, ActiveSubmountBusy) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))); + + auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + auto const mount1 = ASSERT_NO_ERRNO_AND_VALUE( + Mount("", dir.path(), "tmpfs", 0, "mode=0700", 0)); + + auto const dir2 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(dir.path())); + auto const mount2 = + ASSERT_NO_ERRNO_AND_VALUE(Mount("", dir2.path(), "tmpfs", 0, "", 0)); + + // Since dir now has an active submount, should not be able to unmount. + EXPECT_THAT(umount(dir.path().c_str()), SyscallFailsWithErrno(EBUSY)); +} + +TEST(MountTest, MountTmpfs) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))); + + auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + + // NOTE(b/129868551): Inode IDs are only stable across S/R if we have an open + // FD for that inode. Since we are going to compare inode IDs below, get a + // FileDescriptor for this directory here, which will be closed automatically + // at the end of the test. + auto const fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(dir.path(), O_DIRECTORY, O_RDONLY)); + + const struct stat before = ASSERT_NO_ERRNO_AND_VALUE(Stat(dir.path())); + + { + auto const mount = ASSERT_NO_ERRNO_AND_VALUE( + Mount("", dir.path(), "tmpfs", 0, "mode=0700", 0)); + + const struct stat s = ASSERT_NO_ERRNO_AND_VALUE(Stat(dir.path())); + EXPECT_EQ(s.st_mode, S_IFDIR | 0700); + EXPECT_NE(s.st_ino, before.st_ino); + + EXPECT_NO_ERRNO(Open(JoinPath(dir.path(), "foo"), O_CREAT | O_RDWR, 0777)); + } + + // Now that dir is unmounted again, we should have the old inode back. 
+ const struct stat after = ASSERT_NO_ERRNO_AND_VALUE(Stat(dir.path())); + EXPECT_EQ(before.st_ino, after.st_ino); +} + +TEST(MountTest, MountTmpfsMagicValIgnored) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))); + + auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + + auto const mount = ASSERT_NO_ERRNO_AND_VALUE( + Mount("", dir.path(), "tmpfs", MS_MGC_VAL, "mode=0700", 0)); +} + +// Passing nullptr to data is equivalent to "". +TEST(MountTest, NullData) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))); + + auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + + EXPECT_THAT(mount("", dir.path().c_str(), "tmpfs", 0, nullptr), + SyscallSucceeds()); + EXPECT_THAT(umount2(dir.path().c_str(), 0), SyscallSucceeds()); +} + +TEST(MountTest, MountReadonly) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))); + + auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + auto const mount = ASSERT_NO_ERRNO_AND_VALUE( + Mount("", dir.path(), "tmpfs", MS_RDONLY, "mode=0777", 0)); + + const struct stat s = ASSERT_NO_ERRNO_AND_VALUE(Stat(dir.path())); + EXPECT_EQ(s.st_mode, S_IFDIR | 0777); + + std::string const filename = JoinPath(dir.path(), "foo"); + EXPECT_THAT(open(filename.c_str(), O_RDWR | O_CREAT, 0777), + SyscallFailsWithErrno(EROFS)); +} + +PosixErrorOr<absl::Time> ATime(absl::string_view file) { + struct stat s = {}; + if (stat(std::string(file).c_str(), &s) == -1) { + return PosixError(errno, "stat failed"); + } + return absl::TimeFromTimespec(s.st_atim); +} + +TEST(MountTest, MountNoAtime) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))); + + auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + auto const mount = ASSERT_NO_ERRNO_AND_VALUE( + Mount("", dir.path(), "tmpfs", MS_NOATIME, "mode=0777", 0)); + + std::string const contents = "No no no, don't follow the instructions!"; + auto const file = ASSERT_NO_ERRNO_AND_VALUE( 
+ TempPath::CreateFileWith(dir.path(), contents, 0777)); + + absl::Time const before = ASSERT_NO_ERRNO_AND_VALUE(ATime(file.path())); + + // Reading from the file should change the atime, but the MS_NOATIME flag + // should prevent that. + auto const fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR)); + char buf[100]; + int read_n; + ASSERT_THAT(read_n = read(fd.get(), buf, sizeof(buf)), SyscallSucceeds()); + EXPECT_EQ(std::string(buf, read_n), contents); + + absl::Time const after = ASSERT_NO_ERRNO_AND_VALUE(ATime(file.path())); + + // Expect that atime hasn't changed. + EXPECT_EQ(before, after); +} + +TEST(MountTest, MountNoExec) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))); + + auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + auto const mount = ASSERT_NO_ERRNO_AND_VALUE( + Mount("", dir.path(), "tmpfs", MS_NOEXEC, "mode=0777", 0)); + + std::string const contents = "No no no, don't follow the instructions!"; + auto const file = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateFileWith(dir.path(), contents, 0777)); + + int execve_errno; + ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec(file.path(), {}, {}, nullptr, &execve_errno)); + EXPECT_EQ(execve_errno, EACCES); +} + +TEST(MountTest, RenameRemoveMountPoint) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))); + + auto const dir_parent = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + auto const dir = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(dir_parent.path())); + auto const new_dir = NewTempAbsPath(); + + auto const mount = + ASSERT_NO_ERRNO_AND_VALUE(Mount("", dir.path(), "tmpfs", 0, "", 0)); + + ASSERT_THAT(rename(dir.path().c_str(), new_dir.c_str()), + SyscallFailsWithErrno(EBUSY)); + + ASSERT_THAT(rmdir(dir.path().c_str()), SyscallFailsWithErrno(EBUSY)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/mremap.cc b/test/syscalls/linux/mremap.cc new file mode 100644 index 
000000000..f0e5f7d82 --- /dev/null +++ b/test/syscalls/linux/mremap.cc @@ -0,0 +1,492 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <errno.h> +#include <string.h> +#include <sys/mman.h> + +#include <string> + +#include "gmock/gmock.h" +#include "absl/strings/string_view.h" +#include "test/util/file_descriptor.h" +#include "test/util/logging.h" +#include "test/util/memory_util.h" +#include "test/util/multiprocess_util.h" +#include "test/util/posix_error.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +using ::testing::_; + +namespace gvisor { +namespace testing { + +namespace { + +// Fixture for mremap tests parameterized by mmap flags. +using MremapParamTest = ::testing::TestWithParam<int>; + +TEST_P(MremapParamTest, Noop) { + Mapping const m = + ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_NONE, GetParam())); + + ASSERT_THAT(Mremap(m.ptr(), kPageSize, kPageSize, 0, nullptr), + IsPosixErrorOkAndHolds(m.ptr())); + EXPECT_TRUE(IsMapped(m.addr())); +} + +TEST_P(MremapParamTest, InPlace_ShrinkingWholeVMA) { + Mapping const m = + ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(2 * kPageSize, PROT_NONE, GetParam())); + + const auto rest = [&] { + // N.B. we must be in a single-threaded subprocess to ensure a + // background thread doesn't concurrently map the second page. 
+ void* addr = mremap(m.ptr(), 2 * kPageSize, kPageSize, 0, nullptr); + TEST_PCHECK_MSG(addr != MAP_FAILED, "mremap failed"); + TEST_CHECK(addr == m.ptr()); + MaybeSave(); + + TEST_CHECK(IsMapped(m.addr())); + TEST_CHECK(!IsMapped(m.addr() + kPageSize)); + }; + + EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0)); +} + +TEST_P(MremapParamTest, InPlace_ShrinkingPartialVMA) { + Mapping const m = + ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(3 * kPageSize, PROT_NONE, GetParam())); + + const auto rest = [&] { + void* addr = mremap(m.ptr(), 2 * kPageSize, kPageSize, 0, nullptr); + TEST_PCHECK_MSG(addr != MAP_FAILED, "mremap failed"); + TEST_CHECK(addr == m.ptr()); + MaybeSave(); + + TEST_CHECK(IsMapped(m.addr())); + TEST_CHECK(!IsMapped(m.addr() + kPageSize)); + TEST_CHECK(IsMapped(m.addr() + 2 * kPageSize)); + }; + + EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0)); +} + +TEST_P(MremapParamTest, InPlace_ShrinkingAcrossVMAs) { + Mapping const m = + ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(3 * kPageSize, PROT_READ, GetParam())); + // Changing permissions on the first page forces it to become a separate vma. + ASSERT_THAT(mprotect(m.ptr(), kPageSize, PROT_NONE), SyscallSucceeds()); + + const auto rest = [&] { + // Both old_size and new_size now span two vmas; mremap + // shouldn't care. + void* addr = mremap(m.ptr(), 3 * kPageSize, 2 * kPageSize, 0, nullptr); + TEST_PCHECK_MSG(addr != MAP_FAILED, "mremap failed"); + TEST_CHECK(addr == m.ptr()); + MaybeSave(); + + TEST_CHECK(IsMapped(m.addr())); + TEST_CHECK(IsMapped(m.addr() + kPageSize)); + TEST_CHECK(!IsMapped(m.addr() + 2 * kPageSize)); + }; + + EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0)); +} + +TEST_P(MremapParamTest, InPlace_ExpansionSuccess) { + Mapping const m = + ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(2 * kPageSize, PROT_NONE, GetParam())); + + const auto rest = [&] { + // Unmap the second page so that the first can be expanded back into it. + // + // N.B. 
we must be in a single-threaded subprocess to ensure a + // background thread doesn't concurrently map this page. + TEST_PCHECK( + munmap(reinterpret_cast<void*>(m.addr() + kPageSize), kPageSize) == 0); + MaybeSave(); + + void* addr = mremap(m.ptr(), kPageSize, 2 * kPageSize, 0, nullptr); + TEST_PCHECK_MSG(addr != MAP_FAILED, "mremap failed"); + TEST_CHECK(addr == m.ptr()); + MaybeSave(); + + TEST_CHECK(IsMapped(m.addr())); + TEST_CHECK(IsMapped(m.addr() + kPageSize)); + }; + + EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0)); +} + +TEST_P(MremapParamTest, InPlace_ExpansionFailure) { + Mapping const m = + ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(3 * kPageSize, PROT_NONE, GetParam())); + + const auto rest = [&] { + // Unmap the second page, leaving a one-page hole. Trying to expand the + // first page to three pages should fail since the original third page + // is still mapped. + TEST_PCHECK( + munmap(reinterpret_cast<void*>(m.addr() + kPageSize), kPageSize) == 0); + MaybeSave(); + + void* addr = mremap(m.ptr(), kPageSize, 3 * kPageSize, 0, nullptr); + TEST_CHECK_MSG(addr == MAP_FAILED, "mremap unexpectedly succeeded"); + TEST_PCHECK_MSG(errno == ENOMEM, "mremap failed with wrong errno"); + MaybeSave(); + + TEST_CHECK(IsMapped(m.addr())); + TEST_CHECK(!IsMapped(m.addr() + kPageSize)); + TEST_CHECK(IsMapped(m.addr() + 2 * kPageSize)); + }; + + EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0)); +} + +TEST_P(MremapParamTest, MayMove_Expansion) { + Mapping const m = + ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(3 * kPageSize, PROT_NONE, GetParam())); + + const auto rest = [&] { + // Unmap the second page, leaving a one-page hole. Trying to expand the + // first page to three pages with MREMAP_MAYMOVE should force the + // mapping to be relocated since the original third page is still + // mapped. 
+ TEST_PCHECK( + munmap(reinterpret_cast<void*>(m.addr() + kPageSize), kPageSize) == 0); + MaybeSave(); + + void* addr2 = + mremap(m.ptr(), kPageSize, 3 * kPageSize, MREMAP_MAYMOVE, nullptr); + TEST_PCHECK_MSG(addr2 != MAP_FAILED, "mremap failed"); + MaybeSave(); + + const Mapping m2 = Mapping(addr2, 3 * kPageSize); + TEST_CHECK(m.addr() != m2.addr()); + + TEST_CHECK(!IsMapped(m.addr())); + TEST_CHECK(!IsMapped(m.addr() + kPageSize)); + TEST_CHECK(IsMapped(m.addr() + 2 * kPageSize)); + TEST_CHECK(IsMapped(m2.addr())); + TEST_CHECK(IsMapped(m2.addr() + kPageSize)); + TEST_CHECK(IsMapped(m2.addr() + 2 * kPageSize)); + }; + + EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0)); +} + +TEST_P(MremapParamTest, Fixed_SourceAndDestinationCannotOverlap) { + Mapping const m = + ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_NONE, GetParam())); + + ASSERT_THAT(Mremap(m.ptr(), kPageSize, kPageSize, + MREMAP_MAYMOVE | MREMAP_FIXED, m.ptr()), + PosixErrorIs(EINVAL, _)); + EXPECT_TRUE(IsMapped(m.addr())); +} + +TEST_P(MremapParamTest, Fixed_SameSize) { + Mapping const src = + ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_NONE, GetParam())); + Mapping const dst = + ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_NONE, GetParam())); + + const auto rest = [&] { + // Unmap dst to create a hole. + TEST_PCHECK(munmap(dst.ptr(), kPageSize) == 0); + MaybeSave(); + + void* addr = mremap(src.ptr(), kPageSize, kPageSize, + MREMAP_MAYMOVE | MREMAP_FIXED, dst.ptr()); + TEST_PCHECK_MSG(addr != MAP_FAILED, "mremap failed"); + TEST_CHECK(addr == dst.ptr()); + MaybeSave(); + + TEST_CHECK(!IsMapped(src.addr())); + TEST_CHECK(IsMapped(dst.addr())); + }; + + EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0)); +} + +TEST_P(MremapParamTest, Fixed_SameSize_Unmapping) { + // Like the Fixed_SameSize case, but expect mremap to unmap the destination + // automatically. 
+ Mapping const src = + ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_NONE, GetParam())); + Mapping const dst = + ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_NONE, GetParam())); + + const auto rest = [&] { + void* addr = mremap(src.ptr(), kPageSize, kPageSize, + MREMAP_MAYMOVE | MREMAP_FIXED, dst.ptr()); + TEST_PCHECK_MSG(addr != MAP_FAILED, "mremap failed"); + TEST_CHECK(addr == dst.ptr()); + MaybeSave(); + + TEST_CHECK(!IsMapped(src.addr())); + TEST_CHECK(IsMapped(dst.addr())); + }; + + EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0)); +} + +TEST_P(MremapParamTest, Fixed_ShrinkingWholeVMA) { + Mapping const src = + ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(2 * kPageSize, PROT_NONE, GetParam())); + Mapping const dst = + ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(2 * kPageSize, PROT_NONE, GetParam())); + + const auto rest = [&] { + // Unmap dst so we can check that mremap does not keep the + // second page. + TEST_PCHECK(munmap(dst.ptr(), 2 * kPageSize) == 0); + MaybeSave(); + + void* addr = mremap(src.ptr(), 2 * kPageSize, kPageSize, + MREMAP_MAYMOVE | MREMAP_FIXED, dst.ptr()); + TEST_PCHECK_MSG(addr != MAP_FAILED, "mremap failed"); + TEST_CHECK(addr == dst.ptr()); + MaybeSave(); + + TEST_CHECK(!IsMapped(src.addr())); + TEST_CHECK(!IsMapped(src.addr() + kPageSize)); + TEST_CHECK(IsMapped(dst.addr())); + TEST_CHECK(!IsMapped(dst.addr() + kPageSize)); + }; + + EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0)); +} + +TEST_P(MremapParamTest, Fixed_ShrinkingPartialVMA) { + Mapping const src = + ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(3 * kPageSize, PROT_NONE, GetParam())); + Mapping const dst = + ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(2 * kPageSize, PROT_NONE, GetParam())); + + const auto rest = [&] { + // Unmap dst so we can check that mremap does not keep the + // second page. 
+ TEST_PCHECK(munmap(dst.ptr(), 2 * kPageSize) == 0); + MaybeSave(); + + void* addr = mremap(src.ptr(), 2 * kPageSize, kPageSize, + MREMAP_MAYMOVE | MREMAP_FIXED, dst.ptr()); + TEST_PCHECK_MSG(addr != MAP_FAILED, "mremap failed"); + TEST_CHECK(addr == dst.ptr()); + MaybeSave(); + + TEST_CHECK(!IsMapped(src.addr())); + TEST_CHECK(!IsMapped(src.addr() + kPageSize)); + TEST_CHECK(IsMapped(src.addr() + 2 * kPageSize)); + TEST_CHECK(IsMapped(dst.addr())); + TEST_CHECK(!IsMapped(dst.addr() + kPageSize)); + }; + + EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0)); +} + +TEST_P(MremapParamTest, Fixed_ShrinkingAcrossVMAs) { + Mapping const src = + ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(3 * kPageSize, PROT_READ, GetParam())); + // Changing permissions on the first page forces it to become a separate vma. + ASSERT_THAT(mprotect(src.ptr(), kPageSize, PROT_NONE), SyscallSucceeds()); + Mapping const dst = + ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(2 * kPageSize, PROT_NONE, GetParam())); + + const auto rest = [&] { + // Unlike flags=0, MREMAP_FIXED requires that [old_address, + // old_address+new_size) only spans a single vma. + void* addr = mremap(src.ptr(), 3 * kPageSize, 2 * kPageSize, + MREMAP_MAYMOVE | MREMAP_FIXED, dst.ptr()); + TEST_CHECK_MSG(addr == MAP_FAILED, "mremap unexpectedly succeeded"); + TEST_PCHECK_MSG(errno == EFAULT, "mremap failed with wrong errno"); + MaybeSave(); + + TEST_CHECK(IsMapped(src.addr())); + TEST_CHECK(IsMapped(src.addr() + kPageSize)); + // Despite failing, mremap should have unmapped [old_address+new_size, + // old_address+old_size) (i.e. the third page). + TEST_CHECK(!IsMapped(src.addr() + 2 * kPageSize)); + // Despite failing, mremap should have unmapped the destination pages. 
+ TEST_CHECK(!IsMapped(dst.addr())); + TEST_CHECK(!IsMapped(dst.addr() + kPageSize)); + }; + + EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0)); +} + +TEST_P(MremapParamTest, Fixed_Expansion) { + Mapping const src = + ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_NONE, GetParam())); + Mapping const dst = + ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(2 * kPageSize, PROT_NONE, GetParam())); + + const auto rest = [&] { + // Unmap dst so we can check that mremap actually maps all pages + // at the destination. + TEST_PCHECK(munmap(dst.ptr(), 2 * kPageSize) == 0); + MaybeSave(); + + void* addr = mremap(src.ptr(), kPageSize, 2 * kPageSize, + MREMAP_MAYMOVE | MREMAP_FIXED, dst.ptr()); + TEST_PCHECK_MSG(addr != MAP_FAILED, "mremap failed"); + TEST_CHECK(addr == dst.ptr()); + MaybeSave(); + + TEST_CHECK(!IsMapped(src.addr())); + TEST_CHECK(IsMapped(dst.addr())); + TEST_CHECK(IsMapped(dst.addr() + kPageSize)); + }; + + EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0)); +} + +INSTANTIATE_TEST_SUITE_P(PrivateShared, MremapParamTest, + ::testing::Values(MAP_PRIVATE, MAP_SHARED)); + +// mremap with old_size == 0 only works with MAP_SHARED after Linux 4.14 +// (dba58d3b8c50 "mm/mremap: fail map duplication attempts for private +// mappings"). + +TEST(MremapTest, InPlace_Copy) { + Mapping const m = + ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_NONE, MAP_SHARED)); + EXPECT_THAT(Mremap(m.ptr(), 0, kPageSize, 0, nullptr), + PosixErrorIs(ENOMEM, _)); +} + +TEST(MremapTest, MayMove_Copy) { + Mapping const m = + ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_NONE, MAP_SHARED)); + + // Remainder of this test executes in a subprocess to ensure that if mremap + // incorrectly removes m, it is not remapped by another thread. 
+ const auto rest = [&] { + void* ptr = mremap(m.ptr(), 0, kPageSize, MREMAP_MAYMOVE, nullptr); + MaybeSave(); + TEST_PCHECK_MSG(ptr != MAP_FAILED, "mremap failed"); + TEST_CHECK(ptr != m.ptr()); + TEST_CHECK(IsMapped(m.addr())); + TEST_CHECK(IsMapped(reinterpret_cast<uintptr_t>(ptr))); + }; + EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0)); +} + +TEST(MremapTest, MustMove_Copy) { + Mapping const src = + ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_NONE, MAP_SHARED)); + Mapping const dst = + ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_NONE, MAP_PRIVATE)); + + // Remainder of this test executes in a subprocess to ensure that if mremap + // incorrectly removes src, it is not remapped by another thread. + const auto rest = [&] { + void* ptr = mremap(src.ptr(), 0, kPageSize, MREMAP_MAYMOVE | MREMAP_FIXED, + dst.ptr()); + MaybeSave(); + TEST_PCHECK_MSG(ptr != MAP_FAILED, "mremap failed"); + TEST_CHECK(ptr == dst.ptr()); + TEST_CHECK(IsMapped(src.addr())); + TEST_CHECK(IsMapped(dst.addr())); + }; + EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0)); +} + +void ExpectAllBytesAre(absl::string_view v, char c) { + for (size_t i = 0; i < v.size(); i++) { + ASSERT_EQ(v[i], c) << "at offset " << i; + } +} + +TEST(MremapTest, ExpansionPreservesCOWPagesAndExposesNewFilePages) { + // Create a file with 3 pages. The first is filled with 'a', the second is + // filled with 'b', and the third is filled with 'c'. 
+ TempPath const file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR)); + ASSERT_THAT(WriteFd(fd.get(), std::string(kPageSize, 'a').c_str(), kPageSize), + SyscallSucceedsWithValue(kPageSize)); + ASSERT_THAT(WriteFd(fd.get(), std::string(kPageSize, 'b').c_str(), kPageSize), + SyscallSucceedsWithValue(kPageSize)); + ASSERT_THAT(WriteFd(fd.get(), std::string(kPageSize, 'c').c_str(), kPageSize), + SyscallSucceedsWithValue(kPageSize)); + + // Create a private mapping of the first 2 pages, and fill the second page + // with 'd'. + Mapping const src = ASSERT_NO_ERRNO_AND_VALUE(Mmap(nullptr, 2 * kPageSize, + PROT_READ | PROT_WRITE, + MAP_PRIVATE, fd.get(), 0)); + memset(reinterpret_cast<void*>(src.addr() + kPageSize), 'd', kPageSize); + MaybeSave(); + + // Move the mapping while expanding it to 3 pages. The resulting mapping + // should contain the original first page of the file (filled with 'a'), + // followed by the private copy of the second page (filled with 'd'), followed + // by the newly-mapped third page of the file (filled with 'c'). + Mapping const dst = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(3 * kPageSize, PROT_NONE, MAP_PRIVATE)); + ASSERT_THAT(Mremap(src.ptr(), 2 * kPageSize, 3 * kPageSize, + MREMAP_MAYMOVE | MREMAP_FIXED, dst.ptr()), + IsPosixErrorOkAndHolds(dst.ptr())); + auto const v = dst.view(); + ExpectAllBytesAre(v.substr(0, kPageSize), 'a'); + ExpectAllBytesAre(v.substr(kPageSize, kPageSize), 'd'); + ExpectAllBytesAre(v.substr(2 * kPageSize, kPageSize), 'c'); +} + +TEST(MremapDeathTest, SharedAnon) { + SetupGvisorDeathTest(); + + // Reserve 4 pages of address space. + Mapping const reserved = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(4 * kPageSize, PROT_NONE, MAP_PRIVATE)); + + // Create a 2-page shared anonymous mapping at the beginning of the + // reservation. Fill the first page with 'a' and the second with 'b'. 
+ Mapping const m = ASSERT_NO_ERRNO_AND_VALUE( + Mmap(reserved.ptr(), 2 * kPageSize, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0)); + memset(m.ptr(), 'a', kPageSize); + memset(reinterpret_cast<void*>(m.addr() + kPageSize), 'b', kPageSize); + MaybeSave(); + + // Shrink the mapping to 1 page in-place. + ASSERT_THAT(Mremap(m.ptr(), 2 * kPageSize, kPageSize, 0, m.ptr()), + IsPosixErrorOkAndHolds(m.ptr())); + + // Expand the mapping to 3 pages, moving it forward by 1 page in the process + // since the old and new mappings can't overlap. + void* const new_m = reinterpret_cast<void*>(m.addr() + kPageSize); + ASSERT_THAT(Mremap(m.ptr(), kPageSize, 3 * kPageSize, + MREMAP_MAYMOVE | MREMAP_FIXED, new_m), + IsPosixErrorOkAndHolds(new_m)); + + // The first 2 pages of the mapping should still contain the data we wrote + // (i.e. shrinking should not have discarded the second page's data), while + // touching the third page should raise SIGBUS. + auto const v = + absl::string_view(static_cast<char const*>(new_m), 3 * kPageSize); + ExpectAllBytesAre(v.substr(0, kPageSize), 'a'); + ExpectAllBytesAre(v.substr(kPageSize, kPageSize), 'b'); + EXPECT_EXIT(ExpectAllBytesAre(v.substr(2 * kPageSize, kPageSize), '\0'), + ::testing::KilledBySignal(SIGBUS), ""); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/msync.cc b/test/syscalls/linux/msync.cc new file mode 100644 index 000000000..2b2b6aef9 --- /dev/null +++ b/test/syscalls/linux/msync.cc @@ -0,0 +1,151 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <sys/mman.h> +#include <unistd.h> + +#include <functional> +#include <string> +#include <utility> +#include <vector> + +#include "test/util/file_descriptor.h" +#include "test/util/memory_util.h" +#include "test/util/posix_error.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// Parameters for msync tests. Use a std::tuple so we can use +// ::testing::Combine. +using MsyncTestParam = + std::tuple<int, // msync flags + std::function<PosixErrorOr<Mapping>()> // returns mapping to + // msync + >; + +class MsyncParameterizedTest : public ::testing::TestWithParam<MsyncTestParam> { + protected: + int msync_flags() const { return std::get<0>(GetParam()); } + + PosixErrorOr<Mapping> GetMapping() const { return std::get<1>(GetParam())(); } +}; + +// All valid msync(2) flag combinations, not including MS_INVALIDATE. ("Linux +// permits a call to msync() that specifies neither [MS_SYNC or MS_ASYNC], with +// semantics that are (currently) equivalent to specifying MS_ASYNC." - +// msync(2)) +constexpr std::initializer_list<int> kMsyncFlags = {MS_SYNC, MS_ASYNC, 0}; + +// Returns functions that return mappings that should be successfully +// msync()able. +std::vector<std::function<PosixErrorOr<Mapping>()>> SyncableMappings() { + std::vector<std::function<PosixErrorOr<Mapping>()>> funcs; + for (bool const writable : {false, true}) { + for (int const mflags : {MAP_PRIVATE, MAP_SHARED}) { + int const prot = PROT_READ | (writable ? 
PROT_WRITE : 0); + int const oflags = O_CREAT | (writable ? O_RDWR : O_RDONLY); + funcs.push_back([=] { return MmapAnon(kPageSize, prot, mflags); }); + funcs.push_back([=]() -> PosixErrorOr<Mapping> { + std::string const path = NewTempAbsPath(); + ASSIGN_OR_RETURN_ERRNO(auto fd, Open(path, oflags, 0644)); + // Don't unlink the file since that breaks save/restore. Just let the + // test infrastructure clean up all of our temporary files when we're + // done. + return Mmap(nullptr, kPageSize, prot, mflags, fd.get(), 0); + }); + } + } + return funcs; +} + +PosixErrorOr<Mapping> NoMappings() { + return PosixError(EINVAL, "unexpected attempt to create a mapping"); +} + +// "Fixture" for msync tests that hold for all valid flags, but do not create +// mappings. +using MsyncNoMappingTest = MsyncParameterizedTest; + +TEST_P(MsyncNoMappingTest, UnmappedAddressWithZeroLengthSucceeds) { + EXPECT_THAT(msync(nullptr, 0, msync_flags()), SyscallSucceeds()); +} + +TEST_P(MsyncNoMappingTest, UnmappedAddressWithNonzeroLengthFails) { + EXPECT_THAT(msync(nullptr, kPageSize, msync_flags()), + SyscallFailsWithErrno(ENOMEM)); +} + +INSTANTIATE_TEST_SUITE_P(All, MsyncNoMappingTest, + ::testing::Combine(::testing::ValuesIn(kMsyncFlags), + ::testing::Values(NoMappings))); + +// "Fixture" for msync tests that are not parameterized by msync flags, but do +// create mappings. +using MsyncNoFlagsTest = MsyncParameterizedTest; + +TEST_P(MsyncNoFlagsTest, BothSyncAndAsyncFails) { + auto m = ASSERT_NO_ERRNO_AND_VALUE(GetMapping()); + EXPECT_THAT(msync(m.ptr(), m.len(), MS_SYNC | MS_ASYNC), + SyscallFailsWithErrno(EINVAL)); +} + +INSTANTIATE_TEST_SUITE_P( + All, MsyncNoFlagsTest, + ::testing::Combine(::testing::Values(0), // ignored + ::testing::ValuesIn(SyncableMappings()))); + +// "Fixture" for msync tests parameterized by both msync flags and sources of +// mappings. 
+using MsyncFullParamTest = MsyncParameterizedTest; + +TEST_P(MsyncFullParamTest, NormallySucceeds) { + auto m = ASSERT_NO_ERRNO_AND_VALUE(GetMapping()); + EXPECT_THAT(msync(m.ptr(), m.len(), msync_flags()), SyscallSucceeds()); +} + +TEST_P(MsyncFullParamTest, UnalignedLengthSucceeds) { + auto m = ASSERT_NO_ERRNO_AND_VALUE(GetMapping()); + EXPECT_THAT(msync(m.ptr(), m.len() - 1, msync_flags()), SyscallSucceeds()); +} + +TEST_P(MsyncFullParamTest, UnalignedAddressFails) { + auto m = ASSERT_NO_ERRNO_AND_VALUE(GetMapping()); + EXPECT_THAT( + msync(reinterpret_cast<void*>(m.addr() + 1), m.len() - 1, msync_flags()), + SyscallFailsWithErrno(EINVAL)); +} + +TEST_P(MsyncFullParamTest, InvalidateUnlockedSucceeds) { + auto m = ASSERT_NO_ERRNO_AND_VALUE(GetMapping()); + EXPECT_THAT(msync(m.ptr(), m.len(), msync_flags() | MS_INVALIDATE), + SyscallSucceeds()); +} + +// The test for MS_INVALIDATE on mlocked pages is in mlock.cc since it requires +// probing for mlock support. + +INSTANTIATE_TEST_SUITE_P( + All, MsyncFullParamTest, + ::testing::Combine(::testing::ValuesIn(kMsyncFlags), + ::testing::ValuesIn(SyncableMappings()))); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/munmap.cc b/test/syscalls/linux/munmap.cc new file mode 100644 index 000000000..067241f4d --- /dev/null +++ b/test/syscalls/linux/munmap.cc @@ -0,0 +1,53 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <sys/mman.h> + +#include "gtest/gtest.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +class MunmapTest : public ::testing::Test { + protected: + void SetUp() override { + m_ = mmap(nullptr, kPageSize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(MAP_FAILED, m_); + } + + void* m_ = nullptr; +}; + +TEST_F(MunmapTest, HappyCase) { + EXPECT_THAT(munmap(m_, kPageSize), SyscallSucceeds()); +} + +TEST_F(MunmapTest, ZeroLength) { + EXPECT_THAT(munmap(m_, 0), SyscallFailsWithErrno(EINVAL)); +} + +TEST_F(MunmapTest, LastPageRoundUp) { + // Attempt to unmap up to and including the last page. + EXPECT_THAT(munmap(m_, static_cast<size_t>(-kPageSize + 1)), + SyscallFailsWithErrno(EINVAL)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/network_namespace.cc b/test/syscalls/linux/network_namespace.cc new file mode 100644 index 000000000..133fdecf0 --- /dev/null +++ b/test/syscalls/linux/network_namespace.cc @@ -0,0 +1,52 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <net/if.h> +#include <sched.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/types.h> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/capability_util.h" +#include "test/util/posix_error.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { +namespace { + +TEST(NetworkNamespaceTest, LoopbackExists) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN))); + + ScopedThread t([&] { + ASSERT_THAT(unshare(CLONE_NEWNET), SyscallSucceedsWithValue(0)); + + // TODO(gvisor.dev/issue/1833): Update this to test that only "lo" exists. + // Check loopback device exists. + int sock = socket(AF_INET, SOCK_DGRAM, 0); + ASSERT_THAT(sock, SyscallSucceeds()); + struct ifreq ifr; + strncpy(ifr.ifr_name, "lo", IFNAMSIZ); + EXPECT_THAT(ioctl(sock, SIOCGIFINDEX, &ifr), SyscallSucceeds()) + << "lo cannot be found"; + }); +} + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/open.cc b/test/syscalls/linux/open.cc new file mode 100644 index 000000000..bb7d108e8 --- /dev/null +++ b/test/syscalls/linux/open.cc @@ -0,0 +1,451 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <errno.h> +#include <fcntl.h> +#include <linux/capability.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/memory/memory.h" +#include "test/syscalls/linux/file_base.h" +#include "test/util/capability_util.h" +#include "test/util/cleanup.h" +#include "test/util/file_descriptor.h" +#include "test/util/fs_util.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// This test is currently very rudimentary. +// +// There are plenty of extra cases to cover once the sentry supports them. +// +// Different types of opens: +// * O_CREAT +// * O_DIRECTORY +// * O_NOFOLLOW +// * O_PATH <- Will we ever support this? +// +// Special operations on open: +// * O_EXCL +// +// Special files: +// * Blocking behavior for a named pipe. +// +// Different errors: +// * EACCES +// * EEXIST +// * ENAMETOOLONG +// * ELOOP +// * ENOTDIR +// * EPERM +class OpenTest : public FileTest { + void SetUp() override { + FileTest::SetUp(); + + ASSERT_THAT( + write(test_file_fd_.get(), test_data_.c_str(), test_data_.length()), + SyscallSucceedsWithValue(test_data_.length())); + EXPECT_THAT(lseek(test_file_fd_.get(), 0, SEEK_SET), SyscallSucceeds()); + } + + public: + const std::string test_data_ = "hello world\n"; +}; + +TEST_F(OpenTest, OTrunc) { + auto dirpath = JoinPath(GetAbsoluteTestTmpdir(), "truncd"); + ASSERT_THAT(mkdir(dirpath.c_str(), 0777), SyscallSucceeds()); + ASSERT_THAT(open(dirpath.c_str(), O_TRUNC, 0666), + SyscallFailsWithErrno(EISDIR)); +} + +TEST_F(OpenTest, OTruncAndReadOnlyDir) { + auto dirpath = JoinPath(GetAbsoluteTestTmpdir(), "truncd"); + ASSERT_THAT(mkdir(dirpath.c_str(), 0777), SyscallSucceeds()); + ASSERT_THAT(open(dirpath.c_str(), O_TRUNC | O_RDONLY, 0666), + SyscallFailsWithErrno(EISDIR)); +} + +TEST_F(OpenTest, OTruncAndReadOnlyFile) { + auto dirpath = 
JoinPath(GetAbsoluteTestTmpdir(), "truncfile"); + const FileDescriptor existing = + ASSERT_NO_ERRNO_AND_VALUE(Open(dirpath.c_str(), O_RDWR | O_CREAT, 0666)); + const FileDescriptor otrunc = ASSERT_NO_ERRNO_AND_VALUE( + Open(dirpath.c_str(), O_TRUNC | O_RDONLY, 0666)); +} + +TEST_F(OpenTest, ReadOnly) { + char buf; + const FileDescriptor ro_file = + ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDONLY)); + + EXPECT_THAT(read(ro_file.get(), &buf, 1), SyscallSucceedsWithValue(1)); + EXPECT_THAT(lseek(ro_file.get(), 0, SEEK_SET), SyscallSucceeds()); + EXPECT_THAT(write(ro_file.get(), &buf, 1), SyscallFailsWithErrno(EBADF)); +} + +TEST_F(OpenTest, WriteOnly) { + char buf; + const FileDescriptor wo_file = + ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_WRONLY)); + + EXPECT_THAT(read(wo_file.get(), &buf, 1), SyscallFailsWithErrno(EBADF)); + EXPECT_THAT(lseek(wo_file.get(), 0, SEEK_SET), SyscallSucceeds()); + EXPECT_THAT(write(wo_file.get(), &buf, 1), SyscallSucceedsWithValue(1)); +} + +TEST_F(OpenTest, ReadWrite) { + char buf; + const FileDescriptor rw_file = + ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR)); + + EXPECT_THAT(read(rw_file.get(), &buf, 1), SyscallSucceedsWithValue(1)); + EXPECT_THAT(lseek(rw_file.get(), 0, SEEK_SET), SyscallSucceeds()); + EXPECT_THAT(write(rw_file.get(), &buf, 1), SyscallSucceedsWithValue(1)); +} + +TEST_F(OpenTest, RelPath) { + auto name = std::string(Basename(test_file_name_)); + + ASSERT_THAT(chdir(GetAbsoluteTestTmpdir().c_str()), SyscallSucceeds()); + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(name, O_RDONLY)); +} + +TEST_F(OpenTest, AbsPath) { + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDONLY)); +} + +TEST_F(OpenTest, AtRelPath) { + auto name = std::string(Basename(test_file_name_)); + const FileDescriptor dirfd = ASSERT_NO_ERRNO_AND_VALUE( + Open(GetAbsoluteTestTmpdir(), O_RDONLY | O_DIRECTORY)); + const FileDescriptor fd = + 
ASSERT_NO_ERRNO_AND_VALUE(OpenAt(dirfd.get(), name, O_RDONLY)); +} + +TEST_F(OpenTest, AtAbsPath) { + const FileDescriptor dirfd = ASSERT_NO_ERRNO_AND_VALUE( + Open(GetAbsoluteTestTmpdir(), O_RDONLY | O_DIRECTORY)); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(OpenAt(dirfd.get(), test_file_name_, O_RDONLY)); +} + +TEST_F(OpenTest, OpenNoFollowSymlink) { + const std::string link_path = JoinPath(GetAbsoluteTestTmpdir(), "link"); + ASSERT_THAT(symlink(test_file_name_.c_str(), link_path.c_str()), + SyscallSucceeds()); + auto cleanup = Cleanup([link_path]() { + EXPECT_THAT(unlink(link_path.c_str()), SyscallSucceeds()); + }); + + // Open will succeed without O_NOFOLLOW and fails with O_NOFOLLOW. + const FileDescriptor fd2 = + ASSERT_NO_ERRNO_AND_VALUE(Open(link_path, O_RDONLY)); + ASSERT_THAT(open(link_path.c_str(), O_RDONLY | O_NOFOLLOW), + SyscallFailsWithErrno(ELOOP)); +} + +TEST_F(OpenTest, OpenNoFollowStillFollowsLinksInPath) { + // We will create the following structure: + // tmp_folder/real_folder/file + // tmp_folder/sym_folder -> tmp_folder/real_folder + // + // We will then open tmp_folder/sym_folder/file with O_NOFOLLOW and it + // should succeed as O_NOFOLLOW only applies to the final path component. + auto tmp_path = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(GetAbsoluteTestTmpdir())); + auto sym_path = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateSymlinkTo(GetAbsoluteTestTmpdir(), tmp_path.path())); + auto file_path = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(tmp_path.path())); + + auto path_via_symlink = JoinPath(sym_path.path(), Basename(file_path.path())); + const FileDescriptor fd2 = + ASSERT_NO_ERRNO_AND_VALUE(Open(path_via_symlink, O_RDONLY | O_NOFOLLOW)); +} + +// Test that open(2) can follow symlinks that point back to the same tree. +// Test sets up files as follows: +// root/child/symlink => redirects to ../.. 
+// root/child/target => regular file +// +// open("root/child/symlink/root/child/file") +TEST_F(OpenTest, SymlinkRecurse) { + auto root = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(GetAbsoluteTestTmpdir())); + auto child = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(root.path())); + auto symlink = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateSymlinkTo(child.path(), "../..")); + auto target = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateFileWith(child.path(), "abc", 0644)); + auto path_via_symlink = + JoinPath(symlink.path(), Basename(root.path()), Basename(child.path()), + Basename(target.path())); + const auto contents = + ASSERT_NO_ERRNO_AND_VALUE(GetContents(path_via_symlink)); + ASSERT_EQ(contents, "abc"); +} + +TEST_F(OpenTest, Fault) { + char* totally_not_null = nullptr; + ASSERT_THAT(open(totally_not_null, O_RDONLY), SyscallFailsWithErrno(EFAULT)); +} + +TEST_F(OpenTest, AppendOnly) { + // First write some data to the fresh file. + const int64_t kBufSize = 1024; + std::vector<char> buf(kBufSize, 'a'); + + FileDescriptor fd0 = ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR)); + + std::fill(buf.begin(), buf.end(), 'a'); + EXPECT_THAT(WriteFd(fd0.get(), buf.data(), buf.size()), + SyscallSucceedsWithValue(buf.size())); + fd0.reset(); // Close the file early. + + // Next get two handles to the same file. We open two files because we want + // to make sure that appending is respected between them. + const FileDescriptor fd1 = + ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR | O_APPEND)); + EXPECT_THAT(lseek(fd1.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(0)); + + const FileDescriptor fd2 = + ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR | O_APPEND)); + EXPECT_THAT(lseek(fd2.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(0)); + + // Then try to write to the first file and make sure the bytes are appended. 
+ EXPECT_THAT(WriteFd(fd1.get(), buf.data(), buf.size()), + SyscallSucceedsWithValue(buf.size())); + + // Check that the size of the file is correct and that the offset has been + // incremented to that size. + struct stat s0; + EXPECT_THAT(fstat(fd1.get(), &s0), SyscallSucceeds()); + EXPECT_EQ(s0.st_size, kBufSize * 2); + EXPECT_THAT(lseek(fd1.get(), 0, SEEK_CUR), + SyscallSucceedsWithValue(kBufSize * 2)); + + // Then try to write to the second file and make sure the bytes are appended. + EXPECT_THAT(WriteFd(fd2.get(), buf.data(), buf.size()), + SyscallSucceedsWithValue(buf.size())); + + // Check that the size of the file is correct and that the offset has been + // incremented to that size. + struct stat s1; + EXPECT_THAT(fstat(fd2.get(), &s1), SyscallSucceeds()); + EXPECT_EQ(s1.st_size, kBufSize * 3); + EXPECT_THAT(lseek(fd2.get(), 0, SEEK_CUR), + SyscallSucceedsWithValue(kBufSize * 3)); +} + +TEST_F(OpenTest, AppendConcurrentWrite) { + constexpr int kThreadCount = 5; + constexpr int kBytesPerThread = 10000; + std::unique_ptr<ScopedThread> threads[kThreadCount]; + + // In case of the uncached policy, we expect that a file system can be changed + // externally, so we create a new inode each time when we open a file and we + // can't guarantee that writes to files with O_APPEND will work correctly. + SKIP_IF(getenv("GVISOR_GOFER_UNCACHED")); + + EXPECT_THAT(truncate(test_file_name_.c_str(), 0), SyscallSucceeds()); + + std::string filename = test_file_name_; + DisableSave ds; // Too many syscalls. + // Start kThreadCount threads which will write concurrently into the same + // file. 
+ for (int i = 0; i < kThreadCount; i++) { + threads[i] = absl::make_unique<ScopedThread>([filename]() { + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(filename, O_RDWR | O_APPEND)); + + for (int j = 0; j < kBytesPerThread; j++) { + EXPECT_THAT(WriteFd(fd.get(), &j, 1), SyscallSucceedsWithValue(1)); + } + }); + } + for (int i = 0; i < kThreadCount; i++) { + threads[i]->Join(); + } + + // Check that the size of the file is correct. + struct stat st; + EXPECT_THAT(stat(test_file_name_.c_str(), &st), SyscallSucceeds()); + EXPECT_EQ(st.st_size, kThreadCount * kBytesPerThread); +} + +TEST_F(OpenTest, Truncate) { + { + // First write some data to the new file and close it. + FileDescriptor fd0 = + ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_WRONLY)); + std::vector<char> orig(10, 'a'); + EXPECT_THAT(WriteFd(fd0.get(), orig.data(), orig.size()), + SyscallSucceedsWithValue(orig.size())); + } + + // Then open with truncate and verify that offset is set to 0. + const FileDescriptor fd1 = + ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR | O_TRUNC)); + EXPECT_THAT(lseek(fd1.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(0)); + + // Then write less data to the file and ensure the old content is gone. + std::vector<char> want(5, 'b'); + EXPECT_THAT(WriteFd(fd1.get(), want.data(), want.size()), + SyscallSucceedsWithValue(want.size())); + + struct stat stat; + EXPECT_THAT(fstat(fd1.get(), &stat), SyscallSucceeds()); + EXPECT_EQ(stat.st_size, want.size()); + EXPECT_THAT(lseek(fd1.get(), 0, SEEK_CUR), + SyscallSucceedsWithValue(want.size())); + + // Read the data and ensure only the latest write is in the file. + std::vector<char> got(want.size() + 1, 'c'); + ASSERT_THAT(pread(fd1.get(), got.data(), got.size(), 0), + SyscallSucceedsWithValue(want.size())); + EXPECT_EQ(memcmp(want.data(), got.data(), want.size()), 0) + << "rbuf=" << got.data(); + EXPECT_EQ(got.back(), 'c'); // Last byte should not have been modified. 
+} + +TEST_F(OpenTest, NameTooLong) { + char buf[4097] = {}; + memset(buf, 'a', 4097); + EXPECT_THAT(open(buf, O_RDONLY), SyscallFailsWithErrno(ENAMETOOLONG)); +} + +TEST_F(OpenTest, DotsFromRoot) { + const FileDescriptor rootfd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/", O_RDONLY | O_DIRECTORY)); + const FileDescriptor other_rootfd = + ASSERT_NO_ERRNO_AND_VALUE(OpenAt(rootfd.get(), "..", O_RDONLY)); +} + +TEST_F(OpenTest, DirectoryWritableFails) { + ASSERT_THAT(open(GetAbsoluteTestTmpdir().c_str(), O_RDWR), + SyscallFailsWithErrno(EISDIR)); +} + +TEST_F(OpenTest, FileNotDirectory) { + // Create a file and try to open it with O_DIRECTORY. + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + ASSERT_THAT(open(file.path().c_str(), O_RDONLY | O_DIRECTORY), + SyscallFailsWithErrno(ENOTDIR)); +} + +TEST_F(OpenTest, Null) { + char c = '\0'; + ASSERT_THAT(open(&c, O_RDONLY), SyscallFailsWithErrno(ENOENT)); +} + +// NOTE(b/119785738): While the man pages specify that this behavior should be +// undefined, Linux truncates the file on opening read only if we have write +// permission, so we will too. +TEST_F(OpenTest, CanTruncateReadOnly) { + const FileDescriptor fd1 = + ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDONLY | O_TRUNC)); + + struct stat stat; + EXPECT_THAT(fstat(fd1.get(), &stat), SyscallSucceeds()); + EXPECT_EQ(stat.st_size, 0); +} + +// If we don't have read permission on the file, opening with +// O_TRUNC should fail. +TEST_F(OpenTest, CanTruncateReadOnlyNoWritePermission_NoRandomSave) { + // Drop capabilities that allow us to override file permissions. + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + + const DisableSave ds; // Permissions are dropped. 
+ ASSERT_THAT(chmod(test_file_name_.c_str(), S_IRUSR | S_IRGRP), + SyscallSucceeds()); + + ASSERT_THAT(open(test_file_name_.c_str(), O_RDONLY | O_TRUNC), + SyscallFailsWithErrno(EACCES)); + + const FileDescriptor fd1 = + ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDONLY)); + + struct stat stat; + EXPECT_THAT(fstat(fd1.get(), &stat), SyscallSucceeds()); + EXPECT_EQ(stat.st_size, test_data_.size()); +} + +// If we don't have read permission but have write permission, opening O_WRONLY +// and O_TRUNC should succeed. +TEST_F(OpenTest, CanTruncateWriteOnlyNoReadPermission_NoRandomSave) { + const DisableSave ds; // Permissions are dropped. + + EXPECT_THAT(fchmod(test_file_fd_.get(), S_IWUSR | S_IWGRP), + SyscallSucceeds()); + + const FileDescriptor fd1 = + ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_WRONLY | O_TRUNC)); + + EXPECT_THAT(fchmod(test_file_fd_.get(), S_IRUSR | S_IRGRP), + SyscallSucceeds()); + + const FileDescriptor fd2 = + ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDONLY)); + + struct stat stat; + EXPECT_THAT(fstat(fd2.get(), &stat), SyscallSucceeds()); + EXPECT_EQ(stat.st_size, 0); +} + +TEST_F(OpenTest, CanTruncateWithStrangePermissions) { + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false)); + const DisableSave ds; // Permissions are dropped. + std::string path = NewTempAbsPath(); + int fd; + // Create a file without user permissions. + EXPECT_THAT( // SAVE_BELOW + fd = open(path.c_str(), O_CREAT | O_TRUNC | O_WRONLY, 055), + SyscallSucceeds()); + EXPECT_THAT(close(fd), SyscallSucceeds()); + + // Cannot open file because we are owner and have no permissions set. + EXPECT_THAT(open(path.c_str(), O_RDONLY), SyscallFailsWithErrno(EACCES)); + + // We *can* chmod the file, because we are the owner. + EXPECT_THAT(chmod(path.c_str(), 0755), SyscallSucceeds()); + + // Now we can open the file again. 
+ EXPECT_THAT(fd = open(path.c_str(), O_RDWR), SyscallSucceeds()); + EXPECT_THAT(close(fd), SyscallSucceeds()); +} + +TEST_F(OpenTest, OpenNonDirectoryWithTrailingSlash) { + const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const std::string bad_path = file.path() + "/"; + EXPECT_THAT(open(bad_path.c_str(), O_RDONLY), SyscallFailsWithErrno(ENOTDIR)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/open_create.cc b/test/syscalls/linux/open_create.cc new file mode 100644 index 000000000..51eacf3f2 --- /dev/null +++ b/test/syscalls/linux/open_create.cc @@ -0,0 +1,155 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <errno.h> +#include <fcntl.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "test/util/capability_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/fs_util.h" +#include "test/util/temp_path.h" +#include "test/util/temp_umask.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { +TEST(CreateTest, TmpFile) { + int fd; + EXPECT_THAT(fd = open(JoinPath(GetAbsoluteTestTmpdir(), "a").c_str(), + O_RDWR | O_CREAT, 0666), + SyscallSucceeds()); + EXPECT_THAT(close(fd), SyscallSucceeds()); +} + +TEST(CreateTest, ExistingFile) { + int fd; + EXPECT_THAT( + fd = open(JoinPath(GetAbsoluteTestTmpdir(), "ExistingFile").c_str(), + O_RDWR | O_CREAT, 0666), + SyscallSucceeds()); + EXPECT_THAT(close(fd), SyscallSucceeds()); + + EXPECT_THAT( + fd = open(JoinPath(GetAbsoluteTestTmpdir(), "ExistingFile").c_str(), + O_RDWR | O_CREAT, 0666), + SyscallSucceeds()); + EXPECT_THAT(close(fd), SyscallSucceeds()); +} + +TEST(CreateTest, CreateAtFile) { + int dirfd; + EXPECT_THAT(dirfd = open(GetAbsoluteTestTmpdir().c_str(), O_DIRECTORY, 0666), + SyscallSucceeds()); + EXPECT_THAT(openat(dirfd, "CreateAtFile", O_RDWR | O_CREAT, 0666), + SyscallSucceeds()); + EXPECT_THAT(close(dirfd), SyscallSucceeds()); +} + +TEST(CreateTest, HonorsUmask_NoRandomSave) { + const DisableSave ds; // file cannot be re-opened as writable. 
+ TempUmask mask(0222); + int fd; + ASSERT_THAT( + fd = open(JoinPath(GetAbsoluteTestTmpdir(), "UmaskedFile").c_str(), + O_RDWR | O_CREAT, 0666), + SyscallSucceeds()); + struct stat statbuf; + ASSERT_THAT(fstat(fd, &statbuf), SyscallSucceeds()); + EXPECT_EQ(0444, statbuf.st_mode & 0777); + EXPECT_THAT(close(fd), SyscallSucceeds()); +} + +TEST(CreateTest, CreateExclusively) { + std::string filename = NewTempAbsPath(); + + int fd; + ASSERT_THAT(fd = open(filename.c_str(), O_CREAT | O_RDWR, 0644), + SyscallSucceeds()); + EXPECT_THAT(close(fd), SyscallSucceeds()); + + EXPECT_THAT(open(filename.c_str(), O_CREAT | O_EXCL | O_RDWR, 0644), + SyscallFailsWithErrno(EEXIST)); +} + +TEST(CreateTeast, CreatWithOTrunc) { + std::string dirpath = JoinPath(GetAbsoluteTestTmpdir(), "truncd"); + ASSERT_THAT(mkdir(dirpath.c_str(), 0777), SyscallSucceeds()); + ASSERT_THAT(open(dirpath.c_str(), O_CREAT | O_TRUNC, 0666), + SyscallFailsWithErrno(EISDIR)); +} + +TEST(CreateTeast, CreatDirWithOTruncAndReadOnly) { + std::string dirpath = JoinPath(GetAbsoluteTestTmpdir(), "truncd"); + ASSERT_THAT(mkdir(dirpath.c_str(), 0777), SyscallSucceeds()); + ASSERT_THAT(open(dirpath.c_str(), O_CREAT | O_TRUNC | O_RDONLY, 0666), + SyscallFailsWithErrno(EISDIR)); +} + +TEST(CreateTeast, CreatFileWithOTruncAndReadOnly) { + std::string dirpath = JoinPath(GetAbsoluteTestTmpdir(), "truncfile"); + int dirfd; + ASSERT_THAT(dirfd = open(dirpath.c_str(), O_RDWR | O_CREAT, 0666), + SyscallSucceeds()); + ASSERT_THAT(open(dirpath.c_str(), O_CREAT | O_TRUNC | O_RDONLY, 0666), + SyscallSucceeds()); + ASSERT_THAT(close(dirfd), SyscallSucceeds()); +} + +TEST(CreateTest, CreateFailsOnUnpermittedDir) { + // Make sure we don't have CAP_DAC_OVERRIDE, since that allows the user to + // always override directory permissions. 
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + ASSERT_THAT(open("/foo", O_CREAT | O_RDWR, 0644), + SyscallFailsWithErrno(EACCES)); +} + +TEST(CreateTest, CreateFailsOnDirWithoutWritePerms) { + // Make sure we don't have CAP_DAC_OVERRIDE, since that allows the user to + // always override directory permissions. + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + auto parent = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateDirWith(GetAbsoluteTestTmpdir(), 0555)); + auto file = JoinPath(parent.path(), "foo"); + ASSERT_THAT(open(file.c_str(), O_CREAT | O_RDWR, 0644), + SyscallFailsWithErrno(EACCES)); +} + +// A file originally created RW, but opened RO can later be opened RW. +// Regression test for b/65385065. +TEST(CreateTest, OpenCreateROThenRW) { + TempPath file(NewTempAbsPath()); + + // Create a RW file, but only open it RO. + FileDescriptor fd1 = ASSERT_NO_ERRNO_AND_VALUE( + Open(file.path(), O_CREAT | O_EXCL | O_RDONLY, 0644)); + + // Now get a RW FD. + FileDescriptor fd2 = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR)); + + // fd1 is not writable, but fd2 is. + char c = 'a'; + EXPECT_THAT(WriteFd(fd1.get(), &c, 1), SyscallFailsWithErrno(EBADF)); + EXPECT_THAT(WriteFd(fd2.get(), &c, 1), SyscallSucceedsWithValue(1)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/packet_socket.cc b/test/syscalls/linux/packet_socket.cc new file mode 100644 index 000000000..5ac68feb4 --- /dev/null +++ b/test/syscalls/linux/packet_socket.cc @@ -0,0 +1,440 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <arpa/inet.h> +#include <ifaddrs.h> +#include <linux/capability.h> +#include <linux/if_arp.h> +#include <linux/if_packet.h> +#include <net/ethernet.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/udp.h> +#include <poll.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "absl/base/internal/endian.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/capability_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/test_util.h" + +// Some of these tests involve sending packets via AF_PACKET sockets and the +// loopback interface. Because AF_PACKET circumvents so much of the networking +// stack, Linux sees these packets as "martian", i.e. they claim to be to/from +// localhost but don't have the usual associated data. Thus Linux drops them by +// default. You can see where this happens by following the code at: +// +// - net/ipv4/ip_input.c:ip_rcv_finish, which calls +// - net/ipv4/route.c:ip_route_input_noref, which calls +// - net/ipv4/route.c:ip_route_input_slow, which finds and drops martian +// packets. 
+// +// To tell Linux not to drop these packets, you need to tell it to accept our +// funny packets (which are completely valid and correct, but lack associated +// in-kernel data because we use AF_PACKET): +// +// echo 1 >> /proc/sys/net/ipv4/conf/lo/accept_local +// echo 1 >> /proc/sys/net/ipv4/conf/lo/route_localnet +// +// These tests require CAP_NET_RAW to run. + +// TODO(gvisor.dev/issue/173): gVisor support. + +namespace gvisor { +namespace testing { + +namespace { + +using ::testing::AnyOf; +using ::testing::Eq; + +constexpr char kMessage[] = "soweoneul malhaebwa"; +constexpr in_port_t kPort = 0x409c; // htons(40000) + +// +// "Cooked" tests. Cooked AF_PACKET sockets do not contain link layer +// headers, and provide link layer destination/source information via a +// returned struct sockaddr_ll. +// + +// Send kMessage via sock to loopback +void SendUDPMessage(int sock) { + struct sockaddr_in dest = {}; + dest.sin_port = kPort; + dest.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + dest.sin_family = AF_INET; + EXPECT_THAT(sendto(sock, kMessage, sizeof(kMessage), 0, + reinterpret_cast<struct sockaddr*>(&dest), sizeof(dest)), + SyscallSucceedsWithValue(sizeof(kMessage))); +} + +// Send an IP packet and make sure ETH_P_<something else> doesn't pick it up. +TEST(BasicCookedPacketTest, WrongType) { + if (!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) { + ASSERT_THAT(socket(AF_PACKET, SOCK_DGRAM, ETH_P_PUP), + SyscallFailsWithErrno(EPERM)); + GTEST_SKIP(); + } + + FileDescriptor sock = ASSERT_NO_ERRNO_AND_VALUE( + Socket(AF_PACKET, SOCK_DGRAM, htons(ETH_P_PUP))); + + // Let's use a simple IP payload: a UDP datagram. + FileDescriptor udp_sock = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, 0)); + SendUDPMessage(udp_sock.get()); + + // Wait and make sure the socket never becomes readable. 
+ struct pollfd pfd = {}; + pfd.fd = sock.get(); + pfd.events = POLLIN; + EXPECT_THAT(RetryEINTR(poll)(&pfd, 1, 1000), SyscallSucceedsWithValue(0)); +} + +// Tests for "cooked" (SOCK_DGRAM) packet(7) sockets. +class CookedPacketTest : public ::testing::TestWithParam<int> { + protected: + // Creates a socket to be used in tests. + void SetUp() override; + + // Closes the socket created by SetUp(). + void TearDown() override; + + // Gets the device index of the loopback device. + int GetLoopbackIndex(); + + // The socket used for both reading and writing. + int socket_; +}; + +void CookedPacketTest::SetUp() { + if (!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) { + ASSERT_THAT(socket(AF_PACKET, SOCK_DGRAM, htons(GetParam())), + SyscallFailsWithErrno(EPERM)); + GTEST_SKIP(); + } + + if (!IsRunningOnGvisor()) { + FileDescriptor acceptLocal = ASSERT_NO_ERRNO_AND_VALUE( + Open("/proc/sys/net/ipv4/conf/lo/accept_local", O_RDONLY)); + FileDescriptor routeLocalnet = ASSERT_NO_ERRNO_AND_VALUE( + Open("/proc/sys/net/ipv4/conf/lo/route_localnet", O_RDONLY)); + char enabled; + ASSERT_THAT(read(acceptLocal.get(), &enabled, 1), + SyscallSucceedsWithValue(1)); + ASSERT_EQ(enabled, '1'); + ASSERT_THAT(read(routeLocalnet.get(), &enabled, 1), + SyscallSucceedsWithValue(1)); + ASSERT_EQ(enabled, '1'); + } + + ASSERT_THAT(socket_ = socket(AF_PACKET, SOCK_DGRAM, htons(GetParam())), + SyscallSucceeds()); +} + +void CookedPacketTest::TearDown() { + // TearDown will be run even if we skip the test. + if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) { + EXPECT_THAT(close(socket_), SyscallSucceeds()); + } +} + +int CookedPacketTest::GetLoopbackIndex() { + struct ifreq ifr; + snprintf(ifr.ifr_name, IFNAMSIZ, "lo"); + EXPECT_THAT(ioctl(socket_, SIOCGIFINDEX, &ifr), SyscallSucceeds()); + EXPECT_NE(ifr.ifr_ifindex, 0); + return ifr.ifr_ifindex; +} + +// Receive and verify the message via packet socket on interface. 
+void ReceiveMessage(int sock, int ifindex) { + // Wait for the socket to become readable. + struct pollfd pfd = {}; + pfd.fd = sock; + pfd.events = POLLIN; + EXPECT_THAT(RetryEINTR(poll)(&pfd, 1, 2000), SyscallSucceedsWithValue(1)); + + // Read and verify the data. + constexpr size_t packet_size = + sizeof(struct iphdr) + sizeof(struct udphdr) + sizeof(kMessage); + char buf[64]; + struct sockaddr_ll src = {}; + socklen_t src_len = sizeof(src); + ASSERT_THAT(recvfrom(sock, buf, sizeof(buf), 0, + reinterpret_cast<struct sockaddr*>(&src), &src_len), + SyscallSucceedsWithValue(packet_size)); + + // sockaddr_ll ends with an 8 byte physical address field, but ethernet + // addresses only use 6 bytes. Linux used to return sizeof(sockaddr_ll)-2 + // here, but since commit b2cf86e1563e33a14a1c69b3e508d15dc12f804c returns + // sizeof(sockaddr_ll). + ASSERT_THAT(src_len, AnyOf(Eq(sizeof(src)), Eq(sizeof(src) - 2))); + + // TODO(b/129292371): Verify protocol once we return it. + // Verify the source address. + EXPECT_EQ(src.sll_family, AF_PACKET); + EXPECT_EQ(src.sll_ifindex, ifindex); + EXPECT_EQ(src.sll_halen, ETH_ALEN); + // This came from the loopback device, so the address is all 0s. + for (int i = 0; i < src.sll_halen; i++) { + EXPECT_EQ(src.sll_addr[i], 0); + } + + // Verify the IP header. We memcpy to deal with pointer aligment. + struct iphdr ip = {}; + memcpy(&ip, buf, sizeof(ip)); + EXPECT_EQ(ip.ihl, 5); + EXPECT_EQ(ip.version, 4); + EXPECT_EQ(ip.tot_len, htons(packet_size)); + EXPECT_EQ(ip.protocol, IPPROTO_UDP); + EXPECT_EQ(ip.daddr, htonl(INADDR_LOOPBACK)); + EXPECT_EQ(ip.saddr, htonl(INADDR_LOOPBACK)); + + // Verify the UDP header. We memcpy to deal with pointer aligment. + struct udphdr udp = {}; + memcpy(&udp, buf + sizeof(iphdr), sizeof(udp)); + EXPECT_EQ(udp.dest, kPort); + EXPECT_EQ(udp.len, htons(sizeof(udphdr) + sizeof(kMessage))); + + // Verify the payload. 
+ char* payload = reinterpret_cast<char*>(buf + sizeof(iphdr) + sizeof(udphdr)); + EXPECT_EQ(strncmp(payload, kMessage, sizeof(kMessage)), 0); +} + +// Receive via a packet socket. +TEST_P(CookedPacketTest, Receive) { + // Let's use a simple IP payload: a UDP datagram. + FileDescriptor udp_sock = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, 0)); + SendUDPMessage(udp_sock.get()); + + // Receive and verify the data. + int loopback_index = GetLoopbackIndex(); + ReceiveMessage(socket_, loopback_index); +} + +// Send via a packet socket. +TEST_P(CookedPacketTest, Send) { + // TODO(b/129292371): Remove once we support packet socket writing. + SKIP_IF(IsRunningOnGvisor()); + + // Let's send a UDP packet and receive it using a regular UDP socket. + FileDescriptor udp_sock = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, 0)); + struct sockaddr_in bind_addr = {}; + bind_addr.sin_family = AF_INET; + bind_addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + bind_addr.sin_port = kPort; + ASSERT_THAT( + bind(udp_sock.get(), reinterpret_cast<struct sockaddr*>(&bind_addr), + sizeof(bind_addr)), + SyscallSucceeds()); + + // Set up the destination physical address. + struct sockaddr_ll dest = {}; + dest.sll_family = AF_PACKET; + dest.sll_halen = ETH_ALEN; + dest.sll_ifindex = GetLoopbackIndex(); + dest.sll_protocol = htons(ETH_P_IP); + // We're sending to the loopback device, so the address is all 0s. + memset(dest.sll_addr, 0x00, ETH_ALEN); + + // Set up the IP header. + struct iphdr iphdr = {0}; + iphdr.ihl = 5; + iphdr.version = 4; + iphdr.tos = 0; + iphdr.tot_len = + htons(sizeof(struct iphdr) + sizeof(struct udphdr) + sizeof(kMessage)); + // Get a pseudo-random ID. If we clash with an in-use ID the test will fail, + // but we have no way of getting an ID we know to be good. + srand(*reinterpret_cast<unsigned int*>(&iphdr)); + iphdr.id = rand(); + // Linux sets this bit ("do not fragment") for small packets. 
+ iphdr.frag_off = 1 << 6; + iphdr.ttl = 64; + iphdr.protocol = IPPROTO_UDP; + iphdr.daddr = htonl(INADDR_LOOPBACK); + iphdr.saddr = htonl(INADDR_LOOPBACK); + iphdr.check = IPChecksum(iphdr); + + // Set up the UDP header. + struct udphdr udphdr = {}; + udphdr.source = kPort; + udphdr.dest = kPort; + udphdr.len = htons(sizeof(udphdr) + sizeof(kMessage)); + udphdr.check = UDPChecksum(iphdr, udphdr, kMessage, sizeof(kMessage)); + + // Copy both headers and the payload into our packet buffer. + char send_buf[sizeof(iphdr) + sizeof(udphdr) + sizeof(kMessage)]; + memcpy(send_buf, &iphdr, sizeof(iphdr)); + memcpy(send_buf + sizeof(iphdr), &udphdr, sizeof(udphdr)); + memcpy(send_buf + sizeof(iphdr) + sizeof(udphdr), kMessage, sizeof(kMessage)); + + // Send it. + ASSERT_THAT(sendto(socket_, send_buf, sizeof(send_buf), 0, + reinterpret_cast<struct sockaddr*>(&dest), sizeof(dest)), + SyscallSucceedsWithValue(sizeof(send_buf))); + + // Wait for the packet to become available on both sockets. + struct pollfd pfd = {}; + pfd.fd = udp_sock.get(); + pfd.events = POLLIN; + ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, 5000), SyscallSucceedsWithValue(1)); + pfd.fd = socket_; + pfd.events = POLLIN; + ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, 5000), SyscallSucceedsWithValue(1)); + + // Receive on the packet socket. + char recv_buf[sizeof(send_buf)]; + ASSERT_THAT(recv(socket_, recv_buf, sizeof(recv_buf), 0), + SyscallSucceedsWithValue(sizeof(recv_buf))); + ASSERT_EQ(memcmp(recv_buf, send_buf, sizeof(send_buf)), 0); + + // Receive on the UDP socket. + struct sockaddr_in src; + socklen_t src_len = sizeof(src); + ASSERT_THAT(recvfrom(udp_sock.get(), recv_buf, sizeof(recv_buf), MSG_DONTWAIT, + reinterpret_cast<struct sockaddr*>(&src), &src_len), + SyscallSucceedsWithValue(sizeof(kMessage))); + // Check src and payload. 
+ EXPECT_EQ(strncmp(recv_buf, kMessage, sizeof(kMessage)), 0); + EXPECT_EQ(src.sin_family, AF_INET); + EXPECT_EQ(src.sin_port, kPort); + EXPECT_EQ(src.sin_addr.s_addr, htonl(INADDR_LOOPBACK)); +} + +// Bind and receive via packet socket. +TEST_P(CookedPacketTest, BindReceive) { + struct sockaddr_ll bind_addr = {}; + bind_addr.sll_family = AF_PACKET; + bind_addr.sll_protocol = htons(GetParam()); + bind_addr.sll_ifindex = GetLoopbackIndex(); + + ASSERT_THAT(bind(socket_, reinterpret_cast<struct sockaddr*>(&bind_addr), + sizeof(bind_addr)), + SyscallSucceeds()); + + // Let's use a simple IP payload: a UDP datagram. + FileDescriptor udp_sock = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, 0)); + SendUDPMessage(udp_sock.get()); + + // Receive and verify the data. + ReceiveMessage(socket_, bind_addr.sll_ifindex); +} + +// Double Bind socket. +TEST_P(CookedPacketTest, DoubleBind) { + struct sockaddr_ll bind_addr = {}; + bind_addr.sll_family = AF_PACKET; + bind_addr.sll_protocol = htons(GetParam()); + bind_addr.sll_ifindex = GetLoopbackIndex(); + + ASSERT_THAT(bind(socket_, reinterpret_cast<struct sockaddr*>(&bind_addr), + sizeof(bind_addr)), + SyscallSucceeds()); + + // Binding socket again should fail. + ASSERT_THAT( + bind(socket_, reinterpret_cast<struct sockaddr*>(&bind_addr), + sizeof(bind_addr)), + // Linux 4.09 returns EINVAL here, but some time before 4.19 it switched + // to EADDRINUSE. + AnyOf(SyscallFailsWithErrno(EADDRINUSE), SyscallFailsWithErrno(EINVAL))); +} + +// Bind and verify we do not receive data on interface which is not bound +TEST_P(CookedPacketTest, BindDrop) { + // Let's use a simple IP payload: a UDP datagram. + FileDescriptor udp_sock = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, 0)); + + struct ifaddrs* if_addr_list = nullptr; + auto cleanup = Cleanup([&if_addr_list]() { freeifaddrs(if_addr_list); }); + + ASSERT_THAT(getifaddrs(&if_addr_list), SyscallSucceeds()); + + // Get interface other than loopback. 
+ struct ifreq ifr = {}; + for (struct ifaddrs* i = if_addr_list; i; i = i->ifa_next) { + if (strcmp(i->ifa_name, "lo") != 0) { + strncpy(ifr.ifr_name, i->ifa_name, sizeof(ifr.ifr_name)); + break; + } + } + + // Skip if no interface is available other than loopback. + if (strlen(ifr.ifr_name) == 0) { + GTEST_SKIP(); + } + + // Get interface index. + EXPECT_THAT(ioctl(socket_, SIOCGIFINDEX, &ifr), SyscallSucceeds()); + EXPECT_NE(ifr.ifr_ifindex, 0); + + // Bind to packet socket requires only family, protocol and ifindex. + struct sockaddr_ll bind_addr = {}; + bind_addr.sll_family = AF_PACKET; + bind_addr.sll_protocol = htons(GetParam()); + bind_addr.sll_ifindex = ifr.ifr_ifindex; + + ASSERT_THAT(bind(socket_, reinterpret_cast<struct sockaddr*>(&bind_addr), + sizeof(bind_addr)), + SyscallSucceeds()); + + // Send to loopback interface. + struct sockaddr_in dest = {}; + dest.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + dest.sin_family = AF_INET; + dest.sin_port = kPort; + EXPECT_THAT(sendto(udp_sock.get(), kMessage, sizeof(kMessage), 0, + reinterpret_cast<struct sockaddr*>(&dest), sizeof(dest)), + SyscallSucceedsWithValue(sizeof(kMessage))); + + // Wait and make sure the socket never receives any data. + struct pollfd pfd = {}; + pfd.fd = socket_; + pfd.events = POLLIN; + EXPECT_THAT(RetryEINTR(poll)(&pfd, 1, 1000), SyscallSucceedsWithValue(0)); +} + +// Bind with invalid address. +TEST_P(CookedPacketTest, BindFail) { + // Null address. + ASSERT_THAT( + bind(socket_, nullptr, sizeof(struct sockaddr)), + AnyOf(SyscallFailsWithErrno(EFAULT), SyscallFailsWithErrno(EINVAL))); + + // Address of size 1. 
+ uint8_t addr = 0; + ASSERT_THAT( + bind(socket_, reinterpret_cast<struct sockaddr*>(&addr), sizeof(addr)), + SyscallFailsWithErrno(EINVAL)); +} + +INSTANTIATE_TEST_SUITE_P(AllInetTests, CookedPacketTest, + ::testing::Values(ETH_P_IP, ETH_P_ALL)); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/packet_socket_raw.cc b/test/syscalls/linux/packet_socket_raw.cc new file mode 100644 index 000000000..4093ac813 --- /dev/null +++ b/test/syscalls/linux/packet_socket_raw.cc @@ -0,0 +1,565 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <arpa/inet.h> +#include <linux/capability.h> +#include <linux/if_arp.h> +#include <linux/if_packet.h> +#include <net/ethernet.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/udp.h> +#include <poll.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <unistd.h> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/internal/endian.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/capability_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/test_util.h" + +// Some of these tests involve sending packets via AF_PACKET sockets and the +// loopback interface. 
Because AF_PACKET circumvents so much of the networking +// stack, Linux sees these packets as "martian", i.e. they claim to be to/from +// localhost but don't have the usual associated data. Thus Linux drops them by +// default. You can see where this happens by following the code at: +// +// - net/ipv4/ip_input.c:ip_rcv_finish, which calls +// - net/ipv4/route.c:ip_route_input_noref, which calls +// - net/ipv4/route.c:ip_route_input_slow, which finds and drops martian +// packets. +// +// To tell Linux not to drop these packets, you need to tell it to accept our +// funny packets (which are completely valid and correct, but lack associated +// in-kernel data because we use AF_PACKET): +// +// echo 1 >> /proc/sys/net/ipv4/conf/lo/accept_local +// echo 1 >> /proc/sys/net/ipv4/conf/lo/route_localnet +// +// These tests require CAP_NET_RAW to run. + +// TODO(gvisor.dev/issue/173): gVisor support. + +namespace gvisor { +namespace testing { + +namespace { + +using ::testing::AnyOf; +using ::testing::Eq; + +constexpr char kMessage[] = "soweoneul malhaebwa"; +constexpr in_port_t kPort = 0x409c; // htons(40000) + +// Send kMessage via sock to loopback +void SendUDPMessage(int sock) { + struct sockaddr_in dest = {}; + dest.sin_port = kPort; + dest.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + dest.sin_family = AF_INET; + EXPECT_THAT(sendto(sock, kMessage, sizeof(kMessage), 0, + reinterpret_cast<struct sockaddr*>(&dest), sizeof(dest)), + SyscallSucceedsWithValue(sizeof(kMessage))); +} + +// +// Raw tests. Packets sent with raw AF_PACKET sockets always include link layer +// headers. +// + +// Tests for "raw" (SOCK_RAW) packet(7) sockets. +class RawPacketTest : public ::testing::TestWithParam<int> { + protected: + // Creates a socket to be used in tests. + void SetUp() override; + + // Closes the socket created by SetUp(). + void TearDown() override; + + // Gets the device index of the loopback device. 
+ int GetLoopbackIndex(); + + // The socket used for both reading and writing. + int s_; +}; + +void RawPacketTest::SetUp() { + if (!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) { + ASSERT_THAT(socket(AF_PACKET, SOCK_RAW, htons(GetParam())), + SyscallFailsWithErrno(EPERM)); + GTEST_SKIP(); + } + + if (!IsRunningOnGvisor()) { + // Ensure that looped back packets aren't rejected by the kernel. + FileDescriptor acceptLocal = ASSERT_NO_ERRNO_AND_VALUE( + Open("/proc/sys/net/ipv4/conf/lo/accept_local", O_RDWR)); + FileDescriptor routeLocalnet = ASSERT_NO_ERRNO_AND_VALUE( + Open("/proc/sys/net/ipv4/conf/lo/route_localnet", O_RDWR)); + char enabled; + ASSERT_THAT(read(acceptLocal.get(), &enabled, 1), + SyscallSucceedsWithValue(1)); + if (enabled != '1') { + enabled = '1'; + ASSERT_THAT(lseek(acceptLocal.get(), 0, SEEK_SET), + SyscallSucceedsWithValue(0)); + ASSERT_THAT(write(acceptLocal.get(), &enabled, 1), + SyscallSucceedsWithValue(1)); + ASSERT_THAT(lseek(acceptLocal.get(), 0, SEEK_SET), + SyscallSucceedsWithValue(0)); + ASSERT_THAT(read(acceptLocal.get(), &enabled, 1), + SyscallSucceedsWithValue(1)); + ASSERT_EQ(enabled, '1'); + } + + ASSERT_THAT(read(routeLocalnet.get(), &enabled, 1), + SyscallSucceedsWithValue(1)); + if (enabled != '1') { + enabled = '1'; + ASSERT_THAT(lseek(routeLocalnet.get(), 0, SEEK_SET), + SyscallSucceedsWithValue(0)); + ASSERT_THAT(write(routeLocalnet.get(), &enabled, 1), + SyscallSucceedsWithValue(1)); + ASSERT_THAT(lseek(routeLocalnet.get(), 0, SEEK_SET), + SyscallSucceedsWithValue(0)); + ASSERT_THAT(read(routeLocalnet.get(), &enabled, 1), + SyscallSucceedsWithValue(1)); + ASSERT_EQ(enabled, '1'); + } + } + + ASSERT_THAT(s_ = socket(AF_PACKET, SOCK_RAW, htons(GetParam())), + SyscallSucceeds()); +} + +void RawPacketTest::TearDown() { + // TearDown will be run even if we skip the test. 
+ if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) { + EXPECT_THAT(close(s_), SyscallSucceeds()); + } +} + +int RawPacketTest::GetLoopbackIndex() { + struct ifreq ifr; + snprintf(ifr.ifr_name, IFNAMSIZ, "lo"); + EXPECT_THAT(ioctl(s_, SIOCGIFINDEX, &ifr), SyscallSucceeds()); + EXPECT_NE(ifr.ifr_ifindex, 0); + return ifr.ifr_ifindex; +} + +// Receive via a packet socket. +TEST_P(RawPacketTest, Receive) { + // Let's use a simple IP payload: a UDP datagram. + FileDescriptor udp_sock = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, 0)); + SendUDPMessage(udp_sock.get()); + + // Wait for the socket to become readable. + struct pollfd pfd = {}; + pfd.fd = s_; + pfd.events = POLLIN; + EXPECT_THAT(RetryEINTR(poll)(&pfd, 1, 2000), SyscallSucceedsWithValue(1)); + + // Read and verify the data. + constexpr size_t packet_size = sizeof(struct ethhdr) + sizeof(struct iphdr) + + sizeof(struct udphdr) + sizeof(kMessage); + char buf[64]; + struct sockaddr_ll src = {}; + socklen_t src_len = sizeof(src); + ASSERT_THAT(recvfrom(s_, buf, sizeof(buf), 0, + reinterpret_cast<struct sockaddr*>(&src), &src_len), + SyscallSucceedsWithValue(packet_size)); + // sockaddr_ll ends with an 8 byte physical address field, but ethernet + // addresses only use 6 bytes. Linux used to return sizeof(sockaddr_ll)-2 + // here, but since commit b2cf86e1563e33a14a1c69b3e508d15dc12f804c returns + // sizeof(sockaddr_ll). + ASSERT_THAT(src_len, AnyOf(Eq(sizeof(src)), Eq(sizeof(src) - 2))); + + // TODO(b/129292371): Verify protocol once we return it. + // Verify the source address. + EXPECT_EQ(src.sll_family, AF_PACKET); + EXPECT_EQ(src.sll_ifindex, GetLoopbackIndex()); + EXPECT_EQ(src.sll_halen, ETH_ALEN); + // This came from the loopback device, so the address is all 0s. + for (int i = 0; i < src.sll_halen; i++) { + EXPECT_EQ(src.sll_addr[i], 0); + } + + // Verify the ethernet header. We memcpy to deal with pointer alignment. 
+ struct ethhdr eth = {}; + memcpy(&eth, buf, sizeof(eth)); + // The destination and source address should be 0, for loopback. + for (int i = 0; i < ETH_ALEN; i++) { + EXPECT_EQ(eth.h_dest[i], 0); + EXPECT_EQ(eth.h_source[i], 0); + } + EXPECT_EQ(eth.h_proto, htons(ETH_P_IP)); + + // Verify the IP header. We memcpy to deal with pointer alignment. + struct iphdr ip = {}; + memcpy(&ip, buf + sizeof(ethhdr), sizeof(ip)); + EXPECT_EQ(ip.ihl, 5); + EXPECT_EQ(ip.version, 4); + EXPECT_EQ(ip.tot_len, htons(packet_size - sizeof(eth))); + EXPECT_EQ(ip.protocol, IPPROTO_UDP); + EXPECT_EQ(ip.daddr, htonl(INADDR_LOOPBACK)); + EXPECT_EQ(ip.saddr, htonl(INADDR_LOOPBACK)); + + // Verify the UDP header. We memcpy to deal with pointer alignment. + struct udphdr udp = {}; + memcpy(&udp, buf + sizeof(eth) + sizeof(iphdr), sizeof(udp)); + EXPECT_EQ(udp.dest, kPort); + EXPECT_EQ(udp.len, htons(sizeof(udphdr) + sizeof(kMessage))); + + // Verify the payload. + char* payload = reinterpret_cast<char*>(buf + sizeof(eth) + sizeof(iphdr) + + sizeof(udphdr)); + EXPECT_EQ(strncmp(payload, kMessage, sizeof(kMessage)), 0); +} + +// Send via a packet socket. +TEST_P(RawPacketTest, Send) { + // TODO(b/129292371): Remove once we support packet socket writing. + SKIP_IF(IsRunningOnGvisor()); + + // Let's send a UDP packet and receive it using a regular UDP socket. + FileDescriptor udp_sock = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, 0)); + struct sockaddr_in bind_addr = {}; + bind_addr.sin_family = AF_INET; + bind_addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + bind_addr.sin_port = kPort; + ASSERT_THAT( + bind(udp_sock.get(), reinterpret_cast<struct sockaddr*>(&bind_addr), + sizeof(bind_addr)), + SyscallSucceeds()); + + // Set up the destination physical address. 
+ struct sockaddr_ll dest = {}; + dest.sll_family = AF_PACKET; + dest.sll_halen = ETH_ALEN; + dest.sll_ifindex = GetLoopbackIndex(); + dest.sll_protocol = htons(ETH_P_IP); + // We're sending to the loopback device, so the address is all 0s. + memset(dest.sll_addr, 0x00, ETH_ALEN); + + // Set up the ethernet header. The kernel takes care of the footer. + // We're sending to and from hardware address 0 (loopback). + struct ethhdr eth = {}; + eth.h_proto = htons(ETH_P_IP); + + // Set up the IP header. + struct iphdr iphdr = {}; + iphdr.ihl = 5; + iphdr.version = 4; + iphdr.tos = 0; + iphdr.tot_len = + htons(sizeof(struct iphdr) + sizeof(struct udphdr) + sizeof(kMessage)); + // Get a pseudo-random ID. If we clash with an in-use ID the test will fail, + // but we have no way of getting an ID we know to be good. + srand(*reinterpret_cast<unsigned int*>(&iphdr)); + iphdr.id = rand(); + // Linux sets this bit ("do not fragment") for small packets. + iphdr.frag_off = 1 << 6; + iphdr.ttl = 64; + iphdr.protocol = IPPROTO_UDP; + iphdr.daddr = htonl(INADDR_LOOPBACK); + iphdr.saddr = htonl(INADDR_LOOPBACK); + iphdr.check = IPChecksum(iphdr); + + // Set up the UDP header. + struct udphdr udphdr = {}; + udphdr.source = kPort; + udphdr.dest = kPort; + udphdr.len = htons(sizeof(udphdr) + sizeof(kMessage)); + udphdr.check = UDPChecksum(iphdr, udphdr, kMessage, sizeof(kMessage)); + + // Copy both headers and the payload into our packet buffer. + char + send_buf[sizeof(eth) + sizeof(iphdr) + sizeof(udphdr) + sizeof(kMessage)]; + memcpy(send_buf, &eth, sizeof(eth)); + memcpy(send_buf + sizeof(ethhdr), &iphdr, sizeof(iphdr)); + memcpy(send_buf + sizeof(ethhdr) + sizeof(iphdr), &udphdr, sizeof(udphdr)); + memcpy(send_buf + sizeof(ethhdr) + sizeof(iphdr) + sizeof(udphdr), kMessage, + sizeof(kMessage)); + + // Send it. 
+ ASSERT_THAT(sendto(s_, send_buf, sizeof(send_buf), 0, + reinterpret_cast<struct sockaddr*>(&dest), sizeof(dest)), + SyscallSucceedsWithValue(sizeof(send_buf))); + + // Wait for the packet to become available on both sockets. + struct pollfd pfd = {}; + pfd.fd = udp_sock.get(); + pfd.events = POLLIN; + ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, 5000), SyscallSucceedsWithValue(1)); + pfd.fd = s_; + pfd.events = POLLIN; + ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, 5000), SyscallSucceedsWithValue(1)); + + // Receive on the packet socket. + char recv_buf[sizeof(send_buf)]; + ASSERT_THAT(recv(s_, recv_buf, sizeof(recv_buf), 0), + SyscallSucceedsWithValue(sizeof(recv_buf))); + ASSERT_EQ(memcmp(recv_buf, send_buf, sizeof(send_buf)), 0); + + // Receive on the UDP socket. + struct sockaddr_in src; + socklen_t src_len = sizeof(src); + ASSERT_THAT(recvfrom(udp_sock.get(), recv_buf, sizeof(recv_buf), MSG_DONTWAIT, + reinterpret_cast<struct sockaddr*>(&src), &src_len), + SyscallSucceedsWithValue(sizeof(kMessage))); + // Check src and payload. + EXPECT_EQ(strncmp(recv_buf, kMessage, sizeof(kMessage)), 0); + EXPECT_EQ(src.sin_family, AF_INET); + EXPECT_EQ(src.sin_port, kPort); + EXPECT_EQ(src.sin_addr.s_addr, htonl(INADDR_LOOPBACK)); +} + +// Check that setting SO_RCVBUF below min is clamped to the minimum +// receive buffer size. +TEST_P(RawPacketTest, SetSocketRecvBufBelowMin) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + // Discover minimum receive buf size by trying to set it to zero. 
+ // See: + // https://github.com/torvalds/linux/blob/a5dc8300df75e8b8384b4c82225f1e4a0b4d9b55/net/core/sock.c#L820 + constexpr int kRcvBufSz = 0; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)), + SyscallSucceeds()); + + int min = 0; + socklen_t min_len = sizeof(min); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &min, &min_len), + SyscallSucceeds()); + + // Linux doubles the value so let's use a value that when doubled will still + // be smaller than min. + int below_min = min / 2 - 1; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &below_min, sizeof(below_min)), + SyscallSucceeds()); + + int val = 0; + socklen_t val_len = sizeof(val); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &val, &val_len), + SyscallSucceeds()); + + ASSERT_EQ(min, val); +} + +// Check that setting SO_RCVBUF above max is clamped to the maximum +// receive buffer size. +TEST_P(RawPacketTest, SetSocketRecvBufAboveMax) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + // Discover max buf size by trying to set the largest possible buffer size. + constexpr int kRcvBufSz = 0xffffffff; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)), + SyscallSucceeds()); + + int max = 0; + socklen_t max_len = sizeof(max); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &max, &max_len), + SyscallSucceeds()); + + int above_max = max + 1; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &above_max, sizeof(above_max)), + SyscallSucceeds()); + + int val = 0; + socklen_t val_len = sizeof(val); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &val, &val_len), + SyscallSucceeds()); + ASSERT_EQ(max, val); +} + +// Check that setting SO_RCVBUF min <= kRcvBufSz <= max is honored. 
+TEST_P(RawPacketTest, SetSocketRecvBuf) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + int max = 0; + int min = 0; + { + // Discover max buf size by trying to set a really large buffer size. + constexpr int kRcvBufSz = 0xffffffff; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)), + SyscallSucceeds()); + + max = 0; + socklen_t max_len = sizeof(max); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &max, &max_len), + SyscallSucceeds()); + } + + { + // Discover minimum buffer size by trying to set a zero size receive buffer + // size. + // See: + // https://github.com/torvalds/linux/blob/a5dc8300df75e8b8384b4c82225f1e4a0b4d9b55/net/core/sock.c#L820 + constexpr int kRcvBufSz = 0; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)), + SyscallSucceeds()); + + socklen_t min_len = sizeof(min); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &min, &min_len), + SyscallSucceeds()); + } + + int quarter_sz = min + (max - min) / 4; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &quarter_sz, sizeof(quarter_sz)), + SyscallSucceeds()); + + int val = 0; + socklen_t val_len = sizeof(val); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &val, &val_len), + SyscallSucceeds()); + + // Linux doubles the value set by SO_SNDBUF/SO_RCVBUF. + // TODO(gvisor.dev/issue/2926): Remove when Netstack matches linux behavior. + if (!IsRunningOnGvisor()) { + quarter_sz *= 2; + } + ASSERT_EQ(quarter_sz, val); +} + +// Check that setting SO_SNDBUF below min is clamped to the minimum +// receive buffer size. +TEST_P(RawPacketTest, SetSocketSendBufBelowMin) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + // Discover minimum buffer size by trying to set it to zero. 
+ constexpr int kSndBufSz = 0; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &kSndBufSz, sizeof(kSndBufSz)), + SyscallSucceeds()); + + int min = 0; + socklen_t min_len = sizeof(min); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &min, &min_len), + SyscallSucceeds()); + + // Linux doubles the value so let's use a value that when doubled will still + // be smaller than min. + int below_min = min / 2 - 1; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &below_min, sizeof(below_min)), + SyscallSucceeds()); + + int val = 0; + socklen_t val_len = sizeof(val); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &val, &val_len), + SyscallSucceeds()); + + ASSERT_EQ(min, val); +} + +// Check that setting SO_SNDBUF above max is clamped to the maximum +// send buffer size. +TEST_P(RawPacketTest, SetSocketSendBufAboveMax) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + // Discover maximum buffer size by trying to set it to a large value. + constexpr int kSndBufSz = 0xffffffff; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &kSndBufSz, sizeof(kSndBufSz)), + SyscallSucceeds()); + + int max = 0; + socklen_t max_len = sizeof(max); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &max, &max_len), + SyscallSucceeds()); + + int above_max = max + 1; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &above_max, sizeof(above_max)), + SyscallSucceeds()); + + int val = 0; + socklen_t val_len = sizeof(val); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &val, &val_len), + SyscallSucceeds()); + ASSERT_EQ(max, val); +} + +// Check that setting SO_SNDBUF min <= kSndBufSz <= max is honored. +TEST_P(RawPacketTest, SetSocketSendBuf) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + int max = 0; + int min = 0; + { + // Discover maximum buffer size by trying to set it to a large value. 
+ constexpr int kSndBufSz = 0xffffffff; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &kSndBufSz, sizeof(kSndBufSz)), + SyscallSucceeds()); + + max = 0; + socklen_t max_len = sizeof(max); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &max, &max_len), + SyscallSucceeds()); + } + + { + // Discover minimum buffer size by trying to set it to zero. + constexpr int kSndBufSz = 0; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &kSndBufSz, sizeof(kSndBufSz)), + SyscallSucceeds()); + + socklen_t min_len = sizeof(min); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &min, &min_len), + SyscallSucceeds()); + } + + int quarter_sz = min + (max - min) / 4; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &quarter_sz, sizeof(quarter_sz)), + SyscallSucceeds()); + + int val = 0; + socklen_t val_len = sizeof(val); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &val, &val_len), + SyscallSucceeds()); + + // Linux doubles the value set by SO_SNDBUF/SO_RCVBUF. + // TODO(gvisor.dev/issue/2926): Remove the gvisor special casing when Netstack + // matches linux behavior. + if (!IsRunningOnGvisor()) { + quarter_sz *= 2; + } + + ASSERT_EQ(quarter_sz, val); +} + +INSTANTIATE_TEST_SUITE_P(AllInetTests, RawPacketTest, + ::testing::Values(ETH_P_IP, ETH_P_ALL)); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/partial_bad_buffer.cc b/test/syscalls/linux/partial_bad_buffer.cc new file mode 100644 index 000000000..df7129acc --- /dev/null +++ b/test/syscalls/linux/partial_bad_buffer.cc @@ -0,0 +1,405 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <errno.h> +#include <fcntl.h> +#include <netinet/in.h> +#include <netinet/tcp.h> +#include <sys/mman.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <sys/uio.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "absl/time/clock.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/fs_util.h" +#include "test/util/posix_error.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +using ::testing::Gt; + +namespace gvisor { +namespace testing { + +namespace { + +constexpr char kMessage[] = "hello world"; + +// PartialBadBufferTest checks the result of various IO syscalls when passed a +// buffer that does not have the space specified in the syscall (most of it is +// PROT_NONE). Linux is annoyingly inconsistent among different syscalls, so we +// test all of them. +class PartialBadBufferTest : public ::testing::Test { + protected: + void SetUp() override { + // Create and open a directory for getdents cases. + directory_ = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + ASSERT_THAT( + directory_fd_ = open(directory_.path().c_str(), O_RDONLY | O_DIRECTORY), + SyscallSucceeds()); + + // Create and open a normal file, placing it in the directory + // so the getdents cases have some dirents. + name_ = JoinPath(directory_.path(), "a"); + ASSERT_THAT(fd_ = open(name_.c_str(), O_RDWR | O_CREAT, 0644), + SyscallSucceeds()); + + // Write some initial data. 
+ size_t size = sizeof(kMessage) - 1; + EXPECT_THAT(WriteFd(fd_, &kMessage, size), SyscallSucceedsWithValue(size)); + ASSERT_THAT(lseek(fd_, 0, SEEK_SET), SyscallSucceeds()); + + // Map a useable buffer. + addr_ = mmap(0, 2 * kPageSize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(addr_, MAP_FAILED); + char* buf = reinterpret_cast<char*>(addr_); + + // Guard page for our read to run into. + ASSERT_THAT(mprotect(reinterpret_cast<void*>(buf + kPageSize), kPageSize, + PROT_NONE), + SyscallSucceeds()); + + // Leave only one free byte in the buffer. + bad_buffer_ = buf + kPageSize - 1; + } + + off_t Size() { + struct stat st; + int rc = fstat(fd_, &st); + if (rc < 0) { + return static_cast<off_t>(rc); + } + return st.st_size; + } + + void TearDown() override { + EXPECT_THAT(munmap(addr_, 2 * kPageSize), SyscallSucceeds()) << addr_; + EXPECT_THAT(close(fd_), SyscallSucceeds()); + EXPECT_THAT(unlink(name_.c_str()), SyscallSucceeds()); + EXPECT_THAT(close(directory_fd_), SyscallSucceeds()); + } + + // Return buffer with n bytes of free space. + // N.B. this is the same buffer used to back bad_buffer_. + char* FreeBytes(size_t n) { + TEST_CHECK(n <= static_cast<size_t>(4096)); + return reinterpret_cast<char*>(addr_) + kPageSize - n; + } + + std::string name_; + int fd_; + TempPath directory_; + int directory_fd_; + void* addr_; + char* bad_buffer_; +}; + +// We do both "big" and "small" tests to try to hit the "zero copy" and +// non-"zero copy" paths, which have different code paths for handling faults. 
+ +TEST_F(PartialBadBufferTest, ReadBig) { + EXPECT_THAT(RetryEINTR(read)(fd_, bad_buffer_, kPageSize), + SyscallSucceedsWithValue(1)); + EXPECT_EQ('h', bad_buffer_[0]); +} + +TEST_F(PartialBadBufferTest, ReadSmall) { + EXPECT_THAT(RetryEINTR(read)(fd_, bad_buffer_, 10), + SyscallSucceedsWithValue(1)); + EXPECT_EQ('h', bad_buffer_[0]); +} + +TEST_F(PartialBadBufferTest, PreadBig) { + EXPECT_THAT(RetryEINTR(pread)(fd_, bad_buffer_, kPageSize, 0), + SyscallSucceedsWithValue(1)); + EXPECT_EQ('h', bad_buffer_[0]); +} + +TEST_F(PartialBadBufferTest, PreadSmall) { + EXPECT_THAT(RetryEINTR(pread)(fd_, bad_buffer_, 10, 0), + SyscallSucceedsWithValue(1)); + EXPECT_EQ('h', bad_buffer_[0]); +} + +TEST_F(PartialBadBufferTest, ReadvBig) { + struct iovec vec; + vec.iov_base = bad_buffer_; + vec.iov_len = kPageSize; + + EXPECT_THAT(RetryEINTR(readv)(fd_, &vec, 1), SyscallSucceedsWithValue(1)); + EXPECT_EQ('h', bad_buffer_[0]); +} + +TEST_F(PartialBadBufferTest, ReadvSmall) { + struct iovec vec; + vec.iov_base = bad_buffer_; + vec.iov_len = 10; + + EXPECT_THAT(RetryEINTR(readv)(fd_, &vec, 1), SyscallSucceedsWithValue(1)); + EXPECT_EQ('h', bad_buffer_[0]); +} + +TEST_F(PartialBadBufferTest, PreadvBig) { + struct iovec vec; + vec.iov_base = bad_buffer_; + vec.iov_len = kPageSize; + + EXPECT_THAT(RetryEINTR(preadv)(fd_, &vec, 1, 0), SyscallSucceedsWithValue(1)); + EXPECT_EQ('h', bad_buffer_[0]); +} + +TEST_F(PartialBadBufferTest, PreadvSmall) { + struct iovec vec; + vec.iov_base = bad_buffer_; + vec.iov_len = 10; + + EXPECT_THAT(RetryEINTR(preadv)(fd_, &vec, 1, 0), SyscallSucceedsWithValue(1)); + EXPECT_EQ('h', bad_buffer_[0]); +} + +TEST_F(PartialBadBufferTest, WriteBig) { + off_t orig_size = Size(); + int n; + + ASSERT_THAT(lseek(fd_, orig_size, SEEK_SET), SyscallSucceeds()); + EXPECT_THAT( + (n = RetryEINTR(write)(fd_, bad_buffer_, kPageSize)), + AnyOf(SyscallFailsWithErrno(EFAULT), SyscallSucceedsWithValue(1))); + EXPECT_EQ(Size(), orig_size + (n >= 0 ? 
n : 0)); +} + +TEST_F(PartialBadBufferTest, WriteSmall) { + off_t orig_size = Size(); + int n; + + ASSERT_THAT(lseek(fd_, orig_size, SEEK_SET), SyscallSucceeds()); + EXPECT_THAT( + (n = RetryEINTR(write)(fd_, bad_buffer_, 10)), + AnyOf(SyscallFailsWithErrno(EFAULT), SyscallSucceedsWithValue(1))); + EXPECT_EQ(Size(), orig_size + (n >= 0 ? n : 0)); +} + +TEST_F(PartialBadBufferTest, PwriteBig) { + off_t orig_size = Size(); + int n; + + EXPECT_THAT( + (n = RetryEINTR(pwrite)(fd_, bad_buffer_, kPageSize, orig_size)), + AnyOf(SyscallFailsWithErrno(EFAULT), SyscallSucceedsWithValue(1))); + EXPECT_EQ(Size(), orig_size + (n >= 0 ? n : 0)); +} + +TEST_F(PartialBadBufferTest, PwriteSmall) { + off_t orig_size = Size(); + int n; + + EXPECT_THAT( + (n = RetryEINTR(pwrite)(fd_, bad_buffer_, 10, orig_size)), + AnyOf(SyscallFailsWithErrno(EFAULT), SyscallSucceedsWithValue(1))); + EXPECT_EQ(Size(), orig_size + (n >= 0 ? n : 0)); +} + +TEST_F(PartialBadBufferTest, WritevBig) { + struct iovec vec; + vec.iov_base = bad_buffer_; + vec.iov_len = kPageSize; + off_t orig_size = Size(); + int n; + + ASSERT_THAT(lseek(fd_, orig_size, SEEK_SET), SyscallSucceeds()); + EXPECT_THAT( + (n = RetryEINTR(writev)(fd_, &vec, 1)), + AnyOf(SyscallFailsWithErrno(EFAULT), SyscallSucceedsWithValue(1))); + EXPECT_EQ(Size(), orig_size + (n >= 0 ? n : 0)); +} + +TEST_F(PartialBadBufferTest, WritevSmall) { + struct iovec vec; + vec.iov_base = bad_buffer_; + vec.iov_len = 10; + off_t orig_size = Size(); + int n; + + ASSERT_THAT(lseek(fd_, orig_size, SEEK_SET), SyscallSucceeds()); + EXPECT_THAT( + (n = RetryEINTR(writev)(fd_, &vec, 1)), + AnyOf(SyscallFailsWithErrno(EFAULT), SyscallSucceedsWithValue(1))); + EXPECT_EQ(Size(), orig_size + (n >= 0 ? 
n : 0)); +} + +TEST_F(PartialBadBufferTest, PwritevBig) { + struct iovec vec; + vec.iov_base = bad_buffer_; + vec.iov_len = kPageSize; + off_t orig_size = Size(); + int n; + + EXPECT_THAT( + (n = RetryEINTR(pwritev)(fd_, &vec, 1, orig_size)), + AnyOf(SyscallFailsWithErrno(EFAULT), SyscallSucceedsWithValue(1))); + EXPECT_EQ(Size(), orig_size + (n >= 0 ? n : 0)); +} + +TEST_F(PartialBadBufferTest, PwritevSmall) { + struct iovec vec; + vec.iov_base = bad_buffer_; + vec.iov_len = 10; + off_t orig_size = Size(); + int n; + + EXPECT_THAT( + (n = RetryEINTR(pwritev)(fd_, &vec, 1, orig_size)), + AnyOf(SyscallFailsWithErrno(EFAULT), SyscallSucceedsWithValue(1))); + EXPECT_EQ(Size(), orig_size + (n >= 0 ? n : 0)); +} + +// getdents returns EFAULT when you claim the buffer is large enough, but +// it actually isn't. +TEST_F(PartialBadBufferTest, GetdentsBig) { + EXPECT_THAT(RetryEINTR(syscall)(SYS_getdents64, directory_fd_, bad_buffer_, + kPageSize), + SyscallFailsWithErrno(EFAULT)); +} + +// getdents returns EINVAL when you claim the buffer is too small. +TEST_F(PartialBadBufferTest, GetdentsSmall) { + EXPECT_THAT( + RetryEINTR(syscall)(SYS_getdents64, directory_fd_, bad_buffer_, 10), + SyscallFailsWithErrno(EINVAL)); +} + +// getdents will write entries into a buffer if there is space before it faults. +TEST_F(PartialBadBufferTest, GetdentsOneEntry) { + // 30 bytes is enough for one (small) entry. 
+ char* buf = FreeBytes(30); + + EXPECT_THAT( + RetryEINTR(syscall)(SYS_getdents64, directory_fd_, buf, kPageSize), + SyscallSucceedsWithValue(Gt(0))); +} + +PosixErrorOr<sockaddr_storage> InetLoopbackAddr(int family) { + struct sockaddr_storage addr; + memset(&addr, 0, sizeof(addr)); + addr.ss_family = family; + switch (family) { + case AF_INET: + reinterpret_cast<struct sockaddr_in*>(&addr)->sin_addr.s_addr = + htonl(INADDR_LOOPBACK); + break; + case AF_INET6: + reinterpret_cast<struct sockaddr_in6*>(&addr)->sin6_addr = + in6addr_loopback; + break; + default: + return PosixError(EINVAL, + absl::StrCat("unknown socket family: ", family)); + } + return addr; +} + +// SendMsgTCP verifies that calling sendmsg with a bad address returns an +// EFAULT. It also verifies that passing a buffer which is made up of 2 +// pages one valid and one guard page succeeds as long as the write is +// for exactly the size of 1 page. +TEST_F(PartialBadBufferTest, SendMsgTCP) { + auto listen_socket = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)); + + // Initialize address to the loopback one. + sockaddr_storage addr = ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(AF_INET)); + socklen_t addrlen = sizeof(addr); + + // Bind to some port then start listening. + ASSERT_THAT(bind(listen_socket.get(), + reinterpret_cast<struct sockaddr*>(&addr), addrlen), + SyscallSucceeds()); + + ASSERT_THAT(listen(listen_socket.get(), SOMAXCONN), SyscallSucceeds()); + + // Get the address we're listening on, then connect to it. We need to do this + // because we're allowing the stack to pick a port for us. + ASSERT_THAT(getsockname(listen_socket.get(), + reinterpret_cast<struct sockaddr*>(&addr), &addrlen), + SyscallSucceeds()); + + auto send_socket = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)); + + ASSERT_THAT( + RetryEINTR(connect)(send_socket.get(), + reinterpret_cast<struct sockaddr*>(&addr), addrlen), + SyscallSucceeds()); + + // Accept the connection. 
+ auto recv_socket = + ASSERT_NO_ERRNO_AND_VALUE(Accept(listen_socket.get(), nullptr, nullptr)); + + // TODO(gvisor.dev/issue/674): Update this once Netstack matches linux + // behaviour on a setsockopt of SO_RCVBUF/SO_SNDBUF. + // + // Set SO_SNDBUF for socket to exactly kPageSize+1. + // + // gVisor does not double the value passed in SO_SNDBUF like linux does so we + // just increase it by 1 byte here for gVisor so that we can test writing 1 + // byte past the valid page and check that it triggers an EFAULT + // correctly. Otherwise in gVisor the sendmsg call will just return with no + // error with kPageSize bytes written successfully. + const uint32_t buf_size = kPageSize + 1; + ASSERT_THAT(setsockopt(send_socket.get(), SOL_SOCKET, SO_SNDBUF, &buf_size, + sizeof(buf_size)), + SyscallSucceedsWithValue(0)); + + struct msghdr hdr = {}; + struct iovec iov = {}; + iov.iov_base = bad_buffer_; + iov.iov_len = kPageSize; + hdr.msg_iov = &iov; + hdr.msg_iovlen = 1; + + ASSERT_THAT(RetryEINTR(sendmsg)(send_socket.get(), &hdr, 0), + SyscallFailsWithErrno(EFAULT)); + + // Now assert that writing kPageSize from addr_ succeeds. + iov.iov_base = addr_; + ASSERT_THAT(RetryEINTR(sendmsg)(send_socket.get(), &hdr, 0), + SyscallSucceedsWithValue(kPageSize)); + // Read all the data out so that we drain the socket SND_BUF on the sender. + std::vector<char> buffer(kPageSize); + ASSERT_THAT(RetryEINTR(read)(recv_socket.get(), buffer.data(), kPageSize), + SyscallSucceedsWithValue(kPageSize)); + + // Sleep for a short while to ensure that we have time to process the + // ACKs. This is not strictly required unless running under gotsan which is a + // lot slower and can result in the next write to write only 1 byte instead of + // our intended kPageSize + 1. + absl::SleepFor(absl::Milliseconds(50)); + + // Now assert that writing > kPageSize results in EFAULT. 
+ iov.iov_len = kPageSize + 1; + ASSERT_THAT(RetryEINTR(sendmsg)(send_socket.get(), &hdr, 0), + SyscallFailsWithErrno(EFAULT)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/pause.cc b/test/syscalls/linux/pause.cc new file mode 100644 index 000000000..8c05efd6f --- /dev/null +++ b/test/syscalls/linux/pause.cc @@ -0,0 +1,88 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <errno.h> +#include <signal.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <unistd.h> + +#include <atomic> + +#include "gtest/gtest.h" +#include "absl/synchronization/mutex.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/util/signal_util.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +void NoopSignalHandler(int sig, siginfo_t* info, void* context) {} + +} // namespace + +TEST(PauseTest, OnlyReturnsWhenSignalHandled) { + struct sigaction sa; + sigfillset(&sa.sa_mask); + + // Ensure that SIGUSR1 is ignored. + sa.sa_handler = SIG_IGN; + ASSERT_THAT(sigaction(SIGUSR1, &sa, nullptr), SyscallSucceeds()); + + // Register a handler for SIGUSR2. + sa.sa_sigaction = NoopSignalHandler; + sa.sa_flags = SA_SIGINFO; + ASSERT_THAT(sigaction(SIGUSR2, &sa, nullptr), SyscallSucceeds()); + + // The child sets their own tid. 
+ absl::Mutex mu; + pid_t child_tid = 0; + bool child_tid_available = false; + std::atomic<int> sent_signal{0}; + std::atomic<int> waking_signal{0}; + ScopedThread t([&] { + mu.Lock(); + child_tid = gettid(); + child_tid_available = true; + mu.Unlock(); + EXPECT_THAT(pause(), SyscallFailsWithErrno(EINTR)); + waking_signal.store(sent_signal.load()); + }); + mu.Lock(); + mu.Await(absl::Condition(&child_tid_available)); + mu.Unlock(); + + // Wait a bit to let the child enter pause(). + absl::SleepFor(absl::Seconds(3)); + + // The child should not be woken by SIGUSR1. + sent_signal.store(SIGUSR1); + ASSERT_THAT(tgkill(getpid(), child_tid, SIGUSR1), SyscallSucceeds()); + absl::SleepFor(absl::Seconds(3)); + + // The child should be woken by SIGUSR2. + sent_signal.store(SIGUSR2); + ASSERT_THAT(tgkill(getpid(), child_tid, SIGUSR2), SyscallSucceeds()); + absl::SleepFor(absl::Seconds(3)); + + EXPECT_EQ(SIGUSR2, waking_signal.load()); +} + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/ping_socket.cc b/test/syscalls/linux/ping_socket.cc new file mode 100644 index 000000000..a9bfdb37b --- /dev/null +++ b/test/syscalls/linux/ping_socket.cc @@ -0,0 +1,91 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/ip_icmp.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <unistd.h> + +#include <vector> + +#include "gtest/gtest.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/save_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { +namespace { + +class PingSocket : public ::testing::Test { + protected: + // Creates a socket to be used in tests. + void SetUp() override; + + // Closes the socket created by SetUp(). + void TearDown() override; + + // The loopback address. + struct sockaddr_in addr_; +}; + +void PingSocket::SetUp() { + // On some hosts ping sockets are restricted to specific groups using the + // sysctl "ping_group_range". + int s = socket(AF_INET, SOCK_RAW, IPPROTO_ICMP); + if (s < 0 && errno == EPERM) { + GTEST_SKIP(); + } + close(s); + + addr_ = {}; + // Just a random port as the destination port number is irrelevant for ping + // sockets. + addr_.sin_port = 12345; + addr_.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + addr_.sin_family = AF_INET; +} + +void PingSocket::TearDown() {} + +// Test ICMP port exhaustion returns EAGAIN. +// +// We disable both random/cooperative S/R for this test as it makes way too many +// syscalls. 
+TEST_F(PingSocket, ICMPPortExhaustion_NoRandomSave) { + DisableSave ds; + std::vector<FileDescriptor> sockets; + constexpr int kSockets = 65536; + addr_.sin_port = 0; + for (int i = 0; i < kSockets; i++) { + auto s = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, IPPROTO_ICMP)); + int ret = connect(s.get(), reinterpret_cast<struct sockaddr*>(&addr_), + sizeof(addr_)); + if (ret == 0) { + sockets.push_back(std::move(s)); + continue; + } + ASSERT_THAT(ret, SyscallFailsWithErrno(EAGAIN)); + break; + } +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/pipe.cc b/test/syscalls/linux/pipe.cc new file mode 100644 index 000000000..34291850d --- /dev/null +++ b/test/syscalls/linux/pipe.cc @@ -0,0 +1,670 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <fcntl.h> /* Obtain O_* constant definitions */ +#include <sys/ioctl.h> +#include <sys/uio.h> +#include <unistd.h> + +#include <vector> + +#include "gtest/gtest.h" +#include "absl/strings/str_cat.h" +#include "absl/synchronization/notification.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/util/file_descriptor.h" +#include "test/util/fs_util.h" +#include "test/util/posix_error.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// Used as a non-zero sentinel value, below. +constexpr int kTestValue = 0x12345678; + +// Used for synchronization in race tests. +const absl::Duration syncDelay = absl::Seconds(2); + +struct PipeCreator { + std::string name_; + + // void (fds, is_blocking, is_namedpipe). + std::function<void(int[2], bool*, bool*)> create_; +}; + +class PipeTest : public ::testing::TestWithParam<PipeCreator> { + public: + static void SetUpTestSuite() { + // Tests intentionally generate SIGPIPE. + TEST_PCHECK(signal(SIGPIPE, SIG_IGN) != SIG_ERR); + } + + // Initializes rfd_ and wfd_ as a blocking pipe. + // + // The return value indicates success: the test should be skipped otherwise. + bool CreateBlocking() { return create(true); } + + // Initializes rfd_ and wfd_ as a non-blocking pipe. + // + // The return value is per CreateBlocking. + bool CreateNonBlocking() { return create(false); } + + // Returns true iff the pipe represents a named pipe. + bool IsNamedPipe() const { return named_pipe_; } + + int Size() const { + int s1 = fcntl(rfd_.get(), F_GETPIPE_SZ); + int s2 = fcntl(wfd_.get(), F_GETPIPE_SZ); + EXPECT_GT(s1, 0); + EXPECT_GT(s2, 0); + EXPECT_EQ(s1, s2); + return s1; + } + + static void TearDownTestSuite() { + TEST_PCHECK(signal(SIGPIPE, SIG_DFL) != SIG_ERR); + } + + private: + bool create(bool wants_blocking) { + // Generate the pipe. 
+ int fds[2] = {-1, -1}; + bool is_blocking = false; + GetParam().create_(fds, &is_blocking, &named_pipe_); + if (fds[0] < 0 || fds[1] < 0) { + return false; + } + + // Save descriptors. + rfd_.reset(fds[0]); + wfd_.reset(fds[1]); + + // Adjust blocking, if needed. + if (!is_blocking && wants_blocking) { + // Clear the blocking flag. + EXPECT_THAT(fcntl(fds[0], F_SETFL, 0), SyscallSucceeds()); + EXPECT_THAT(fcntl(fds[1], F_SETFL, 0), SyscallSucceeds()); + } else if (is_blocking && !wants_blocking) { + // Set the descriptors to blocking. + EXPECT_THAT(fcntl(fds[0], F_SETFL, O_NONBLOCK), SyscallSucceeds()); + EXPECT_THAT(fcntl(fds[1], F_SETFL, O_NONBLOCK), SyscallSucceeds()); + } + + return true; + } + + protected: + FileDescriptor rfd_; + FileDescriptor wfd_; + + private: + bool named_pipe_ = false; +}; + +TEST_P(PipeTest, Inode) { + SKIP_IF(!CreateBlocking()); + + // Ensure that the inode number is the same for each end. + struct stat rst; + ASSERT_THAT(fstat(rfd_.get(), &rst), SyscallSucceeds()); + struct stat wst; + ASSERT_THAT(fstat(wfd_.get(), &wst), SyscallSucceeds()); + EXPECT_EQ(rst.st_ino, wst.st_ino); +} + +TEST_P(PipeTest, Permissions) { + SKIP_IF(!CreateBlocking()); + + // Attempt bad operations. + int buf = kTestValue; + ASSERT_THAT(write(rfd_.get(), &buf, sizeof(buf)), + SyscallFailsWithErrno(EBADF)); + EXPECT_THAT(read(wfd_.get(), &buf, sizeof(buf)), + SyscallFailsWithErrno(EBADF)); +} + +TEST_P(PipeTest, Flags) { + SKIP_IF(!CreateBlocking()); + + if (IsNamedPipe()) { + // May be stubbed to zero; define locally. 
+ EXPECT_THAT(fcntl(rfd_.get(), F_GETFL), + SyscallSucceedsWithValue(kOLargeFile | O_RDONLY)); + EXPECT_THAT(fcntl(wfd_.get(), F_GETFL), + SyscallSucceedsWithValue(kOLargeFile | O_WRONLY)); + } else { + EXPECT_THAT(fcntl(rfd_.get(), F_GETFL), SyscallSucceedsWithValue(O_RDONLY)); + EXPECT_THAT(fcntl(wfd_.get(), F_GETFL), SyscallSucceedsWithValue(O_WRONLY)); + } +} + +TEST_P(PipeTest, Write) { + SKIP_IF(!CreateBlocking()); + + int wbuf = kTestValue; + int rbuf = ~kTestValue; + ASSERT_THAT(write(wfd_.get(), &wbuf, sizeof(wbuf)), + SyscallSucceedsWithValue(sizeof(wbuf))); + ASSERT_THAT(read(rfd_.get(), &rbuf, sizeof(rbuf)), + SyscallSucceedsWithValue(sizeof(rbuf))); + EXPECT_EQ(wbuf, rbuf); +} + +TEST_P(PipeTest, WritePage) { + SKIP_IF(!CreateBlocking()); + + std::vector<char> wbuf(kPageSize); + RandomizeBuffer(wbuf.data(), wbuf.size()); + std::vector<char> rbuf(wbuf.size()); + + ASSERT_THAT(write(wfd_.get(), wbuf.data(), wbuf.size()), + SyscallSucceedsWithValue(wbuf.size())); + ASSERT_THAT(read(rfd_.get(), rbuf.data(), rbuf.size()), + SyscallSucceedsWithValue(rbuf.size())); + EXPECT_EQ(memcmp(rbuf.data(), wbuf.data(), wbuf.size()), 0); +} + +TEST_P(PipeTest, NonBlocking) { + SKIP_IF(!CreateNonBlocking()); + + int wbuf = kTestValue; + int rbuf = ~kTestValue; + EXPECT_THAT(read(rfd_.get(), &rbuf, sizeof(rbuf)), + SyscallFailsWithErrno(EWOULDBLOCK)); + ASSERT_THAT(write(wfd_.get(), &wbuf, sizeof(wbuf)), + SyscallSucceedsWithValue(sizeof(wbuf))); + + ASSERT_THAT(read(rfd_.get(), &rbuf, sizeof(rbuf)), + SyscallSucceedsWithValue(sizeof(rbuf))); + EXPECT_EQ(wbuf, rbuf); + EXPECT_THAT(read(rfd_.get(), &rbuf, sizeof(rbuf)), + SyscallFailsWithErrno(EWOULDBLOCK)); +} + +TEST(Pipe2Test, CloExec) { + int fds[2]; + ASSERT_THAT(pipe2(fds, O_CLOEXEC), SyscallSucceeds()); + EXPECT_THAT(fcntl(fds[0], F_GETFD), SyscallSucceedsWithValue(FD_CLOEXEC)); + EXPECT_THAT(fcntl(fds[1], F_GETFD), SyscallSucceedsWithValue(FD_CLOEXEC)); + EXPECT_THAT(close(fds[0]), SyscallSucceeds()); + 
EXPECT_THAT(close(fds[1]), SyscallSucceeds()); +} + +TEST(Pipe2Test, BadOptions) { + int fds[2]; + EXPECT_THAT(pipe2(fds, 0xDEAD), SyscallFailsWithErrno(EINVAL)); +} + +// Tests that opening named pipes with O_TRUNC shouldn't cause an error, but +// calls to (f)truncate should. +TEST(NamedPipeTest, Truncate) { + const std::string tmp_path = NewTempAbsPath(); + SKIP_IF(mkfifo(tmp_path.c_str(), 0644) != 0); + + ASSERT_THAT(open(tmp_path.c_str(), O_NONBLOCK | O_RDONLY), SyscallSucceeds()); + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE( + Open(tmp_path.c_str(), O_RDWR | O_NONBLOCK | O_TRUNC)); + + ASSERT_THAT(truncate(tmp_path.c_str(), 0), SyscallFailsWithErrno(EINVAL)); + ASSERT_THAT(ftruncate(fd.get(), 0), SyscallFailsWithErrno(EINVAL)); +} + +TEST_P(PipeTest, Seek) { + SKIP_IF(!CreateBlocking()); + + for (int i = 0; i < 4; i++) { + // Attempt absolute seeks. + EXPECT_THAT(lseek(rfd_.get(), 0, SEEK_SET), SyscallFailsWithErrno(ESPIPE)); + EXPECT_THAT(lseek(rfd_.get(), 4, SEEK_SET), SyscallFailsWithErrno(ESPIPE)); + EXPECT_THAT(lseek(wfd_.get(), 0, SEEK_SET), SyscallFailsWithErrno(ESPIPE)); + EXPECT_THAT(lseek(wfd_.get(), 4, SEEK_SET), SyscallFailsWithErrno(ESPIPE)); + + // Attempt relative seeks. + EXPECT_THAT(lseek(rfd_.get(), 0, SEEK_CUR), SyscallFailsWithErrno(ESPIPE)); + EXPECT_THAT(lseek(rfd_.get(), 4, SEEK_CUR), SyscallFailsWithErrno(ESPIPE)); + EXPECT_THAT(lseek(wfd_.get(), 0, SEEK_CUR), SyscallFailsWithErrno(ESPIPE)); + EXPECT_THAT(lseek(wfd_.get(), 4, SEEK_CUR), SyscallFailsWithErrno(ESPIPE)); + + // Attempt end-of-file seeks. + EXPECT_THAT(lseek(rfd_.get(), 0, SEEK_CUR), SyscallFailsWithErrno(ESPIPE)); + EXPECT_THAT(lseek(rfd_.get(), -4, SEEK_END), SyscallFailsWithErrno(ESPIPE)); + EXPECT_THAT(lseek(wfd_.get(), 0, SEEK_CUR), SyscallFailsWithErrno(ESPIPE)); + EXPECT_THAT(lseek(wfd_.get(), -4, SEEK_END), SyscallFailsWithErrno(ESPIPE)); + + // Add some more data to the pipe. 
+ int buf = kTestValue; + ASSERT_THAT(write(wfd_.get(), &buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + } +} + +TEST_P(PipeTest, OffsetCalls) { + SKIP_IF(!CreateBlocking()); + + int buf; + EXPECT_THAT(pread(wfd_.get(), &buf, sizeof(buf), 0), + SyscallFailsWithErrno(ESPIPE)); + EXPECT_THAT(pwrite(rfd_.get(), &buf, sizeof(buf), 0), + SyscallFailsWithErrno(ESPIPE)); + + struct iovec iov; + iov.iov_base = &buf; + iov.iov_len = sizeof(buf); + EXPECT_THAT(preadv(wfd_.get(), &iov, 1, 0), SyscallFailsWithErrno(ESPIPE)); + EXPECT_THAT(pwritev(rfd_.get(), &iov, 1, 0), SyscallFailsWithErrno(ESPIPE)); +} + +TEST_P(PipeTest, WriterSideCloses) { + SKIP_IF(!CreateBlocking()); + + ScopedThread t([this]() { + int buf = ~kTestValue; + ASSERT_THAT(read(rfd_.get(), &buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + EXPECT_EQ(buf, kTestValue); + // This will return when the close() completes. + ASSERT_THAT(read(rfd_.get(), &buf, sizeof(buf)), SyscallSucceeds()); + // This will return straight away. + ASSERT_THAT(read(rfd_.get(), &buf, sizeof(buf)), + SyscallSucceedsWithValue(0)); + }); + + // Sleep a bit so the thread can block. + absl::SleepFor(syncDelay); + + // Write to unblock. + int buf = kTestValue; + ASSERT_THAT(write(wfd_.get(), &buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + // Sleep a bit so the thread can block again. + absl::SleepFor(syncDelay); + + // Allow the thread to complete. 
+ ASSERT_THAT(close(wfd_.release()), SyscallSucceeds()); + t.Join(); +} + +TEST_P(PipeTest, WriterSideClosesReadDataFirst) { + SKIP_IF(!CreateBlocking()); + + int wbuf = kTestValue; + ASSERT_THAT(write(wfd_.get(), &wbuf, sizeof(wbuf)), + SyscallSucceedsWithValue(sizeof(wbuf))); + ASSERT_THAT(close(wfd_.release()), SyscallSucceeds()); + + int rbuf; + ASSERT_THAT(read(rfd_.get(), &rbuf, sizeof(rbuf)), + SyscallSucceedsWithValue(sizeof(rbuf))); + EXPECT_EQ(wbuf, rbuf); + EXPECT_THAT(read(rfd_.get(), &rbuf, sizeof(rbuf)), + SyscallSucceedsWithValue(0)); +} + +TEST_P(PipeTest, ReaderSideCloses) { + SKIP_IF(!CreateBlocking()); + + ASSERT_THAT(close(rfd_.release()), SyscallSucceeds()); + int buf = kTestValue; + EXPECT_THAT(write(wfd_.get(), &buf, sizeof(buf)), + SyscallFailsWithErrno(EPIPE)); +} + +TEST_P(PipeTest, CloseTwice) { + SKIP_IF(!CreateBlocking()); + + int reader = rfd_.release(); + int writer = wfd_.release(); + ASSERT_THAT(close(reader), SyscallSucceeds()); + ASSERT_THAT(close(writer), SyscallSucceeds()); + EXPECT_THAT(close(reader), SyscallFailsWithErrno(EBADF)); + EXPECT_THAT(close(writer), SyscallFailsWithErrno(EBADF)); +} + +// Blocking write returns EPIPE when read end is closed if nothing has been +// written. +TEST_P(PipeTest, BlockWriteClosed) { + SKIP_IF(!CreateBlocking()); + + absl::Notification notify; + ScopedThread t([this, ¬ify]() { + std::vector<char> buf(Size()); + // Exactly fill the pipe buffer. + ASSERT_THAT(WriteFd(wfd_.get(), buf.data(), buf.size()), + SyscallSucceedsWithValue(buf.size())); + + notify.Notify(); + + // Attempt to write one more byte. Blocks. + // N.B. Don't use WriteFd, we don't want a retry. + EXPECT_THAT(write(wfd_.get(), buf.data(), 1), SyscallFailsWithErrno(EPIPE)); + }); + + notify.WaitForNotification(); + ASSERT_THAT(close(rfd_.release()), SyscallSucceeds()); + t.Join(); +} + +// Blocking write returns EPIPE when read end is closed even if something has +// been written. 
+TEST_P(PipeTest, BlockPartialWriteClosed) { + SKIP_IF(!CreateBlocking()); + + ScopedThread t([this]() { + const int pipe_size = Size(); + std::vector<char> buf(2 * pipe_size); + + // Write more than fits in the buffer. Blocks then returns partial write + // when the other end is closed. The next call returns EPIPE. + ASSERT_THAT(write(wfd_.get(), buf.data(), buf.size()), + SyscallSucceedsWithValue(pipe_size)); + EXPECT_THAT(write(wfd_.get(), buf.data(), buf.size()), + SyscallFailsWithErrno(EPIPE)); + }); + + // Leave time for write to become blocked. + absl::SleepFor(syncDelay); + + // Unblock the above. + ASSERT_THAT(close(rfd_.release()), SyscallSucceeds()); + t.Join(); +} + +TEST_P(PipeTest, ReadFromClosedFd_NoRandomSave) { + SKIP_IF(!CreateBlocking()); + + absl::Notification notify; + ScopedThread t([this, ¬ify]() { + notify.Notify(); + int buf; + ASSERT_THAT(read(rfd_.get(), &buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + ASSERT_EQ(kTestValue, buf); + }); + notify.WaitForNotification(); + + // Make sure that the thread gets to read(). + absl::SleepFor(syncDelay); + + { + // We cannot save/restore here as the read end of pipe is closed but there + // is ongoing read() above. We will not be able to restart the read() + // successfully in restore run since the read fd is closed. 
+ const DisableSave ds; + ASSERT_THAT(close(rfd_.release()), SyscallSucceeds()); + int buf = kTestValue; + ASSERT_THAT(write(wfd_.get(), &buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + t.Join(); + } +} + +TEST_P(PipeTest, FionRead) { + SKIP_IF(!CreateBlocking()); + + int n; + ASSERT_THAT(ioctl(rfd_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_EQ(n, 0); + ASSERT_THAT(ioctl(wfd_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_EQ(n, 0); + + std::vector<char> buf(Size()); + ASSERT_THAT(write(wfd_.get(), buf.data(), buf.size()), + SyscallSucceedsWithValue(buf.size())); + + EXPECT_THAT(ioctl(rfd_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_EQ(n, buf.size()); + EXPECT_THAT(ioctl(wfd_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_EQ(n, buf.size()); +} + +// Test that opening an empty anonymous pipe RDONLY via /proc/self/fd/N does not +// block waiting for a writer. +TEST_P(PipeTest, OpenViaProcSelfFD) { + SKIP_IF(!CreateBlocking()); + SKIP_IF(IsNamedPipe()); + + // Close the write end of the pipe. + ASSERT_THAT(close(wfd_.release()), SyscallSucceeds()); + + // Open other side via /proc/self/fd. It should not block. + FileDescriptor proc_self_fd = ASSERT_NO_ERRNO_AND_VALUE( + Open(absl::StrCat("/proc/self/fd/", rfd_.get()), O_RDONLY)); +} + +// Test that opening and reading from an anonymous pipe (with existing writes) +// RDONLY via /proc/self/fd/N returns the existing data. +TEST_P(PipeTest, OpenViaProcSelfFDWithWrites) { + SKIP_IF(!CreateBlocking()); + SKIP_IF(IsNamedPipe()); + + // Write to the pipe and then close the write fd. + int wbuf = kTestValue; + ASSERT_THAT(write(wfd_.get(), &wbuf, sizeof(wbuf)), + SyscallSucceedsWithValue(sizeof(wbuf))); + ASSERT_THAT(close(wfd_.release()), SyscallSucceeds()); + + // Open read side via /proc/self/fd, and read from it. 
+ FileDescriptor proc_self_fd = ASSERT_NO_ERRNO_AND_VALUE( + Open(absl::StrCat("/proc/self/fd/", rfd_.get()), O_RDONLY)); + int rbuf; + ASSERT_THAT(read(proc_self_fd.get(), &rbuf, sizeof(rbuf)), + SyscallSucceedsWithValue(sizeof(rbuf))); + EXPECT_EQ(wbuf, rbuf); +} + +// Test that accesses of /proc/<PID>/fd correctly decrement the refcount. +TEST_P(PipeTest, ProcFDReleasesFile) { + SKIP_IF(!CreateBlocking()); + + // Stat the pipe FD, which shouldn't alter the refcount. + struct stat wst; + ASSERT_THAT(lstat(absl::StrCat("/proc/self/fd/", wfd_.get()).c_str(), &wst), + SyscallSucceeds()); + + // Close the write end and ensure that read indicates EOF. + wfd_.reset(); + char buf; + ASSERT_THAT(read(rfd_.get(), &buf, 1), SyscallSucceedsWithValue(0)); +} + +// Same for /proc/<PID>/fdinfo. +TEST_P(PipeTest, ProcFDInfoReleasesFile) { + SKIP_IF(!CreateBlocking()); + + // Stat the pipe FD, which shouldn't alter the refcount. + struct stat wst; + ASSERT_THAT( + lstat(absl::StrCat("/proc/self/fdinfo/", wfd_.get()).c_str(), &wst), + SyscallSucceeds()); + + // Close the write end and ensure that read indicates EOF. + wfd_.reset(); + char buf; + ASSERT_THAT(read(rfd_.get(), &buf, 1), SyscallSucceedsWithValue(0)); +} + +TEST_P(PipeTest, SizeChange) { + SKIP_IF(!CreateBlocking()); + + // Set the minimum possible size. + ASSERT_THAT(fcntl(rfd_.get(), F_SETPIPE_SZ, 0), SyscallSucceeds()); + int min = Size(); + EXPECT_GT(min, 0); // Should be rounded up. + + // Set from the read end. + ASSERT_THAT(fcntl(rfd_.get(), F_SETPIPE_SZ, min + 1), SyscallSucceeds()); + int med = Size(); + EXPECT_GT(med, min); // Should have grown, may be rounded. + + // Set from the write end. + ASSERT_THAT(fcntl(wfd_.get(), F_SETPIPE_SZ, med + 1), SyscallSucceeds()); + int max = Size(); + EXPECT_GT(max, med); // Ditto. +} + +TEST_P(PipeTest, SizeChangeMax) { + SKIP_IF(!CreateBlocking()); + + // Assert there's some maximum. 
+ EXPECT_THAT(fcntl(rfd_.get(), F_SETPIPE_SZ, 0x7fffffffffffffff), + SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(fcntl(wfd_.get(), F_SETPIPE_SZ, 0x7fffffffffffffff), + SyscallFailsWithErrno(EINVAL)); +} + +TEST_P(PipeTest, SizeChangeFull) { + SKIP_IF(!CreateBlocking()); + + // Ensure that we adjust to a large enough size to avoid rounding when we + // perform the size decrease. If rounding occurs, we may not actually + // adjust the size and the call below will return success. It was found via + // experimentation that this granularity avoids the rounding for Linux. + constexpr int kDelta = 64 * 1024; + ASSERT_THAT(fcntl(wfd_.get(), F_SETPIPE_SZ, Size() + kDelta), + SyscallSucceeds()); + + // Fill the buffer and try to change down. + std::vector<char> buf(Size()); + ASSERT_THAT(write(wfd_.get(), buf.data(), buf.size()), + SyscallSucceedsWithValue(buf.size())); + EXPECT_THAT(fcntl(wfd_.get(), F_SETPIPE_SZ, Size() - kDelta), + SyscallFailsWithErrno(EBUSY)); +} + +TEST_P(PipeTest, Streaming) { + SKIP_IF(!CreateBlocking()); + + // We make too many calls to go through full save cycles. + DisableSave ds; + + // Size() requires 2 syscalls, call it once and remember the value. + const int pipe_size = Size(); + + absl::Notification notify; + ScopedThread t([this, ¬ify, pipe_size]() { + // Don't start until it's full. + notify.WaitForNotification(); + for (int i = 0; i < pipe_size; i++) { + int rbuf; + ASSERT_THAT(read(rfd_.get(), &rbuf, sizeof(rbuf)), + SyscallSucceedsWithValue(sizeof(rbuf))); + EXPECT_EQ(rbuf, i); + } + }); + + // Write 4 bytes * pipe_size. It will fill up the pipe once, notify the reader + // to start. Then we write pipe size worth 3 more times to ensure the reader + // can follow along. + ssize_t total = 0; + for (int i = 0; i < pipe_size; i++) { + ssize_t written = write(wfd_.get(), &i, sizeof(i)); + ASSERT_THAT(written, SyscallSucceedsWithValue(sizeof(i))); + total += written; + + // Is the next write about to fill up the buffer? 
Wake up the reader once. + if (total < pipe_size && (total + written) >= pipe_size) { + notify.Notify(); + } + } +} + +std::string PipeCreatorName(::testing::TestParamInfo<PipeCreator> info) { + return info.param.name_; // Use the name specified. +} + +INSTANTIATE_TEST_SUITE_P( + Pipes, PipeTest, + ::testing::Values( + PipeCreator{ + "pipe", + [](int fds[2], bool* is_blocking, bool* is_namedpipe) { + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + *is_blocking = true; + *is_namedpipe = false; + }, + }, + PipeCreator{ + "pipe2blocking", + [](int fds[2], bool* is_blocking, bool* is_namedpipe) { + ASSERT_THAT(pipe2(fds, 0), SyscallSucceeds()); + *is_blocking = true; + *is_namedpipe = false; + }, + }, + PipeCreator{ + "pipe2nonblocking", + [](int fds[2], bool* is_blocking, bool* is_namedpipe) { + ASSERT_THAT(pipe2(fds, O_NONBLOCK), SyscallSucceeds()); + *is_blocking = false; + *is_namedpipe = false; + }, + }, + PipeCreator{ + "smallbuffer", + [](int fds[2], bool* is_blocking, bool* is_namedpipe) { + // Set to the minimum available size (will round up). + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + ASSERT_THAT(fcntl(fds[0], F_SETPIPE_SZ, 0), SyscallSucceeds()); + *is_blocking = true; + *is_namedpipe = false; + }, + }, + PipeCreator{ + "namednonblocking", + [](int fds[2], bool* is_blocking, bool* is_namedpipe) { + // Create a new file-based pipe (non-blocking). + std::string path; + { + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + path = file.path(); + } + SKIP_IF(mkfifo(path.c_str(), 0644) != 0); + fds[0] = open(path.c_str(), O_NONBLOCK | O_RDONLY); + fds[1] = open(path.c_str(), O_NONBLOCK | O_WRONLY); + MaybeSave(); + *is_blocking = false; + *is_namedpipe = true; + }, + }, + PipeCreator{ + "namedblocking", + [](int fds[2], bool* is_blocking, bool* is_namedpipe) { + // Create a new file-based pipe (blocking). 
+ std::string path; + { + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + path = file.path(); + } + SKIP_IF(mkfifo(path.c_str(), 0644) != 0); + ScopedThread t( + [&path, &fds]() { fds[1] = open(path.c_str(), O_WRONLY); }); + fds[0] = open(path.c_str(), O_RDONLY); + t.Join(); + MaybeSave(); + *is_blocking = true; + *is_namedpipe = true; + }, + }), + PipeCreatorName); + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/poll.cc b/test/syscalls/linux/poll.cc new file mode 100644 index 000000000..7a316427d --- /dev/null +++ b/test/syscalls/linux/poll.cc @@ -0,0 +1,294 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <poll.h> +#include <sys/resource.h> +#include <sys/socket.h> +#include <sys/types.h> + +#include <algorithm> +#include <iostream> + +#include "gtest/gtest.h" +#include "absl/synchronization/notification.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/syscalls/linux/base_poll_test.h" +#include "test/util/eventfd_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/logging.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { +namespace { + +class PollTest : public BasePollTest { + protected: + void SetUp() override { BasePollTest::SetUp(); } + void TearDown() override { BasePollTest::TearDown(); } +}; + +TEST_F(PollTest, InvalidFds) { + // fds is invalid because it's null, but we tell ppoll the length is non-zero. + EXPECT_THAT(poll(nullptr, 1, 1), SyscallFailsWithErrno(EFAULT)); + EXPECT_THAT(poll(nullptr, -1, 1), SyscallFailsWithErrno(EINVAL)); +} + +TEST_F(PollTest, NullFds) { + EXPECT_THAT(poll(nullptr, 0, 10), SyscallSucceeds()); +} + +TEST_F(PollTest, ZeroTimeout) { + EXPECT_THAT(poll(nullptr, 0, 0), SyscallSucceeds()); +} + +// If random S/R interrupts the poll, SIGALRM may be delivered before poll +// restarts, causing the poll to hang forever. +TEST_F(PollTest, NegativeTimeout_NoRandomSave) { + // Negative timeout mean wait forever so set a timer. + SetTimer(absl::Milliseconds(100)); + EXPECT_THAT(poll(nullptr, 0, -1), SyscallFailsWithErrno(EINTR)); + EXPECT_TRUE(TimerFired()); +} + +TEST_F(PollTest, NonBlockingEventPOLLIN) { + // Create a pipe. + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + + FileDescriptor fd0(fds[0]); + FileDescriptor fd1(fds[1]); + + // Write some data to the pipe. + char s[] = "foo\n"; + ASSERT_THAT(WriteFd(fd1.get(), s, strlen(s) + 1), SyscallSucceeds()); + + // Poll on the reader fd with POLLIN event. 
+ struct pollfd poll_fd = {fd0.get(), POLLIN, 0}; + EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, 0), SyscallSucceedsWithValue(1)); + + // Should trigger POLLIN event. + EXPECT_EQ(poll_fd.revents & POLLIN, POLLIN); +} + +TEST_F(PollTest, BlockingEventPOLLIN) { + // Create a pipe. + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + + FileDescriptor fd0(fds[0]); + FileDescriptor fd1(fds[1]); + + // Start a blocking poll on the read fd. + absl::Notification notify; + ScopedThread t([&fd0, ¬ify]() { + notify.Notify(); + + // Poll on the reader fd with POLLIN event. + struct pollfd poll_fd = {fd0.get(), POLLIN, 0}; + EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, -1), SyscallSucceedsWithValue(1)); + + // Should trigger POLLIN event. + EXPECT_EQ(poll_fd.revents & POLLIN, POLLIN); + }); + + notify.WaitForNotification(); + absl::SleepFor(absl::Seconds(1.0)); + + // Write some data to the pipe. + char s[] = "foo\n"; + ASSERT_THAT(WriteFd(fd1.get(), s, strlen(s) + 1), SyscallSucceeds()); +} + +TEST_F(PollTest, NonBlockingEventPOLLHUP) { + // Create a pipe. + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + + FileDescriptor fd0(fds[0]); + FileDescriptor fd1(fds[1]); + + // Close the writer fd. + fd1.reset(); + + // Poll on the reader fd with POLLIN event. + struct pollfd poll_fd = {fd0.get(), POLLIN, 0}; + EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, 0), SyscallSucceedsWithValue(1)); + + // Should trigger POLLHUP event. + EXPECT_EQ(poll_fd.revents & POLLHUP, POLLHUP); + + // Should not trigger POLLIN event. + EXPECT_EQ(poll_fd.revents & POLLIN, 0); +} + +TEST_F(PollTest, BlockingEventPOLLHUP) { + // Create a pipe. + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + + FileDescriptor fd0(fds[0]); + FileDescriptor fd1(fds[1]); + + // Start a blocking poll on the read fd. + absl::Notification notify; + ScopedThread t([&fd0, ¬ify]() { + notify.Notify(); + + // Poll on the reader fd with POLLIN event. 
+ struct pollfd poll_fd = {fd0.get(), POLLIN, 0}; + EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, -1), SyscallSucceedsWithValue(1)); + + // Should trigger POLLHUP event. + EXPECT_EQ(poll_fd.revents & POLLHUP, POLLHUP); + + // Should not trigger POLLIN event. + EXPECT_EQ(poll_fd.revents & POLLIN, 0); + }); + + notify.WaitForNotification(); + absl::SleepFor(absl::Seconds(1.0)); + + // Write some data and close the writer fd. + fd1.reset(); +} + +TEST_F(PollTest, NonBlockingEventPOLLERR) { + // Create a pipe. + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + + FileDescriptor fd0(fds[0]); + FileDescriptor fd1(fds[1]); + + // Close the reader fd. + fd0.reset(); + + // Poll on the writer fd with POLLOUT event. + struct pollfd poll_fd = {fd1.get(), POLLOUT, 0}; + EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, 0), SyscallSucceedsWithValue(1)); + + // Should trigger POLLERR event. + EXPECT_EQ(poll_fd.revents & POLLERR, POLLERR); + + // Should also trigger POLLOUT event. + EXPECT_EQ(poll_fd.revents & POLLOUT, POLLOUT); +} + +// This test will validate that if an FD is already ready on some event, whether +// it's POLLIN or POLLOUT it will not immediately return unless that's actually +// what the caller was interested in. +TEST_F(PollTest, ImmediatelyReturnOnlyOnPollEvents) { + // Create a pipe. + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + + FileDescriptor fd0(fds[0]); + FileDescriptor fd1(fds[1]); + + // Wait for read related event on the write side of the pipe, since a write + // is possible on fds[1] it would mean that POLLOUT would return immediately. + // We should make sure that we're not woken up with that state that we didn't + // specificially request. + constexpr int kTimeoutMs = 100; + struct pollfd poll_fd = {fd1.get(), POLLIN | POLLPRI | POLLRDHUP, 0}; + EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, kTimeoutMs), + SyscallSucceedsWithValue(0)); // We should timeout. + EXPECT_EQ(poll_fd.revents, 0); // Nothing should be in returned events. 
+ + // Now let's poll on POLLOUT and we should get back 1 fd as being ready and + // it should contain POLLOUT in the revents. + poll_fd.events = POLLOUT; + EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, kTimeoutMs), + SyscallSucceedsWithValue(1)); // 1 fd should have an event. + EXPECT_EQ(poll_fd.revents, POLLOUT); // POLLOUT should be in revents. +} + +// This test validates that poll(2) while data is available immediately returns. +TEST_F(PollTest, PollLevelTriggered) { + int fds[2] = {}; + ASSERT_THAT(socketpair(AF_UNIX, SOCK_STREAM, /*protocol=*/0, fds), + SyscallSucceeds()); + + FileDescriptor fd0(fds[0]); + FileDescriptor fd1(fds[1]); + + // Write two bytes to the socket. + const char* kBuf = "aa"; + ASSERT_THAT(RetryEINTR(send)(fd0.get(), kBuf, /*len=*/2, /*flags=*/0), + SyscallSucceedsWithValue(2)); // 2 bytes should be written. + + // Poll(2) should immediately return as there is data available to read. + constexpr int kInfiniteTimeout = -1; + struct pollfd poll_fd = {fd1.get(), POLLIN, 0}; + ASSERT_THAT(RetryEINTR(poll)(&poll_fd, /*nfds=*/1, kInfiniteTimeout), + SyscallSucceedsWithValue(1)); // 1 fd should be ready to read. + EXPECT_NE(poll_fd.revents & POLLIN, 0); + + // Read a single byte. + char read_byte = 0; + ASSERT_THAT(RetryEINTR(recv)(fd1.get(), &read_byte, /*len=*/1, /*flags=*/0), + SyscallSucceedsWithValue(1)); // 1 byte should be read. + ASSERT_EQ(read_byte, 'a'); // We should have read a single 'a'. + + // Create a separate pollfd for our second poll. + struct pollfd poll_fd_after = {fd1.get(), POLLIN, 0}; + + // Poll(2) should again immediately return since we only read one byte. + ASSERT_THAT(RetryEINTR(poll)(&poll_fd_after, /*nfds=*/1, kInfiniteTimeout), + SyscallSucceedsWithValue(1)); // 1 fd should be ready to read. + EXPECT_NE(poll_fd_after.revents & POLLIN, 0); +} + +TEST_F(PollTest, Nfds) { + // Stash value of RLIMIT_NOFILES. 
+ struct rlimit rlim; + TEST_PCHECK(getrlimit(RLIMIT_NOFILE, &rlim) == 0); + + // gVisor caps the number of FDs that epoll can use beyond RLIMIT_NOFILE. + constexpr rlim_t maxFD = 4096; + if (rlim.rlim_cur > maxFD) { + rlim.rlim_cur = maxFD; + TEST_PCHECK(setrlimit(RLIMIT_NOFILE, &rlim) == 0); + } + + rlim_t max_fds = rlim.rlim_cur; + std::cout << "Using limit: " << max_fds << std::endl; + + // Create an eventfd. Since its value is initially zero, it is writable. + FileDescriptor efd = ASSERT_NO_ERRNO_AND_VALUE(NewEventFD()); + + // Create the biggest possible pollfd array such that each element is valid. + // Each entry in the 'fds' array refers to the eventfd and polls for + // "writable" events (events=POLLOUT). This essentially guarantees that the + // poll() is a no-op and allows negative testing of the 'nfds' parameter. + std::vector<struct pollfd> fds(max_fds + 1, + {.fd = efd.get(), .events = POLLOUT}); + + // Verify that 'nfds' up to RLIMIT_NOFILE are allowed. + EXPECT_THAT(RetryEINTR(poll)(fds.data(), 1, 1), SyscallSucceedsWithValue(1)); + EXPECT_THAT(RetryEINTR(poll)(fds.data(), max_fds / 2, 1), + SyscallSucceedsWithValue(max_fds / 2)); + EXPECT_THAT(RetryEINTR(poll)(fds.data(), max_fds, 1), + SyscallSucceedsWithValue(max_fds)); + + // If 'nfds' exceeds RLIMIT_NOFILE then it must fail with EINVAL. + EXPECT_THAT(poll(fds.data(), max_fds + 1, 1), SyscallFailsWithErrno(EINVAL)); +} + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/ppoll.cc b/test/syscalls/linux/ppoll.cc new file mode 100644 index 000000000..8245a11e8 --- /dev/null +++ b/test/syscalls/linux/ppoll.cc @@ -0,0 +1,155 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <poll.h> +#include <signal.h> +#include <sys/syscall.h> +#include <sys/time.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "absl/time/time.h" +#include "test/syscalls/linux/base_poll_test.h" +#include "test/util/signal_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { +namespace { + +// Linux and glibc have a different idea of the sizeof sigset_t. When calling +// the syscall directly, use what the kernel expects. +unsigned kSigsetSize = SIGRTMAX / 8; + +// Linux ppoll(2) differs from the glibc wrapper function in that Linux updates +// the timeout with the amount of time remaining. In order to test this behavior +// we need to use the syscall directly. +int syscallPpoll(struct pollfd* fds, nfds_t nfds, struct timespec* timeout_ts, + const sigset_t* sigmask, unsigned mask_size) { + return syscall(SYS_ppoll, fds, nfds, timeout_ts, sigmask, mask_size); +} + +class PpollTest : public BasePollTest { + protected: + void SetUp() override { BasePollTest::SetUp(); } + void TearDown() override { BasePollTest::TearDown(); } +}; + +TEST_F(PpollTest, InvalidFds) { + // fds is invalid because it's null, but we tell ppoll the length is non-zero. 
+ struct timespec timeout = {}; + sigset_t sigmask; + TEST_PCHECK(sigemptyset(&sigmask) == 0); + EXPECT_THAT(syscallPpoll(nullptr, 1, &timeout, &sigmask, kSigsetSize), + SyscallFailsWithErrno(EFAULT)); + EXPECT_THAT(syscallPpoll(nullptr, -1, &timeout, &sigmask, kSigsetSize), + SyscallFailsWithErrno(EINVAL)); +} + +// See that when fds is null, ppoll behaves like sleep. +TEST_F(PpollTest, NullFds) { + struct timespec timeout = absl::ToTimespec(absl::Milliseconds(10)); + ASSERT_THAT(syscallPpoll(nullptr, 0, &timeout, nullptr, 0), + SyscallSucceeds()); + EXPECT_EQ(timeout.tv_sec, 0); + EXPECT_EQ(timeout.tv_nsec, 0); +} + +TEST_F(PpollTest, ZeroTimeout) { + struct timespec timeout = {}; + ASSERT_THAT(syscallPpoll(nullptr, 0, &timeout, nullptr, 0), + SyscallSucceeds()); + EXPECT_EQ(timeout.tv_sec, 0); + EXPECT_EQ(timeout.tv_nsec, 0); +} + +// If random S/R interrupts the ppoll, SIGALRM may be delivered before ppoll +// restarts, causing the ppoll to hang forever. +TEST_F(PpollTest, NoTimeout_NoRandomSave) { + // When there's no timeout, ppoll may never return so set a timer. + SetTimer(absl::Milliseconds(100)); + // See that we get interrupted by the timer. 
+ ASSERT_THAT(syscallPpoll(nullptr, 0, nullptr, nullptr, 0), + SyscallFailsWithErrno(EINTR)); + EXPECT_TRUE(TimerFired()); +} + +TEST_F(PpollTest, InvalidTimeoutNegative) { + struct timespec timeout = absl::ToTimespec(absl::Nanoseconds(-1)); + EXPECT_THAT(syscallPpoll(nullptr, 0, &timeout, nullptr, 0), + SyscallFailsWithErrno(EINVAL)); +} + +TEST_F(PpollTest, InvalidTimeoutNotNormalized) { + struct timespec timeout = {0, 1000000001}; + EXPECT_THAT(syscallPpoll(nullptr, 0, &timeout, nullptr, 0), + SyscallFailsWithErrno(EINVAL)); +} + +TEST_F(PpollTest, InvalidMaskSize) { + struct timespec timeout = {}; + sigset_t sigmask; + TEST_PCHECK(sigemptyset(&sigmask) == 0); + EXPECT_THAT(syscallPpoll(nullptr, 0, &timeout, &sigmask, 128), + SyscallFailsWithErrno(EINVAL)); +} + +// Verify that signals blocked by the ppoll mask (that would otherwise be +// allowed) do not interrupt ppoll. +TEST_F(PpollTest, SignalMaskBlocksSignal) { + absl::Duration duration(absl::Seconds(30)); + struct timespec timeout = absl::ToTimespec(duration); + absl::Duration timer_duration(absl::Seconds(10)); + + // Call with a mask that blocks SIGALRM. See that ppoll is not interrupted + // (i.e. returns 0) and that upon completion, the timer has fired. + sigset_t mask; + ASSERT_THAT(sigprocmask(0, nullptr, &mask), SyscallSucceeds()); + TEST_PCHECK(sigaddset(&mask, SIGALRM) == 0); + SetTimer(timer_duration); + MaybeSave(); + ASSERT_FALSE(TimerFired()); + ASSERT_THAT(syscallPpoll(nullptr, 0, &timeout, &mask, kSigsetSize), + SyscallSucceeds()); + EXPECT_TRUE(TimerFired()); + EXPECT_EQ(absl::DurationFromTimespec(timeout), absl::Duration()); +} + +// Verify that signals allowed by the ppoll mask (that would otherwise be +// blocked) interrupt ppoll. 
+TEST_F(PpollTest, SignalMaskAllowsSignal) { + absl::Duration duration(absl::Seconds(30)); + struct timespec timeout = absl::ToTimespec(duration); + absl::Duration timer_duration(absl::Seconds(10)); + + sigset_t mask; + ASSERT_THAT(sigprocmask(0, nullptr, &mask), SyscallSucceeds()); + + // Block SIGALRM. + auto cleanup = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_BLOCK, SIGALRM)); + + // Call with a mask that unblocks SIGALRM. See that ppoll is interrupted. + SetTimer(timer_duration); + MaybeSave(); + ASSERT_FALSE(TimerFired()); + ASSERT_THAT(syscallPpoll(nullptr, 0, &timeout, &mask, kSigsetSize), + SyscallFailsWithErrno(EINTR)); + EXPECT_TRUE(TimerFired()); + EXPECT_GT(absl::DurationFromTimespec(timeout), absl::Duration()); +} + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/prctl.cc b/test/syscalls/linux/prctl.cc new file mode 100644 index 000000000..04c5161f5 --- /dev/null +++ b/test/syscalls/linux/prctl.cc @@ -0,0 +1,230 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <sys/prctl.h> +#include <sys/ptrace.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#include <string> + +#include "gtest/gtest.h" +#include "absl/flags/flag.h" +#include "test/util/capability_util.h" +#include "test/util/cleanup.h" +#include "test/util/multiprocess_util.h" +#include "test/util/posix_error.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +ABSL_FLAG(bool, prctl_no_new_privs_test_child, false, + "If true, exit with the return value of prctl(PR_GET_NO_NEW_PRIVS) " + "plus an offset (see test source)."); + +namespace gvisor { +namespace testing { + +namespace { + +#ifndef SUID_DUMP_DISABLE +#define SUID_DUMP_DISABLE 0 +#endif /* SUID_DUMP_DISABLE */ +#ifndef SUID_DUMP_USER +#define SUID_DUMP_USER 1 +#endif /* SUID_DUMP_USER */ +#ifndef SUID_DUMP_ROOT +#define SUID_DUMP_ROOT 2 +#endif /* SUID_DUMP_ROOT */ + +TEST(PrctlTest, NameInitialized) { + const size_t name_length = 20; + char name[name_length] = {}; + ASSERT_THAT(prctl(PR_GET_NAME, name), SyscallSucceeds()); + ASSERT_NE(std::string(name), ""); +} + +TEST(PrctlTest, SetNameLongName) { + const size_t name_length = 20; + const std::string long_name(name_length, 'A'); + ASSERT_THAT(prctl(PR_SET_NAME, long_name.c_str()), SyscallSucceeds()); + char truncated_name[name_length] = {}; + ASSERT_THAT(prctl(PR_GET_NAME, truncated_name), SyscallSucceeds()); + const size_t truncated_length = 15; + ASSERT_EQ(long_name.substr(0, truncated_length), std::string(truncated_name)); +} + +TEST(PrctlTest, ChildProcessName) { + constexpr size_t kMaxNameLength = 15; + + char parent_name[kMaxNameLength + 1] = {}; + memset(parent_name, 'a', kMaxNameLength); + + ASSERT_THAT(prctl(PR_SET_NAME, parent_name), SyscallSucceeds()); + + pid_t child_pid = fork(); + TEST_PCHECK(child_pid >= 0); + if (child_pid == 0) { + char child_name[kMaxNameLength + 1] = {}; + TEST_PCHECK(prctl(PR_GET_NAME, child_name) >= 0); + TEST_CHECK(memcmp(parent_name, child_name, 
sizeof(parent_name)) == 0); + _exit(0); + } + + int status; + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << "status =" << status; +} + +// Offset added to exit code from test child to distinguish from other abnormal +// exits. +constexpr int kPrctlNoNewPrivsTestChildExitBase = 100; + +TEST(PrctlTest, NoNewPrivsPreservedAcrossCloneForkAndExecve) { + // Check if no_new_privs is already set. If it is, we can still test that it's + // preserved across clone/fork/execve, but we also expect it to still be set + // at the end of the test. Otherwise, call prctl(PR_SET_NO_NEW_PRIVS) so as + // not to contaminate the original thread. + int no_new_privs; + ASSERT_THAT(no_new_privs = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0), + SyscallSucceeds()); + ScopedThread([] { + ASSERT_THAT(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0), SyscallSucceeds()); + EXPECT_THAT(prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0), + SyscallSucceedsWithValue(1)); + ScopedThread([] { + EXPECT_THAT(prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0), + SyscallSucceedsWithValue(1)); + // Note that these ASSERT_*s failing will only return from this thread, + // but this is the intended behavior. 
+ pid_t child_pid = -1; + int execve_errno = 0; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec("/proc/self/exe", + {"/proc/self/exe", "--prctl_no_new_privs_test_child"}, {}, + nullptr, &child_pid, &execve_errno)); + + ASSERT_GT(child_pid, 0); + ASSERT_EQ(execve_errno, 0); + + int status = 0; + ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), + SyscallSucceeds()); + ASSERT_TRUE(WIFEXITED(status)); + ASSERT_EQ(WEXITSTATUS(status), kPrctlNoNewPrivsTestChildExitBase + 1); + + EXPECT_THAT(prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0), + SyscallSucceedsWithValue(1)); + }); + EXPECT_THAT(prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0), + SyscallSucceedsWithValue(1)); + }); + EXPECT_THAT(prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0), + SyscallSucceedsWithValue(no_new_privs)); +} + +TEST(PrctlTest, PDeathSig) { + pid_t child_pid; + + // Make the new process' parent a separate thread since the parent death + // signal fires when the parent *thread* exits. + ScopedThread([&] { + child_pid = fork(); + TEST_CHECK(child_pid >= 0); + if (child_pid == 0) { + // In child process. + TEST_CHECK(prctl(PR_SET_PDEATHSIG, SIGKILL) >= 0); + int signo; + TEST_CHECK(prctl(PR_GET_PDEATHSIG, &signo) >= 0); + TEST_CHECK(signo == SIGKILL); + // Enable tracing, then raise SIGSTOP and expect our parent to suppress + // it. + TEST_CHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) >= 0); + raise(SIGSTOP); + // Sleep until killed by our parent death signal. sleep(3) is + // async-signal-safe, absl::SleepFor isn't. + while (true) { + sleep(10); + } + } + // In parent process. + + // Wait for the child to send itself SIGSTOP and enter signal-delivery-stop. + int status; + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP) + << "status = " << status; + + // Suppress the SIGSTOP and detach from the child. 
+ ASSERT_THAT(ptrace(PTRACE_DETACH, child_pid, 0, 0), SyscallSucceeds()); + }); + + // The child should have been killed by its parent death SIGKILL. + int status; + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL) + << "status = " << status; +} + +// This test is to validate that calling prctl with PR_SET_MM without the +// CAP_SYS_RESOURCE returns EPERM. +TEST(PrctlTest, InvalidPrSetMM) { + if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_RESOURCE))) { + ASSERT_NO_ERRNO(SetCapability(CAP_SYS_RESOURCE, + false)); // Drop capability to test below. + } + ASSERT_THAT(prctl(PR_SET_MM, 0, 0, 0, 0), SyscallFailsWithErrno(EPERM)); +} + +// Sanity check that dumpability is remembered. +TEST(PrctlTest, SetGetDumpability) { + int before; + ASSERT_THAT(before = prctl(PR_GET_DUMPABLE), SyscallSucceeds()); + auto cleanup = Cleanup([before] { + ASSERT_THAT(prctl(PR_SET_DUMPABLE, before), SyscallSucceeds()); + }); + + EXPECT_THAT(prctl(PR_SET_DUMPABLE, SUID_DUMP_DISABLE), SyscallSucceeds()); + EXPECT_THAT(prctl(PR_GET_DUMPABLE), + SyscallSucceedsWithValue(SUID_DUMP_DISABLE)); + + EXPECT_THAT(prctl(PR_SET_DUMPABLE, SUID_DUMP_USER), SyscallSucceeds()); + EXPECT_THAT(prctl(PR_GET_DUMPABLE), SyscallSucceedsWithValue(SUID_DUMP_USER)); +} + +// SUID_DUMP_ROOT cannot be set via PR_SET_DUMPABLE. 
+TEST(PrctlTest, RootDumpability) { + EXPECT_THAT(prctl(PR_SET_DUMPABLE, SUID_DUMP_ROOT), + SyscallFailsWithErrno(EINVAL)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor + +int main(int argc, char** argv) { + gvisor::testing::TestInit(&argc, &argv); + + if (absl::GetFlag(FLAGS_prctl_no_new_privs_test_child)) { + exit(gvisor::testing::kPrctlNoNewPrivsTestChildExitBase + + prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0)); + } + + return gvisor::testing::RunAllTests(); +} diff --git a/test/syscalls/linux/prctl_setuid.cc b/test/syscalls/linux/prctl_setuid.cc new file mode 100644 index 000000000..c4e9cf528 --- /dev/null +++ b/test/syscalls/linux/prctl_setuid.cc @@ -0,0 +1,268 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <sched.h> +#include <sys/prctl.h> + +#include <string> + +#include "gtest/gtest.h" +#include "absl/flags/flag.h" +#include "test/util/capability_util.h" +#include "test/util/logging.h" +#include "test/util/multiprocess_util.h" +#include "test/util/posix_error.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +ABSL_FLAG(int32_t, scratch_uid, 65534, "scratch UID"); +// This flag is used to verify that after an exec PR_GET_KEEPCAPS +// returns 0, the return code will be offset by kPrGetKeepCapsExitBase. +ABSL_FLAG(bool, prctl_pr_get_keepcaps, false, + "If true the test will verify that prctl with pr_get_keepcaps" + "returns 0. 
The test will exit with the result of that check."); + +// These tests exist seperately from prctl because we need to start +// them as root. Setuid() has the behavior that permissions are fully +// removed if one of the UIDs were 0 before a setuid() call. This +// behavior can be changed by using PR_SET_KEEPCAPS and that is what +// is tested here. +// +// Reference setuid(2): +// The setuid() function checks the effective user ID of +// the caller and if it is the superuser, all process-related user ID's +// are set to uid. After this has occurred, it is impossible for the +// program to regain root privileges. +// +// Thus, a set-user-ID-root program wishing to temporarily drop root +// privileges, assume the identity of an unprivileged user, and then +// regain root privileges afterward cannot use setuid(). You can +// accomplish this with seteuid(2). +namespace gvisor { +namespace testing { + +// Offset added to exit code from test child to distinguish from other abnormal +// exits. +constexpr int kPrGetKeepCapsExitBase = 100; + +namespace { + +class PrctlKeepCapsSetuidTest : public ::testing::Test { + protected: + void SetUp() override { + // PR_GET_KEEPCAPS will only return 0 or 1 (on success). + ASSERT_THAT(original_keepcaps_ = prctl(PR_GET_KEEPCAPS, 0, 0, 0, 0), + SyscallSucceeds()); + ASSERT_TRUE(original_keepcaps_ == 0 || original_keepcaps_ == 1); + } + + void TearDown() override { + // Restore PR_SET_KEEPCAPS. + ASSERT_THAT(prctl(PR_SET_KEEPCAPS, original_keepcaps_, 0, 0, 0), + SyscallSucceeds()); + + // Verify that it was restored. + ASSERT_THAT(prctl(PR_GET_KEEPCAPS, 0, 0, 0, 0), + SyscallSucceedsWithValue(original_keepcaps_)); + } + + // The original keep caps value exposed so tests can use it if they need. + int original_keepcaps_ = 0; +}; + +// This test will verify that a bad value, eg. not 0 or 1 for +// PR_SET_KEEPCAPS will return EINVAL as required by prctl(2). 
+TEST_F(PrctlKeepCapsSetuidTest, PrctlBadArgsToKeepCaps) { + ASSERT_THAT(prctl(PR_SET_KEEPCAPS, 2, 0, 0, 0), + SyscallFailsWithErrno(EINVAL)); +} + +// This test will verify that a setuid(2) without PR_SET_KEEPCAPS will cause +// all capabilities to be dropped. +TEST_F(PrctlKeepCapsSetuidTest, SetUidNoKeepCaps) { + // getuid(2) never fails. + if (getuid() != 0) { + SKIP_IF(!IsRunningOnGvisor()); + FAIL() << "User is not root on gvisor platform."; + } + + // Do setuid in a separate thread so that after finishing this test, the + // process can still open files the test harness created before starting + // this test. Otherwise, the files are created by root (UID before the + // test), but cannot be opened by the `uid` set below after the test. After + // calling setuid(non-zero-UID), there is no way to get root privileges + // back. + ScopedThread([] { + // Start by verifying we have a capability. + TEST_CHECK(HaveCapability(CAP_SYS_ADMIN).ValueOrDie()); + + // Verify that PR_GET_KEEPCAPS is disabled. + ASSERT_THAT(prctl(PR_GET_KEEPCAPS, 0, 0, 0, 0), + SyscallSucceedsWithValue(0)); + + // Use syscall instead of glibc setuid wrapper because we want this setuid + // call to only apply to this task. POSIX threads, however, require that + // all threads have the same UIDs, so using the setuid wrapper sets all + // threads' real UID. + EXPECT_THAT(syscall(SYS_setuid, absl::GetFlag(FLAGS_scratch_uid)), + SyscallSucceeds()); + + // Verify that we changed uid. + EXPECT_THAT(getuid(), + SyscallSucceedsWithValue(absl::GetFlag(FLAGS_scratch_uid))); + + // Verify we lost the capability in the effective set, this always happens. 
+ TEST_CHECK(!HaveCapability(CAP_SYS_ADMIN).ValueOrDie()); + + // We should have also lost it in the permitted set by the setuid() so + // SetCapability should fail when we try to add it back to the effective set + ASSERT_FALSE(SetCapability(CAP_SYS_ADMIN, true).ok()); + }); +} + +// This test will verify that a setuid with PR_SET_KEEPCAPS will cause +// capabilities to be retained after we switch away from the root user. +TEST_F(PrctlKeepCapsSetuidTest, SetUidKeepCaps) { + // getuid(2) never fails. + if (getuid() != 0) { + SKIP_IF(!IsRunningOnGvisor()); + FAIL() << "User is not root on gvisor platform."; + } + + // Do setuid in a separate thread so that after finishing this test, the + // process can still open files the test harness created before starting + // this test. Otherwise, the files are created by root (UID before the + // test), but cannot be opened by the `uid` set below after the test. After + // calling setuid(non-zero-UID), there is no way to get root privileges + // back. + ScopedThread([] { + // Start by verifying we have a capability. + TEST_CHECK(HaveCapability(CAP_SYS_ADMIN).ValueOrDie()); + + // Set PR_SET_KEEPCAPS. + ASSERT_THAT(prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0), SyscallSucceeds()); + + // Verify PR_SET_KEEPCAPS was set before we proceed. + ASSERT_THAT(prctl(PR_GET_KEEPCAPS, 0, 0, 0, 0), + SyscallSucceedsWithValue(1)); + + // Use syscall instead of glibc setuid wrapper because we want this setuid + // call to only apply to this task. POSIX threads, however, require that + // all threads have the same UIDs, so using the setuid wrapper sets all + // threads' real UID. + EXPECT_THAT(syscall(SYS_setuid, absl::GetFlag(FLAGS_scratch_uid)), + SyscallSucceeds()); + + // Verify that we changed uid. + EXPECT_THAT(getuid(), + SyscallSucceedsWithValue(absl::GetFlag(FLAGS_scratch_uid))); + + // Verify we lost the capability in the effective set, this always happens. 
+ TEST_CHECK(!HaveCapability(CAP_SYS_ADMIN).ValueOrDie()); + + // We lost the capability in the effective set, but it will still + // exist in the permitted set so we can elevate the capability. + ASSERT_NO_ERRNO(SetCapability(CAP_SYS_ADMIN, true)); + + // Verify we got back the capability in the effective set. + TEST_CHECK(HaveCapability(CAP_SYS_ADMIN).ValueOrDie()); + }); +} + +// This test will verify that PR_SET_KEEPCAPS is not retained +// across an execve. According to prctl(2): +// "The "keep capabilities" value will be reset to 0 on subsequent +// calls to execve(2)." +TEST_F(PrctlKeepCapsSetuidTest, NoKeepCapsAfterExec) { + ASSERT_THAT(prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0), SyscallSucceeds()); + + // Verify PR_SET_KEEPCAPS was set before we proceed. + ASSERT_THAT(prctl(PR_GET_KEEPCAPS, 0, 0, 0, 0), SyscallSucceedsWithValue(1)); + + pid_t child_pid = -1; + int execve_errno = 0; + // Do an exec and then verify that PR_GET_KEEPCAPS returns 0 + // see the body of main below. + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(ForkAndExec( + "/proc/self/exe", {"/proc/self/exe", "--prctl_pr_get_keepcaps"}, {}, + nullptr, &child_pid, &execve_errno)); + + ASSERT_GT(child_pid, 0); + ASSERT_EQ(execve_errno, 0); + + int status = 0; + ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds()); + ASSERT_TRUE(WIFEXITED(status)); + // PR_SET_KEEPCAPS should have been cleared by the exec. + // Success should return gvisor::testing::kPrGetKeepCapsExitBase + 0 + ASSERT_EQ(WEXITSTATUS(status), kPrGetKeepCapsExitBase); +} + +TEST_F(PrctlKeepCapsSetuidTest, NoKeepCapsAfterNewUserNamespace) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanCreateUserNamespace())); + + // Fork to avoid changing the user namespace of the original test process. + pid_t const child_pid = fork(); + + if (child_pid == 0) { + // Verify that the keepcaps flag is set to 0 when we change user namespaces. 
+ TEST_PCHECK(prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0) == 0); + MaybeSave(); + + TEST_PCHECK(prctl(PR_GET_KEEPCAPS, 0, 0, 0, 0) == 1); + MaybeSave(); + + TEST_PCHECK(unshare(CLONE_NEWUSER) == 0); + MaybeSave(); + + TEST_PCHECK(prctl(PR_GET_KEEPCAPS, 0, 0, 0, 0) == 0); + MaybeSave(); + + _exit(0); + } + + int status; + ASSERT_THAT(child_pid, SyscallSucceeds()); + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << "status = " << status; +} + +// This test will verify that PR_SET_KEEPCAPS and PR_GET_KEEPCAPS work correctly +TEST_F(PrctlKeepCapsSetuidTest, PrGetKeepCaps) { + // Set PR_SET_KEEPCAPS to the negation of the original. + ASSERT_THAT(prctl(PR_SET_KEEPCAPS, !original_keepcaps_, 0, 0, 0), + SyscallSucceeds()); + + // Verify it was set. + ASSERT_THAT(prctl(PR_GET_KEEPCAPS, 0, 0, 0, 0), + SyscallSucceedsWithValue(!original_keepcaps_)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor + +int main(int argc, char** argv) { + gvisor::testing::TestInit(&argc, &argv); + + if (absl::GetFlag(FLAGS_prctl_pr_get_keepcaps)) { + return gvisor::testing::kPrGetKeepCapsExitBase + + prctl(PR_GET_KEEPCAPS, 0, 0, 0, 0); + } + + return gvisor::testing::RunAllTests(); +} diff --git a/test/syscalls/linux/pread64.cc b/test/syscalls/linux/pread64.cc new file mode 100644 index 000000000..bcdbbb044 --- /dev/null +++ b/test/syscalls/linux/pread64.cc @@ -0,0 +1,167 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include <errno.h> +#include <fcntl.h> +#include <linux/unistd.h> +#include <sys/mman.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "test/util/file_descriptor.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +class Pread64Test : public ::testing::Test { + void SetUp() override { + name_ = NewTempAbsPath(); + ASSERT_NO_ERRNO_AND_VALUE(Open(name_, O_CREAT, 0644)); + } + + void TearDown() override { unlink(name_.c_str()); } + + public: + std::string name_; +}; + +TEST(Pread64TestNoTempFile, BadFileDescriptor) { + char buf[1024]; + EXPECT_THAT(pread64(-1, buf, 1024, 0), SyscallFailsWithErrno(EBADF)); +} + +TEST_F(Pread64Test, ZeroBuffer) { + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(name_, O_RDWR)); + + char msg[] = "hello world"; + EXPECT_THAT(pwrite64(fd.get(), msg, strlen(msg), 0), + SyscallSucceedsWithValue(strlen(msg))); + + char buf[10]; + EXPECT_THAT(pread64(fd.get(), buf, 0, 0), SyscallSucceedsWithValue(0)); +} + +TEST_F(Pread64Test, BadBuffer) { + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(name_, O_RDWR)); + + char msg[] = "hello world"; + EXPECT_THAT(pwrite64(fd.get(), msg, strlen(msg), 0), + SyscallSucceedsWithValue(strlen(msg))); + + char* bad_buffer = nullptr; + EXPECT_THAT(pread64(fd.get(), bad_buffer, 1024, 0), + SyscallFailsWithErrno(EFAULT)); +} + +TEST_F(Pread64Test, WriteOnlyNotReadable) { + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(name_, O_WRONLY)); + + char buf[1024]; + EXPECT_THAT(pread64(fd.get(), buf, 1024, 0), SyscallFailsWithErrno(EBADF)); +} + +TEST_F(Pread64Test, DirNotReadable) { + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(GetAbsoluteTestTmpdir(), O_RDONLY)); + + char buf[1024]; + EXPECT_THAT(pread64(fd.get(), buf, 1024, 0), 
SyscallFailsWithErrno(EISDIR)); +} + +TEST_F(Pread64Test, BadOffset) { + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(name_, O_RDONLY)); + + char buf[1024]; + EXPECT_THAT(pread64(fd.get(), buf, 1024, -1), SyscallFailsWithErrno(EINVAL)); +} + +TEST_F(Pread64Test, OffsetNotIncremented) { + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(name_, O_RDWR)); + + char msg[] = "hello world"; + EXPECT_THAT(write(fd.get(), msg, strlen(msg)), + SyscallSucceedsWithValue(strlen(msg))); + int offset; + EXPECT_THAT(offset = lseek(fd.get(), 0, SEEK_CUR), SyscallSucceeds()); + + char buf1[1024]; + EXPECT_THAT(pread64(fd.get(), buf1, 1024, 0), + SyscallSucceedsWithValue(strlen(msg))); + EXPECT_THAT(lseek(fd.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(offset)); + + char buf2[1024]; + EXPECT_THAT(pread64(fd.get(), buf2, 1024, 3), + SyscallSucceedsWithValue(strlen(msg) - 3)); + EXPECT_THAT(lseek(fd.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(offset)); +} + +TEST_F(Pread64Test, EndOfFile) { + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(name_, O_RDONLY)); + + char buf[1024]; + EXPECT_THAT(pread64(fd.get(), buf, 1024, 0), SyscallSucceedsWithValue(0)); +} + +int memfd_create(const std::string& name, unsigned int flags) { + return syscall(__NR_memfd_create, name.c_str(), flags); +} + +TEST_F(Pread64Test, Overflow) { + int f = memfd_create("negative", 0); + const FileDescriptor fd(f); + + EXPECT_THAT(ftruncate(fd.get(), 0x7fffffffffffffffull), SyscallSucceeds()); + + char buf[10]; + EXPECT_THAT(pread64(fd.get(), buf, sizeof(buf), 0x7fffffffffffffffull), + SyscallFailsWithErrno(EINVAL)); +} + +TEST(Pread64TestNoTempFile, CantReadSocketPair_NoRandomSave) { + int sock_fds[2]; + EXPECT_THAT(socketpair(AF_UNIX, SOCK_STREAM, 0, sock_fds), SyscallSucceeds()); + + char buf[1024]; + EXPECT_THAT(pread64(sock_fds[0], buf, 1024, 0), + SyscallFailsWithErrno(ESPIPE)); + EXPECT_THAT(pread64(sock_fds[1], buf, 1024, 0), + SyscallFailsWithErrno(ESPIPE)); + + 
EXPECT_THAT(close(sock_fds[0]), SyscallSucceeds()); + EXPECT_THAT(close(sock_fds[1]), SyscallSucceeds()); +} + +TEST(Pread64TestNoTempFile, CantReadPipe) { + char buf[1024]; + + int pipe_fds[2]; + EXPECT_THAT(pipe(pipe_fds), SyscallSucceeds()); + + EXPECT_THAT(pread64(pipe_fds[0], buf, 1024, 0), + SyscallFailsWithErrno(ESPIPE)); + + EXPECT_THAT(close(pipe_fds[0]), SyscallSucceeds()); + EXPECT_THAT(close(pipe_fds[1]), SyscallSucceeds()); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/preadv.cc b/test/syscalls/linux/preadv.cc new file mode 100644 index 000000000..5b0743fe9 --- /dev/null +++ b/test/syscalls/linux/preadv.cc @@ -0,0 +1,95 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <sys/syscall.h> +#include <sys/types.h> +#include <sys/uio.h> +#include <sys/wait.h> +#include <unistd.h> + +#include <atomic> +#include <string> + +#include "gtest/gtest.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/util/file_descriptor.h" +#include "test/util/logging.h" +#include "test/util/memory_util.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" +#include "test/util/timer_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// Stress copy-on-write. Attempts to reproduce b/38430174. 
+TEST(PreadvTest, MMConcurrencyStress) { + // Fill a one-page file with zeroes (the contents don't really matter). + const auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + /* parent = */ GetAbsoluteTestTmpdir(), + /* content = */ std::string(kPageSize, 0), TempPath::kDefaultFileMode)); + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDONLY)); + + // Get a one-page private mapping to read to. + const Mapping m = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE)); + + // Repeatedly fork in a separate thread to force the mapping to become + // copy-on-write. + std::atomic<bool> done(false); + const ScopedThread t([&] { + while (!done.load()) { + const pid_t pid = fork(); + TEST_CHECK(pid >= 0); + if (pid == 0) { + // In child. The parent was obviously multithreaded, so it's neither + // safe nor necessary to do much more than exit. + syscall(SYS_exit_group, 0); + } + int status; + ASSERT_THAT(RetryEINTR(waitpid)(pid, &status, 0), + SyscallSucceedsWithValue(pid)); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << "status = " << status; + } + }); + + // Repeatedly read to the mapping. + struct iovec iov[2]; + iov[0].iov_base = m.ptr(); + iov[0].iov_len = kPageSize / 2; + iov[1].iov_base = reinterpret_cast<void*>(m.addr() + kPageSize / 2); + iov[1].iov_len = kPageSize / 2; + constexpr absl::Duration kTestDuration = absl::Seconds(5); + const absl::Time end = absl::Now() + kTestDuration; + while (absl::Now() < end) { + // Among other causes, save/restore cycles may cause interruptions resulting + // in partial reads, so we don't expect any particular return value. + EXPECT_THAT(RetryEINTR(preadv)(fd.get(), iov, 2, 0), SyscallSucceeds()); + } + + // Stop the other thread. + done.store(true); + + // The test passes if it neither deadlocks nor crashes the OS. 
+} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/preadv2.cc b/test/syscalls/linux/preadv2.cc new file mode 100644 index 000000000..4a9acd7ae --- /dev/null +++ b/test/syscalls/linux/preadv2.cc @@ -0,0 +1,280 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <fcntl.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <sys/uio.h> + +#include <string> +#include <vector> + +#include "gtest/gtest.h" +#include "absl/memory/memory.h" +#include "test/syscalls/linux/file_base.h" +#include "test/util/file_descriptor.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +#ifndef SYS_preadv2 +#if defined(__x86_64__) +#define SYS_preadv2 327 +#elif defined(__aarch64__) +#define SYS_preadv2 286 +#else +#error "Unknown architecture" +#endif +#endif // SYS_preadv2 + +#ifndef RWF_HIPRI +#define RWF_HIPRI 0x1 +#endif // RWF_HIPRI + +constexpr int kBufSize = 1024; + +std::string SetContent() { + std::string content; + for (int i = 0; i < kBufSize; i++) { + content += static_cast<char>((i % 10) + '0'); + } + return content; +} + +ssize_t preadv2(unsigned long fd, const struct iovec* iov, unsigned long iovcnt, + off_t offset, unsigned long flags) { + // syscall on preadv2 does some weird things (see man syscall and search + // preadv2), so we insert a 0 to word align the flags argument on 
native. + return syscall(SYS_preadv2, fd, iov, iovcnt, offset, 0, flags); +} + +// This test is the base case where we call preadv (no offset, no flags). +TEST(Preadv2Test, TestBaseCall) { + SKIP_IF(preadv2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS); + + std::string content = SetContent(); + + const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), content, TempPath::kDefaultFileMode)); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY)); + + std::vector<char> buf(kBufSize); + struct iovec iov[2]; + iov[0].iov_base = buf.data(); + iov[0].iov_len = buf.size() / 2; + iov[1].iov_base = static_cast<char*>(iov[0].iov_base) + (content.size() / 2); + iov[1].iov_len = content.size() / 2; + + EXPECT_THAT(preadv2(fd.get(), iov, /*iovcnt*/ 2, /*offset=*/0, /*flags=*/0), + SyscallSucceedsWithValue(kBufSize)); + + EXPECT_EQ(content, std::string(buf.data(), buf.size())); +} + +// This test is where we call preadv with an offset and no flags. +TEST(Preadv2Test, TestValidPositiveOffset) { + SKIP_IF(preadv2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS); + + std::string content = SetContent(); + const std::string prefix = "0"; + + const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), prefix + content, TempPath::kDefaultFileMode)); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY)); + + std::vector<char> buf(kBufSize, '0'); + struct iovec iov; + iov.iov_base = buf.data(); + iov.iov_len = buf.size(); + + EXPECT_THAT(preadv2(fd.get(), &iov, /*iovcnt=*/1, /*offset=*/prefix.size(), + /*flags=*/0), + SyscallSucceedsWithValue(kBufSize)); + + EXPECT_THAT(lseek(fd.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(0)); + + EXPECT_EQ(content, std::string(buf.data(), buf.size())); +} + +// This test is the base case where we call readv by using -1 as the offset. 
The +// read should use the file offset, so the test increments it by one prior to +// calling preadv2. +TEST(Preadv2Test, TestNegativeOneOffset) { + SKIP_IF(preadv2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS); + + std::string content = SetContent(); + const std::string prefix = "231"; + + const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), prefix + content, TempPath::kDefaultFileMode)); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY)); + + ASSERT_THAT(lseek(fd.get(), prefix.size(), SEEK_SET), + SyscallSucceedsWithValue(prefix.size())); + + std::vector<char> buf(kBufSize, '0'); + struct iovec iov; + iov.iov_base = buf.data(); + iov.iov_len = buf.size(); + + EXPECT_THAT(preadv2(fd.get(), &iov, /*iovcnt=*/1, /*offset=*/-1, /*flags=*/0), + SyscallSucceedsWithValue(kBufSize)); + + EXPECT_THAT(lseek(fd.get(), 0, SEEK_CUR), + SyscallSucceedsWithValue(prefix.size() + buf.size())); + + EXPECT_EQ(content, std::string(buf.data(), buf.size())); +} + +// preadv2 requires if the RWF_HIPRI flag is passed, the fd must be opened with +// O_DIRECT. This test implements a correct call with the RWF_HIPRI flag. 
+TEST(Preadv2Test, TestCallWithRWF_HIPRI) { + SKIP_IF(preadv2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS); + + std::string content = SetContent(); + + const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), content, TempPath::kDefaultFileMode)); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY)); + + EXPECT_THAT(fsync(fd.get()), SyscallSucceeds()); + + std::vector<char> buf(kBufSize, '0'); + struct iovec iov; + iov.iov_base = buf.data(); + iov.iov_len = buf.size(); + + EXPECT_THAT( + preadv2(fd.get(), &iov, /*iovcnt=*/1, /*offset=*/0, /*flags=*/RWF_HIPRI), + SyscallSucceedsWithValue(kBufSize)); + + EXPECT_THAT(lseek(fd.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(0)); + + EXPECT_EQ(content, std::string(buf.data(), buf.size())); +} +// This test calls preadv2 with an invalid flag. +TEST(Preadv2Test, TestInvalidFlag) { + SKIP_IF(preadv2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS); + + const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), "", TempPath::kDefaultFileMode)); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY | O_DIRECT)); + + std::vector<char> buf(kBufSize, '0'); + struct iovec iov; + iov.iov_base = buf.data(); + iov.iov_len = buf.size(); + + EXPECT_THAT(preadv2(fd.get(), &iov, /*iovcnt=*/1, + /*offset=*/0, /*flags=*/0xF0), + SyscallFailsWithErrno(EOPNOTSUPP)); +} + +// This test calls preadv2 with an invalid offset. 
+TEST(Preadv2Test, TestInvalidOffset) { + SKIP_IF(preadv2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS); + + const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), "", TempPath::kDefaultFileMode)); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY | O_DIRECT)); + + auto iov = absl::make_unique<struct iovec[]>(1); + iov[0].iov_base = nullptr; + iov[0].iov_len = 0; + + EXPECT_THAT(preadv2(fd.get(), iov.get(), /*iovcnt=*/1, /*offset=*/-8, + /*flags=*/0), + SyscallFailsWithErrno(EINVAL)); +} + +// This test calls preadv with a file set O_WRONLY. +TEST(Preadv2Test, TestUnreadableFile) { + SKIP_IF(preadv2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS); + + const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), "", TempPath::kDefaultFileMode)); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_WRONLY)); + + auto iov = absl::make_unique<struct iovec[]>(1); + iov[0].iov_base = nullptr; + iov[0].iov_len = 0; + + EXPECT_THAT(preadv2(fd.get(), iov.get(), /*iovcnt=*/1, + /*offset=*/0, /*flags=*/0), + SyscallFailsWithErrno(EBADF)); +} + +// Calling preadv2 with a non-negative offset calls preadv. Calling preadv with +// an unseekable file is not allowed. A pipe is used for an unseekable file. 
+TEST(Preadv2Test, TestUnseekableFileInvalid) { + SKIP_IF(preadv2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS); + + int pipe_fds[2]; + + ASSERT_THAT(pipe(pipe_fds), SyscallSucceeds()); + + auto iov = absl::make_unique<struct iovec[]>(1); + iov[0].iov_base = nullptr; + iov[0].iov_len = 0; + + EXPECT_THAT(preadv2(pipe_fds[0], iov.get(), /*iovcnt=*/1, + /*offset=*/2, /*flags=*/0), + SyscallFailsWithErrno(ESPIPE)); + + EXPECT_THAT(close(pipe_fds[0]), SyscallSucceeds()); + EXPECT_THAT(close(pipe_fds[1]), SyscallSucceeds()); +} + +TEST(Preadv2Test, TestUnseekableFileValid) { + SKIP_IF(preadv2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS); + + int pipe_fds[2]; + + ASSERT_THAT(pipe(pipe_fds), SyscallSucceeds()); + + std::vector<char> content(32, 'X'); + + EXPECT_THAT(write(pipe_fds[1], content.data(), content.size()), + SyscallSucceedsWithValue(content.size())); + + std::vector<char> buf(content.size()); + auto iov = absl::make_unique<struct iovec[]>(1); + iov[0].iov_base = buf.data(); + iov[0].iov_len = buf.size(); + + EXPECT_THAT(preadv2(pipe_fds[0], iov.get(), /*iovcnt=*/1, + /*offset=*/static_cast<off_t>(-1), /*flags=*/0), + SyscallSucceedsWithValue(buf.size())); + + EXPECT_EQ(content, buf); + + EXPECT_THAT(close(pipe_fds[0]), SyscallSucceeds()); + EXPECT_THAT(close(pipe_fds[1]), SyscallSucceeds()); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/priority.cc b/test/syscalls/linux/priority.cc new file mode 100644 index 000000000..1d9bdfa70 --- /dev/null +++ b/test/syscalls/linux/priority.cc @@ -0,0 +1,216 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <sys/resource.h> +#include <sys/time.h> +#include <sys/types.h> +#include <unistd.h> + +#include <string> +#include <vector> + +#include "gtest/gtest.h" +#include "absl/strings/numbers.h" +#include "absl/strings/str_split.h" +#include "test/util/capability_util.h" +#include "test/util/fs_util.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// These tests are for both the getpriority(2) and setpriority(2) syscalls +// These tests are very rudimentary because getpriority and setpriority +// have not yet been fully implemented. 
+ +// Getpriority does something +TEST(GetpriorityTest, Implemented) { + // "getpriority() can legitimately return the value -1, it is necessary to + // clear the external variable errno prior to the call" + errno = 0; + EXPECT_THAT(getpriority(PRIO_PROCESS, /*who=*/0), SyscallSucceeds()); +} + +// Invalid which +TEST(GetpriorityTest, InvalidWhich) { + errno = 0; + EXPECT_THAT(getpriority(/*which=*/3, /*who=*/0), + SyscallFailsWithErrno(EINVAL)); +} + +// Process is found when which=PRIO_PROCESS +TEST(GetpriorityTest, ValidWho) { + errno = 0; + EXPECT_THAT(getpriority(PRIO_PROCESS, getpid()), SyscallSucceeds()); +} + +// Process is not found when which=PRIO_PROCESS +TEST(GetpriorityTest, InvalidWho) { + errno = 0; + // Flaky, but it's tough to avoid a race condition when finding an unused pid + EXPECT_THAT(getpriority(PRIO_PROCESS, /*who=*/INT_MAX - 1), + SyscallFailsWithErrno(ESRCH)); +} + +// Setpriority does something +TEST(SetpriorityTest, Implemented) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_NICE))); + + // No need to clear errno for setpriority(): + // "The setpriority() call returns 0 if there is no error, or -1 if there is" + EXPECT_THAT(setpriority(PRIO_PROCESS, /*who=*/0, /*nice=*/16), + SyscallSucceeds()); +} + +// Invalid which +TEST(Setpriority, InvalidWhich) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_NICE))); + + EXPECT_THAT(setpriority(/*which=*/3, /*who=*/0, /*nice=*/16), + SyscallFailsWithErrno(EINVAL)); +} + +// Process is found when which=PRIO_PROCESS +TEST(SetpriorityTest, ValidWho) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_NICE))); + + EXPECT_THAT(setpriority(PRIO_PROCESS, getpid(), /*nice=*/16), + SyscallSucceeds()); +} + +// niceval is within the range [-20, 19] +TEST(SetpriorityTest, InsideRange) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_NICE))); + + // Set 0 < niceval < 19 + int nice = 12; + EXPECT_THAT(setpriority(PRIO_PROCESS, getpid(), nice), 
SyscallSucceeds()); + + errno = 0; + EXPECT_THAT(getpriority(PRIO_PROCESS, getpid()), + SyscallSucceedsWithValue(nice)); + + // Set -20 < niceval < 0 + nice = -12; + EXPECT_THAT(setpriority(PRIO_PROCESS, getpid(), nice), SyscallSucceeds()); + + errno = 0; + EXPECT_THAT(getpriority(PRIO_PROCESS, getpid()), + SyscallSucceedsWithValue(nice)); +} + +// Verify that priority/niceness are exposed via /proc/PID/stat. +TEST(SetpriorityTest, NicenessExposedViaProcfs) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_NICE))); + + constexpr int kNiceVal = 12; + ASSERT_THAT(setpriority(PRIO_PROCESS, getpid(), kNiceVal), SyscallSucceeds()); + + errno = 0; + ASSERT_THAT(getpriority(PRIO_PROCESS, getpid()), + SyscallSucceedsWithValue(kNiceVal)); + + // Now verify we can read that same value via /proc/self/stat. + std::string proc_stat; + ASSERT_NO_ERRNO(GetContents("/proc/self/stat", &proc_stat)); + std::vector<std::string> pieces = absl::StrSplit(proc_stat, ' '); + ASSERT_GT(pieces.size(), 20); + + int niceness_procfs = 0; + ASSERT_TRUE(absl::SimpleAtoi(pieces[18], &niceness_procfs)); + EXPECT_EQ(niceness_procfs, kNiceVal); +} + +// In the kernel's implementation, values outside the range of [-20, 19] are +// truncated to these minimum and maximum values. 
See +// https://elixir.bootlin.com/linux/v4.4/source/kernel/sys.c#L190 +TEST(SetpriorityTest, OutsideRange) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_NICE))); + + // Set niceval > 19 + EXPECT_THAT(setpriority(PRIO_PROCESS, getpid(), /*nice=*/100), + SyscallSucceeds()); + + errno = 0; + // Test niceval truncated to 19 + EXPECT_THAT(getpriority(PRIO_PROCESS, getpid()), + SyscallSucceedsWithValue(/*maxnice=*/19)); + + // Set niceval < -20 + EXPECT_THAT(setpriority(PRIO_PROCESS, getpid(), /*nice=*/-100), + SyscallSucceeds()); + + errno = 0; + // Test niceval truncated to -20 + EXPECT_THAT(getpriority(PRIO_PROCESS, getpid()), + SyscallSucceedsWithValue(/*minnice=*/-20)); +} + +// Process is not found when which=PRIO_PROCESS +TEST(SetpriorityTest, InvalidWho) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_NICE))); + + // Flaky, but it's tough to avoid a race condition when finding an unused pid + EXPECT_THAT(setpriority(PRIO_PROCESS, + /*who=*/INT_MAX - 1, + /*nice=*/16), + SyscallFailsWithErrno(ESRCH)); +} + +// Nice succeeds, correctly modifies (or in this case does not +// modify priority of process +TEST(SetpriorityTest, NiceSucceeds) { + errno = 0; + const int priority_before = getpriority(PRIO_PROCESS, /*who=*/0); + ASSERT_THAT(nice(/*inc=*/0), SyscallSucceeds()); + + // nice(0) should not change priority + EXPECT_EQ(priority_before, getpriority(PRIO_PROCESS, /*who=*/0)); +} + +// Threads resulting from clone() maintain parent's priority +// Changes to child priority do not affect parent's priority +TEST(GetpriorityTest, CloneMaintainsPriority) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_NICE))); + + constexpr int kParentPriority = 16; + constexpr int kChildPriority = 14; + ASSERT_THAT(setpriority(PRIO_PROCESS, getpid(), kParentPriority), + SyscallSucceeds()); + + ScopedThread th([]() { + // Check that priority equals that of parent thread + pid_t my_tid; + EXPECT_THAT(my_tid = syscall(__NR_gettid), 
SyscallSucceeds()); + EXPECT_THAT(getpriority(PRIO_PROCESS, my_tid), + SyscallSucceedsWithValue(kParentPriority)); + + // Change the child thread's priority + EXPECT_THAT(setpriority(PRIO_PROCESS, my_tid, kChildPriority), + SyscallSucceeds()); + }); + th.Join(); + + // Check that parent's priority reemained the same even though + // the child's priority was altered + EXPECT_EQ(kParentPriority, getpriority(PRIO_PROCESS, syscall(__NR_gettid))); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/priority_execve.cc b/test/syscalls/linux/priority_execve.cc new file mode 100644 index 000000000..5cb343bad --- /dev/null +++ b/test/syscalls/linux/priority_execve.cc @@ -0,0 +1,42 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/resource.h> +#include <sys/time.h> +#include <sys/types.h> +#include <unistd.h> + +int main(int argc, char** argv, char** envp) { + errno = 0; + int prio = getpriority(PRIO_PROCESS, getpid()); + + // NOTE: getpriority() can legitimately return negative values + // in the range [-20, 0). If errno is set, exit with a value that + // could not be reached by a valid priority. Valid exit values + // for the test are in the range [1, 40], so we'll use 0. 
+ if (errno != 0) { + printf("getpriority() failed with errno = %d\n", errno); + exit(0); + } + + // Used by test to verify priority is being maintained through + // calls to execve(). Since prio should always be in the range + // [-20, 19], we offset by 20 so as not to have negative exit codes. + exit(20 - prio); + + return 0; +} diff --git a/test/syscalls/linux/proc.cc b/test/syscalls/linux/proc.cc new file mode 100644 index 000000000..d6b875dbf --- /dev/null +++ b/test/syscalls/linux/proc.cc @@ -0,0 +1,2173 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <elf.h> +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <sched.h> +#include <signal.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/prctl.h> +#include <sys/stat.h> +#include <sys/utsname.h> +#include <syscall.h> +#include <unistd.h> + +#include <algorithm> +#include <atomic> +#include <functional> +#include <iostream> +#include <map> +#include <memory> +#include <ostream> +#include <regex> +#include <string> +#include <unordered_set> +#include <utility> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/strings/ascii.h" +#include "absl/strings/match.h" +#include "absl/strings/numbers.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_split.h" +#include "absl/strings/string_view.h" +#include "absl/synchronization/mutex.h" +#include "absl/synchronization/notification.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/util/capability_util.h" +#include "test/util/cleanup.h" +#include "test/util/file_descriptor.h" +#include "test/util/fs_util.h" +#include "test/util/memory_util.h" +#include "test/util/posix_error.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" +#include "test/util/time_util.h" +#include "test/util/timer_util.h" + +// NOTE(magi): No, this isn't really a syscall but this is a really simple +// way to get it tested on both gVisor, PTrace and Linux. + +using ::testing::AllOf; +using ::testing::AnyOf; +using ::testing::ContainerEq; +using ::testing::Contains; +using ::testing::ContainsRegex; +using ::testing::Eq; +using ::testing::Gt; +using ::testing::HasSubstr; +using ::testing::IsSupersetOf; +using ::testing::Pair; +using ::testing::UnorderedElementsAre; +using ::testing::UnorderedElementsAreArray; + +// Exported by glibc. 
+extern char** environ; + +namespace gvisor { +namespace testing { +namespace { + +#ifndef SUID_DUMP_DISABLE +#define SUID_DUMP_DISABLE 0 +#endif /* SUID_DUMP_DISABLE */ +#ifndef SUID_DUMP_USER +#define SUID_DUMP_USER 1 +#endif /* SUID_DUMP_USER */ +#ifndef SUID_DUMP_ROOT +#define SUID_DUMP_ROOT 2 +#endif /* SUID_DUMP_ROOT */ + +#if defined(__x86_64__) || defined(__i386__) +// This list of "required" fields is taken from reading the file +// arch/x86/kernel/cpu/proc.c and seeing which fields will be unconditionally +// printed by the kernel. +static const char* required_fields[] = { + "processor", + "vendor_id", + "cpu family", + "model\t\t:", + "model name", + "stepping", + "cpu MHz", + "fpu\t\t:", + "fpu_exception", + "cpuid level", + "wp", + "bogomips", + "clflush size", + "cache_alignment", + "address sizes", + "power management", +}; +#elif __aarch64__ +// This list of "required" fields is taken from reading the file +// arch/arm64/kernel/cpuinfo.c and seeing which fields will be unconditionally +// printed by the kernel. +static const char* required_fields[] = { + "processor", "BogoMIPS", "Features", "CPU implementer", + "CPU architecture", "CPU variant", "CPU part", "CPU revision", +}; +#else +#error "Unknown architecture" +#endif + +// Takes the subprocess command line and pid. +// If it returns !OK, WithSubprocess returns immediately. +using SubprocessCallback = std::function<PosixError(int)>; + +std::vector<std::string> saved_argv; // NOLINT + +// Helper function to dump /proc/{pid}/status and check the +// state data. State should = "Z" for zombied or "RSD" for +// running, interruptible sleeping (S), or uninterruptible sleep +// (D). +void CompareProcessState(absl::string_view state, int pid) { + auto status_file = ASSERT_NO_ERRNO_AND_VALUE( + GetContents(absl::StrCat("/proc/", pid, "/status"))); + // N.B. POSIX extended regexes don't support shorthand character classes (\w) + // inside of brackets. 
+ EXPECT_THAT(status_file, + ContainsRegex(absl::StrCat("State:.[", state, + R"EOL(]\s+\([a-zA-Z ]+\))EOL"))); +} + +// Run callbacks while a subprocess is running, zombied, and/or exited. +PosixError WithSubprocess(SubprocessCallback const& running, + SubprocessCallback const& zombied, + SubprocessCallback const& exited) { + int pipe_fds[2] = {}; + if (pipe(pipe_fds) < 0) { + return PosixError(errno, "pipe"); + } + + int child_pid = fork(); + if (child_pid < 0) { + return PosixError(errno, "fork"); + } + + if (child_pid == 0) { + close(pipe_fds[0]); // Close the read end. + const DisableSave ds; // Timing issues. + + // Write to the pipe to tell it we're ready. + char buf = 'a'; + int res = 0; + res = WriteFd(pipe_fds[1], &buf, sizeof(buf)); + TEST_CHECK_MSG(res == sizeof(buf), "Write failure in subprocess"); + + while (true) { + SleepSafe(absl::Milliseconds(100)); + } + } + + close(pipe_fds[1]); // Close the write end. + + int status = 0; + auto wait_cleanup = Cleanup([child_pid, &status] { + EXPECT_THAT(waitpid(child_pid, &status, 0), SyscallSucceeds()); + }); + auto kill_cleanup = Cleanup([child_pid] { + EXPECT_THAT(kill(child_pid, SIGKILL), SyscallSucceeds()); + }); + + // Wait for the child. + char buf = 0; + int res = ReadFd(pipe_fds[0], &buf, sizeof(buf)); + if (res < 0) { + return PosixError(errno, "Read from pipe"); + } else if (res == 0) { + return PosixError(EPIPE, "Unable to read from pipe: EOF"); + } + + if (running) { + // The first arg, RSD, refers to a "running process", or a process with a + // state of Running (R), Interruptable Sleep (S) or Uninterruptable + // Sleep (D). + CompareProcessState("RSD", child_pid); + RETURN_IF_ERRNO(running(child_pid)); + } + + // Kill the process. + kill_cleanup.Release()(); + siginfo_t info; + // Wait until the child process has exited (WEXITED flag) but don't + // reap the child (WNOWAIT flag). 
+ EXPECT_THAT(waitid(P_PID, child_pid, &info, WNOWAIT | WEXITED), + SyscallSucceeds()); + + if (zombied) { + // Arg of "Z" refers to a Zombied Process. + CompareProcessState("Z", child_pid); + RETURN_IF_ERRNO(zombied(child_pid)); + } + + // Wait on the process. + wait_cleanup.Release()(); + // If the process is reaped, then then this should return + // with ECHILD. + EXPECT_THAT(waitpid(child_pid, &status, WNOHANG), + SyscallFailsWithErrno(ECHILD)); + + if (exited) { + RETURN_IF_ERRNO(exited(child_pid)); + } + + return NoError(); +} + +// Access the file returned by name when a subprocess is running. +PosixError AccessWhileRunning(std::function<std::string(int pid)> name, + int flags, std::function<void(int fd)> access) { + FileDescriptor fd; + return WithSubprocess( + [&](int pid) -> PosixError { + // Running. + ASSIGN_OR_RETURN_ERRNO(fd, Open(name(pid), flags)); + + access(fd.get()); + return NoError(); + }, + nullptr, nullptr); +} + +// Access the file returned by name when the a subprocess is zombied. +PosixError AccessWhileZombied(std::function<std::string(int pid)> name, + int flags, std::function<void(int fd)> access) { + FileDescriptor fd; + return WithSubprocess( + [&](int pid) -> PosixError { + // Running. + ASSIGN_OR_RETURN_ERRNO(fd, Open(name(pid), flags)); + return NoError(); + }, + [&](int pid) -> PosixError { + // Zombied. + access(fd.get()); + return NoError(); + }, + nullptr); +} + +// Access the file returned by name when the a subprocess is exited. +PosixError AccessWhileExited(std::function<std::string(int pid)> name, + int flags, std::function<void(int fd)> access) { + FileDescriptor fd; + return WithSubprocess( + [&](int pid) -> PosixError { + // Running. + ASSIGN_OR_RETURN_ERRNO(fd, Open(name(pid), flags)); + return NoError(); + }, + nullptr, + [&](int pid) -> PosixError { + // Exited. + access(fd.get()); + return NoError(); + }); +} + +// ReadFd(fd=/proc/PID/basename) while PID is running. 
+int ReadWhileRunning(std::string const& basename, void* buf, size_t count) { + int ret = 0; + int err = 0; + EXPECT_NO_ERRNO(AccessWhileRunning( + [&](int pid) -> std::string { + return absl::StrCat("/proc/", pid, "/", basename); + }, + O_RDONLY, + [&](int fd) { + ret = ReadFd(fd, buf, count); + err = errno; + })); + errno = err; + return ret; +} + +// ReadFd(fd=/proc/PID/basename) while PID is zombied. +int ReadWhileZombied(std::string const& basename, void* buf, size_t count) { + int ret = 0; + int err = 0; + EXPECT_NO_ERRNO(AccessWhileZombied( + [&](int pid) -> std::string { + return absl::StrCat("/proc/", pid, "/", basename); + }, + O_RDONLY, + [&](int fd) { + ret = ReadFd(fd, buf, count); + err = errno; + })); + errno = err; + return ret; +} + +// ReadFd(fd=/proc/PID/basename) while PID is exited. +int ReadWhileExited(std::string const& basename, void* buf, size_t count) { + int ret = 0; + int err = 0; + EXPECT_NO_ERRNO(AccessWhileExited( + [&](int pid) -> std::string { + return absl::StrCat("/proc/", pid, "/", basename); + }, + O_RDONLY, + [&](int fd) { + ret = ReadFd(fd, buf, count); + err = errno; + })); + errno = err; + return ret; +} + +// readlinkat(fd=/proc/PID/, basename) while PID is running. +int ReadlinkWhileRunning(std::string const& basename, char* buf, size_t count) { + int ret = 0; + int err = 0; + EXPECT_NO_ERRNO(AccessWhileRunning( + [&](int pid) -> std::string { return absl::StrCat("/proc/", pid, "/"); }, + O_DIRECTORY, + [&](int fd) { + ret = readlinkat(fd, basename.c_str(), buf, count); + err = errno; + })); + errno = err; + return ret; +} + +// readlinkat(fd=/proc/PID/, basename) while PID is zombied. 
+int ReadlinkWhileZombied(std::string const& basename, char* buf, size_t count) { + int ret = 0; + int err = 0; + EXPECT_NO_ERRNO(AccessWhileZombied( + [&](int pid) -> std::string { return absl::StrCat("/proc/", pid, "/"); }, + O_DIRECTORY, + [&](int fd) { + ret = readlinkat(fd, basename.c_str(), buf, count); + err = errno; + })); + errno = err; + return ret; +} + +// readlinkat(fd=/proc/PID/, basename) while PID is exited. +int ReadlinkWhileExited(std::string const& basename, char* buf, size_t count) { + int ret = 0; + int err = 0; + EXPECT_NO_ERRNO(AccessWhileExited( + [&](int pid) -> std::string { return absl::StrCat("/proc/", pid, "/"); }, + O_DIRECTORY, + [&](int fd) { + ret = readlinkat(fd, basename.c_str(), buf, count); + err = errno; + })); + errno = err; + return ret; +} + +TEST(ProcTest, NotFoundInRoot) { + struct stat s; + EXPECT_THAT(stat("/proc/foobar", &s), SyscallFailsWithErrno(ENOENT)); +} + +TEST(ProcSelfTest, IsThreadGroupLeader) { + ScopedThread([] { + const pid_t tgid = getpid(); + const pid_t tid = syscall(SYS_gettid); + EXPECT_NE(tgid, tid); + auto link = ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/self")); + EXPECT_EQ(link, absl::StrCat(tgid)); + }); +} + +TEST(ProcThreadSelfTest, Basic) { + const pid_t tgid = getpid(); + const pid_t tid = syscall(SYS_gettid); + EXPECT_EQ(tgid, tid); + auto link_threadself = + ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/thread-self")); + EXPECT_EQ(link_threadself, absl::StrCat(tgid, "/task/", tid)); + // Just read one file inside thread-self to ensure that the link is valid. 
+ auto link_threadself_exe = + ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/thread-self/exe")); + auto link_procself_exe = + ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/self/exe")); + EXPECT_EQ(link_threadself_exe, link_procself_exe); +} + +TEST(ProcThreadSelfTest, Thread) { + ScopedThread([] { + const pid_t tgid = getpid(); + const pid_t tid = syscall(SYS_gettid); + EXPECT_NE(tgid, tid); + auto link_threadself = + ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/thread-self")); + + EXPECT_EQ(link_threadself, absl::StrCat(tgid, "/task/", tid)); + // Just read one file inside thread-self to ensure that the link is valid. + auto link_threadself_exe = + ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/thread-self/exe")); + auto link_procself_exe = + ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/self/exe")); + EXPECT_EQ(link_threadself_exe, link_procself_exe); + // A thread should not have "/proc/<tid>/task". + struct stat s; + EXPECT_THAT(stat("/proc/thread-self/task", &s), + SyscallFailsWithErrno(ENOENT)); + }); +} + +// Returns the /proc/PID/maps entry for the MAP_PRIVATE | MAP_ANONYMOUS mapping +// m with start address addr and length len. +std::string AnonymousMapsEntry(uintptr_t addr, size_t len, int prot) { + return absl::StrCat(absl::Hex(addr, absl::PadSpec::kZeroPad8), "-", + absl::Hex(addr + len, absl::PadSpec::kZeroPad8), " ", + prot & PROT_READ ? "r" : "-", + prot & PROT_WRITE ? "w" : "-", + prot & PROT_EXEC ? 
"x" : "-", "p 00000000 00:00 0 "); +} + +std::string AnonymousMapsEntryForMapping(const Mapping& m, int prot) { + return AnonymousMapsEntry(m.addr(), m.len(), prot); +} + +PosixErrorOr<std::map<uint64_t, uint64_t>> ReadProcSelfAuxv() { + std::string auxv_file; + RETURN_IF_ERRNO(GetContents("/proc/self/auxv", &auxv_file)); + const Elf64_auxv_t* auxv_data = + reinterpret_cast<const Elf64_auxv_t*>(auxv_file.data()); + std::map<uint64_t, uint64_t> auxv_entries; + for (int i = 0; auxv_data[i].a_type != AT_NULL; i++) { + auto a_type = auxv_data[i].a_type; + EXPECT_EQ(0, auxv_entries.count(a_type)) << "a_type: " << a_type; + auxv_entries.emplace(a_type, auxv_data[i].a_un.a_val); + } + return auxv_entries; +} + +TEST(ProcSelfAuxv, EntryPresence) { + auto auxv_entries = ASSERT_NO_ERRNO_AND_VALUE(ReadProcSelfAuxv()); + + EXPECT_EQ(auxv_entries.count(AT_ENTRY), 1); + EXPECT_EQ(auxv_entries.count(AT_PHDR), 1); + EXPECT_EQ(auxv_entries.count(AT_PHENT), 1); + EXPECT_EQ(auxv_entries.count(AT_PHNUM), 1); + EXPECT_EQ(auxv_entries.count(AT_BASE), 1); + EXPECT_EQ(auxv_entries.count(AT_UID), 1); + EXPECT_EQ(auxv_entries.count(AT_EUID), 1); + EXPECT_EQ(auxv_entries.count(AT_GID), 1); + EXPECT_EQ(auxv_entries.count(AT_EGID), 1); + EXPECT_EQ(auxv_entries.count(AT_SECURE), 1); + EXPECT_EQ(auxv_entries.count(AT_CLKTCK), 1); + EXPECT_EQ(auxv_entries.count(AT_RANDOM), 1); + EXPECT_EQ(auxv_entries.count(AT_EXECFN), 1); + EXPECT_EQ(auxv_entries.count(AT_PAGESZ), 1); + EXPECT_EQ(auxv_entries.count(AT_SYSINFO_EHDR), 1); +} + +TEST(ProcSelfAuxv, EntryValues) { + auto proc_auxv = ASSERT_NO_ERRNO_AND_VALUE(ReadProcSelfAuxv()); + + // We need to find the ELF auxiliary vector. The section of memory pointed to + // by envp contains some pointers to non-null pointers, followed by a single + // pointer to a null pointer, followed by the auxiliary vector. 
+ char** envpi = environ; + while (*envpi) { + ++envpi; + } + + const Elf64_auxv_t* envp_auxv = + reinterpret_cast<const Elf64_auxv_t*>(envpi + 1); + int i; + for (i = 0; envp_auxv[i].a_type != AT_NULL; i++) { + auto a_type = envp_auxv[i].a_type; + EXPECT_EQ(proc_auxv.count(a_type), 1); + EXPECT_EQ(proc_auxv[a_type], envp_auxv[i].a_un.a_val) + << "a_type: " << a_type; + } + EXPECT_EQ(i, proc_auxv.size()); +} + +// Just open and read /proc/self/maps, check that we can find [stack] +TEST(ProcSelfMaps, Basic) { + auto proc_self_maps = + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps")); + + std::vector<std::string> strings = absl::StrSplit(proc_self_maps, '\n'); + std::vector<std::string> stacks; + // Make sure there's a stack in there. + for (const auto& str : strings) { + if (str.find("[stack]") != std::string::npos) { + stacks.push_back(str); + } + } + ASSERT_EQ(1, stacks.size()) << "[stack] not found in: " << proc_self_maps; + // Linux pads to 73 characters then we add 7. + EXPECT_EQ(80, stacks[0].length()); +} + +TEST(ProcSelfMaps, Map1) { + Mapping mapping = + ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_READ, MAP_PRIVATE)); + auto proc_self_maps = + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps")); + std::vector<std::string> strings = absl::StrSplit(proc_self_maps, '\n'); + std::vector<std::string> addrs; + // Make sure if is listed. + for (const auto& str : strings) { + if (str == AnonymousMapsEntryForMapping(mapping, PROT_READ)) { + addrs.push_back(str); + } + } + ASSERT_EQ(1, addrs.size()); +} + +TEST(ProcSelfMaps, Map2) { + // NOTE(magi): The permissions must be different or the pages will get merged. 
+ Mapping map1 = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kPageSize, PROT_READ | PROT_EXEC, MAP_PRIVATE)); + Mapping map2 = + ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_WRITE, MAP_PRIVATE)); + + auto proc_self_maps = + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps")); + std::vector<std::string> strings = absl::StrSplit(proc_self_maps, '\n'); + std::vector<std::string> addrs; + // Make sure if is listed. + for (const auto& str : strings) { + if (str == AnonymousMapsEntryForMapping(map1, PROT_READ | PROT_EXEC)) { + addrs.push_back(str); + } + } + ASSERT_EQ(1, addrs.size()); + addrs.clear(); + for (const auto& str : strings) { + if (str == AnonymousMapsEntryForMapping(map2, PROT_WRITE)) { + addrs.push_back(str); + } + } + ASSERT_EQ(1, addrs.size()); +} + +TEST(ProcSelfMaps, MapUnmap) { + Mapping map1 = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kPageSize, PROT_READ | PROT_EXEC, MAP_PRIVATE)); + Mapping map2 = + ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_WRITE, MAP_PRIVATE)); + + auto proc_self_maps = + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps")); + std::vector<std::string> strings = absl::StrSplit(proc_self_maps, '\n'); + std::vector<std::string> addrs; + // Make sure if is listed. + for (const auto& str : strings) { + if (str == AnonymousMapsEntryForMapping(map1, PROT_READ | PROT_EXEC)) { + addrs.push_back(str); + } + } + ASSERT_EQ(1, addrs.size()) << proc_self_maps; + addrs.clear(); + for (const auto& str : strings) { + if (str == AnonymousMapsEntryForMapping(map2, PROT_WRITE)) { + addrs.push_back(str); + } + } + ASSERT_EQ(1, addrs.size()); + + map2.reset(); + + // Read it again. + proc_self_maps = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps")); + strings = absl::StrSplit(proc_self_maps, '\n'); + // First entry should be there. 
+ addrs.clear(); + for (const auto& str : strings) { + if (str == AnonymousMapsEntryForMapping(map1, PROT_READ | PROT_EXEC)) { + addrs.push_back(str); + } + } + ASSERT_EQ(1, addrs.size()); + addrs.clear(); + // But not the second. + for (const auto& str : strings) { + if (str == AnonymousMapsEntryForMapping(map2, PROT_WRITE)) { + addrs.push_back(str); + } + } + ASSERT_EQ(0, addrs.size()); +} + +TEST(ProcSelfMaps, Mprotect) { + // FIXME(jamieliu): Linux's mprotect() sometimes fails to merge VMAs in this + // case. + SKIP_IF(!IsRunningOnGvisor()); + + // Reserve 5 pages of address space. + Mapping m = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(5 * kPageSize, PROT_NONE, MAP_PRIVATE)); + + // Change the permissions on the middle 3 pages. (The first and last pages may + // be merged with other vmas on either side, so they aren't tested directly; + // they just ensure that the middle 3 pages are bracketed by VMAs with + // incompatible permissions.) + ASSERT_THAT(mprotect(reinterpret_cast<void*>(m.addr() + kPageSize), + 3 * kPageSize, PROT_READ), + SyscallSucceeds()); + + // Check that the middle 3 pages make up a single VMA. + auto proc_self_maps = + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps")); + std::vector<std::string> strings = absl::StrSplit(proc_self_maps, '\n'); + EXPECT_THAT(strings, Contains(AnonymousMapsEntry(m.addr() + kPageSize, + 3 * kPageSize, PROT_READ))); + + // Change the permissions on the middle page only. + ASSERT_THAT(mprotect(reinterpret_cast<void*>(m.addr() + 2 * kPageSize), + kPageSize, PROT_READ | PROT_WRITE), + SyscallSucceeds()); + + // Check that the single VMA has been split into 3 VMAs. 
+ proc_self_maps = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps")); + strings = absl::StrSplit(proc_self_maps, '\n'); + EXPECT_THAT( + strings, + IsSupersetOf( + {AnonymousMapsEntry(m.addr() + kPageSize, kPageSize, PROT_READ), + AnonymousMapsEntry(m.addr() + 2 * kPageSize, kPageSize, + PROT_READ | PROT_WRITE), + AnonymousMapsEntry(m.addr() + 3 * kPageSize, kPageSize, + PROT_READ)})); + + // Change the permissions on the middle page back. + ASSERT_THAT(mprotect(reinterpret_cast<void*>(m.addr() + 2 * kPageSize), + kPageSize, PROT_READ), + SyscallSucceeds()); + + // Check that the 3 VMAs have been merged back into a single VMA. + proc_self_maps = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps")); + strings = absl::StrSplit(proc_self_maps, '\n'); + EXPECT_THAT(strings, Contains(AnonymousMapsEntry(m.addr() + kPageSize, + 3 * kPageSize, PROT_READ))); +} + +TEST(ProcSelfFd, OpenFd) { + int pipe_fds[2]; + ASSERT_THAT(pipe2(pipe_fds, O_CLOEXEC), SyscallSucceeds()); + + // Reopen the write end. + const std::string path = absl::StrCat("/proc/self/fd/", pipe_fds[1]); + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(path, O_WRONLY)); + + // Ensure that a read/write works. + const std::string data = "hello"; + std::unique_ptr<char[]> buffer(new char[data.size()]); + EXPECT_THAT(write(fd.get(), data.c_str(), data.size()), + SyscallSucceedsWithValue(5)); + EXPECT_THAT(read(pipe_fds[0], buffer.get(), data.size()), + SyscallSucceedsWithValue(5)); + EXPECT_EQ(strncmp(buffer.get(), data.c_str(), data.size()), 0); + + // Cleanup. + ASSERT_THAT(close(pipe_fds[0]), SyscallSucceeds()); + ASSERT_THAT(close(pipe_fds[1]), SyscallSucceeds()); +} + +TEST(ProcSelfFdInfo, CorrectFds) { + // Make sure there is at least one open file. + auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDONLY)); + + // Get files in /proc/self/fd. 
+ auto fd_files = ASSERT_NO_ERRNO_AND_VALUE(ListDir("/proc/self/fd", false)); + + // Get files in /proc/self/fdinfo. + auto fdinfo_files = + ASSERT_NO_ERRNO_AND_VALUE(ListDir("/proc/self/fdinfo", false)); + + // They should contain the same fds. + EXPECT_THAT(fd_files, UnorderedElementsAreArray(fdinfo_files)); + + // Both should contain fd. + auto fd_s = absl::StrCat(fd.get()); + EXPECT_THAT(fd_files, Contains(fd_s)); +} + +TEST(ProcSelfFdInfo, Flags) { + std::string path = NewTempAbsPath(); + + // Create file here with O_CREAT to test that O_CREAT does not appear in + // fdinfo flags. + int flags = O_CREAT | O_RDWR | O_APPEND | O_CLOEXEC; + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(path, flags, 0644)); + + // Automatically delete path. + TempPath temp_path(path); + + // O_CREAT does not appear in fdinfo flags. + flags &= ~O_CREAT; + + // O_LARGEFILE always appears (on x86_64). + flags |= kOLargeFile; + + auto fd_info = ASSERT_NO_ERRNO_AND_VALUE( + GetContents(absl::StrCat("/proc/self/fdinfo/", fd.get()))); + EXPECT_THAT(fd_info, HasSubstr(absl::StrFormat("flags:\t%#o", flags))); +} + +TEST(ProcSelfExe, Absolute) { + auto exe = ASSERT_NO_ERRNO_AND_VALUE( + ReadLink(absl::StrCat("/proc/", getpid(), "/exe"))); + EXPECT_EQ(exe[0], '/'); +} + +// Sanity check for /proc/cpuinfo fields that must be present. +TEST(ProcCpuinfo, RequiredFieldsArePresent) { + std::string proc_cpuinfo = + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/cpuinfo")); + ASSERT_FALSE(proc_cpuinfo.empty()); + std::vector<std::string> cpuinfo_fields = absl::StrSplit(proc_cpuinfo, '\n'); + + // Check that the usual fields are there. We don't really care about the + // contents. 
+ for (const std::string& field : required_fields) { + EXPECT_THAT(proc_cpuinfo, HasSubstr(field)); + } +} + +TEST(ProcCpuinfo, DeniesWriteNonRoot) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_FOWNER))); + + // Do setuid in a separate thread so that after finishing this test, the + // process can still open files the test harness created before starting this + // test. Otherwise, the files are created by root (UID before the test), but + // cannot be opened by the `uid` set below after the test. After calling + // setuid(non-zero-UID), there is no way to get root privileges back. + ScopedThread([&] { + // Use syscall instead of glibc setuid wrapper because we want this setuid + // call to only apply to this task. POSIX threads, however, require that all + // threads have the same UIDs, so using the setuid wrapper sets all threads' + // real UID. + // Also drops capabilities. + constexpr int kNobody = 65534; + EXPECT_THAT(syscall(SYS_setuid, kNobody), SyscallSucceeds()); + EXPECT_THAT(open("/proc/cpuinfo", O_WRONLY), SyscallFailsWithErrno(EACCES)); + // TODO(gvisor.dev/issue/1193): Properly support setting size attributes in + // kernfs. + if (!IsRunningOnGvisor() || IsRunningWithVFS1()) { + EXPECT_THAT(truncate("/proc/cpuinfo", 123), + SyscallFailsWithErrno(EACCES)); + } + }); +} + +// With root privileges, it is possible to open /proc/cpuinfo with write mode, +// but all write operations will return EIO. +TEST(ProcCpuinfo, DeniesWriteRoot) { + // VFS1 does not behave differently for root/non-root. + SKIP_IF(IsRunningWithVFS1()); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_FOWNER))); + + int fd; + EXPECT_THAT(fd = open("/proc/cpuinfo", O_WRONLY), SyscallSucceeds()); + if (fd > 0) { + EXPECT_THAT(write(fd, "x", 1), SyscallFailsWithErrno(EIO)); + EXPECT_THAT(pwrite(fd, "x", 1, 123), SyscallFailsWithErrno(EIO)); + } + // TODO(gvisor.dev/issue/1193): Properly support setting size attributes in + // kernfs. 
+ if (!IsRunningOnGvisor() || IsRunningWithVFS1()) { + if (fd > 0) { + EXPECT_THAT(ftruncate(fd, 123), SyscallFailsWithErrno(EIO)); + } + EXPECT_THAT(truncate("/proc/cpuinfo", 123), SyscallFailsWithErrno(EIO)); + } +} + +// Sanity checks that uptime is present. +TEST(ProcUptime, IsPresent) { + std::string proc_uptime = + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/uptime")); + ASSERT_FALSE(proc_uptime.empty()); + std::vector<std::string> uptime_parts = absl::StrSplit(proc_uptime, ' '); + + // Parse once. + double uptime0, uptime1, idletime0, idletime1; + ASSERT_TRUE(absl::SimpleAtod(uptime_parts[0], &uptime0)); + ASSERT_TRUE(absl::SimpleAtod(uptime_parts[1], &idletime0)); + + // Sleep for one second. + absl::SleepFor(absl::Seconds(1)); + + // Parse again. + proc_uptime = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/uptime")); + ASSERT_FALSE(proc_uptime.empty()); + uptime_parts = absl::StrSplit(proc_uptime, ' '); + ASSERT_TRUE(absl::SimpleAtod(uptime_parts[0], &uptime1)); + ASSERT_TRUE(absl::SimpleAtod(uptime_parts[1], &idletime1)); + + // Sanity check. + // + // We assert that between 0.99 and 59.99 seconds have passed. If more than a + // minute has passed, then we must be executing really, really slowly. 
+ EXPECT_GE(uptime0, 0.0); + EXPECT_GE(idletime0, 0.0); + EXPECT_GT(uptime1, uptime0); + EXPECT_GE(uptime1, uptime0 + 0.99); + EXPECT_LE(uptime1, uptime0 + 59.99); + EXPECT_GE(idletime1, idletime0); +} + +TEST(ProcMeminfo, ContainsBasicFields) { + std::string proc_meminfo = + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/meminfo")); + EXPECT_THAT(proc_meminfo, AllOf(ContainsRegex(R"(MemTotal:\s+[0-9]+ kB)"), + ContainsRegex(R"(MemFree:\s+[0-9]+ kB)"))); +} + +TEST(ProcStat, ContainsBasicFields) { + std::string proc_stat = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/stat")); + + std::vector<std::string> names; + for (auto const& line : absl::StrSplit(proc_stat, '\n')) { + std::vector<std::string> fields = + absl::StrSplit(line, ' ', absl::SkipWhitespace()); + if (fields.empty()) { + continue; + } + names.push_back(fields[0]); + } + + EXPECT_THAT(names, + IsSupersetOf({"cpu", "intr", "ctxt", "btime", "processes", + "procs_running", "procs_blocked", "softirq"})); +} + +TEST(ProcStat, EndsWithNewline) { + std::string proc_stat = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/stat")); + EXPECT_EQ(proc_stat.back(), '\n'); +} + +TEST(ProcStat, Fields) { + std::string proc_stat = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/stat")); + + std::vector<std::string> names; + for (auto const& line : absl::StrSplit(proc_stat, '\n')) { + std::vector<std::string> fields = + absl::StrSplit(line, ' ', absl::SkipWhitespace()); + if (fields.empty()) { + continue; + } + + if (absl::StartsWith(fields[0], "cpu")) { + // As of Linux 3.11, each CPU entry has 10 fields, plus the name. + EXPECT_GE(fields.size(), 11) << proc_stat; + } else if (fields[0] == "ctxt") { + // Single field. + EXPECT_EQ(fields.size(), 2) << proc_stat; + } else if (fields[0] == "btime") { + // Single field. + EXPECT_EQ(fields.size(), 2) << proc_stat; + } else if (fields[0] == "itime") { + // Single field. + ASSERT_EQ(fields.size(), 2) << proc_stat; + // This is the only floating point field. 
+ double val; + EXPECT_TRUE(absl::SimpleAtod(fields[1], &val)) << proc_stat; + continue; + } else if (fields[0] == "processes") { + // Single field. + EXPECT_EQ(fields.size(), 2) << proc_stat; + } else if (fields[0] == "procs_running") { + // Single field. + EXPECT_EQ(fields.size(), 2) << proc_stat; + } else if (fields[0] == "procs_blocked") { + // Single field. + EXPECT_EQ(fields.size(), 2) << proc_stat; + } else if (fields[0] == "softirq") { + // As of Linux 3.11, there are 10 softirqs. 12 fields for name + total. + EXPECT_GE(fields.size(), 12) << proc_stat; + } + + // All fields besides itime are valid base 10 numbers. + for (size_t i = 1; i < fields.size(); i++) { + uint64_t val; + EXPECT_TRUE(absl::SimpleAtoi(fields[i], &val)) << proc_stat; + } + } +} + +TEST(ProcLoadavg, EndsWithNewline) { + std::string proc_loadvg = + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/loadavg")); + EXPECT_EQ(proc_loadvg.back(), '\n'); +} + +TEST(ProcLoadavg, Fields) { + std::string proc_loadvg = + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/loadavg")); + std::vector<std::string> lines = absl::StrSplit(proc_loadvg, '\n'); + + // Single line. + EXPECT_EQ(lines.size(), 2) << proc_loadvg; + + std::vector<std::string> fields = + absl::StrSplit(lines[0], absl::ByAnyChar(" /"), absl::SkipWhitespace()); + + // Six fields. + EXPECT_EQ(fields.size(), 6) << proc_loadvg; + + double val; + uint64_t val2; + // First three fields are floating point numbers. + EXPECT_TRUE(absl::SimpleAtod(fields[0], &val)) << proc_loadvg; + EXPECT_TRUE(absl::SimpleAtod(fields[1], &val)) << proc_loadvg; + EXPECT_TRUE(absl::SimpleAtod(fields[2], &val)) << proc_loadvg; + // Rest of the fields are valid base 10 numbers. + EXPECT_TRUE(absl::SimpleAtoi(fields[3], &val2)) << proc_loadvg; + EXPECT_TRUE(absl::SimpleAtoi(fields[4], &val2)) << proc_loadvg; + EXPECT_TRUE(absl::SimpleAtoi(fields[5], &val2)) << proc_loadvg; +} + +// NOTE: Tests in priority.cc also check certain priority related fields in +// /proc/self/stat. 
+ +class ProcPidStatTest : public ::testing::TestWithParam<std::string> {}; + +TEST_P(ProcPidStatTest, HasBasicFields) { + std::string proc_pid_stat = ASSERT_NO_ERRNO_AND_VALUE( + GetContents(absl::StrCat("/proc/", GetParam(), "/stat"))); + + ASSERT_FALSE(proc_pid_stat.empty()); + std::vector<std::string> fields = absl::StrSplit(proc_pid_stat, ' '); + ASSERT_GE(fields.size(), 24); + EXPECT_EQ(absl::StrCat(getpid()), fields[0]); + // fields[1] is the thread name. + EXPECT_EQ("R", fields[2]); // task state + EXPECT_EQ(absl::StrCat(getppid()), fields[3]); + + // If the test starts up quickly, then the process start time and the kernel + // boot time will be very close, and the proc starttime field (which is the + // delta of the two times) will be 0. For that unfortunate reason, we can + // only check that starttime >= 0, and not that it is strictly > 0. + uint64_t starttime; + ASSERT_TRUE(absl::SimpleAtoi(fields[21], &starttime)); + EXPECT_GE(starttime, 0); + + uint64_t vss; + ASSERT_TRUE(absl::SimpleAtoi(fields[22], &vss)); + EXPECT_GT(vss, 0); + + uint64_t rss; + ASSERT_TRUE(absl::SimpleAtoi(fields[23], &rss)); + EXPECT_GT(rss, 0); + + uint64_t rsslim; + ASSERT_TRUE(absl::SimpleAtoi(fields[24], &rsslim)); + EXPECT_GT(rsslim, 0); +} + +INSTANTIATE_TEST_SUITE_P(SelfAndNumericPid, ProcPidStatTest, + ::testing::Values("self", absl::StrCat(getpid()))); + +using ProcPidStatmTest = ::testing::TestWithParam<std::string>; + +TEST_P(ProcPidStatmTest, HasBasicFields) { + std::string proc_pid_statm = ASSERT_NO_ERRNO_AND_VALUE( + GetContents(absl::StrCat("/proc/", GetParam(), "/statm"))); + ASSERT_FALSE(proc_pid_statm.empty()); + std::vector<std::string> fields = absl::StrSplit(proc_pid_statm, ' '); + ASSERT_GE(fields.size(), 7); + + uint64_t vss; + ASSERT_TRUE(absl::SimpleAtoi(fields[0], &vss)); + EXPECT_GT(vss, 0); + + uint64_t rss; + ASSERT_TRUE(absl::SimpleAtoi(fields[1], &rss)); + EXPECT_GT(rss, 0); +} + +INSTANTIATE_TEST_SUITE_P(SelfAndNumericPid, ProcPidStatmTest, + 
::testing::Values("self", absl::StrCat(getpid()))); + +PosixErrorOr<uint64_t> CurrentRSS() { + ASSIGN_OR_RETURN_ERRNO(auto proc_self_stat, GetContents("/proc/self/stat")); + if (proc_self_stat.empty()) { + return PosixError(EINVAL, "empty /proc/self/stat"); + } + + std::vector<std::string> fields = absl::StrSplit(proc_self_stat, ' '); + if (fields.size() < 24) { + return PosixError( + EINVAL, + absl::StrCat("/proc/self/stat has too few fields: ", proc_self_stat)); + } + + uint64_t rss; + if (!absl::SimpleAtoi(fields[23], &rss)) { + return PosixError( + EINVAL, absl::StrCat("/proc/self/stat RSS field is not a number: ", + fields[23])); + } + + // RSS is given in number of pages. + return rss * kPageSize; +} + +// The size of mapping created by MapPopulateRSS. +constexpr uint64_t kMappingSize = 100 << 20; + +// Tolerance on RSS comparisons to account for background thread mappings, +// reclaimed pages, newly faulted pages, etc. +constexpr uint64_t kRSSTolerance = 10 << 20; + +// Capture RSS before and after an anonymous mapping with passed prot. +void MapPopulateRSS(int prot, uint64_t* before, uint64_t* after) { + *before = ASSERT_NO_ERRNO_AND_VALUE(CurrentRSS()); + + // N.B. The kernel asynchronously accumulates per-task RSS counters into the + // mm RSS, which is exposed by /proc/PID/stat. Task exit is a synchronization + // point (kernel/exit.c:do_exit -> sync_mm_rss), so perform the mapping on + // another thread to ensure it is reflected in RSS after the thread exits. + Mapping mapping; + ScopedThread t([&mapping, prot] { + mapping = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kMappingSize, prot, MAP_PRIVATE | MAP_POPULATE)); + }); + t.Join(); + + *after = ASSERT_NO_ERRNO_AND_VALUE(CurrentRSS()); +} + +// TODO(b/73896574): Test for PROT_READ + MAP_POPULATE anonymous mappings. Their +// semantics are more subtle: +// +// Small pages -> Zero page mapped, not counted in RSS +// (mm/memory.c:do_anonymous_page). 
+// +// Huge pages (THP enabled, use_zero_page=0) -> Pages committed +// (mm/memory.c:__handle_mm_fault -> create_huge_pmd). +// +// Huge pages (THP enabled, use_zero_page=1) -> Zero page mapped, not counted in +// RSS (mm/huge_memory.c:do_huge_pmd_anonymous_page). + +// PROT_WRITE + MAP_POPULATE anonymous mappings are always committed. +TEST(ProcSelfStat, PopulateWriteRSS) { + uint64_t before, after; + MapPopulateRSS(PROT_READ | PROT_WRITE, &before, &after); + + // Mapping is committed. + EXPECT_NEAR(before + kMappingSize, after, kRSSTolerance); +} + +// PROT_NONE + MAP_POPULATE anonymous mappings are never committed. +TEST(ProcSelfStat, PopulateNoneRSS) { + uint64_t before, after; + MapPopulateRSS(PROT_NONE, &before, &after); + + // Mapping not committed. + EXPECT_NEAR(before, after, kRSSTolerance); +} + +// Returns the calling thread's name. +PosixErrorOr<std::string> ThreadName() { + // "The buffer should allow space for up to 16 bytes; the returned std::string + // will be null-terminated if it is shorter than that." - prctl(2). But we + // always want the thread name to be null-terminated. + char thread_name[17]; + int rc = prctl(PR_GET_NAME, thread_name, 0, 0, 0); + MaybeSave(); + if (rc < 0) { + return PosixError(errno, "prctl(PR_GET_NAME)"); + } + thread_name[16] = '\0'; + return std::string(thread_name); +} + +// Parses the contents of a /proc/[pid]/status file into a collection of +// key-value pairs. 
+PosixErrorOr<std::map<std::string, std::string>> ParseProcStatus( + absl::string_view status_str) { + std::map<std::string, std::string> fields; + for (absl::string_view const line : + absl::StrSplit(status_str, '\n', absl::SkipWhitespace())) { + const std::pair<absl::string_view, absl::string_view> kv = + absl::StrSplit(line, absl::MaxSplits(":\t", 1)); + if (kv.first.empty()) { + return PosixError( + EINVAL, absl::StrCat("failed to parse key in line \"", line, "\"")); + } + std::string key(kv.first); + if (fields.count(key)) { + return PosixError(EINVAL, + absl::StrCat("duplicate key \"", kv.first, "\"")); + } + std::string value(kv.second); + absl::StripLeadingAsciiWhitespace(&value); + fields.emplace(std::move(key), std::move(value)); + } + return fields; +} + +TEST(ParseProcStatusTest, ParsesSimpleStatusFileWithMixedWhitespaceCorrectly) { + EXPECT_THAT( + ParseProcStatus( + "Name:\tinit\nState:\tS (sleeping)\nCapEff:\t 0000001fffffffff\n"), + IsPosixErrorOkAndHolds(UnorderedElementsAre( + Pair("Name", "init"), Pair("State", "S (sleeping)"), + Pair("CapEff", "0000001fffffffff")))); +} + +TEST(ParseProcStatusTest, DetectsDuplicateKeys) { + auto proc_status_or = ParseProcStatus("Name:\tfoo\nName:\tfoo\n"); + EXPECT_THAT(proc_status_or, + PosixErrorIs(EINVAL, ::testing::StrEq("duplicate key \"Name\""))); +} + +TEST(ParseProcStatusTest, DetectsMissingTabs) { + EXPECT_THAT(ParseProcStatus("Name:foo\nPid: 1\n"), + IsPosixErrorOkAndHolds(UnorderedElementsAre(Pair("Name:foo", ""), + Pair("Pid: 1", "")))); +} + +TEST(ProcPidStatusTest, HasBasicFields) { + // Do this on a separate thread since we want tgid != tid. 
+ ScopedThread([] { + const pid_t tgid = getpid(); + const pid_t tid = syscall(SYS_gettid); + EXPECT_NE(tgid, tid); + const auto thread_name = ASSERT_NO_ERRNO_AND_VALUE(ThreadName()); + + std::string status_str = ASSERT_NO_ERRNO_AND_VALUE( + GetContents(absl::StrCat("/proc/", tid, "/status"))); + + ASSERT_FALSE(status_str.empty()); + const auto status = ASSERT_NO_ERRNO_AND_VALUE(ParseProcStatus(status_str)); + EXPECT_THAT(status, IsSupersetOf({Pair("Name", thread_name), + Pair("Tgid", absl::StrCat(tgid)), + Pair("Pid", absl::StrCat(tid)), + Pair("PPid", absl::StrCat(getppid()))})); + }); +} + +TEST(ProcPidStatusTest, StateRunning) { + // Task must be running when reading the file. + const pid_t tid = syscall(SYS_gettid); + std::string status_str = ASSERT_NO_ERRNO_AND_VALUE( + GetContents(absl::StrCat("/proc/", tid, "/status"))); + + EXPECT_THAT(ParseProcStatus(status_str), + IsPosixErrorOkAndHolds(Contains(Pair("State", "R (running)")))); +} + +TEST(ProcPidStatusTest, StateSleeping_NoRandomSave) { + // Starts a child process that blocks and checks that State is sleeping. + auto res = WithSubprocess( + [&](int pid) -> PosixError { + // Because this test is timing based we will disable cooperative saving + // and the test itself also has random saving disabled. + const DisableSave ds; + // Try multiple times in case the child isn't sleeping when status file + // is read. + MonotonicTimer timer; + timer.Start(); + for (;;) { + ASSIGN_OR_RETURN_ERRNO( + std::string status_str, + GetContents(absl::StrCat("/proc/", pid, "/status"))); + ASSIGN_OR_RETURN_ERRNO(auto map, ParseProcStatus(status_str)); + if (map["State"] == std::string("S (sleeping)")) { + // Test passed! 
+ return NoError(); + } + if (timer.Duration() > absl::Seconds(10)) { + return PosixError(ETIMEDOUT, "Timeout waiting for child to sleep"); + } + absl::SleepFor(absl::Milliseconds(10)); + } + }, + nullptr, nullptr); + ASSERT_NO_ERRNO(res); +} + +TEST(ProcPidStatusTest, ValuesAreTabDelimited) { + std::string status_str = + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/status")); + ASSERT_FALSE(status_str.empty()); + for (absl::string_view const line : + absl::StrSplit(status_str, '\n', absl::SkipWhitespace())) { + EXPECT_NE(std::string::npos, line.find(":\t")); + } +} + +// Threads properly counts running threads. +// +// TODO(mpratt): Test zombied threads while the thread group leader is still +// running with generalized fork and clone children from the wait test. +TEST(ProcPidStatusTest, Threads) { + char buf[4096] = {}; + EXPECT_THAT(ReadWhileRunning("status", buf, sizeof(buf) - 1), + SyscallSucceedsWithValue(Gt(0))); + + auto status = ASSERT_NO_ERRNO_AND_VALUE(ParseProcStatus(buf)); + auto it = status.find("Threads"); + ASSERT_NE(it, status.end()); + int threads = -1; + EXPECT_TRUE(absl::SimpleAtoi(it->second, &threads)) + << "Threads value " << it->second << " is not a number"; + // Don't make assumptions about the exact number of threads, as it may not be + // constant. + EXPECT_GE(threads, 1); + + memset(buf, 0, sizeof(buf)); + EXPECT_THAT(ReadWhileZombied("status", buf, sizeof(buf) - 1), + SyscallSucceedsWithValue(Gt(0))); + + status = ASSERT_NO_ERRNO_AND_VALUE(ParseProcStatus(buf)); + it = status.find("Threads"); + ASSERT_NE(it, status.end()); + threads = -1; + EXPECT_TRUE(absl::SimpleAtoi(it->second, &threads)) + << "Threads value " << it->second << " is not a number"; + // There must be only the thread group leader remaining, zombied. + EXPECT_EQ(threads, 1); +} + +// Returns true if all characters in s are digits. 
+bool IsDigits(absl::string_view s) { + return std::all_of(s.begin(), s.end(), absl::ascii_isdigit); +} + +TEST(ProcPidStatTest, VmStats) { + std::string status_str = + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/status")); + ASSERT_FALSE(status_str.empty()); + auto status = ASSERT_NO_ERRNO_AND_VALUE(ParseProcStatus(status_str)); + + const auto vss_it = status.find("VmSize"); + ASSERT_NE(vss_it, status.end()); + + absl::string_view vss_str(vss_it->second); + + // Room for the " kB" suffix plus at least one digit. + ASSERT_GT(vss_str.length(), 3); + EXPECT_TRUE(absl::EndsWith(vss_str, " kB")); + // Everything else is part of a number. + EXPECT_TRUE(IsDigits(vss_str.substr(0, vss_str.length() - 3))) << vss_str; + // ... which is not 0. + EXPECT_NE('0', vss_str[0]); + + const auto rss_it = status.find("VmRSS"); + ASSERT_NE(rss_it, status.end()); + + absl::string_view rss_str(rss_it->second); + + // Room for the " kB" suffix plus at least one digit. + ASSERT_GT(rss_str.length(), 3); + EXPECT_TRUE(absl::EndsWith(rss_str, " kB")); + // Everything else is part of a number. + EXPECT_TRUE(IsDigits(rss_str.substr(0, rss_str.length() - 3))) << rss_str; + // ... which is not 0. + EXPECT_NE('0', rss_str[0]); + + const auto data_it = status.find("VmData"); + ASSERT_NE(data_it, status.end()); + + absl::string_view data_str(data_it->second); + + // Room for the " kB" suffix plus at least one digit. + ASSERT_GT(data_str.length(), 3); + EXPECT_TRUE(absl::EndsWith(data_str, " kB")); + // Everything else is part of a number. + EXPECT_TRUE(IsDigits(data_str.substr(0, data_str.length() - 3))) << data_str; + // ... which is not 0. + EXPECT_NE('0', data_str[0]); +} + +// Parse an array of NUL-terminated char* arrays, returning a vector of +// strings. +std::vector<std::string> ParseNulTerminatedStrings(std::string contents) { + EXPECT_EQ('\0', contents.back()); + // The split will leave an empty string if the NUL-byte remains, so pop + // it. 
+ contents.pop_back(); + + return absl::StrSplit(contents, '\0'); +} + +TEST(ProcPidCmdline, MatchesArgv) { + std::vector<std::string> proc_cmdline = ParseNulTerminatedStrings( + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/cmdline"))); + EXPECT_THAT(saved_argv, ContainerEq(proc_cmdline)); +} + +TEST(ProcPidEnviron, MatchesEnviron) { + std::vector<std::string> proc_environ = ParseNulTerminatedStrings( + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/environ"))); + // Get the environment from the environ variable, which we will compare with + // /proc/self/environ. + std::vector<std::string> env; + for (char** v = environ; *v; v++) { + env.push_back(*v); + } + EXPECT_THAT(env, ContainerEq(proc_environ)); +} + +TEST(ProcPidCmdline, SubprocessForkSameCmdline) { + std::vector<std::string> proc_cmdline_parent; + std::vector<std::string> proc_cmdline; + proc_cmdline_parent = ParseNulTerminatedStrings( + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/cmdline"))); + auto res = WithSubprocess( + [&](int pid) -> PosixError { + ASSIGN_OR_RETURN_ERRNO( + auto raw_cmdline, + GetContents(absl::StrCat("/proc/", pid, "/cmdline"))); + proc_cmdline = ParseNulTerminatedStrings(raw_cmdline); + return NoError(); + }, + nullptr, nullptr); + ASSERT_NO_ERRNO(res); + + for (size_t i = 0; i < proc_cmdline_parent.size(); i++) { + EXPECT_EQ(proc_cmdline_parent[i], proc_cmdline[i]); + } +} + +// Test whether /proc/PID/ symlinks can be read for a running process. 
+TEST(ProcPidSymlink, SubprocessRunning) { + char buf[1]; + + EXPECT_THAT(ReadlinkWhileRunning("exe", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(ReadlinkWhileRunning("ns/net", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(ReadlinkWhileRunning("ns/pid", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(ReadlinkWhileRunning("ns/user", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); +} + +TEST(ProcPidSymlink, SubprocessZombied) { + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false)); + + char buf[1]; + + int want = EACCES; + if (!IsRunningOnGvisor()) { + auto version = ASSERT_NO_ERRNO_AND_VALUE(GetKernelVersion()); + if (version.major > 4 || (version.major == 4 && version.minor > 3)) { + want = ENOENT; + } + } + + EXPECT_THAT(ReadlinkWhileZombied("exe", buf, sizeof(buf)), + SyscallFailsWithErrno(want)); + + if (!IsRunningOnGvisor()) { + EXPECT_THAT(ReadlinkWhileZombied("ns/net", buf, sizeof(buf)), + SyscallFailsWithErrno(want)); + } + + // FIXME(gvisor.dev/issue/164): Inconsistent behavior between linux on proc + // files. + // + // ~4.3: Syscall fails with EACCES. + // 4.17: Syscall succeeds and returns 1. + // + if (!IsRunningOnGvisor()) { + return; + } + + EXPECT_THAT(ReadlinkWhileZombied("ns/pid", buf, sizeof(buf)), + SyscallFailsWithErrno(want)); + + EXPECT_THAT(ReadlinkWhileZombied("ns/user", buf, sizeof(buf)), + SyscallFailsWithErrno(want)); +} + +// Test whether /proc/PID/ symlinks can be read for an exited process. 
+TEST(ProcPidSymlink, SubprocessExited) { + char buf[1]; + + EXPECT_THAT(ReadlinkWhileExited("exe", buf, sizeof(buf)), + SyscallFailsWithErrno(ESRCH)); + + EXPECT_THAT(ReadlinkWhileExited("ns/net", buf, sizeof(buf)), + SyscallFailsWithErrno(ESRCH)); + + EXPECT_THAT(ReadlinkWhileExited("ns/pid", buf, sizeof(buf)), + SyscallFailsWithErrno(ESRCH)); + + EXPECT_THAT(ReadlinkWhileExited("ns/user", buf, sizeof(buf)), + SyscallFailsWithErrno(ESRCH)); +} + +// /proc/PID/exe points to the correct binary. +TEST(ProcPidExe, Subprocess) { + auto link = ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/self/exe")); + auto expected_absolute_path = + ASSERT_NO_ERRNO_AND_VALUE(MakeAbsolute(link, "")); + + char actual[PATH_MAX + 1] = {}; + ASSERT_THAT(ReadlinkWhileRunning("exe", actual, sizeof(actual)), + SyscallSucceedsWithValue(Gt(0))); + EXPECT_EQ(actual, expected_absolute_path); +} + +// Test whether /proc/PID/ files can be read for a running process. +TEST(ProcPidFile, SubprocessRunning) { + char buf[1]; + + EXPECT_THAT(ReadWhileRunning("auxv", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(ReadWhileRunning("cmdline", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(ReadWhileRunning("comm", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(ReadWhileRunning("gid_map", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(ReadWhileRunning("io", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(ReadWhileRunning("maps", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(ReadWhileRunning("stat", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(ReadWhileRunning("status", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(ReadWhileRunning("uid_map", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(ReadWhileRunning("oom_score", buf, sizeof(buf)), + 
SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(ReadWhileRunning("oom_score_adj", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); +} + +// Test whether /proc/PID/ files can be read for a zombie process. +TEST(ProcPidFile, SubprocessZombie) { + char buf[1]; + + // FIXME(gvisor.dev/issue/164): Loosen requirement due to inconsistent + // behavior on different kernels. + // + // ~4.3: Succeeds and returns 0. + // 4.17: Succeeds and returns 1. + // gVisor: Succeeds and returns 0. + EXPECT_THAT(ReadWhileZombied("auxv", buf, sizeof(buf)), SyscallSucceeds()); + + EXPECT_THAT(ReadWhileZombied("cmdline", buf, sizeof(buf)), + SyscallSucceedsWithValue(0)); + + EXPECT_THAT(ReadWhileZombied("comm", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(ReadWhileZombied("gid_map", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(ReadWhileZombied("maps", buf, sizeof(buf)), + SyscallSucceedsWithValue(0)); + + EXPECT_THAT(ReadWhileZombied("stat", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(ReadWhileZombied("status", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(ReadWhileZombied("uid_map", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(ReadWhileZombied("oom_score", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(ReadWhileZombied("oom_score_adj", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + // FIXME(gvisor.dev/issue/164): Inconsistent behavior between gVisor and linux + // on proc files. + // + // ~4.3: Fails and returns EACCES. + // gVisor & 4.17: Succeeds and returns 1. + // + // EXPECT_THAT(ReadWhileZombied("io", buf, sizeof(buf)), + // SyscallFailsWithErrno(EACCES)); +} + +// Test whether /proc/PID/ files can be read for an exited process. +TEST(ProcPidFile, SubprocessExited) { + char buf[1]; + + // FIXME(gvisor.dev/issue/164): Inconsistent behavior between kernels. 
+ // + // ~4.3: Fails and returns ESRCH. + // gVisor: Fails with ESRCH. + // 4.17: Succeeds and returns 1. + // + // EXPECT_THAT(ReadWhileExited("auxv", buf, sizeof(buf)), + // SyscallFailsWithErrno(ESRCH)); + + EXPECT_THAT(ReadWhileExited("cmdline", buf, sizeof(buf)), + SyscallFailsWithErrno(ESRCH)); + + if (!IsRunningOnGvisor()) { + // FIXME(gvisor.dev/issue/164): Succeeds on gVisor. + EXPECT_THAT(ReadWhileExited("comm", buf, sizeof(buf)), + SyscallFailsWithErrno(ESRCH)); + } + + EXPECT_THAT(ReadWhileExited("gid_map", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + if (!IsRunningOnGvisor()) { + // FIXME(gvisor.dev/issue/164): Succeeds on gVisor. + EXPECT_THAT(ReadWhileExited("io", buf, sizeof(buf)), + SyscallFailsWithErrno(ESRCH)); + } + + if (!IsRunningOnGvisor()) { + // FIXME(gvisor.dev/issue/164): Returns EOF on gVisor. + EXPECT_THAT(ReadWhileExited("maps", buf, sizeof(buf)), + SyscallFailsWithErrno(ESRCH)); + } + + if (!IsRunningOnGvisor()) { + // FIXME(gvisor.dev/issue/164): Succeeds on gVisor. + EXPECT_THAT(ReadWhileExited("stat", buf, sizeof(buf)), + SyscallFailsWithErrno(ESRCH)); + } + + if (!IsRunningOnGvisor()) { + // FIXME(gvisor.dev/issue/164): Succeeds on gVisor. + EXPECT_THAT(ReadWhileExited("status", buf, sizeof(buf)), + SyscallFailsWithErrno(ESRCH)); + } + + EXPECT_THAT(ReadWhileExited("uid_map", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + if (!IsRunningOnGvisor()) { + // FIXME(gvisor.dev/issue/164): Succeeds on gVisor. 
+ EXPECT_THAT(ReadWhileExited("oom_score", buf, sizeof(buf)), + SyscallFailsWithErrno(ESRCH)); + } + + EXPECT_THAT(ReadWhileExited("oom_score_adj", buf, sizeof(buf)), + SyscallFailsWithErrno(ESRCH)); +} + +PosixError DirContainsImpl(absl::string_view path, + const std::vector<std::string>& targets, + bool strict) { + ASSIGN_OR_RETURN_ERRNO(auto listing, ListDir(path, false)); + bool success = true; + + for (auto& expected_entry : targets) { + auto cursor = std::find(listing.begin(), listing.end(), expected_entry); + if (cursor == listing.end()) { + success = false; + } + } + + if (!success) { + return PosixError( + ENOENT, + absl::StrCat("Failed to find one or more paths in '", path, "'")); + } + + if (strict) { + if (targets.size() != listing.size()) { + return PosixError( + EINVAL, + absl::StrCat("Expected to find ", targets.size(), " elements in '", + path, "', but found ", listing.size())); + } + } + + return NoError(); +} + +PosixError DirContains(absl::string_view path, + const std::vector<std::string>& targets) { + return DirContainsImpl(path, targets, false); +} + +PosixError DirContainsExactly(absl::string_view path, + const std::vector<std::string>& targets) { + return DirContainsImpl(path, targets, true); +} + +PosixError EventuallyDirContainsExactly( + absl::string_view path, const std::vector<std::string>& targets) { + constexpr int kRetryCount = 100; + const absl::Duration kRetryDelay = absl::Milliseconds(100); + + for (int i = 0; i < kRetryCount; ++i) { + auto res = DirContainsExactly(path, targets); + if (res.ok()) { + return res; + } else if (i < kRetryCount - 1) { + // Sleep if this isn't the final iteration. 
+ absl::SleepFor(kRetryDelay); + } + } + return PosixError(ETIMEDOUT, + "Timed out while waiting for directory to contain files "); +} + +TEST(ProcTask, Basic) { + EXPECT_NO_ERRNO( + DirContains("/proc/self/task", {".", "..", absl::StrCat(getpid())})); +} + +std::vector<std::string> TaskFiles( + const std::vector<std::string>& initial_contents, + const std::vector<pid_t>& pids) { + return VecCat<std::string>( + initial_contents, + ApplyVec<std::string>([](const pid_t p) { return absl::StrCat(p); }, + pids)); +} + +std::vector<std::string> TaskFiles(const std::vector<pid_t>& pids) { + return TaskFiles({".", "..", absl::StrCat(getpid())}, pids); +} + +// Helper class for creating a new task in the current thread group. +class BlockingChild { + public: + BlockingChild() : thread_([=] { Start(); }) {} + ~BlockingChild() { Join(); } + + pid_t Tid() const { + absl::MutexLock ml(&mu_); + mu_.Await(absl::Condition(&tid_ready_)); + return tid_; + } + + void Join() { Stop(); } + + private: + void Start() { + absl::MutexLock ml(&mu_); + tid_ = syscall(__NR_gettid); + tid_ready_ = true; + mu_.Await(absl::Condition(&stop_)); + } + + void Stop() { + absl::MutexLock ml(&mu_); + stop_ = true; + } + + mutable absl::Mutex mu_; + bool stop_ ABSL_GUARDED_BY(mu_) = false; + pid_t tid_; + bool tid_ready_ ABSL_GUARDED_BY(mu_) = false; + + // Must be last to ensure that the destructor for the thread is run before + // any other member of the object is destroyed. 
+ ScopedThread thread_; +}; + +TEST(ProcTask, NewThreadAppears) { + auto initial = ASSERT_NO_ERRNO_AND_VALUE(ListDir("/proc/self/task", false)); + BlockingChild child1; + EXPECT_NO_ERRNO(DirContainsExactly("/proc/self/task", + TaskFiles(initial, {child1.Tid()}))); +} + +TEST(ProcTask, KilledThreadsDisappear) { + auto initial = ASSERT_NO_ERRNO_AND_VALUE(ListDir("/proc/self/task/", false)); + + BlockingChild child1; + EXPECT_NO_ERRNO(DirContainsExactly("/proc/self/task", + TaskFiles(initial, {child1.Tid()}))); + + // Stat child1's task file. Regression test for b/32097707. + struct stat statbuf; + const std::string child1_task_file = + absl::StrCat("/proc/self/task/", child1.Tid()); + EXPECT_THAT(stat(child1_task_file.c_str(), &statbuf), SyscallSucceeds()); + + BlockingChild child2; + EXPECT_NO_ERRNO(DirContainsExactly( + "/proc/self/task", TaskFiles(initial, {child1.Tid(), child2.Tid()}))); + + BlockingChild child3; + BlockingChild child4; + BlockingChild child5; + EXPECT_NO_ERRNO(DirContainsExactly( + "/proc/self/task", + TaskFiles(initial, {child1.Tid(), child2.Tid(), child3.Tid(), + child4.Tid(), child5.Tid()}))); + + child2.Join(); + EXPECT_NO_ERRNO(EventuallyDirContainsExactly( + "/proc/self/task", TaskFiles(initial, {child1.Tid(), child3.Tid(), + child4.Tid(), child5.Tid()}))); + + child1.Join(); + child4.Join(); + EXPECT_NO_ERRNO(EventuallyDirContainsExactly( + "/proc/self/task", TaskFiles(initial, {child3.Tid(), child5.Tid()}))); + + // Stat child1's task file again. This time it should fail. See b/32097707. 
+ EXPECT_THAT(stat(child1_task_file.c_str(), &statbuf), + SyscallFailsWithErrno(ENOENT)); + + child3.Join(); + child5.Join(); + EXPECT_NO_ERRNO(EventuallyDirContainsExactly("/proc/self/task", initial)); +} + +TEST(ProcTask, ChildTaskDir) { + BlockingChild child1; + EXPECT_NO_ERRNO(DirContains("/proc/self/task", TaskFiles({child1.Tid()}))); + EXPECT_NO_ERRNO(DirContains(absl::StrCat("/proc/", child1.Tid(), "/task"), + TaskFiles({child1.Tid()}))); +} + +PosixError VerifyPidDir(std::string path) { + return DirContains(path, {"exe", "fd", "io", "maps", "ns", "stat", "status"}); +} + +TEST(ProcTask, VerifyTaskDir) { + EXPECT_NO_ERRNO(VerifyPidDir("/proc/self")); + + EXPECT_NO_ERRNO(VerifyPidDir(absl::StrCat("/proc/self/task/", getpid()))); + BlockingChild child1; + EXPECT_NO_ERRNO(VerifyPidDir(absl::StrCat("/proc/self/task/", child1.Tid()))); + + // Only the first level of task directories should contain the 'task' + // directory. That is: + // + // /proc/1234/task <- should exist + // /proc/1234/task/1234/task <- should not exist + // /proc/1234/task/1235/task <- should not exist (where 1235 is in the same + // thread group as 1234). + EXPECT_FALSE( + DirContains(absl::StrCat("/proc/self/task/", getpid()), {"task"}).ok()) + << "Found 'task' directory in an inner directory."; +} + +TEST(ProcTask, TaskDirCannotBeDeleted) { + // Drop capabilities that allow us to override file and directory permissions. + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + + EXPECT_THAT(rmdir("/proc/self/task"), SyscallFails()); + EXPECT_THAT(rmdir(absl::StrCat("/proc/self/task/", getpid()).c_str()), + SyscallFailsWithErrno(EACCES)); +} + +TEST(ProcTask, TaskDirHasCorrectMetadata) { + struct stat st; + EXPECT_THAT(stat("/proc/self/task", &st), SyscallSucceeds()); + EXPECT_TRUE(S_ISDIR(st.st_mode)); + + // Verify file is readable and executable by everyone. 
+ mode_t expected_permissions = + S_IRUSR | S_IXUSR | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH; + mode_t permissions = st.st_mode & (S_IRWXU | S_IRWXG | S_IRWXO); + EXPECT_EQ(expected_permissions, permissions); +} + +TEST(ProcTask, TaskDirCanSeekToEnd) { + const FileDescriptor dirfd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/self/task", O_RDONLY)); + EXPECT_THAT(lseek(dirfd.get(), 0, SEEK_END), SyscallSucceeds()); +} + +TEST(ProcTask, VerifyTaskDirNlinks) { + // A task directory will have 3 links if the taskgroup has a single + // thread. For example, the following shows where the links to + // '/proc/12345/task' come from for a single threaded process with pid 12345: + // + // /proc/12345/task <-- 1 link for the directory itself + // . <-- link from "." + // .. + // 12345 + // . + // .. <-- link from ".." to parent. + // <other contents of a task dir> + // + // We can't assert an absolute number of links since we don't control how many + // threads the test framework spawns. Instead, we'll ensure creating a new + // thread increases the number of links as expected. + + // Once we reach the test body, we can count on the thread count being stable + // unless we spawn a new one. + uint64_t initial_links = ASSERT_NO_ERRNO_AND_VALUE(Links("/proc/self/task")); + ASSERT_GE(initial_links, 3); + + // For each new subtask, we should gain a new link. 
+ BlockingChild child1; + EXPECT_THAT(Links("/proc/self/task"), + IsPosixErrorOkAndHolds(initial_links + 1)); + BlockingChild child2; + EXPECT_THAT(Links("/proc/self/task"), + IsPosixErrorOkAndHolds(initial_links + 2)); +} + +TEST(ProcTask, CommContainsThreadNameAndTrailingNewline) { + constexpr char kThreadName[] = "TestThread12345"; + ASSERT_THAT(prctl(PR_SET_NAME, kThreadName), SyscallSucceeds()); + + auto thread_name = ASSERT_NO_ERRNO_AND_VALUE( + GetContents(JoinPath("/proc", absl::StrCat(getpid()), "task", + absl::StrCat(syscall(SYS_gettid)), "comm"))); + EXPECT_EQ(absl::StrCat(kThreadName, "\n"), thread_name); +} + +TEST(ProcTaskNs, NsDirExistsAndHasCorrectMetadata) { + EXPECT_NO_ERRNO(DirContains("/proc/self/ns", {"net", "pid", "user"})); + + // Let's just test the 'pid' entry, all of them are very similar. + struct stat st; + EXPECT_THAT(lstat("/proc/self/ns/pid", &st), SyscallSucceeds()); + EXPECT_TRUE(S_ISLNK(st.st_mode)); + + auto link = ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/self/ns/pid")); + EXPECT_THAT(link, ::testing::StartsWith("pid:[")); +} + +TEST(ProcTaskNs, AccessOnNsNodeSucceeds) { + EXPECT_THAT(access("/proc/self/ns/pid", F_OK), SyscallSucceeds()); +} + +TEST(ProcSysKernelHostname, Exists) { + EXPECT_THAT(open("/proc/sys/kernel/hostname", O_RDONLY), SyscallSucceeds()); +} + +TEST(ProcSysKernelHostname, MatchesUname) { + struct utsname buf; + EXPECT_THAT(uname(&buf), SyscallSucceeds()); + const std::string hostname = absl::StrCat(buf.nodename, "\n"); + auto procfs_hostname = + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/sys/kernel/hostname")); + EXPECT_EQ(procfs_hostname, hostname); +} + +TEST(ProcSysVmMmapMinAddr, HasNumericValue) { + const std::string mmap_min_addr_str = + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/sys/vm/mmap_min_addr")); + uintptr_t mmap_min_addr; + EXPECT_TRUE(absl::SimpleAtoi(mmap_min_addr_str, &mmap_min_addr)) + << "/proc/sys/vm/mmap_min_addr does not contain a numeric value: " + << mmap_min_addr_str; +} + 
+TEST(ProcSysVmOvercommitMemory, HasNumericValue) { + const std::string overcommit_memory_str = + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/sys/vm/overcommit_memory")); + uintptr_t overcommit_memory; + EXPECT_TRUE(absl::SimpleAtoi(overcommit_memory_str, &overcommit_memory)) + << "/proc/sys/vm/overcommit_memory does not contain a numeric value: " + << overcommit_memory; +} + +// Check that link for proc fd entries point the target node, not the +// symlink itself. Regression test for b/31155070. +TEST(ProcTaskFd, FstatatFollowsSymlink) { + const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY)); + + struct stat sproc = {}; + EXPECT_THAT( + fstatat(-1, absl::StrCat("/proc/self/fd/", fd.get()).c_str(), &sproc, 0), + SyscallSucceeds()); + + struct stat sfile = {}; + EXPECT_THAT(fstatat(-1, file.path().c_str(), &sfile, 0), SyscallSucceeds()); + + // If fstatat follows the fd symlink, the device and inode numbers should + // match at a minimum. 
+ EXPECT_EQ(sproc.st_dev, sfile.st_dev); + EXPECT_EQ(sproc.st_ino, sfile.st_ino); + EXPECT_EQ(0, memcmp(&sfile, &sproc, sizeof(sfile))); +} + +TEST(ProcFilesystems, Bug65172365) { + std::string proc_filesystems = + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/filesystems")); + ASSERT_FALSE(proc_filesystems.empty()); +} + +TEST(ProcFilesystems, PresenceOfShmMaxMniAll) { + uint64_t shmmax = 0; + uint64_t shmall = 0; + uint64_t shmmni = 0; + std::string proc_file; + proc_file = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/sys/kernel/shmmax")); + ASSERT_FALSE(proc_file.empty()); + ASSERT_TRUE(absl::SimpleAtoi(proc_file, &shmmax)); + proc_file = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/sys/kernel/shmall")); + ASSERT_FALSE(proc_file.empty()); + ASSERT_TRUE(absl::SimpleAtoi(proc_file, &shmall)); + proc_file = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/sys/kernel/shmmni")); + ASSERT_FALSE(proc_file.empty()); + ASSERT_TRUE(absl::SimpleAtoi(proc_file, &shmmni)); + + ASSERT_GT(shmmax, 0); + ASSERT_GT(shmall, 0); + ASSERT_GT(shmmni, 0); + ASSERT_LE(shmall, shmmax); + + // These values should never be higher than this by default, for more + // information see uapi/linux/shm.h + ASSERT_LE(shmmax, ULONG_MAX - (1UL << 24)); + ASSERT_LE(shmall, ULONG_MAX - (1UL << 24)); +} + +// Check that /proc/mounts is a symlink to self/mounts. +TEST(ProcMounts, IsSymlink) { + auto link = ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/mounts")); + EXPECT_EQ(link, "self/mounts"); +} + +TEST(ProcSelfMountinfo, RequiredFieldsArePresent) { + auto mountinfo = + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/mountinfo")); + EXPECT_THAT( + mountinfo, + AllOf( + // Root mount. + ContainsRegex( + R"([0-9]+ [0-9]+ [0-9]+:[0-9]+ /\S* / (rw|ro).*- \S+ \S+ (rw|ro)\S*)"), + // Proc mount - always rw. + ContainsRegex( + R"([0-9]+ [0-9]+ [0-9]+:[0-9]+ / /proc rw.*- \S+ \S+ rw\S*)"))); +} + +// Check that /proc/self/mounts looks something like a real mounts file. 
+TEST(ProcSelfMounts, RequiredFieldsArePresent) { + auto mounts = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/mounts")); + EXPECT_THAT(mounts, + AllOf( + // Root mount. + ContainsRegex(R"(\S+ / \S+ (rw|ro)\S* [0-9]+ [0-9]+\s)"), + // Proc mount. + ContainsRegex(R"(\S+ /proc \S+ rw\S* [0-9]+ [0-9]+\s)"))); +} + +void CheckDuplicatesRecursively(std::string path) { + std::vector<std::string> child_dirs; + + // There is the known issue of the linux procfs, that two consecutive calls of + // readdir can return the same entry twice if between these calls one or more + // entries have been removed from this directory. + int max_attempts = 5; + for (int i = 0; i < max_attempts; i++) { + child_dirs.clear(); + errno = 0; + bool success = true; + DIR* dir = opendir(path.c_str()); + if (dir == nullptr) { + // Ignore any directories we can't read or missing directories as the + // directory could have been deleted/mutated from the time the parent + // directory contents were read. + return; + } + auto dir_closer = Cleanup([&dir]() { closedir(dir); }); + std::unordered_set<std::string> children; + while (true) { + // Readdir(3): If the end of the directory stream is reached, NULL is + // returned and errno is not changed. If an error occurs, NULL is + // returned and errno is set appropriately. To distinguish end of stream + // from an error, set errno to zero before calling readdir() and then + // check the value of errno if NULL is returned. + errno = 0; + struct dirent* dp = readdir(dir); + if (dp == nullptr) { + // Linux will return EINVAL when calling getdents on a /proc/tid/net + // file corresponding to a zombie task. + // See fs/proc/proc_net.c:proc_tgid_net_readdir(). + // + // We just ignore the directory in this case. + if (errno == EINVAL && absl::StartsWith(path, "/proc/") && + absl::EndsWith(path, "/net")) { + break; + } + + // Otherwise, no errors are allowed. + ASSERT_EQ(errno, 0) << path; + break; // We're done. 
+ } + + const std::string name = dp->d_name; + + if (name == "." || name == "..") { + continue; + } + + // Ignore a duplicate entry if it isn't the last attempt. + if (i == max_attempts - 1) { + ASSERT_EQ(children.find(name), children.end()) + << absl::StrCat(path, "/", name); + } else if (children.find(name) != children.end()) { + std::cerr << "Duplicate entry: " << i << ":" + << absl::StrCat(path, "/", name) << std::endl; + success = false; + break; + } + children.insert(name); + + if (dp->d_type == DT_DIR) { + child_dirs.push_back(name); + } + } + if (success) { + break; + } + } + for (auto dname = child_dirs.begin(); dname != child_dirs.end(); dname++) { + CheckDuplicatesRecursively(absl::StrCat(path, "/", *dname)); + } +} + +TEST(Proc, NoDuplicates) { CheckDuplicatesRecursively("/proc"); } + +// Most /proc/PID files are owned by the task user with SUID_DUMP_USER. +TEST(ProcPid, UserDumpableOwner) { + int before; + ASSERT_THAT(before = prctl(PR_GET_DUMPABLE), SyscallSucceeds()); + auto cleanup = Cleanup([before] { + ASSERT_THAT(prctl(PR_SET_DUMPABLE, before), SyscallSucceeds()); + }); + + EXPECT_THAT(prctl(PR_SET_DUMPABLE, SUID_DUMP_USER), SyscallSucceeds()); + + // This applies to the task directory itself and files inside. + struct stat st; + ASSERT_THAT(stat("/proc/self/", &st), SyscallSucceeds()); + EXPECT_EQ(st.st_uid, geteuid()); + EXPECT_EQ(st.st_gid, getegid()); + + ASSERT_THAT(stat("/proc/self/stat", &st), SyscallSucceeds()); + EXPECT_EQ(st.st_uid, geteuid()); + EXPECT_EQ(st.st_gid, getegid()); +} + +// /proc/PID files are owned by root with SUID_DUMP_DISABLE. 
+TEST(ProcPid, RootDumpableOwner) { + int before; + ASSERT_THAT(before = prctl(PR_GET_DUMPABLE), SyscallSucceeds()); + auto cleanup = Cleanup([before] { + ASSERT_THAT(prctl(PR_SET_DUMPABLE, before), SyscallSucceeds()); + }); + + EXPECT_THAT(prctl(PR_SET_DUMPABLE, SUID_DUMP_DISABLE), SyscallSucceeds()); + + // This *does not* apply to the task directory itself (or other 0555 + // directories), but does to files inside. + struct stat st; + ASSERT_THAT(stat("/proc/self/", &st), SyscallSucceeds()); + EXPECT_EQ(st.st_uid, geteuid()); + EXPECT_EQ(st.st_gid, getegid()); + + // This file is owned by root. Also allow nobody in case this test is running + // in a userns without root mapped. + ASSERT_THAT(stat("/proc/self/stat", &st), SyscallSucceeds()); + EXPECT_THAT(st.st_uid, AnyOf(Eq(0), Eq(65534))); + EXPECT_THAT(st.st_gid, AnyOf(Eq(0), Eq(65534))); +} + +TEST(Proc, GetdentsEnoent) { + FileDescriptor fd; + ASSERT_NO_ERRNO(WithSubprocess( + [&](int pid) -> PosixError { + // Running. + ASSIGN_OR_RETURN_ERRNO(fd, Open(absl::StrCat("/proc/", pid, "/task"), + O_RDONLY | O_DIRECTORY)); + + return NoError(); + }, + nullptr, nullptr)); + char buf[1024]; + ASSERT_THAT(syscall(SYS_getdents64, fd.get(), buf, sizeof(buf)), + SyscallFailsWithErrno(ENOENT)); +} + +void CheckSyscwFromIOFile(const std::string& path, const std::string& regex) { + std::string output; + ASSERT_NO_ERRNO(GetContents(path, &output)); + ASSERT_THAT(output, ContainsRegex(absl::StrCat("syscw:\\s+", regex, "\n"))); +} + +// Checks that there is variable accounting of IO between threads/tasks. +TEST(Proc, PidTidIOAccounting) { + absl::Notification notification; + + // Run a thread with a bunch of writes. Check that io account records exactly + // the number of write calls. File open/close is there to prevent buffering. 
+ ScopedThread writer([&notification] { + const int num_writes = 100; + for (int i = 0; i < num_writes; i++) { + auto path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + ASSERT_NO_ERRNO(SetContents(path.path(), "a")); + } + notification.Notify(); + const std::string& writer_dir = + absl::StrCat("/proc/", getpid(), "/task/", gettid(), "/io"); + + CheckSyscwFromIOFile(writer_dir, std::to_string(num_writes)); + }); + + // Run a thread and do no writes. Check that no writes are recorded. + ScopedThread noop([&notification] { + notification.WaitForNotification(); + const std::string& noop_dir = + absl::StrCat("/proc/", getpid(), "/task/", gettid(), "/io"); + + CheckSyscwFromIOFile(noop_dir, "0"); + }); + + writer.Join(); + noop.Join(); +} + +} // namespace +} // namespace testing +} // namespace gvisor + +int main(int argc, char** argv) { + for (int i = 0; i < argc; ++i) { + gvisor::testing::saved_argv.emplace_back(std::string(argv[i])); + } + + gvisor::testing::TestInit(&argc, &argv); + return gvisor::testing::RunAllTests(); +} diff --git a/test/syscalls/linux/proc_net.cc b/test/syscalls/linux/proc_net.cc new file mode 100644 index 000000000..3377b65cf --- /dev/null +++ b/test/syscalls/linux/proc_net.cc @@ -0,0 +1,482 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <arpa/inet.h> +#include <errno.h> +#include <netinet/in.h> +#include <poll.h> +#include <sys/socket.h> +#include <sys/syscall.h> +#include <sys/types.h> + +#include <vector> + +#include "gtest/gtest.h" +#include "absl/strings/numbers.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_split.h" +#include "absl/strings/string_view.h" +#include "absl/time/clock.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/capability_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/fs_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { +namespace { + +constexpr const char kProcNet[] = "/proc/net"; + +TEST(ProcNetSymlinkTarget, FileMode) { + struct stat s; + ASSERT_THAT(stat(kProcNet, &s), SyscallSucceeds()); + EXPECT_EQ(s.st_mode & S_IFMT, S_IFDIR); + EXPECT_EQ(s.st_mode & 0777, 0555); +} + +TEST(ProcNetSymlink, FileMode) { + struct stat s; + ASSERT_THAT(lstat(kProcNet, &s), SyscallSucceeds()); + EXPECT_EQ(s.st_mode & S_IFMT, S_IFLNK); + EXPECT_EQ(s.st_mode & 0777, 0777); +} + +TEST(ProcNetSymlink, Contents) { + char buf[40] = {}; + int n = readlink(kProcNet, buf, sizeof(buf)); + ASSERT_THAT(n, SyscallSucceeds()); + + buf[n] = 0; + EXPECT_STREQ(buf, "self/net"); +} + +TEST(ProcNetIfInet6, Format) { + auto ifinet6 = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/net/if_inet6")); + EXPECT_THAT(ifinet6, + ::testing::MatchesRegex( + // Ex: "00000000000000000000000000000001 01 80 10 80 lo\n" + "^([a-f0-9]{32}( [a-f0-9]{2}){4} +[a-z][a-z0-9]*\n)+$")); +} + +TEST(ProcSysNetIpv4Sack, Exists) { + EXPECT_THAT(open("/proc/sys/net/ipv4/tcp_sack", O_RDONLY), SyscallSucceeds()); +} + +TEST(ProcSysNetIpv4Sack, CanReadAndWrite) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability((CAP_DAC_OVERRIDE)))); + + auto const fd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/sys/net/ipv4/tcp_sack", O_RDWR)); + + char buf; + EXPECT_THAT(PreadFd(fd.get(), &buf, sizeof(buf), 0), + 
SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_TRUE(buf == '0' || buf == '1') << "unexpected tcp_sack: " << buf; + + char to_write = (buf == '1') ? '0' : '1'; + EXPECT_THAT(PwriteFd(fd.get(), &to_write, sizeof(to_write), 0), + SyscallSucceedsWithValue(sizeof(to_write))); + + buf = 0; + EXPECT_THAT(PreadFd(fd.get(), &buf, sizeof(buf), 0), + SyscallSucceedsWithValue(sizeof(buf))); + EXPECT_EQ(buf, to_write); +} + +// DeviceEntry is an entry in /proc/net/dev +struct DeviceEntry { + std::string name; + uint64_t stats[16]; +}; + +PosixErrorOr<std::vector<DeviceEntry>> GetDeviceMetricsFromProc( + const std::string dev) { + std::vector<std::string> lines = absl::StrSplit(dev, '\n'); + std::vector<DeviceEntry> entries; + + // /proc/net/dev prints 2 lines of headers followed by a line of metrics for + // each network interface. + for (unsigned i = 2; i < lines.size(); i++) { + // Ignore empty lines. + if (lines[i].empty()) { + continue; + } + + std::vector<std::string> values = + absl::StrSplit(lines[i], ' ', absl::SkipWhitespace()); + + // Interface name + 16 values. + if (values.size() != 17) { + return PosixError(EINVAL, "invalid line: " + lines[i]); + } + + DeviceEntry entry; + entry.name = values[0]; + // Skip the interface name and read only the values. + for (unsigned j = 1; j < 17; j++) { + uint64_t num; + if (!absl::SimpleAtoi(values[j], &num)) { + return PosixError(EINVAL, "invalid value: " + values[j]); + } + entry.stats[j - 1] = num; + } + + entries.push_back(entry); + } + + return entries; +} + +// TEST(ProcNetDev, Format) tests that /proc/net/dev is parsable and +// contains at least one entry. 
+TEST(ProcNetDev, Format) { + auto dev = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/net/dev")); + auto entries = ASSERT_NO_ERRNO_AND_VALUE(GetDeviceMetricsFromProc(dev)); + + EXPECT_GT(entries.size(), 0); +} + +PosixErrorOr<uint64_t> GetSNMPMetricFromProc(const std::string snmp, + const std::string& type, + const std::string& item) { + std::vector<std::string> snmp_vec = absl::StrSplit(snmp, '\n'); + + // /proc/net/snmp prints a line of headers followed by a line of metrics. + // Only search the headers. + for (unsigned i = 0; i < snmp_vec.size(); i = i + 2) { + if (!absl::StartsWith(snmp_vec[i], type)) continue; + + std::vector<std::string> fields = + absl::StrSplit(snmp_vec[i], ' ', absl::SkipWhitespace()); + + EXPECT_TRUE((i + 1) < snmp_vec.size()); + std::vector<std::string> values = + absl::StrSplit(snmp_vec[i + 1], ' ', absl::SkipWhitespace()); + + EXPECT_TRUE(!fields.empty() && fields.size() == values.size()); + + // Metrics start at the first index. + for (unsigned j = 1; j < fields.size(); j++) { + if (fields[j] == item) { + uint64_t val; + if (!absl::SimpleAtoi(values[j], &val)) { + return PosixError(EINVAL, + absl::StrCat("field is not a number: ", values[j])); + } + + return val; + } + } + } + // We should never get here. + return PosixError( + EINVAL, absl::StrCat("failed to find ", type, "/", item, " in:", snmp)); +} + +TEST(ProcNetSnmp, TcpReset_NoRandomSave) { + // TODO(gvisor.dev/issue/866): epsocket metrics are not savable. 
+ DisableSave ds; + + uint64_t oldAttemptFails; + uint64_t oldActiveOpens; + uint64_t oldOutRsts; + auto snmp = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/net/snmp")); + oldActiveOpens = ASSERT_NO_ERRNO_AND_VALUE( + GetSNMPMetricFromProc(snmp, "Tcp", "ActiveOpens")); + oldOutRsts = + ASSERT_NO_ERRNO_AND_VALUE(GetSNMPMetricFromProc(snmp, "Tcp", "OutRsts")); + oldAttemptFails = ASSERT_NO_ERRNO_AND_VALUE( + GetSNMPMetricFromProc(snmp, "Tcp", "AttemptFails")); + + FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_STREAM, 0)); + + struct sockaddr_in sin = { + .sin_family = AF_INET, + .sin_port = htons(1234), + }; + + ASSERT_EQ(inet_pton(AF_INET, "127.0.0.1", &(sin.sin_addr)), 1); + ASSERT_THAT(connect(s.get(), (struct sockaddr*)&sin, sizeof(sin)), + SyscallFailsWithErrno(ECONNREFUSED)); + + uint64_t newAttemptFails; + uint64_t newActiveOpens; + uint64_t newOutRsts; + snmp = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/net/snmp")); + newActiveOpens = ASSERT_NO_ERRNO_AND_VALUE( + GetSNMPMetricFromProc(snmp, "Tcp", "ActiveOpens")); + newOutRsts = + ASSERT_NO_ERRNO_AND_VALUE(GetSNMPMetricFromProc(snmp, "Tcp", "OutRsts")); + newAttemptFails = ASSERT_NO_ERRNO_AND_VALUE( + GetSNMPMetricFromProc(snmp, "Tcp", "AttemptFails")); + + EXPECT_EQ(oldActiveOpens, newActiveOpens - 1); + EXPECT_EQ(oldOutRsts, newOutRsts - 1); + EXPECT_EQ(oldAttemptFails, newAttemptFails - 1); +} + +TEST(ProcNetSnmp, TcpEstab_NoRandomSave) { + // TODO(gvisor.dev/issue/866): epsocket metrics are not savable. 
+ DisableSave ds; + + uint64_t oldEstabResets; + uint64_t oldActiveOpens; + uint64_t oldPassiveOpens; + uint64_t oldCurrEstab; + auto snmp = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/net/snmp")); + oldActiveOpens = ASSERT_NO_ERRNO_AND_VALUE( + GetSNMPMetricFromProc(snmp, "Tcp", "ActiveOpens")); + oldPassiveOpens = ASSERT_NO_ERRNO_AND_VALUE( + GetSNMPMetricFromProc(snmp, "Tcp", "PassiveOpens")); + oldCurrEstab = ASSERT_NO_ERRNO_AND_VALUE( + GetSNMPMetricFromProc(snmp, "Tcp", "CurrEstab")); + oldEstabResets = ASSERT_NO_ERRNO_AND_VALUE( + GetSNMPMetricFromProc(snmp, "Tcp", "EstabResets")); + + FileDescriptor s_listen = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_STREAM, 0)); + struct sockaddr_in sin = { + .sin_family = AF_INET, + .sin_port = 0, + }; + + ASSERT_EQ(inet_pton(AF_INET, "127.0.0.1", &(sin.sin_addr)), 1); + ASSERT_THAT(bind(s_listen.get(), (struct sockaddr*)&sin, sizeof(sin)), + SyscallSucceeds()); + ASSERT_THAT(listen(s_listen.get(), 1), SyscallSucceeds()); + + // Get the port bound by the listening socket. 
+ socklen_t addrlen = sizeof(sin); + ASSERT_THAT( + getsockname(s_listen.get(), reinterpret_cast<sockaddr*>(&sin), &addrlen), + SyscallSucceeds()); + + FileDescriptor s_connect = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_STREAM, 0)); + ASSERT_THAT(connect(s_connect.get(), (struct sockaddr*)&sin, sizeof(sin)), + SyscallSucceeds()); + + auto s_accept = + ASSERT_NO_ERRNO_AND_VALUE(Accept(s_listen.get(), nullptr, nullptr)); + + uint64_t newEstabResets; + uint64_t newActiveOpens; + uint64_t newPassiveOpens; + uint64_t newCurrEstab; + snmp = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/net/snmp")); + newActiveOpens = ASSERT_NO_ERRNO_AND_VALUE( + GetSNMPMetricFromProc(snmp, "Tcp", "ActiveOpens")); + newPassiveOpens = ASSERT_NO_ERRNO_AND_VALUE( + GetSNMPMetricFromProc(snmp, "Tcp", "PassiveOpens")); + newCurrEstab = ASSERT_NO_ERRNO_AND_VALUE( + GetSNMPMetricFromProc(snmp, "Tcp", "CurrEstab")); + + EXPECT_EQ(oldActiveOpens, newActiveOpens - 1); + EXPECT_EQ(oldPassiveOpens, newPassiveOpens - 1); + EXPECT_EQ(oldCurrEstab, newCurrEstab - 2); + + // Send 1 byte from client to server. + ASSERT_THAT(send(s_connect.get(), "a", 1, 0), SyscallSucceedsWithValue(1)); + + constexpr int kPollTimeoutMs = 20000; // Wait up to 20 seconds for the data. + + // Wait until server-side fd sees the data on its side but don't read it. + struct pollfd poll_fd = {s_accept.get(), POLLIN, 0}; + ASSERT_THAT(RetryEINTR(poll)(&poll_fd, 1, kPollTimeoutMs), + SyscallSucceedsWithValue(1)); + + // Now close server-side fd without reading the data which leads to a RST + // packet sent to client side. + s_accept.reset(-1); + + // Wait until client-side fd sees RST packet. + struct pollfd poll_fd1 = {s_connect.get(), POLLIN, 0}; + ASSERT_THAT(RetryEINTR(poll)(&poll_fd1, 1, kPollTimeoutMs), + SyscallSucceedsWithValue(1)); + + // Now close client-side fd. + s_connect.reset(-1); + + // Wait until the process of the netstack. 
+ absl::SleepFor(absl::Seconds(1)); + + snmp = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/net/snmp")); + newCurrEstab = ASSERT_NO_ERRNO_AND_VALUE( + GetSNMPMetricFromProc(snmp, "Tcp", "CurrEstab")); + newEstabResets = ASSERT_NO_ERRNO_AND_VALUE( + GetSNMPMetricFromProc(snmp, "Tcp", "EstabResets")); + + EXPECT_EQ(oldCurrEstab, newCurrEstab); + EXPECT_EQ(oldEstabResets, newEstabResets - 2); +} + +TEST(ProcNetSnmp, UdpNoPorts_NoRandomSave) { + // TODO(gvisor.dev/issue/866): epsocket metrics are not savable. + DisableSave ds; + + uint64_t oldOutDatagrams; + uint64_t oldNoPorts; + auto snmp = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/net/snmp")); + oldOutDatagrams = ASSERT_NO_ERRNO_AND_VALUE( + GetSNMPMetricFromProc(snmp, "Udp", "OutDatagrams")); + oldNoPorts = + ASSERT_NO_ERRNO_AND_VALUE(GetSNMPMetricFromProc(snmp, "Udp", "NoPorts")); + + FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, 0)); + + struct sockaddr_in sin = { + .sin_family = AF_INET, + .sin_port = htons(4444), + }; + ASSERT_EQ(inet_pton(AF_INET, "127.0.0.1", &(sin.sin_addr)), 1); + ASSERT_THAT(sendto(s.get(), "a", 1, 0, (struct sockaddr*)&sin, sizeof(sin)), + SyscallSucceedsWithValue(1)); + + uint64_t newOutDatagrams; + uint64_t newNoPorts; + snmp = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/net/snmp")); + newOutDatagrams = ASSERT_NO_ERRNO_AND_VALUE( + GetSNMPMetricFromProc(snmp, "Udp", "OutDatagrams")); + newNoPorts = + ASSERT_NO_ERRNO_AND_VALUE(GetSNMPMetricFromProc(snmp, "Udp", "NoPorts")); + + EXPECT_EQ(oldOutDatagrams, newOutDatagrams - 1); + EXPECT_EQ(oldNoPorts, newNoPorts - 1); +} + +TEST(ProcNetSnmp, UdpIn_NoRandomSave) { + // TODO(gvisor.dev/issue/866): epsocket metrics are not savable. 
+ const DisableSave ds; + + uint64_t oldOutDatagrams; + uint64_t oldInDatagrams; + auto snmp = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/net/snmp")); + oldOutDatagrams = ASSERT_NO_ERRNO_AND_VALUE( + GetSNMPMetricFromProc(snmp, "Udp", "OutDatagrams")); + oldInDatagrams = ASSERT_NO_ERRNO_AND_VALUE( + GetSNMPMetricFromProc(snmp, "Udp", "InDatagrams")); + + std::cerr << "snmp: " << std::endl << snmp << std::endl; + FileDescriptor server = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, 0)); + struct sockaddr_in sin = { + .sin_family = AF_INET, + .sin_port = htons(0), + }; + ASSERT_EQ(inet_pton(AF_INET, "127.0.0.1", &(sin.sin_addr)), 1); + ASSERT_THAT(bind(server.get(), (struct sockaddr*)&sin, sizeof(sin)), + SyscallSucceeds()); + // Get the port bound by the server socket. + socklen_t addrlen = sizeof(sin); + ASSERT_THAT( + getsockname(server.get(), reinterpret_cast<sockaddr*>(&sin), &addrlen), + SyscallSucceeds()); + + FileDescriptor client = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, 0)); + ASSERT_THAT( + sendto(client.get(), "a", 1, 0, (struct sockaddr*)&sin, sizeof(sin)), + SyscallSucceedsWithValue(1)); + + char buf[128]; + ASSERT_THAT(recvfrom(server.get(), buf, sizeof(buf), 0, NULL, NULL), + SyscallSucceedsWithValue(1)); + + uint64_t newOutDatagrams; + uint64_t newInDatagrams; + snmp = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/net/snmp")); + std::cerr << "new snmp: " << std::endl << snmp << std::endl; + newOutDatagrams = ASSERT_NO_ERRNO_AND_VALUE( + GetSNMPMetricFromProc(snmp, "Udp", "OutDatagrams")); + newInDatagrams = ASSERT_NO_ERRNO_AND_VALUE( + GetSNMPMetricFromProc(snmp, "Udp", "InDatagrams")); + + EXPECT_EQ(oldOutDatagrams, newOutDatagrams - 1); + EXPECT_EQ(oldInDatagrams, newInDatagrams - 1); +} + +TEST(ProcNetSnmp, CheckNetStat) { + // TODO(b/155123175): SNMP and netstat don't work on gVisor. 
+ SKIP_IF(IsRunningOnGvisor()); + + std::string contents = + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/net/netstat")); + + int name_count = 0; + int value_count = 0; + std::vector<absl::string_view> lines = absl::StrSplit(contents, '\n'); + for (int i = 0; i + 1 < lines.size(); i += 2) { + std::vector<absl::string_view> names = + absl::StrSplit(lines[i], absl::ByAnyChar("\t ")); + std::vector<absl::string_view> values = + absl::StrSplit(lines[i + 1], absl::ByAnyChar("\t ")); + EXPECT_EQ(names.size(), values.size()) << " mismatch in lines '" << lines[i] + << "' and '" << lines[i + 1] << "'"; + for (int j = 0; j < names.size() && j < values.size(); ++j) { + if (names[j] == "TCPOrigDataSent" || names[j] == "TCPSynRetrans" || + names[j] == "TCPDSACKRecv" || names[j] == "TCPDSACKOfoRecv") { + ++name_count; + int64_t val; + if (absl::SimpleAtoi(values[j], &val)) { + ++value_count; + } + } + } + } + EXPECT_EQ(name_count, 4); + EXPECT_EQ(value_count, 4); +} + +TEST(ProcNetSnmp, Stat) { + struct stat st = {}; + ASSERT_THAT(stat("/proc/net/snmp", &st), SyscallSucceeds()); +} + +TEST(ProcNetSnmp, CheckSnmp) { + // TODO(b/155123175): SNMP and netstat don't work on gVisor. 
+ SKIP_IF(IsRunningOnGvisor()); + + std::string contents = + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/net/snmp")); + + int name_count = 0; + int value_count = 0; + std::vector<absl::string_view> lines = absl::StrSplit(contents, '\n'); + for (int i = 0; i + 1 < lines.size(); i += 2) { + std::vector<absl::string_view> names = + absl::StrSplit(lines[i], absl::ByAnyChar("\t ")); + std::vector<absl::string_view> values = + absl::StrSplit(lines[i + 1], absl::ByAnyChar("\t ")); + EXPECT_EQ(names.size(), values.size()) << " mismatch in lines '" << lines[i] + << "' and '" << lines[i + 1] << "'"; + for (int j = 0; j < names.size() && j < values.size(); ++j) { + if (names[j] == "RetransSegs") { + ++name_count; + int64_t val; + if (absl::SimpleAtoi(values[j], &val)) { + ++value_count; + } + } + } + } + EXPECT_EQ(name_count, 1); + EXPECT_EQ(value_count, 1); +} + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/proc_net_tcp.cc b/test/syscalls/linux/proc_net_tcp.cc new file mode 100644 index 000000000..5b6e3e3cd --- /dev/null +++ b/test/syscalls/linux/proc_net_tcp.cc @@ -0,0 +1,496 @@ +// Copyright 2019 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 

#include <netinet/tcp.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

#include "gtest/gtest.h"
#include "absl/strings/numbers.h"
#include "absl/strings/str_join.h"
#include "absl/strings/str_split.h"
#include "test/syscalls/linux/ip_socket_test_util.h"
#include "test/util/file_descriptor.h"
#include "test/util/test_util.h"

namespace gvisor {
namespace testing {
namespace {

using absl::StrCat;
using absl::StrSplit;

// Expected first line of /proc/net/tcp. The tests in this file compare it
// verbatim against the file's contents, so every character (including the
// column padding) is significant.
// NOTE(review): confirm the run lengths of spaces inside these literals match
// the kernel's header exactly.
constexpr char kProcNetTCPHeader[] =
    " sl local_address rem_address st tx_queue rx_queue tr tm->when "
    "retrnsmt uid timeout inode "
    " ";

// TCPEntry represents a single entry from /proc/net/tcp.
struct TCPEntry {
  // Local endpoint, parsed from the hex "local_address" column (addr:port).
  uint32_t local_addr;
  uint16_t local_port;

  // Remote endpoint, parsed from the hex "rem_address" column (addr:port).
  uint32_t remote_addr;
  uint16_t remote_port;

  // Value of the hex "st" column; tests compare it against TCP_* constants
  // such as TCP_LISTEN and TCP_ESTABLISHED.
  uint64_t state;
  // Value of the "uid" column.
  uint64_t uid;
  // Value of the "inode" column.
  uint64_t inode;
};

// Finds the first entry in 'entries' for which 'predicate' returns true.
// Returns true on match, and sets 'match' to a copy of the matching entry. If
// 'match' is null, it's ignored.
+bool FindBy(const std::vector<TCPEntry>& entries, TCPEntry* match, + std::function<bool(const TCPEntry&)> predicate) { + for (const TCPEntry& entry : entries) { + if (predicate(entry)) { + if (match != nullptr) { + *match = entry; + } + return true; + } + } + return false; +} + +bool FindByLocalAddr(const std::vector<TCPEntry>& entries, TCPEntry* match, + const struct sockaddr* addr) { + uint32_t host = IPFromInetSockaddr(addr); + uint16_t port = PortFromInetSockaddr(addr); + return FindBy(entries, match, [host, port](const TCPEntry& e) { + return (e.local_addr == host && e.local_port == port); + }); +} + +bool FindByRemoteAddr(const std::vector<TCPEntry>& entries, TCPEntry* match, + const struct sockaddr* addr) { + uint32_t host = IPFromInetSockaddr(addr); + uint16_t port = PortFromInetSockaddr(addr); + return FindBy(entries, match, [host, port](const TCPEntry& e) { + return (e.remote_addr == host && e.remote_port == port); + }); +} + +// Returns a parsed representation of /proc/net/tcp entries. +PosixErrorOr<std::vector<TCPEntry>> ProcNetTCPEntries() { + std::string content; + RETURN_IF_ERRNO(GetContents("/proc/net/tcp", &content)); + + bool found_header = false; + std::vector<TCPEntry> entries; + std::vector<std::string> lines = StrSplit(content, '\n'); + std::cerr << "<contents of /proc/net/tcp>" << std::endl; + for (const std::string& line : lines) { + std::cerr << line << std::endl; + + if (!found_header) { + EXPECT_EQ(line, kProcNetTCPHeader); + found_header = true; + continue; + } + if (line.empty()) { + continue; + } + + // Parse a single entry from /proc/net/tcp. 
+ // + // Example entries: + // + // clang-format off + // + // sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode + // 0: 00000000:006F 00000000:0000 0A 00000000:00000000 00:00000000 00000000 0 0 1968 1 0000000000000000 100 0 0 10 0 + // 1: 0100007F:7533 00000000:0000 0A 00000000:00000000 00:00000000 00000000 120 0 10684 1 0000000000000000 100 0 0 10 0 + // ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ + // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 + // + // clang-format on + + TCPEntry entry; + std::vector<std::string> fields = + StrSplit(line, absl::ByAnyChar(": "), absl::SkipEmpty()); + + ASSIGN_OR_RETURN_ERRNO(entry.local_addr, AtoiBase(fields[1], 16)); + ASSIGN_OR_RETURN_ERRNO(entry.local_port, AtoiBase(fields[2], 16)); + + ASSIGN_OR_RETURN_ERRNO(entry.remote_addr, AtoiBase(fields[3], 16)); + ASSIGN_OR_RETURN_ERRNO(entry.remote_port, AtoiBase(fields[4], 16)); + + ASSIGN_OR_RETURN_ERRNO(entry.state, AtoiBase(fields[5], 16)); + ASSIGN_OR_RETURN_ERRNO(entry.uid, Atoi<uint64_t>(fields[11])); + ASSIGN_OR_RETURN_ERRNO(entry.inode, Atoi<uint64_t>(fields[13])); + + entries.push_back(entry); + } + std::cerr << "<end of /proc/net/tcp>" << std::endl; + + return entries; +} + +TEST(ProcNetTCP, Exists) { + const std::string content = + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/net/tcp")); + const std::string header_line = StrCat(kProcNetTCPHeader, "\n"); + if (IsRunningOnGvisor()) { + // Should be just the header since we don't have any tcp sockets yet. + EXPECT_EQ(content, header_line); + } else { + // On a general linux machine, we could have abitrary sockets on the system, + // so just check the header. 
+ EXPECT_THAT(content, ::testing::StartsWith(header_line)); + } +} + +TEST(ProcNetTCP, EntryUID) { + auto sockets = + ASSERT_NO_ERRNO_AND_VALUE(IPv4TCPAcceptBindSocketPair(0).Create()); + std::vector<TCPEntry> entries = + ASSERT_NO_ERRNO_AND_VALUE(ProcNetTCPEntries()); + TCPEntry e; + ASSERT_TRUE(FindByLocalAddr(entries, &e, sockets->first_addr())); + EXPECT_EQ(e.uid, geteuid()); + ASSERT_TRUE(FindByRemoteAddr(entries, &e, sockets->first_addr())); + EXPECT_EQ(e.uid, geteuid()); +} + +TEST(ProcNetTCP, BindAcceptConnect) { + auto sockets = + ASSERT_NO_ERRNO_AND_VALUE(IPv4TCPAcceptBindSocketPair(0).Create()); + std::vector<TCPEntry> entries = + ASSERT_NO_ERRNO_AND_VALUE(ProcNetTCPEntries()); + // We can only make assertions about the total number of entries if we control + // the entire "machine". + if (IsRunningOnGvisor()) { + EXPECT_EQ(entries.size(), 2); + } + + EXPECT_TRUE(FindByLocalAddr(entries, nullptr, sockets->first_addr())); + EXPECT_TRUE(FindByRemoteAddr(entries, nullptr, sockets->first_addr())); +} + +TEST(ProcNetTCP, InodeReasonable) { + auto sockets = + ASSERT_NO_ERRNO_AND_VALUE(IPv4TCPAcceptBindSocketPair(0).Create()); + std::vector<TCPEntry> entries = + ASSERT_NO_ERRNO_AND_VALUE(ProcNetTCPEntries()); + + TCPEntry accepted_entry; + ASSERT_TRUE(FindByLocalAddr(entries, &accepted_entry, sockets->first_addr())); + EXPECT_NE(accepted_entry.inode, 0); + + TCPEntry client_entry; + ASSERT_TRUE(FindByRemoteAddr(entries, &client_entry, sockets->first_addr())); + EXPECT_NE(client_entry.inode, 0); + EXPECT_NE(accepted_entry.inode, client_entry.inode); +} + +TEST(ProcNetTCP, State) { + std::unique_ptr<FileDescriptor> server = + ASSERT_NO_ERRNO_AND_VALUE(IPv4TCPUnboundSocket(0).Create()); + + auto test_addr = V4Loopback(); + ASSERT_THAT( + bind(server->get(), reinterpret_cast<struct sockaddr*>(&test_addr.addr), + test_addr.addr_len), + SyscallSucceeds()); + + struct sockaddr addr; + socklen_t addrlen = sizeof(struct sockaddr); + 
ASSERT_THAT(getsockname(server->get(), &addr, &addrlen), SyscallSucceeds()); + ASSERT_EQ(addrlen, sizeof(struct sockaddr)); + + ASSERT_THAT(listen(server->get(), 10), SyscallSucceeds()); + std::vector<TCPEntry> entries = + ASSERT_NO_ERRNO_AND_VALUE(ProcNetTCPEntries()); + TCPEntry listen_entry; + ASSERT_TRUE(FindByLocalAddr(entries, &listen_entry, &addr)); + EXPECT_EQ(listen_entry.state, TCP_LISTEN); + + std::unique_ptr<FileDescriptor> client = + ASSERT_NO_ERRNO_AND_VALUE(IPv4TCPUnboundSocket(0).Create()); + ASSERT_THAT(RetryEINTR(connect)(client->get(), &addr, addrlen), + SyscallSucceeds()); + entries = ASSERT_NO_ERRNO_AND_VALUE(ProcNetTCPEntries()); + ASSERT_TRUE(FindByLocalAddr(entries, &listen_entry, &addr)); + EXPECT_EQ(listen_entry.state, TCP_LISTEN); + TCPEntry client_entry; + ASSERT_TRUE(FindByRemoteAddr(entries, &client_entry, &addr)); + EXPECT_EQ(client_entry.state, TCP_ESTABLISHED); + + FileDescriptor accepted = + ASSERT_NO_ERRNO_AND_VALUE(Accept(server->get(), nullptr, nullptr)); + + const uint32_t accepted_local_host = IPFromInetSockaddr(&addr); + const uint16_t accepted_local_port = PortFromInetSockaddr(&addr); + + entries = ASSERT_NO_ERRNO_AND_VALUE(ProcNetTCPEntries()); + TCPEntry accepted_entry; + ASSERT_TRUE(FindBy(entries, &accepted_entry, + [client_entry, accepted_local_host, + accepted_local_port](const TCPEntry& e) { + return e.local_addr == accepted_local_host && + e.local_port == accepted_local_port && + e.remote_addr == client_entry.local_addr && + e.remote_port == client_entry.local_port; + })); + EXPECT_EQ(accepted_entry.state, TCP_ESTABLISHED); +} + +constexpr char kProcNetTCP6Header[] = + " sl local_address remote_address" + " st tx_queue rx_queue tr tm->when retrnsmt" + " uid timeout inode"; + +// TCP6Entry represents a single entry from /proc/net/tcp6. 
+struct TCP6Entry { + struct in6_addr local_addr; + uint16_t local_port; + + struct in6_addr remote_addr; + uint16_t remote_port; + + uint64_t state; + uint64_t uid; + uint64_t inode; +}; + +bool IPv6AddrEqual(const struct in6_addr* a1, const struct in6_addr* a2) { + return memcmp(a1, a2, sizeof(struct in6_addr)) == 0; +} + +// Finds the first entry in 'entries' for which 'predicate' returns true. +// Returns true on match, and sets 'match' to a copy of the matching entry. If +// 'match' is null, it's ignored. +bool FindBy6(const std::vector<TCP6Entry>& entries, TCP6Entry* match, + std::function<bool(const TCP6Entry&)> predicate) { + for (const TCP6Entry& entry : entries) { + if (predicate(entry)) { + if (match != nullptr) { + *match = entry; + } + return true; + } + } + return false; +} + +const struct in6_addr* IP6FromInetSockaddr(const struct sockaddr* addr) { + auto* addr6 = reinterpret_cast<const struct sockaddr_in6*>(addr); + return &addr6->sin6_addr; +} + +bool FindByLocalAddr6(const std::vector<TCP6Entry>& entries, TCP6Entry* match, + const struct sockaddr* addr) { + const struct in6_addr* local = IP6FromInetSockaddr(addr); + uint16_t port = PortFromInetSockaddr(addr); + return FindBy6(entries, match, [local, port](const TCP6Entry& e) { + return (IPv6AddrEqual(&e.local_addr, local) && e.local_port == port); + }); +} + +bool FindByRemoteAddr6(const std::vector<TCP6Entry>& entries, TCP6Entry* match, + const struct sockaddr* addr) { + const struct in6_addr* remote = IP6FromInetSockaddr(addr); + uint16_t port = PortFromInetSockaddr(addr); + return FindBy6(entries, match, [remote, port](const TCP6Entry& e) { + return (IPv6AddrEqual(&e.remote_addr, remote) && e.remote_port == port); + }); +} + +void ReadIPv6Address(std::string s, struct in6_addr* addr) { + uint32_t a0, a1, a2, a3; + const char* fmt = "%08X%08X%08X%08X"; + EXPECT_EQ(sscanf(s.c_str(), fmt, &a0, &a1, &a2, &a3), 4); + + uint8_t* b = addr->s6_addr; + *((uint32_t*)&b[0]) = a0; + *((uint32_t*)&b[4]) = 
a1; + *((uint32_t*)&b[8]) = a2; + *((uint32_t*)&b[12]) = a3; +} + +// Returns a parsed representation of /proc/net/tcp6 entries. +PosixErrorOr<std::vector<TCP6Entry>> ProcNetTCP6Entries() { + std::string content; + RETURN_IF_ERRNO(GetContents("/proc/net/tcp6", &content)); + + bool found_header = false; + std::vector<TCP6Entry> entries; + std::vector<std::string> lines = StrSplit(content, '\n'); + std::cerr << "<contents of /proc/net/tcp6>" << std::endl; + for (const std::string& line : lines) { + std::cerr << line << std::endl; + + if (!found_header) { + EXPECT_EQ(line, kProcNetTCP6Header); + found_header = true; + continue; + } + if (line.empty()) { + continue; + } + + // Parse a single entry from /proc/net/tcp6. + // + // Example entries: + // + // clang-format off + // + // sl local_address remote_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode + // 0: 00000000000000000000000000000000:1F90 00000000000000000000000000000000:0000 0A 00000000:00000000 00:00000000 00000000 0 0 876340 1 ffff8803da9c9380 100 0 0 10 0 + // 1: 00000000000000000000000000000000:C350 00000000000000000000000000000000:0000 0A 00000000:00000000 00:00000000 00000000 0 0 876987 1 ffff8803ec408000 100 0 0 10 0 + // ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ + // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 + // + // clang-format on + + TCP6Entry entry; + std::vector<std::string> fields = + StrSplit(line, absl::ByAnyChar(": "), absl::SkipEmpty()); + + ReadIPv6Address(fields[1], &entry.local_addr); + ASSIGN_OR_RETURN_ERRNO(entry.local_port, AtoiBase(fields[2], 16)); + ReadIPv6Address(fields[3], &entry.remote_addr); + ASSIGN_OR_RETURN_ERRNO(entry.remote_port, AtoiBase(fields[4], 16)); + ASSIGN_OR_RETURN_ERRNO(entry.state, AtoiBase(fields[5], 16)); + ASSIGN_OR_RETURN_ERRNO(entry.uid, Atoi<uint64_t>(fields[11])); + ASSIGN_OR_RETURN_ERRNO(entry.inode, Atoi<uint64_t>(fields[13])); + + entries.push_back(entry); + } + std::cerr << "<end of /proc/net/tcp6>" << std::endl; + + 
return entries; +} + +TEST(ProcNetTCP6, Exists) { + const std::string content = + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/net/tcp6")); + const std::string header_line = StrCat(kProcNetTCP6Header, "\n"); + if (IsRunningOnGvisor()) { + // Should be just the header since we don't have any tcp sockets yet. + EXPECT_EQ(content, header_line); + } else { + // On a general linux machine, we could have abitrary sockets on the system, + // so just check the header. + EXPECT_THAT(content, ::testing::StartsWith(header_line)); + } +} + +TEST(ProcNetTCP6, EntryUID) { + auto sockets = + ASSERT_NO_ERRNO_AND_VALUE(IPv6TCPAcceptBindSocketPair(0).Create()); + std::vector<TCP6Entry> entries = + ASSERT_NO_ERRNO_AND_VALUE(ProcNetTCP6Entries()); + TCP6Entry e; + + ASSERT_TRUE(FindByLocalAddr6(entries, &e, sockets->first_addr())); + EXPECT_EQ(e.uid, geteuid()); + ASSERT_TRUE(FindByRemoteAddr6(entries, &e, sockets->first_addr())); + EXPECT_EQ(e.uid, geteuid()); +} + +TEST(ProcNetTCP6, BindAcceptConnect) { + auto sockets = + ASSERT_NO_ERRNO_AND_VALUE(IPv6TCPAcceptBindSocketPair(0).Create()); + std::vector<TCP6Entry> entries = + ASSERT_NO_ERRNO_AND_VALUE(ProcNetTCP6Entries()); + // We can only make assertions about the total number of entries if we control + // the entire "machine". 
+ if (IsRunningOnGvisor()) { + EXPECT_EQ(entries.size(), 2); + } + + EXPECT_TRUE(FindByLocalAddr6(entries, nullptr, sockets->first_addr())); + EXPECT_TRUE(FindByRemoteAddr6(entries, nullptr, sockets->first_addr())); +} + +TEST(ProcNetTCP6, InodeReasonable) { + auto sockets = + ASSERT_NO_ERRNO_AND_VALUE(IPv6TCPAcceptBindSocketPair(0).Create()); + std::vector<TCP6Entry> entries = + ASSERT_NO_ERRNO_AND_VALUE(ProcNetTCP6Entries()); + + TCP6Entry accepted_entry; + + ASSERT_TRUE( + FindByLocalAddr6(entries, &accepted_entry, sockets->first_addr())); + EXPECT_NE(accepted_entry.inode, 0); + + TCP6Entry client_entry; + ASSERT_TRUE(FindByRemoteAddr6(entries, &client_entry, sockets->first_addr())); + EXPECT_NE(client_entry.inode, 0); + EXPECT_NE(accepted_entry.inode, client_entry.inode); +} + +TEST(ProcNetTCP6, State) { + std::unique_ptr<FileDescriptor> server = + ASSERT_NO_ERRNO_AND_VALUE(IPv6TCPUnboundSocket(0).Create()); + + auto test_addr = V6Loopback(); + ASSERT_THAT( + bind(server->get(), reinterpret_cast<struct sockaddr*>(&test_addr.addr), + test_addr.addr_len), + SyscallSucceeds()); + + struct sockaddr_in6 addr6; + socklen_t addrlen = sizeof(struct sockaddr_in6); + auto* addr = reinterpret_cast<struct sockaddr*>(&addr6); + ASSERT_THAT(getsockname(server->get(), addr, &addrlen), SyscallSucceeds()); + ASSERT_EQ(addrlen, sizeof(struct sockaddr_in6)); + + ASSERT_THAT(listen(server->get(), 10), SyscallSucceeds()); + std::vector<TCP6Entry> entries = + ASSERT_NO_ERRNO_AND_VALUE(ProcNetTCP6Entries()); + TCP6Entry listen_entry; + + ASSERT_TRUE(FindByLocalAddr6(entries, &listen_entry, addr)); + EXPECT_EQ(listen_entry.state, TCP_LISTEN); + + std::unique_ptr<FileDescriptor> client = + ASSERT_NO_ERRNO_AND_VALUE(IPv6TCPUnboundSocket(0).Create()); + ASSERT_THAT(RetryEINTR(connect)(client->get(), addr, addrlen), + SyscallSucceeds()); + entries = ASSERT_NO_ERRNO_AND_VALUE(ProcNetTCP6Entries()); + ASSERT_TRUE(FindByLocalAddr6(entries, &listen_entry, addr)); + 
EXPECT_EQ(listen_entry.state, TCP_LISTEN); + TCP6Entry client_entry; + ASSERT_TRUE(FindByRemoteAddr6(entries, &client_entry, addr)); + EXPECT_EQ(client_entry.state, TCP_ESTABLISHED); + + FileDescriptor accepted = + ASSERT_NO_ERRNO_AND_VALUE(Accept(server->get(), nullptr, nullptr)); + + const struct in6_addr* local = IP6FromInetSockaddr(addr); + const uint16_t accepted_local_port = PortFromInetSockaddr(addr); + + entries = ASSERT_NO_ERRNO_AND_VALUE(ProcNetTCP6Entries()); + TCP6Entry accepted_entry; + ASSERT_TRUE(FindBy6( + entries, &accepted_entry, + [client_entry, local, accepted_local_port](const TCP6Entry& e) { + return IPv6AddrEqual(&e.local_addr, local) && + e.local_port == accepted_local_port && + IPv6AddrEqual(&e.remote_addr, &client_entry.local_addr) && + e.remote_port == client_entry.local_port; + })); + EXPECT_EQ(accepted_entry.state, TCP_ESTABLISHED); +} + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/proc_net_udp.cc b/test/syscalls/linux/proc_net_udp.cc new file mode 100644 index 000000000..786b4b4af --- /dev/null +++ b/test/syscalls/linux/proc_net_udp.cc @@ -0,0 +1,309 @@ +// Copyright 2019 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <netinet/tcp.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "absl/strings/numbers.h" +#include "absl/strings/str_join.h" +#include "absl/strings/str_split.h" +#include "test/syscalls/linux/ip_socket_test_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { +namespace { + +using absl::StrCat; +using absl::StrFormat; +using absl::StrSplit; + +constexpr char kProcNetUDPHeader[] = + " sl local_address rem_address st tx_queue rx_queue tr tm->when " + "retrnsmt uid timeout inode ref pointer drops "; + +// UDPEntry represents a single entry from /proc/net/udp. +struct UDPEntry { + uint32_t local_addr; + uint16_t local_port; + + uint32_t remote_addr; + uint16_t remote_port; + + uint64_t state; + uint64_t uid; + uint64_t inode; +}; + +std::string DescribeFirstInetSocket(const SocketPair& sockets) { + const struct sockaddr* addr = sockets.first_addr(); + return StrFormat("First test socket: fd:%d %8X:%4X", sockets.first_fd(), + IPFromInetSockaddr(addr), PortFromInetSockaddr(addr)); +} + +std::string DescribeSecondInetSocket(const SocketPair& sockets) { + const struct sockaddr* addr = sockets.second_addr(); + return StrFormat("Second test socket fd:%d %8X:%4X", sockets.second_fd(), + IPFromInetSockaddr(addr), PortFromInetSockaddr(addr)); +} + +// Finds the first entry in 'entries' for which 'predicate' returns true. +// Returns true on match, and set 'match' to a copy of the matching entry. If +// 'match' is null, it's ignored. 
+bool FindBy(const std::vector<UDPEntry>& entries, UDPEntry* match, + std::function<bool(const UDPEntry&)> predicate) { + for (const UDPEntry& entry : entries) { + if (predicate(entry)) { + if (match != nullptr) { + *match = entry; + } + return true; + } + } + return false; +} + +bool FindByLocalAddr(const std::vector<UDPEntry>& entries, UDPEntry* match, + const struct sockaddr* addr) { + uint32_t host = IPFromInetSockaddr(addr); + uint16_t port = PortFromInetSockaddr(addr); + return FindBy(entries, match, [host, port](const UDPEntry& e) { + return (e.local_addr == host && e.local_port == port); + }); +} + +bool FindByRemoteAddr(const std::vector<UDPEntry>& entries, UDPEntry* match, + const struct sockaddr* addr) { + uint32_t host = IPFromInetSockaddr(addr); + uint16_t port = PortFromInetSockaddr(addr); + return FindBy(entries, match, [host, port](const UDPEntry& e) { + return (e.remote_addr == host && e.remote_port == port); + }); +} + +PosixErrorOr<uint64_t> InodeFromSocketFD(int fd) { + ASSIGN_OR_RETURN_ERRNO(struct stat s, Fstat(fd)); + if (!S_ISSOCK(s.st_mode)) { + return PosixError(EINVAL, StrFormat("FD %d is not a socket", fd)); + } + return s.st_ino; +} + +PosixErrorOr<bool> FindByFD(const std::vector<UDPEntry>& entries, + UDPEntry* match, int fd) { + ASSIGN_OR_RETURN_ERRNO(uint64_t inode, InodeFromSocketFD(fd)); + return FindBy(entries, match, + [inode](const UDPEntry& e) { return (e.inode == inode); }); +} + +// Returns a parsed representation of /proc/net/udp entries. 
+PosixErrorOr<std::vector<UDPEntry>> ProcNetUDPEntries() { + std::string content; + RETURN_IF_ERRNO(GetContents("/proc/net/udp", &content)); + + bool found_header = false; + std::vector<UDPEntry> entries; + std::vector<std::string> lines = StrSplit(content, '\n'); + std::cerr << "<contents of /proc/net/udp>" << std::endl; + for (const std::string& line : lines) { + std::cerr << line << std::endl; + + if (!found_header) { + EXPECT_EQ(line, kProcNetUDPHeader); + found_header = true; + continue; + } + if (line.empty()) { + continue; + } + + // Parse a single entry from /proc/net/udp. + // + // Example entries: + // + // clang-format off + // + // sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode ref pointer drops + // 3503: 0100007F:0035 00000000:0000 07 00000000:00000000 00:00000000 00000000 0 0 33317 2 0000000000000000 0 + // 3518: 00000000:0044 00000000:0000 07 00000000:00000000 00:00000000 00000000 0 0 40394 2 0000000000000000 0 + // ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ + // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 + // + // clang-format on + + UDPEntry entry; + std::vector<std::string> fields = + StrSplit(line, absl::ByAnyChar(": "), absl::SkipEmpty()); + + ASSIGN_OR_RETURN_ERRNO(entry.local_addr, AtoiBase(fields[1], 16)); + ASSIGN_OR_RETURN_ERRNO(entry.local_port, AtoiBase(fields[2], 16)); + + ASSIGN_OR_RETURN_ERRNO(entry.remote_addr, AtoiBase(fields[3], 16)); + ASSIGN_OR_RETURN_ERRNO(entry.remote_port, AtoiBase(fields[4], 16)); + + ASSIGN_OR_RETURN_ERRNO(entry.state, AtoiBase(fields[5], 16)); + ASSIGN_OR_RETURN_ERRNO(entry.uid, Atoi<uint64_t>(fields[11])); + ASSIGN_OR_RETURN_ERRNO(entry.inode, Atoi<uint64_t>(fields[13])); + + // Linux shares internal data structures between TCP and UDP sockets. The + // proc entries for UDP sockets share some fields with TCP sockets, but + // these fields should always be zero as they're not meaningful for UDP + // sockets. 
+ EXPECT_EQ(fields[8], "00") << StrFormat("sl:%s, tr", fields[0]); + EXPECT_EQ(fields[9], "00000000") << StrFormat("sl:%s, tm->when", fields[0]); + EXPECT_EQ(fields[10], "00000000") + << StrFormat("sl:%s, retrnsmt", fields[0]); + EXPECT_EQ(fields[12], "0") << StrFormat("sl:%s, timeout", fields[0]); + + entries.push_back(entry); + } + std::cerr << "<end of /proc/net/udp>" << std::endl; + + return entries; +} + +TEST(ProcNetUDP, Exists) { + const std::string content = + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/net/udp")); + const std::string header_line = StrCat(kProcNetUDPHeader, "\n"); + EXPECT_THAT(content, ::testing::StartsWith(header_line)); +} + +TEST(ProcNetUDP, EntryUID) { + auto sockets = + ASSERT_NO_ERRNO_AND_VALUE(IPv4UDPBidirectionalBindSocketPair(0).Create()); + std::vector<UDPEntry> entries = + ASSERT_NO_ERRNO_AND_VALUE(ProcNetUDPEntries()); + UDPEntry e; + ASSERT_TRUE(FindByLocalAddr(entries, &e, sockets->first_addr())) + << DescribeFirstInetSocket(*sockets); + EXPECT_EQ(e.uid, geteuid()); + ASSERT_TRUE(FindByRemoteAddr(entries, &e, sockets->first_addr())) + << DescribeSecondInetSocket(*sockets); + EXPECT_EQ(e.uid, geteuid()); +} + +TEST(ProcNetUDP, FindMutualEntries) { + auto sockets = + ASSERT_NO_ERRNO_AND_VALUE(IPv4UDPBidirectionalBindSocketPair(0).Create()); + std::vector<UDPEntry> entries = + ASSERT_NO_ERRNO_AND_VALUE(ProcNetUDPEntries()); + + EXPECT_TRUE(FindByLocalAddr(entries, nullptr, sockets->first_addr())) + << DescribeFirstInetSocket(*sockets); + EXPECT_TRUE(FindByRemoteAddr(entries, nullptr, sockets->first_addr())) + << DescribeSecondInetSocket(*sockets); + + EXPECT_TRUE(FindByLocalAddr(entries, nullptr, sockets->second_addr())) + << DescribeSecondInetSocket(*sockets); + EXPECT_TRUE(FindByRemoteAddr(entries, nullptr, sockets->second_addr())) + << DescribeFirstInetSocket(*sockets); +} + +TEST(ProcNetUDP, EntriesRemovedOnClose) { + auto sockets = + ASSERT_NO_ERRNO_AND_VALUE(IPv4UDPBidirectionalBindSocketPair(0).Create()); + 
std::vector<UDPEntry> entries = + ASSERT_NO_ERRNO_AND_VALUE(ProcNetUDPEntries()); + + EXPECT_TRUE(FindByLocalAddr(entries, nullptr, sockets->first_addr())) + << DescribeFirstInetSocket(*sockets); + EXPECT_TRUE(FindByLocalAddr(entries, nullptr, sockets->second_addr())) + << DescribeSecondInetSocket(*sockets); + + EXPECT_THAT(close(sockets->release_first_fd()), SyscallSucceeds()); + entries = ASSERT_NO_ERRNO_AND_VALUE(ProcNetUDPEntries()); + // First socket's entry should be gone, but the second socket's entry should + // still exist. + EXPECT_FALSE(FindByLocalAddr(entries, nullptr, sockets->first_addr())) + << DescribeFirstInetSocket(*sockets); + EXPECT_TRUE(FindByLocalAddr(entries, nullptr, sockets->second_addr())) + << DescribeSecondInetSocket(*sockets); + + EXPECT_THAT(close(sockets->release_second_fd()), SyscallSucceeds()); + entries = ASSERT_NO_ERRNO_AND_VALUE(ProcNetUDPEntries()); + // Both entries should be gone. + EXPECT_FALSE(FindByLocalAddr(entries, nullptr, sockets->first_addr())) + << DescribeFirstInetSocket(*sockets); + EXPECT_FALSE(FindByLocalAddr(entries, nullptr, sockets->second_addr())) + << DescribeSecondInetSocket(*sockets); +} + +PosixErrorOr<std::unique_ptr<FileDescriptor>> BoundUDPSocket() { + ASSIGN_OR_RETURN_ERRNO(std::unique_ptr<FileDescriptor> socket, + IPv4UDPUnboundSocket(0).Create()); + struct sockaddr_in addr; + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = htonl(INADDR_ANY); + addr.sin_port = 0; + + int res = bind(socket->get(), reinterpret_cast<const struct sockaddr*>(&addr), + sizeof(addr)); + if (res) { + return PosixError(errno, "bind()"); + } + return socket; +} + +TEST(ProcNetUDP, BoundEntry) { + std::unique_ptr<FileDescriptor> socket = + ASSERT_NO_ERRNO_AND_VALUE(BoundUDPSocket()); + struct sockaddr addr; + socklen_t len = sizeof(addr); + ASSERT_THAT(getsockname(socket->get(), &addr, &len), SyscallSucceeds()); + uint16_t port = PortFromInetSockaddr(&addr); + + std::vector<UDPEntry> entries = + 
ASSERT_NO_ERRNO_AND_VALUE(ProcNetUDPEntries()); + UDPEntry e; + ASSERT_TRUE(ASSERT_NO_ERRNO_AND_VALUE(FindByFD(entries, &e, socket->get()))); + EXPECT_EQ(e.local_port, port); + EXPECT_EQ(e.remote_addr, 0); + EXPECT_EQ(e.remote_port, 0); +} + +TEST(ProcNetUDP, BoundSocketStateClosed) { + std::unique_ptr<FileDescriptor> socket = + ASSERT_NO_ERRNO_AND_VALUE(BoundUDPSocket()); + std::vector<UDPEntry> entries = + ASSERT_NO_ERRNO_AND_VALUE(ProcNetUDPEntries()); + UDPEntry e; + ASSERT_TRUE(ASSERT_NO_ERRNO_AND_VALUE(FindByFD(entries, &e, socket->get()))); + EXPECT_EQ(e.state, TCP_CLOSE); +} + +TEST(ProcNetUDP, ConnectedSocketStateEstablished) { + auto sockets = + ASSERT_NO_ERRNO_AND_VALUE(IPv4UDPBidirectionalBindSocketPair(0).Create()); + std::vector<UDPEntry> entries = + ASSERT_NO_ERRNO_AND_VALUE(ProcNetUDPEntries()); + + UDPEntry e; + ASSERT_TRUE(FindByLocalAddr(entries, &e, sockets->first_addr())) + << DescribeFirstInetSocket(*sockets); + EXPECT_EQ(e.state, TCP_ESTABLISHED); + + ASSERT_TRUE(FindByLocalAddr(entries, &e, sockets->second_addr())) + << DescribeSecondInetSocket(*sockets); + EXPECT_EQ(e.state, TCP_ESTABLISHED); +} + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/proc_net_unix.cc b/test/syscalls/linux/proc_net_unix.cc new file mode 100644 index 000000000..a63067586 --- /dev/null +++ b/test/syscalls/linux/proc_net_unix.cc @@ -0,0 +1,443 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "gtest/gtest.h" +#include "absl/strings/numbers.h" +#include "absl/strings/str_format.h" +#include "absl/strings/str_join.h" +#include "absl/strings/str_split.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/fs_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { +namespace { + +using absl::StrCat; +using absl::StreamFormat; +using absl::StrFormat; + +constexpr char kProcNetUnixHeader[] = + "Num RefCount Protocol Flags Type St Inode Path"; + +// Possible values of the "st" field in a /proc/net/unix entry. Source: Linux +// kernel, include/uapi/linux/net.h. +enum { + SS_FREE = 0, // Not allocated + SS_UNCONNECTED, // Unconnected to any socket + SS_CONNECTING, // In process of connecting + SS_CONNECTED, // Connected to socket + SS_DISCONNECTING // In process of disconnecting +}; + +// UnixEntry represents a single entry from /proc/net/unix. +struct UnixEntry { + uintptr_t addr; + uint64_t refs; + uint64_t protocol; + uint64_t flags; + uint64_t type; + uint64_t state; + uint64_t inode; + std::string path; +}; + +// Abstract socket paths can have either trailing null bytes or '@'s as padding +// at the end, depending on the linux version. This function strips any such +// padding. +void StripAbstractPathPadding(std::string* s) { + const char pad_char = s->back(); + if (pad_char != '\0' && pad_char != '@') { + return; + } + + const auto last_pos = s->find_last_not_of(pad_char); + if (last_pos != std::string::npos) { + s->resize(last_pos + 1); + } +} + +// Precondition: addr must be a unix socket address (i.e. sockaddr_un) and +// addr->sun_path must be null-terminated. 
This is always the case if addr comes +// from Linux: +// +// Per man unix(7): +// +// "When the address of a pathname socket is returned (by [getsockname(2)]), its +// length is +// +// offsetof(struct sockaddr_un, sun_path) + strlen(sun_path) + 1 +// +// and sun_path contains the null-terminated pathname." +std::string ExtractPath(const struct sockaddr* addr) { + const char* path = + reinterpret_cast<const struct sockaddr_un*>(addr)->sun_path; + // Note: sockaddr_un.sun_path is an embedded character array of length + // UNIX_PATH_MAX, so we can always safely dereference the first 2 bytes below. + // + // We also rely on the path being null-terminated. + if (path[0] == 0) { + std::string abstract_path = StrCat("@", &path[1]); + StripAbstractPathPadding(&abstract_path); + return abstract_path; + } + return std::string(path); +} + +// Returns a parsed representation of /proc/net/unix entries. +PosixErrorOr<std::vector<UnixEntry>> ProcNetUnixEntries() { + std::string content; + RETURN_IF_ERRNO(GetContents("/proc/net/unix", &content)); + + bool skipped_header = false; + std::vector<UnixEntry> entries; + std::vector<std::string> lines = absl::StrSplit(content, '\n'); + std::cerr << "<contents of /proc/net/unix>" << std::endl; + for (const std::string& line : lines) { + // Emit the proc entry to the test output to provide context for the test + // results. + std::cerr << line << std::endl; + + if (!skipped_header) { + EXPECT_EQ(line, kProcNetUnixHeader); + skipped_header = true; + continue; + } + if (line.empty()) { + continue; + } + + // Parse a single entry from /proc/net/unix. 
+ // + // Sample file: + // + // clang-format off + // + // Num RefCount Protocol Flags Type St Inode Path" + // ffffa130e7041c00: 00000002 00000000 00010000 0001 01 1299413685 /tmp/control_server/13293772586877554487 + // ffffa14f547dc400: 00000002 00000000 00010000 0001 01 3793 @remote_coredump + // + // clang-format on + // + // Note that from the second entry, the inode number can be padded using + // spaces, so we need to handle it separately during parsing. See + // net/unix/af_unix.c:unix_seq_show() for how these entries are produced. In + // particular, only the inode field is padded with spaces. + UnixEntry entry; + + // Process the first 6 fields, up to but not including "Inode". + std::vector<std::string> fields = + absl::StrSplit(line, absl::MaxSplits(' ', 6)); + + if (fields.size() < 7) { + return PosixError(EINVAL, StrFormat("Invalid entry: '%s'\n", line)); + } + + // AtoiBase can't handle the ':' in the "Num" field, so strip it out. + std::vector<std::string> addr = absl::StrSplit(fields[0], ':'); + ASSIGN_OR_RETURN_ERRNO(entry.addr, AtoiBase(addr[0], 16)); + + ASSIGN_OR_RETURN_ERRNO(entry.refs, AtoiBase(fields[1], 16)); + ASSIGN_OR_RETURN_ERRNO(entry.protocol, AtoiBase(fields[2], 16)); + ASSIGN_OR_RETURN_ERRNO(entry.flags, AtoiBase(fields[3], 16)); + ASSIGN_OR_RETURN_ERRNO(entry.type, AtoiBase(fields[4], 16)); + ASSIGN_OR_RETURN_ERRNO(entry.state, AtoiBase(fields[5], 16)); + + absl::string_view rest = absl::StripAsciiWhitespace(fields[6]); + fields = absl::StrSplit(rest, absl::MaxSplits(' ', 1)); + if (fields.empty()) { + return PosixError( + EINVAL, StrFormat("Invalid entry, missing 'Inode': '%s'\n", line)); + } + ASSIGN_OR_RETURN_ERRNO(entry.inode, AtoiBase(fields[0], 10)); + + entry.path = ""; + if (fields.size() > 1) { + entry.path = fields[1]; + StripAbstractPathPadding(&entry.path); + } + + entries.push_back(entry); + } + std::cerr << "<end of /proc/net/unix>" << std::endl; + + return entries; +} + +// Finds the first entry in 'entries' for 
which 'predicate' returns true. +// Returns true on match, and sets 'match' to point to the matching entry. +bool FindBy(std::vector<UnixEntry> entries, UnixEntry* match, + std::function<bool(const UnixEntry&)> predicate) { + for (int i = 0; i < entries.size(); ++i) { + if (predicate(entries[i])) { + *match = entries[i]; + return true; + } + } + return false; +} + +bool FindByPath(std::vector<UnixEntry> entries, UnixEntry* match, + const std::string& path) { + return FindBy(entries, match, + [path](const UnixEntry& e) { return e.path == path; }); +} + +TEST(ProcNetUnix, Exists) { + const std::string content = + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/net/unix")); + const std::string header_line = StrCat(kProcNetUnixHeader, "\n"); + if (IsRunningOnGvisor()) { + // Should be just the header since we don't have any unix domain sockets + // yet. + EXPECT_EQ(content, header_line); + } else { + // However, on a general linux machine, we could have abitrary sockets on + // the system, so just check the header. + EXPECT_THAT(content, ::testing::StartsWith(header_line)); + } +} + +TEST(ProcNetUnix, FilesystemBindAcceptConnect) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE( + FilesystemBoundUnixDomainSocketPair(SOCK_STREAM).Create()); + + std::string path1 = ExtractPath(sockets->first_addr()); + std::string path2 = ExtractPath(sockets->second_addr()); + std::cerr << StreamFormat("Server socket address (path1): %s\n", path1); + std::cerr << StreamFormat("Client socket address (path2): %s\n", path2); + + std::vector<UnixEntry> entries = + ASSERT_NO_ERRNO_AND_VALUE(ProcNetUnixEntries()); + if (IsRunningOnGvisor()) { + EXPECT_EQ(entries.size(), 2); + } + + // The server-side socket's path is listed in the socket entry... + UnixEntry s1; + EXPECT_TRUE(FindByPath(entries, &s1, path1)); + + // ... but the client-side socket's path is not. 
+ UnixEntry s2; + EXPECT_FALSE(FindByPath(entries, &s2, path2)); +} + +TEST(ProcNetUnix, AbstractBindAcceptConnect) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE( + AbstractBoundUnixDomainSocketPair(SOCK_STREAM).Create()); + + std::string path1 = ExtractPath(sockets->first_addr()); + std::string path2 = ExtractPath(sockets->second_addr()); + std::cerr << StreamFormat("Server socket address (path1): '%s'\n", path1); + std::cerr << StreamFormat("Client socket address (path2): '%s'\n", path2); + + std::vector<UnixEntry> entries = + ASSERT_NO_ERRNO_AND_VALUE(ProcNetUnixEntries()); + if (IsRunningOnGvisor()) { + EXPECT_EQ(entries.size(), 2); + } + + // The server-side socket's path is listed in the socket entry... + UnixEntry s1; + EXPECT_TRUE(FindByPath(entries, &s1, path1)); + + // ... but the client-side socket's path is not. + UnixEntry s2; + EXPECT_FALSE(FindByPath(entries, &s2, path2)); +} + +TEST(ProcNetUnix, SocketPair) { + // Under gvisor, ensure a socketpair() syscall creates exactly 2 new + // entries. We have no way to verify this under Linux, as we have no control + // over socket creation on a general Linux machine. 
+ SKIP_IF(!IsRunningOnGvisor()); + + std::vector<UnixEntry> entries = + ASSERT_NO_ERRNO_AND_VALUE(ProcNetUnixEntries()); + ASSERT_EQ(entries.size(), 0); + + auto sockets = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_STREAM).Create()); + + entries = ASSERT_NO_ERRNO_AND_VALUE(ProcNetUnixEntries()); + EXPECT_EQ(entries.size(), 2); +} + +TEST(ProcNetUnix, StreamSocketStateUnconnectedOnBind) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE( + AbstractUnboundUnixDomainSocketPair(SOCK_STREAM).Create()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + std::vector<UnixEntry> entries = + ASSERT_NO_ERRNO_AND_VALUE(ProcNetUnixEntries()); + + const std::string address = ExtractPath(sockets->first_addr()); + UnixEntry bind_entry; + ASSERT_TRUE(FindByPath(entries, &bind_entry, address)); + EXPECT_EQ(bind_entry.state, SS_UNCONNECTED); +} + +TEST(ProcNetUnix, StreamSocketStateStateUnconnectedOnListen) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE( + AbstractUnboundUnixDomainSocketPair(SOCK_STREAM).Create()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + std::vector<UnixEntry> entries = + ASSERT_NO_ERRNO_AND_VALUE(ProcNetUnixEntries()); + + const std::string address = ExtractPath(sockets->first_addr()); + UnixEntry bind_entry; + ASSERT_TRUE(FindByPath(entries, &bind_entry, address)); + EXPECT_EQ(bind_entry.state, SS_UNCONNECTED); + + ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds()); + + entries = ASSERT_NO_ERRNO_AND_VALUE(ProcNetUnixEntries()); + UnixEntry listen_entry; + ASSERT_TRUE( + FindByPath(entries, &listen_entry, ExtractPath(sockets->first_addr()))); + EXPECT_EQ(listen_entry.state, SS_UNCONNECTED); + // The bind and listen entries should refer to the same socket. 
+ EXPECT_EQ(listen_entry.inode, bind_entry.inode); +} + +TEST(ProcNetUnix, StreamSocketStateStateConnectedOnAccept) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE( + AbstractUnboundUnixDomainSocketPair(SOCK_STREAM).Create()); + const std::string address = ExtractPath(sockets->first_addr()); + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds()); + std::vector<UnixEntry> entries = + ASSERT_NO_ERRNO_AND_VALUE(ProcNetUnixEntries()); + UnixEntry listen_entry; + ASSERT_TRUE( + FindByPath(entries, &listen_entry, ExtractPath(sockets->first_addr()))); + + ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + int clientfd; + ASSERT_THAT(clientfd = accept(sockets->first_fd(), nullptr, nullptr), + SyscallSucceeds()); + + // Find the entry for the accepted socket. UDS proc entries don't have a + // remote address, so we distinguish the accepted socket from the listen + // socket by checking for a different inode. + entries = ASSERT_NO_ERRNO_AND_VALUE(ProcNetUnixEntries()); + UnixEntry accept_entry; + ASSERT_TRUE(FindBy( + entries, &accept_entry, [address, listen_entry](const UnixEntry& e) { + return e.path == address && e.inode != listen_entry.inode; + })); + EXPECT_EQ(accept_entry.state, SS_CONNECTED); + // Listen entry should still be in SS_UNCONNECTED state. 
+ ASSERT_TRUE(FindBy(entries, &listen_entry, + [&sockets, listen_entry](const UnixEntry& e) { + return e.path == ExtractPath(sockets->first_addr()) && + e.inode == listen_entry.inode; + })); + EXPECT_EQ(listen_entry.state, SS_UNCONNECTED); +} + +TEST(ProcNetUnix, DgramSocketStateDisconnectingOnBind) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE( + AbstractUnboundUnixDomainSocketPair(SOCK_DGRAM).Create()); + + std::vector<UnixEntry> entries = + ASSERT_NO_ERRNO_AND_VALUE(ProcNetUnixEntries()); + + // On gVisor, the only two UDS on the system are the ones we just created and + // we rely on this to locate the test socket entries in the remainder of the + // test. On a generic Linux system, we have no easy way to locate the + // corresponding entries, as they don't have an address yet. + if (IsRunningOnGvisor()) { + ASSERT_EQ(entries.size(), 2); + for (const auto& e : entries) { + ASSERT_EQ(e.state, SS_DISCONNECTING); + } + } + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + entries = ASSERT_NO_ERRNO_AND_VALUE(ProcNetUnixEntries()); + const std::string address = ExtractPath(sockets->first_addr()); + UnixEntry bind_entry; + ASSERT_TRUE(FindByPath(entries, &bind_entry, address)); + EXPECT_EQ(bind_entry.state, SS_UNCONNECTED); +} + +TEST(ProcNetUnix, DgramSocketStateConnectingOnConnect) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE( + AbstractUnboundUnixDomainSocketPair(SOCK_DGRAM).Create()); + + std::vector<UnixEntry> entries = + ASSERT_NO_ERRNO_AND_VALUE(ProcNetUnixEntries()); + + // On gVisor, the only two UDS on the system are the ones we just created and + // we rely on this to locate the test socket entries in the remainder of the + // test. On a generic Linux system, we have no easy way to locate the + // corresponding entries, as they don't have an address yet. 
+ if (IsRunningOnGvisor()) { + ASSERT_EQ(entries.size(), 2); + for (const auto& e : entries) { + ASSERT_EQ(e.state, SS_DISCONNECTING); + } + } + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + entries = ASSERT_NO_ERRNO_AND_VALUE(ProcNetUnixEntries()); + const std::string address = ExtractPath(sockets->first_addr()); + UnixEntry bind_entry; + ASSERT_TRUE(FindByPath(entries, &bind_entry, address)); + + ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + entries = ASSERT_NO_ERRNO_AND_VALUE(ProcNetUnixEntries()); + + // Once again, we have no easy way to identify the connecting socket as it has + // no listed address. We can only identify the entry as the "non-bind socket + // entry" on gVisor, where we're guaranteed to have only the two entries we + // create during this test. + if (IsRunningOnGvisor()) { + ASSERT_EQ(entries.size(), 2); + UnixEntry connect_entry; + ASSERT_TRUE( + FindBy(entries, &connect_entry, [bind_entry](const UnixEntry& e) { + return e.inode != bind_entry.inode; + })); + EXPECT_EQ(connect_entry.state, SS_CONNECTING); + } +} + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/proc_pid_oomscore.cc b/test/syscalls/linux/proc_pid_oomscore.cc new file mode 100644 index 000000000..707821a3f --- /dev/null +++ b/test/syscalls/linux/proc_pid_oomscore.cc @@ -0,0 +1,72 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include <errno.h> + +#include <exception> +#include <iostream> +#include <string> + +#include "test/util/fs_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +PosixErrorOr<int> ReadProcNumber(std::string path) { + ASSIGN_OR_RETURN_ERRNO(std::string contents, GetContents(path)); + EXPECT_EQ(contents[contents.length() - 1], '\n'); + + int num; + if (!absl::SimpleAtoi(contents, &num)) { + return PosixError(EINVAL, "invalid value: " + contents); + } + + return num; +} + +TEST(ProcPidOomscoreTest, BasicRead) { + auto const oom_score = + ASSERT_NO_ERRNO_AND_VALUE(ReadProcNumber("/proc/self/oom_score")); + EXPECT_LE(oom_score, 1000); + EXPECT_GE(oom_score, -1000); +} + +TEST(ProcPidOomscoreAdjTest, BasicRead) { + auto const oom_score = + ASSERT_NO_ERRNO_AND_VALUE(ReadProcNumber("/proc/self/oom_score_adj")); + + // oom_score_adj defaults to 0. + EXPECT_EQ(oom_score, 0); +} + +TEST(ProcPidOomscoreAdjTest, BasicWrite) { + constexpr int test_value = 7; + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/self/oom_score_adj", O_WRONLY)); + ASSERT_THAT( + RetryEINTR(write)(fd.get(), std::to_string(test_value).c_str(), 1), + SyscallSucceeds()); + + auto const oom_score = + ASSERT_NO_ERRNO_AND_VALUE(ReadProcNumber("/proc/self/oom_score_adj")); + EXPECT_EQ(oom_score, test_value); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/proc_pid_smaps.cc b/test/syscalls/linux/proc_pid_smaps.cc new file mode 100644 index 000000000..9fb1b3a2c --- /dev/null +++ b/test/syscalls/linux/proc_pid_smaps.cc @@ -0,0 +1,468 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <stddef.h> +#include <stdint.h> + +#include <algorithm> +#include <iostream> +#include <string> +#include <utility> +#include <vector> + +#include "absl/container/flat_hash_set.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_format.h" +#include "absl/strings/str_split.h" +#include "absl/strings/string_view.h" +#include "absl/types/optional.h" +#include "test/util/file_descriptor.h" +#include "test/util/fs_util.h" +#include "test/util/memory_util.h" +#include "test/util/posix_error.h" +#include "test/util/proc_util.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +using ::testing::Contains; +using ::testing::ElementsAreArray; +using ::testing::IsSupersetOf; +using ::testing::Not; +using ::testing::Optional; + +namespace gvisor { +namespace testing { + +namespace { + +struct ProcPidSmapsEntry { + ProcMapsEntry maps_entry; + + // These fields should always exist, as they were included in e070ad49f311 + // "[PATCH] add /proc/pid/smaps". + size_t size_kb; + size_t rss_kb; + size_t shared_clean_kb; + size_t shared_dirty_kb; + size_t private_clean_kb; + size_t private_dirty_kb; + + // These fields were added later and may not be present. 
+ absl::optional<size_t> pss_kb; + absl::optional<size_t> referenced_kb; + absl::optional<size_t> anonymous_kb; + absl::optional<size_t> anon_huge_pages_kb; + absl::optional<size_t> shared_hugetlb_kb; + absl::optional<size_t> private_hugetlb_kb; + absl::optional<size_t> swap_kb; + absl::optional<size_t> swap_pss_kb; + absl::optional<size_t> kernel_page_size_kb; + absl::optional<size_t> mmu_page_size_kb; + absl::optional<size_t> locked_kb; + + // Caution: "Note that there is no guarantee that every flag and associated + // mnemonic will be present in all further kernel releases. Things get + // changed, the flags may be vanished or the reverse -- new added." - Linux + // Documentation/filesystems/proc.txt, on VmFlags. Avoid checking for any + // flags that are not extremely well-established. + absl::optional<std::vector<std::string>> vm_flags; +}; + +// Given the value part of a /proc/[pid]/smaps field containing a value in kB +// (for example, " 4 kB", returns the value in kB (in this example, 4). 
+PosixErrorOr<size_t> SmapsValueKb(absl::string_view value) { + // TODO(jamieliu): let us use RE2 or <regex> + std::pair<absl::string_view, absl::string_view> parts = + absl::StrSplit(value, ' ', absl::SkipEmpty()); + if (parts.second != "kB") { + return PosixError(EINVAL, + absl::StrCat("invalid smaps field value: ", value)); + } + ASSIGN_OR_RETURN_ERRNO(auto val_kb, Atoi<size_t>(parts.first)); + return val_kb; +} + +PosixErrorOr<std::vector<ProcPidSmapsEntry>> ParseProcPidSmaps( + absl::string_view contents) { + std::vector<ProcPidSmapsEntry> entries; + absl::optional<ProcPidSmapsEntry> entry; + bool have_size_kb = false; + bool have_rss_kb = false; + bool have_shared_clean_kb = false; + bool have_shared_dirty_kb = false; + bool have_private_clean_kb = false; + bool have_private_dirty_kb = false; + + auto const finish_entry = [&] { + if (entry) { + if (!have_size_kb) { + return PosixError(EINVAL, "smaps entry is missing Size"); + } + if (!have_rss_kb) { + return PosixError(EINVAL, "smaps entry is missing Rss"); + } + if (!have_shared_clean_kb) { + return PosixError(EINVAL, "smaps entry is missing Shared_Clean"); + } + if (!have_shared_dirty_kb) { + return PosixError(EINVAL, "smaps entry is missing Shared_Dirty"); + } + if (!have_private_clean_kb) { + return PosixError(EINVAL, "smaps entry is missing Private_Clean"); + } + if (!have_private_dirty_kb) { + return PosixError(EINVAL, "smaps entry is missing Private_Dirty"); + } + // std::move(entry.value()) instead of std::move(entry).value(), because + // otherwise tools may report a "use-after-move" warning, which is + // spurious because entry.emplace() below resets entry to a new + // ProcPidSmapsEntry. 
+ entries.emplace_back(std::move(entry.value())); + } + entry.emplace(); + have_size_kb = false; + have_rss_kb = false; + have_shared_clean_kb = false; + have_shared_dirty_kb = false; + have_private_clean_kb = false; + have_private_dirty_kb = false; + return NoError(); + }; + + // Holds key/value pairs from smaps field lines. Declared here so it can be + // captured by reference by the following lambdas. + std::vector<absl::string_view> key_value; + + auto const on_required_field_kb = [&](size_t* field, bool* have_field) { + if (*have_field) { + return PosixError( + EINVAL, + absl::StrFormat("smaps entry has duplicate %s line", key_value[0])); + } + ASSIGN_OR_RETURN_ERRNO(*field, SmapsValueKb(key_value[1])); + *have_field = true; + return NoError(); + }; + + auto const on_optional_field_kb = [&](absl::optional<size_t>* field) { + if (*field) { + return PosixError( + EINVAL, + absl::StrFormat("smaps entry has duplicate %s line", key_value[0])); + } + ASSIGN_OR_RETURN_ERRNO(*field, SmapsValueKb(key_value[1])); + return NoError(); + }; + + absl::flat_hash_set<std::string> unknown_fields; + auto const on_unknown_field = [&] { + absl::string_view key = key_value[0]; + // Don't mention unknown fields more than once. + if (unknown_fields.count(key)) { + return; + } + unknown_fields.insert(std::string(key)); + std::cerr << "skipping unknown smaps field " << key << std::endl; + }; + + auto lines = absl::StrSplit(contents, '\n', absl::SkipEmpty()); + for (absl::string_view l : lines) { + // Is this line a valid /proc/[pid]/maps entry? + auto maybe_maps_entry = ParseProcMapsLine(l); + if (maybe_maps_entry.ok()) { + // This marks the beginning of a new /proc/[pid]/smaps entry. + RETURN_IF_ERRNO(finish_entry()); + entry->maps_entry = std::move(maybe_maps_entry).ValueOrDie(); + continue; + } + // Otherwise it's a field in an existing /proc/[pid]/smaps entry of the form + // "key:value" (where value in practice will be preceded by a variable + // amount of whitespace). 
+ if (!entry) { + std::cerr << "smaps line not considered a maps line: " + << maybe_maps_entry.error_message() << std::endl; + return PosixError( + EINVAL, + absl::StrCat("smaps field line without preceding maps line: ", l)); + } + key_value = absl::StrSplit(l, absl::MaxSplits(':', 1)); + if (key_value.size() != 2) { + return PosixError(EINVAL, absl::StrCat("invalid smaps field line: ", l)); + } + absl::string_view const key = key_value[0]; + if (key == "Size") { + RETURN_IF_ERRNO(on_required_field_kb(&entry->size_kb, &have_size_kb)); + } else if (key == "Rss") { + RETURN_IF_ERRNO(on_required_field_kb(&entry->rss_kb, &have_rss_kb)); + } else if (key == "Shared_Clean") { + RETURN_IF_ERRNO( + on_required_field_kb(&entry->shared_clean_kb, &have_shared_clean_kb)); + } else if (key == "Shared_Dirty") { + RETURN_IF_ERRNO( + on_required_field_kb(&entry->shared_dirty_kb, &have_shared_dirty_kb)); + } else if (key == "Private_Clean") { + RETURN_IF_ERRNO(on_required_field_kb(&entry->private_clean_kb, + &have_private_clean_kb)); + } else if (key == "Private_Dirty") { + RETURN_IF_ERRNO(on_required_field_kb(&entry->private_dirty_kb, + &have_private_dirty_kb)); + } else if (key == "Pss") { + RETURN_IF_ERRNO(on_optional_field_kb(&entry->pss_kb)); + } else if (key == "Referenced") { + RETURN_IF_ERRNO(on_optional_field_kb(&entry->referenced_kb)); + } else if (key == "Anonymous") { + RETURN_IF_ERRNO(on_optional_field_kb(&entry->anonymous_kb)); + } else if (key == "AnonHugePages") { + RETURN_IF_ERRNO(on_optional_field_kb(&entry->anon_huge_pages_kb)); + } else if (key == "Shared_Hugetlb") { + RETURN_IF_ERRNO(on_optional_field_kb(&entry->shared_hugetlb_kb)); + } else if (key == "Private_Hugetlb") { + RETURN_IF_ERRNO(on_optional_field_kb(&entry->private_hugetlb_kb)); + } else if (key == "Swap") { + RETURN_IF_ERRNO(on_optional_field_kb(&entry->swap_kb)); + } else if (key == "SwapPss") { + RETURN_IF_ERRNO(on_optional_field_kb(&entry->swap_pss_kb)); + } else if (key == "KernelPageSize") { + 
RETURN_IF_ERRNO(on_optional_field_kb(&entry->kernel_page_size_kb)); + } else if (key == "MMUPageSize") { + RETURN_IF_ERRNO(on_optional_field_kb(&entry->mmu_page_size_kb)); + } else if (key == "Locked") { + RETURN_IF_ERRNO(on_optional_field_kb(&entry->locked_kb)); + } else if (key == "VmFlags") { + if (entry->vm_flags) { + return PosixError(EINVAL, "duplicate VmFlags line"); + } + entry->vm_flags = absl::StrSplit(key_value[1], ' ', absl::SkipEmpty()); + } else { + on_unknown_field(); + } + } + RETURN_IF_ERRNO(finish_entry()); + return entries; +}; + +TEST(ParseProcPidSmapsTest, Correctness) { + auto entries = ASSERT_NO_ERRNO_AND_VALUE( + ParseProcPidSmaps("0-10000 rw-s 00000000 00:00 0 " + " /dev/zero (deleted)\n" + "Size: 0 kB\n" + "Rss: 1 kB\n" + "Pss: 2 kB\n" + "Shared_Clean: 3 kB\n" + "Shared_Dirty: 4 kB\n" + "Private_Clean: 5 kB\n" + "Private_Dirty: 6 kB\n" + "Referenced: 7 kB\n" + "Anonymous: 8 kB\n" + "AnonHugePages: 9 kB\n" + "Shared_Hugetlb: 10 kB\n" + "Private_Hugetlb: 11 kB\n" + "Swap: 12 kB\n" + "SwapPss: 13 kB\n" + "KernelPageSize: 14 kB\n" + "MMUPageSize: 15 kB\n" + "Locked: 16 kB\n" + "FutureUnknownKey: 17 kB\n" + "VmFlags: rd wr sh mr mw me ms lo ?? 
sd \n")); + ASSERT_EQ(entries.size(), 1); + auto& entry = entries[0]; + EXPECT_EQ(entry.maps_entry.filename, "/dev/zero (deleted)"); + EXPECT_EQ(entry.size_kb, 0); + EXPECT_EQ(entry.rss_kb, 1); + EXPECT_THAT(entry.pss_kb, Optional(2)); + EXPECT_EQ(entry.shared_clean_kb, 3); + EXPECT_EQ(entry.shared_dirty_kb, 4); + EXPECT_EQ(entry.private_clean_kb, 5); + EXPECT_EQ(entry.private_dirty_kb, 6); + EXPECT_THAT(entry.referenced_kb, Optional(7)); + EXPECT_THAT(entry.anonymous_kb, Optional(8)); + EXPECT_THAT(entry.anon_huge_pages_kb, Optional(9)); + EXPECT_THAT(entry.shared_hugetlb_kb, Optional(10)); + EXPECT_THAT(entry.private_hugetlb_kb, Optional(11)); + EXPECT_THAT(entry.swap_kb, Optional(12)); + EXPECT_THAT(entry.swap_pss_kb, Optional(13)); + EXPECT_THAT(entry.kernel_page_size_kb, Optional(14)); + EXPECT_THAT(entry.mmu_page_size_kb, Optional(15)); + EXPECT_THAT(entry.locked_kb, Optional(16)); + EXPECT_THAT(entry.vm_flags, + Optional(ElementsAreArray({"rd", "wr", "sh", "mr", "mw", "me", + "ms", "lo", "??", "sd"}))); +} + +// Returns the unique entry in entries containing the given address. 
+PosixErrorOr<ProcPidSmapsEntry> FindUniqueSmapsEntry( + std::vector<ProcPidSmapsEntry> const& entries, uintptr_t addr) { + auto const pred = [&](ProcPidSmapsEntry const& entry) { + return entry.maps_entry.start <= addr && addr < entry.maps_entry.end; + }; + auto const it = std::find_if(entries.begin(), entries.end(), pred); + if (it == entries.end()) { + return PosixError(EINVAL, + absl::StrFormat("no entry contains address %#x", addr)); + } + auto const it2 = std::find_if(it + 1, entries.end(), pred); + if (it2 != entries.end()) { + return PosixError( + EINVAL, + absl::StrFormat("overlapping entries [%#x-%#x) and [%#x-%#x) both " + "contain address %#x", + it->maps_entry.start, it->maps_entry.end, + it2->maps_entry.start, it2->maps_entry.end, addr)); + } + return *it; +} + +PosixErrorOr<std::vector<ProcPidSmapsEntry>> ReadProcSelfSmaps() { + ASSIGN_OR_RETURN_ERRNO(std::string contents, GetContents("/proc/self/smaps")); + return ParseProcPidSmaps(contents); +} + +TEST(ProcPidSmapsTest, SharedAnon) { + // Map with MAP_POPULATE so we get some RSS. + Mapping const m = ASSERT_NO_ERRNO_AND_VALUE(MmapAnon( + 2 * kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE)); + auto const entries = ASSERT_NO_ERRNO_AND_VALUE(ReadProcSelfSmaps()); + auto const entry = + ASSERT_NO_ERRNO_AND_VALUE(FindUniqueSmapsEntry(entries, m.addr())); + + EXPECT_EQ(entry.size_kb, m.len() / 1024); + // It's possible that populated pages have been swapped out, so RSS might be + // less than size. + EXPECT_LE(entry.rss_kb, entry.size_kb); + + if (entry.pss_kb) { + // PSS should be exactly equal to RSS since no other address spaces should + // be sharing our new mapping. + EXPECT_EQ(entry.pss_kb.value(), entry.rss_kb); + } + + // "Shared" and "private" in smaps refers to whether or not *physical pages* + // are shared; thus all pages in our MAP_SHARED mapping should nevertheless + // be private. 
+ EXPECT_EQ(entry.shared_clean_kb, 0); + EXPECT_EQ(entry.shared_dirty_kb, 0); + EXPECT_EQ(entry.private_clean_kb + entry.private_dirty_kb, entry.rss_kb) + << "Private_Clean = " << entry.private_clean_kb + << " kB, Private_Dirty = " << entry.private_dirty_kb << " kB"; + + // Shared anonymous mappings are implemented as a shmem file, so their pages + // are not PageAnon. + if (entry.anonymous_kb) { + EXPECT_EQ(entry.anonymous_kb.value(), 0); + } + + if (entry.vm_flags) { + EXPECT_THAT(entry.vm_flags.value(), + IsSupersetOf({"rd", "wr", "sh", "mr", "mw", "me", "ms"})); + EXPECT_THAT(entry.vm_flags.value(), Not(Contains("ex"))); + } +} + +TEST(ProcPidSmapsTest, PrivateAnon) { + // Map with MAP_POPULATE so we get some RSS. + Mapping const m = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(2 * kPageSize, PROT_WRITE, MAP_PRIVATE | MAP_POPULATE)); + auto const entries = ASSERT_NO_ERRNO_AND_VALUE(ReadProcSelfSmaps()); + auto const entry = + ASSERT_NO_ERRNO_AND_VALUE(FindUniqueSmapsEntry(entries, m.addr())); + + // It's possible that our mapping was merged with another vma, so the smaps + // entry might be bigger than our original mapping. + EXPECT_GE(entry.size_kb, m.len() / 1024); + EXPECT_LE(entry.rss_kb, entry.size_kb); + if (entry.pss_kb) { + EXPECT_LE(entry.pss_kb.value(), entry.rss_kb); + } + + if (entry.anonymous_kb) { + EXPECT_EQ(entry.anonymous_kb.value(), entry.rss_kb); + } + + if (entry.vm_flags) { + EXPECT_THAT(entry.vm_flags.value(), IsSupersetOf({"wr", "mr", "mw", "me"})); + // We passed PROT_WRITE to mmap. On at least x86, the mapping is in + // practice readable because there is no way to configure the MMU to make + // pages writable but not readable. However, VmFlags should reflect the + // flags set on the VMA, so "rd" (VM_READ) should not appear in VmFlags. 
+ EXPECT_THAT(entry.vm_flags.value(), Not(Contains("rd"))); + EXPECT_THAT(entry.vm_flags.value(), Not(Contains("ex"))); + EXPECT_THAT(entry.vm_flags.value(), Not(Contains("sh"))); + EXPECT_THAT(entry.vm_flags.value(), Not(Contains("ms"))); + } +} + +TEST(ProcPidSmapsTest, SharedReadOnlyFile) { + size_t const kFileSize = kPageSize; + + auto const temp_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + ASSERT_THAT(truncate(temp_file.path().c_str(), kFileSize), SyscallSucceeds()); + auto const fd = ASSERT_NO_ERRNO_AND_VALUE(Open(temp_file.path(), O_RDONLY)); + + auto const m = ASSERT_NO_ERRNO_AND_VALUE(Mmap( + nullptr, kFileSize, PROT_READ, MAP_SHARED | MAP_POPULATE, fd.get(), 0)); + auto const entries = ASSERT_NO_ERRNO_AND_VALUE(ReadProcSelfSmaps()); + auto const entry = + ASSERT_NO_ERRNO_AND_VALUE(FindUniqueSmapsEntry(entries, m.addr())); + + // Most of the same logic as the SharedAnon case applies. + EXPECT_EQ(entry.size_kb, kFileSize / 1024); + EXPECT_LE(entry.rss_kb, entry.size_kb); + if (entry.pss_kb) { + EXPECT_EQ(entry.pss_kb.value(), entry.rss_kb); + } + EXPECT_EQ(entry.shared_clean_kb, 0); + EXPECT_EQ(entry.shared_dirty_kb, 0); + EXPECT_EQ(entry.private_clean_kb + entry.private_dirty_kb, entry.rss_kb) + << "Private_Clean = " << entry.private_clean_kb + << " kB, Private_Dirty = " << entry.private_dirty_kb << " kB"; + if (entry.anonymous_kb) { + EXPECT_EQ(entry.anonymous_kb.value(), 0); + } + + if (entry.vm_flags) { + EXPECT_THAT(entry.vm_flags.value(), IsSupersetOf({"rd", "mr", "me", "ms"})); + EXPECT_THAT(entry.vm_flags.value(), Not(Contains("wr"))); + EXPECT_THAT(entry.vm_flags.value(), Not(Contains("ex"))); + // Because the mapped file was opened O_RDONLY, the VMA is !VM_MAYWRITE and + // also !VM_SHARED. 
+ EXPECT_THAT(entry.vm_flags.value(), Not(Contains("sh"))); + EXPECT_THAT(entry.vm_flags.value(), Not(Contains("mw"))); + } +} + +// Tests that gVisor's /proc/[pid]/smaps provides all of the fields we expect it +// to, which as of this writing is all fields provided by Linux 4.4. +TEST(ProcPidSmapsTest, GvisorFields) { + SKIP_IF(!IsRunningOnGvisor()); + auto const entries = ASSERT_NO_ERRNO_AND_VALUE(ReadProcSelfSmaps()); + for (auto const& entry : entries) { + EXPECT_TRUE(entry.pss_kb); + EXPECT_TRUE(entry.referenced_kb); + EXPECT_TRUE(entry.anonymous_kb); + EXPECT_TRUE(entry.anon_huge_pages_kb); + EXPECT_TRUE(entry.shared_hugetlb_kb); + EXPECT_TRUE(entry.private_hugetlb_kb); + EXPECT_TRUE(entry.swap_kb); + EXPECT_TRUE(entry.swap_pss_kb); + EXPECT_THAT(entry.kernel_page_size_kb, Optional(kPageSize / 1024)); + EXPECT_THAT(entry.mmu_page_size_kb, Optional(kPageSize / 1024)); + EXPECT_TRUE(entry.locked_kb); + EXPECT_TRUE(entry.vm_flags); + } +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/proc_pid_uid_gid_map.cc b/test/syscalls/linux/proc_pid_uid_gid_map.cc new file mode 100644 index 000000000..748f7be58 --- /dev/null +++ b/test/syscalls/linux/proc_pid_uid_gid_map.cc @@ -0,0 +1,311 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <fcntl.h> +#include <sched.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include <functional> +#include <string> +#include <tuple> +#include <utility> +#include <vector> + +#include "gtest/gtest.h" +#include "absl/strings/ascii.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_split.h" +#include "test/util/capability_util.h" +#include "test/util/cleanup.h" +#include "test/util/file_descriptor.h" +#include "test/util/fs_util.h" +#include "test/util/logging.h" +#include "test/util/multiprocess_util.h" +#include "test/util/posix_error.h" +#include "test/util/save_util.h" +#include "test/util/test_util.h" +#include "test/util/time_util.h" + +namespace gvisor { +namespace testing { + +PosixErrorOr<int> InNewUserNamespace(const std::function<void()>& fn) { + return InForkedProcess([&] { + TEST_PCHECK(unshare(CLONE_NEWUSER) == 0); + MaybeSave(); + fn(); + }); +} + +PosixErrorOr<std::tuple<pid_t, Cleanup>> CreateProcessInNewUserNamespace() { + int pipefd[2]; + if (pipe(pipefd) < 0) { + return PosixError(errno, "pipe failed"); + } + const auto cleanup_pipe_read = + Cleanup([&] { EXPECT_THAT(close(pipefd[0]), SyscallSucceeds()); }); + auto cleanup_pipe_write = + Cleanup([&] { EXPECT_THAT(close(pipefd[1]), SyscallSucceeds()); }); + pid_t child_pid = fork(); + if (child_pid < 0) { + return PosixError(errno, "fork failed"); + } + if (child_pid == 0) { + // Close our copy of the pipe's read end, which doesn't really matter. + TEST_PCHECK(close(pipefd[0]) >= 0); + TEST_PCHECK(unshare(CLONE_NEWUSER) == 0); + MaybeSave(); + // Indicate that we've switched namespaces by unblocking the parent's read. 
+ TEST_PCHECK(close(pipefd[1]) >= 0); + while (true) { + SleepSafe(absl::Minutes(1)); + } + } + auto cleanup_child = Cleanup([child_pid] { + EXPECT_THAT(kill(child_pid, SIGKILL), SyscallSucceeds()); + int status; + ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL) + << "status = " << status; + }); + // Close our copy of the pipe's write end, then wait for the child to close + // its copy, indicating that it's switched namespaces. + cleanup_pipe_write.Release()(); + char buf; + if (RetryEINTR(read)(pipefd[0], &buf, 1) < 0) { + return PosixError(errno, "reading from pipe failed"); + } + MaybeSave(); + return std::make_tuple(child_pid, std::move(cleanup_child)); +} + +// TEST_CHECK-fails on error, since this function is used in contexts that +// require async-signal-safety. +void DenySetgroupsByPath(const char* path) { + int fd = open(path, O_WRONLY); + if (fd < 0 && errno == ENOENT) { + // On kernels where this file doesn't exist, writing "deny" to it isn't + // necessary to write to gid_map. + return; + } + TEST_PCHECK(fd >= 0); + MaybeSave(); + char deny[] = "deny"; + TEST_PCHECK(write(fd, deny, sizeof(deny)) == sizeof(deny)); + MaybeSave(); + TEST_PCHECK(close(fd) == 0); +} + +void DenySelfSetgroups() { DenySetgroupsByPath("/proc/self/setgroups"); } + +void DenyPidSetgroups(pid_t pid) { + DenySetgroupsByPath(absl::StrCat("/proc/", pid, "/setgroups").c_str()); +} + +// Returns a valid UID/GID that isn't id. 
+uint32_t another_id(uint32_t id) { return (id + 1) % 65535; } + +struct TestParam { + std::string desc; + int cap; + std::function<std::string(absl::string_view)> get_map_filename; + std::function<uint32_t()> get_current_id; +}; + +std::string DescribeTestParam(const ::testing::TestParamInfo<TestParam>& info) { + return info.param.desc; +} + +std::vector<TestParam> UidGidMapTestParams() { + return {TestParam{"UID", CAP_SETUID, + [](absl::string_view pid) { + return absl::StrCat("/proc/", pid, "/uid_map"); + }, + []() -> uint32_t { return getuid(); }}, + TestParam{"GID", CAP_SETGID, + [](absl::string_view pid) { + return absl::StrCat("/proc/", pid, "/gid_map"); + }, + []() -> uint32_t { return getgid(); }}}; +} + +class ProcUidGidMapTest : public ::testing::TestWithParam<TestParam> { + protected: + uint32_t CurrentID() { return GetParam().get_current_id(); } +}; + +class ProcSelfUidGidMapTest : public ProcUidGidMapTest { + protected: + PosixErrorOr<int> InNewUserNamespaceWithMapFD( + const std::function<void(int)>& fn) { + std::string map_filename = GetParam().get_map_filename("self"); + return InNewUserNamespace([&] { + int fd = open(map_filename.c_str(), O_RDWR); + TEST_PCHECK(fd >= 0); + MaybeSave(); + fn(fd); + TEST_PCHECK(close(fd) == 0); + }); + } +}; + +class ProcPidUidGidMapTest : public ProcUidGidMapTest { + protected: + PosixErrorOr<bool> HaveSetIDCapability() { + return HaveCapability(GetParam().cap); + } + + // Returns true if the caller is running in a user namespace with all IDs + // mapped. This matters for tests that expect to successfully map arbitrary + // IDs into a child user namespace, since even with CAP_SET*ID this is only + // possible if those IDs are mapped into the current one. 
+ PosixErrorOr<bool> AllIDsMapped() { + ASSIGN_OR_RETURN_ERRNO(std::string id_map, + GetContents(GetParam().get_map_filename("self"))); + absl::StripTrailingAsciiWhitespace(&id_map); + std::vector<std::string> id_map_parts = + absl::StrSplit(id_map, ' ', absl::SkipEmpty()); + return id_map_parts == std::vector<std::string>({"0", "0", "4294967295"}); + } + + PosixErrorOr<FileDescriptor> OpenMapFile(pid_t pid) { + return Open(GetParam().get_map_filename(absl::StrCat(pid)), O_RDWR); + } +}; + +TEST_P(ProcSelfUidGidMapTest, IsInitiallyEmpty) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanCreateUserNamespace())); + EXPECT_THAT(InNewUserNamespaceWithMapFD([](int fd) { + char buf[64]; + TEST_PCHECK(read(fd, buf, sizeof(buf)) == 0); + }), + IsPosixErrorOkAndHolds(0)); +} + +TEST_P(ProcSelfUidGidMapTest, IdentityMapOwnID) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanCreateUserNamespace())); + uint32_t id = CurrentID(); + std::string line = absl::StrCat(id, " ", id, " 1"); + EXPECT_THAT( + InNewUserNamespaceWithMapFD([&](int fd) { + DenySelfSetgroups(); + TEST_PCHECK(write(fd, line.c_str(), line.size()) == line.size()); + }), + IsPosixErrorOkAndHolds(0)); +} + +TEST_P(ProcSelfUidGidMapTest, TrailingNewlineAndNULIgnored) { + // This is identical to IdentityMapOwnID, except that a trailing newline, NUL, + // and an invalid (incomplete) map entry are appended to the valid entry. The + // newline should be accepted, and everything after the NUL should be ignored. + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanCreateUserNamespace())); + uint32_t id = CurrentID(); + // The suffix contains an embedded NUL, so it is passed with an explicit + // length: a bare string literal handed to absl::StrCat() is measured up to + // its first NUL, which would drop the NUL and everything after it. + constexpr char kSuffix[] = " 1\n\0 4 3"; + std::string line = absl::StrCat( + id, " ", id, absl::string_view(kSuffix, sizeof(kSuffix) - 1)); + EXPECT_THAT( + InNewUserNamespaceWithMapFD([&](int fd) { + DenySelfSetgroups(); + // The write should return the full size of the write, even though + // characters after the NUL were ignored. 
+ TEST_PCHECK(write(fd, line.c_str(), line.size()) == line.size()); + }), + IsPosixErrorOkAndHolds(0)); +} + +TEST_P(ProcSelfUidGidMapTest, NonIdentityMapOwnID) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanCreateUserNamespace())); + uint32_t id = CurrentID(); + uint32_t id2 = another_id(id); + std::string line = absl::StrCat(id2, " ", id, " 1"); + EXPECT_THAT( + InNewUserNamespaceWithMapFD([&](int fd) { + DenySelfSetgroups(); + TEST_PCHECK(write(fd, line.c_str(), line.size()) == line.size()); + }), + IsPosixErrorOkAndHolds(0)); +} + +TEST_P(ProcSelfUidGidMapTest, MapOtherID) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanCreateUserNamespace())); + // Whether or not we have CAP_SET*ID is irrelevant: the process running in the + // new (child) user namespace won't have any capabilities in the current + // (parent) user namespace, which is needed. + uint32_t id = CurrentID(); + uint32_t id2 = another_id(id); + std::string line = absl::StrCat(id, " ", id2, " 1"); + EXPECT_THAT(InNewUserNamespaceWithMapFD([&](int fd) { + DenySelfSetgroups(); + TEST_PCHECK(write(fd, line.c_str(), line.size()) < 0); + TEST_CHECK(errno == EPERM); + }), + IsPosixErrorOkAndHolds(0)); +} + +INSTANTIATE_TEST_SUITE_P(All, ProcSelfUidGidMapTest, + ::testing::ValuesIn(UidGidMapTestParams()), + DescribeTestParam); + +TEST_P(ProcPidUidGidMapTest, MapOtherIDPrivileged) { + // Like ProcSelfUidGidMapTest_MapOtherID, but since we have CAP_SET*ID in the + // parent user namespace (this one), we can map IDs that aren't ours. 
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanCreateUserNamespace())); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveSetIDCapability())); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(AllIDsMapped())); + + pid_t child_pid; + Cleanup cleanup_child; + std::tie(child_pid, cleanup_child) = + ASSERT_NO_ERRNO_AND_VALUE(CreateProcessInNewUserNamespace()); + + uint32_t id = CurrentID(); + uint32_t id2 = another_id(id); + std::string line = absl::StrCat(id, " ", id2, " 1"); + DenyPidSetgroups(child_pid); + auto fd = ASSERT_NO_ERRNO_AND_VALUE(OpenMapFile(child_pid)); + EXPECT_THAT(write(fd.get(), line.c_str(), line.size()), + SyscallSucceedsWithValue(line.size())); +} + +TEST_P(ProcPidUidGidMapTest, MapAnyIDsPrivileged) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanCreateUserNamespace())); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveSetIDCapability())); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(AllIDsMapped())); + + pid_t child_pid; + Cleanup cleanup_child; + std::tie(child_pid, cleanup_child) = + ASSERT_NO_ERRNO_AND_VALUE(CreateProcessInNewUserNamespace()); + + // Test all of: + // + // - Mapping ranges of length > 1 + // + // - Mapping multiple ranges + // + // - Non-identity mappings + char entries[] = "2 0 2\n4 6 2"; + DenyPidSetgroups(child_pid); + auto fd = ASSERT_NO_ERRNO_AND_VALUE(OpenMapFile(child_pid)); + EXPECT_THAT(write(fd.get(), entries, sizeof(entries)), + SyscallSucceedsWithValue(sizeof(entries))); +} + +INSTANTIATE_TEST_SUITE_P(All, ProcPidUidGidMapTest, + ::testing::ValuesIn(UidGidMapTestParams()), + DescribeTestParam); + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/pselect.cc b/test/syscalls/linux/pselect.cc new file mode 100644 index 000000000..4e43c4d7f --- /dev/null +++ b/test/syscalls/linux/pselect.cc @@ -0,0 +1,190 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <signal.h> +#include <sys/select.h> + +#include "gtest/gtest.h" +#include "absl/time/time.h" +#include "test/syscalls/linux/base_poll_test.h" +#include "test/util/signal_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { +namespace { + +struct MaskWithSize { + sigset_t* mask; + size_t mask_size; +}; + +// Linux and glibc have a different idea of the sizeof sigset_t. When calling +// the syscall directly, use what the kernel expects. +unsigned kSigsetSize = SIGRTMAX / 8; + +// Linux pselect(2) differs from the glibc wrapper function in that Linux +// updates the timeout with the amount of time remaining. In order to test this +// behavior we need to use the syscall directly. +int syscallPselect6(int nfds, fd_set* readfds, fd_set* writefds, + fd_set* exceptfds, struct timespec* timeout, + const MaskWithSize* mask_with_size) { + return syscall(SYS_pselect6, nfds, readfds, writefds, exceptfds, timeout, + mask_with_size); +} + +class PselectTest : public BasePollTest { + protected: + void SetUp() override { BasePollTest::SetUp(); } + void TearDown() override { BasePollTest::TearDown(); } +}; + +// See that when there are no FD sets, pselect behaves like sleep. 
+TEST_F(PselectTest, NullFds) { + struct timespec timeout = absl::ToTimespec(absl::Milliseconds(10)); + ASSERT_THAT(syscallPselect6(0, nullptr, nullptr, nullptr, &timeout, nullptr), + SyscallSucceeds()); + EXPECT_EQ(timeout.tv_sec, 0); + EXPECT_EQ(timeout.tv_nsec, 0); + + timeout = absl::ToTimespec(absl::Milliseconds(10)); + ASSERT_THAT(syscallPselect6(1, nullptr, nullptr, nullptr, &timeout, nullptr), + SyscallSucceeds()); + EXPECT_EQ(timeout.tv_sec, 0); + EXPECT_EQ(timeout.tv_nsec, 0); +} + +TEST_F(PselectTest, ClosedFds) { + fd_set read_set; + FD_ZERO(&read_set); + int fd; + ASSERT_THAT(fd = dup(1), SyscallSucceeds()); + ASSERT_THAT(close(fd), SyscallSucceeds()); + FD_SET(fd, &read_set); + struct timespec timeout = absl::ToTimespec(absl::Milliseconds(10)); + EXPECT_THAT( + syscallPselect6(fd + 1, &read_set, nullptr, nullptr, &timeout, nullptr), + SyscallFailsWithErrno(EBADF)); +} + +TEST_F(PselectTest, ZeroTimeout) { + struct timespec timeout = {}; + ASSERT_THAT(syscallPselect6(1, nullptr, nullptr, nullptr, &timeout, nullptr), + SyscallSucceeds()); + EXPECT_EQ(timeout.tv_sec, 0); + EXPECT_EQ(timeout.tv_nsec, 0); +} + +// If random S/R interrupts the pselect, SIGALRM may be delivered before pselect +// restarts, causing the pselect to hang forever. +TEST_F(PselectTest, NoTimeout_NoRandomSave) { + // When there's no timeout, pselect may never return so set a timer. + SetTimer(absl::Milliseconds(100)); + // See that we get interrupted by the timer. 
+ ASSERT_THAT(syscallPselect6(1, nullptr, nullptr, nullptr, nullptr, nullptr), + SyscallFailsWithErrno(EINTR)); + EXPECT_TRUE(TimerFired()); +} + +TEST_F(PselectTest, InvalidTimeoutNegative) { + struct timespec timeout = absl::ToTimespec(absl::Seconds(-1)); + ASSERT_THAT(syscallPselect6(1, nullptr, nullptr, nullptr, &timeout, nullptr), + SyscallFailsWithErrno(EINVAL)); + EXPECT_EQ(timeout.tv_sec, -1); + EXPECT_EQ(timeout.tv_nsec, 0); +} + +TEST_F(PselectTest, InvalidTimeoutNotNormalized) { + struct timespec timeout = {0, 1000000001}; + ASSERT_THAT(syscallPselect6(1, nullptr, nullptr, nullptr, &timeout, nullptr), + SyscallFailsWithErrno(EINVAL)); + EXPECT_EQ(timeout.tv_sec, 0); + EXPECT_EQ(timeout.tv_nsec, 1000000001); +} + +TEST_F(PselectTest, EmptySigMaskInvalidMaskSize) { + struct timespec timeout = {}; + MaskWithSize invalid = {nullptr, 7}; + EXPECT_THAT(syscallPselect6(0, nullptr, nullptr, nullptr, &timeout, &invalid), + SyscallSucceeds()); +} + +TEST_F(PselectTest, EmptySigMaskValidMaskSize) { + struct timespec timeout = {}; + MaskWithSize invalid = {nullptr, 8}; + EXPECT_THAT(syscallPselect6(0, nullptr, nullptr, nullptr, &timeout, &invalid), + SyscallSucceeds()); +} + +TEST_F(PselectTest, InvalidMaskSize) { + struct timespec timeout = {}; + sigset_t sigmask; + ASSERT_THAT(sigemptyset(&sigmask), SyscallSucceeds()); + MaskWithSize invalid = {&sigmask, 7}; + EXPECT_THAT(syscallPselect6(1, nullptr, nullptr, nullptr, &timeout, &invalid), + SyscallFailsWithErrno(EINVAL)); +} + +// Verify that signals blocked by the pselect mask (that would otherwise be +// allowed) do not interrupt pselect. +TEST_F(PselectTest, SignalMaskBlocksSignal) { + absl::Duration duration(absl::Seconds(30)); + struct timespec timeout = absl::ToTimespec(duration); + absl::Duration timer_duration(absl::Seconds(10)); + + // Call with a mask that blocks SIGALRM. See that pselect is not interrupted + // (i.e. returns 0) and that upon completion, the timer has fired. 
+ sigset_t mask; + ASSERT_THAT(sigprocmask(0, nullptr, &mask), SyscallSucceeds()); + ASSERT_THAT(sigaddset(&mask, SIGALRM), SyscallSucceeds()); + MaskWithSize mask_with_size = {&mask, kSigsetSize}; + SetTimer(timer_duration); + MaybeSave(); + ASSERT_FALSE(TimerFired()); + ASSERT_THAT( + syscallPselect6(1, nullptr, nullptr, nullptr, &timeout, &mask_with_size), + SyscallSucceeds()); + EXPECT_TRUE(TimerFired()); + EXPECT_EQ(absl::DurationFromTimespec(timeout), absl::Duration()); +} + +// Verify that signals allowed by the pselect mask (that would otherwise be +// blocked) interrupt pselect. +TEST_F(PselectTest, SignalMaskAllowsSignal) { + absl::Duration duration = absl::Seconds(30); + struct timespec timeout = absl::ToTimespec(duration); + absl::Duration timer_duration = absl::Seconds(10); + + sigset_t mask; + ASSERT_THAT(sigprocmask(0, nullptr, &mask), SyscallSucceeds()); + + // Block SIGALRM. + auto cleanup = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_BLOCK, SIGALRM)); + + // Call with a mask that unblocks SIGALRM. See that pselect is interrupted. + MaskWithSize mask_with_size = {&mask, kSigsetSize}; + SetTimer(timer_duration); + MaybeSave(); + ASSERT_FALSE(TimerFired()); + ASSERT_THAT( + syscallPselect6(1, nullptr, nullptr, nullptr, &timeout, &mask_with_size), + SyscallFailsWithErrno(EINTR)); + EXPECT_TRUE(TimerFired()); + EXPECT_GT(absl::DurationFromTimespec(timeout), absl::Duration()); +} + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/ptrace.cc b/test/syscalls/linux/ptrace.cc new file mode 100644 index 000000000..926690eb8 --- /dev/null +++ b/test/syscalls/linux/ptrace.cc @@ -0,0 +1,1229 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <elf.h> +#include <signal.h> +#include <stddef.h> +#include <sys/ptrace.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/user.h> +#include <sys/wait.h> +#include <unistd.h> + +#include <iostream> +#include <utility> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/flags/flag.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/util/logging.h" +#include "test/util/multiprocess_util.h" +#include "test/util/platform_util.h" +#include "test/util/signal_util.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" +#include "test/util/time_util.h" + +ABSL_FLAG(bool, ptrace_test_execve_child, false, + "If true, run the " + "PtraceExecveTest_Execve_GetRegs_PeekUser_SIGKILL_TraceClone_" + "TraceExit child workload."); + +namespace gvisor { +namespace testing { + +namespace { + +// PTRACE_GETSIGMASK and PTRACE_SETSIGMASK are not defined until glibc 2.23 +// (fb53a27c5741 "Add new header definitions from Linux 4.4 (plus older ptrace +// definitions)"). +constexpr auto kPtraceGetSigMask = static_cast<__ptrace_request>(0x420a); +constexpr auto kPtraceSetSigMask = static_cast<__ptrace_request>(0x420b); + +// PTRACE_SYSEMU is not defined until glibc 2.27 (c48831d0eebf "linux/x86: sync +// sys/ptrace.h with Linux 4.14 [BZ #22433]"). +constexpr auto kPtraceSysemu = static_cast<__ptrace_request>(31); + +// PTRACE_EVENT_STOP is not defined until glibc 2.26 (3f67d1a7021e "Add Linux +// PTRACE_EVENT_STOP"). 
+constexpr int kPtraceEventStop = 128; + +// Sends sig to the current process with tgkill(2). +// +// glibc's raise(2) may change the signal mask before sending the signal. These +// extra syscalls make tests of syscall, signal interception, etc. difficult to +// write. +void RaiseSignal(int sig) { + pid_t pid = getpid(); + TEST_PCHECK(pid > 0); + pid_t tid = gettid(); + TEST_PCHECK(tid > 0); + TEST_PCHECK(tgkill(pid, tid, sig) == 0); +} + +// Returns the Yama ptrace scope. +PosixErrorOr<int> YamaPtraceScope() { + constexpr char kYamaPtraceScopePath[] = "/proc/sys/kernel/yama/ptrace_scope"; + + ASSIGN_OR_RETURN_ERRNO(bool exists, Exists(kYamaPtraceScopePath)); + if (!exists) { + // File doesn't exist means no Yama, so the scope is disabled -> 0. + return 0; + } + + std::string contents; + RETURN_IF_ERRNO(GetContents(kYamaPtraceScopePath, &contents)); + + int scope; + if (!absl::SimpleAtoi(contents, &scope)) { + return PosixError(EINVAL, absl::StrCat(contents, ": not a valid number")); + } + + return scope; +} + +TEST(PtraceTest, AttachSelf) { + EXPECT_THAT(ptrace(PTRACE_ATTACH, gettid(), 0, 0), + SyscallFailsWithErrno(EPERM)); +} + +TEST(PtraceTest, AttachSameThreadGroup) { + pid_t const tid = gettid(); + ScopedThread([&] { + EXPECT_THAT(ptrace(PTRACE_ATTACH, tid, 0, 0), SyscallFailsWithErrno(EPERM)); + }); +} + +TEST(PtraceTest, AttachParent_PeekData_PokeData_SignalSuppression) { + // Yama prevents attaching to a parent. Skip the test if the scope is anything + // except disabled. + SKIP_IF(ASSERT_NO_ERRNO_AND_VALUE(YamaPtraceScope()) > 0); + + constexpr long kBeforePokeDataValue = 10; + constexpr long kAfterPokeDataValue = 20; + + volatile long word = kBeforePokeDataValue; + + pid_t const child_pid = fork(); + if (child_pid == 0) { + // In child process. + + // Attach to the parent. 
+ pid_t const parent_pid = getppid(); + TEST_PCHECK(ptrace(PTRACE_ATTACH, parent_pid, 0, 0) == 0); + MaybeSave(); + + // Block until the parent enters signal-delivery-stop as a result of the + // SIGSTOP sent by PTRACE_ATTACH. + int status; + TEST_PCHECK(waitpid(parent_pid, &status, 0) == parent_pid); + MaybeSave(); + TEST_CHECK(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP); + + // Replace the value of word in the parent process with kAfterPokeDataValue. + long const parent_word = ptrace(PTRACE_PEEKDATA, parent_pid, &word, 0); + MaybeSave(); + TEST_CHECK(parent_word == kBeforePokeDataValue); + TEST_PCHECK( + ptrace(PTRACE_POKEDATA, parent_pid, &word, kAfterPokeDataValue) == 0); + MaybeSave(); + + // Detach from the parent and suppress the SIGSTOP. If the SIGSTOP is not + // suppressed, the parent will hang in group-stop, causing the test to time + // out. + TEST_PCHECK(ptrace(PTRACE_DETACH, parent_pid, 0, 0) == 0); + MaybeSave(); + _exit(0); + } + // In parent process. + ASSERT_THAT(child_pid, SyscallSucceeds()); + + // Wait for the child to complete. + int status; + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << " status " << status; + + // Check that the child's PTRACE_POKEDATA was effective. + EXPECT_EQ(kAfterPokeDataValue, word); +} + +TEST(PtraceTest, GetSigMask) { + // glibc and the Linux kernel define a sigset_t with different sizes. To avoid + // creating a kernel_sigset_t and recreating all the modification functions + // (sigemptyset, etc), we just hardcode the kernel sigset size. + constexpr int kSizeofKernelSigset = 8; + constexpr int kBlockSignal = SIGUSR1; + sigset_t blocked; + sigemptyset(&blocked); + sigaddset(&blocked, kBlockSignal); + + pid_t const child_pid = fork(); + if (child_pid == 0) { + // In child process. + + // Install a signal handler for kBlockSignal to avoid termination and block + // it. 
+ TEST_PCHECK(signal( + kBlockSignal, +[](int signo) {}) != SIG_ERR); + MaybeSave(); + TEST_PCHECK(sigprocmask(SIG_SETMASK, &blocked, nullptr) == 0); + MaybeSave(); + + // Enable tracing. + TEST_PCHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) == 0); + MaybeSave(); + + // This should be blocked. + RaiseSignal(kBlockSignal); + + // This should be suppressed by parent, who will change signal mask in the + // meantime, which means kBlockSignal should be delivered once this resumes. + RaiseSignal(SIGSTOP); + + _exit(0); + } + // In parent process. + ASSERT_THAT(child_pid, SyscallSucceeds()); + + // Wait for the child to send itself SIGSTOP and enter signal-delivery-stop. + int status; + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP) + << " status " << status; + + // Get current signal mask. + sigset_t set; + EXPECT_THAT(ptrace(kPtraceGetSigMask, child_pid, kSizeofKernelSigset, &set), + SyscallSucceeds()); + EXPECT_THAT(blocked, EqualsSigset(set)); + + // Try to get current signal mask with bad size argument. + EXPECT_THAT(ptrace(kPtraceGetSigMask, child_pid, 0, nullptr), + SyscallFailsWithErrno(EINVAL)); + + // Try to set bad signal mask. + sigset_t* bad_addr = reinterpret_cast<sigset_t*>(-1); + EXPECT_THAT( + ptrace(kPtraceSetSigMask, child_pid, kSizeofKernelSigset, bad_addr), + SyscallFailsWithErrno(EFAULT)); + + // Set signal mask to empty set. + sigset_t set1; + sigemptyset(&set1); + EXPECT_THAT(ptrace(kPtraceSetSigMask, child_pid, kSizeofKernelSigset, &set1), + SyscallSucceeds()); + + // Suppress SIGSTOP and resume the child. It should re-enter + // signal-delivery-stop for kBlockSignal. 
+ ASSERT_THAT(ptrace(PTRACE_CONT, child_pid, 0, 0), SyscallSucceeds()); + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == kBlockSignal) + << " status " << status; + + ASSERT_THAT(ptrace(PTRACE_CONT, child_pid, 0, 0), SyscallSucceeds()); + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + // Let's see that process exited normally. + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << " status " << status; +} + +TEST(PtraceTest, GetSiginfo_SetSiginfo_SignalInjection) { + constexpr int kOriginalSigno = SIGUSR1; + constexpr int kInjectedSigno = SIGUSR2; + + pid_t const child_pid = fork(); + if (child_pid == 0) { + // In child process. + + // Override all signal handlers. + struct sigaction sa = {}; + sa.sa_handler = +[](int signo) { _exit(signo); }; + TEST_PCHECK(sigfillset(&sa.sa_mask) == 0); + for (int signo = 1; signo < 32; signo++) { + if (signo == SIGKILL || signo == SIGSTOP) { + continue; + } + TEST_PCHECK(sigaction(signo, &sa, nullptr) == 0); + } + for (int signo = SIGRTMIN; signo <= SIGRTMAX; signo++) { + TEST_PCHECK(sigaction(signo, &sa, nullptr) == 0); + } + + // Unblock all signals. + TEST_PCHECK(sigprocmask(SIG_UNBLOCK, &sa.sa_mask, nullptr) == 0); + MaybeSave(); + + // Send ourselves kOriginalSignal while ptraced and exit with the signal we + // actually receive via the signal handler, if any, or 0 if we don't receive + // a signal. + TEST_PCHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) == 0); + MaybeSave(); + RaiseSignal(kOriginalSigno); + _exit(0); + } + // In parent process. + ASSERT_THAT(child_pid, SyscallSucceeds()); + + // Wait for the child to send itself kOriginalSigno and enter + // signal-delivery-stop. 
+ int status; + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == kOriginalSigno) + << " status " << status; + + siginfo_t siginfo = {}; + ASSERT_THAT(ptrace(PTRACE_GETSIGINFO, child_pid, 0, &siginfo), + SyscallSucceeds()); + EXPECT_EQ(kOriginalSigno, siginfo.si_signo); + EXPECT_EQ(SI_TKILL, siginfo.si_code); + + // Replace the signal with kInjectedSigno, and check that the child exits + // with kInjectedSigno, indicating that signal injection was successful. + siginfo.si_signo = kInjectedSigno; + ASSERT_THAT(ptrace(PTRACE_SETSIGINFO, child_pid, 0, &siginfo), + SyscallSucceeds()); + ASSERT_THAT(ptrace(PTRACE_DETACH, child_pid, 0, kInjectedSigno), + SyscallSucceeds()); + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == kInjectedSigno) + << " status " << status; +} + +TEST(PtraceTest, SIGKILLDoesNotCauseSignalDeliveryStop) { + pid_t const child_pid = fork(); + if (child_pid == 0) { + // In child process. + TEST_PCHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) == 0); + MaybeSave(); + RaiseSignal(SIGKILL); + TEST_CHECK_MSG(false, "Survived SIGKILL?"); + _exit(1); + } + // In parent process. + ASSERT_THAT(child_pid, SyscallSucceeds()); + + // Expect the child to die to SIGKILL without entering signal-delivery-stop. + int status; + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL) + << " status " << status; +} + +TEST(PtraceTest, PtraceKill) { + constexpr int kOriginalSigno = SIGUSR1; + + pid_t const child_pid = fork(); + if (child_pid == 0) { + // In child process. + TEST_PCHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) == 0); + MaybeSave(); + + // PTRACE_KILL only works if tracee has entered signal-delivery-stop. 
+ RaiseSignal(kOriginalSigno); + TEST_CHECK_MSG(false, "Failed to kill the process?"); + _exit(0); + } + // In parent process. + ASSERT_THAT(child_pid, SyscallSucceeds()); + + // Wait for the child to send itself kOriginalSigno and enter + // signal-delivery-stop. + int status; + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == kOriginalSigno) + << " status " << status; + + ASSERT_THAT(ptrace(PTRACE_KILL, child_pid, 0, 0), SyscallSucceeds()); + + // Expect the child to die with SIGKILL. + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL) + << " status " << status; +} + +TEST(PtraceTest, GetRegSet) { + pid_t const child_pid = fork(); + if (child_pid == 0) { + // In child process. + + // Enable tracing. + TEST_PCHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) == 0); + MaybeSave(); + + // Use kill explicitly because we check the syscall argument register below. + kill(getpid(), SIGSTOP); + + _exit(0); + } + // In parent process. + ASSERT_THAT(child_pid, SyscallSucceeds()); + + // Wait for the child to send itself SIGSTOP and enter signal-delivery-stop. + int status; + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP) + << " status " << status; + + // Get the general registers. PTRACE_GETREGSET reads into the buffer + // described by iov; iov_len is checked afterwards to confirm how much was + // returned. + struct user_regs_struct regs; + struct iovec iov; + iov.iov_base = &regs; + iov.iov_len = sizeof(regs); + EXPECT_THAT(ptrace(PTRACE_GETREGSET, child_pid, NT_PRSTATUS, &iov), + SyscallSucceeds()); + + // Read exactly the full register set. + EXPECT_EQ(iov.iov_len, sizeof(regs)); + +#if defined(__x86_64__) + // Child called kill(2), with SIGSTOP as arg 2. + EXPECT_EQ(regs.rsi, SIGSTOP); +#elif defined(__aarch64__) + EXPECT_EQ(regs.regs[1], SIGSTOP); +#endif + + // Suppress SIGSTOP and resume the child. 
+ ASSERT_THAT(ptrace(PTRACE_CONT, child_pid, 0, 0), SyscallSucceeds()); + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + // Let's see that process exited normally. + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << " status " << status; +} + +TEST(PtraceTest, AttachingConvertsGroupStopToPtraceStop) { + pid_t const child_pid = fork(); + if (child_pid == 0) { + // In child process. + while (true) { + pause(); + } + } + // In parent process. + ASSERT_THAT(child_pid, SyscallSucceeds()); + + // SIGSTOP the child and wait for it to stop. + ASSERT_THAT(kill(child_pid, SIGSTOP), SyscallSucceeds()); + int status; + ASSERT_THAT(waitpid(child_pid, &status, WUNTRACED), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP) + << " status " << status; + + // Attach to the child and expect it to re-enter a traced group-stop despite + // already being stopped. + ASSERT_THAT(ptrace(PTRACE_ATTACH, child_pid, 0, 0), SyscallSucceeds()); + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP) + << " status " << status; + + // Verify that the child is ptrace-stopped by checking that it can receive + // ptrace commands requiring a ptrace-stop. + EXPECT_THAT(ptrace(PTRACE_SETOPTIONS, child_pid, 0, 0), SyscallSucceeds()); + + // Group-stop is distinguished from signal-delivery-stop by PTRACE_GETSIGINFO + // failing with EINVAL. + siginfo_t siginfo = {}; + EXPECT_THAT(ptrace(PTRACE_GETSIGINFO, child_pid, 0, &siginfo), + SyscallFailsWithErrno(EINVAL)); + + // Detach from the child and expect it to stay stopped without a notification. + ASSERT_THAT(ptrace(PTRACE_DETACH, child_pid, 0, 0), SyscallSucceeds()); + ASSERT_THAT(waitpid(child_pid, &status, WUNTRACED | WNOHANG), + SyscallSucceedsWithValue(0)); + + // Sending it SIGCONT should cause it to leave its stop. 
+ ASSERT_THAT(kill(child_pid, SIGCONT), SyscallSucceeds()); + ASSERT_THAT(waitpid(child_pid, &status, WCONTINUED), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFCONTINUED(status)) << " status " << status; + + // Clean up the child. + ASSERT_THAT(kill(child_pid, SIGKILL), SyscallSucceeds()); + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL) + << " status " << status; +} + +// Fixture for tests parameterized by whether or not to use PTRACE_O_TRACEEXEC. +class PtraceExecveTest : public ::testing::TestWithParam<bool> { + protected: + bool TraceExec() const { return GetParam(); } +}; + +TEST_P(PtraceExecveTest, Execve_GetRegs_PeekUser_SIGKILL_TraceClone_TraceExit) { + ExecveArray const owned_child_argv = {"/proc/self/exe", + "--ptrace_test_execve_child"}; + char* const* const child_argv = owned_child_argv.get(); + + pid_t const child_pid = fork(); + if (child_pid == 0) { + // In child process. The test relies on calling execve() in a non-leader + // thread; pthread_create() isn't async-signal-safe, so the safest way to + // do this is to execve() first, then enable tracing and run the expected + // child process behavior in the new subprocess. + execve(child_argv[0], child_argv, /* envp = */ nullptr); + TEST_PCHECK_MSG(false, "Survived execve to test child"); + } + // In parent process. + ASSERT_THAT(child_pid, SyscallSucceeds()); + + // Wait for the child to send itself SIGSTOP and enter signal-delivery-stop. + int status; + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP) + << " status " << status; + + // Enable PTRACE_O_TRACECLONE so we can get the ID of the child's non-leader + // thread, PTRACE_O_TRACEEXIT so we can observe the leader's death, and + // PTRACE_O_TRACEEXEC if required by the test. 
(The leader doesn't call + // execve, but options should be inherited across clone.) + long opts = PTRACE_O_TRACECLONE | PTRACE_O_TRACEEXIT; + if (TraceExec()) { + opts |= PTRACE_O_TRACEEXEC; + } + ASSERT_THAT(ptrace(PTRACE_SETOPTIONS, child_pid, 0, opts), SyscallSucceeds()); + + // Suppress the SIGSTOP and wait for the child's leader thread to report + // PTRACE_EVENT_CLONE. Get the new thread's ID from the event. + ASSERT_THAT(ptrace(PTRACE_CONT, child_pid, 0, 0), SyscallSucceeds()); + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_EQ(SIGTRAP | (PTRACE_EVENT_CLONE << 8), status >> 8); + unsigned long eventmsg; + ASSERT_THAT(ptrace(PTRACE_GETEVENTMSG, child_pid, 0, &eventmsg), + SyscallSucceeds()); + pid_t const nonleader_tid = eventmsg; + pid_t const leader_tid = child_pid; + + // The new thread should be ptraced and in signal-delivery-stop by SIGSTOP due + // to PTRACE_O_TRACECLONE. + // + // Before bf959931ddb88c4e4366e96dd22e68fa0db9527c "wait/ptrace: assume __WALL + // if the child is traced" (4.7) , waiting on it requires __WCLONE since, as a + // non-leader, its termination signal is 0. After, a standard wait is + // sufficient. + ASSERT_THAT(waitpid(nonleader_tid, &status, __WCLONE), + SyscallSucceedsWithValue(nonleader_tid)); + EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP) + << " status " << status; + + // Resume both child threads. + for (pid_t const tid : {leader_tid, nonleader_tid}) { + ASSERT_THAT(ptrace(PTRACE_CONT, tid, 0, 0), SyscallSucceeds()); + } + + // The non-leader child thread should call execve, causing the leader thread + // to enter PTRACE_EVENT_EXIT with an apparent exit code of 0. At this point, + // the leader has not yet exited, so the non-leader should be blocked in + // execve. 
+ ASSERT_THAT(waitpid(leader_tid, &status, 0), + SyscallSucceedsWithValue(leader_tid)); + EXPECT_EQ(SIGTRAP | (PTRACE_EVENT_EXIT << 8), status >> 8); + ASSERT_THAT(ptrace(PTRACE_GETEVENTMSG, leader_tid, 0, &eventmsg), + SyscallSucceeds()); + EXPECT_TRUE(WIFEXITED(eventmsg) && WEXITSTATUS(eventmsg) == 0) + << " eventmsg " << eventmsg; + EXPECT_THAT(waitpid(nonleader_tid, &status, __WCLONE | WNOHANG), + SyscallSucceedsWithValue(0)); + + // Allow the leader to continue exiting. This should allow the non-leader to + // complete its execve, causing the original leader to be reaped without + // further notice and the non-leader to steal its ID. + ASSERT_THAT(ptrace(PTRACE_CONT, leader_tid, 0, 0), SyscallSucceeds()); + ASSERT_THAT(waitpid(leader_tid, &status, 0), + SyscallSucceedsWithValue(leader_tid)); + if (TraceExec()) { + // If PTRACE_O_TRACEEXEC was enabled, the execing thread should be in + // PTRACE_EVENT_EXEC-stop, with the event message set to its old thread ID. + EXPECT_EQ(SIGTRAP | (PTRACE_EVENT_EXEC << 8), status >> 8); + ASSERT_THAT(ptrace(PTRACE_GETEVENTMSG, leader_tid, 0, &eventmsg), + SyscallSucceeds()); + EXPECT_EQ(nonleader_tid, eventmsg); + } else { + // Otherwise, the execing thread should have received SIGTRAP and should now + // be in signal-delivery-stop. + EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) + << " status " << status; + } + +#ifdef __x86_64__ + { + // CS should be 0x33, indicating a 64-bit binary. Check it both through + // PTRACE_PEEKUSER and through a full PTRACE_GETREGS read. + constexpr uint64_t kAMD64UserCS = 0x33; + EXPECT_THAT(ptrace(PTRACE_PEEKUSER, leader_tid, + offsetof(struct user_regs_struct, cs), 0), + SyscallSucceedsWithValue(kAMD64UserCS)); + struct user_regs_struct regs = {}; + ASSERT_THAT(ptrace(PTRACE_GETREGS, leader_tid, 0, &regs), + SyscallSucceeds()); + EXPECT_EQ(kAMD64UserCS, regs.cs); + } +#endif // defined(__x86_64__) + + // PTRACE_O_TRACEEXIT should have been inherited across execve. 
Send SIGKILL, + // which should end the PTRACE_EVENT_EXEC-stop or signal-delivery-stop and + // leave the child in PTRACE_EVENT_EXIT-stop. + ASSERT_THAT(kill(leader_tid, SIGKILL), SyscallSucceeds()); + ASSERT_THAT(waitpid(leader_tid, &status, 0), + SyscallSucceedsWithValue(leader_tid)); + EXPECT_EQ(SIGTRAP | (PTRACE_EVENT_EXIT << 8), status >> 8); + ASSERT_THAT(ptrace(PTRACE_GETEVENTMSG, leader_tid, 0, &eventmsg), + SyscallSucceeds()); + EXPECT_TRUE(WIFSIGNALED(eventmsg) && WTERMSIG(eventmsg) == SIGKILL) + << " eventmsg " << eventmsg; + + // End the PTRACE_EVENT_EXIT stop, allowing the child to exit. + ASSERT_THAT(ptrace(PTRACE_CONT, leader_tid, 0, 0), SyscallSucceeds()); + ASSERT_THAT(waitpid(leader_tid, &status, 0), + SyscallSucceedsWithValue(leader_tid)); + EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL) + << " status " << status; +} + +[[noreturn]] void RunExecveChild() { + // Enable tracing, then raise SIGSTOP and expect our parent to suppress it. + TEST_PCHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) == 0); + MaybeSave(); + RaiseSignal(SIGSTOP); + MaybeSave(); + + // Call execve() in a non-leader thread. As long as execve() succeeds, what + // exactly we execve() shouldn't really matter, since the tracer should kill + // us after execve() completes. + ScopedThread t([&] { + ExecveArray const owned_child_argv = {"/proc/self/exe", + "--this_flag_shouldnt_exist"}; + char* const* const child_argv = owned_child_argv.get(); + execve(child_argv[0], child_argv, /* envp = */ nullptr); + TEST_PCHECK_MSG(false, "Survived execve? (thread)"); + }); + t.Join(); + TEST_CHECK_MSG(false, "Survived execve? (main)"); + _exit(1); +} + +INSTANTIATE_TEST_SUITE_P(TraceExec, PtraceExecveTest, ::testing::Bool()); + +// This test has expectations on when syscall-enter/exit-stops occur that are +// violated if saving occurs, since saving interrupts all syscalls, causing +// premature syscall-exit. 
+TEST(PtraceTest, + ExitWhenParentIsNotTracer_Syscall_TraceVfork_TraceVforkDone_NoRandomSave) { + constexpr int kExitTraceeExitCode = 99; + + pid_t const child_pid = fork(); + if (child_pid == 0) { + // In child process. + + // Block SIGCHLD so it doesn't interrupt wait4. + sigset_t mask; + TEST_PCHECK(sigemptyset(&mask) == 0); + TEST_PCHECK(sigaddset(&mask, SIGCHLD) == 0); + TEST_PCHECK(sigprocmask(SIG_SETMASK, &mask, nullptr) == 0); + MaybeSave(); + + // Enable tracing, then raise SIGSTOP and expect our parent to suppress it. + TEST_PCHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) == 0); + MaybeSave(); + RaiseSignal(SIGSTOP); + MaybeSave(); + + // Spawn a vfork child that exits immediately, and reap it. Don't save + // after vfork since the parent expects to see wait4 as the next syscall. + pid_t const pid = vfork(); + if (pid == 0) { + _exit(kExitTraceeExitCode); + } + TEST_PCHECK_MSG(pid > 0, "vfork failed"); + + int status; + TEST_PCHECK(wait4(pid, &status, 0, nullptr) > 0); + MaybeSave(); + TEST_CHECK(WIFEXITED(status) && WEXITSTATUS(status) == kExitTraceeExitCode); + _exit(0); + } + // In parent process. fork() must have succeeded before we wait on the child. + ASSERT_THAT(child_pid, SyscallSucceeds()); + + // Wait for the child to send itself SIGSTOP and enter signal-delivery-stop. + int status; + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP) + << " status " << status; + + // Enable PTRACE_O_TRACEVFORK so we can get the ID of the grandchild, + // PTRACE_O_TRACEVFORKDONE so we can observe PTRACE_EVENT_VFORK_DONE, and + // PTRACE_O_TRACESYSGOOD so syscall-enter/exit-stops are unambiguously + // indicated by a stop signal of SIGTRAP|0x80 rather than just SIGTRAP. 
+ ASSERT_THAT(ptrace(PTRACE_SETOPTIONS, child_pid, 0, + PTRACE_O_TRACEVFORK | PTRACE_O_TRACEVFORKDONE | + PTRACE_O_TRACESYSGOOD), + SyscallSucceeds()); + + // Suppress the SIGSTOP and wait for the child to report PTRACE_EVENT_VFORK. + // Get the new process' ID from the event. + ASSERT_THAT(ptrace(PTRACE_CONT, child_pid, 0, 0), SyscallSucceeds()); + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_EQ(SIGTRAP | (PTRACE_EVENT_VFORK << 8), status >> 8); + unsigned long eventmsg; + ASSERT_THAT(ptrace(PTRACE_GETEVENTMSG, child_pid, 0, &eventmsg), + SyscallSucceeds()); + pid_t const grandchild_pid = eventmsg; + + // The grandchild should be traced by us and in signal-delivery-stop by + // SIGSTOP due to PTRACE_O_TRACEVFORK. This allows us to wait on it even + // though we're not its parent. + ASSERT_THAT(waitpid(grandchild_pid, &status, 0), + SyscallSucceedsWithValue(grandchild_pid)); + EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP) + << " status " << status; + + // Resume the child with PTRACE_SYSCALL. Since the grandchild is still in + // signal-delivery-stop, the child should remain in vfork() waiting for the + // grandchild to exec or exit. + ASSERT_THAT(ptrace(PTRACE_SYSCALL, child_pid, 0, 0), SyscallSucceeds()); + absl::SleepFor(absl::Seconds(1)); + ASSERT_THAT(waitpid(child_pid, &status, WNOHANG), + SyscallSucceedsWithValue(0)); + + // Suppress the grandchild's SIGSTOP and wait for the grandchild to exit. Pass + // WNOWAIT to waitid() so that we don't acknowledge the grandchild's exit yet. 
+ ASSERT_THAT(ptrace(PTRACE_CONT, grandchild_pid, 0, 0), SyscallSucceeds()); + siginfo_t siginfo = {}; + ASSERT_THAT(waitid(P_PID, grandchild_pid, &siginfo, WEXITED | WNOWAIT), + SyscallSucceeds()); + EXPECT_EQ(SIGCHLD, siginfo.si_signo); + EXPECT_EQ(CLD_EXITED, siginfo.si_code); + EXPECT_EQ(kExitTraceeExitCode, siginfo.si_status); + EXPECT_EQ(grandchild_pid, siginfo.si_pid); + EXPECT_EQ(getuid(), siginfo.si_uid); + + // The child should now be in PTRACE_EVENT_VFORK_DONE stop. The event + // message should still be the grandchild's PID. + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_EQ(SIGTRAP | (PTRACE_EVENT_VFORK_DONE << 8), status >> 8); + ASSERT_THAT(ptrace(PTRACE_GETEVENTMSG, child_pid, 0, &eventmsg), + SyscallSucceeds()); + EXPECT_EQ(grandchild_pid, eventmsg); + + // Resume the child with PTRACE_SYSCALL again and expect it to enter + // syscall-exit-stop for vfork() or clone(), either of which should return the + // grandchild's PID from the syscall. Aside from PTRACE_O_TRACESYSGOOD, + // syscall-stops are distinguished from signal-delivery-stop by + // PTRACE_GETSIGINFO returning a siginfo for which si_code == SIGTRAP or + // SIGTRAP|0x80. 
+ ASSERT_THAT(ptrace(PTRACE_SYSCALL, child_pid, 0, 0), SyscallSucceeds()); + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == (SIGTRAP | 0x80)) + << " status " << status; + ASSERT_THAT(ptrace(PTRACE_GETSIGINFO, child_pid, 0, &siginfo), + SyscallSucceeds()); + EXPECT_TRUE(siginfo.si_code == SIGTRAP || siginfo.si_code == (SIGTRAP | 0x80)) + << "si_code = " << siginfo.si_code; + + { + struct user_regs_struct regs = {}; + struct iovec iov; + iov.iov_base = ®s; + iov.iov_len = sizeof(regs); + EXPECT_THAT(ptrace(PTRACE_GETREGSET, child_pid, NT_PRSTATUS, &iov), + SyscallSucceeds()); +#if defined(__x86_64__) + EXPECT_TRUE(regs.orig_rax == SYS_vfork || regs.orig_rax == SYS_clone) + << "orig_rax = " << regs.orig_rax; + EXPECT_EQ(grandchild_pid, regs.rax); +#elif defined(__aarch64__) + EXPECT_TRUE(regs.regs[8] == SYS_clone) << "regs[8] = " << regs.regs[8]; + EXPECT_EQ(grandchild_pid, regs.regs[0]); +#endif // defined(__x86_64__) + } + + // After this point, the child will be making wait4 syscalls that will be + // interrupted by saving, so saving is not permitted. Note that this is + // explicitly released below once the grandchild exits. + DisableSave ds; + + // Resume the child with PTRACE_SYSCALL again and expect it to enter + // syscall-enter-stop for wait4(). 
+ ASSERT_THAT(ptrace(PTRACE_SYSCALL, child_pid, 0, 0), SyscallSucceeds()); + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == (SIGTRAP | 0x80)) + << " status " << status; + ASSERT_THAT(ptrace(PTRACE_GETSIGINFO, child_pid, 0, &siginfo), + SyscallSucceeds()); + EXPECT_TRUE(siginfo.si_code == SIGTRAP || siginfo.si_code == (SIGTRAP | 0x80)) + << "si_code = " << siginfo.si_code; +#ifdef __x86_64__ + { + EXPECT_THAT(ptrace(PTRACE_PEEKUSER, child_pid, + offsetof(struct user_regs_struct, orig_rax), 0), + SyscallSucceedsWithValue(SYS_wait4)); + } +#endif // defined(__x86_64__) + + // Resume the child with PTRACE_SYSCALL again. Since the grandchild is + // waiting for the tracer (us) to acknowledge its exit first, wait4 should + // block. + ASSERT_THAT(ptrace(PTRACE_SYSCALL, child_pid, 0, 0), SyscallSucceeds()); + absl::SleepFor(absl::Seconds(1)); + ASSERT_THAT(waitpid(child_pid, &status, WNOHANG), + SyscallSucceedsWithValue(0)); + + // Acknowledge the grandchild's exit. + ASSERT_THAT(waitpid(grandchild_pid, &status, 0), + SyscallSucceedsWithValue(grandchild_pid)); + ds.reset(); + + // Now the child should enter syscall-exit-stop for wait4, returning with the + // grandchild's PID. + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == (SIGTRAP | 0x80)) + << " status " << status; + { + struct user_regs_struct regs = {}; + struct iovec iov; + iov.iov_base = ®s; + iov.iov_len = sizeof(regs); + EXPECT_THAT(ptrace(PTRACE_GETREGSET, child_pid, NT_PRSTATUS, &iov), + SyscallSucceeds()); +#if defined(__x86_64__) + EXPECT_EQ(SYS_wait4, regs.orig_rax); + EXPECT_EQ(grandchild_pid, regs.rax); +#elif defined(__aarch64__) + EXPECT_EQ(SYS_wait4, regs.regs[8]); + EXPECT_EQ(grandchild_pid, regs.regs[0]); +#endif // defined(__x86_64__) + } + + // Detach from the child and wait for it to exit. 
+ ASSERT_THAT(ptrace(PTRACE_DETACH, child_pid, 0, 0), SyscallSucceeds()); + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << " status " << status; +} + +// These tests require knowledge of architecture-specific syscall conventions. +#ifdef __x86_64__ +TEST(PtraceTest, Int3) { + SKIP_IF(PlatformSupportInt3() == PlatformSupport::NotSupported); + + pid_t const child_pid = fork(); + if (child_pid == 0) { + // In child process. + + // Enable tracing. + TEST_PCHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) == 0); + + // Interrupt 3 - trap to debugger + asm("int3"); + + _exit(56); + } + // In parent process. + ASSERT_THAT(child_pid, SyscallSucceeds()); + + // The int3 in the traced child should be reported to us as a SIGTRAP stop. + int status; + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) + << " status " << status; + + ASSERT_THAT(ptrace(PTRACE_CONT, child_pid, 0, 0), SyscallSucceeds()); + + // The child should resume after the int3 trap and exit with status 56. + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 56) + << " status " << status; +} + +TEST(PtraceTest, Sysemu_PokeUser) { + constexpr int kSysemuHelperFirstExitCode = 126; + constexpr uint64_t kSysemuInjectedExitGroupReturn = 42; + + pid_t const child_pid = fork(); + if (child_pid == 0) { + // In child process. + + // Enable tracing, then raise SIGSTOP and expect our parent to suppress it. + TEST_PCHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) == 0); + RaiseSignal(SIGSTOP); + + // Try to exit_group, expecting the tracer to skip the syscall and set its + // own return value. + int const rv = syscall(SYS_exit_group, kSysemuHelperFirstExitCode); + TEST_PCHECK_MSG(rv == kSysemuInjectedExitGroupReturn, + "exit_group returned incorrect value"); + + _exit(0); + } + // In parent process. 
+ ASSERT_THAT(child_pid, SyscallSucceeds()); + + // Wait for the child to send itself SIGSTOP and enter signal-delivery-stop. + int status; + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP) + << " status " << status; + + // Suppress the SIGSTOP and wait for the child to enter syscall-enter-stop + // for its first exit_group syscall. + ASSERT_THAT(ptrace(kPtraceSysemu, child_pid, 0, 0), SyscallSucceeds()); + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) + << " status " << status; + + // Inspect the registers at syscall entry: the syscall number, the + // placeholder -ENOSYS return value, and the first argument. + struct user_regs_struct regs = {}; + ASSERT_THAT(ptrace(PTRACE_GETREGS, child_pid, 0, &regs), SyscallSucceeds()); + EXPECT_EQ(SYS_exit_group, regs.orig_rax); + EXPECT_EQ(-ENOSYS, regs.rax); + EXPECT_EQ(kSysemuHelperFirstExitCode, regs.rdi); + + // Replace the exit_group return value, then resume the child, which should + // automatically skip the syscall. + ASSERT_THAT( + ptrace(PTRACE_POKEUSER, child_pid, offsetof(struct user_regs_struct, rax), + kSysemuInjectedExitGroupReturn), + SyscallSucceeds()); + ASSERT_THAT(ptrace(PTRACE_DETACH, child_pid, 0, 0), SyscallSucceeds()); + + // The child should validate the injected return value and then exit normally. + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << " status " << status; +} + +// This test also cares about syscall-exit-stop. +TEST(PtraceTest, ERESTART_NoRandomSave) { + constexpr int kSigno = SIGUSR1; + + pid_t const child_pid = fork(); + if (child_pid == 0) { + // In child process. + + // Ignore, but unblock, kSigno. 
+ struct sigaction sa = {}; + sa.sa_handler = SIG_IGN; + TEST_PCHECK(sigfillset(&sa.sa_mask) == 0); + TEST_PCHECK(sigaction(kSigno, &sa, nullptr) == 0); + MaybeSave(); + TEST_PCHECK(sigprocmask(SIG_UNBLOCK, &sa.sa_mask, nullptr) == 0); + MaybeSave(); + + // Enable tracing, then raise SIGSTOP and expect our parent to suppress it. + TEST_PCHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) == 0); + RaiseSignal(SIGSTOP); + + // Invoke the pause syscall, which normally should not return until we + // receive a signal that "either terminates the process or causes the + // invocation of a signal-catching function". + pause(); + + _exit(0); + } + ASSERT_THAT(child_pid, SyscallSucceeds()); + + // Wait for the child to send itself SIGSTOP and enter signal-delivery-stop. + int status; + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP) + << " status " << status; + + // After this point, the child's pause syscall will be interrupted by saving, + // so saving is not permitted. Note that this is explicitly released below + // once the child is stopped. + DisableSave ds; + + // Suppress the SIGSTOP and wait for the child to enter syscall-enter-stop for + // its pause syscall. + ASSERT_THAT(ptrace(PTRACE_SYSCALL, child_pid, 0, 0), SyscallSucceeds()); + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) + << " status " << status; + + // At syscall entry, orig_rax holds the syscall number and rax holds the + // placeholder -ENOSYS. + struct user_regs_struct regs = {}; + ASSERT_THAT(ptrace(PTRACE_GETREGS, child_pid, 0, &regs), SyscallSucceeds()); + EXPECT_EQ(SYS_pause, regs.orig_rax); + EXPECT_EQ(-ENOSYS, regs.rax); + + // Resume the child with PTRACE_SYSCALL and expect it to block in the pause + // syscall. 
+ ASSERT_THAT(ptrace(PTRACE_SYSCALL, child_pid, 0, 0), SyscallSucceeds()); + absl::SleepFor(absl::Seconds(1)); + ASSERT_THAT(waitpid(child_pid, &status, WNOHANG), + SyscallSucceedsWithValue(0)); + + // Send the child kSigno, causing it to return ERESTARTNOHAND and enter + // syscall-exit-stop from the pause syscall. + constexpr int ERESTARTNOHAND = 514; + ASSERT_THAT(kill(child_pid, kSigno), SyscallSucceeds()); + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) + << " status " << status; + ds.reset(); + + // Re-read the registers at syscall exit to observe the restart code. + ASSERT_THAT(ptrace(PTRACE_GETREGS, child_pid, 0, &regs), SyscallSucceeds()); + EXPECT_EQ(SYS_pause, regs.orig_rax); + EXPECT_EQ(-ERESTARTNOHAND, regs.rax); + + // Replace the return value from pause with 0, causing pause to not be + // restarted despite kSigno being ignored. + ASSERT_THAT(ptrace(PTRACE_POKEUSER, child_pid, + offsetof(struct user_regs_struct, rax), 0), + SyscallSucceeds()); + + // Detach from the child and wait for it to exit. + ASSERT_THAT(ptrace(PTRACE_DETACH, child_pid, 0, 0), SyscallSucceeds()); + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << " status " << status; +} +#endif // defined(__x86_64__) + +TEST(PtraceTest, Seize_Interrupt_Listen) { + volatile long child_should_spin = 1; + pid_t const child_pid = fork(); + if (child_pid == 0) { + // In child process. + while (child_should_spin) { + SleepSafe(absl::Seconds(1)); + } + _exit(1); + } + + // In parent process. + ASSERT_THAT(child_pid, SyscallSucceeds()); + + // Attach to the child with PTRACE_SEIZE; doing so should not stop the child. + ASSERT_THAT(ptrace(PTRACE_SEIZE, child_pid, 0, 0), SyscallSucceeds()); + int status; + EXPECT_THAT(waitpid(child_pid, &status, WNOHANG), + SyscallSucceedsWithValue(0)); + + // Stop the child with PTRACE_INTERRUPT. 
+ ASSERT_THAT(ptrace(PTRACE_INTERRUPT, child_pid, 0, 0), SyscallSucceeds()); + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_EQ(SIGTRAP | (kPtraceEventStop << 8), status >> 8); + + // Unset child_should_spin to verify that the child never leaves the spin + // loop. + ASSERT_THAT(ptrace(PTRACE_POKEDATA, child_pid, &child_should_spin, 0), + SyscallSucceeds()); + + // Send SIGSTOP to the child, then resume it, allowing it to proceed to + // signal-delivery-stop. + ASSERT_THAT(kill(child_pid, SIGSTOP), SyscallSucceeds()); + ASSERT_THAT(ptrace(PTRACE_CONT, child_pid, 0, 0), SyscallSucceeds()); + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP) + << " status " << status; + + // Release the child from signal-delivery-stop without suppressing the + // SIGSTOP, causing it to enter group-stop. + ASSERT_THAT(ptrace(PTRACE_CONT, child_pid, 0, SIGSTOP), SyscallSucceeds()); + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_EQ(SIGSTOP | (kPtraceEventStop << 8), status >> 8); + + // "The state of the tracee after PTRACE_LISTEN is somewhat of a gray area: it + // is not in any ptrace-stop (ptrace commands won't work on it, and it will + // deliver waitpid(2) notifications), but it also may be considered 'stopped' + // because it is not executing instructions (is not scheduled), and if it was + // in group-stop before PTRACE_LISTEN, it will not respond to signals until + // SIGCONT is received." - ptrace(2). 
+ ASSERT_THAT(ptrace(PTRACE_LISTEN, child_pid, 0, 0), SyscallSucceeds()); + EXPECT_THAT(ptrace(PTRACE_CONT, child_pid, 0, 0), + SyscallFailsWithErrno(ESRCH)); + EXPECT_THAT(waitpid(child_pid, &status, WNOHANG), + SyscallSucceedsWithValue(0)); + EXPECT_THAT(kill(child_pid, SIGTERM), SyscallSucceeds()); + absl::SleepFor(absl::Seconds(1)); + EXPECT_THAT(waitpid(child_pid, &status, WNOHANG), + SyscallSucceedsWithValue(0)); + + // Send SIGCONT to the child, causing it to leave group-stop and re-trap due + // to PTRACE_LISTEN. + EXPECT_THAT(kill(child_pid, SIGCONT), SyscallSucceeds()); + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_EQ(SIGTRAP | (kPtraceEventStop << 8), status >> 8); + + // Detach the child and expect it to exit due to the SIGTERM we sent while + // it was stopped by PTRACE_LISTEN. + ASSERT_THAT(ptrace(PTRACE_DETACH, child_pid, 0, 0), SyscallSucceeds()); + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGTERM) + << " status " << status; +} + +TEST(PtraceTest, Interrupt_Listen_RequireSeize) { + pid_t const child_pid = fork(); + if (child_pid == 0) { + // In child process. + TEST_PCHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) == 0); + MaybeSave(); + raise(SIGSTOP); + _exit(0); + } + // In parent process. + ASSERT_THAT(child_pid, SyscallSucceeds()); + + // Wait for the child to send itself SIGSTOP and enter signal-delivery-stop. + int status; + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP) + << " status " << status; + + // PTRACE_INTERRUPT and PTRACE_LISTEN should fail since the child wasn't + // attached with PTRACE_SEIZE, leaving the child in signal-delivery-stop. 
+ EXPECT_THAT(ptrace(PTRACE_INTERRUPT, child_pid, 0, 0), + SyscallFailsWithErrno(EIO)); + EXPECT_THAT(ptrace(PTRACE_LISTEN, child_pid, 0, 0), + SyscallFailsWithErrno(EIO)); + + // Suppress SIGSTOP and detach from the child, expecting it to exit normally. + ASSERT_THAT(ptrace(PTRACE_DETACH, child_pid, 0, 0), SyscallSucceeds()); + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << " status " << status; +} + +TEST(PtraceTest, SeizeSetOptions) { + pid_t const child_pid = fork(); + if (child_pid == 0) { + // In child process. + while (true) { + SleepSafe(absl::Seconds(1)); + } + } + + // In parent process. + ASSERT_THAT(child_pid, SyscallSucceeds()); + + // Attach to the child with PTRACE_SEIZE while setting PTRACE_O_TRACESYSGOOD. + ASSERT_THAT(ptrace(PTRACE_SEIZE, child_pid, 0, PTRACE_O_TRACESYSGOOD), + SyscallSucceeds()); + + // Stop the child with PTRACE_INTERRUPT. + ASSERT_THAT(ptrace(PTRACE_INTERRUPT, child_pid, 0, 0), SyscallSucceeds()); + int status; + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_EQ(SIGTRAP | (kPtraceEventStop << 8), status >> 8); + + // Resume the child with PTRACE_SYSCALL and wait for it to enter + // syscall-enter-stop. The stop signal status from the syscall stop should be + // SIGTRAP|0x80, reflecting PTRACE_O_TRACESYSGOOD. + ASSERT_THAT(ptrace(PTRACE_SYSCALL, child_pid, 0, 0), SyscallSucceeds()); + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == (SIGTRAP | 0x80)) + << " status " << status; + + // Clean up the child. 
+ ASSERT_THAT(kill(child_pid, SIGKILL), SyscallSucceeds()); + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + if (WIFSTOPPED(status) && WSTOPSIG(status) == (SIGTRAP | 0x80)) { + // "SIGKILL kills even within system calls (syscall-exit-stop is not + // generated prior to death by SIGKILL). The net effect is that SIGKILL + // always kills the process (all its threads), even if some threads of the + // process are ptraced." - ptrace(2). This is technically true, but... + // + // When we send SIGKILL to the child, kernel/signal.c:complete_signal() => + // signal_wake_up(resume=1) kicks the tracee out of the syscall-enter-stop. + // The pending SIGKILL causes the syscall to be skipped, but the child + // thread still reports syscall-exit before checking for pending signals; in + // current kernels, this is + // arch/x86/entry/common.c:syscall_return_slowpath() => + // syscall_slow_exit_work() => + // include/linux/tracehook.h:tracehook_report_syscall_exit() => + // ptrace_report_syscall() => kernel/signal.c:ptrace_notify() => + // ptrace_do_notify() => ptrace_stop(). + // + // ptrace_stop() sets the task's state to TASK_TRACED and the task's + // exit_code to SIGTRAP|0x80 (passed by ptrace_report_syscall()), then calls + // freezable_schedule(). freezable_schedule() eventually reaches + // __schedule(), which detects signal_pending_state() due to the pending + // SIGKILL, sets the task's state back to TASK_RUNNING, and returns without + // descheduling. Thus, the task never enters syscall-exit-stop. However, if + // our wait4() => kernel/exit.c:wait_task_stopped() racily observes the + // TASK_TRACED state and the non-zero exit code set by ptrace_stop() before + // __schedule() sets the state back to TASK_RUNNING, it will return the + // task's exit_code as status W_STOPCODE(SIGTRAP|0x80). So we get a spurious + // syscall-exit-stop notification, and need to wait4() again for task exit. 
+ // + // gVisor is not susceptible to this race because + // kernel.Task.waitCollectTraceeStopLocked() checks specifically for an + // active ptraceStop, which is not initiated if SIGKILL is pending. + std::cout << "Observed syscall-exit after SIGKILL" << std::endl; + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + } + EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL) + << " status " << status; +} + +} // namespace + +} // namespace testing +} // namespace gvisor + +int main(int argc, char** argv) { + gvisor::testing::TestInit(&argc, &argv); + + if (absl::GetFlag(FLAGS_ptrace_test_execve_child)) { + gvisor::testing::RunExecveChild(); + } + + return gvisor::testing::RunAllTests(); +} diff --git a/test/syscalls/linux/pty.cc b/test/syscalls/linux/pty.cc new file mode 100644 index 000000000..f9392b9e0 --- /dev/null +++ b/test/syscalls/linux/pty.cc @@ -0,0 +1,1627 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <fcntl.h> +#include <linux/capability.h> +#include <linux/major.h> +#include <poll.h> +#include <sched.h> +#include <signal.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <sys/sysmacros.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <termios.h> +#include <unistd.h> + +#include <iostream> + +#include "gtest/gtest.h" +#include "absl/base/macros.h" +#include "absl/strings/str_cat.h" +#include "absl/synchronization/notification.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/util/capability_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/posix_error.h" +#include "test/util/pty_util.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +using ::testing::AnyOf; +using ::testing::Contains; +using ::testing::Eq; +using ::testing::Not; + +// Tests Unix98 pseudoterminals. +// +// These tests assume that /dev/ptmx exists and is associated with a devpts +// filesystem mounted at /dev/pts/. While a Linux distribution could +// theoretically place those anywhere, glibc expects those locations, so they +// are effectively fixed. + +// Minor device number for an unopened ptmx file. +constexpr int kPtmxMinor = 2; + +// The timeout when polling for data from a pty. When data is written to one end +// of a pty, Linux asynchronously makes it available to the other end, so we +// have to wait. +constexpr absl::Duration kTimeout = absl::Seconds(20); + +// The maximum line size in bytes returned per read from a pty file. +constexpr int kMaxLineSize = 4096; + +constexpr char kMasterPath[] = "/dev/ptmx"; + +// glibc defines its own, different, version of struct termios. We care about +// what the kernel does, not glibc. 
#define KERNEL_NCCS 19
// Termios layout used with the TCGETS/TCSETS ioctls in this file.
struct kernel_termios {
  tcflag_t c_iflag;
  tcflag_t c_oflag;
  tcflag_t c_cflag;
  tcflag_t c_lflag;
  cc_t c_line;
  cc_t c_cc[KERNEL_NCCS];
};

// Bytewise equality of two kernel_termios values.
//
// NOTE(review): memcmp also compares any padding bytes; this presumes the
// struct has none (or that both operands were zero-initialized) -- confirm.
bool operator==(struct kernel_termios const& a,
                struct kernel_termios const& b) {
  return memcmp(&a, &b, sizeof(a)) == 0;
}

// Returns the termios-style control character for the passed character.
//
// e.g., for Ctrl-C, i.e., ^C, call ControlCharacter('C').
//
// Standard control characters are ASCII bytes 0 through 31.
constexpr char ControlCharacter(char c) {
  // A is 1, B is 2, etc.
  return c - 'A' + 1;
}

// Returns the printable character the given control character represents.
constexpr char FromControlCharacter(char c) { return c + 'A' - 1; }

// Returns true if c is a control character.
//
// Standard control characters are ASCII bytes 0 through 31.
//
// The comparison is done on the unsigned byte value: where char is signed,
// bytes 0x80-0xff are negative and would otherwise compare <= 31.
constexpr bool IsControlCharacter(char c) {
  return static_cast<unsigned char>(c) <= 31;
}

// Describes one named flag (or multi-bit field value) inside a flags word:
// the field is considered set when (word & mask) == value.
struct Field {
  const char* name;
  uint64_t mask;
  uint64_t value;
};

// ParseFields returns a string representation of value, using the names in
// fields.
+std::string ParseFields(const Field* fields, size_t len, uint64_t value) { + bool first = true; + std::string s; + for (size_t i = 0; i < len; i++) { + const Field f = fields[i]; + if ((value & f.mask) == f.value) { + if (!first) { + s += "|"; + } + s += f.name; + first = false; + value &= ~f.mask; + } + } + + if (value) { + if (!first) { + s += "|"; + } + absl::StrAppend(&s, value); + } + + return s; +} + +const Field kIflagFields[] = { + {"IGNBRK", IGNBRK, IGNBRK}, {"BRKINT", BRKINT, BRKINT}, + {"IGNPAR", IGNPAR, IGNPAR}, {"PARMRK", PARMRK, PARMRK}, + {"INPCK", INPCK, INPCK}, {"ISTRIP", ISTRIP, ISTRIP}, + {"INLCR", INLCR, INLCR}, {"IGNCR", IGNCR, IGNCR}, + {"ICRNL", ICRNL, ICRNL}, {"IUCLC", IUCLC, IUCLC}, + {"IXON", IXON, IXON}, {"IXANY", IXANY, IXANY}, + {"IXOFF", IXOFF, IXOFF}, {"IMAXBEL", IMAXBEL, IMAXBEL}, + {"IUTF8", IUTF8, IUTF8}, +}; + +const Field kOflagFields[] = { + {"OPOST", OPOST, OPOST}, {"OLCUC", OLCUC, OLCUC}, + {"ONLCR", ONLCR, ONLCR}, {"OCRNL", OCRNL, OCRNL}, + {"ONOCR", ONOCR, ONOCR}, {"ONLRET", ONLRET, ONLRET}, + {"OFILL", OFILL, OFILL}, {"OFDEL", OFDEL, OFDEL}, + {"NL0", NLDLY, NL0}, {"NL1", NLDLY, NL1}, + {"CR0", CRDLY, CR0}, {"CR1", CRDLY, CR1}, + {"CR2", CRDLY, CR2}, {"CR3", CRDLY, CR3}, + {"TAB0", TABDLY, TAB0}, {"TAB1", TABDLY, TAB1}, + {"TAB2", TABDLY, TAB2}, {"TAB3", TABDLY, TAB3}, + {"BS0", BSDLY, BS0}, {"BS1", BSDLY, BS1}, + {"FF0", FFDLY, FF0}, {"FF1", FFDLY, FF1}, + {"VT0", VTDLY, VT0}, {"VT1", VTDLY, VT1}, + {"XTABS", XTABS, XTABS}, +}; + +#ifndef IBSHIFT +// Shift from CBAUD to CIBAUD. 
+#define IBSHIFT 16 +#endif + +const Field kCflagFields[] = { + {"B0", CBAUD, B0}, + {"B50", CBAUD, B50}, + {"B75", CBAUD, B75}, + {"B110", CBAUD, B110}, + {"B134", CBAUD, B134}, + {"B150", CBAUD, B150}, + {"B200", CBAUD, B200}, + {"B300", CBAUD, B300}, + {"B600", CBAUD, B600}, + {"B1200", CBAUD, B1200}, + {"B1800", CBAUD, B1800}, + {"B2400", CBAUD, B2400}, + {"B4800", CBAUD, B4800}, + {"B9600", CBAUD, B9600}, + {"B19200", CBAUD, B19200}, + {"B38400", CBAUD, B38400}, + {"CS5", CSIZE, CS5}, + {"CS6", CSIZE, CS6}, + {"CS7", CSIZE, CS7}, + {"CS8", CSIZE, CS8}, + {"CSTOPB", CSTOPB, CSTOPB}, + {"CREAD", CREAD, CREAD}, + {"PARENB", PARENB, PARENB}, + {"PARODD", PARODD, PARODD}, + {"HUPCL", HUPCL, HUPCL}, + {"CLOCAL", CLOCAL, CLOCAL}, + {"B57600", CBAUD, B57600}, + {"B115200", CBAUD, B115200}, + {"B230400", CBAUD, B230400}, + {"B460800", CBAUD, B460800}, + {"B500000", CBAUD, B500000}, + {"B576000", CBAUD, B576000}, + {"B921600", CBAUD, B921600}, + {"B1000000", CBAUD, B1000000}, + {"B1152000", CBAUD, B1152000}, + {"B1500000", CBAUD, B1500000}, + {"B2000000", CBAUD, B2000000}, + {"B2500000", CBAUD, B2500000}, + {"B3000000", CBAUD, B3000000}, + {"B3500000", CBAUD, B3500000}, + {"B4000000", CBAUD, B4000000}, + {"CMSPAR", CMSPAR, CMSPAR}, + {"CRTSCTS", CRTSCTS, CRTSCTS}, + {"IB0", CIBAUD, B0 << IBSHIFT}, + {"IB50", CIBAUD, B50 << IBSHIFT}, + {"IB75", CIBAUD, B75 << IBSHIFT}, + {"IB110", CIBAUD, B110 << IBSHIFT}, + {"IB134", CIBAUD, B134 << IBSHIFT}, + {"IB150", CIBAUD, B150 << IBSHIFT}, + {"IB200", CIBAUD, B200 << IBSHIFT}, + {"IB300", CIBAUD, B300 << IBSHIFT}, + {"IB600", CIBAUD, B600 << IBSHIFT}, + {"IB1200", CIBAUD, B1200 << IBSHIFT}, + {"IB1800", CIBAUD, B1800 << IBSHIFT}, + {"IB2400", CIBAUD, B2400 << IBSHIFT}, + {"IB4800", CIBAUD, B4800 << IBSHIFT}, + {"IB9600", CIBAUD, B9600 << IBSHIFT}, + {"IB19200", CIBAUD, B19200 << IBSHIFT}, + {"IB38400", CIBAUD, B38400 << IBSHIFT}, + {"IB57600", CIBAUD, B57600 << IBSHIFT}, + {"IB115200", CIBAUD, B115200 << IBSHIFT}, + {"IB230400", 
CIBAUD, B230400 << IBSHIFT}, + {"IB460800", CIBAUD, B460800 << IBSHIFT}, + {"IB500000", CIBAUD, B500000 << IBSHIFT}, + {"IB576000", CIBAUD, B576000 << IBSHIFT}, + {"IB921600", CIBAUD, B921600 << IBSHIFT}, + {"IB1000000", CIBAUD, B1000000 << IBSHIFT}, + {"IB1152000", CIBAUD, B1152000 << IBSHIFT}, + {"IB1500000", CIBAUD, B1500000 << IBSHIFT}, + {"IB2000000", CIBAUD, B2000000 << IBSHIFT}, + {"IB2500000", CIBAUD, B2500000 << IBSHIFT}, + {"IB3000000", CIBAUD, B3000000 << IBSHIFT}, + {"IB3500000", CIBAUD, B3500000 << IBSHIFT}, + {"IB4000000", CIBAUD, B4000000 << IBSHIFT}, +}; + +const Field kLflagFields[] = { + {"ISIG", ISIG, ISIG}, {"ICANON", ICANON, ICANON}, + {"XCASE", XCASE, XCASE}, {"ECHO", ECHO, ECHO}, + {"ECHOE", ECHOE, ECHOE}, {"ECHOK", ECHOK, ECHOK}, + {"ECHONL", ECHONL, ECHONL}, {"NOFLSH", NOFLSH, NOFLSH}, + {"TOSTOP", TOSTOP, TOSTOP}, {"ECHOCTL", ECHOCTL, ECHOCTL}, + {"ECHOPRT", ECHOPRT, ECHOPRT}, {"ECHOKE", ECHOKE, ECHOKE}, + {"FLUSHO", FLUSHO, FLUSHO}, {"PENDIN", PENDIN, PENDIN}, + {"IEXTEN", IEXTEN, IEXTEN}, {"EXTPROC", EXTPROC, EXTPROC}, +}; + +std::string FormatCC(char c) { + if (isgraph(c)) { + return std::string(1, c); + } else if (c == ' ') { + return " "; + } else if (c == '\t') { + return "\\t"; + } else if (c == '\r') { + return "\\r"; + } else if (c == '\n') { + return "\\n"; + } else if (c == '\0') { + return "\\0"; + } else if (IsControlCharacter(c)) { + return absl::StrCat("^", std::string(1, FromControlCharacter(c))); + } + return absl::StrCat("\\x", absl::Hex(c)); +} + +std::ostream& operator<<(std::ostream& os, struct kernel_termios const& a) { + os << "{ c_iflag = " + << ParseFields(kIflagFields, ABSL_ARRAYSIZE(kIflagFields), a.c_iflag); + os << ", c_oflag = " + << ParseFields(kOflagFields, ABSL_ARRAYSIZE(kOflagFields), a.c_oflag); + os << ", c_cflag = " + << ParseFields(kCflagFields, ABSL_ARRAYSIZE(kCflagFields), a.c_cflag); + os << ", c_lflag = " + << ParseFields(kLflagFields, ABSL_ARRAYSIZE(kLflagFields), a.c_lflag); + os << ", c_line = " 
<< a.c_line; + os << ", c_cc = { [VINTR] = '" << FormatCC(a.c_cc[VINTR]); + os << "', [VQUIT] = '" << FormatCC(a.c_cc[VQUIT]); + os << "', [VERASE] = '" << FormatCC(a.c_cc[VERASE]); + os << "', [VKILL] = '" << FormatCC(a.c_cc[VKILL]); + os << "', [VEOF] = '" << FormatCC(a.c_cc[VEOF]); + os << "', [VTIME] = '" << static_cast<int>(a.c_cc[VTIME]); + os << "', [VMIN] = " << static_cast<int>(a.c_cc[VMIN]); + os << ", [VSWTC] = '" << FormatCC(a.c_cc[VSWTC]); + os << "', [VSTART] = '" << FormatCC(a.c_cc[VSTART]); + os << "', [VSTOP] = '" << FormatCC(a.c_cc[VSTOP]); + os << "', [VSUSP] = '" << FormatCC(a.c_cc[VSUSP]); + os << "', [VEOL] = '" << FormatCC(a.c_cc[VEOL]); + os << "', [VREPRINT] = '" << FormatCC(a.c_cc[VREPRINT]); + os << "', [VDISCARD] = '" << FormatCC(a.c_cc[VDISCARD]); + os << "', [VWERASE] = '" << FormatCC(a.c_cc[VWERASE]); + os << "', [VLNEXT] = '" << FormatCC(a.c_cc[VLNEXT]); + os << "', [VEOL2] = '" << FormatCC(a.c_cc[VEOL2]); + os << "'}"; + return os; +} + +// Return the default termios settings for a new terminal. 
+struct kernel_termios DefaultTermios() { + struct kernel_termios t = {}; + t.c_iflag = IXON | ICRNL; + t.c_oflag = OPOST | ONLCR; + t.c_cflag = B38400 | CSIZE | CS8 | CREAD; + t.c_lflag = ISIG | ICANON | ECHO | ECHOE | ECHOK | ECHOCTL | ECHOKE | IEXTEN; + t.c_line = 0; + t.c_cc[VINTR] = ControlCharacter('C'); + t.c_cc[VQUIT] = ControlCharacter('\\'); + t.c_cc[VERASE] = '\x7f'; + t.c_cc[VKILL] = ControlCharacter('U'); + t.c_cc[VEOF] = ControlCharacter('D'); + t.c_cc[VTIME] = '\0'; + t.c_cc[VMIN] = 1; + t.c_cc[VSWTC] = '\0'; + t.c_cc[VSTART] = ControlCharacter('Q'); + t.c_cc[VSTOP] = ControlCharacter('S'); + t.c_cc[VSUSP] = ControlCharacter('Z'); + t.c_cc[VEOL] = '\0'; + t.c_cc[VREPRINT] = ControlCharacter('R'); + t.c_cc[VDISCARD] = ControlCharacter('O'); + t.c_cc[VWERASE] = ControlCharacter('W'); + t.c_cc[VLNEXT] = ControlCharacter('V'); + t.c_cc[VEOL2] = '\0'; + return t; +} + +// PollAndReadFd tries to read count bytes from buf within timeout. +// +// Returns a partial read if some bytes were read. +// +// fd must be non-blocking. +PosixErrorOr<size_t> PollAndReadFd(int fd, void* buf, size_t count, + absl::Duration timeout) { + absl::Time end = absl::Now() + timeout; + + size_t completed = 0; + absl::Duration remaining; + while ((remaining = end - absl::Now()) > absl::ZeroDuration()) { + struct pollfd pfd = {fd, POLLIN, 0}; + int ret = RetryEINTR(poll)(&pfd, 1, absl::ToInt64Milliseconds(remaining)); + if (ret < 0) { + return PosixError(errno, "poll failed"); + } else if (ret == 0) { + // Timed out. + continue; + } else if (ret != 1) { + return PosixError(EINVAL, absl::StrCat("Bad poll ret ", ret)); + } + + ssize_t n = + ReadFd(fd, static_cast<char*>(buf) + completed, count - completed); + if (n < 0) { + if (errno == EAGAIN) { + // Linux sometimes returns EAGAIN from this read, despite the fact that + // poll returned success. Let's just do what do as we are told and try + // again. 
+ continue; + } + return PosixError(errno, "read failed"); + } + completed += n; + if (completed >= count) { + return completed; + } + } + + if (completed) { + return completed; + } + return PosixError(ETIMEDOUT, "Poll timed out"); +} + +TEST(PtyTrunc, Truncate) { + // Opening PTYs with O_TRUNC shouldn't cause an error, but calls to + // (f)truncate should. + FileDescriptor master = + ASSERT_NO_ERRNO_AND_VALUE(Open(kMasterPath, O_RDWR | O_TRUNC)); + int n = ASSERT_NO_ERRNO_AND_VALUE(SlaveID(master)); + std::string spath = absl::StrCat("/dev/pts/", n); + FileDescriptor slave = + ASSERT_NO_ERRNO_AND_VALUE(Open(spath, O_RDWR | O_NONBLOCK | O_TRUNC)); + + EXPECT_THAT(truncate(kMasterPath, 0), SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(truncate(spath.c_str(), 0), SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(ftruncate(master.get(), 0), SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(ftruncate(slave.get(), 0), SyscallFailsWithErrno(EINVAL)); +} + +TEST(BasicPtyTest, StatUnopenedMaster) { + struct stat s; + ASSERT_THAT(stat(kMasterPath, &s), SyscallSucceeds()); + + EXPECT_EQ(s.st_rdev, makedev(TTYAUX_MAJOR, kPtmxMinor)); + EXPECT_EQ(s.st_size, 0); + EXPECT_EQ(s.st_blocks, 0); + + // ptmx attached to a specific devpts mount uses block size 1024. See + // fs/devpts/inode.c:devpts_fill_super. + // + // The global ptmx device uses the block size of the filesystem it is created + // on (which is usually 4096 for disk filesystems). + EXPECT_THAT(s.st_blksize, AnyOf(Eq(1024), Eq(4096))); +} + +// Waits for count bytes to be readable from fd. Unlike poll, which can return +// before all data is moved into a pty's read buffer, this function waits for +// all count bytes to become readable. 
+PosixErrorOr<int> WaitUntilReceived(int fd, int count) { + int buffered = -1; + absl::Duration remaining; + absl::Time end = absl::Now() + kTimeout; + while ((remaining = end - absl::Now()) > absl::ZeroDuration()) { + if (ioctl(fd, FIONREAD, &buffered) < 0) { + return PosixError(errno, "failed FIONREAD ioctl"); + } + if (buffered >= count) { + return buffered; + } + absl::SleepFor(absl::Milliseconds(500)); + } + return PosixError( + ETIMEDOUT, + absl::StrFormat( + "FIONREAD timed out, receiving only %d of %d expected bytes", + buffered, count)); +} + +// Verifies that there is nothing left to read from fd. +void ExpectFinished(const FileDescriptor& fd) { + // Nothing more to read. + char c; + EXPECT_THAT(ReadFd(fd.get(), &c, 1), SyscallFailsWithErrno(EAGAIN)); +} + +// Verifies that we can read expected bytes from fd into buf. +void ExpectReadable(const FileDescriptor& fd, int expected, char* buf) { + size_t n = ASSERT_NO_ERRNO_AND_VALUE( + PollAndReadFd(fd.get(), buf, expected, kTimeout)); + EXPECT_EQ(expected, n); +} + +TEST(BasicPtyTest, OpenMasterSlave) { + FileDescriptor master = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR)); + FileDescriptor slave = ASSERT_NO_ERRNO_AND_VALUE(OpenSlave(master)); +} + +// The slave entry in /dev/pts/ disappears when the master is closed, even if +// the slave is still open. +TEST(BasicPtyTest, SlaveEntryGoneAfterMasterClose) { + FileDescriptor master = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR)); + FileDescriptor slave = ASSERT_NO_ERRNO_AND_VALUE(OpenSlave(master)); + + // Get pty index. 
+ int index = -1; + ASSERT_THAT(ioctl(master.get(), TIOCGPTN, &index), SyscallSucceeds()); + + std::string path = absl::StrCat("/dev/pts/", index); + + struct stat st; + EXPECT_THAT(stat(path.c_str(), &st), SyscallSucceeds()); + + master.reset(); + + EXPECT_THAT(stat(path.c_str(), &st), SyscallFailsWithErrno(ENOENT)); +} + +TEST(BasicPtyTest, Getdents) { + FileDescriptor master1 = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR)); + int index1 = -1; + ASSERT_THAT(ioctl(master1.get(), TIOCGPTN, &index1), SyscallSucceeds()); + FileDescriptor slave1 = ASSERT_NO_ERRNO_AND_VALUE(OpenSlave(master1)); + + FileDescriptor master2 = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR)); + int index2 = -1; + ASSERT_THAT(ioctl(master2.get(), TIOCGPTN, &index2), SyscallSucceeds()); + FileDescriptor slave2 = ASSERT_NO_ERRNO_AND_VALUE(OpenSlave(master2)); + + // The directory contains ptmx, index1, and index2. (Plus any additional PTYs + // unrelated to this test.) + + std::vector<std::string> contents = + ASSERT_NO_ERRNO_AND_VALUE(ListDir("/dev/pts/", true)); + EXPECT_THAT(contents, Contains(absl::StrCat(index1))); + EXPECT_THAT(contents, Contains(absl::StrCat(index2))); + + master2.reset(); + + // The directory contains ptmx and index1, but not index2 since the master is + // closed. (Plus any additional PTYs unrelated to this test.) + + contents = ASSERT_NO_ERRNO_AND_VALUE(ListDir("/dev/pts/", true)); + EXPECT_THAT(contents, Contains(absl::StrCat(index1))); + EXPECT_THAT(contents, Not(Contains(absl::StrCat(index2)))); + + // N.B. devpts supports legacy "single-instance" mode and new "multi-instance" + // mode. In legacy mode, devpts does not contain a "ptmx" device (the distro + // must use mknod to create it somewhere, presumably /dev/ptmx). + // Multi-instance mode does include a "ptmx" device tied to that mount. + // + // We don't check for the presence or absence of "ptmx", as distros vary in + // their usage of the two modes. 
+} + +class PtyTest : public ::testing::Test { + protected: + void SetUp() override { + master_ = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR | O_NONBLOCK)); + slave_ = ASSERT_NO_ERRNO_AND_VALUE(OpenSlave(master_)); + } + + void DisableCanonical() { + struct kernel_termios t = {}; + EXPECT_THAT(ioctl(slave_.get(), TCGETS, &t), SyscallSucceeds()); + t.c_lflag &= ~ICANON; + EXPECT_THAT(ioctl(slave_.get(), TCSETS, &t), SyscallSucceeds()); + } + + void EnableCanonical() { + struct kernel_termios t = {}; + EXPECT_THAT(ioctl(slave_.get(), TCGETS, &t), SyscallSucceeds()); + t.c_lflag |= ICANON; + EXPECT_THAT(ioctl(slave_.get(), TCSETS, &t), SyscallSucceeds()); + } + + // Master and slave ends of the PTY. Non-blocking. + FileDescriptor master_; + FileDescriptor slave_; +}; + +// Master to slave sanity test. +TEST_F(PtyTest, WriteMasterToSlave) { + // N.B. by default, the slave reads nothing until the master writes a newline. + constexpr char kBuf[] = "hello\n"; + + EXPECT_THAT(WriteFd(master_.get(), kBuf, sizeof(kBuf) - 1), + SyscallSucceedsWithValue(sizeof(kBuf) - 1)); + + // Linux moves data from the master to the slave via async work scheduled via + // tty_flip_buffer_push. Since it is asynchronous, the data may not be + // available for reading immediately. Instead we must poll and assert that it + // becomes available "soon". + + char buf[sizeof(kBuf)] = {}; + ExpectReadable(slave_, sizeof(buf) - 1, buf); + + EXPECT_EQ(memcmp(buf, kBuf, sizeof(kBuf)), 0); +} + +// Slave to master sanity test. +TEST_F(PtyTest, WriteSlaveToMaster) { + // N.B. by default, the master reads nothing until the slave writes a newline, + // and the master gets a carriage return. + constexpr char kInput[] = "hello\n"; + constexpr char kExpected[] = "hello\r\n"; + + EXPECT_THAT(WriteFd(slave_.get(), kInput, sizeof(kInput) - 1), + SyscallSucceedsWithValue(sizeof(kInput) - 1)); + + // Linux moves data from the master to the slave via async work scheduled via + // tty_flip_buffer_push. 
Since it is asynchronous, the data may not be + // available for reading immediately. Instead we must poll and assert that it + // becomes available "soon". + + char buf[sizeof(kExpected)] = {}; + ExpectReadable(master_, sizeof(buf) - 1, buf); + + EXPECT_EQ(memcmp(buf, kExpected, sizeof(kExpected)), 0); +} + +TEST_F(PtyTest, WriteInvalidUTF8) { + char c = 0xff; + ASSERT_THAT(syscall(__NR_write, master_.get(), &c, sizeof(c)), + SyscallSucceedsWithValue(sizeof(c))); +} + +// Both the master and slave report the standard default termios settings. +// +// Note that TCGETS on the master actually redirects to the slave (see comment +// on MasterTermiosUnchangable). +TEST_F(PtyTest, DefaultTermios) { + struct kernel_termios t = {}; + EXPECT_THAT(ioctl(slave_.get(), TCGETS, &t), SyscallSucceeds()); + EXPECT_EQ(t, DefaultTermios()); + + EXPECT_THAT(ioctl(master_.get(), TCGETS, &t), SyscallSucceeds()); + EXPECT_EQ(t, DefaultTermios()); +} + +// Changing termios from the master actually affects the slave. +// +// TCSETS on the master actually redirects to the slave (see comment on +// MasterTermiosUnchangable). 
+TEST_F(PtyTest, TermiosAffectsSlave) { + struct kernel_termios master_termios = {}; + EXPECT_THAT(ioctl(master_.get(), TCGETS, &master_termios), SyscallSucceeds()); + master_termios.c_lflag ^= ICANON; + EXPECT_THAT(ioctl(master_.get(), TCSETS, &master_termios), SyscallSucceeds()); + + struct kernel_termios slave_termios = {}; + EXPECT_THAT(ioctl(slave_.get(), TCGETS, &slave_termios), SyscallSucceeds()); + EXPECT_EQ(master_termios, slave_termios); +} + +// The master end of the pty has termios: +// +// struct kernel_termios t = { +// .c_iflag = 0; +// .c_oflag = 0; +// .c_cflag = B38400 | CS8 | CREAD; +// .c_lflag = 0; +// .c_cc = /* same as DefaultTermios */ +// } +// +// (From drivers/tty/pty.c:unix98_pty_init) +// +// All termios control ioctls on the master actually redirect to the slave +// (drivers/tty/tty_ioctl.c:tty_mode_ioctl), making it impossible to change the +// master termios. +// +// Verify this by setting ICRNL (which rewrites input \r to \n) and verify that +// it has no effect on the master. +TEST_F(PtyTest, MasterTermiosUnchangable) { + struct kernel_termios master_termios = {}; + EXPECT_THAT(ioctl(master_.get(), TCGETS, &master_termios), SyscallSucceeds()); + master_termios.c_lflag |= ICRNL; + EXPECT_THAT(ioctl(master_.get(), TCSETS, &master_termios), SyscallSucceeds()); + + char c = '\r'; + ASSERT_THAT(WriteFd(slave_.get(), &c, 1), SyscallSucceedsWithValue(1)); + + ExpectReadable(master_, 1, &c); + EXPECT_EQ(c, '\r'); // ICRNL had no effect! + + ExpectFinished(master_); +} + +// ICRNL rewrites input \r to \n. +TEST_F(PtyTest, TermiosICRNL) { + struct kernel_termios t = DefaultTermios(); + t.c_iflag |= ICRNL; + t.c_lflag &= ~ICANON; // for byte-by-byte reading. + ASSERT_THAT(ioctl(slave_.get(), TCSETS, &t), SyscallSucceeds()); + + char c = '\r'; + ASSERT_THAT(WriteFd(master_.get(), &c, 1), SyscallSucceedsWithValue(1)); + + ExpectReadable(slave_, 1, &c); + EXPECT_EQ(c, '\n'); + + ExpectFinished(slave_); +} + +// ONLCR rewrites output \n to \r\n. 
+TEST_F(PtyTest, TermiosONLCR) { + struct kernel_termios t = DefaultTermios(); + t.c_oflag |= ONLCR; + t.c_lflag &= ~ICANON; // for byte-by-byte reading. + ASSERT_THAT(ioctl(slave_.get(), TCSETS, &t), SyscallSucceeds()); + + char c = '\n'; + ASSERT_THAT(WriteFd(slave_.get(), &c, 1), SyscallSucceedsWithValue(1)); + + // Extra byte for NUL for EXPECT_STREQ. + char buf[3] = {}; + ExpectReadable(master_, 2, buf); + EXPECT_STREQ(buf, "\r\n"); + + ExpectFinished(slave_); +} + +TEST_F(PtyTest, TermiosIGNCR) { + struct kernel_termios t = DefaultTermios(); + t.c_iflag |= IGNCR; + t.c_lflag &= ~ICANON; // for byte-by-byte reading. + ASSERT_THAT(ioctl(slave_.get(), TCSETS, &t), SyscallSucceeds()); + + char c = '\r'; + ASSERT_THAT(WriteFd(master_.get(), &c, 1), SyscallSucceedsWithValue(1)); + + // Nothing to read. + ASSERT_THAT(PollAndReadFd(slave_.get(), &c, 1, kTimeout), + PosixErrorIs(ETIMEDOUT, ::testing::StrEq("Poll timed out"))); +} + +// Test that we can successfully poll for readable data from the slave. +TEST_F(PtyTest, TermiosPollSlave) { + struct kernel_termios t = DefaultTermios(); + t.c_iflag |= IGNCR; + t.c_lflag &= ~ICANON; // for byte-by-byte reading. + ASSERT_THAT(ioctl(slave_.get(), TCSETS, &t), SyscallSucceeds()); + + absl::Notification notify; + int sfd = slave_.get(); + ScopedThread th([sfd, ¬ify]() { + notify.Notify(); + + // Poll on the reader fd with POLLIN event. + struct pollfd poll_fd = {sfd, POLLIN, 0}; + EXPECT_THAT( + RetryEINTR(poll)(&poll_fd, 1, absl::ToInt64Milliseconds(kTimeout)), + SyscallSucceedsWithValue(1)); + + // Should trigger POLLIN event. + EXPECT_EQ(poll_fd.revents & POLLIN, POLLIN); + }); + + notify.WaitForNotification(); + // Sleep ensures that poll begins waiting before we write to the FD. + absl::SleepFor(absl::Seconds(1)); + + char s[] = "foo\n"; + ASSERT_THAT(WriteFd(master_.get(), s, strlen(s) + 1), SyscallSucceeds()); +} + +// Test that we can successfully poll for readable data from the master. 
+TEST_F(PtyTest, TermiosPollMaster) { + struct kernel_termios t = DefaultTermios(); + t.c_iflag |= IGNCR; + t.c_lflag &= ~ICANON; // for byte-by-byte reading. + ASSERT_THAT(ioctl(master_.get(), TCSETS, &t), SyscallSucceeds()); + + absl::Notification notify; + int mfd = master_.get(); + ScopedThread th([mfd, ¬ify]() { + notify.Notify(); + + // Poll on the reader fd with POLLIN event. + struct pollfd poll_fd = {mfd, POLLIN, 0}; + EXPECT_THAT( + RetryEINTR(poll)(&poll_fd, 1, absl::ToInt64Milliseconds(kTimeout)), + SyscallSucceedsWithValue(1)); + + // Should trigger POLLIN event. + EXPECT_EQ(poll_fd.revents & POLLIN, POLLIN); + }); + + notify.WaitForNotification(); + // Sleep ensures that poll begins waiting before we write to the FD. + absl::SleepFor(absl::Seconds(1)); + + char s[] = "foo\n"; + ASSERT_THAT(WriteFd(slave_.get(), s, strlen(s) + 1), SyscallSucceeds()); +} + +TEST_F(PtyTest, TermiosINLCR) { + struct kernel_termios t = DefaultTermios(); + t.c_iflag |= INLCR; + t.c_lflag &= ~ICANON; // for byte-by-byte reading. + ASSERT_THAT(ioctl(slave_.get(), TCSETS, &t), SyscallSucceeds()); + + char c = '\n'; + ASSERT_THAT(WriteFd(master_.get(), &c, 1), SyscallSucceedsWithValue(1)); + + ExpectReadable(slave_, 1, &c); + EXPECT_EQ(c, '\r'); + + ExpectFinished(slave_); +} + +TEST_F(PtyTest, TermiosONOCR) { + struct kernel_termios t = DefaultTermios(); + t.c_oflag |= ONOCR; + t.c_lflag &= ~ICANON; // for byte-by-byte reading. + ASSERT_THAT(ioctl(slave_.get(), TCSETS, &t), SyscallSucceeds()); + + // The terminal is at column 0, so there should be no CR to read. + char c = '\r'; + ASSERT_THAT(WriteFd(slave_.get(), &c, 1), SyscallSucceedsWithValue(1)); + + // Nothing to read. + ASSERT_THAT(PollAndReadFd(master_.get(), &c, 1, kTimeout), + PosixErrorIs(ETIMEDOUT, ::testing::StrEq("Poll timed out"))); + + // This time the column is greater than 0, so we should be able to read the CR + // out of the other end. 
+ constexpr char kInput[] = "foo\r"; + constexpr int kInputSize = sizeof(kInput) - 1; + ASSERT_THAT(WriteFd(slave_.get(), kInput, kInputSize), + SyscallSucceedsWithValue(kInputSize)); + + char buf[kInputSize] = {}; + ExpectReadable(master_, kInputSize, buf); + + EXPECT_EQ(memcmp(buf, kInput, kInputSize), 0); + + ExpectFinished(master_); + + // Terminal should be at column 0 again, so no CR can be read. + ASSERT_THAT(WriteFd(slave_.get(), &c, 1), SyscallSucceedsWithValue(1)); + + // Nothing to read. + ASSERT_THAT(PollAndReadFd(master_.get(), &c, 1, kTimeout), + PosixErrorIs(ETIMEDOUT, ::testing::StrEq("Poll timed out"))); +} + +TEST_F(PtyTest, TermiosOCRNL) { + struct kernel_termios t = DefaultTermios(); + t.c_oflag |= OCRNL; + t.c_lflag &= ~ICANON; // for byte-by-byte reading. + ASSERT_THAT(ioctl(slave_.get(), TCSETS, &t), SyscallSucceeds()); + + // The terminal is at column 0, so there should be no CR to read. + char c = '\r'; + ASSERT_THAT(WriteFd(slave_.get(), &c, 1), SyscallSucceedsWithValue(1)); + + ExpectReadable(master_, 1, &c); + EXPECT_EQ(c, '\n'); + + ExpectFinished(master_); +} + +// Tests that VEOL is disabled when we start, and that we can set it to enable +// it. +TEST_F(PtyTest, VEOLTermination) { + // Write a few bytes ending with '\0', and confirm that we can't read. + constexpr char kInput[] = "hello"; + ASSERT_THAT(WriteFd(master_.get(), kInput, sizeof(kInput)), + SyscallSucceedsWithValue(sizeof(kInput))); + char buf[sizeof(kInput)] = {}; + ASSERT_THAT(PollAndReadFd(slave_.get(), buf, sizeof(kInput), kTimeout), + PosixErrorIs(ETIMEDOUT, ::testing::StrEq("Poll timed out"))); + + // Set the EOL character to '=' and write it. + constexpr char delim = '='; + struct kernel_termios t = DefaultTermios(); + t.c_cc[VEOL] = delim; + ASSERT_THAT(ioctl(slave_.get(), TCSETS, &t), SyscallSucceeds()); + ASSERT_THAT(WriteFd(master_.get(), &delim, 1), SyscallSucceedsWithValue(1)); + + // Now we can read, as sending EOL caused the line to become available. 
+ ExpectReadable(slave_, sizeof(kInput), buf); + EXPECT_EQ(memcmp(buf, kInput, sizeof(kInput)), 0); + + ExpectReadable(slave_, 1, buf); + EXPECT_EQ(buf[0], '='); + + ExpectFinished(slave_); +} + +// Tests that we can write more than the 4096 character limit, then a +// terminating character, then read out just the first 4095 bytes plus the +// terminator. +TEST_F(PtyTest, CanonBigWrite) { + constexpr int kWriteLen = kMaxLineSize + 4; + char input[kWriteLen]; + memset(input, 'M', kWriteLen - 1); + input[kWriteLen - 1] = '\n'; + ASSERT_THAT(WriteFd(master_.get(), input, kWriteLen), + SyscallSucceedsWithValue(kWriteLen)); + + // We can read the line. + char buf[kMaxLineSize] = {}; + ExpectReadable(slave_, kMaxLineSize, buf); + + ExpectFinished(slave_); +} + +// Tests that data written in canonical mode can be read immediately once +// switched to noncanonical mode. +TEST_F(PtyTest, SwitchCanonToNoncanon) { + // Write a few bytes without a terminating character, switch to noncanonical + // mode, and read them. + constexpr char kInput[] = "hello"; + ASSERT_THAT(WriteFd(master_.get(), kInput, sizeof(kInput)), + SyscallSucceedsWithValue(sizeof(kInput))); + + // Nothing available yet. + char buf[sizeof(kInput)] = {}; + ASSERT_THAT(PollAndReadFd(slave_.get(), buf, sizeof(kInput), kTimeout), + PosixErrorIs(ETIMEDOUT, ::testing::StrEq("Poll timed out"))); + + DisableCanonical(); + + ExpectReadable(slave_, sizeof(kInput), buf); + EXPECT_STREQ(buf, kInput); + + ExpectFinished(slave_); +} + +TEST_F(PtyTest, SwitchCanonToNonCanonNewline) { + // Write a few bytes with a terminating character. + constexpr char kInput[] = "hello\n"; + ASSERT_THAT(WriteFd(master_.get(), kInput, sizeof(kInput)), + SyscallSucceedsWithValue(sizeof(kInput))); + + DisableCanonical(); + + // We can read the line. 
+ char buf[sizeof(kInput)] = {}; + ExpectReadable(slave_, sizeof(kInput), buf); + EXPECT_STREQ(buf, kInput); + + ExpectFinished(slave_); +} + +TEST_F(PtyTest, SwitchNoncanonToCanonNewlineBig) { + DisableCanonical(); + + // Write more than the maximum line size, then write a delimiter. + constexpr int kWriteLen = 4100; + char input[kWriteLen]; + memset(input, 'M', kWriteLen); + ASSERT_THAT(WriteFd(master_.get(), input, kWriteLen), + SyscallSucceedsWithValue(kWriteLen)); + // Wait for the input queue to fill. + ASSERT_NO_ERRNO(WaitUntilReceived(slave_.get(), kMaxLineSize - 1)); + constexpr char delim = '\n'; + ASSERT_THAT(WriteFd(master_.get(), &delim, 1), SyscallSucceedsWithValue(1)); + + EnableCanonical(); + + // We can read the line. + char buf[kMaxLineSize] = {}; + ExpectReadable(slave_, kMaxLineSize - 1, buf); + + // We can also read the remaining characters. + ExpectReadable(slave_, 6, buf); + + ExpectFinished(slave_); +} + +TEST_F(PtyTest, SwitchNoncanonToCanonNoNewline) { + DisableCanonical(); + + // Write a few bytes without a terminating character. + // mode, and read them. + constexpr char kInput[] = "hello"; + ASSERT_THAT(WriteFd(master_.get(), kInput, sizeof(kInput) - 1), + SyscallSucceedsWithValue(sizeof(kInput) - 1)); + + ASSERT_NO_ERRNO(WaitUntilReceived(slave_.get(), sizeof(kInput) - 1)); + EnableCanonical(); + + // We can read the line. + char buf[sizeof(kInput)] = {}; + ExpectReadable(slave_, sizeof(kInput) - 1, buf); + EXPECT_STREQ(buf, kInput); + + ExpectFinished(slave_); +} + +TEST_F(PtyTest, SwitchNoncanonToCanonNoNewlineBig) { + DisableCanonical(); + + // Write a few bytes without a terminating character. + // mode, and read them. + constexpr int kWriteLen = 4100; + char input[kWriteLen]; + memset(input, 'M', kWriteLen); + ASSERT_THAT(WriteFd(master_.get(), input, kWriteLen), + SyscallSucceedsWithValue(kWriteLen)); + + ASSERT_NO_ERRNO(WaitUntilReceived(slave_.get(), kMaxLineSize - 1)); + EnableCanonical(); + + // We can read the line. 
+ char buf[kMaxLineSize] = {}; + ExpectReadable(slave_, kMaxLineSize - 1, buf); + + ExpectFinished(slave_); +} + +// Tests that we can write over the 4095 noncanonical limit, then read out +// everything. +TEST_F(PtyTest, NoncanonBigWrite) { + DisableCanonical(); + + // Write well over the 4095 internal buffer limit. + constexpr char kInput = 'M'; + constexpr int kInputSize = kMaxLineSize * 2; + for (int i = 0; i < kInputSize; i++) { + // This makes too many syscalls for save/restore. + const DisableSave ds; + ASSERT_THAT(WriteFd(master_.get(), &kInput, sizeof(kInput)), + SyscallSucceedsWithValue(sizeof(kInput))); + } + + // We should be able to read out everything. Sleep a bit so that Linux has a + // chance to move data from the master to the slave. + ASSERT_NO_ERRNO(WaitUntilReceived(slave_.get(), kMaxLineSize - 1)); + for (int i = 0; i < kInputSize; i++) { + // This makes too many syscalls for save/restore. + const DisableSave ds; + char c; + ExpectReadable(slave_, 1, &c); + ASSERT_EQ(c, kInput); + } + + ExpectFinished(slave_); +} + +// ICANON doesn't make input available until a line delimiter is typed. +// +// Test newline. +TEST_F(PtyTest, TermiosICANONNewline) { + char input[3] = {'a', 'b', 'c'}; + ASSERT_THAT(WriteFd(master_.get(), input, sizeof(input)), + SyscallSucceedsWithValue(sizeof(input))); + + // Extra bytes for newline (written later) and NUL for EXPECT_STREQ. + char buf[5] = {}; + + // Nothing available yet. + ASSERT_THAT(PollAndReadFd(slave_.get(), buf, sizeof(input), kTimeout), + PosixErrorIs(ETIMEDOUT, ::testing::StrEq("Poll timed out"))); + + char delim = '\n'; + ASSERT_THAT(WriteFd(master_.get(), &delim, 1), SyscallSucceedsWithValue(1)); + + // Now it is available. + ASSERT_NO_ERRNO(WaitUntilReceived(slave_.get(), sizeof(input) + 1)); + ExpectReadable(slave_, sizeof(input) + 1, buf); + EXPECT_STREQ(buf, "abc\n"); + + ExpectFinished(slave_); +} + +// ICANON doesn't make input available until a line delimiter is typed. +// +// Test EOF (^D). 
+TEST_F(PtyTest, TermiosICANONEOF) { + char input[3] = {'a', 'b', 'c'}; + ASSERT_THAT(WriteFd(master_.get(), input, sizeof(input)), + SyscallSucceedsWithValue(sizeof(input))); + + // Extra byte for NUL for EXPECT_STREQ. + char buf[4] = {}; + + // Nothing available yet. + ASSERT_THAT(PollAndReadFd(slave_.get(), buf, sizeof(input), kTimeout), + PosixErrorIs(ETIMEDOUT, ::testing::StrEq("Poll timed out"))); + char delim = ControlCharacter('D'); + ASSERT_THAT(WriteFd(master_.get(), &delim, 1), SyscallSucceedsWithValue(1)); + + // Now it is available. Note that ^D is not included. + ExpectReadable(slave_, sizeof(input), buf); + EXPECT_STREQ(buf, "abc"); + + ExpectFinished(slave_); +} + +// ICANON limits us to 4096 bytes including a terminating character. Anything +// after and 4095th character is discarded (although still processed for +// signals and echoing). +TEST_F(PtyTest, CanonDiscard) { + constexpr char kInput = 'M'; + constexpr int kInputSize = 4100; + constexpr int kIter = 3; + + // A few times write more than the 4096 character maximum, then a newline. + constexpr char delim = '\n'; + for (int i = 0; i < kIter; i++) { + // This makes too many syscalls for save/restore. + const DisableSave ds; + for (int i = 0; i < kInputSize; i++) { + ASSERT_THAT(WriteFd(master_.get(), &kInput, sizeof(kInput)), + SyscallSucceedsWithValue(sizeof(kInput))); + } + ASSERT_THAT(WriteFd(master_.get(), &delim, 1), SyscallSucceedsWithValue(1)); + } + + // There should be multiple truncated lines available to read. + for (int i = 0; i < kIter; i++) { + char buf[kInputSize] = {}; + ExpectReadable(slave_, kMaxLineSize, buf); + EXPECT_EQ(buf[kMaxLineSize - 1], delim); + EXPECT_EQ(buf[kMaxLineSize - 2], kInput); + } + + ExpectFinished(slave_); +} + +TEST_F(PtyTest, CanonMultiline) { + constexpr char kInput1[] = "GO\n"; + constexpr char kInput2[] = "BLUE\n"; + + // Write both lines. 
+ ASSERT_THAT(WriteFd(master_.get(), kInput1, sizeof(kInput1) - 1), + SyscallSucceedsWithValue(sizeof(kInput1) - 1)); + ASSERT_THAT(WriteFd(master_.get(), kInput2, sizeof(kInput2) - 1), + SyscallSucceedsWithValue(sizeof(kInput2) - 1)); + + // Get the first line. + char line1[8] = {}; + ExpectReadable(slave_, sizeof(kInput1) - 1, line1); + EXPECT_STREQ(line1, kInput1); + + // Get the second line. + char line2[8] = {}; + ExpectReadable(slave_, sizeof(kInput2) - 1, line2); + EXPECT_STREQ(line2, kInput2); + + ExpectFinished(slave_); +} + +TEST_F(PtyTest, SwitchNoncanonToCanonMultiline) { + DisableCanonical(); + + constexpr char kInput1[] = "GO\n"; + constexpr char kInput2[] = "BLUE\n"; + constexpr char kExpected[] = "GO\nBLUE\n"; + + // Write both lines. + ASSERT_THAT(WriteFd(master_.get(), kInput1, sizeof(kInput1) - 1), + SyscallSucceedsWithValue(sizeof(kInput1) - 1)); + ASSERT_THAT(WriteFd(master_.get(), kInput2, sizeof(kInput2) - 1), + SyscallSucceedsWithValue(sizeof(kInput2) - 1)); + + ASSERT_NO_ERRNO( + WaitUntilReceived(slave_.get(), sizeof(kInput1) + sizeof(kInput2) - 2)); + EnableCanonical(); + + // Get all together as one line. + char line[9] = {}; + ExpectReadable(slave_, 8, line); + EXPECT_STREQ(line, kExpected); + + ExpectFinished(slave_); +} + +TEST_F(PtyTest, SwitchTwiceMultiline) { + std::string kInputs[] = {"GO\n", "BLUE\n", "!"}; + std::string kExpected = "GO\nBLUE\n!"; + + // Write each line. + for (const std::string& input : kInputs) { + ASSERT_THAT(WriteFd(master_.get(), input.c_str(), input.size()), + SyscallSucceedsWithValue(input.size())); + } + + DisableCanonical(); + // All written characters have to make it into the input queue before + // canonical mode is re-enabled. If the final '!' character hasn't been + // enqueued before canonical mode is re-enabled, it won't be readable. + ASSERT_NO_ERRNO(WaitUntilReceived(slave_.get(), kExpected.size())); + EnableCanonical(); + + // Get all together as one line. 
+ char line[10] = {}; + ExpectReadable(slave_, 9, line); + EXPECT_STREQ(line, kExpected.c_str()); + + ExpectFinished(slave_); +} + +TEST_F(PtyTest, QueueSize) { + // Write the line. + constexpr char kInput1[] = "GO\n"; + ASSERT_THAT(WriteFd(master_.get(), kInput1, sizeof(kInput1) - 1), + SyscallSucceedsWithValue(sizeof(kInput1) - 1)); + ASSERT_NO_ERRNO(WaitUntilReceived(slave_.get(), sizeof(kInput1) - 1)); + + // Ensure that writing more (beyond what is readable) does not impact the + // readable size. + char input[kMaxLineSize]; + memset(input, 'M', kMaxLineSize); + ASSERT_THAT(WriteFd(master_.get(), input, kMaxLineSize), + SyscallSucceedsWithValue(kMaxLineSize)); + int inputBufSize = ASSERT_NO_ERRNO_AND_VALUE( + WaitUntilReceived(slave_.get(), sizeof(kInput1) - 1)); + EXPECT_EQ(inputBufSize, sizeof(kInput1) - 1); +} + +TEST_F(PtyTest, PartialBadBuffer) { + // Allocate 2 pages. + void* addr = mmap(nullptr, 2 * kPageSize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(addr, MAP_FAILED); + char* buf = reinterpret_cast<char*>(addr); + + // Guard the 2nd page for our read to run into. + ASSERT_THAT( + mprotect(reinterpret_cast<void*>(buf + kPageSize), kPageSize, PROT_NONE), + SyscallSucceeds()); + + // Leave only one free byte in the buffer. + char* bad_buffer = buf + kPageSize - 1; + + // Write to the master. + constexpr char kBuf[] = "hello\n"; + constexpr size_t size = sizeof(kBuf) - 1; + EXPECT_THAT(WriteFd(master_.get(), kBuf, size), + SyscallSucceedsWithValue(size)); + + // Read from the slave into bad_buffer. + ASSERT_NO_ERRNO(WaitUntilReceived(slave_.get(), size)); + EXPECT_THAT(ReadFd(slave_.get(), bad_buffer, size), + SyscallFailsWithErrno(EFAULT)); + + EXPECT_THAT(munmap(addr, 2 * kPageSize), SyscallSucceeds()) << addr; +} + +TEST_F(PtyTest, SimpleEcho) { + constexpr char kInput[] = "Mr. 
Eko"; + EXPECT_THAT(WriteFd(master_.get(), kInput, strlen(kInput)), + SyscallSucceedsWithValue(strlen(kInput))); + + char buf[100] = {}; + ExpectReadable(master_, strlen(kInput), buf); + + EXPECT_STREQ(buf, kInput); + ExpectFinished(master_); +} + +TEST_F(PtyTest, GetWindowSize) { + struct winsize ws; + ASSERT_THAT(ioctl(slave_.get(), TIOCGWINSZ, &ws), SyscallSucceeds()); + EXPECT_EQ(ws.ws_row, 0); + EXPECT_EQ(ws.ws_col, 0); +} + +TEST_F(PtyTest, SetSlaveWindowSize) { + constexpr uint16_t kRows = 343; + constexpr uint16_t kCols = 2401; + struct winsize ws = {.ws_row = kRows, .ws_col = kCols}; + ASSERT_THAT(ioctl(slave_.get(), TIOCSWINSZ, &ws), SyscallSucceeds()); + + struct winsize retrieved_ws = {}; + ASSERT_THAT(ioctl(master_.get(), TIOCGWINSZ, &retrieved_ws), + SyscallSucceeds()); + EXPECT_EQ(retrieved_ws.ws_row, kRows); + EXPECT_EQ(retrieved_ws.ws_col, kCols); +} + +TEST_F(PtyTest, SetMasterWindowSize) { + constexpr uint16_t kRows = 343; + constexpr uint16_t kCols = 2401; + struct winsize ws = {.ws_row = kRows, .ws_col = kCols}; + ASSERT_THAT(ioctl(master_.get(), TIOCSWINSZ, &ws), SyscallSucceeds()); + + struct winsize retrieved_ws = {}; + ASSERT_THAT(ioctl(slave_.get(), TIOCGWINSZ, &retrieved_ws), + SyscallSucceeds()); + EXPECT_EQ(retrieved_ws.ws_row, kRows); + EXPECT_EQ(retrieved_ws.ws_col, kCols); +} + +class JobControlTest : public ::testing::Test { + protected: + void SetUp() override { + master_ = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR | O_NONBLOCK)); + slave_ = ASSERT_NO_ERRNO_AND_VALUE(OpenSlave(master_)); + + // Make this a session leader, which also drops the controlling terminal. + // In the gVisor test environment, this test will be run as the session + // leader already (as the sentry init process). + if (!IsRunningOnGvisor()) { + ASSERT_THAT(setsid(), SyscallSucceeds()); + } + } + + // Master and slave ends of the PTY. Non-blocking. 
+ FileDescriptor master_; + FileDescriptor slave_; +}; + +TEST_F(JobControlTest, SetTTYMaster) { + ASSERT_THAT(ioctl(master_.get(), TIOCSCTTY, 0), SyscallSucceeds()); +} + +TEST_F(JobControlTest, SetTTY) { + ASSERT_THAT(ioctl(slave_.get(), TIOCSCTTY, 0), SyscallSucceeds()); +} + +TEST_F(JobControlTest, SetTTYNonLeader) { + // Fork a process that won't be the session leader. + pid_t child = fork(); + if (!child) { + // We shouldn't be able to set the terminal. + TEST_PCHECK(ioctl(slave_.get(), TIOCSCTTY, 0)); + _exit(0); + } + + int wstatus; + ASSERT_THAT(waitpid(child, &wstatus, 0), SyscallSucceedsWithValue(child)); + ASSERT_EQ(wstatus, 0); +} + +TEST_F(JobControlTest, SetTTYBadArg) { + // Despite the man page saying arg should be 0 here, Linux doesn't actually + // check. + ASSERT_THAT(ioctl(slave_.get(), TIOCSCTTY, 1), SyscallSucceeds()); +} + +TEST_F(JobControlTest, SetTTYDifferentSession) { + SKIP_IF(ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))); + + ASSERT_THAT(ioctl(slave_.get(), TIOCSCTTY, 0), SyscallSucceeds()); + + // Fork, join a new session, and try to steal the parent's controlling + // terminal, which should fail. + pid_t child = fork(); + if (!child) { + TEST_PCHECK(setsid() >= 0); + // We shouldn't be able to steal the terminal. + TEST_PCHECK(ioctl(slave_.get(), TIOCSCTTY, 1)); + _exit(0); + } + + int wstatus; + ASSERT_THAT(waitpid(child, &wstatus, 0), SyscallSucceedsWithValue(child)); + ASSERT_EQ(wstatus, 0); +} + +TEST_F(JobControlTest, ReleaseTTY) { + ASSERT_THAT(ioctl(slave_.get(), TIOCSCTTY, 0), SyscallSucceeds()); + + // Make sure we're ignoring SIGHUP, which will be sent to this process once we + // disconnect they TTY. 
+ struct sigaction sa = {}; + sa.sa_handler = SIG_IGN; + sa.sa_flags = 0; + sigemptyset(&sa.sa_mask); + struct sigaction old_sa; + EXPECT_THAT(sigaction(SIGHUP, &sa, &old_sa), SyscallSucceeds()); + EXPECT_THAT(ioctl(slave_.get(), TIOCNOTTY), SyscallSucceeds()); + EXPECT_THAT(sigaction(SIGHUP, &old_sa, NULL), SyscallSucceeds()); +} + +TEST_F(JobControlTest, ReleaseUnsetTTY) { + ASSERT_THAT(ioctl(slave_.get(), TIOCNOTTY), SyscallFailsWithErrno(ENOTTY)); +} + +TEST_F(JobControlTest, ReleaseWrongTTY) { + ASSERT_THAT(ioctl(slave_.get(), TIOCSCTTY, 0), SyscallSucceeds()); + + ASSERT_THAT(ioctl(master_.get(), TIOCNOTTY), SyscallFailsWithErrno(ENOTTY)); +} + +TEST_F(JobControlTest, ReleaseTTYNonLeader) { + ASSERT_THAT(ioctl(slave_.get(), TIOCSCTTY, 0), SyscallSucceeds()); + + pid_t child = fork(); + if (!child) { + TEST_PCHECK(!ioctl(slave_.get(), TIOCNOTTY)); + _exit(0); + } + + int wstatus; + ASSERT_THAT(waitpid(child, &wstatus, 0), SyscallSucceedsWithValue(child)); + ASSERT_EQ(wstatus, 0); +} + +TEST_F(JobControlTest, ReleaseTTYDifferentSession) { + ASSERT_THAT(ioctl(slave_.get(), TIOCSCTTY, 0), SyscallSucceeds()); + + pid_t child = fork(); + if (!child) { + // Join a new session, then try to disconnect. + TEST_PCHECK(setsid() >= 0); + TEST_PCHECK(ioctl(slave_.get(), TIOCNOTTY)); + _exit(0); + } + + int wstatus; + ASSERT_THAT(waitpid(child, &wstatus, 0), SyscallSucceedsWithValue(child)); + ASSERT_EQ(wstatus, 0); +} + +// Used by the child process spawned in ReleaseTTYSignals to track received +// signals. +static int received; + +void sig_handler(int signum) { received |= signum; } + +// When the session leader releases its controlling terminal, the foreground +// process group gets SIGHUP, then SIGCONT. This test: +// - Spawns 2 threads +// - Has thread 1 return 0 if it gets both SIGHUP and SIGCONT +// - Has thread 2 leave the foreground process group, and return non-zero if it +// receives any signals. 
+// - Has the parent thread release its controlling terminal +// - Checks that thread 1 got both signals +// - Checks that thread 2 didn't get any signals. +TEST_F(JobControlTest, ReleaseTTYSignals) { + ASSERT_THAT(ioctl(slave_.get(), TIOCSCTTY, 0), SyscallSucceeds()); + + received = 0; + struct sigaction sa = {}; + sa.sa_handler = sig_handler; + sa.sa_flags = 0; + sigemptyset(&sa.sa_mask); + sigaddset(&sa.sa_mask, SIGHUP); + sigaddset(&sa.sa_mask, SIGCONT); + sigprocmask(SIG_BLOCK, &sa.sa_mask, NULL); + + pid_t same_pgrp_child = fork(); + if (!same_pgrp_child) { + // The child will wait for SIGHUP and SIGCONT, then return 0. It begins with + // SIGHUP and SIGCONT blocked. We install signal handlers for those signals, + // then use sigsuspend to wait for those specific signals. + TEST_PCHECK(!sigaction(SIGHUP, &sa, NULL)); + TEST_PCHECK(!sigaction(SIGCONT, &sa, NULL)); + sigset_t mask; + sigfillset(&mask); + sigdelset(&mask, SIGHUP); + sigdelset(&mask, SIGCONT); + while (received != (SIGHUP | SIGCONT)) { + sigsuspend(&mask); + } + _exit(0); + } + + // We don't want to block these anymore. + sigprocmask(SIG_UNBLOCK, &sa.sa_mask, NULL); + + // This child will return non-zero if either SIGHUP or SIGCONT are received. + pid_t diff_pgrp_child = fork(); + if (!diff_pgrp_child) { + TEST_PCHECK(!setpgid(0, 0)); + TEST_PCHECK(pause()); + _exit(1); + } + + EXPECT_THAT(setpgid(diff_pgrp_child, diff_pgrp_child), SyscallSucceeds()); + + // Make sure we're ignoring SIGHUP, which will be sent to this process once we + // disconnect they TTY. + struct sigaction sighup_sa = {}; + sighup_sa.sa_handler = SIG_IGN; + sighup_sa.sa_flags = 0; + sigemptyset(&sighup_sa.sa_mask); + struct sigaction old_sa; + EXPECT_THAT(sigaction(SIGHUP, &sighup_sa, &old_sa), SyscallSucceeds()); + + // Release the controlling terminal, sending SIGHUP and SIGCONT to all other + // processes in this process group. 
+ EXPECT_THAT(ioctl(slave_.get(), TIOCNOTTY), SyscallSucceeds()); + + EXPECT_THAT(sigaction(SIGHUP, &old_sa, NULL), SyscallSucceeds()); + + // The child in the same process group will get signaled. + int wstatus; + EXPECT_THAT(waitpid(same_pgrp_child, &wstatus, 0), + SyscallSucceedsWithValue(same_pgrp_child)); + EXPECT_EQ(wstatus, 0); + + // The other child will not get signaled. + EXPECT_THAT(waitpid(diff_pgrp_child, &wstatus, WNOHANG), + SyscallSucceedsWithValue(0)); + EXPECT_THAT(kill(diff_pgrp_child, SIGKILL), SyscallSucceeds()); +} + +TEST_F(JobControlTest, GetForegroundProcessGroup) { + ASSERT_THAT(ioctl(slave_.get(), TIOCSCTTY, 0), SyscallSucceeds()); + pid_t foreground_pgid; + pid_t pid; + ASSERT_THAT(ioctl(slave_.get(), TIOCGPGRP, &foreground_pgid), + SyscallSucceeds()); + ASSERT_THAT(pid = getpid(), SyscallSucceeds()); + + ASSERT_EQ(foreground_pgid, pid); +} + +TEST_F(JobControlTest, GetForegroundProcessGroupNonControlling) { + // At this point there's no controlling terminal, so TIOCGPGRP should fail. + pid_t foreground_pgid; + ASSERT_THAT(ioctl(slave_.get(), TIOCGPGRP, &foreground_pgid), + SyscallFailsWithErrno(ENOTTY)); +} + +// This test: +// - sets itself as the foreground process group +// - creates a child process in a new process group +// - sets that child as the foreground process group +// - kills its child and sets itself as the foreground process group. +TEST_F(JobControlTest, SetForegroundProcessGroup) { + ASSERT_THAT(ioctl(slave_.get(), TIOCSCTTY, 0), SyscallSucceeds()); + + // Ignore SIGTTOU so that we don't stop ourself when calling tcsetpgrp. + struct sigaction sa = {}; + sa.sa_handler = SIG_IGN; + sa.sa_flags = 0; + sigemptyset(&sa.sa_mask); + sigaction(SIGTTOU, &sa, NULL); + + // Set ourself as the foreground process group. + ASSERT_THAT(tcsetpgrp(slave_.get(), getpgid(0)), SyscallSucceeds()); + + // Create a new process that just waits to be signaled. 
+ pid_t child = fork(); + if (!child) { + TEST_PCHECK(!pause()); + // We should never reach this. + _exit(1); + } + + // Make the child its own process group, then make it the controlling process + // group of the terminal. + ASSERT_THAT(setpgid(child, child), SyscallSucceeds()); + ASSERT_THAT(tcsetpgrp(slave_.get(), child), SyscallSucceeds()); + + // Sanity check - we're still the controlling session. + ASSERT_EQ(getsid(0), getsid(child)); + + // Signal the child, wait for it to exit, then retake the terminal. + ASSERT_THAT(kill(child, SIGTERM), SyscallSucceeds()); + int wstatus; + ASSERT_THAT(waitpid(child, &wstatus, 0), SyscallSucceedsWithValue(child)); + ASSERT_TRUE(WIFSIGNALED(wstatus)); + ASSERT_EQ(WTERMSIG(wstatus), SIGTERM); + + // Set ourself as the foreground process. + pid_t pgid; + ASSERT_THAT(pgid = getpgid(0), SyscallSucceeds()); + ASSERT_THAT(tcsetpgrp(slave_.get(), pgid), SyscallSucceeds()); +} + +TEST_F(JobControlTest, SetForegroundProcessGroupWrongTTY) { + pid_t pid = getpid(); + ASSERT_THAT(ioctl(slave_.get(), TIOCSPGRP, &pid), + SyscallFailsWithErrno(ENOTTY)); +} + +TEST_F(JobControlTest, SetForegroundProcessGroupNegPgid) { + ASSERT_THAT(ioctl(slave_.get(), TIOCSCTTY, 0), SyscallSucceeds()); + + pid_t pid = -1; + ASSERT_THAT(ioctl(slave_.get(), TIOCSPGRP, &pid), + SyscallFailsWithErrno(EINVAL)); +} + +TEST_F(JobControlTest, SetForegroundProcessGroupEmptyProcessGroup) { + ASSERT_THAT(ioctl(slave_.get(), TIOCSCTTY, 0), SyscallSucceeds()); + + // Create a new process, put it in a new process group, make that group the + // foreground process group, then have the process wait. + pid_t child = fork(); + if (!child) { + TEST_PCHECK(!setpgid(0, 0)); + _exit(0); + } + + // Wait for the child to exit. + int wstatus; + EXPECT_THAT(waitpid(child, &wstatus, 0), SyscallSucceedsWithValue(child)); + // The child's process group doesn't exist anymore - this should fail. 
+ ASSERT_THAT(ioctl(slave_.get(), TIOCSPGRP, &child), + SyscallFailsWithErrno(ESRCH)); +} + +TEST_F(JobControlTest, SetForegroundProcessGroupDifferentSession) { + ASSERT_THAT(ioctl(slave_.get(), TIOCSCTTY, 0), SyscallSucceeds()); + + int sync_setsid[2]; + int sync_exit[2]; + ASSERT_THAT(pipe(sync_setsid), SyscallSucceeds()); + ASSERT_THAT(pipe(sync_exit), SyscallSucceeds()); + + // Create a new process and put it in a new session. + pid_t child = fork(); + if (!child) { + TEST_PCHECK(setsid() >= 0); + // Tell the parent we're in a new session. + char c = 'c'; + TEST_PCHECK(WriteFd(sync_setsid[1], &c, 1) == 1); + TEST_PCHECK(ReadFd(sync_exit[0], &c, 1) == 1); + _exit(0); + } + + // Wait for the child to tell us it's in a new session. + char c = 'c'; + ASSERT_THAT(ReadFd(sync_setsid[0], &c, 1), SyscallSucceedsWithValue(1)); + + // Child is in a new session, so we can't make it the foregroup process group. + EXPECT_THAT(ioctl(slave_.get(), TIOCSPGRP, &child), + SyscallFailsWithErrno(EPERM)); + + EXPECT_THAT(WriteFd(sync_exit[1], &c, 1), SyscallSucceedsWithValue(1)); + + int wstatus; + EXPECT_THAT(waitpid(child, &wstatus, 0), SyscallSucceedsWithValue(child)); + EXPECT_TRUE(WIFEXITED(wstatus)); + EXPECT_EQ(WEXITSTATUS(wstatus), 0); +} + +// Verify that we don't hang when creating a new session from an orphaned +// process group (b/139968068). Calling setsid() creates an orphaned process +// group, as process groups that contain the session's leading process are +// orphans. +// +// We create 2 sessions in this test. The init process in gVisor is considered +// not to be an orphan (see sessions.go), so we have to create a session from +// which to create a session. The latter session is being created from an +// orphaned process group. 
+TEST_F(JobControlTest, OrphanRegression) { + pid_t session_2_leader = fork(); + if (!session_2_leader) { + TEST_PCHECK(setsid() >= 0); + + pid_t session_3_leader = fork(); + if (!session_3_leader) { + TEST_PCHECK(setsid() >= 0); + + _exit(0); + } + + int wstatus; + TEST_PCHECK(waitpid(session_3_leader, &wstatus, 0) == session_3_leader); + TEST_PCHECK(wstatus == 0); + + _exit(0); + } + + int wstatus; + ASSERT_THAT(waitpid(session_2_leader, &wstatus, 0), + SyscallSucceedsWithValue(session_2_leader)); + ASSERT_EQ(wstatus, 0); +} + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/pty_root.cc b/test/syscalls/linux/pty_root.cc new file mode 100644 index 000000000..1d7dbefdb --- /dev/null +++ b/test/syscalls/linux/pty_root.cc @@ -0,0 +1,78 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <sys/ioctl.h> +#include <termios.h> + +#include "gtest/gtest.h" +#include "absl/base/macros.h" +#include "test/util/capability_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/posix_error.h" +#include "test/util/pty_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// StealTTY tests whether privileged processes can steal controlling terminals. +// If the stealing process has CAP_SYS_ADMIN in the root user namespace, the +// test ensures that stealing works. If it has non-root CAP_SYS_ADMIN, it +// ensures stealing fails. 
+TEST(JobControlRootTest, StealTTY) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))); + + bool true_root = true; + if (!IsRunningOnGvisor()) { + // If running in Linux, we may only have CAP_SYS_ADMIN in a non-root user + // namespace (i.e. we are not truly root). We use init_module as a proxy for + // whether we are true root, as it returns EPERM immediately. + ASSERT_THAT(syscall(SYS_init_module, nullptr, 0, nullptr), SyscallFails()); + true_root = errno != EPERM; + + // Make this a session leader, which also drops the controlling terminal. + // In the gVisor test environment, this test will be run as the session + // leader already (as the sentry init process). + ASSERT_THAT(setsid(), SyscallSucceeds()); + } + + FileDescriptor master = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR | O_NONBLOCK)); + FileDescriptor slave = ASSERT_NO_ERRNO_AND_VALUE(OpenSlave(master)); + + // Make slave the controlling terminal. + ASSERT_THAT(ioctl(slave.get(), TIOCSCTTY, 0), SyscallSucceeds()); + + // Fork, join a new session, and try to steal the parent's controlling + // terminal, which should succeed when we have CAP_SYS_ADMIN and pass an arg + // of 1. + pid_t child = fork(); + if (!child) { + ASSERT_THAT(setsid(), SyscallSucceeds()); + // We shouldn't be able to steal the terminal with the wrong arg value. + TEST_PCHECK(ioctl(slave.get(), TIOCSCTTY, 0)); + // We should be able to steal it if we are true root. + TEST_PCHECK(true_root == !ioctl(slave.get(), TIOCSCTTY, 1)); + _exit(0); + } + + int wstatus; + ASSERT_THAT(waitpid(child, &wstatus, 0), SyscallSucceedsWithValue(child)); + ASSERT_EQ(wstatus, 0); +} + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/pwrite64.cc b/test/syscalls/linux/pwrite64.cc new file mode 100644 index 000000000..e69794910 --- /dev/null +++ b/test/syscalls/linux/pwrite64.cc @@ -0,0 +1,83 @@ +// Copyright 2018 The gVisor Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <errno.h> +#include <fcntl.h> +#include <linux/unistd.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// TODO(gvisor.dev/issue/2370): This test is currently very rudimentary. +class Pwrite64 : public ::testing::Test { + void SetUp() override { + name_ = NewTempAbsPath(); + int fd; + ASSERT_THAT(fd = open(name_.c_str(), O_CREAT, 0644), SyscallSucceeds()); + EXPECT_THAT(close(fd), SyscallSucceeds()); + } + + void TearDown() override { unlink(name_.c_str()); } + + public: + std::string name_; +}; + +TEST_F(Pwrite64, AppendOnly) { + int fd; + ASSERT_THAT(fd = open(name_.c_str(), O_APPEND | O_RDWR), SyscallSucceeds()); + constexpr int64_t kBufSize = 1024; + std::vector<char> buf(kBufSize); + std::fill(buf.begin(), buf.end(), 'a'); + EXPECT_THAT(PwriteFd(fd, buf.data(), buf.size(), 0), + SyscallSucceedsWithValue(buf.size())); + EXPECT_THAT(lseek(fd, 0, SEEK_CUR), SyscallSucceedsWithValue(0)); + EXPECT_THAT(close(fd), SyscallSucceeds()); +} + +TEST_F(Pwrite64, InvalidArgs) { + int fd; + ASSERT_THAT(fd = open(name_.c_str(), O_APPEND | O_RDWR), SyscallSucceeds()); + constexpr int64_t kBufSize = 1024; + std::vector<char> buf(kBufSize); + std::fill(buf.begin(), buf.end(), 'a'); + EXPECT_THAT(PwriteFd(fd, buf.data(), buf.size(), 
-1), + SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(close(fd), SyscallSucceeds()); +} + +TEST_F(Pwrite64, Overflow) { + int fd; + ASSERT_THAT(fd = open(name_.c_str(), O_APPEND | O_RDWR), SyscallSucceeds()); + constexpr int64_t kBufSize = 1024; + std::vector<char> buf(kBufSize); + std::fill(buf.begin(), buf.end(), 'a'); + EXPECT_THAT(PwriteFd(fd, buf.data(), buf.size(), 0x7fffffffffffffffull), + SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(close(fd), SyscallSucceeds()); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/pwritev2.cc b/test/syscalls/linux/pwritev2.cc new file mode 100644 index 000000000..63b686c62 --- /dev/null +++ b/test/syscalls/linux/pwritev2.cc @@ -0,0 +1,307 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <fcntl.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <sys/uio.h> + +#include <string> +#include <vector> + +#include "gtest/gtest.h" +#include "test/syscalls/linux/file_base.h" +#include "test/util/file_descriptor.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +#ifndef SYS_pwritev2 +#if defined(__x86_64__) +#define SYS_pwritev2 328 +#elif defined(__aarch64__) +#define SYS_pwritev2 287 +#else +#error "Unknown architecture" +#endif +#endif // SYS_pwrite2 + +#ifndef RWF_HIPRI +#define RWF_HIPRI 0x1 +#endif // RWF_HIPRI + +#ifndef RWF_DSYNC +#define RWF_DSYNC 0x2 +#endif // RWF_DSYNC + +#ifndef RWF_SYNC +#define RWF_SYNC 0x4 +#endif // RWF_SYNC + +constexpr int kBufSize = 1024; + +void SetContent(std::vector<char>& content) { + for (uint i = 0; i < content.size(); i++) { + content[i] = static_cast<char>((i % 10) + '0'); + } +} + +ssize_t pwritev2(unsigned long fd, const struct iovec* iov, + unsigned long iovcnt, off_t offset, unsigned long flags) { + // syscall on pwritev2 does some weird things (see man syscall and search + // pwritev2), so we insert a 0 to word align the flags argument on native. + return syscall(SYS_pwritev2, fd, iov, iovcnt, offset, 0, flags); +} + +// This test is the base case where we call pwritev (no offset, no flags). 
+TEST(Writev2Test, BaseCall) { + SKIP_IF(pwritev2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS); + + const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), "", TempPath::kDefaultFileMode)); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR)); + + std::vector<char> content(kBufSize); + SetContent(content); + struct iovec iov[2]; + iov[0].iov_base = content.data(); + iov[0].iov_len = content.size() / 2; + iov[1].iov_base = static_cast<char*>(iov[0].iov_base) + (content.size() / 2); + iov[1].iov_len = content.size() / 2; + + ASSERT_THAT(pwritev2(fd.get(), iov, /*iovcnt=*/2, + /*offset=*/0, /*flags=*/0), + SyscallSucceedsWithValue(kBufSize)); + + std::vector<char> buf(kBufSize); + EXPECT_THAT(read(fd.get(), buf.data(), kBufSize), + SyscallSucceedsWithValue(kBufSize)); + + EXPECT_EQ(content, buf); +} + +// This test is where we call pwritev2 with a positive offset and no flags. +TEST(Pwritev2Test, ValidPositiveOffset) { + SKIP_IF(pwritev2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS); + + std::string prefix(kBufSize, '0'); + + const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), prefix, TempPath::kDefaultFileMode)); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR)); + + std::vector<char> content(kBufSize); + SetContent(content); + struct iovec iov; + iov.iov_base = content.data(); + iov.iov_len = content.size(); + + ASSERT_THAT(pwritev2(fd.get(), &iov, /*iovcnt=*/1, + /*offset=*/prefix.size(), /*flags=*/0), + SyscallSucceedsWithValue(content.size())); + + std::vector<char> buf(prefix.size() + content.size()); + EXPECT_THAT(read(fd.get(), buf.data(), buf.size()), + SyscallSucceedsWithValue(buf.size())); + + std::vector<char> want(prefix.begin(), prefix.end()); + want.insert(want.end(), content.begin(), content.end()); + EXPECT_EQ(want, buf); +} + +// This test is the base case where we call writev by using -1 as the 
offset. +// The write should use the file offset, so the test increments the file offset +// prior to call pwritev2. +TEST(Pwritev2Test, NegativeOneOffset) { + SKIP_IF(pwritev2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS); + + const std::string prefix = "00"; + const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), prefix.data(), TempPath::kDefaultFileMode)); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR)); + ASSERT_THAT(lseek(fd.get(), prefix.size(), SEEK_SET), + SyscallSucceedsWithValue(prefix.size())); + + std::vector<char> content(kBufSize); + SetContent(content); + struct iovec iov; + iov.iov_base = content.data(); + iov.iov_len = content.size(); + + ASSERT_THAT(pwritev2(fd.get(), &iov, /*iovcnt*/ 1, + /*offset=*/static_cast<off_t>(-1), /*flags=*/0), + SyscallSucceedsWithValue(content.size())); + + ASSERT_THAT(lseek(fd.get(), 0, SEEK_CUR), + SyscallSucceedsWithValue(prefix.size() + content.size())); + + std::vector<char> buf(prefix.size() + content.size()); + EXPECT_THAT(pread(fd.get(), buf.data(), buf.size(), /*offset=*/0), + SyscallSucceedsWithValue(buf.size())); + + std::vector<char> want(prefix.begin(), prefix.end()); + want.insert(want.end(), content.begin(), content.end()); + EXPECT_EQ(want, buf); +} + +// pwritev2 requires if the RWF_HIPRI flag is passed, the fd must be opened with +// O_DIRECT. This test implements a correct call with the RWF_HIPRI flag. 
+TEST(Pwritev2Test, CallWithRWF_HIPRI) { + SKIP_IF(pwritev2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS); + + const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), "", TempPath::kDefaultFileMode)); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR)); + + std::vector<char> content(kBufSize); + SetContent(content); + struct iovec iov; + iov.iov_base = content.data(); + iov.iov_len = content.size(); + + EXPECT_THAT(pwritev2(fd.get(), &iov, /*iovcnt=*/1, + /*offset=*/0, /*flags=*/RWF_HIPRI), + SyscallSucceedsWithValue(kBufSize)); + + std::vector<char> buf(content.size()); + EXPECT_THAT(read(fd.get(), buf.data(), buf.size()), + SyscallSucceedsWithValue(buf.size())); + + EXPECT_EQ(buf, content); +} + +// This test calls pwritev2 with a bad file descriptor. +TEST(Writev2Test, BadFile) { + SKIP_IF(pwritev2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS); + ASSERT_THAT(pwritev2(/*fd=*/-1, /*iov=*/nullptr, /*iovcnt=*/0, + /*offset=*/0, /*flags=*/0), + SyscallFailsWithErrno(EBADF)); +} + +// This test calls pwrite2 with an invalid offset. 
+TEST(Pwritev2Test, InvalidOffset) { + SKIP_IF(pwritev2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS); + + const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), "", TempPath::kDefaultFileMode)); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR)); + + char buf[16]; + struct iovec iov; + iov.iov_base = buf; + iov.iov_len = sizeof(buf); + + EXPECT_THAT(pwritev2(fd.get(), &iov, /*iovcnt=*/1, + /*offset=*/static_cast<off_t>(-8), /*flags=*/0), + SyscallFailsWithErrno(EINVAL)); +} + +TEST(Pwritev2Test, UnseekableFileValid) { + SKIP_IF(pwritev2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS); + + int pipe_fds[2]; + + ASSERT_THAT(pipe(pipe_fds), SyscallSucceeds()); + + std::vector<char> content(32, '0'); + SetContent(content); + struct iovec iov; + iov.iov_base = content.data(); + iov.iov_len = content.size(); + + EXPECT_THAT(pwritev2(pipe_fds[1], &iov, /*iovcnt=*/1, + /*offset=*/static_cast<off_t>(-1), /*flags=*/0), + SyscallSucceedsWithValue(content.size())); + + std::vector<char> buf(content.size()); + EXPECT_THAT(read(pipe_fds[0], buf.data(), buf.size()), + SyscallSucceedsWithValue(buf.size())); + + EXPECT_EQ(content, buf); + + EXPECT_THAT(close(pipe_fds[0]), SyscallSucceeds()); + EXPECT_THAT(close(pipe_fds[1]), SyscallSucceeds()); +} + +// Calling pwritev2 with a non-negative offset calls pwritev. Calling pwritev +// with an unseekable file is not allowed. A pipe is used for an unseekable +// file. 
+TEST(Pwritev2Test, UnseekableFileInvalid) { + SKIP_IF(pwritev2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS); + + int pipe_fds[2]; + char buf[16]; + struct iovec iov; + iov.iov_base = buf; + iov.iov_len = sizeof(buf); + + ASSERT_THAT(pipe(pipe_fds), SyscallSucceeds()); + + EXPECT_THAT(pwritev2(pipe_fds[1], &iov, /*iovcnt=*/1, + /*offset=*/2, /*flags=*/0), + SyscallFailsWithErrno(ESPIPE)); + + EXPECT_THAT(close(pipe_fds[0]), SyscallSucceeds()); + EXPECT_THAT(close(pipe_fds[1]), SyscallSucceeds()); +} + +TEST(Pwritev2Test, ReadOnlyFile) { + SKIP_IF(pwritev2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS); + + const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), "", TempPath::kDefaultFileMode)); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY)); + + char buf[16]; + struct iovec iov; + iov.iov_base = buf; + iov.iov_len = sizeof(buf); + + EXPECT_THAT(pwritev2(fd.get(), &iov, /*iovcnt=*/1, + /*offset=*/0, /*flags=*/0), + SyscallFailsWithErrno(EBADF)); +} + +// This test calls pwritev2 with an invalid flag. +TEST(Pwritev2Test, InvalidFlag) { + SKIP_IF(pwritev2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS); + + const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), "", TempPath::kDefaultFileMode)); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR | O_DIRECT)); + + char buf[16]; + struct iovec iov; + iov.iov_base = buf; + iov.iov_len = sizeof(buf); + + EXPECT_THAT(pwritev2(fd.get(), &iov, /*iovcnt=*/1, + /*offset=*/0, /*flags=*/0xF0), + SyscallFailsWithErrno(EOPNOTSUPP)); +} + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/raw_socket.cc b/test/syscalls/linux/raw_socket.cc new file mode 100644 index 000000000..05c4ed03f --- /dev/null +++ b/test/syscalls/linux/raw_socket.cc @@ -0,0 +1,819 @@ +// Copyright 2019 The gVisor Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <linux/capability.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/ip6.h> +#include <netinet/ip_icmp.h> +#include <poll.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <unistd.h> +#include <algorithm> + +#include "gtest/gtest.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/capability_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/test_util.h" + +// Note: in order to run these tests, /proc/sys/net/ipv4/ping_group_range will +// need to be configured to let the superuser create ping sockets (see icmp(7)). + +namespace gvisor { +namespace testing { + +namespace { + +// Fixture for tests parameterized by protocol. +class RawSocketTest : public ::testing::TestWithParam<std::tuple<int, int>> { + protected: + // Creates a socket to be used in tests. + void SetUp() override; + + // Closes the socket created by SetUp(). + void TearDown() override; + + // Sends buf via s_. + void SendBuf(const char* buf, int buf_len); + + // Reads from s_ into recv_buf. 
+ void ReceiveBuf(char* recv_buf, size_t recv_buf_len); + + void ReceiveBufFrom(int sock, char* recv_buf, size_t recv_buf_len); + + int Protocol() { return std::get<0>(GetParam()); } + + int Family() { return std::get<1>(GetParam()); } + + socklen_t AddrLen() { + if (Family() == AF_INET) { + return sizeof(sockaddr_in); + } + return sizeof(sockaddr_in6); + } + + int HdrLen() { + if (Family() == AF_INET) { + return sizeof(struct iphdr); + } + // IPv6 raw sockets don't include the header. + return 0; + } + + // The socket used for both reading and writing. + int s_; + + // The loopback address. + struct sockaddr_storage addr_; +}; + +void RawSocketTest::SetUp() { + if (!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) { + ASSERT_THAT(socket(Family(), SOCK_RAW, Protocol()), + SyscallFailsWithErrno(EPERM)); + GTEST_SKIP(); + } + + ASSERT_THAT(s_ = socket(Family(), SOCK_RAW, Protocol()), SyscallSucceeds()); + + addr_ = {}; + + // We don't set ports because raw sockets don't have a notion of ports. + if (Family() == AF_INET) { + struct sockaddr_in* sin = reinterpret_cast<struct sockaddr_in*>(&addr_); + sin->sin_family = AF_INET; + sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK); + } else { + struct sockaddr_in6* sin6 = reinterpret_cast<struct sockaddr_in6*>(&addr_); + sin6->sin6_family = AF_INET6; + sin6->sin6_addr = in6addr_loopback; + } +} + +void RawSocketTest::TearDown() { + // TearDown will be run even if we skip the test. + if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) { + EXPECT_THAT(close(s_), SyscallSucceeds()); + } +} + +// We should be able to create multiple raw sockets for the same protocol. +// BasicRawSocket::Setup creates the first one, so we only have to create one +// more here. 
+TEST_P(RawSocketTest, MultipleCreation) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + int s2; + ASSERT_THAT(s2 = socket(Family(), SOCK_RAW, Protocol()), SyscallSucceeds()); + + ASSERT_THAT(close(s2), SyscallSucceeds()); +} + +// Test that shutting down an unconnected socket fails. +TEST_P(RawSocketTest, FailShutdownWithoutConnect) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + ASSERT_THAT(shutdown(s_, SHUT_WR), SyscallFailsWithErrno(ENOTCONN)); + ASSERT_THAT(shutdown(s_, SHUT_RD), SyscallFailsWithErrno(ENOTCONN)); +} + +// Shutdown is a no-op for raw sockets (and datagram sockets in general). +TEST_P(RawSocketTest, ShutdownWriteNoop) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + ASSERT_THAT( + connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()), + SyscallSucceeds()); + ASSERT_THAT(shutdown(s_, SHUT_WR), SyscallSucceeds()); + + // Arbitrary. + constexpr char kBuf[] = "noop"; + ASSERT_THAT(RetryEINTR(write)(s_, kBuf, sizeof(kBuf)), + SyscallSucceedsWithValue(sizeof(kBuf))); +} + +// Shutdown is a no-op for raw sockets (and datagram sockets in general). +TEST_P(RawSocketTest, ShutdownReadNoop) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + ASSERT_THAT( + connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()), + SyscallSucceeds()); + ASSERT_THAT(shutdown(s_, SHUT_RD), SyscallSucceeds()); + + // Arbitrary. + constexpr char kBuf[] = "gdg"; + ASSERT_NO_FATAL_FAILURE(SendBuf(kBuf, sizeof(kBuf))); + + std::vector<char> c(sizeof(kBuf) + HdrLen()); + ASSERT_THAT(read(s_, c.data(), c.size()), SyscallSucceedsWithValue(c.size())); +} + +// Test that listen() fails. +TEST_P(RawSocketTest, FailListen) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + ASSERT_THAT(listen(s_, 1), SyscallFailsWithErrno(ENOTSUP)); +} + +// Test that accept() fails. 
+TEST_P(RawSocketTest, FailAccept) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + struct sockaddr saddr; + socklen_t addrlen; + ASSERT_THAT(accept(s_, &saddr, &addrlen), SyscallFailsWithErrno(ENOTSUP)); +} + +// Test that getpeername() returns nothing before connect(). +TEST_P(RawSocketTest, FailGetPeerNameBeforeConnect) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + struct sockaddr saddr; + socklen_t addrlen = sizeof(saddr); + ASSERT_THAT(getpeername(s_, &saddr, &addrlen), + SyscallFailsWithErrno(ENOTCONN)); +} + +// Test that getpeername() returns something after connect(). +TEST_P(RawSocketTest, GetPeerName) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + ASSERT_THAT( + connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()), + SyscallSucceeds()); + struct sockaddr saddr; + socklen_t addrlen = sizeof(saddr); + ASSERT_THAT(getpeername(s_, &saddr, &addrlen), + SyscallFailsWithErrno(ENOTCONN)); + ASSERT_GT(addrlen, 0); +} + +// Test that the socket is writable immediately. +TEST_P(RawSocketTest, PollWritableImmediately) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + struct pollfd pfd = {}; + pfd.fd = s_; + pfd.events = POLLOUT; + ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, 10000), SyscallSucceedsWithValue(1)); +} + +// Test that the socket isn't readable before receiving anything. +TEST_P(RawSocketTest, PollNotReadableInitially) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + // Try to receive data with MSG_DONTWAIT, which returns immediately if there's + // nothing to be read. + char buf[117]; + ASSERT_THAT(RetryEINTR(recv)(s_, buf, sizeof(buf), MSG_DONTWAIT), + SyscallFailsWithErrno(EAGAIN)); +} + +// Test that the socket becomes readable once something is written to it. 
+TEST_P(RawSocketTest, PollTriggeredOnWrite) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + // Write something so that there's data to be read. + // Arbitrary. + constexpr char kBuf[] = "JP5"; + ASSERT_NO_FATAL_FAILURE(SendBuf(kBuf, sizeof(kBuf))); + + struct pollfd pfd = {}; + pfd.fd = s_; + pfd.events = POLLIN; + ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, 10000), SyscallSucceedsWithValue(1)); +} + +// Test that we can connect() to a valid IP (loopback). +TEST_P(RawSocketTest, ConnectToLoopback) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + ASSERT_THAT( + connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()), + SyscallSucceeds()); +} + +// Test that calling send() without connect() fails. +TEST_P(RawSocketTest, SendWithoutConnectFails) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + // Arbitrary. + constexpr char kBuf[] = "Endgame was good"; + ASSERT_THAT(send(s_, kBuf, sizeof(kBuf), 0), + SyscallFailsWithErrno(EDESTADDRREQ)); +} + +// Bind to localhost. +TEST_P(RawSocketTest, BindToLocalhost) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + ASSERT_THAT( + bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()), + SyscallSucceeds()); +} + +// Bind to a different address. +TEST_P(RawSocketTest, BindToInvalid) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + struct sockaddr_storage bind_addr = addr_; + if (Family() == AF_INET) { + struct sockaddr_in* sin = reinterpret_cast<struct sockaddr_in*>(&bind_addr); + sin->sin_addr = {1}; // 1.0.0.0 - An address that we can't bind to. + } else { + struct sockaddr_in6* sin6 = + reinterpret_cast<struct sockaddr_in6*>(&bind_addr); + memset(&sin6->sin6_addr.s6_addr, 0, sizeof(sin6->sin6_addr.s6_addr)); + sin6->sin6_addr.s6_addr[0] = 1; // 1: - An address that we can't bind to. 
+ } + ASSERT_THAT(bind(s_, reinterpret_cast<struct sockaddr*>(&bind_addr), + AddrLen()), SyscallFailsWithErrno(EADDRNOTAVAIL)); +} + +// Send and receive an packet. +TEST_P(RawSocketTest, SendAndReceive) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + // Arbitrary. + constexpr char kBuf[] = "TB12"; + ASSERT_NO_FATAL_FAILURE(SendBuf(kBuf, sizeof(kBuf))); + + // Receive the packet and make sure it's identical. + std::vector<char> recv_buf(sizeof(kBuf) + HdrLen()); + ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size())); + EXPECT_EQ(memcmp(recv_buf.data() + HdrLen(), kBuf, sizeof(kBuf)), 0); +} + +// We should be able to create multiple raw sockets for the same protocol and +// receive the same packet on both. +TEST_P(RawSocketTest, MultipleSocketReceive) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + int s2; + ASSERT_THAT(s2 = socket(Family(), SOCK_RAW, Protocol()), SyscallSucceeds()); + + // Arbitrary. + constexpr char kBuf[] = "TB10"; + ASSERT_NO_FATAL_FAILURE(SendBuf(kBuf, sizeof(kBuf))); + + // Receive it on socket 1. + std::vector<char> recv_buf1(sizeof(kBuf) + HdrLen()); + ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf1.data(), recv_buf1.size())); + + // Receive it on socket 2. + std::vector<char> recv_buf2(sizeof(kBuf) + HdrLen()); + ASSERT_NO_FATAL_FAILURE(ReceiveBufFrom(s2, recv_buf2.data(), + recv_buf2.size())); + + EXPECT_EQ(memcmp(recv_buf1.data() + HdrLen(), + recv_buf2.data() + HdrLen(), sizeof(kBuf)), + 0); + + ASSERT_THAT(close(s2), SyscallSucceeds()); +} + +// Test that connect sends packets to the right place. +TEST_P(RawSocketTest, SendAndReceiveViaConnect) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + ASSERT_THAT( + connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()), + SyscallSucceeds()); + + // Arbitrary. 
+ constexpr char kBuf[] = "JH4"; + ASSERT_THAT(send(s_, kBuf, sizeof(kBuf), 0), + SyscallSucceedsWithValue(sizeof(kBuf))); + + // Receive the packet and make sure it's identical. + std::vector<char> recv_buf(sizeof(kBuf) + HdrLen()); + ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size())); + EXPECT_EQ(memcmp(recv_buf.data() + HdrLen(), kBuf, sizeof(kBuf)), 0); +} + +// Bind to localhost, then send and receive packets. +TEST_P(RawSocketTest, BindSendAndReceive) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + ASSERT_THAT( + bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()), + SyscallSucceeds()); + + // Arbitrary. + constexpr char kBuf[] = "DR16"; + ASSERT_NO_FATAL_FAILURE(SendBuf(kBuf, sizeof(kBuf))); + + // Receive the packet and make sure it's identical. + std::vector<char> recv_buf(sizeof(kBuf) + HdrLen()); + ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size())); + EXPECT_EQ(memcmp(recv_buf.data() + HdrLen(), kBuf, sizeof(kBuf)), 0); +} + +// Bind and connect to localhost and send/receive packets. +TEST_P(RawSocketTest, BindConnectSendAndReceive) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + ASSERT_THAT( + bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()), + SyscallSucceeds()); + ASSERT_THAT( + connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()), + SyscallSucceeds()); + + // Arbitrary. + constexpr char kBuf[] = "DG88"; + ASSERT_NO_FATAL_FAILURE(SendBuf(kBuf, sizeof(kBuf))); + + // Receive the packet and make sure it's identical. + std::vector<char> recv_buf(sizeof(kBuf) + HdrLen()); + ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size())); + EXPECT_EQ(memcmp(recv_buf.data() + HdrLen(), kBuf, sizeof(kBuf)), 0); +} + +// Check that setting SO_RCVBUF below min is clamped to the minimum +// receive buffer size. 
+TEST_P(RawSocketTest, SetSocketRecvBufBelowMin) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + // Discover minimum receive buf size by trying to set it to zero. + // See: + // https://github.com/torvalds/linux/blob/a5dc8300df75e8b8384b4c82225f1e4a0b4d9b55/net/core/sock.c#L820 + constexpr int kRcvBufSz = 0; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)), + SyscallSucceeds()); + + int min = 0; + socklen_t min_len = sizeof(min); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &min, &min_len), + SyscallSucceeds()); + + // Linux doubles the value so let's use a value that when doubled will still + // be smaller than min. + int below_min = min / 2 - 1; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &below_min, sizeof(below_min)), + SyscallSucceeds()); + + int val = 0; + socklen_t val_len = sizeof(val); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &val, &val_len), + SyscallSucceeds()); + + ASSERT_EQ(min, val); +} + +// Check that setting SO_RCVBUF above max is clamped to the maximum +// receive buffer size. +TEST_P(RawSocketTest, SetSocketRecvBufAboveMax) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + // Discover max buf size by trying to set the largest possible buffer size. + constexpr int kRcvBufSz = 0xffffffff; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)), + SyscallSucceeds()); + + int max = 0; + socklen_t max_len = sizeof(max); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &max, &max_len), + SyscallSucceeds()); + + int above_max = max + 1; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &above_max, sizeof(above_max)), + SyscallSucceeds()); + + int val = 0; + socklen_t val_len = sizeof(val); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &val, &val_len), + SyscallSucceeds()); + ASSERT_EQ(max, val); +} + +// Check that setting SO_RCVBUF min <= kRcvBufSz <= max is honored. 
+TEST_P(RawSocketTest, SetSocketRecvBuf) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + int max = 0; + int min = 0; + { + // Discover max buf size by trying to set a really large buffer size. + constexpr int kRcvBufSz = 0xffffffff; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)), + SyscallSucceeds()); + + max = 0; + socklen_t max_len = sizeof(max); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &max, &max_len), + SyscallSucceeds()); + } + + { + // Discover minimum buffer size by trying to set a zero size receive buffer + // size. + // See: + // https://github.com/torvalds/linux/blob/a5dc8300df75e8b8384b4c82225f1e4a0b4d9b55/net/core/sock.c#L820 + constexpr int kRcvBufSz = 0; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)), + SyscallSucceeds()); + + socklen_t min_len = sizeof(min); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &min, &min_len), + SyscallSucceeds()); + } + + int quarter_sz = min + (max - min) / 4; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &quarter_sz, sizeof(quarter_sz)), + SyscallSucceeds()); + + int val = 0; + socklen_t val_len = sizeof(val); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &val, &val_len), + SyscallSucceeds()); + + // Linux doubles the value set by SO_SNDBUF/SO_RCVBUF. + // TODO(gvisor.dev/issue/2926): Remove when Netstack matches linux behavior. + if (!IsRunningOnGvisor()) { + quarter_sz *= 2; + } + ASSERT_EQ(quarter_sz, val); +} + +// Check that setting SO_SNDBUF below min is clamped to the minimum +// receive buffer size. +TEST_P(RawSocketTest, SetSocketSendBufBelowMin) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + // Discover minimum buffer size by trying to set it to zero. 
+ constexpr int kSndBufSz = 0; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &kSndBufSz, sizeof(kSndBufSz)), + SyscallSucceeds()); + + int min = 0; + socklen_t min_len = sizeof(min); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &min, &min_len), + SyscallSucceeds()); + + // Linux doubles the value so let's use a value that when doubled will still + // be smaller than min. + int below_min = min / 2 - 1; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &below_min, sizeof(below_min)), + SyscallSucceeds()); + + int val = 0; + socklen_t val_len = sizeof(val); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &val, &val_len), + SyscallSucceeds()); + + ASSERT_EQ(min, val); +} + +// Check that setting SO_SNDBUF above max is clamped to the maximum +// send buffer size. +TEST_P(RawSocketTest, SetSocketSendBufAboveMax) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + // Discover maximum buffer size by trying to set it to a large value. + constexpr int kSndBufSz = 0xffffffff; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &kSndBufSz, sizeof(kSndBufSz)), + SyscallSucceeds()); + + int max = 0; + socklen_t max_len = sizeof(max); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &max, &max_len), + SyscallSucceeds()); + + int above_max = max + 1; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &above_max, sizeof(above_max)), + SyscallSucceeds()); + + int val = 0; + socklen_t val_len = sizeof(val); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &val, &val_len), + SyscallSucceeds()); + ASSERT_EQ(max, val); +} + +// Check that setting SO_SNDBUF min <= kSndBufSz <= max is honored. +TEST_P(RawSocketTest, SetSocketSendBuf) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + int max = 0; + int min = 0; + { + // Discover maximum buffer size by trying to set it to a large value. 
+ constexpr int kSndBufSz = 0xffffffff; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &kSndBufSz, sizeof(kSndBufSz)), + SyscallSucceeds()); + + max = 0; + socklen_t max_len = sizeof(max); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &max, &max_len), + SyscallSucceeds()); + } + + { + // Discover minimum buffer size by trying to set it to zero. + constexpr int kSndBufSz = 0; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &kSndBufSz, sizeof(kSndBufSz)), + SyscallSucceeds()); + + socklen_t min_len = sizeof(min); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &min, &min_len), + SyscallSucceeds()); + } + + int quarter_sz = min + (max - min) / 4; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &quarter_sz, sizeof(quarter_sz)), + SyscallSucceeds()); + + int val = 0; + socklen_t val_len = sizeof(val); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &val, &val_len), + SyscallSucceeds()); + + // Linux doubles the value set by SO_SNDBUF/SO_RCVBUF. + // TODO(gvisor.dev/issue/2926): Remove the gvisor special casing when Netstack + // matches linux behavior. + if (!IsRunningOnGvisor()) { + quarter_sz *= 2; + } + + ASSERT_EQ(quarter_sz, val); +} + +// Test that receive buffer limits are not enforced when the recv buffer is +// empty. +TEST_P(RawSocketTest, RecvBufLimitsEmptyRecvBuffer) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + ASSERT_THAT( + bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()), + SyscallSucceeds()); + ASSERT_THAT( + connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()), + SyscallSucceeds()); + + int min = 0; + { + // Discover minimum buffer size by trying to set it to zero. 
+ constexpr int kRcvBufSz = 0; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)), + SyscallSucceeds()); + + socklen_t min_len = sizeof(min); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &min, &min_len), + SyscallSucceeds()); + } + + { + // Send data of size min and verify that it's received. + std::vector<char> buf(min); + RandomizeBuffer(buf.data(), buf.size()); + ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size())); + + // Receive the packet and make sure it's identical. + std::vector<char> recv_buf(buf.size() + HdrLen()); + ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size())); + EXPECT_EQ( + memcmp(recv_buf.data() + HdrLen(), buf.data(), buf.size()), + 0); + } + + { + // Send data of size min + 1 and verify that its received. Both linux and + // Netstack accept a dgram that exceeds rcvBuf limits if the receive buffer + // is currently empty. + std::vector<char> buf(min + 1); + RandomizeBuffer(buf.data(), buf.size()); + ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size())); + // Receive the packet and make sure it's identical. + std::vector<char> recv_buf(buf.size() + HdrLen()); + ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size())); + EXPECT_EQ( + memcmp(recv_buf.data() + HdrLen(), buf.data(), buf.size()), + 0); + } +} + +TEST_P(RawSocketTest, RecvBufLimits) { + // TCP stack generates RSTs for unknown endpoints and it complicates the test + // as we have to deal with the RST packets as well. For testing the raw socket + // endpoints buffer limit enforcement we can just test for UDP. + // + // We don't use SKIP_IF here because root_test_runner explicitly fails if a + // test is skipped. 
+ if (Protocol() == IPPROTO_TCP) { + return; + } + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + ASSERT_THAT( + bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()), + SyscallSucceeds()); + ASSERT_THAT( + connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()), + SyscallSucceeds()); + + int min = 0; + { + // Discover minimum buffer size by trying to set it to zero. + constexpr int kRcvBufSz = 0; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)), + SyscallSucceeds()); + + socklen_t min_len = sizeof(min); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &min, &min_len), + SyscallSucceeds()); + } + + // Now set the limit to min * 2. + int new_rcv_buf_sz = min * 4; + if (!IsRunningOnGvisor()) { + // Linux doubles the value specified so just set to min. + new_rcv_buf_sz = min * 2; + } + + ASSERT_THAT(setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &new_rcv_buf_sz, + sizeof(new_rcv_buf_sz)), + SyscallSucceeds()); + int rcv_buf_sz = 0; + { + socklen_t rcv_buf_len = sizeof(rcv_buf_sz); + ASSERT_THAT( + getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &rcv_buf_sz, &rcv_buf_len), + SyscallSucceeds()); + } + + // Set a receive timeout so that we don't block forever on reads if the test + // fails. + struct timeval tv { + .tv_sec = 1, .tv_usec = 0, + }; + ASSERT_THAT(setsockopt(s_, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)), + SyscallSucceeds()); + + { + std::vector<char> buf(min); + RandomizeBuffer(buf.data(), buf.size()); + + ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size())); + ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size())); + ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size())); + ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size())); + int sent = 4; + if (IsRunningOnGvisor()) { + // Linux seems to drop the 4th packet even though technically it should + // fit in the receive buffer. 
+ ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size())); + sent++; + } + + // Verify that the expected number of packets are available to be read. + for (int i = 0; i < sent - 1; i++) { + // Receive the packet and make sure it's identical. + std::vector<char> recv_buf(buf.size() + HdrLen()); + ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size())); + EXPECT_EQ(memcmp(recv_buf.data() + HdrLen(), buf.data(), + buf.size()), + 0); + } + + // Assert that the last packet is dropped because the receive buffer should + // be full after the first four packets. + std::vector<char> recv_buf(buf.size() + HdrLen()); + struct iovec iov = {}; + iov.iov_base = static_cast<void*>(const_cast<char*>(recv_buf.data())); + iov.iov_len = buf.size(); + struct msghdr msg = {}; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = 0; + ASSERT_THAT(RetryEINTR(recvmsg)(s_, &msg, MSG_DONTWAIT), + SyscallFailsWithErrno(EAGAIN)); + } +} + +void RawSocketTest::SendBuf(const char* buf, int buf_len) { + // It's safe to use const_cast here because sendmsg won't modify the iovec or + // address. 
+ struct iovec iov = {}; + iov.iov_base = static_cast<void*>(const_cast<char*>(buf)); + iov.iov_len = static_cast<size_t>(buf_len); + struct msghdr msg = {}; + msg.msg_name = static_cast<void*>(&addr_); + msg.msg_namelen = AddrLen(); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = 0; + ASSERT_THAT(sendmsg(s_, &msg, 0), SyscallSucceedsWithValue(buf_len)); +} + +void RawSocketTest::ReceiveBuf(char* recv_buf, size_t recv_buf_len) { + ASSERT_NO_FATAL_FAILURE(ReceiveBufFrom(s_, recv_buf, recv_buf_len)); +} + +void RawSocketTest::ReceiveBufFrom(int sock, char* recv_buf, + size_t recv_buf_len) { + ASSERT_NO_FATAL_FAILURE(RecvNoCmsg(sock, recv_buf, recv_buf_len)); +} + +INSTANTIATE_TEST_SUITE_P(AllInetTests, RawSocketTest, + ::testing::Combine( + ::testing::Values(IPPROTO_TCP, IPPROTO_UDP), + ::testing::Values(AF_INET, AF_INET6))); + +// AF_INET6+SOCK_RAW+IPPROTO_RAW sockets can be created, but not written to. +TEST(RawSocketTest, IPv6ProtoRaw) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + int sock; + ASSERT_THAT(sock = socket(AF_INET6, SOCK_RAW, IPPROTO_RAW), + SyscallSucceeds()); + + // Verify that writing yields EINVAL. + char buf[] = "This is such a weird little edge case"; + struct sockaddr_in6 sin6 = {}; + sin6.sin6_family = AF_INET6; + sin6.sin6_addr = in6addr_loopback; + ASSERT_THAT(sendto(sock, buf, sizeof(buf), 0 /* flags */, + reinterpret_cast<struct sockaddr*>(&sin6), sizeof(sin6)), + SyscallFailsWithErrno(EINVAL)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/raw_socket_hdrincl.cc b/test/syscalls/linux/raw_socket_hdrincl.cc new file mode 100644 index 000000000..5bb14d57c --- /dev/null +++ b/test/syscalls/linux/raw_socket_hdrincl.cc @@ -0,0 +1,406 @@ +// Copyright 2019 The gVisor Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <linux/capability.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/ip_icmp.h> +#include <netinet/udp.h> +#include <poll.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <unistd.h> + +#include <algorithm> +#include <cstring> + +#include "gtest/gtest.h" +#include "absl/base/internal/endian.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/capability_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// Tests for IPPROTO_RAW raw sockets, which implies IP_HDRINCL. +class RawHDRINCL : public ::testing::Test { + protected: + // Creates a socket to be used in tests. + void SetUp() override; + + // Closes the socket created by SetUp(). + void TearDown() override; + + // Returns a valid looback IP header with no payload. + struct iphdr LoopbackHeader(); + + // Fills in buf with an IP header, UDP header, and payload. Returns false if + // buf_size isn't large enough to hold everything. + bool FillPacket(char* buf, size_t buf_size, int port, const char* payload, + uint16_t payload_size); + + // The socket used for both reading and writing. + int socket_; + + // The loopback address. 
+ struct sockaddr_in addr_; +}; + +void RawHDRINCL::SetUp() { + if (!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) { + ASSERT_THAT(socket(AF_INET, SOCK_RAW, IPPROTO_RAW), + SyscallFailsWithErrno(EPERM)); + GTEST_SKIP(); + } + + ASSERT_THAT(socket_ = socket(AF_INET, SOCK_RAW, IPPROTO_RAW), + SyscallSucceeds()); + + addr_ = {}; + + addr_.sin_port = IPPROTO_IP; + addr_.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + addr_.sin_family = AF_INET; +} + +void RawHDRINCL::TearDown() { + // TearDown will be run even if we skip the test. + if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) { + EXPECT_THAT(close(socket_), SyscallSucceeds()); + } +} + +struct iphdr RawHDRINCL::LoopbackHeader() { + struct iphdr hdr = {}; + hdr.ihl = 5; + hdr.version = 4; + hdr.tos = 0; + hdr.tot_len = absl::gbswap_16(sizeof(hdr)); + hdr.id = 0; + hdr.frag_off = 0; + hdr.ttl = 7; + hdr.protocol = 1; + hdr.daddr = htonl(INADDR_LOOPBACK); + // hdr.check is set by the network stack. + // hdr.tot_len is set by the network stack. + // hdr.saddr is set by the network stack. + return hdr; +} + +bool RawHDRINCL::FillPacket(char* buf, size_t buf_size, int port, + const char* payload, uint16_t payload_size) { + if (buf_size < sizeof(struct iphdr) + sizeof(struct udphdr) + payload_size) { + return false; + } + + struct iphdr ip = LoopbackHeader(); + ip.protocol = IPPROTO_UDP; + + struct udphdr udp = {}; + udp.source = absl::gbswap_16(port); + udp.dest = absl::gbswap_16(port); + udp.len = absl::gbswap_16(sizeof(udp) + payload_size); + udp.check = 0; + + memcpy(buf, reinterpret_cast<char*>(&ip), sizeof(ip)); + memcpy(buf + sizeof(ip), reinterpret_cast<char*>(&udp), sizeof(udp)); + memcpy(buf + sizeof(ip) + sizeof(udp), payload, payload_size); + + return true; +} + +// We should be able to create multiple IPPROTO_RAW sockets. RawHDRINCL::Setup +// creates the first one, so we only have to create one more here. 
+TEST_F(RawHDRINCL, MultipleCreation) { + int s2; + ASSERT_THAT(s2 = socket(AF_INET, SOCK_RAW, IPPROTO_RAW), SyscallSucceeds()); + + ASSERT_THAT(close(s2), SyscallSucceeds()); +} + +// Test that shutting down an unconnected socket fails. +TEST_F(RawHDRINCL, FailShutdownWithoutConnect) { + ASSERT_THAT(shutdown(socket_, SHUT_WR), SyscallFailsWithErrno(ENOTCONN)); + ASSERT_THAT(shutdown(socket_, SHUT_RD), SyscallFailsWithErrno(ENOTCONN)); +} + +// Test that listen() fails. +TEST_F(RawHDRINCL, FailListen) { + ASSERT_THAT(listen(socket_, 1), SyscallFailsWithErrno(ENOTSUP)); +} + +// Test that accept() fails. +TEST_F(RawHDRINCL, FailAccept) { + struct sockaddr saddr; + socklen_t addrlen; + ASSERT_THAT(accept(socket_, &saddr, &addrlen), + SyscallFailsWithErrno(ENOTSUP)); +} + +// Test that the socket is writable immediately. +TEST_F(RawHDRINCL, PollWritableImmediately) { + struct pollfd pfd = {}; + pfd.fd = socket_; + pfd.events = POLLOUT; + ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, 0), SyscallSucceedsWithValue(1)); +} + +// Test that the socket isn't readable. +TEST_F(RawHDRINCL, NotReadable) { + // Try to receive data with MSG_DONTWAIT, which returns immediately if there's + // nothing to be read. + char buf[117]; + ASSERT_THAT(RetryEINTR(recv)(socket_, buf, sizeof(buf), MSG_DONTWAIT), + SyscallFailsWithErrno(EAGAIN)); +} + +// Test that we can connect() to a valid IP (loopback). +TEST_F(RawHDRINCL, ConnectToLoopback) { + ASSERT_THAT(connect(socket_, reinterpret_cast<struct sockaddr*>(&addr_), + sizeof(addr_)), + SyscallSucceeds()); +} + +TEST_F(RawHDRINCL, SendWithoutConnectSucceeds) { + struct iphdr hdr = LoopbackHeader(); + ASSERT_THAT(send(socket_, &hdr, sizeof(hdr), 0), + SyscallSucceedsWithValue(sizeof(hdr))); +} + +// HDRINCL implies write-only. Verify that we can't read a packet sent to +// loopback. 
+TEST_F(RawHDRINCL, NotReadableAfterWrite) { + ASSERT_THAT(connect(socket_, reinterpret_cast<struct sockaddr*>(&addr_), + sizeof(addr_)), + SyscallSucceeds()); + + // Construct a packet with an IP header, UDP header, and payload. + constexpr char kPayload[] = "odst"; + char packet[sizeof(struct iphdr) + sizeof(struct udphdr) + sizeof(kPayload)]; + ASSERT_TRUE(FillPacket(packet, sizeof(packet), 40000 /* port */, kPayload, + sizeof(kPayload))); + + socklen_t addrlen = sizeof(addr_); + ASSERT_NO_FATAL_FAILURE( + sendto(socket_, reinterpret_cast<void*>(&packet), sizeof(packet), 0, + reinterpret_cast<struct sockaddr*>(&addr_), addrlen)); + + struct pollfd pfd = {}; + pfd.fd = socket_; + pfd.events = POLLIN; + ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, 1000), SyscallSucceedsWithValue(0)); +} + +TEST_F(RawHDRINCL, WriteTooSmall) { + ASSERT_THAT(connect(socket_, reinterpret_cast<struct sockaddr*>(&addr_), + sizeof(addr_)), + SyscallSucceeds()); + + // This is smaller than the size of an IP header. + constexpr char kBuf[] = "JP5"; + ASSERT_THAT(send(socket_, kBuf, sizeof(kBuf), 0), + SyscallFailsWithErrno(EINVAL)); +} + +// Bind to localhost. +TEST_F(RawHDRINCL, BindToLocalhost) { + ASSERT_THAT( + bind(socket_, reinterpret_cast<struct sockaddr*>(&addr_), sizeof(addr_)), + SyscallSucceeds()); +} + +// Bind to a different address. +TEST_F(RawHDRINCL, BindToInvalid) { + struct sockaddr_in bind_addr = {}; + bind_addr.sin_family = AF_INET; + bind_addr.sin_addr = {1}; // 1.0.0.0 - An address that we can't bind to. + ASSERT_THAT(bind(socket_, reinterpret_cast<struct sockaddr*>(&bind_addr), + sizeof(bind_addr)), + SyscallFailsWithErrno(EADDRNOTAVAIL)); +} + +// Send and receive a packet. +TEST_F(RawHDRINCL, SendAndReceive) { + int port = 40000; + if (!IsRunningOnGvisor()) { + port = static_cast<short>(ASSERT_NO_ERRNO_AND_VALUE( + PortAvailable(0, AddressFamily::kIpv4, SocketType::kUdp, false))); + } + + // IPPROTO_RAW sockets are write-only. 
We'll have to open another socket to + // read what we write. + FileDescriptor udp_sock = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_RAW, IPPROTO_UDP)); + + // Construct a packet with an IP header, UDP header, and payload. + constexpr char kPayload[] = "toto"; + char packet[sizeof(struct iphdr) + sizeof(struct udphdr) + sizeof(kPayload)]; + ASSERT_TRUE( + FillPacket(packet, sizeof(packet), port, kPayload, sizeof(kPayload))); + + socklen_t addrlen = sizeof(addr_); + ASSERT_NO_FATAL_FAILURE(sendto(socket_, &packet, sizeof(packet), 0, + reinterpret_cast<struct sockaddr*>(&addr_), + addrlen)); + + // Receive the payload. + char recv_buf[sizeof(packet)]; + struct sockaddr_in src; + socklen_t src_size = sizeof(src); + ASSERT_THAT(recvfrom(udp_sock.get(), recv_buf, sizeof(recv_buf), 0, + reinterpret_cast<struct sockaddr*>(&src), &src_size), + SyscallSucceedsWithValue(sizeof(packet))); + EXPECT_EQ( + memcmp(kPayload, recv_buf + sizeof(struct iphdr) + sizeof(struct udphdr), + sizeof(kPayload)), + 0); + // The network stack should have set the source address. + EXPECT_EQ(src.sin_family, AF_INET); + EXPECT_EQ(absl::gbswap_32(src.sin_addr.s_addr), INADDR_LOOPBACK); + // The packet ID should not be 0, as the packet has DF=0. + struct iphdr* iphdr = reinterpret_cast<struct iphdr*>(recv_buf); + EXPECT_NE(iphdr->id, 0); +} + +// Send and receive a packet where the sendto address is not the same as the +// provided destination. +TEST_F(RawHDRINCL, SendAndReceiveDifferentAddress) { + int port = 40000; + if (!IsRunningOnGvisor()) { + port = static_cast<short>(ASSERT_NO_ERRNO_AND_VALUE( + PortAvailable(0, AddressFamily::kIpv4, SocketType::kUdp, false))); + } + + // IPPROTO_RAW sockets are write-only. We'll have to open another socket to + // read what we write. + FileDescriptor udp_sock = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_RAW, IPPROTO_UDP)); + + // Construct a packet with an IP header, UDP header, and payload. 
+ constexpr char kPayload[] = "toto"; + char packet[sizeof(struct iphdr) + sizeof(struct udphdr) + sizeof(kPayload)]; + ASSERT_TRUE( + FillPacket(packet, sizeof(packet), port, kPayload, sizeof(kPayload))); + // Overwrite the IP destination address with an IP we can't get to. + struct iphdr iphdr = {}; + memcpy(&iphdr, packet, sizeof(iphdr)); + iphdr.daddr = 42; + memcpy(packet, &iphdr, sizeof(iphdr)); + + socklen_t addrlen = sizeof(addr_); + ASSERT_NO_FATAL_FAILURE(sendto(socket_, &packet, sizeof(packet), 0, + reinterpret_cast<struct sockaddr*>(&addr_), + addrlen)); + + // Receive the payload, since sendto should replace the bad destination with + // localhost. + char recv_buf[sizeof(packet)]; + struct sockaddr_in src; + socklen_t src_size = sizeof(src); + ASSERT_THAT(recvfrom(udp_sock.get(), recv_buf, sizeof(recv_buf), 0, + reinterpret_cast<struct sockaddr*>(&src), &src_size), + SyscallSucceedsWithValue(sizeof(packet))); + EXPECT_EQ( + memcmp(kPayload, recv_buf + sizeof(struct iphdr) + sizeof(struct udphdr), + sizeof(kPayload)), + 0); + // The network stack should have set the source address. + EXPECT_EQ(src.sin_family, AF_INET); + EXPECT_EQ(absl::gbswap_32(src.sin_addr.s_addr), INADDR_LOOPBACK); + // The packet ID should not be 0, as the packet has DF=0. + struct iphdr recv_iphdr = {}; + memcpy(&recv_iphdr, recv_buf, sizeof(recv_iphdr)); + EXPECT_NE(recv_iphdr.id, 0); + // The destination address should be localhost, not the bad IP we set + // initially. + EXPECT_EQ(absl::gbswap_32(recv_iphdr.daddr), INADDR_LOOPBACK); +} + +// Send and receive a packet w/ the IP_HDRINCL option set. 
+TEST_F(RawHDRINCL, SendAndReceiveIPHdrIncl) { + int port = 40000; + if (!IsRunningOnGvisor()) { + port = static_cast<short>(ASSERT_NO_ERRNO_AND_VALUE( + PortAvailable(0, AddressFamily::kIpv4, SocketType::kUdp, false))); + } + + FileDescriptor recv_sock = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_RAW, IPPROTO_UDP)); + + FileDescriptor send_sock = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_RAW, IPPROTO_UDP)); + + // Enable IP_HDRINCL option so that we can build and send w/ an IP + // header. + constexpr int kSockOptOn = 1; + ASSERT_THAT(setsockopt(send_sock.get(), SOL_IP, IP_HDRINCL, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + // This is not strictly required but we do it to make sure that setting + // IP_HDRINCL on a non IPPROTO_RAW socket does not prevent it from receiving + // packets. + ASSERT_THAT(setsockopt(recv_sock.get(), SOL_IP, IP_HDRINCL, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + + // Construct a packet with an IP header, UDP header, and payload. + constexpr char kPayload[] = "toto"; + char packet[sizeof(struct iphdr) + sizeof(struct udphdr) + sizeof(kPayload)]; + ASSERT_TRUE( + FillPacket(packet, sizeof(packet), port, kPayload, sizeof(kPayload))); + + socklen_t addrlen = sizeof(addr_); + ASSERT_NO_FATAL_FAILURE(sendto(send_sock.get(), &packet, sizeof(packet), 0, + reinterpret_cast<struct sockaddr*>(&addr_), + addrlen)); + + // Receive the payload. + char recv_buf[sizeof(packet)]; + struct sockaddr_in src; + socklen_t src_size = sizeof(src); + ASSERT_THAT(recvfrom(recv_sock.get(), recv_buf, sizeof(recv_buf), 0, + reinterpret_cast<struct sockaddr*>(&src), &src_size), + SyscallSucceedsWithValue(sizeof(packet))); + EXPECT_EQ( + memcmp(kPayload, recv_buf + sizeof(struct iphdr) + sizeof(struct udphdr), + sizeof(kPayload)), + 0); + // The network stack should have set the source address. 
+ EXPECT_EQ(src.sin_family, AF_INET); + EXPECT_EQ(absl::gbswap_32(src.sin_addr.s_addr), INADDR_LOOPBACK); + struct iphdr iphdr = {}; + memcpy(&iphdr, recv_buf, sizeof(iphdr)); + EXPECT_NE(iphdr.id, 0); + + // Also verify that the packet we just sent was not delivered to the + // IPPROTO_RAW socket. + { + char recv_buf[sizeof(packet)]; + struct sockaddr_in src; + socklen_t src_size = sizeof(src); + ASSERT_THAT(recvfrom(socket_, recv_buf, sizeof(recv_buf), MSG_DONTWAIT, + reinterpret_cast<struct sockaddr*>(&src), &src_size), + SyscallFailsWithErrno(EAGAIN)); + } +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/raw_socket_icmp.cc b/test/syscalls/linux/raw_socket_icmp.cc new file mode 100644 index 000000000..3de898df7 --- /dev/null +++ b/test/syscalls/linux/raw_socket_icmp.cc @@ -0,0 +1,514 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <linux/capability.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/ip_icmp.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <unistd.h> + +#include <algorithm> +#include <cstdint> + +#include "gtest/gtest.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/capability_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// The size of an empty ICMP packet and IP header together. +constexpr size_t kEmptyICMPSize = 28; + +// ICMP raw sockets get their own special tests because Linux automatically +// responds to ICMP echo requests, and thus a single echo request sent via +// loopback leads to 2 received ICMP packets. + +class RawSocketICMPTest : public ::testing::Test { + protected: + // Creates a socket to be used in tests. + void SetUp() override; + + // Closes the socket created by SetUp(). + void TearDown() override; + + // Checks that both an ICMP echo request and reply are received. Calls should + // be wrapped in ASSERT_NO_FATAL_FAILURE. + void ExpectICMPSuccess(const struct icmphdr& icmp); + + // Sends icmp via s_. + void SendEmptyICMP(const struct icmphdr& icmp); + + // Sends icmp via s_ to the given address. + void SendEmptyICMPTo(int sock, const struct sockaddr_in& addr, + const struct icmphdr& icmp); + + // Reads from s_ into recv_buf. + void ReceiveICMP(char* recv_buf, size_t recv_buf_len, size_t expected_size, + struct sockaddr_in* src); + + // Reads from sock into recv_buf. + void ReceiveICMPFrom(char* recv_buf, size_t recv_buf_len, + size_t expected_size, struct sockaddr_in* src, int sock); + + // The socket used for both reading and writing. + int s_; + + // The loopback address. 
+ struct sockaddr_in addr_; +}; + +void RawSocketICMPTest::SetUp() { + if (!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) { + ASSERT_THAT(socket(AF_INET, SOCK_RAW, IPPROTO_ICMP), + SyscallFailsWithErrno(EPERM)); + GTEST_SKIP(); + } + + ASSERT_THAT(s_ = socket(AF_INET, SOCK_RAW, IPPROTO_ICMP), SyscallSucceeds()); + + addr_ = {}; + + // "On raw sockets sin_port is set to the IP protocol." - ip(7). + addr_.sin_port = IPPROTO_IP; + addr_.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + addr_.sin_family = AF_INET; +} + +void RawSocketICMPTest::TearDown() { + // TearDown will be run even if we skip the test. + if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) { + EXPECT_THAT(close(s_), SyscallSucceeds()); + } +} + +// We'll only read an echo in this case, as the kernel won't respond to the +// malformed ICMP checksum. +TEST_F(RawSocketICMPTest, SendAndReceiveBadChecksum) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + // Prepare and send an ICMP packet. Use arbitrary junk for checksum, sequence, + // and ID. None of that should matter for raw sockets - the kernel should + // still give us the packet. + struct icmphdr icmp; + icmp.type = ICMP_ECHO; + icmp.code = 0; + icmp.checksum = 0; + icmp.un.echo.sequence = 2012; + icmp.un.echo.id = 2014; + ASSERT_NO_FATAL_FAILURE(SendEmptyICMP(icmp)); + + // Veryify that we get the echo, then that there's nothing else to read. + char recv_buf[kEmptyICMPSize]; + struct sockaddr_in src; + ASSERT_NO_FATAL_FAILURE( + ReceiveICMP(recv_buf, sizeof(recv_buf), sizeof(struct icmphdr), &src)); + EXPECT_EQ(memcmp(&src, &addr_, sizeof(src)), 0); + // The packet should be identical to what we sent. + EXPECT_EQ(memcmp(recv_buf + sizeof(struct iphdr), &icmp, sizeof(icmp)), 0); + + // And there should be nothing left to read. + EXPECT_THAT(RetryEINTR(recv)(s_, recv_buf, sizeof(recv_buf), MSG_DONTWAIT), + SyscallFailsWithErrno(EAGAIN)); +} + +// Send and receive an ICMP packet. 
+TEST_F(RawSocketICMPTest, SendAndReceive) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + // Prepare and send an ICMP packet. Use arbitrary junk for sequence and ID. + // None of that should matter for raw sockets - the kernel should still give + // us the packet. + struct icmphdr icmp; + icmp.type = ICMP_ECHO; + icmp.code = 0; + icmp.checksum = 0; + icmp.un.echo.sequence = 2012; + icmp.un.echo.id = 2014; + icmp.checksum = ICMPChecksum(icmp, NULL, 0); + ASSERT_NO_FATAL_FAILURE(SendEmptyICMP(icmp)); + + ASSERT_NO_FATAL_FAILURE(ExpectICMPSuccess(icmp)); +} + +// We should be able to create multiple raw sockets for the same protocol and +// receive the same packet on both. +TEST_F(RawSocketICMPTest, MultipleSocketReceive) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + FileDescriptor s2 = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_RAW, IPPROTO_ICMP)); + + // Prepare and send an ICMP packet. Use arbitrary junk for sequence and ID. + // None of that should matter for raw sockets - the kernel should still give + // us the packet. + struct icmphdr icmp; + icmp.type = ICMP_ECHO; + icmp.code = 0; + icmp.checksum = 0; + icmp.un.echo.sequence = 2016; + icmp.un.echo.id = 2018; + icmp.checksum = ICMPChecksum(icmp, NULL, 0); + ASSERT_NO_FATAL_FAILURE(SendEmptyICMP(icmp)); + + // Both sockets will receive the echo request and reply in indeterminate + // order, so we'll need to read 2 packets from each. + + // Receive on socket 1. + constexpr int kBufSize = kEmptyICMPSize; + char recv_buf1[2][kBufSize]; + struct sockaddr_in src; + for (int i = 0; i < 2; i++) { + ASSERT_NO_FATAL_FAILURE(ReceiveICMP(recv_buf1[i], + ABSL_ARRAYSIZE(recv_buf1[i]), + sizeof(struct icmphdr), &src)); + EXPECT_EQ(memcmp(&src, &addr_, sizeof(src)), 0); + } + + // Receive on socket 2. 
+ char recv_buf2[2][kBufSize]; + for (int i = 0; i < 2; i++) { + ASSERT_NO_FATAL_FAILURE( + ReceiveICMPFrom(recv_buf2[i], ABSL_ARRAYSIZE(recv_buf2[i]), + sizeof(struct icmphdr), &src, s2.get())); + EXPECT_EQ(memcmp(&src, &addr_, sizeof(src)), 0); + } + + // Ensure both sockets receive identical packets. + int types[] = {ICMP_ECHO, ICMP_ECHOREPLY}; + for (int type : types) { + auto match_type = [=](char buf[kBufSize]) { + struct icmphdr* icmp = + reinterpret_cast<struct icmphdr*>(buf + sizeof(struct iphdr)); + return icmp->type == type; + }; + auto icmp1_it = + std::find_if(std::begin(recv_buf1), std::end(recv_buf1), match_type); + auto icmp2_it = + std::find_if(std::begin(recv_buf2), std::end(recv_buf2), match_type); + ASSERT_NE(icmp1_it, std::end(recv_buf1)); + ASSERT_NE(icmp2_it, std::end(recv_buf2)); + EXPECT_EQ(memcmp(*icmp1_it + sizeof(struct iphdr), + *icmp2_it + sizeof(struct iphdr), sizeof(icmp)), + 0); + } +} + +// A raw ICMP socket and ping socket should both receive the ICMP packets +// intended for the ping socket. +TEST_F(RawSocketICMPTest, RawAndPingSockets) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + FileDescriptor ping_sock = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, IPPROTO_ICMP)); + + // Ping sockets take care of the ICMP ID and checksum. + struct icmphdr icmp; + icmp.type = ICMP_ECHO; + icmp.code = 0; + icmp.un.echo.sequence = *static_cast<unsigned short*>(&icmp.un.echo.sequence); + ASSERT_THAT(RetryEINTR(sendto)(ping_sock.get(), &icmp, sizeof(icmp), 0, + reinterpret_cast<struct sockaddr*>(&addr_), + sizeof(addr_)), + SyscallSucceedsWithValue(sizeof(icmp))); + + // Receive on socket 1, which receives the echo request and reply in + // indeterminate order. 
+ constexpr int kBufSize = kEmptyICMPSize; + char recv_buf1[2][kBufSize]; + struct sockaddr_in src; + for (int i = 0; i < 2; i++) { + ASSERT_NO_FATAL_FAILURE( + ReceiveICMP(recv_buf1[i], kBufSize, sizeof(struct icmphdr), &src)); + EXPECT_EQ(memcmp(&src, &addr_, sizeof(src)), 0); + } + + // Receive on socket 2. Ping sockets only get the echo reply, not the initial + // echo. + char ping_recv_buf[kBufSize]; + ASSERT_THAT(RetryEINTR(recv)(ping_sock.get(), ping_recv_buf, kBufSize, 0), + SyscallSucceedsWithValue(sizeof(struct icmphdr))); + + // Ensure both sockets receive identical echo reply packets. + auto match_type_raw = [=](char buf[kBufSize]) { + struct icmphdr* icmp = + reinterpret_cast<struct icmphdr*>(buf + sizeof(struct iphdr)); + return icmp->type == ICMP_ECHOREPLY; + }; + auto raw_reply_it = + std::find_if(std::begin(recv_buf1), std::end(recv_buf1), match_type_raw); + ASSERT_NE(raw_reply_it, std::end(recv_buf1)); + EXPECT_EQ( + memcmp(*raw_reply_it + sizeof(struct iphdr), ping_recv_buf, sizeof(icmp)), + 0); +} + +// A raw ICMP socket should be able to send a malformed short ICMP Echo Request, +// while ping socket should not. +// Neither should be able to receieve a short malformed packet. +TEST_F(RawSocketICMPTest, ShortEchoRawAndPingSockets) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + FileDescriptor ping_sock = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, IPPROTO_ICMP)); + + struct icmphdr icmp; + icmp.type = ICMP_ECHO; + icmp.code = 0; + icmp.un.echo.sequence = 0; + icmp.un.echo.id = 6789; + icmp.checksum = 0; + icmp.checksum = ICMPChecksum(icmp, NULL, 0); + + // Omit 2 bytes from ICMP packet. + constexpr int kShortICMPSize = sizeof(icmp) - 2; + + // Sending a malformed short ICMP message to a ping socket should fail. 
+ ASSERT_THAT(RetryEINTR(sendto)(ping_sock.get(), &icmp, kShortICMPSize, 0, + reinterpret_cast<struct sockaddr*>(&addr_), + sizeof(addr_)), + SyscallFailsWithErrno(EINVAL)); + + // Sending a malformed short ICMP message to a raw socket should not fail. + ASSERT_THAT(RetryEINTR(sendto)(s_, &icmp, kShortICMPSize, 0, + reinterpret_cast<struct sockaddr*>(&addr_), + sizeof(addr_)), + SyscallSucceedsWithValue(kShortICMPSize)); + + // Neither Ping nor Raw socket should have anything to read. + char recv_buf[kEmptyICMPSize]; + EXPECT_THAT(RetryEINTR(recv)(ping_sock.get(), recv_buf, sizeof(recv_buf), + MSG_DONTWAIT), + SyscallFailsWithErrno(EAGAIN)); + EXPECT_THAT(RetryEINTR(recv)(s_, recv_buf, sizeof(recv_buf), MSG_DONTWAIT), + SyscallFailsWithErrno(EAGAIN)); +} + +// A raw ICMP socket should be able to send a malformed short ICMP Echo Reply, +// while ping socket should not. +// Neither should be able to receieve a short malformed packet. +TEST_F(RawSocketICMPTest, ShortEchoReplyRawAndPingSockets) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + FileDescriptor ping_sock = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, IPPROTO_ICMP)); + + struct icmphdr icmp; + icmp.type = ICMP_ECHOREPLY; + icmp.code = 0; + icmp.un.echo.sequence = 0; + icmp.un.echo.id = 6789; + icmp.checksum = 0; + icmp.checksum = ICMPChecksum(icmp, NULL, 0); + + // Omit 2 bytes from ICMP packet. + constexpr int kShortICMPSize = sizeof(icmp) - 2; + + // Sending a malformed short ICMP message to a ping socket should fail. + ASSERT_THAT(RetryEINTR(sendto)(ping_sock.get(), &icmp, kShortICMPSize, 0, + reinterpret_cast<struct sockaddr*>(&addr_), + sizeof(addr_)), + SyscallFailsWithErrno(EINVAL)); + + // Sending a malformed short ICMP message to a raw socket should not fail. 
+ ASSERT_THAT(RetryEINTR(sendto)(s_, &icmp, kShortICMPSize, 0, + reinterpret_cast<struct sockaddr*>(&addr_), + sizeof(addr_)), + SyscallSucceedsWithValue(kShortICMPSize)); + + // Neither Ping nor Raw socket should have anything to read. + char recv_buf[kEmptyICMPSize]; + EXPECT_THAT(RetryEINTR(recv)(ping_sock.get(), recv_buf, sizeof(recv_buf), + MSG_DONTWAIT), + SyscallFailsWithErrno(EAGAIN)); + EXPECT_THAT(RetryEINTR(recv)(s_, recv_buf, sizeof(recv_buf), MSG_DONTWAIT), + SyscallFailsWithErrno(EAGAIN)); +} + +// Test that connect() sends packets to the right place. +TEST_F(RawSocketICMPTest, SendAndReceiveViaConnect) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + ASSERT_THAT( + connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), sizeof(addr_)), + SyscallSucceeds()); + + // Prepare and send an ICMP packet. Use arbitrary junk for sequence and ID. + // None of that should matter for raw sockets - the kernel should still give + // us the packet. + struct icmphdr icmp; + icmp.type = ICMP_ECHO; + icmp.code = 0; + icmp.checksum = 0; + icmp.un.echo.sequence = 2003; + icmp.un.echo.id = 2004; + icmp.checksum = ICMPChecksum(icmp, NULL, 0); + ASSERT_THAT(send(s_, &icmp, sizeof(icmp), 0), + SyscallSucceedsWithValue(sizeof(icmp))); + + ASSERT_NO_FATAL_FAILURE(ExpectICMPSuccess(icmp)); +} + +// Bind to localhost, then send and receive packets. +TEST_F(RawSocketICMPTest, BindSendAndReceive) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + ASSERT_THAT( + bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), sizeof(addr_)), + SyscallSucceeds()); + + // Prepare and send an ICMP packet. Use arbitrary junk for checksum, sequence, + // and ID. None of that should matter for raw sockets - the kernel should + // still give us the packet. 
+ struct icmphdr icmp; + icmp.type = ICMP_ECHO; + icmp.code = 0; + icmp.checksum = 0; + icmp.un.echo.sequence = 2004; + icmp.un.echo.id = 2007; + icmp.checksum = ICMPChecksum(icmp, NULL, 0); + ASSERT_NO_FATAL_FAILURE(SendEmptyICMP(icmp)); + + ASSERT_NO_FATAL_FAILURE(ExpectICMPSuccess(icmp)); +} + +// Bind and connect to localhost and send/receive packets. +TEST_F(RawSocketICMPTest, BindConnectSendAndReceive) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + ASSERT_THAT( + bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), sizeof(addr_)), + SyscallSucceeds()); + ASSERT_THAT( + connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), sizeof(addr_)), + SyscallSucceeds()); + + // Prepare and send an ICMP packet. Use arbitrary junk for checksum, sequence, + // and ID. None of that should matter for raw sockets - the kernel should + // still give us the packet. + struct icmphdr icmp; + icmp.type = ICMP_ECHO; + icmp.code = 0; + icmp.checksum = 0; + icmp.un.echo.sequence = 2010; + icmp.un.echo.id = 7; + icmp.checksum = ICMPChecksum(icmp, NULL, 0); + ASSERT_NO_FATAL_FAILURE(SendEmptyICMP(icmp)); + + ASSERT_NO_FATAL_FAILURE(ExpectICMPSuccess(icmp)); +} + +void RawSocketICMPTest::ExpectICMPSuccess(const struct icmphdr& icmp) { + // We're going to receive both the echo request and reply, but the order is + // indeterminate. + char recv_buf[kEmptyICMPSize]; + struct sockaddr_in src; + bool received_request = false; + bool received_reply = false; + + for (int i = 0; i < 2; i++) { + // Receive the packet. + ASSERT_NO_FATAL_FAILURE(ReceiveICMP(recv_buf, ABSL_ARRAYSIZE(recv_buf), + sizeof(struct icmphdr), &src)); + EXPECT_EQ(memcmp(&src, &addr_, sizeof(src)), 0); + struct icmphdr* recvd_icmp = + reinterpret_cast<struct icmphdr*>(recv_buf + sizeof(struct iphdr)); + switch (recvd_icmp->type) { + case ICMP_ECHO: + EXPECT_FALSE(received_request); + received_request = true; + // The packet should be identical to what we sent. 
+ EXPECT_EQ(memcmp(recv_buf + sizeof(struct iphdr), &icmp, sizeof(icmp)), + 0); + break; + + case ICMP_ECHOREPLY: + EXPECT_FALSE(received_reply); + received_reply = true; + // Most fields should be the same. + EXPECT_EQ(recvd_icmp->code, icmp.code); + EXPECT_EQ(recvd_icmp->un.echo.sequence, icmp.un.echo.sequence); + EXPECT_EQ(recvd_icmp->un.echo.id, icmp.un.echo.id); + // A couple are different. + EXPECT_EQ(recvd_icmp->type, ICMP_ECHOREPLY); + // The checksum computed over the reply should still be valid. + EXPECT_EQ(ICMPChecksum(*recvd_icmp, NULL, 0), 0); + break; + } + } + + ASSERT_TRUE(received_request); + ASSERT_TRUE(received_reply); +} + +void RawSocketICMPTest::SendEmptyICMP(const struct icmphdr& icmp) { + ASSERT_NO_FATAL_FAILURE(SendEmptyICMPTo(s_, addr_, icmp)); +} + +void RawSocketICMPTest::SendEmptyICMPTo(int sock, + const struct sockaddr_in& addr, + const struct icmphdr& icmp) { + // It's safe to use const_cast here because sendmsg won't modify the iovec or + // address. + struct iovec iov = {}; + iov.iov_base = static_cast<void*>(const_cast<struct icmphdr*>(&icmp)); + iov.iov_len = sizeof(icmp); + struct msghdr msg = {}; + msg.msg_name = static_cast<void*>(const_cast<struct sockaddr_in*>(&addr)); + msg.msg_namelen = sizeof(addr); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = 0; + ASSERT_THAT(sendmsg(sock, &msg, 0), SyscallSucceedsWithValue(sizeof(icmp))); +} + +void RawSocketICMPTest::ReceiveICMP(char* recv_buf, size_t recv_buf_len, + size_t expected_size, + struct sockaddr_in* src) { + ASSERT_NO_FATAL_FAILURE( + ReceiveICMPFrom(recv_buf, recv_buf_len, expected_size, src, s_)); +} + +void RawSocketICMPTest::ReceiveICMPFrom(char* recv_buf, size_t recv_buf_len, + size_t expected_size, + struct sockaddr_in* src, int sock) { + struct iovec iov = {}; + iov.iov_base = recv_buf; + iov.iov_len = recv_buf_len; + struct msghdr msg = {}; + msg.msg_name = src; + msg.msg_namelen = sizeof(*src); + 
msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = 0; + // We should receive the ICMP packet plus 20 bytes of IP header. + ASSERT_THAT(recvmsg(sock, &msg, 0), + SyscallSucceedsWithValue(expected_size + sizeof(struct iphdr))); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/read.cc b/test/syscalls/linux/read.cc new file mode 100644 index 000000000..2633ba31b --- /dev/null +++ b/test/syscalls/linux/read.cc @@ -0,0 +1,118 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <fcntl.h> +#include <unistd.h> + +#include <vector> + +#include "gtest/gtest.h" +#include "test/util/file_descriptor.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +class ReadTest : public ::testing::Test { + void SetUp() override { + name_ = NewTempAbsPath(); + int fd; + ASSERT_THAT(fd = open(name_.c_str(), O_CREAT, 0644), SyscallSucceeds()); + ASSERT_THAT(close(fd), SyscallSucceeds()); + } + + void TearDown() override { unlink(name_.c_str()); } + + public: + std::string name_; +}; + +TEST_F(ReadTest, ZeroBuffer) { + int fd; + ASSERT_THAT(fd = open(name_.c_str(), O_RDWR), SyscallSucceeds()); + + char msg[] = "hello world"; + EXPECT_THAT(PwriteFd(fd, msg, strlen(msg), 0), + SyscallSucceedsWithValue(strlen(msg))); + + char buf[10]; + EXPECT_THAT(ReadFd(fd, buf, 0), SyscallSucceedsWithValue(0)); + EXPECT_THAT(close(fd), SyscallSucceeds()); +} + +TEST_F(ReadTest, EmptyFileReturnsZeroAtEOF) { + int fd; + ASSERT_THAT(fd = open(name_.c_str(), O_RDWR), SyscallSucceeds()); + + char eof_buf[10]; + EXPECT_THAT(ReadFd(fd, eof_buf, 10), SyscallSucceedsWithValue(0)); + EXPECT_THAT(close(fd), SyscallSucceeds()); +} + +TEST_F(ReadTest, EofAfterRead) { + int fd; + ASSERT_THAT(fd = open(name_.c_str(), O_RDWR), SyscallSucceeds()); + + // Write some bytes to be read. + constexpr char kMessage[] = "hello world"; + EXPECT_THAT(PwriteFd(fd, kMessage, sizeof(kMessage), 0), + SyscallSucceedsWithValue(sizeof(kMessage))); + + // Read all of the bytes at once. + char buf[sizeof(kMessage)]; + EXPECT_THAT(ReadFd(fd, buf, sizeof(kMessage)), + SyscallSucceedsWithValue(sizeof(kMessage))); + + // Read again with a non-zero buffer and expect EOF. 
+ char eof_buf[10]; + EXPECT_THAT(ReadFd(fd, eof_buf, 10), SyscallSucceedsWithValue(0)); + EXPECT_THAT(close(fd), SyscallSucceeds()); +} + +TEST_F(ReadTest, DevNullReturnsEof) { + int fd; + ASSERT_THAT(fd = open("/dev/null", O_RDONLY), SyscallSucceeds()); + std::vector<char> buf(1); + EXPECT_THAT(ReadFd(fd, buf.data(), 1), SyscallSucceedsWithValue(0)); + EXPECT_THAT(close(fd), SyscallSucceeds()); +} + +const int kReadSize = 128 * 1024; + +// Do not allow random save as it could lead to partial reads. +TEST_F(ReadTest, CanReadFullyFromDevZero_NoRandomSave) { + int fd; + ASSERT_THAT(fd = open("/dev/zero", O_RDONLY), SyscallSucceeds()); + + std::vector<char> buf(kReadSize, 1); + EXPECT_THAT(ReadFd(fd, buf.data(), kReadSize), + SyscallSucceedsWithValue(kReadSize)); + EXPECT_THAT(close(fd), SyscallSucceeds()); + EXPECT_EQ(std::vector<char>(kReadSize, 0), buf); +} + +TEST_F(ReadTest, ReadDirectoryFails) { + const FileDescriptor file = + ASSERT_NO_ERRNO_AND_VALUE(Open(GetAbsoluteTestTmpdir(), O_RDONLY)); + std::vector<char> buf(1); + EXPECT_THAT(ReadFd(file.get(), buf.data(), 1), SyscallFailsWithErrno(EISDIR)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/readahead.cc b/test/syscalls/linux/readahead.cc new file mode 100644 index 000000000..09703b5c1 --- /dev/null +++ b/test/syscalls/linux/readahead.cc @@ -0,0 +1,91 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <errno.h> +#include <fcntl.h> + +#include "gtest/gtest.h" +#include "test/util/file_descriptor.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +TEST(ReadaheadTest, InvalidFD) { + EXPECT_THAT(readahead(-1, 1, 1), SyscallFailsWithErrno(EBADF)); +} + +TEST(ReadaheadTest, InvalidOffset) { + const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDWR)); + EXPECT_THAT(readahead(fd.get(), -1, 1), SyscallFailsWithErrno(EINVAL)); +} + +TEST(ReadaheadTest, ValidOffset) { + constexpr char kData[] = "123"; + const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), kData, TempPath::kDefaultFileMode)); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDWR)); + + // N.B. The implementation of readahead is filesystem-specific, and a file + // backed by ram may return EINVAL because there is nothing to be read. + EXPECT_THAT(readahead(fd.get(), 1, 1), AnyOf(SyscallSucceedsWithValue(0), + SyscallFailsWithErrno(EINVAL))); +} + +TEST(ReadaheadTest, PastEnd) { + constexpr char kData[] = "123"; + const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), kData, TempPath::kDefaultFileMode)); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDWR)); + // See above. + EXPECT_THAT(readahead(fd.get(), 2, 2), AnyOf(SyscallSucceedsWithValue(0), + SyscallFailsWithErrno(EINVAL))); +} + +TEST(ReadaheadTest, CrossesEnd) { + constexpr char kData[] = "123"; + const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), kData, TempPath::kDefaultFileMode)); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDWR)); + // See above. 
+ EXPECT_THAT(readahead(fd.get(), 4, 2), AnyOf(SyscallSucceedsWithValue(0), + SyscallFailsWithErrno(EINVAL))); +} + +TEST(ReadaheadTest, WriteOnly) { + const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_WRONLY)); + EXPECT_THAT(readahead(fd.get(), 0, 1), SyscallFailsWithErrno(EBADF)); +} + +TEST(ReadaheadTest, InvalidSize) { + const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDWR)); + EXPECT_THAT(readahead(fd.get(), 0, -1), SyscallFailsWithErrno(EINVAL)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/readv.cc b/test/syscalls/linux/readv.cc new file mode 100644 index 000000000..baaf9f757 --- /dev/null +++ b/test/syscalls/linux/readv.cc @@ -0,0 +1,294 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <sys/types.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "test/syscalls/linux/file_base.h" +#include "test/syscalls/linux/readv_common.h" +#include "test/util/file_descriptor.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" +#include "test/util/timer_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +class ReadvTest : public FileTest { + void SetUp() override { + FileTest::SetUp(); + + ASSERT_THAT(write(test_file_fd_.get(), kReadvTestData, kReadvTestDataSize), + SyscallSucceedsWithValue(kReadvTestDataSize)); + ASSERT_THAT(lseek(test_file_fd_.get(), 0, SEEK_SET), + SyscallSucceedsWithValue(0)); + ASSERT_THAT(write(test_pipe_[1], kReadvTestData, kReadvTestDataSize), + SyscallSucceedsWithValue(kReadvTestDataSize)); + } +}; + +TEST_F(ReadvTest, ReadOneBufferPerByte_File) { + ReadOneBufferPerByte(test_file_fd_.get()); +} + +TEST_F(ReadvTest, ReadOneBufferPerByte_Pipe) { + ReadOneBufferPerByte(test_pipe_[0]); +} + +TEST_F(ReadvTest, ReadOneHalfAtATime_File) { + ReadOneHalfAtATime(test_file_fd_.get()); +} + +TEST_F(ReadvTest, ReadOneHalfAtATime_Pipe) { + ReadOneHalfAtATime(test_pipe_[0]); +} + +TEST_F(ReadvTest, ReadAllOneBuffer_File) { + ReadAllOneBuffer(test_file_fd_.get()); +} + +TEST_F(ReadvTest, ReadAllOneBuffer_Pipe) { ReadAllOneBuffer(test_pipe_[0]); } + +TEST_F(ReadvTest, ReadAllOneLargeBuffer_File) { + ReadAllOneLargeBuffer(test_file_fd_.get()); +} + +TEST_F(ReadvTest, ReadAllOneLargeBuffer_Pipe) { + ReadAllOneLargeBuffer(test_pipe_[0]); +} + +TEST_F(ReadvTest, ReadBuffersOverlapping_File) { + ReadBuffersOverlapping(test_file_fd_.get()); +} + +TEST_F(ReadvTest, ReadBuffersOverlapping_Pipe) { + ReadBuffersOverlapping(test_pipe_[0]); +} + +TEST_F(ReadvTest, ReadBuffersDiscontinuous_File) { + ReadBuffersDiscontinuous(test_file_fd_.get()); +} + +TEST_F(ReadvTest, ReadBuffersDiscontinuous_Pipe) { + ReadBuffersDiscontinuous(test_pipe_[0]); +} 
+ +TEST_F(ReadvTest, ReadIovecsCompletelyFilled_File) { + ReadIovecsCompletelyFilled(test_file_fd_.get()); +} + +TEST_F(ReadvTest, ReadIovecsCompletelyFilled_Pipe) { + ReadIovecsCompletelyFilled(test_pipe_[0]); +} + +TEST_F(ReadvTest, BadFileDescriptor) { + char buffer[1024]; + struct iovec iov[1]; + iov[0].iov_base = buffer; + iov[0].iov_len = 1024; + + ASSERT_THAT(readv(-1, iov, 1024), SyscallFailsWithErrno(EBADF)); +} + +TEST_F(ReadvTest, BadIovecsPointer_File) { + ASSERT_THAT(readv(test_file_fd_.get(), nullptr, 1), + SyscallFailsWithErrno(EFAULT)); +} + +TEST_F(ReadvTest, BadIovecsPointer_Pipe) { + ASSERT_THAT(readv(test_pipe_[0], nullptr, 1), SyscallFailsWithErrno(EFAULT)); +} + +TEST_F(ReadvTest, BadIovecBase_File) { + struct iovec iov[1]; + iov[0].iov_base = nullptr; + iov[0].iov_len = 1024; + ASSERT_THAT(readv(test_file_fd_.get(), iov, 1), + SyscallFailsWithErrno(EFAULT)); +} + +TEST_F(ReadvTest, BadIovecBase_Pipe) { + struct iovec iov[1]; + iov[0].iov_base = nullptr; + iov[0].iov_len = 1024; + ASSERT_THAT(readv(test_pipe_[0], iov, 1), SyscallFailsWithErrno(EFAULT)); +} + +TEST_F(ReadvTest, ZeroIovecs_File) { + struct iovec iov[1]; + iov[0].iov_base = 0; + iov[0].iov_len = 0; + ASSERT_THAT(readv(test_file_fd_.get(), iov, 1), SyscallSucceeds()); +} + +TEST_F(ReadvTest, ZeroIovecs_Pipe) { + struct iovec iov[1]; + iov[0].iov_base = 0; + iov[0].iov_len = 0; + ASSERT_THAT(readv(test_pipe_[0], iov, 1), SyscallSucceeds()); +} + +TEST_F(ReadvTest, NotReadable_File) { + char buffer[1024]; + struct iovec iov[1]; + iov[0].iov_base = buffer; + iov[0].iov_len = 1024; + + std::string wronly_file = NewTempAbsPath(); + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE( + Open(wronly_file, O_CREAT | O_WRONLY, S_IRUSR | S_IWUSR)); + ASSERT_THAT(readv(fd.get(), iov, 1), SyscallFailsWithErrno(EBADF)); + fd.reset(); // Close before unlinking. 
+ ASSERT_THAT(unlink(wronly_file.c_str()), SyscallSucceeds()); +} + +TEST_F(ReadvTest, NotReadable_Pipe) { + char buffer[1024]; + struct iovec iov[1]; + iov[0].iov_base = buffer; + iov[0].iov_len = 1024; + ASSERT_THAT(readv(test_pipe_[1], iov, 1), SyscallFailsWithErrno(EBADF)); +} + +TEST_F(ReadvTest, DirNotReadable) { + char buffer[1024]; + struct iovec iov[1]; + iov[0].iov_base = buffer; + iov[0].iov_len = 1024; + + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(GetAbsoluteTestTmpdir(), O_RDONLY)); + ASSERT_THAT(readv(fd.get(), iov, 1), SyscallFailsWithErrno(EISDIR)); +} + +TEST_F(ReadvTest, OffsetIncremented) { + char* buffer = reinterpret_cast<char*>(malloc(kReadvTestDataSize)); + struct iovec iov[1]; + iov[0].iov_base = buffer; + iov[0].iov_len = kReadvTestDataSize; + + ASSERT_THAT(readv(test_file_fd_.get(), iov, 1), + SyscallSucceedsWithValue(kReadvTestDataSize)); + ASSERT_THAT(lseek(test_file_fd_.get(), 0, SEEK_CUR), + SyscallSucceedsWithValue(kReadvTestDataSize)); + + free(buffer); +} + +TEST_F(ReadvTest, EndOfFile) { + char* buffer = reinterpret_cast<char*>(malloc(kReadvTestDataSize)); + struct iovec iov[1]; + iov[0].iov_base = buffer; + iov[0].iov_len = kReadvTestDataSize; + ASSERT_THAT(readv(test_file_fd_.get(), iov, 1), + SyscallSucceedsWithValue(kReadvTestDataSize)); + free(buffer); + + buffer = reinterpret_cast<char*>(malloc(kReadvTestDataSize)); + iov[0].iov_base = buffer; + iov[0].iov_len = kReadvTestDataSize; + ASSERT_THAT(readv(test_file_fd_.get(), iov, 1), SyscallSucceedsWithValue(0)); + free(buffer); +} + +TEST_F(ReadvTest, WouldBlock_Pipe) { + struct iovec iov[1]; + iov[0].iov_base = reinterpret_cast<char*>(malloc(kReadvTestDataSize)); + iov[0].iov_len = kReadvTestDataSize; + ASSERT_THAT(readv(test_pipe_[0], iov, 1), + SyscallSucceedsWithValue(kReadvTestDataSize)); + free(iov[0].iov_base); + + iov[0].iov_base = reinterpret_cast<char*>(malloc(kReadvTestDataSize)); + ASSERT_THAT(readv(test_pipe_[0], iov, 1), 
SyscallFailsWithErrno(EAGAIN)); + free(iov[0].iov_base); +} + +TEST_F(ReadvTest, ZeroBuffer) { + char buf[10]; + struct iovec iov[1]; + iov[0].iov_base = buf; + iov[0].iov_len = 0; + ASSERT_THAT(readv(test_pipe_[0], iov, 1), SyscallSucceedsWithValue(0)); +} + +TEST_F(ReadvTest, NullIovecInNonemptyArray) { + std::vector<char> buf(kReadvTestDataSize); + struct iovec iov[2]; + iov[0].iov_base = nullptr; + iov[0].iov_len = 0; + iov[1].iov_base = buf.data(); + iov[1].iov_len = kReadvTestDataSize; + ASSERT_THAT(readv(test_file_fd_.get(), iov, 2), + SyscallSucceedsWithValue(kReadvTestDataSize)); +} + +TEST_F(ReadvTest, IovecOutsideTaskAddressRangeInNonemptyArray) { + std::vector<char> buf(kReadvTestDataSize); + struct iovec iov[2]; + iov[0].iov_base = reinterpret_cast<void*>(~static_cast<uintptr_t>(0)); + iov[0].iov_len = 0; + iov[1].iov_base = buf.data(); + iov[1].iov_len = kReadvTestDataSize; + ASSERT_THAT(readv(test_file_fd_.get(), iov, 2), + SyscallFailsWithErrno(EFAULT)); +} + +// This test depends on the maximum extent of a single readv() syscall, so +// we can't tolerate interruption from saving. +TEST(ReadvTestNoFixture, TruncatedAtMax_NoRandomSave) { + // Ensure that we won't be interrupted by ITIMER_PROF. This is particularly + // important in environments where automated profiling tools may start + // ITIMER_PROF automatically. + struct itimerval itv = {}; + auto const cleanup_itimer = + ASSERT_NO_ERRNO_AND_VALUE(ScopedItimer(ITIMER_PROF, itv)); + + // From Linux's include/linux/fs.h. + size_t const MAX_RW_COUNT = INT_MAX & ~(kPageSize - 1); + + // Create an iovec array with 3 segments pointing to consecutive parts of a + // buffer. The first covers all but the last three pages, and should be + // written to in its entirety. The second covers the last page before + // MAX_RW_COUNT and the first page after; only the first page should be + // written to. The third covers the last page of the buffer, and should be + // skipped entirely. 
+ size_t const kBufferSize = MAX_RW_COUNT + 2 * kPageSize; + size_t const kFirstOffset = MAX_RW_COUNT - kPageSize; + size_t const kSecondOffset = MAX_RW_COUNT + kPageSize; + // The buffer is too big to fit on the stack. + std::vector<char> buf(kBufferSize); + struct iovec iov[3]; + iov[0].iov_base = buf.data(); + iov[0].iov_len = kFirstOffset; + iov[1].iov_base = buf.data() + kFirstOffset; + iov[1].iov_len = kSecondOffset - kFirstOffset; + iov[2].iov_base = buf.data() + kSecondOffset; + iov[2].iov_len = kBufferSize - kSecondOffset; + + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDONLY)); + EXPECT_THAT(readv(fd.get(), iov, 3), SyscallSucceedsWithValue(MAX_RW_COUNT)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/readv_common.cc b/test/syscalls/linux/readv_common.cc new file mode 100644 index 000000000..2694dc64f --- /dev/null +++ b/test/syscalls/linux/readv_common.cc @@ -0,0 +1,220 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <errno.h> +#include <fcntl.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +// MatchesStringLength checks that a tuple argument of (struct iovec *, int) +// corresponding to an iovec array and its length, contains data that matches +// the string length strlen. 
+MATCHER_P(MatchesStringLength, strlen, "") { + struct iovec* iovs = arg.first; + int niov = arg.second; + int offset = 0; + for (int i = 0; i < niov; i++) { + offset += iovs[i].iov_len; + } + if (offset != static_cast<int>(strlen)) { + *result_listener << offset; + return false; + } + return true; +} + +// MatchesStringValue checks that a tuple argument of (struct iovec *, int) +// corresponding to an iovec array and its length, contains data that matches +// the string value str. +MATCHER_P(MatchesStringValue, str, "") { + struct iovec* iovs = arg.first; + int len = strlen(str); + int niov = arg.second; + int offset = 0; + for (int i = 0; i < niov; i++) { + struct iovec iov = iovs[i]; + if (len < offset) { + *result_listener << "strlen " << len << " < offset " << offset; + return false; + } + if (strncmp(static_cast<char*>(iov.iov_base), &str[offset], iov.iov_len)) { + absl::string_view iovec_string(static_cast<char*>(iov.iov_base), + iov.iov_len); + *result_listener << iovec_string << " @offset " << offset; + return false; + } + offset += iov.iov_len; + } + return true; +} + +extern const char kReadvTestData[] = + "127.0.0.1 localhost" + "" + "# The following lines are desirable for IPv6 capable hosts" + "::1 ip6-localhost ip6-loopback" + "fe00::0 ip6-localnet" + "ff00::0 ip6-mcastprefix" + "ff02::1 ip6-allnodes" + "ff02::2 ip6-allrouters" + "ff02::3 ip6-allhosts" + "192.168.1.100 a" + "93.184.216.34 foo.bar.example.com xcpu"; +extern const size_t kReadvTestDataSize = sizeof(kReadvTestData); + +static void ReadAllOneProvidedBuffer(int fd, std::vector<char>* buffer) { + struct iovec iovs[1]; + iovs[0].iov_base = buffer->data(); + iovs[0].iov_len = kReadvTestDataSize; + + ASSERT_THAT(readv(fd, iovs, 1), SyscallSucceedsWithValue(kReadvTestDataSize)); + + std::pair<struct iovec*, int> iovec_desc(iovs, 1); + EXPECT_THAT(iovec_desc, MatchesStringLength(kReadvTestDataSize)); + EXPECT_THAT(iovec_desc, MatchesStringValue(kReadvTestData)); +} + +void ReadAllOneBuffer(int 
fd) { + std::vector<char> buffer(kReadvTestDataSize); + ReadAllOneProvidedBuffer(fd, &buffer); +} + +void ReadAllOneLargeBuffer(int fd) { + std::vector<char> buffer(10 * kReadvTestDataSize); + ReadAllOneProvidedBuffer(fd, &buffer); +} + +void ReadOneHalfAtATime(int fd) { + int len0 = kReadvTestDataSize / 2; + int len1 = kReadvTestDataSize - len0; + std::vector<char> buffer0(len0); + std::vector<char> buffer1(len1); + + struct iovec iovs[2]; + iovs[0].iov_base = buffer0.data(); + iovs[0].iov_len = len0; + iovs[1].iov_base = buffer1.data(); + iovs[1].iov_len = len1; + + ASSERT_THAT(readv(fd, iovs, 2), SyscallSucceedsWithValue(kReadvTestDataSize)); + + std::pair<struct iovec*, int> iovec_desc(iovs, 2); + EXPECT_THAT(iovec_desc, MatchesStringLength(kReadvTestDataSize)); + EXPECT_THAT(iovec_desc, MatchesStringValue(kReadvTestData)); +} + +void ReadOneBufferPerByte(int fd) { + std::vector<char> buffer(kReadvTestDataSize); + std::vector<struct iovec> iovs(kReadvTestDataSize); + char* buffer_ptr = buffer.data(); + struct iovec* iovs_ptr = iovs.data(); + + for (int i = 0; i < static_cast<int>(kReadvTestDataSize); i++) { + struct iovec iov = { + .iov_base = &buffer_ptr[i], + .iov_len = 1, + }; + iovs_ptr[i] = iov; + } + + ASSERT_THAT(readv(fd, iovs_ptr, kReadvTestDataSize), + SyscallSucceedsWithValue(kReadvTestDataSize)); + + std::pair<struct iovec*, int> iovec_desc(iovs.data(), kReadvTestDataSize); + EXPECT_THAT(iovec_desc, MatchesStringLength(kReadvTestDataSize)); + EXPECT_THAT(iovec_desc, MatchesStringValue(kReadvTestData)); +} + +void ReadBuffersOverlapping(int fd) { + // overlap the first overlap_bytes. + int overlap_bytes = 8; + std::vector<char> buffer(kReadvTestDataSize); + + // overlapping causes us to get more data. 
+ int expected_size = kReadvTestDataSize + overlap_bytes; + std::vector<char> expected(expected_size); + char* expected_ptr = expected.data(); + memcpy(expected_ptr, &kReadvTestData[overlap_bytes], overlap_bytes); + memcpy(&expected_ptr[overlap_bytes], &kReadvTestData[overlap_bytes], + kReadvTestDataSize - overlap_bytes); + + struct iovec iovs[2]; + iovs[0].iov_base = buffer.data(); + iovs[0].iov_len = overlap_bytes; + iovs[1].iov_base = buffer.data(); + iovs[1].iov_len = kReadvTestDataSize; + + ASSERT_THAT(readv(fd, iovs, 2), SyscallSucceedsWithValue(kReadvTestDataSize)); + + std::pair<struct iovec*, int> iovec_desc(iovs, 2); + EXPECT_THAT(iovec_desc, MatchesStringLength(expected_size)); + EXPECT_THAT(iovec_desc, MatchesStringValue(expected_ptr)); +} + +void ReadBuffersDiscontinuous(int fd) { + // Each iov is 1 byte separated by 1 byte. + std::vector<char> buffer(kReadvTestDataSize * 2); + std::vector<struct iovec> iovs(kReadvTestDataSize); + + char* buffer_ptr = buffer.data(); + struct iovec* iovs_ptr = iovs.data(); + + for (int i = 0; i < static_cast<int>(kReadvTestDataSize); i++) { + struct iovec iov = { + .iov_base = &buffer_ptr[i * 2], + .iov_len = 1, + }; + iovs_ptr[i] = iov; + } + + ASSERT_THAT(readv(fd, iovs_ptr, kReadvTestDataSize), + SyscallSucceedsWithValue(kReadvTestDataSize)); + + std::pair<struct iovec*, int> iovec_desc(iovs.data(), kReadvTestDataSize); + EXPECT_THAT(iovec_desc, MatchesStringLength(kReadvTestDataSize)); + EXPECT_THAT(iovec_desc, MatchesStringValue(kReadvTestData)); +} + +void ReadIovecsCompletelyFilled(int fd) { + int half = kReadvTestDataSize / 2; + std::vector<char> buffer(kReadvTestDataSize); + char* buffer_ptr = buffer.data(); + memset(buffer.data(), '\0', kReadvTestDataSize); + + struct iovec iovs[2]; + iovs[0].iov_base = buffer.data(); + iovs[0].iov_len = half; + iovs[1].iov_base = &buffer_ptr[half]; + iovs[1].iov_len = half; + + ASSERT_THAT(readv(fd, iovs, 2), SyscallSucceedsWithValue(half * 2)); + + std::pair<struct iovec*, 
int> iovec_desc(iovs, 2); + EXPECT_THAT(iovec_desc, MatchesStringLength(half * 2)); + EXPECT_THAT(iovec_desc, MatchesStringValue(kReadvTestData)); + + char* str = static_cast<char*>(iovs[0].iov_base); + str[iovs[0].iov_len - 1] = '\0'; + ASSERT_EQ(half - 1, strlen(str)); +} + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/readv_common.h b/test/syscalls/linux/readv_common.h new file mode 100644 index 000000000..2fa40c35f --- /dev/null +++ b/test/syscalls/linux/readv_common.h @@ -0,0 +1,61 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GVISOR_TEST_SYSCALLS_READV_COMMON_H_ +#define GVISOR_TEST_SYSCALLS_READV_COMMON_H_ + +#include <stddef.h> + +namespace gvisor { +namespace testing { + +// A NUL-terminated string containing the data used by tests using the following +// test helpers. +extern const char kReadvTestData[]; + +// The size of kReadvTestData, including the terminating NUL. +extern const size_t kReadvTestDataSize; + +// ReadAllOneBuffer asserts that it can read kReadvTestData from an fd using +// exactly one iovec. +void ReadAllOneBuffer(int fd); + +// ReadAllOneLargeBuffer asserts that it can read kReadvTestData from an fd +// using exactly one iovec containing an overly large buffer. +void ReadAllOneLargeBuffer(int fd); + +// ReadOneHalfAtATime asserts that it can read test_data_from an fd using +// exactly two iovecs that are roughly equivalent in size. 
+void ReadOneHalfAtATime(int fd); + +// ReadOneBufferPerByte asserts that it can read kReadvTestData from an fd +// using one iovec per byte. +void ReadOneBufferPerByte(int fd); + +// ReadBuffersOverlapping asserts that it can read kReadvTestData from an fd +// where two iovecs are overlapping. +void ReadBuffersOverlapping(int fd); + +// ReadBuffersDiscontinuous asserts that it can read kReadvTestData from an fd +// where each iovec is discontinuous from the next by 1 byte. +void ReadBuffersDiscontinuous(int fd); + +// ReadIovecsCompletelyFilled asserts that the previous iovec is completely +// filled before moving onto the next. +void ReadIovecsCompletelyFilled(int fd); + +} // namespace testing +} // namespace gvisor + +#endif // GVISOR_TEST_SYSCALLS_READV_COMMON_H_ diff --git a/test/syscalls/linux/readv_socket.cc b/test/syscalls/linux/readv_socket.cc new file mode 100644 index 000000000..dd6fb7008 --- /dev/null +++ b/test/syscalls/linux/readv_socket.cc @@ -0,0 +1,212 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <errno.h> +#include <fcntl.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "test/syscalls/linux/readv_common.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +class ReadvSocketTest : public ::testing::Test { + public: + void SetUp() override { + test_unix_stream_socket_[0] = -1; + test_unix_stream_socket_[1] = -1; + test_unix_dgram_socket_[0] = -1; + test_unix_dgram_socket_[1] = -1; + test_unix_seqpacket_socket_[0] = -1; + test_unix_seqpacket_socket_[1] = -1; + + ASSERT_THAT(socketpair(AF_UNIX, SOCK_STREAM, 0, test_unix_stream_socket_), + SyscallSucceeds()); + ASSERT_THAT(fcntl(test_unix_stream_socket_[0], F_SETFL, O_NONBLOCK), + SyscallSucceeds()); + ASSERT_THAT(socketpair(AF_UNIX, SOCK_DGRAM, 0, test_unix_dgram_socket_), + SyscallSucceeds()); + ASSERT_THAT(fcntl(test_unix_dgram_socket_[0], F_SETFL, O_NONBLOCK), + SyscallSucceeds()); + ASSERT_THAT( + socketpair(AF_UNIX, SOCK_SEQPACKET, 0, test_unix_seqpacket_socket_), + SyscallSucceeds()); + ASSERT_THAT(fcntl(test_unix_seqpacket_socket_[0], F_SETFL, O_NONBLOCK), + SyscallSucceeds()); + + ASSERT_THAT( + write(test_unix_stream_socket_[1], kReadvTestData, kReadvTestDataSize), + SyscallSucceedsWithValue(kReadvTestDataSize)); + ASSERT_THAT( + write(test_unix_dgram_socket_[1], kReadvTestData, kReadvTestDataSize), + SyscallSucceedsWithValue(kReadvTestDataSize)); + ASSERT_THAT(write(test_unix_seqpacket_socket_[1], kReadvTestData, + kReadvTestDataSize), + SyscallSucceedsWithValue(kReadvTestDataSize)); + } + + void TearDown() override { + close(test_unix_stream_socket_[0]); + close(test_unix_stream_socket_[1]); + + close(test_unix_dgram_socket_[0]); + close(test_unix_dgram_socket_[1]); + + close(test_unix_seqpacket_socket_[0]); + close(test_unix_seqpacket_socket_[1]); + } + + int test_unix_stream_socket_[2]; + int test_unix_dgram_socket_[2]; + int test_unix_seqpacket_socket_[2]; +}; + 
+TEST_F(ReadvSocketTest, ReadOneBufferPerByte_StreamSocket) { + ReadOneBufferPerByte(test_unix_stream_socket_[0]); +} + +TEST_F(ReadvSocketTest, ReadOneBufferPerByte_DgramSocket) { + ReadOneBufferPerByte(test_unix_dgram_socket_[0]); +} + +TEST_F(ReadvSocketTest, ReadOneBufferPerByte_SeqPacketSocket) { + ReadOneBufferPerByte(test_unix_seqpacket_socket_[0]); +} + +TEST_F(ReadvSocketTest, ReadOneHalfAtATime_StreamSocket) { + ReadOneHalfAtATime(test_unix_stream_socket_[0]); +} + +TEST_F(ReadvSocketTest, ReadOneHalfAtATime_DgramSocket) { + ReadOneHalfAtATime(test_unix_dgram_socket_[0]); +} + +TEST_F(ReadvSocketTest, ReadAllOneBuffer_StreamSocket) { + ReadAllOneBuffer(test_unix_stream_socket_[0]); +} + +TEST_F(ReadvSocketTest, ReadAllOneBuffer_DgramSocket) { + ReadAllOneBuffer(test_unix_dgram_socket_[0]); +} + +TEST_F(ReadvSocketTest, ReadAllOneLargeBuffer_StreamSocket) { + ReadAllOneLargeBuffer(test_unix_stream_socket_[0]); +} + +TEST_F(ReadvSocketTest, ReadAllOneLargeBuffer_DgramSocket) { + ReadAllOneLargeBuffer(test_unix_dgram_socket_[0]); +} + +TEST_F(ReadvSocketTest, ReadBuffersOverlapping_StreamSocket) { + ReadBuffersOverlapping(test_unix_stream_socket_[0]); +} + +TEST_F(ReadvSocketTest, ReadBuffersOverlapping_DgramSocket) { + ReadBuffersOverlapping(test_unix_dgram_socket_[0]); +} + +TEST_F(ReadvSocketTest, ReadBuffersDiscontinuous_StreamSocket) { + ReadBuffersDiscontinuous(test_unix_stream_socket_[0]); +} + +TEST_F(ReadvSocketTest, ReadBuffersDiscontinuous_DgramSocket) { + ReadBuffersDiscontinuous(test_unix_dgram_socket_[0]); +} + +TEST_F(ReadvSocketTest, ReadIovecsCompletelyFilled_StreamSocket) { + ReadIovecsCompletelyFilled(test_unix_stream_socket_[0]); +} + +TEST_F(ReadvSocketTest, ReadIovecsCompletelyFilled_DgramSocket) { + ReadIovecsCompletelyFilled(test_unix_dgram_socket_[0]); +} + +TEST_F(ReadvSocketTest, BadIovecsPointer_StreamSocket) { + ASSERT_THAT(readv(test_unix_stream_socket_[0], nullptr, 1), + SyscallFailsWithErrno(EFAULT)); +} + 
+TEST_F(ReadvSocketTest, BadIovecsPointer_DgramSocket) { + ASSERT_THAT(readv(test_unix_dgram_socket_[0], nullptr, 1), + SyscallFailsWithErrno(EFAULT)); +} + +TEST_F(ReadvSocketTest, BadIovecBase_StreamSocket) { + struct iovec iov[1]; + iov[0].iov_base = nullptr; + iov[0].iov_len = 1024; + ASSERT_THAT(readv(test_unix_stream_socket_[0], iov, 1), + SyscallFailsWithErrno(EFAULT)); +} + +TEST_F(ReadvSocketTest, BadIovecBase_DgramSocket) { + struct iovec iov[1]; + iov[0].iov_base = nullptr; + iov[0].iov_len = 1024; + ASSERT_THAT(readv(test_unix_dgram_socket_[0], iov, 1), + SyscallFailsWithErrno(EFAULT)); +} + +TEST_F(ReadvSocketTest, ZeroIovecs_StreamSocket) { + struct iovec iov[1]; + iov[0].iov_base = 0; + iov[0].iov_len = 0; + ASSERT_THAT(readv(test_unix_stream_socket_[0], iov, 1), SyscallSucceeds()); +} + +TEST_F(ReadvSocketTest, ZeroIovecs_DgramSocket) { + struct iovec iov[1]; + iov[0].iov_base = 0; + iov[0].iov_len = 0; + ASSERT_THAT(readv(test_unix_dgram_socket_[0], iov, 1), SyscallSucceeds()); +} + +TEST_F(ReadvSocketTest, WouldBlock_StreamSocket) { + struct iovec iov[1]; + iov[0].iov_base = reinterpret_cast<char*>(malloc(kReadvTestDataSize)); + iov[0].iov_len = kReadvTestDataSize; + ASSERT_THAT(readv(test_unix_stream_socket_[0], iov, 1), + SyscallSucceedsWithValue(kReadvTestDataSize)); + free(iov[0].iov_base); + + iov[0].iov_base = reinterpret_cast<char*>(malloc(kReadvTestDataSize)); + ASSERT_THAT(readv(test_unix_stream_socket_[0], iov, 1), + SyscallFailsWithErrno(EAGAIN)); + free(iov[0].iov_base); +} + +TEST_F(ReadvSocketTest, WouldBlock_DgramSocket) { + struct iovec iov[1]; + iov[0].iov_base = reinterpret_cast<char*>(malloc(kReadvTestDataSize)); + iov[0].iov_len = kReadvTestDataSize; + ASSERT_THAT(readv(test_unix_dgram_socket_[0], iov, 1), + SyscallSucceedsWithValue(kReadvTestDataSize)); + free(iov[0].iov_base); + + iov[0].iov_base = reinterpret_cast<char*>(malloc(kReadvTestDataSize)); + ASSERT_THAT(readv(test_unix_dgram_socket_[0], iov, 1), + 
SyscallFailsWithErrno(EAGAIN)); + free(iov[0].iov_base); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/rename.cc b/test/syscalls/linux/rename.cc new file mode 100644 index 000000000..833c0dc4f --- /dev/null +++ b/test/syscalls/linux/rename.cc @@ -0,0 +1,394 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <fcntl.h> +#include <stdio.h> + +#include <string> + +#include "gtest/gtest.h" +#include "absl/strings/string_view.h" +#include "test/util/capability_util.h" +#include "test/util/cleanup.h" +#include "test/util/file_descriptor.h" +#include "test/util/fs_util.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +TEST(RenameTest, RootToAnything) { + ASSERT_THAT(rename("/", "/bin"), SyscallFailsWithErrno(EBUSY)); +} + +TEST(RenameTest, AnythingToRoot) { + ASSERT_THAT(rename("/bin", "/"), SyscallFailsWithErrno(EBUSY)); +} + +TEST(RenameTest, SourceIsAncestorOfTarget) { + auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + auto subdir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(dir.path())); + ASSERT_THAT(rename(dir.path().c_str(), subdir.path().c_str()), + SyscallFailsWithErrno(EINVAL)); + + // Try an even deeper directory. 
+ auto deep_subdir = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(subdir.path())); + ASSERT_THAT(rename(dir.path().c_str(), deep_subdir.path().c_str()), + SyscallFailsWithErrno(EINVAL)); +} + +TEST(RenameTest, TargetIsAncestorOfSource) { + auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + auto subdir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(dir.path())); + ASSERT_THAT(rename(subdir.path().c_str(), dir.path().c_str()), + SyscallFailsWithErrno(ENOTEMPTY)); + + // Try an even deeper directory. + auto deep_subdir = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(subdir.path())); + ASSERT_THAT(rename(deep_subdir.path().c_str(), dir.path().c_str()), + SyscallFailsWithErrno(ENOTEMPTY)); +} + +TEST(RenameTest, FileToSelf) { + auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + EXPECT_THAT(rename(f.path().c_str(), f.path().c_str()), SyscallSucceeds()); +} + +TEST(RenameTest, DirectoryToSelf) { + auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + EXPECT_THAT(rename(f.path().c_str(), f.path().c_str()), SyscallSucceeds()); +} + +TEST(RenameTest, FileToSameDirectory) { + auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + std::string const newpath = NewTempAbsPath(); + ASSERT_THAT(rename(f.path().c_str(), newpath.c_str()), SyscallSucceeds()); + std::string const oldpath = f.release(); + f.reset(newpath); + EXPECT_THAT(Exists(oldpath), IsPosixErrorOkAndHolds(false)); + EXPECT_THAT(Exists(newpath), IsPosixErrorOkAndHolds(true)); +} + +TEST(RenameTest, DirectoryToSameDirectory) { + auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + std::string const newpath = NewTempAbsPath(); + ASSERT_THAT(rename(dir.path().c_str(), newpath.c_str()), SyscallSucceeds()); + std::string const oldpath = dir.release(); + dir.reset(newpath); + EXPECT_THAT(Exists(oldpath), IsPosixErrorOkAndHolds(false)); + EXPECT_THAT(Exists(newpath), IsPosixErrorOkAndHolds(true)); +} + +TEST(RenameTest, FileToParentDirectory) { + auto dir1 = 
ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + auto dir2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(dir1.path())); + auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir2.path())); + std::string const newpath = NewTempAbsPathInDir(dir1.path()); + ASSERT_THAT(rename(f.path().c_str(), newpath.c_str()), SyscallSucceeds()); + std::string const oldpath = f.release(); + f.reset(newpath); + EXPECT_THAT(Exists(oldpath), IsPosixErrorOkAndHolds(false)); + EXPECT_THAT(Exists(newpath), IsPosixErrorOkAndHolds(true)); +} + +TEST(RenameTest, DirectoryToParentDirectory) { + auto dir1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + auto dir2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(dir1.path())); + auto dir3 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(dir2.path())); + EXPECT_THAT(IsDirectory(dir3.path()), IsPosixErrorOkAndHolds(true)); + std::string const newpath = NewTempAbsPathInDir(dir1.path()); + ASSERT_THAT(rename(dir3.path().c_str(), newpath.c_str()), SyscallSucceeds()); + std::string const oldpath = dir3.release(); + dir3.reset(newpath); + EXPECT_THAT(Exists(oldpath), IsPosixErrorOkAndHolds(false)); + EXPECT_THAT(Exists(newpath), IsPosixErrorOkAndHolds(true)); + EXPECT_THAT(IsDirectory(newpath), IsPosixErrorOkAndHolds(true)); +} + +TEST(RenameTest, FileToChildDirectory) { + auto dir1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + auto dir2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(dir1.path())); + auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir1.path())); + std::string const newpath = NewTempAbsPathInDir(dir2.path()); + ASSERT_THAT(rename(f.path().c_str(), newpath.c_str()), SyscallSucceeds()); + std::string const oldpath = f.release(); + f.reset(newpath); + EXPECT_THAT(Exists(oldpath), IsPosixErrorOkAndHolds(false)); + EXPECT_THAT(Exists(newpath), IsPosixErrorOkAndHolds(true)); +} + +TEST(RenameTest, DirectoryToChildDirectory) { + auto dir1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + auto dir2 = 
ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(dir1.path())); + auto dir3 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(dir1.path())); + std::string const newpath = NewTempAbsPathInDir(dir2.path()); + ASSERT_THAT(rename(dir3.path().c_str(), newpath.c_str()), SyscallSucceeds()); + std::string const oldpath = dir3.release(); + dir3.reset(newpath); + EXPECT_THAT(Exists(oldpath), IsPosixErrorOkAndHolds(false)); + EXPECT_THAT(Exists(newpath), IsPosixErrorOkAndHolds(true)); + EXPECT_THAT(IsDirectory(newpath), IsPosixErrorOkAndHolds(true)); +} + +TEST(RenameTest, DirectoryToOwnChildDirectory) { + auto dir1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + auto dir2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(dir1.path())); + std::string const newpath = NewTempAbsPathInDir(dir2.path()); + ASSERT_THAT(rename(dir1.path().c_str(), newpath.c_str()), + SyscallFailsWithErrno(EINVAL)); +} + +TEST(RenameTest, FileOverwritesFile) { + auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + auto f1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + dir.path(), "first", TempPath::kDefaultFileMode)); + auto f2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + dir.path(), "second", TempPath::kDefaultFileMode)); + ASSERT_THAT(rename(f1.path().c_str(), f2.path().c_str()), SyscallSucceeds()); + EXPECT_THAT(Exists(f1.path()), IsPosixErrorOkAndHolds(false)); + + f1.release(); + std::string f2_contents; + ASSERT_NO_ERRNO(GetContents(f2.path(), &f2_contents)); + EXPECT_EQ("first", f2_contents); +} + +TEST(RenameTest, DirectoryOverwritesDirectoryLinkCount) { + auto parent1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + EXPECT_THAT(Links(parent1.path()), IsPosixErrorOkAndHolds(2)); + + auto parent2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + EXPECT_THAT(Links(parent2.path()), IsPosixErrorOkAndHolds(2)); + + auto dir1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(parent1.path())); + auto dir2 = 
ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(parent2.path())); + + EXPECT_THAT(Links(parent1.path()), IsPosixErrorOkAndHolds(3)); + EXPECT_THAT(Links(parent2.path()), IsPosixErrorOkAndHolds(3)); + + ASSERT_THAT(rename(dir1.path().c_str(), dir2.path().c_str()), + SyscallSucceeds()); + + EXPECT_THAT(Links(parent1.path()), IsPosixErrorOkAndHolds(2)); + EXPECT_THAT(Links(parent2.path()), IsPosixErrorOkAndHolds(3)); +} + +TEST(RenameTest, FileDoesNotExist) { + auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const std::string source = JoinPath(dir.path(), "source"); + const std::string dest = JoinPath(dir.path(), "dest"); + ASSERT_THAT(rename(source.c_str(), dest.c_str()), + SyscallFailsWithErrno(ENOENT)); +} + +TEST(RenameTest, FileDoesNotOverwriteDirectory) { + auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + ASSERT_THAT(rename(f.path().c_str(), dir.path().c_str()), + SyscallFailsWithErrno(EISDIR)); +} + +TEST(RenameTest, DirectoryDoesNotOverwriteFile) { + auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + ASSERT_THAT(rename(dir.path().c_str(), f.path().c_str()), + SyscallFailsWithErrno(ENOTDIR)); +} + +TEST(RenameTest, DirectoryOverwritesEmptyDirectory) { + auto dir1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir1.path())); + auto dir2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + EXPECT_THAT(rename(dir1.path().c_str(), dir2.path().c_str()), + SyscallSucceeds()); + EXPECT_THAT(Exists(dir1.path()), IsPosixErrorOkAndHolds(false)); + dir1.release(); + EXPECT_THAT(Exists(JoinPath(dir2.path(), Basename(f.path()))), + IsPosixErrorOkAndHolds(true)); + f.release(); +} + +TEST(RenameTest, FailsWithDots) { + auto dir1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + auto dir2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + 
auto dir1_dot = absl::StrCat(dir1.path(), "/."); + auto dir2_dot = absl::StrCat(dir2.path(), "/."); + auto dir1_dot_dot = absl::StrCat(dir1.path(), "/.."); + auto dir2_dot_dot = absl::StrCat(dir2.path(), "/.."); + + // Try with dot paths in the first argument + EXPECT_THAT(rename(dir1_dot.c_str(), dir2.path().c_str()), + SyscallFailsWithErrno(EBUSY)); + EXPECT_THAT(rename(dir1_dot_dot.c_str(), dir2.path().c_str()), + SyscallFailsWithErrno(EBUSY)); + + // Try with dot paths in the second argument + EXPECT_THAT(rename(dir1.path().c_str(), dir2_dot.c_str()), + SyscallFailsWithErrno(EBUSY)); + EXPECT_THAT(rename(dir1.path().c_str(), dir2_dot_dot.c_str()), + SyscallFailsWithErrno(EBUSY)); +} + +TEST(RenameTest, DirectoryDoesNotOverwriteNonemptyDirectory) { + auto dir1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + auto f1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir1.path())); + auto dir2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + auto f2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir2.path())); + ASSERT_THAT(rename(dir1.path().c_str(), dir2.path().c_str()), + SyscallFailsWithErrno(ENOTEMPTY)); +} + +TEST(RenameTest, FailsWhenOldParentNotWritable) { + // Drop capabilities that allow us to override file and directory permissions. + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false)); + + auto dir1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + auto f1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir1.path())); + auto dir2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + // dir1 is not writable. + ASSERT_THAT(chmod(dir1.path().c_str(), 0555), SyscallSucceeds()); + + std::string const newpath = NewTempAbsPathInDir(dir2.path()); + EXPECT_THAT(rename(f1.path().c_str(), newpath.c_str()), + SyscallFailsWithErrno(EACCES)); +} + +TEST(RenameTest, FailsWhenNewParentNotWritable) { + // Drop capabilities that allow us to override file and directory permissions. 
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false)); + + auto dir1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + auto f1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir1.path())); + // dir2 is not writable. + auto dir2 = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateDirWith(GetAbsoluteTestTmpdir(), 0555)); + + std::string const newpath = NewTempAbsPathInDir(dir2.path()); + EXPECT_THAT(rename(f1.path().c_str(), newpath.c_str()), + SyscallFailsWithErrno(EACCES)); +} + +// Equivalent to FailsWhenNewParentNotWritable, but with a destination file +// to overwrite. +TEST(RenameTest, OverwriteFailsWhenNewParentNotWritable) { + // Drop capabilities that allow us to override file and directory permissions. + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false)); + + auto dir1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + auto f1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir1.path())); + + // dir2 is not writable. + auto dir2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + auto f2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir2.path())); + ASSERT_THAT(chmod(dir2.path().c_str(), 0555), SyscallSucceeds()); + + EXPECT_THAT(rename(f1.path().c_str(), f2.path().c_str()), + SyscallFailsWithErrno(EACCES)); +} + +// If the parent directory of source is not accessible, rename returns EACCES +// because the user cannot determine if source exists. +TEST(RenameTest, FileDoesNotExistWhenNewParentNotExecutable) { + // Drop capabilities that allow us to override file and directory permissions. + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false)); + + // No execute permission. 
+ auto dir = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateDirWith(GetAbsoluteTestTmpdir(), 0400)); + + const std::string source = JoinPath(dir.path(), "source"); + const std::string dest = JoinPath(dir.path(), "dest"); + ASSERT_THAT(rename(source.c_str(), dest.c_str()), + SyscallFailsWithErrno(EACCES)); +} + +TEST(RenameTest, DirectoryWithOpenFdOverwritesEmptyDirectory) { + auto dir1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir1.path())); + auto dir2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + + // Get an fd on dir1 + int fd; + ASSERT_THAT(fd = open(dir1.path().c_str(), O_DIRECTORY), SyscallSucceeds()); + auto close_f = Cleanup([fd] { + // Close the fd on f. + EXPECT_THAT(close(fd), SyscallSucceeds()); + }); + + EXPECT_THAT(rename(dir1.path().c_str(), dir2.path().c_str()), + SyscallSucceeds()); + + const std::string new_f_path = JoinPath(dir2.path(), Basename(f.path())); + + auto remove_f = Cleanup([&] { + // Delete f in its new location. + ASSERT_NO_ERRNO(Delete(new_f_path)); + f.release(); + }); + + EXPECT_THAT(Exists(dir1.path()), IsPosixErrorOkAndHolds(false)); + dir1.release(); + EXPECT_THAT(Exists(new_f_path), IsPosixErrorOkAndHolds(true)); +} + +TEST(RenameTest, FileWithOpenFd) { + TempPath root_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + TempPath dir1 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(root_dir.path())); + TempPath dir2 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(root_dir.path())); + TempPath dir3 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(root_dir.path())); + + // Create file in dir1. + constexpr char kContents[] = "foo"; + auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + dir1.path(), kContents, TempPath::kDefaultFileMode)); + + // Get fd on file. + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDWR)); + + // Move f to dir2. 
+ const std::string path2 = NewTempAbsPathInDir(dir2.path()); + ASSERT_THAT(rename(f.path().c_str(), path2.c_str()), SyscallSucceeds()); + + // Read f's kContents. + char buf[sizeof(kContents)]; + EXPECT_THAT(PreadFd(fd.get(), &buf, sizeof(kContents), 0), + SyscallSucceedsWithValue(sizeof(kContents) - 1)); + EXPECT_EQ(absl::string_view(buf, sizeof(buf) - 1), kContents); + + // Move f to dir3. + const std::string path3 = NewTempAbsPathInDir(dir3.path()); + ASSERT_THAT(rename(path2.c_str(), path3.c_str()), SyscallSucceeds()); + + // Read f's kContents. + EXPECT_THAT(PreadFd(fd.get(), &buf, sizeof(kContents), 0), + SyscallSucceedsWithValue(sizeof(kContents) - 1)); + EXPECT_EQ(absl::string_view(buf, sizeof(buf) - 1), kContents); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/rlimits.cc b/test/syscalls/linux/rlimits.cc new file mode 100644 index 000000000..860f0f688 --- /dev/null +++ b/test/syscalls/linux/rlimits.cc @@ -0,0 +1,75 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <sys/resource.h> +#include <sys/time.h> + +#include "test/util/capability_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +TEST(RlimitTest, SetRlimitHigher) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_RESOURCE))); + + struct rlimit rl = {}; + EXPECT_THAT(getrlimit(RLIMIT_NOFILE, &rl), SyscallSucceeds()); + + // Lower the rlimit first, as it may be equal to /proc/sys/fs/nr_open, in + // which case even users with CAP_SYS_RESOURCE can't raise it. + rl.rlim_cur--; + rl.rlim_max--; + ASSERT_THAT(setrlimit(RLIMIT_NOFILE, &rl), SyscallSucceeds()); + + rl.rlim_max++; + EXPECT_THAT(setrlimit(RLIMIT_NOFILE, &rl), SyscallSucceeds()); +} + +TEST(RlimitTest, UnprivilegedSetRlimit) { + // Drop privileges if necessary. + if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_RESOURCE))) { + EXPECT_NO_ERRNO(SetCapability(CAP_SYS_RESOURCE, false)); + } + + struct rlimit rl = {}; + rl.rlim_cur = 1000; + rl.rlim_max = 20000; + EXPECT_THAT(setrlimit(RLIMIT_NOFILE, &rl), SyscallSucceeds()); + + struct rlimit rl2 = {}; + EXPECT_THAT(getrlimit(RLIMIT_NOFILE, &rl2), SyscallSucceeds()); + EXPECT_EQ(rl.rlim_cur, rl2.rlim_cur); + EXPECT_EQ(rl.rlim_max, rl2.rlim_max); + + rl.rlim_max = 100000; + EXPECT_THAT(setrlimit(RLIMIT_NOFILE, &rl), SyscallFailsWithErrno(EPERM)); +} + +TEST(RlimitTest, SetSoftRlimitAboveHard) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_RESOURCE))); + + struct rlimit rl = {}; + EXPECT_THAT(getrlimit(RLIMIT_NOFILE, &rl), SyscallSucceeds()); + + rl.rlim_cur = rl.rlim_max + 1; + EXPECT_THAT(setrlimit(RLIMIT_NOFILE, &rl), SyscallFailsWithErrno(EINVAL)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/rseq.cc b/test/syscalls/linux/rseq.cc new file mode 100644 index 000000000..4bfb1ff56 --- /dev/null +++ b/test/syscalls/linux/rseq.cc @@ -0,0 +1,198 @@ +// Copyright 2019 The gVisor Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <errno.h> +#include <signal.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "test/syscalls/linux/rseq/test.h" +#include "test/syscalls/linux/rseq/uapi.h" +#include "test/util/logging.h" +#include "test/util/multiprocess_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// Syscall test for rseq (restartable sequences). +// +// We must be very careful about how these tests are written. Each thread may +// only have one struct rseq registration, which may be done automatically at +// thread start (as of 2019-11-13, glibc does *not* support rseq and thus does +// not do so, but other libraries do). +// +// Testing of rseq is thus done primarily in a child process with no +// registration. This means exec'ing a nostdlib binary, as rseq registration can +// only be cleared by execve (or knowing the old rseq address), and glibc (based +// on the current unmerged patches) register rseq before calling main()). + +int RSeq(struct rseq* rseq, uint32_t rseq_len, int flags, uint32_t sig) { + return syscall(kRseqSyscall, rseq, rseq_len, flags, sig); +} + +// Returns true if this kernel supports the rseq syscall. +PosixErrorOr<bool> RSeqSupported() { + // We have to be careful here, there are three possible cases: + // + // 1. rseq is not supported -> ENOSYS + // 2. 
rseq is supported and not registered -> success, but we should + // unregister. + // 3. rseq is supported and registered -> EINVAL (most likely). + + // The only validation done on new registrations is that rseq is aligned and + // writable. + rseq rseq = {}; + int ret = RSeq(&rseq, sizeof(rseq), 0, 0); + if (ret == 0) { + // Successfully registered, rseq is supported. Unregister. + ret = RSeq(&rseq, sizeof(rseq), kRseqFlagUnregister, 0); + if (ret != 0) { + return PosixError(errno); + } + return true; + } + + switch (errno) { + case ENOSYS: + // Not supported. + return false; + case EINVAL: + // Supported, but already registered. EINVAL returned because we provided + // a different address. + return true; + default: + // Unknown error. + return PosixError(errno); + } +} + +constexpr char kRseqBinary[] = "test/syscalls/linux/rseq/rseq"; + +void RunChildTest(std::string test_case, int want_status) { + std::string path = RunfilePath(kRseqBinary); + + pid_t child_pid = -1; + int execve_errno = 0; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec(path, {path, test_case}, {}, &child_pid, &execve_errno)); + + ASSERT_GT(child_pid, 0); + ASSERT_EQ(execve_errno, 0); + + int status = 0; + ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds()); + ASSERT_EQ(status, want_status); +} + +// Test that rseq must be aligned. +TEST(RseqTest, Unaligned) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(RSeqSupported())); + + RunChildTest(kRseqTestUnaligned, 0); +} + +// Sanity test that registration works. +TEST(RseqTest, Register) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(RSeqSupported())); + + RunChildTest(kRseqTestRegister, 0); +} + +// Registration can't be done twice. +TEST(RseqTest, DoubleRegister) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(RSeqSupported())); + + RunChildTest(kRseqTestDoubleRegister, 0); +} + +// Registration can be done again after unregister. 
TEST(RseqTest, RegisterUnregister) {
  // Skip (rather than fail) when RSeqSupported() reports that the rseq
  // syscall is unavailable.
  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(RSeqSupported()));

  // Run the named case in the standalone rseq child binary. The second
  // argument is the value RunChildTest expects for the child's raw waitpid()
  // status; 0 means a normal exit with code 0.
  RunChildTest(kRseqTestRegisterUnregister, 0);
}

// The pointer to rseq must match on register/unregister.
//
// Expects the child case to exit with status 0.
TEST(RseqTest, UnregisterDifferentPtr) {
  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(RSeqSupported()));

  RunChildTest(kRseqTestUnregisterDifferentPtr, 0);
}

// The signature must match on register/unregister.
//
// Expects the child case to exit with status 0.
TEST(RseqTest, UnregisterDifferentSignature) {
  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(RSeqSupported()));

  RunChildTest(kRseqTestUnregisterDifferentSignature, 0);
}

// The CPU ID is initialized.
//
// Expects the child case to exit with status 0.
TEST(RseqTest, CPU) {
  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(RSeqSupported()));

  RunChildTest(kRseqTestCPU, 0);
}

// Critical section is eventually aborted.
//
// Expects the child case to exit with status 0.
TEST(RseqTest, Abort) {
  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(RSeqSupported()));

  RunChildTest(kRseqTestAbort, 0);
}

// Abort may be before the critical section.
//
// Expects the child case to exit with status 0.
TEST(RseqTest, AbortBefore) {
  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(RSeqSupported()));

  RunChildTest(kRseqTestAbortBefore, 0);
}

// Signature must match.
//
// Unlike the cases above, the expected wait status is SIGSEGV: the child is
// expected to be killed by that signal rather than to exit normally.
TEST(RseqTest, AbortSignature) {
  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(RSeqSupported()));

  RunChildTest(kRseqTestAbortSignature, SIGSEGV);
}

// Abort must not be in the critical section.
//
// The expected wait status is SIGSEGV, as for AbortSignature.
TEST(RseqTest, AbortPreCommit) {
  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(RSeqSupported()));

  RunChildTest(kRseqTestAbortPreCommit, SIGSEGV);
}

// rseq.rseq_cs is cleared on abort.
//
// Expects the child case to exit with status 0.
TEST(RseqTest, AbortClearsCS) {
  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(RSeqSupported()));

  RunChildTest(kRseqTestAbortClearsCS, 0);
}

// rseq.rseq_cs is cleared on abort outside of critical section.
+TEST(RseqTest, InvalidAbortClearsCS) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(RSeqSupported())); + + RunChildTest(kRseqTestInvalidAbortClearsCS, 0); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/rseq/BUILD b/test/syscalls/linux/rseq/BUILD new file mode 100644 index 000000000..853258b04 --- /dev/null +++ b/test/syscalls/linux/rseq/BUILD @@ -0,0 +1,61 @@ +# This package contains a standalone rseq test binary. This binary must not +# depend on libc, which might use rseq itself. + +load("//tools:defs.bzl", "cc_flags_supplier", "cc_library", "cc_toolchain", "select_arch") + +package(licenses = ["notice"]) + +genrule( + name = "rseq_binary", + srcs = [ + "critical.h", + "critical_amd64.S", + "critical_arm64.S", + "rseq.cc", + "syscalls.h", + "start_amd64.S", + "start_arm64.S", + "test.h", + "types.h", + "uapi.h", + ], + outs = ["rseq"], + cmd = "$(CC) " + + "$(CC_FLAGS) " + + "-I. " + + "-Wall " + + "-Werror " + + "-O2 " + + "-std=c++17 " + + "-static " + + "-nostdlib " + + "-ffreestanding " + + "-o " + + "$(location rseq) " + + select_arch( + amd64 = "$(location critical_amd64.S) $(location start_amd64.S) ", + arm64 = "$(location critical_arm64.S) $(location start_arm64.S) ", + no_match_error = "unsupported architecture", + ) + + "$(location rseq.cc)", + toolchains = [ + cc_toolchain, + ":no_pie_cc_flags", + ], + visibility = ["//:sandbox"], +) + +cc_flags_supplier( + name = "no_pie_cc_flags", + features = ["-pie"], +) + +cc_library( + name = "lib", + testonly = 1, + hdrs = [ + "test.h", + "uapi.h", + ], + visibility = ["//:sandbox"], +) diff --git a/test/syscalls/linux/rseq/critical.h b/test/syscalls/linux/rseq/critical.h new file mode 100644 index 000000000..ac987a25e --- /dev/null +++ b/test/syscalls/linux/rseq/critical.h @@ -0,0 +1,39 @@ +// Copyright 2019 The gVisor Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GVISOR_TEST_SYSCALLS_LINUX_RSEQ_CRITICAL_H_ +#define GVISOR_TEST_SYSCALLS_LINUX_RSEQ_CRITICAL_H_ + +#include "test/syscalls/linux/rseq/types.h" +#include "test/syscalls/linux/rseq/uapi.h" + +constexpr uint32_t kRseqSignature = 0x90909090; + +extern "C" { + +extern void rseq_loop(struct rseq* r, struct rseq_cs* cs); +extern void* rseq_loop_early_abort; +extern void* rseq_loop_start; +extern void* rseq_loop_pre_commit; +extern void* rseq_loop_post_commit; +extern void* rseq_loop_abort; + +extern int rseq_getpid(struct rseq* r, struct rseq_cs* cs); +extern void* rseq_getpid_start; +extern void* rseq_getpid_post_commit; +extern void* rseq_getpid_abort; + +} // extern "C" + +#endif // GVISOR_TEST_SYSCALLS_LINUX_RSEQ_CRITICAL_H_ diff --git a/test/syscalls/linux/rseq/critical_amd64.S b/test/syscalls/linux/rseq/critical_amd64.S new file mode 100644 index 000000000..8c0687e6d --- /dev/null +++ b/test/syscalls/linux/rseq/critical_amd64.S @@ -0,0 +1,66 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Restartable sequences critical sections. + +// Loops continuously until aborted. +// +// void rseq_loop(struct rseq* r, struct rseq_cs* cs) + + .text + .globl rseq_loop + .type rseq_loop, @function + +rseq_loop: + jmp begin + + // Abort block before the critical section. + // Abort signature is 4 nops for simplicity. + .byte 0x90, 0x90, 0x90, 0x90 + .globl rseq_loop_early_abort +rseq_loop_early_abort: + ret + +begin: + // r->rseq_cs = cs + movq %rsi, 8(%rdi) + + // N.B. rseq_cs will be cleared by any preempt, even outside the critical + // section. Thus it must be set in or immediately before the critical section + // to ensure it is not cleared before the section begins. + .globl rseq_loop_start +rseq_loop_start: + jmp rseq_loop_start + + // "Pre-commit": extra instructions inside the critical section. These are + // used as the abort point in TestAbortPreCommit, which is not valid. + .globl rseq_loop_pre_commit +rseq_loop_pre_commit: + // Extra abort signature + nop for TestAbortPostCommit. + .byte 0x90, 0x90, 0x90, 0x90 + nop + + // "Post-commit": never reached in this case. + .globl rseq_loop_post_commit +rseq_loop_post_commit: + + // Abort signature is 4 nops for simplicity. 
+ .byte 0x90, 0x90, 0x90, 0x90 + + .globl rseq_loop_abort +rseq_loop_abort: + ret + + .size rseq_loop,.-rseq_loop + .section .note.GNU-stack,"",@progbits diff --git a/test/syscalls/linux/rseq/critical_arm64.S b/test/syscalls/linux/rseq/critical_arm64.S new file mode 100644 index 000000000..bfe7e8307 --- /dev/null +++ b/test/syscalls/linux/rseq/critical_arm64.S @@ -0,0 +1,66 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Restartable sequences critical sections. + +// Loops continuously until aborted. +// +// void rseq_loop(struct rseq* r, struct rseq_cs* cs) + + .text + .globl rseq_loop + .type rseq_loop, @function + +rseq_loop: + b begin + + // Abort block before the critical section. + // Abort signature. + .byte 0x90, 0x90, 0x90, 0x90 + .globl rseq_loop_early_abort +rseq_loop_early_abort: + ret + +begin: + // r->rseq_cs = cs + str x1, [x0, #8] + + // N.B. rseq_cs will be cleared by any preempt, even outside the critical + // section. Thus it must be set in or immediately before the critical section + // to ensure it is not cleared before the section begins. + .globl rseq_loop_start +rseq_loop_start: + b rseq_loop_start + + // "Pre-commit": extra instructions inside the critical section. These are + // used as the abort point in TestAbortPreCommit, which is not valid. + .globl rseq_loop_pre_commit +rseq_loop_pre_commit: + // Extra abort signature + nop for TestAbortPostCommit. 
+ .byte 0x90, 0x90, 0x90, 0x90 + nop + + // "Post-commit": never reached in this case. + .globl rseq_loop_post_commit +rseq_loop_post_commit: + + // Abort signature. + .byte 0x90, 0x90, 0x90, 0x90 + + .globl rseq_loop_abort +rseq_loop_abort: + ret + + .size rseq_loop,.-rseq_loop + .section .note.GNU-stack,"",@progbits diff --git a/test/syscalls/linux/rseq/rseq.cc b/test/syscalls/linux/rseq/rseq.cc new file mode 100644 index 000000000..f036db26d --- /dev/null +++ b/test/syscalls/linux/rseq/rseq.cc @@ -0,0 +1,366 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "test/syscalls/linux/rseq/critical.h" +#include "test/syscalls/linux/rseq/syscalls.h" +#include "test/syscalls/linux/rseq/test.h" +#include "test/syscalls/linux/rseq/types.h" +#include "test/syscalls/linux/rseq/uapi.h" + +namespace gvisor { +namespace testing { + +extern "C" int main(int argc, char** argv, char** envp); + +// Standalone initialization before calling main(). +extern "C" void __init(uintptr_t* sp) { + int argc = sp[0]; + char** argv = reinterpret_cast<char**>(&sp[1]); + char** envp = &argv[argc + 1]; + + // Call main() and exit. 
+ sys_exit_group(main(argc, argv, envp)); + + // sys_exit_group does not return +} + +int strcmp(const char* s1, const char* s2) { + const unsigned char* p1 = reinterpret_cast<const unsigned char*>(s1); + const unsigned char* p2 = reinterpret_cast<const unsigned char*>(s2); + + while (*p1 == *p2) { + if (!*p1) { + return 0; + } + ++p1; + ++p2; + } + return static_cast<int>(*p1) - static_cast<int>(*p2); +} + +int sys_rseq(struct rseq* rseq, uint32_t rseq_len, int flags, uint32_t sig) { + return raw_syscall(kRseqSyscall, rseq, rseq_len, flags, sig); +} + +// Test that rseq must be aligned. +int TestUnaligned() { + constexpr uintptr_t kRequiredAlignment = alignof(rseq); + + char buf[2 * kRequiredAlignment] = {}; + uintptr_t ptr = reinterpret_cast<uintptr_t>(&buf[0]); + if ((ptr & (kRequiredAlignment - 1)) == 0) { + // buf is already aligned. Misalign it. + ptr++; + } + + int ret = sys_rseq(reinterpret_cast<rseq*>(ptr), sizeof(rseq), 0, 0); + if (sys_errno(ret) != EINVAL) { + return 1; + } + return 0; +} + +// Sanity test that registration works. +int TestRegister() { + struct rseq r = {}; + if (int ret = sys_rseq(&r, sizeof(r), 0, 0); sys_errno(ret) != 0) { + return 1; + } + return 0; +}; + +// Registration can't be done twice. +int TestDoubleRegister() { + struct rseq r = {}; + if (int ret = sys_rseq(&r, sizeof(r), 0, 0); sys_errno(ret) != 0) { + return 1; + } + + if (int ret = sys_rseq(&r, sizeof(r), 0, 0); sys_errno(ret) != EBUSY) { + return 1; + } + + return 0; +}; + +// Registration can be done again after unregister. +int TestRegisterUnregister() { + struct rseq r = {}; + if (int ret = sys_rseq(&r, sizeof(r), 0, 0); sys_errno(ret) != 0) { + return 1; + } + + if (int ret = sys_rseq(&r, sizeof(r), kRseqFlagUnregister, 0); + sys_errno(ret) != 0) { + return 1; + } + + if (int ret = sys_rseq(&r, sizeof(r), 0, 0); sys_errno(ret) != 0) { + return 1; + } + + return 0; +}; + +// The pointer to rseq must match on register/unregister. 
+int TestUnregisterDifferentPtr() { + struct rseq r = {}; + if (int ret = sys_rseq(&r, sizeof(r), 0, 0); sys_errno(ret) != 0) { + return 1; + } + + struct rseq r2 = {}; + if (int ret = sys_rseq(&r2, sizeof(r2), kRseqFlagUnregister, 0); + sys_errno(ret) != EINVAL) { + return 1; + } + + return 0; +}; + +// The signature must match on register/unregister. +int TestUnregisterDifferentSignature() { + constexpr int kSignature = 0; + + struct rseq r = {}; + if (int ret = sys_rseq(&r, sizeof(r), 0, kSignature); sys_errno(ret) != 0) { + return 1; + } + + if (int ret = sys_rseq(&r, sizeof(r), kRseqFlagUnregister, kSignature + 1); + sys_errno(ret) != EPERM) { + return 1; + } + + return 0; +}; + +// The CPU ID is initialized. +int TestCPU() { + struct rseq r = {}; + r.cpu_id = kRseqCPUIDUninitialized; + + if (int ret = sys_rseq(&r, sizeof(r), 0, 0); sys_errno(ret) != 0) { + return 1; + } + + // cpu_id and cpu_id_start are declared uint32_t, so compare them as signed + // values; an unsigned `< 0` test is always false and could never fail. + if (static_cast<int32_t>(__atomic_load_n(&r.cpu_id, __ATOMIC_RELAXED)) < 0) { + return 1; + } + if (static_cast<int32_t>( + __atomic_load_n(&r.cpu_id_start, __ATOMIC_RELAXED)) < 0) { + return 1; + } + + return 0; +}; + +// Critical section is eventually aborted. +int TestAbort() { + struct rseq r = {}; + if (int ret = sys_rseq(&r, sizeof(r), 0, kRseqSignature); + sys_errno(ret) != 0) { + return 1; + } + + struct rseq_cs cs = {}; + cs.version = 0; + cs.flags = 0; + cs.start_ip = reinterpret_cast<uint64_t>(&rseq_loop_start); + cs.post_commit_offset = reinterpret_cast<uint64_t>(&rseq_loop_post_commit) - + reinterpret_cast<uint64_t>(&rseq_loop_start); + cs.abort_ip = reinterpret_cast<uint64_t>(&rseq_loop_abort); + + // Loops until abort. If this returns then abort occurred. + rseq_loop(&r, &cs); + + return 0; +}; + +// Abort may be before the critical section. 
+int TestAbortBefore() { + struct rseq r = {}; + if (int ret = sys_rseq(&r, sizeof(r), 0, kRseqSignature); + sys_errno(ret) != 0) { + return 1; + } + + struct rseq_cs cs = {}; + cs.version = 0; + cs.flags = 0; + cs.start_ip = reinterpret_cast<uint64_t>(&rseq_loop_start); + cs.post_commit_offset = reinterpret_cast<uint64_t>(&rseq_loop_post_commit) - + reinterpret_cast<uint64_t>(&rseq_loop_start); + cs.abort_ip = reinterpret_cast<uint64_t>(&rseq_loop_early_abort); + + // Loops until abort. If this returns then abort occurred. + rseq_loop(&r, &cs); + + return 0; +}; + +// Signature must match. +int TestAbortSignature() { + struct rseq r = {}; + if (int ret = sys_rseq(&r, sizeof(r), 0, kRseqSignature + 1); + sys_errno(ret) != 0) { + return 1; + } + + struct rseq_cs cs = {}; + cs.version = 0; + cs.flags = 0; + cs.start_ip = reinterpret_cast<uint64_t>(&rseq_loop_start); + cs.post_commit_offset = reinterpret_cast<uint64_t>(&rseq_loop_post_commit) - + reinterpret_cast<uint64_t>(&rseq_loop_start); + cs.abort_ip = reinterpret_cast<uint64_t>(&rseq_loop_abort); + + // Loops until abort. This should SIGSEGV on abort. + rseq_loop(&r, &cs); + + return 1; +}; + +// Abort must not be in the critical section. +int TestAbortPreCommit() { + struct rseq r = {}; + if (int ret = sys_rseq(&r, sizeof(r), 0, kRseqSignature + 1); + sys_errno(ret) != 0) { + return 1; + } + + struct rseq_cs cs = {}; + cs.version = 0; + cs.flags = 0; + cs.start_ip = reinterpret_cast<uint64_t>(&rseq_loop_start); + cs.post_commit_offset = reinterpret_cast<uint64_t>(&rseq_loop_post_commit) - + reinterpret_cast<uint64_t>(&rseq_loop_start); + cs.abort_ip = reinterpret_cast<uint64_t>(&rseq_loop_pre_commit); + + // Loops until abort. This should SIGSEGV on abort. + rseq_loop(&r, &cs); + + return 1; +}; + +// rseq.rseq_cs is cleared on abort. 
+int TestAbortClearsCS() { + struct rseq r = {}; + if (int ret = sys_rseq(&r, sizeof(r), 0, kRseqSignature); + sys_errno(ret) != 0) { + return 1; + } + + struct rseq_cs cs = {}; + cs.version = 0; + cs.flags = 0; + cs.start_ip = reinterpret_cast<uint64_t>(&rseq_loop_start); + cs.post_commit_offset = reinterpret_cast<uint64_t>(&rseq_loop_post_commit) - + reinterpret_cast<uint64_t>(&rseq_loop_start); + cs.abort_ip = reinterpret_cast<uint64_t>(&rseq_loop_abort); + + // Loops until abort. If this returns then abort occurred. + rseq_loop(&r, &cs); + + if (__atomic_load_n(&r.rseq_cs, __ATOMIC_RELAXED)) { + return 1; + } + + return 0; +}; + +// rseq.rseq_cs is cleared on abort outside of critical section. +int TestInvalidAbortClearsCS() { + struct rseq r = {}; + if (int ret = sys_rseq(&r, sizeof(r), 0, kRseqSignature); + sys_errno(ret) != 0) { + return 1; + } + + struct rseq_cs cs = {}; + cs.version = 0; + cs.flags = 0; + cs.start_ip = reinterpret_cast<uint64_t>(&rseq_loop_start); + cs.post_commit_offset = reinterpret_cast<uint64_t>(&rseq_loop_post_commit) - + reinterpret_cast<uint64_t>(&rseq_loop_start); + cs.abort_ip = reinterpret_cast<uint64_t>(&rseq_loop_abort); + + __atomic_store_n(&r.rseq_cs, &cs, __ATOMIC_RELAXED); + + // When the next abort condition occurs, the kernel will clear cs once it + // determines we aren't in the critical section. 
+ while (1) { + if (!__atomic_load_n(&r.rseq_cs, __ATOMIC_RELAXED)) { + break; + } + } + + return 0; +}; + +// Exit codes: +// 0 - Pass +// 1 - Fail +// 2 - Missing argument +// 3 - Unknown test case +extern "C" int main(int argc, char** argv, char** envp) { + if (argc != 2) { + // Usage: rseq <test case> + return 2; + } + + if (strcmp(argv[1], kRseqTestUnaligned) == 0) { + return TestUnaligned(); + } + if (strcmp(argv[1], kRseqTestRegister) == 0) { + return TestRegister(); + } + if (strcmp(argv[1], kRseqTestDoubleRegister) == 0) { + return TestDoubleRegister(); + } + if (strcmp(argv[1], kRseqTestRegisterUnregister) == 0) { + return TestRegisterUnregister(); + } + if (strcmp(argv[1], kRseqTestUnregisterDifferentPtr) == 0) { + return TestUnregisterDifferentPtr(); + } + if (strcmp(argv[1], kRseqTestUnregisterDifferentSignature) == 0) { + return TestUnregisterDifferentSignature(); + } + if (strcmp(argv[1], kRseqTestCPU) == 0) { + return TestCPU(); + } + if (strcmp(argv[1], kRseqTestAbort) == 0) { + return TestAbort(); + } + if (strcmp(argv[1], kRseqTestAbortBefore) == 0) { + return TestAbortBefore(); + } + if (strcmp(argv[1], kRseqTestAbortSignature) == 0) { + return TestAbortSignature(); + } + if (strcmp(argv[1], kRseqTestAbortPreCommit) == 0) { + return TestAbortPreCommit(); + } + if (strcmp(argv[1], kRseqTestAbortClearsCS) == 0) { + return TestAbortClearsCS(); + } + if (strcmp(argv[1], kRseqTestInvalidAbortClearsCS) == 0) { + return TestInvalidAbortClearsCS(); + } + + return 3; +} + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/rseq/start_amd64.S b/test/syscalls/linux/rseq/start_amd64.S new file mode 100644 index 000000000..b9611b276 --- /dev/null +++ b/test/syscalls/linux/rseq/start_amd64.S @@ -0,0 +1,45 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + + .text + .align 4 + .type _start,@function + .globl _start + +_start: + movq %rsp,%rdi + call __init + hlt + + .size _start,.-_start + .section .note.GNU-stack,"",@progbits + + .text + .globl raw_syscall + .type raw_syscall, @function + +raw_syscall: + mov %rdi,%rax // syscall # + mov %rsi,%rdi // arg0 + mov %rdx,%rsi // arg1 + mov %rcx,%rdx // arg2 + mov %r8,%r10 // arg3 (goes in r10 instead of rcx for system calls) + mov %r9,%r8 // arg4 + mov 0x8(%rsp),%r9 // arg5 + syscall + ret + + .size raw_syscall,.-raw_syscall + .section .note.GNU-stack,"",@progbits diff --git a/test/syscalls/linux/rseq/start_arm64.S b/test/syscalls/linux/rseq/start_arm64.S new file mode 100644 index 000000000..693c1c6eb --- /dev/null +++ b/test/syscalls/linux/rseq/start_arm64.S @@ -0,0 +1,45 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ + + .text + .align 4 + .type _start,@function + .globl _start + +_start: + mov x29, sp + bl __init + wfi + + .size _start,.-_start + .section .note.GNU-stack,"",@progbits + + .text + .globl raw_syscall + .type raw_syscall, @function + +raw_syscall: + mov x8,x0 // syscall # + mov x0,x1 // arg0 + mov x1,x2 // arg1 + mov x2,x3 // arg2 + mov x3,x4 // arg3 + mov x4,x5 // arg4 + mov x5,x6 // arg5 + svc #0 + ret + + .size raw_syscall,.-raw_syscall + .section .note.GNU-stack,"",@progbits diff --git a/test/syscalls/linux/rseq/syscalls.h b/test/syscalls/linux/rseq/syscalls.h new file mode 100644 index 000000000..c4118e6c5 --- /dev/null +++ b/test/syscalls/linux/rseq/syscalls.h @@ -0,0 +1,69 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GVISOR_TEST_SYSCALLS_LINUX_RSEQ_SYSCALLS_H_ +#define GVISOR_TEST_SYSCALLS_LINUX_RSEQ_SYSCALLS_H_ + +#include "test/syscalls/linux/rseq/types.h" + +// Syscall numbers. +#if defined(__x86_64__) +constexpr int kGetpid = 39; +constexpr int kExitGroup = 231; +#elif defined(__aarch64__) +constexpr int kGetpid = 172; +constexpr int kExitGroup = 94; +#else +#error "Unknown architecture" +#endif + +namespace gvisor { +namespace testing { + +// Standalone system call interfaces. +// Note that these are all "raw" system call interfaces which encode +// errors by setting the return value to a small negative number. +// Use sys_errno() to check system call return values for errors. 
+ +// Maximum Linux error number. +constexpr int kMaxErrno = 4095; + +// Errno values. +#define EPERM 1 +#define EFAULT 14 +#define EBUSY 16 +#define EINVAL 22 + +// Get the error number from a raw system call return value. +// Returns a positive error number or 0 if there was no error. +static inline int sys_errno(uintptr_t rval) { + if (rval >= static_cast<uintptr_t>(-kMaxErrno)) { + return -static_cast<int>(rval); + } + return 0; +} + +extern "C" uintptr_t raw_syscall(int number, ...); + +static inline void sys_exit_group(int status) { + raw_syscall(kExitGroup, status); +} +static inline int sys_getpid() { + return static_cast<int>(raw_syscall(kGetpid)); +} + +} // namespace testing +} // namespace gvisor + +#endif // GVISOR_TEST_SYSCALLS_LINUX_RSEQ_SYSCALLS_H_ diff --git a/test/syscalls/linux/rseq/test.h b/test/syscalls/linux/rseq/test.h new file mode 100644 index 000000000..3b7bb74b1 --- /dev/null +++ b/test/syscalls/linux/rseq/test.h @@ -0,0 +1,43 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GVISOR_TEST_SYSCALLS_LINUX_RSEQ_TEST_H_ +#define GVISOR_TEST_SYSCALLS_LINUX_RSEQ_TEST_H_ + +namespace gvisor { +namespace testing { + +// Test cases supported by rseq binary. 
+ +inline constexpr char kRseqTestUnaligned[] = "unaligned"; +inline constexpr char kRseqTestRegister[] = "register"; +inline constexpr char kRseqTestDoubleRegister[] = "double-register"; +inline constexpr char kRseqTestRegisterUnregister[] = "register-unregister"; +inline constexpr char kRseqTestUnregisterDifferentPtr[] = + "unregister-different-ptr"; +inline constexpr char kRseqTestUnregisterDifferentSignature[] = + "unregister-different-signature"; +inline constexpr char kRseqTestCPU[] = "cpu"; +inline constexpr char kRseqTestAbort[] = "abort"; +inline constexpr char kRseqTestAbortBefore[] = "abort-before"; +inline constexpr char kRseqTestAbortSignature[] = "abort-signature"; +inline constexpr char kRseqTestAbortPreCommit[] = "abort-precommit"; +inline constexpr char kRseqTestAbortClearsCS[] = "abort-clears-cs"; +inline constexpr char kRseqTestInvalidAbortClearsCS[] = + "invalid-abort-clears-cs"; + +} // namespace testing +} // namespace gvisor + +#endif // GVISOR_TEST_SYSCALLS_LINUX_RSEQ_TEST_H_ diff --git a/test/syscalls/linux/rseq/types.h b/test/syscalls/linux/rseq/types.h new file mode 100644 index 000000000..b6afe9817 --- /dev/null +++ b/test/syscalls/linux/rseq/types.h @@ -0,0 +1,31 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef GVISOR_TEST_SYSCALLS_LINUX_RSEQ_TYPES_H_ +#define GVISOR_TEST_SYSCALLS_LINUX_RSEQ_TYPES_H_ + +using size_t = __SIZE_TYPE__; +using uintptr_t = __UINTPTR_TYPE__; + +using uint8_t = __UINT8_TYPE__; +using uint16_t = __UINT16_TYPE__; +using uint32_t = __UINT32_TYPE__; +using uint64_t = __UINT64_TYPE__; + +using int8_t = __INT8_TYPE__; +using int16_t = __INT16_TYPE__; +using int32_t = __INT32_TYPE__; +using int64_t = __INT64_TYPE__; + +#endif // GVISOR_TEST_SYSCALLS_LINUX_RSEQ_TYPES_H_ diff --git a/test/syscalls/linux/rseq/uapi.h b/test/syscalls/linux/rseq/uapi.h new file mode 100644 index 000000000..d3e60d0a4 --- /dev/null +++ b/test/syscalls/linux/rseq/uapi.h @@ -0,0 +1,51 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GVISOR_TEST_SYSCALLS_LINUX_RSEQ_UAPI_H_ +#define GVISOR_TEST_SYSCALLS_LINUX_RSEQ_UAPI_H_ + +#include <stdint.h> + +// User-kernel ABI for restartable sequences. + +// Syscall numbers. +#if defined(__x86_64__) +constexpr int kRseqSyscall = 334; +#elif defined(__aarch64__) +constexpr int kRseqSyscall = 293; +#else +#error "Unknown architecture" +#endif // __x86_64__ + +struct rseq_cs { + uint32_t version; + uint32_t flags; + uint64_t start_ip; + uint64_t post_commit_offset; + uint64_t abort_ip; +} __attribute__((aligned(4 * sizeof(uint64_t)))); + +// N.B. alignment is enforced by the kernel. 
+struct rseq { + uint32_t cpu_id_start; + uint32_t cpu_id; + struct rseq_cs* rseq_cs; + uint32_t flags; +} __attribute__((aligned(4 * sizeof(uint64_t)))); + +constexpr int kRseqFlagUnregister = 1 << 0; + +constexpr int kRseqCPUIDUninitialized = -1; + +#endif // GVISOR_TEST_SYSCALLS_LINUX_RSEQ_UAPI_H_ diff --git a/test/syscalls/linux/rtsignal.cc b/test/syscalls/linux/rtsignal.cc new file mode 100644 index 000000000..ed27e2566 --- /dev/null +++ b/test/syscalls/linux/rtsignal.cc @@ -0,0 +1,171 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <sys/syscall.h> +#include <sys/types.h> +#include <unistd.h> + +#include <cerrno> +#include <csignal> + +#include "gtest/gtest.h" +#include "test/util/cleanup.h" +#include "test/util/logging.h" +#include "test/util/posix_error.h" +#include "test/util/signal_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// saved_info is set by the handler. +siginfo_t saved_info; + +// has_saved_info is set to true by the handler. +volatile bool has_saved_info; + +void SigHandler(int sig, siginfo_t* info, void* context) { + // Copy to the given info. + saved_info = *info; + has_saved_info = true; +} + +void ClearSavedInfo() { + // Clear the cached info. 
+ memset(&saved_info, 0, sizeof(saved_info)); + has_saved_info = false; +} + +PosixErrorOr<Cleanup> SetupSignalHandler(int sig) { + struct sigaction sa; + sa.sa_sigaction = SigHandler; + sigfillset(&sa.sa_mask); + sa.sa_flags = SA_SIGINFO; + return ScopedSigaction(sig, sa); +} + +class RtSignalTest : public ::testing::Test { + protected: + void SetUp() override { + action_cleanup_ = ASSERT_NO_ERRNO_AND_VALUE(SetupSignalHandler(SIGUSR1)); + mask_cleanup_ = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, SIGUSR1)); + } + + void TearDown() override { ClearSavedInfo(); } + + private: + Cleanup action_cleanup_; + Cleanup mask_cleanup_; +}; + +static int rt_sigqueueinfo(pid_t tgid, int sig, siginfo_t* uinfo) { + int ret; + do { + // NOTE(b/25434735): rt_sigqueueinfo(2) could return EAGAIN for RT signals. + ret = syscall(SYS_rt_sigqueueinfo, tgid, sig, uinfo); + } while (ret == -1 && errno == EAGAIN); + return ret; +} + +TEST_F(RtSignalTest, InvalidTID) { + siginfo_t uinfo; + // Depending on the kernel version, these calls may fail with + // ESRCH (goobuntu machines) or EPERM (production machines). Thus, + // the test simply ensures that they do fail. + EXPECT_THAT(rt_sigqueueinfo(-1, SIGUSR1, &uinfo), SyscallFails()); + EXPECT_FALSE(has_saved_info); + EXPECT_THAT(rt_sigqueueinfo(0, SIGUSR1, &uinfo), SyscallFails()); + EXPECT_FALSE(has_saved_info); +} + +TEST_F(RtSignalTest, InvalidCodes) { + siginfo_t uinfo; + + // We need a child for the code checks to apply. If the process is delivering + // to itself, then it can use whatever codes it wants and they will go + // through. + pid_t child = fork(); + if (child == 0) { + _exit(1); + } + ASSERT_THAT(child, SyscallSucceeds()); + + // These are not allowed for child processes. + uinfo.si_code = 0; // SI_USER. + EXPECT_THAT(rt_sigqueueinfo(child, SIGUSR1, &uinfo), + SyscallFailsWithErrno(EPERM)); + uinfo.si_code = 0x80; // SI_KERNEL. 
+ EXPECT_THAT(rt_sigqueueinfo(child, SIGUSR1, &uinfo), + SyscallFailsWithErrno(EPERM)); + uinfo.si_code = -6; // SI_TKILL. + EXPECT_THAT(rt_sigqueueinfo(child, SIGUSR1, &uinfo), + SyscallFailsWithErrno(EPERM)); + uinfo.si_code = -1; // SI_QUEUE (allowed). + EXPECT_THAT(rt_sigqueueinfo(child, SIGUSR1, &uinfo), SyscallSucceeds()); + + // Join the child process. + EXPECT_THAT(waitpid(child, nullptr, 0), SyscallSucceeds()); +} + +TEST_F(RtSignalTest, ValueDelivered) { + siginfo_t uinfo; + uinfo.si_code = -1; // SI_QUEUE (allowed). + uinfo.si_errno = 0x1234; + + EXPECT_EQ(saved_info.si_errno, 0x0); + EXPECT_THAT(rt_sigqueueinfo(getpid(), SIGUSR1, &uinfo), SyscallSucceeds()); + EXPECT_TRUE(has_saved_info); + EXPECT_EQ(saved_info.si_errno, 0x1234); +} + +TEST_F(RtSignalTest, SignoMatch) { + auto action2_cleanup = ASSERT_NO_ERRNO_AND_VALUE(SetupSignalHandler(SIGUSR2)); + auto mask2_cleanup = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, SIGUSR2)); + + siginfo_t uinfo; + uinfo.si_code = -1; // SI_QUEUE (allowed). + + EXPECT_THAT(rt_sigqueueinfo(getpid(), SIGUSR1, &uinfo), SyscallSucceeds()); + EXPECT_TRUE(has_saved_info); + EXPECT_EQ(saved_info.si_signo, SIGUSR1); + + ClearSavedInfo(); + + EXPECT_THAT(rt_sigqueueinfo(getpid(), SIGUSR2, &uinfo), SyscallSucceeds()); + EXPECT_TRUE(has_saved_info); + EXPECT_EQ(saved_info.si_signo, SIGUSR2); +} + +} // namespace + +} // namespace testing +} // namespace gvisor + +int main(int argc, char** argv) { + // These tests depend on delivering SIGUSR1/2 to the main thread (so they can + // synchronously check has_saved_info). Block these so that any other threads + // created by TestInit will also have them blocked. 
+ sigset_t set; + sigemptyset(&set); + sigaddset(&set, SIGUSR1); + sigaddset(&set, SIGUSR2); + TEST_PCHECK(sigprocmask(SIG_BLOCK, &set, nullptr) == 0); + + gvisor::testing::TestInit(&argc, &argv); + return gvisor::testing::RunAllTests(); +} diff --git a/test/syscalls/linux/sched.cc b/test/syscalls/linux/sched.cc new file mode 100644 index 000000000..735e99411 --- /dev/null +++ b/test/syscalls/linux/sched.cc @@ -0,0 +1,71 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <errno.h> +#include <sched.h> + +#include "gtest/gtest.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// In Linux, pid is limited to 29 bits because of how futex is implemented. 
+constexpr int kImpossiblePID = (1 << 29) + 1; + +TEST(SchedGetparamTest, ReturnsZero) { + struct sched_param param; + EXPECT_THAT(sched_getparam(getpid(), &param), SyscallSucceeds()); + EXPECT_EQ(param.sched_priority, 0); + EXPECT_THAT(sched_getparam(/*pid=*/0, &param), SyscallSucceeds()); + EXPECT_EQ(param.sched_priority, 0); +} + +TEST(SchedGetparamTest, InvalidPIDReturnsEINVAL) { + struct sched_param param; + EXPECT_THAT(sched_getparam(/*pid=*/-1, &param), + SyscallFailsWithErrno(EINVAL)); +} + +TEST(SchedGetparamTest, ImpossiblePIDReturnsESRCH) { + struct sched_param param; + EXPECT_THAT(sched_getparam(kImpossiblePID, &param), + SyscallFailsWithErrno(ESRCH)); +} + +TEST(SchedGetparamTest, NullParamReturnsEINVAL) { + EXPECT_THAT(sched_getparam(0, nullptr), SyscallFailsWithErrno(EINVAL)); +} + +TEST(SchedGetschedulerTest, ReturnsSchedOther) { + EXPECT_THAT(sched_getscheduler(getpid()), + SyscallSucceedsWithValue(SCHED_OTHER)); + EXPECT_THAT(sched_getscheduler(/*pid=*/0), + SyscallSucceedsWithValue(SCHED_OTHER)); +} + +TEST(SchedGetschedulerTest, ReturnsEINVAL) { + EXPECT_THAT(sched_getscheduler(/*pid=*/-1), SyscallFailsWithErrno(EINVAL)); +} + +TEST(SchedGetschedulerTest, ReturnsESRCH) { + EXPECT_THAT(sched_getscheduler(kImpossiblePID), SyscallFailsWithErrno(ESRCH)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/sched_yield.cc b/test/syscalls/linux/sched_yield.cc new file mode 100644 index 000000000..5d24f5b58 --- /dev/null +++ b/test/syscalls/linux/sched_yield.cc @@ -0,0 +1,33 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <sched.h> + +#include "gtest/gtest.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +TEST(SchedYieldTest, Success) { + EXPECT_THAT(sched_yield(), SyscallSucceeds()); + EXPECT_THAT(sched_yield(), SyscallSucceeds()); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/seccomp.cc b/test/syscalls/linux/seccomp.cc new file mode 100644 index 000000000..ce88d90dd --- /dev/null +++ b/test/syscalls/linux/seccomp.cc @@ -0,0 +1,425 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <errno.h> +#include <linux/audit.h> +#include <linux/filter.h> +#include <linux/seccomp.h> +#include <pthread.h> +#include <sched.h> +#include <signal.h> +#include <string.h> +#include <sys/prctl.h> +#include <sys/syscall.h> +#include <time.h> +#include <ucontext.h> +#include <unistd.h> + +#include <atomic> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/macros.h" +#include "test/util/logging.h" +#include "test/util/memory_util.h" +#include "test/util/multiprocess_util.h" +#include "test/util/posix_error.h" +#include "test/util/proc_util.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +#ifndef SYS_SECCOMP +#define SYS_SECCOMP 1 +#endif + +namespace gvisor { +namespace testing { + +namespace { + +// A syscall not implemented by Linux that we don't expect to be called. +#ifdef __x86_64__ +constexpr uint32_t kFilteredSyscall = SYS_vserver; +#elif __aarch64__ +// Use the last of arch_specific_syscalls which are not implemented on arm64. +constexpr uint32_t kFilteredSyscall = __NR_arch_specific_syscall + 15; +#endif + +// Applies a seccomp-bpf filter that returns `filtered_result` for +// `sysno` and allows all other syscalls. Async-signal-safe. +void ApplySeccompFilter(uint32_t sysno, uint32_t filtered_result, + uint32_t flags = 0) { + // "Prior to [PR_SET_SECCOMP], the task must call prctl(PR_SET_NO_NEW_PRIVS, + // 1) or run with CAP_SYS_ADMIN privileges in its namespace." - + // Documentation/prctl/seccomp_filter.txt + // + // prctl(PR_SET_NO_NEW_PRIVS, 1) may be called repeatedly; calls after the + // first are no-ops. 
+ TEST_PCHECK(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) == 0); + MaybeSave(); + + struct sock_filter filter[] = { + // A = seccomp_data.arch + BPF_STMT(BPF_LD | BPF_ABS | BPF_W, 4), +#if defined(__x86_64__) + // if (A != AUDIT_ARCH_X86_64) goto kill + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, AUDIT_ARCH_X86_64, 0, 4), +#elif defined(__aarch64__) + // if (A != AUDIT_ARCH_AARCH64) goto kill + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, AUDIT_ARCH_AARCH64, 0, 4), +#else +#error "Unknown architecture" +#endif + // A = seccomp_data.nr + BPF_STMT(BPF_LD | BPF_ABS | BPF_W, 0), + // if (A != sysno) goto allow + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, sysno, 0, 1), + // return filtered_result + BPF_STMT(BPF_RET | BPF_K, filtered_result), + // allow: return SECCOMP_RET_ALLOW + BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW), + // kill: return SECCOMP_RET_KILL + BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL), + }; + struct sock_fprog prog; + prog.len = ABSL_ARRAYSIZE(filter); + prog.filter = filter; + if (flags) { + TEST_CHECK(syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER, flags, &prog) == + 0); + } else { + TEST_PCHECK(prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0) == 0); + } + MaybeSave(); +} + +// Wrapper for sigaction. Async-signal-safe. +void RegisterSignalHandler(int signum, + void (*handler)(int, siginfo_t*, void*)) { + struct sigaction sa = {}; + sa.sa_sigaction = handler; + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_SIGINFO; + TEST_PCHECK(sigaction(signum, &sa, nullptr) == 0); + MaybeSave(); +} + +// All of the following tests execute in a subprocess to ensure that each test +// is run in a separate process. This avoids cross-contamination of seccomp +// state between tests, and is necessary to ensure that test processes killed +// by SECCOMP_RET_KILL are single-threaded (since SECCOMP_RET_KILL only kills +// the offending thread, not the whole thread group). 
+ +TEST(SeccompTest, RetKillCausesDeathBySIGSYS) { + pid_t const pid = fork(); + if (pid == 0) { + // Register a signal handler for SIGSYS that we don't expect to be invoked. + RegisterSignalHandler( + SIGSYS, +[](int, siginfo_t*, void*) { _exit(1); }); + ApplySeccompFilter(kFilteredSyscall, SECCOMP_RET_KILL); + syscall(kFilteredSyscall); + TEST_CHECK_MSG(false, "Survived invocation of test syscall"); + } + ASSERT_THAT(pid, SyscallSucceeds()); + int status; + ASSERT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid)); + EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGSYS) + << "status " << status; +} + +TEST(SeccompTest, RetKillOnlyKillsOneThread) { + Mapping stack = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(2 * kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE)); + + pid_t const pid = fork(); + if (pid == 0) { + // Register a signal handler for SIGSYS that we don't expect to be invoked. + RegisterSignalHandler( + SIGSYS, +[](int, siginfo_t*, void*) { _exit(1); }); + ApplySeccompFilter(kFilteredSyscall, SECCOMP_RET_KILL); + // Pass CLONE_VFORK to block the original thread in the child process until + // the clone thread exits with SIGSYS. + // + // N.B. clone(2) is not officially async-signal-safe, but at minimum glibc's + // x86_64 implementation is safe. See glibc + // sysdeps/unix/sysv/linux/x86_64/clone.S. 
+ clone( + +[](void* arg) { + syscall(kFilteredSyscall); // should kill the thread + _exit(1); // should be unreachable + return 2; // should be very unreachable, shut up the compiler + }, + stack.endptr(), + CLONE_FILES | CLONE_FS | CLONE_SIGHAND | CLONE_THREAD | CLONE_VM | + CLONE_VFORK, + nullptr); + _exit(0); + } + ASSERT_THAT(pid, SyscallSucceeds()); + int status; + ASSERT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid)); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << "status " << status; +} + +TEST(SeccompTest, RetTrapCausesSIGSYS) { + pid_t const pid = fork(); + if (pid == 0) { + constexpr uint16_t kTrapValue = 0xdead; + RegisterSignalHandler( + SIGSYS, +[](int signo, siginfo_t* info, void* ucv) { + ucontext_t* uc = static_cast<ucontext_t*>(ucv); + // This is a signal handler, so we must stay async-signal-safe. + TEST_CHECK(info->si_signo == SIGSYS); + TEST_CHECK(info->si_code == SYS_SECCOMP); + TEST_CHECK(info->si_errno == kTrapValue); + TEST_CHECK(info->si_call_addr != nullptr); + TEST_CHECK(info->si_syscall == kFilteredSyscall); +#if defined(__x86_64__) + TEST_CHECK(info->si_arch == AUDIT_ARCH_X86_64); + TEST_CHECK(uc->uc_mcontext.gregs[REG_RAX] == kFilteredSyscall); +#elif defined(__aarch64__) + TEST_CHECK(info->si_arch == AUDIT_ARCH_AARCH64); + TEST_CHECK(uc->uc_mcontext.regs[8] == kFilteredSyscall); +#endif // defined(__x86_64__) + _exit(0); + }); + ApplySeccompFilter(kFilteredSyscall, SECCOMP_RET_TRAP | kTrapValue); + syscall(kFilteredSyscall); + TEST_CHECK_MSG(false, "Survived invocation of test syscall"); + } + ASSERT_THAT(pid, SyscallSucceeds()); + int status; + ASSERT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid)); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << "status " << status; +} + +#ifdef __x86_64__ + +constexpr uint64_t kVsyscallTimeEntry = 0xffffffffff600400; + +time_t vsyscall_time(time_t* t) { + return reinterpret_cast<time_t (*)(time_t*)>(kVsyscallTimeEntry)(t); +} + 
+TEST(SeccompTest, SeccompAppliesToVsyscall) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsVsyscallEnabled())); + + pid_t const pid = fork(); + if (pid == 0) { + constexpr uint16_t kTrapValue = 0xdead; + RegisterSignalHandler( + SIGSYS, +[](int signo, siginfo_t* info, void* ucv) { + ucontext_t* uc = static_cast<ucontext_t*>(ucv); + // This is a signal handler, so we must stay async-signal-safe. + TEST_CHECK(info->si_signo == SIGSYS); + TEST_CHECK(info->si_code == SYS_SECCOMP); + TEST_CHECK(info->si_errno == kTrapValue); + TEST_CHECK(info->si_call_addr != nullptr); + TEST_CHECK(info->si_syscall == SYS_time); + TEST_CHECK(info->si_arch == AUDIT_ARCH_X86_64); + TEST_CHECK(uc->uc_mcontext.gregs[REG_RAX] == SYS_time); + _exit(0); + }); + ApplySeccompFilter(SYS_time, SECCOMP_RET_TRAP | kTrapValue); + vsyscall_time(nullptr); // Should result in death. + TEST_CHECK_MSG(false, "Survived invocation of test syscall"); + } + ASSERT_THAT(pid, SyscallSucceeds()); + int status; + ASSERT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid)); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << "status " << status; +} + +TEST(SeccompTest, RetKillVsyscallCausesDeathBySIGSYS) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsVsyscallEnabled())); + + pid_t const pid = fork(); + if (pid == 0) { + // Register a signal handler for SIGSYS that we don't expect to be invoked. + RegisterSignalHandler( + SIGSYS, +[](int, siginfo_t*, void*) { _exit(1); }); + ApplySeccompFilter(SYS_time, SECCOMP_RET_KILL); + vsyscall_time(nullptr); // Should result in death. 
+ TEST_CHECK_MSG(false, "Survived invocation of test syscall"); + } + ASSERT_THAT(pid, SyscallSucceeds()); + int status; + ASSERT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid)); + EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGSYS) + << "status " << status; +} + +#endif // defined(__x86_64__) + +TEST(SeccompTest, RetTraceWithoutPtracerReturnsENOSYS) { + pid_t const pid = fork(); + if (pid == 0) { + ApplySeccompFilter(kFilteredSyscall, SECCOMP_RET_TRACE); + TEST_CHECK(syscall(kFilteredSyscall) == -1 && errno == ENOSYS); + _exit(0); + } + ASSERT_THAT(pid, SyscallSucceeds()); + int status; + ASSERT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid)); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << "status " << status; +} + +TEST(SeccompTest, RetErrnoReturnsErrno) { + pid_t const pid = fork(); + if (pid == 0) { + // ENOTNAM: "Not a XENIX named type file" + ApplySeccompFilter(kFilteredSyscall, SECCOMP_RET_ERRNO | ENOTNAM); + TEST_CHECK(syscall(kFilteredSyscall) == -1 && errno == ENOTNAM); + _exit(0); + } + ASSERT_THAT(pid, SyscallSucceeds()); + int status; + ASSERT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid)); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << "status " << status; +} + +TEST(SeccompTest, RetAllowAllowsSyscall) { + pid_t const pid = fork(); + if (pid == 0) { + ApplySeccompFilter(kFilteredSyscall, SECCOMP_RET_ALLOW); + TEST_CHECK(syscall(kFilteredSyscall) == -1 && errno == ENOSYS); + _exit(0); + } + ASSERT_THAT(pid, SyscallSucceeds()); + int status; + ASSERT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid)); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << "status " << status; +} + +// This test will validate that TSYNC will apply to all threads. 
+TEST(SeccompTest, TsyncAppliesToAllThreads) { + Mapping stack = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(2 * kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE)); + + // We don't want to apply this policy to other test runner threads, so fork. + const pid_t pid = fork(); + + if (pid == 0) { + // First check that we receive a ENOSYS before the policy is applied. + TEST_CHECK(syscall(kFilteredSyscall) == -1 && errno == ENOSYS); + + // N.B. clone(2) is not officially async-signal-safe, but at minimum glibc's + // x86_64 implementation is safe. See glibc + // sysdeps/unix/sysv/linux/x86_64/clone.S. + clone( + +[](void* arg) { + ApplySeccompFilter(kFilteredSyscall, SECCOMP_RET_ERRNO | ENOTNAM, + SECCOMP_FILTER_FLAG_TSYNC); + return 0; + }, + stack.endptr(), + CLONE_FILES | CLONE_FS | CLONE_SIGHAND | CLONE_THREAD | CLONE_VM | + CLONE_VFORK, + nullptr); + + // Because we're using CLONE_VFORK this thread will be blocked until + // the second thread has released resources to our virtual memory, since + // we're not execing that will happen on _exit. + + // Now verify that the policy applied to this thread too. + TEST_CHECK(syscall(kFilteredSyscall) == -1 && errno == ENOTNAM); + _exit(0); + } + + ASSERT_THAT(pid, SyscallSucceeds()); + int status = 0; + ASSERT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid)); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << "status " << status; +} + +// This test will validate that seccomp(2) rejects unsupported flags. +TEST(SeccompTest, SeccompRejectsUnknownFlags) { + constexpr uint32_t kInvalidFlag = 123; + ASSERT_THAT( + syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER, kInvalidFlag, nullptr), + SyscallFailsWithErrno(EINVAL)); +} + +TEST(SeccompTest, LeastPermissiveFilterReturnValueApplies) { + // This is RetKillCausesDeathBySIGSYS, plus extra filters before and after the + // one that causes the kill that should be ignored. 
+ pid_t const pid = fork(); + if (pid == 0) { + RegisterSignalHandler( + SIGSYS, +[](int, siginfo_t*, void*) { _exit(1); }); + ApplySeccompFilter(kFilteredSyscall, SECCOMP_RET_TRACE); + ApplySeccompFilter(kFilteredSyscall, SECCOMP_RET_KILL); + ApplySeccompFilter(kFilteredSyscall, SECCOMP_RET_ERRNO | ENOTNAM); + syscall(kFilteredSyscall); + TEST_CHECK_MSG(false, "Survived invocation of test syscall"); + } + ASSERT_THAT(pid, SyscallSucceeds()); + int status; + ASSERT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid)); + EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGSYS) + << "status " << status; +} + +// Passed as argv[1] to cause the test binary to invoke kFilteredSyscall and +// exit. Not a real flag since flag parsing happens during initialization, +// which may create threads. +constexpr char kInvokeFilteredSyscallFlag[] = "--seccomp_test_child"; + +TEST(SeccompTest, FiltersPreservedAcrossForkAndExecve) { + ExecveArray const grandchild_argv( + {"/proc/self/exe", kInvokeFilteredSyscallFlag}); + + pid_t const pid = fork(); + if (pid == 0) { + ApplySeccompFilter(kFilteredSyscall, SECCOMP_RET_KILL); + pid_t const grandchild_pid = fork(); + if (grandchild_pid == 0) { + execve(grandchild_argv.get()[0], grandchild_argv.get(), + /* envp = */ nullptr); + TEST_PCHECK_MSG(false, "execve failed"); + } + int status; + TEST_PCHECK(waitpid(grandchild_pid, &status, 0) == grandchild_pid); + TEST_CHECK(WIFSIGNALED(status) && WTERMSIG(status) == SIGSYS); + _exit(0); + } + ASSERT_THAT(pid, SyscallSucceeds()); + int status; + ASSERT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid)); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << "status " << status; +} + +} // namespace + +} // namespace testing +} // namespace gvisor + +int main(int argc, char** argv) { + if (argc >= 2 && + strcmp(argv[1], gvisor::testing::kInvokeFilteredSyscallFlag) == 0) { + syscall(gvisor::testing::kFilteredSyscall); + exit(0); + } + + 
gvisor::testing::TestInit(&argc, &argv); + return gvisor::testing::RunAllTests(); +} diff --git a/test/syscalls/linux/select.cc b/test/syscalls/linux/select.cc new file mode 100644 index 000000000..be2364fb8 --- /dev/null +++ b/test/syscalls/linux/select.cc @@ -0,0 +1,168 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <fcntl.h> +#include <sys/resource.h> +#include <sys/select.h> +#include <sys/time.h> + +#include <climits> +#include <csignal> +#include <cstdio> + +#include "gtest/gtest.h" +#include "absl/time/time.h" +#include "test/syscalls/linux/base_poll_test.h" +#include "test/util/file_descriptor.h" +#include "test/util/multiprocess_util.h" +#include "test/util/posix_error.h" +#include "test/util/rlimit_util.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { +namespace { + +class SelectTest : public BasePollTest { + protected: + void SetUp() override { BasePollTest::SetUp(); } + void TearDown() override { BasePollTest::TearDown(); } +}; + +// See that when there are no FD sets, select behaves like sleep. 
+TEST_F(SelectTest, NullFds) { + struct timeval timeout = absl::ToTimeval(absl::Milliseconds(10)); + ASSERT_THAT(select(0, nullptr, nullptr, nullptr, &timeout), + SyscallSucceeds()); + EXPECT_EQ(timeout.tv_sec, 0); + EXPECT_EQ(timeout.tv_usec, 0); + + timeout = absl::ToTimeval(absl::Milliseconds(10)); + ASSERT_THAT(select(1, nullptr, nullptr, nullptr, &timeout), + SyscallSucceeds()); + EXPECT_EQ(timeout.tv_sec, 0); + EXPECT_EQ(timeout.tv_usec, 0); +} + +TEST_F(SelectTest, NegativeNfds) { + EXPECT_THAT(select(-1, nullptr, nullptr, nullptr, nullptr), + SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(select(-100000, nullptr, nullptr, nullptr, nullptr), + SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(select(INT_MIN, nullptr, nullptr, nullptr, nullptr), + SyscallFailsWithErrno(EINVAL)); +} + +TEST_F(SelectTest, ClosedFds) { + auto temp_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(temp_file.path(), O_RDONLY)); + + // We can't rely on a file descriptor being closed in a multi threaded + // application so fork to get a clean process. + EXPECT_THAT(InForkedProcess([&] { + int fd_num = fd.get(); + fd.reset(); + + fd_set read_set; + FD_ZERO(&read_set); + FD_SET(fd_num, &read_set); + + struct timeval timeout = + absl::ToTimeval(absl::Milliseconds(10)); + TEST_PCHECK(select(fd_num + 1, &read_set, nullptr, nullptr, + &timeout) != 0); + TEST_PCHECK(errno == EBADF); + }), + IsPosixErrorOkAndHolds(0)); +} + +TEST_F(SelectTest, ZeroTimeout) { + struct timeval timeout = {}; + EXPECT_THAT(select(1, nullptr, nullptr, nullptr, &timeout), + SyscallSucceeds()); + // Ignore timeout as its value is now undefined. +} + +// If random S/R interrupts the select, SIGALRM may be delivered before select +// restarts, causing the select to hang forever. +TEST_F(SelectTest, NoTimeout_NoRandomSave) { + // When there's no timeout, select may never return so set a timer. 
+ SetTimer(absl::Milliseconds(100)); + // See that we get interrupted by the timer. + ASSERT_THAT(select(1, nullptr, nullptr, nullptr, nullptr), + SyscallFailsWithErrno(EINTR)); + EXPECT_TRUE(TimerFired()); +} + +TEST_F(SelectTest, InvalidTimeoutNegative) { + struct timeval timeout = absl::ToTimeval(absl::Microseconds(-1)); + EXPECT_THAT(select(1, nullptr, nullptr, nullptr, &timeout), + SyscallFailsWithErrno(EINVAL)); + // Ignore timeout as its value is now undefined. +} + +// Verify that a signal interrupts select. +// +// If random S/R interrupts the select, SIGALRM may be delivered before select +// restarts, causing the select to hang forever. +TEST_F(SelectTest, InterruptedBySignal_NoRandomSave) { + absl::Duration duration(absl::Seconds(5)); + struct timeval timeout = absl::ToTimeval(duration); + SetTimer(absl::Milliseconds(100)); + ASSERT_FALSE(TimerFired()); + ASSERT_THAT(select(1, nullptr, nullptr, nullptr, &timeout), + SyscallFailsWithErrno(EINTR)); + EXPECT_TRUE(TimerFired()); + // Ignore timeout as its value is now undefined. +} + +TEST_F(SelectTest, IgnoreBitsAboveNfds) { + // fd_set is a bit array with at least FD_SETSIZE bits. Test that bits + // corresponding to file descriptors above nfds are ignored. + fd_set read_set; + FD_ZERO(&read_set); + constexpr int kNfds = 1; + for (int fd = kNfds; fd < FD_SETSIZE; fd++) { + FD_SET(fd, &read_set); + } + // Pass a zero timeout so that select returns immediately. + struct timeval timeout = {}; + EXPECT_THAT(select(kNfds, &read_set, nullptr, nullptr, &timeout), + SyscallSucceedsWithValue(0)); +} + +// This test illustrates Linux's behavior of 'select' calls passing after +// setrlimit RLIMIT_NOFILE is called. In particular, versions of sshd rely on +// this behavior. See b/122318458. 
+TEST_F(SelectTest, SetrlimitCallNOFILE) { + fd_set read_set; + FD_ZERO(&read_set); + timeval timeout = {}; + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE( + Open(NewTempAbsPath(), O_RDONLY | O_CREAT, S_IRUSR)); + + Cleanup reset_rlimit = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSetSoftRlimit(RLIMIT_NOFILE, 0)); + + FD_SET(fd.get(), &read_set); + // this call with zero timeout should return immediately + EXPECT_THAT(select(fd.get() + 1, &read_set, nullptr, nullptr, &timeout), + SyscallSucceeds()); +} + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/semaphore.cc b/test/syscalls/linux/semaphore.cc new file mode 100644 index 000000000..e9b131ca9 --- /dev/null +++ b/test/syscalls/linux/semaphore.cc @@ -0,0 +1,491 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <sys/ipc.h> +#include <sys/sem.h> +#include <sys/types.h> + +#include <atomic> +#include <cerrno> +#include <ctime> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/macros.h" +#include "absl/memory/memory.h" +#include "absl/synchronization/mutex.h" +#include "absl/time/clock.h" +#include "test/util/capability_util.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { +namespace { + +class AutoSem { + public: + explicit AutoSem(int id) : id_(id) {} + ~AutoSem() { + if (id_ >= 0) { + EXPECT_THAT(semctl(id_, 0, IPC_RMID), SyscallSucceeds()); + } + } + + int release() { + int old = id_; + id_ = -1; + return old; + } + + int get() { return id_; } + + private: + int id_ = -1; +}; + +TEST(SemaphoreTest, SemGet) { + // Test creation and lookup. + AutoSem sem(semget(1, 10, IPC_CREAT)); + ASSERT_THAT(sem.get(), SyscallSucceeds()); + EXPECT_THAT(semget(1, 10, IPC_CREAT), SyscallSucceedsWithValue(sem.get())); + EXPECT_THAT(semget(1, 9, IPC_CREAT), SyscallSucceedsWithValue(sem.get())); + + // Creation and lookup failure cases. + EXPECT_THAT(semget(1, 11, IPC_CREAT), SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(semget(1, -1, IPC_CREAT), SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(semget(1, 10, IPC_CREAT | IPC_EXCL), + SyscallFailsWithErrno(EEXIST)); + EXPECT_THAT(semget(2, 1, 0), SyscallFailsWithErrno(ENOENT)); + EXPECT_THAT(semget(2, 0, IPC_CREAT), SyscallFailsWithErrno(EINVAL)); + + // Private semaphores never conflict. + AutoSem sem2(semget(IPC_PRIVATE, 1, 0)); + AutoSem sem3(semget(IPC_PRIVATE, 1, 0)); + ASSERT_THAT(sem2.get(), SyscallSucceeds()); + EXPECT_NE(sem.get(), sem2.get()); + ASSERT_THAT(sem3.get(), SyscallSucceeds()); + EXPECT_NE(sem3.get(), sem2.get()); +} + +// Tests simple operations that shouldn't block in a single-thread. 
+TEST(SemaphoreTest, SemOpSingleNoBlock) { + AutoSem sem(semget(IPC_PRIVATE, 1, 0600 | IPC_CREAT)); + ASSERT_THAT(sem.get(), SyscallSucceeds()); + + struct sembuf buf = {}; + buf.sem_op = 1; + ASSERT_THAT(semop(sem.get(), &buf, 1), SyscallSucceeds()); + + buf.sem_op = -1; + ASSERT_THAT(semop(sem.get(), &buf, 1), SyscallSucceeds()); + + buf.sem_op = 0; + ASSERT_THAT(semop(sem.get(), &buf, 1), SyscallSucceeds()); + + // Error cases with invalid values. + ASSERT_THAT(semop(sem.get() + 1, &buf, 1), SyscallFailsWithErrno(EINVAL)); + + buf.sem_num = 1; + ASSERT_THAT(semop(sem.get(), &buf, 1), SyscallFailsWithErrno(EFBIG)); + + ASSERT_THAT(semop(sem.get(), nullptr, 0), SyscallFailsWithErrno(EINVAL)); +} + +// Tests multiple operations that shouldn't block in a single-thread. +TEST(SemaphoreTest, SemOpMultiNoBlock) { + AutoSem sem(semget(IPC_PRIVATE, 4, 0600 | IPC_CREAT)); + ASSERT_THAT(sem.get(), SyscallSucceeds()); + + struct sembuf bufs[5] = {}; + bufs[0].sem_num = 0; + bufs[0].sem_op = 10; + bufs[0].sem_flg = 0; + + bufs[1].sem_num = 1; + bufs[1].sem_op = 2; + bufs[1].sem_flg = 0; + + bufs[2].sem_num = 2; + bufs[2].sem_op = 3; + bufs[2].sem_flg = 0; + + bufs[3].sem_num = 0; + bufs[3].sem_op = -5; + bufs[3].sem_flg = 0; + + bufs[4].sem_num = 2; + bufs[4].sem_op = 2; + bufs[4].sem_flg = 0; + + ASSERT_THAT(semop(sem.get(), bufs, ABSL_ARRAYSIZE(bufs)), SyscallSucceeds()); + + ASSERT_THAT(semctl(sem.get(), 0, GETVAL), SyscallSucceedsWithValue(5)); + ASSERT_THAT(semctl(sem.get(), 1, GETVAL), SyscallSucceedsWithValue(2)); + ASSERT_THAT(semctl(sem.get(), 2, GETVAL), SyscallSucceedsWithValue(5)); + ASSERT_THAT(semctl(sem.get(), 3, GETVAL), SyscallSucceedsWithValue(0)); + + for (auto& b : bufs) { + b.sem_op = -b.sem_op; + } + // 0 and 3 order must be reversed, otherwise it will block. + std::swap(bufs[0].sem_op, bufs[3].sem_op); + ASSERT_THAT(RetryEINTR(semop)(sem.get(), bufs, ABSL_ARRAYSIZE(bufs)), + SyscallSucceeds()); + + // All semaphores should be back to 0 now. 
+ for (size_t i = 0; i < 4; ++i) { + ASSERT_THAT(semctl(sem.get(), i, GETVAL), SyscallSucceedsWithValue(0)); + } +} + +// Makes a best effort attempt to ensure that operation would block. +TEST(SemaphoreTest, SemOpBlock) { + AutoSem sem(semget(IPC_PRIVATE, 1, 0600 | IPC_CREAT)); + ASSERT_THAT(sem.get(), SyscallSucceeds()); + + std::atomic<int> blocked = ATOMIC_VAR_INIT(1); + ScopedThread th([&sem, &blocked] { + absl::SleepFor(absl::Milliseconds(100)); + ASSERT_EQ(blocked.load(), 1); + + struct sembuf buf = {}; + buf.sem_op = 1; + ASSERT_THAT(RetryEINTR(semop)(sem.get(), &buf, 1), SyscallSucceeds()); + }); + + struct sembuf buf = {}; + buf.sem_op = -1; + ASSERT_THAT(RetryEINTR(semop)(sem.get(), &buf, 1), SyscallSucceeds()); + blocked.store(0); +} + +// Tests that IPC_NOWAIT returns with no wait. +TEST(SemaphoreTest, SemOpNoBlock) { + AutoSem sem(semget(IPC_PRIVATE, 1, 0600 | IPC_CREAT)); + ASSERT_THAT(sem.get(), SyscallSucceeds()); + + struct sembuf buf = {}; + buf.sem_flg = IPC_NOWAIT; + + buf.sem_op = -1; + ASSERT_THAT(semop(sem.get(), &buf, 1), SyscallFailsWithErrno(EAGAIN)); + + buf.sem_op = 1; + ASSERT_THAT(semop(sem.get(), &buf, 1), SyscallSucceeds()); + + buf.sem_op = 0; + ASSERT_THAT(semop(sem.get(), &buf, 1), SyscallFailsWithErrno(EAGAIN)); +} + +// Test runs 2 threads, one signals the other waits the same number of times. +TEST(SemaphoreTest, SemOpSimple) { + AutoSem sem(semget(IPC_PRIVATE, 1, 0600 | IPC_CREAT)); + ASSERT_THAT(sem.get(), SyscallSucceeds()); + + constexpr size_t kLoops = 100; + ScopedThread th([&sem] { + struct sembuf buf = {}; + buf.sem_op = 1; + for (size_t i = 0; i < kLoops; i++) { + // Sleep to prevent making all increments in one shot without letting + // the waiter wait. 
+ absl::SleepFor(absl::Milliseconds(1)); + ASSERT_THAT(semop(sem.get(), &buf, 1), SyscallSucceeds()); + } + }); + + struct sembuf buf = {}; + buf.sem_op = -1; + for (size_t i = 0; i < kLoops; i++) { + ASSERT_THAT(RetryEINTR(semop)(sem.get(), &buf, 1), SyscallSucceeds()); + } +} + +// Tests that semaphore can be removed while there are waiters. +// NoRandomSave: Test relies on timing that random save throws off. +TEST(SemaphoreTest, SemOpRemoveWithWaiter_NoRandomSave) { + AutoSem sem(semget(IPC_PRIVATE, 2, 0600 | IPC_CREAT)); + ASSERT_THAT(sem.get(), SyscallSucceeds()); + + ScopedThread th([&sem] { + absl::SleepFor(absl::Milliseconds(250)); + ASSERT_THAT(semctl(sem.release(), 0, IPC_RMID), SyscallSucceeds()); + }); + + // This must happen before IPC_RMID runs above. Otherwise it fails with EINVAL + // instead because the semaphore has already been removed. + struct sembuf buf = {}; + buf.sem_op = -1; + ASSERT_THAT(RetryEINTR(semop)(sem.get(), &buf, 1), + SyscallFailsWithErrno(EIDRM)); +} + +// Semaphore isn't fair. It will execute any waiter that can satisfy the +// request even if it gets in front of other waiters. +TEST(SemaphoreTest, SemOpBestFitExecution) { + AutoSem sem(semget(IPC_PRIVATE, 1, 0600 | IPC_CREAT)); + ASSERT_THAT(sem.get(), SyscallSucceeds()); + + ScopedThread th([&sem] { + struct sembuf buf = {}; + buf.sem_op = -2; + ASSERT_THAT(RetryEINTR(semop)(sem.get(), &buf, 1), SyscallFails()); + // Ensure that wait will only unblock when the semaphore is removed. On + // EINTR retry it may race with deletion and return EINVAL. + ASSERT_TRUE(errno == EIDRM || errno == EINVAL) << "errno=" << errno; + }); + + // Ensures that '-1' below will unblock even though '-10' above is waiting + // for the same semaphore. 
+ for (size_t i = 0; i < 10; ++i) { + struct sembuf buf = {}; + buf.sem_op = 1; + ASSERT_THAT(RetryEINTR(semop)(sem.get(), &buf, 1), SyscallSucceeds()); + + absl::SleepFor(absl::Milliseconds(10)); + + buf.sem_op = -1; + ASSERT_THAT(RetryEINTR(semop)(sem.get(), &buf, 1), SyscallSucceeds()); + } + + ASSERT_THAT(semctl(sem.release(), 0, IPC_RMID), SyscallSucceeds()); +} + +// Executes random operations in multiple threads and verify correctness. +TEST(SemaphoreTest, SemOpRandom) { + // Don't do cooperative S/R tests because there are too many syscalls in + // this test, + const DisableSave ds; + + AutoSem sem(semget(IPC_PRIVATE, 1, 0600 | IPC_CREAT)); + ASSERT_THAT(sem.get(), SyscallSucceeds()); + + // Protects the seed below. + absl::Mutex mutex; + uint32_t seed = time(nullptr); + + int count = 0; // Tracks semaphore value. + bool done = false; // Tells waiters to stop after signal threads are done. + + // These threads will wait in a loop. + std::unique_ptr<ScopedThread> decs[5]; + for (auto& dec : decs) { + dec = absl::make_unique<ScopedThread>([&sem, &mutex, &count, &seed, &done] { + for (size_t i = 0; i < 500; ++i) { + int16_t val; + { + absl::MutexLock l(&mutex); + if (done) { + return; + } + val = (rand_r(&seed) % 10 + 1); // Rand between 1 and 10. + count -= val; + } + struct sembuf buf = {}; + buf.sem_op = -val; + ASSERT_THAT(RetryEINTR(semop)(sem.get(), &buf, 1), SyscallSucceeds()); + absl::SleepFor(absl::Milliseconds(val * 2)); + } + }); + } + + // These threads will wait for zero in a loop. + std::unique_ptr<ScopedThread> zeros[5]; + for (auto& zero : zeros) { + zero = absl::make_unique<ScopedThread>([&sem, &mutex, &done] { + for (size_t i = 0; i < 500; ++i) { + { + absl::MutexLock l(&mutex); + if (done) { + return; + } + } + struct sembuf buf = {}; + buf.sem_op = 0; + ASSERT_THAT(RetryEINTR(semop)(sem.get(), &buf, 1), SyscallSucceeds()); + absl::SleepFor(absl::Milliseconds(10)); + } + }); + } + + // These threads will signal in a loop. 
+ std::unique_ptr<ScopedThread> incs[5]; + for (auto& inc : incs) { + inc = absl::make_unique<ScopedThread>([&sem, &mutex, &count, &seed] { + for (size_t i = 0; i < 500; ++i) { + int16_t val; + { + absl::MutexLock l(&mutex); + val = (rand_r(&seed) % 10 + 1); // Rand between 1 and 10. + count += val; + } + struct sembuf buf = {}; + buf.sem_op = val; + ASSERT_THAT(semop(sem.get(), &buf, 1), SyscallSucceeds()); + absl::SleepFor(absl::Milliseconds(val * 2)); + } + }); + } + + // First wait for signal threads to be done. + for (auto& inc : incs) { + inc->Join(); + } + + // Now there could be waiters blocked (remember operations are random). + // Notify waiters that we're done and signal semaphore just the right amount. + { + absl::MutexLock l(&mutex); + done = true; + struct sembuf buf = {}; + buf.sem_op = -count; + ASSERT_THAT(semop(sem.get(), &buf, 1), SyscallSucceeds()); + } + + // Now all waiters should unblock and exit. + for (auto& dec : decs) { + dec->Join(); + } + for (auto& zero : zeros) { + zero->Join(); + } +} + +TEST(SemaphoreTest, SemOpNamespace) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))); + + AutoSem sem(semget(123, 1, 0600 | IPC_CREAT | IPC_EXCL)); + ASSERT_THAT(sem.get(), SyscallSucceeds()); + + ScopedThread([]() { + EXPECT_THAT(unshare(CLONE_NEWIPC), SyscallSucceeds()); + AutoSem sem(semget(123, 1, 0600 | IPC_CREAT | IPC_EXCL)); + ASSERT_THAT(sem.get(), SyscallSucceeds()); + }); +} + +TEST(SemaphoreTest, SemCtlVal) { + AutoSem sem(semget(IPC_PRIVATE, 1, 0600 | IPC_CREAT)); + ASSERT_THAT(sem.get(), SyscallSucceeds()); + + // Semaphore must start with 0. + EXPECT_THAT(semctl(sem.get(), 0, GETVAL), SyscallSucceedsWithValue(0)); + + // Increase value and ensure waiters are woken up. 
+ ScopedThread th([&sem] { + struct sembuf buf = {}; + buf.sem_op = -10; + ASSERT_THAT(RetryEINTR(semop)(sem.get(), &buf, 1), SyscallSucceeds()); + }); + + ASSERT_THAT(semctl(sem.get(), 0, SETVAL, 9), SyscallSucceeds()); + EXPECT_THAT(semctl(sem.get(), 0, GETVAL), SyscallSucceedsWithValue(9)); + + ASSERT_THAT(semctl(sem.get(), 0, SETVAL, 20), SyscallSucceeds()); + const int value = semctl(sem.get(), 0, GETVAL); + // 10 or 20 because it could have raced with waiter above. + EXPECT_TRUE(value == 10 || value == 20) << "value=" << value; + th.Join(); + + // Set it back to 0 and ensure that waiters are woken up. + ScopedThread thZero([&sem] { + struct sembuf buf = {}; + buf.sem_op = 0; + ASSERT_THAT(RetryEINTR(semop)(sem.get(), &buf, 1), SyscallSucceeds()); + }); + ASSERT_THAT(semctl(sem.get(), 0, SETVAL, 0), SyscallSucceeds()); + EXPECT_THAT(semctl(sem.get(), 0, GETVAL), SyscallSucceedsWithValue(0)); + thZero.Join(); +} + +TEST(SemaphoreTest, SemCtlValAll) { + AutoSem sem(semget(IPC_PRIVATE, 3, 0600 | IPC_CREAT)); + ASSERT_THAT(sem.get(), SyscallSucceeds()); + + // Semaphores must start with 0. + uint16_t get[3] = {10, 10, 10}; + EXPECT_THAT(semctl(sem.get(), 1, GETALL, get), SyscallSucceedsWithValue(0)); + for (auto v : get) { + EXPECT_EQ(v, 0); + } + + // SetAll and check that they were set. 
+ uint16_t vals[3] = {0, 10, 20}; + EXPECT_THAT(semctl(sem.get(), 1, SETALL, vals), SyscallSucceedsWithValue(0)); + EXPECT_THAT(semctl(sem.get(), 1, GETALL, get), SyscallSucceedsWithValue(0)); + for (size_t i = 0; i < ABSL_ARRAYSIZE(vals); ++i) { + EXPECT_EQ(get[i], vals[i]); + } + + EXPECT_THAT(semctl(sem.get(), 1, SETALL, nullptr), + SyscallFailsWithErrno(EFAULT)); +} + +TEST(SemaphoreTest, SemCtlGetPid) { + AutoSem sem(semget(IPC_PRIVATE, 1, 0600 | IPC_CREAT)); + ASSERT_THAT(sem.get(), SyscallSucceeds()); + + ASSERT_THAT(semctl(sem.get(), 0, SETVAL, 1), SyscallSucceeds()); + EXPECT_THAT(semctl(sem.get(), 0, GETPID), SyscallSucceedsWithValue(getpid())); +} + +TEST(SemaphoreTest, SemCtlGetPidFork) { + AutoSem sem(semget(IPC_PRIVATE, 1, 0600 | IPC_CREAT)); + ASSERT_THAT(sem.get(), SyscallSucceeds()); + + const pid_t child_pid = fork(); + if (child_pid == 0) { + TEST_PCHECK(semctl(sem.get(), 0, SETVAL, 1) == 0); + TEST_PCHECK(semctl(sem.get(), 0, GETPID) == getpid()); + + _exit(0); + } + ASSERT_THAT(child_pid, SyscallSucceeds()); + + int status; + ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << " status " << status; +} + +TEST(SemaphoreTest, SemIpcSet) { + // Drop CAP_IPC_OWNER which allows us to bypass semaphore permissions. + ASSERT_NO_ERRNO(SetCapability(CAP_IPC_OWNER, false)); + + AutoSem sem(semget(IPC_PRIVATE, 1, 0600 | IPC_CREAT)); + ASSERT_THAT(sem.get(), SyscallSucceeds()); + + struct semid_ds semid = {}; + semid.sem_perm.uid = getuid(); + semid.sem_perm.gid = getgid(); + + // Make semaphore readonly and check that signal fails. + semid.sem_perm.mode = 0400; + EXPECT_THAT(semctl(sem.get(), 0, IPC_SET, &semid), SyscallSucceeds()); + struct sembuf buf = {}; + buf.sem_op = 1; + ASSERT_THAT(semop(sem.get(), &buf, 1), SyscallFailsWithErrno(EACCES)); + + // Make semaphore writeonly and check that wait for zero fails. 
+ semid.sem_perm.mode = 0200; + EXPECT_THAT(semctl(sem.get(), 0, IPC_SET, &semid), SyscallSucceeds()); + buf.sem_op = 0; + ASSERT_THAT(semop(sem.get(), &buf, 1), SyscallFailsWithErrno(EACCES)); +} + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/sendfile.cc b/test/syscalls/linux/sendfile.cc new file mode 100644 index 000000000..64123e904 --- /dev/null +++ b/test/syscalls/linux/sendfile.cc @@ -0,0 +1,587 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <fcntl.h> +#include <linux/unistd.h> +#include <sys/eventfd.h> +#include <sys/sendfile.h> +#include <unistd.h> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/strings/string_view.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/util/eventfd_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +TEST(SendFileTest, SendZeroBytes) { + // Create temp files. + const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + + // Open the input file as read only. + const FileDescriptor inf = + ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY)); + + // Open the output file as write only. 
+ const FileDescriptor outf = + ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_WRONLY)); + + // Send data and verify that sendfile returns the correct value. + EXPECT_THAT(sendfile(outf.get(), inf.get(), nullptr, 0), + SyscallSucceedsWithValue(0)); +} + +TEST(SendFileTest, InvalidOffset) { + // Create temp files. + const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + + // Open the input file as read only. + const FileDescriptor inf = + ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY)); + + // Open the output file as write only. + const FileDescriptor outf = + ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_WRONLY)); + + // Send data and verify that sendfile returns the correct value. + off_t offset = -1; + EXPECT_THAT(sendfile(outf.get(), inf.get(), &offset, 0), + SyscallFailsWithErrno(EINVAL)); +} + +int memfd_create(const std::string& name, unsigned int flags) { + return syscall(__NR_memfd_create, name.c_str(), flags); +} + +TEST(SendFileTest, Overflow) { + // Create input file. + const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const FileDescriptor inf = + ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY)); + + // Open the output file. + int fd; + EXPECT_THAT(fd = memfd_create("overflow", 0), SyscallSucceeds()); + const FileDescriptor outf(fd); + + // out_offset + kSize overflows INT64_MAX. + loff_t out_offset = 0x7ffffffffffffffeull; + constexpr int kSize = 3; + EXPECT_THAT(sendfile(outf.get(), inf.get(), &out_offset, kSize), + SyscallFailsWithErrno(EINVAL)); +} + +TEST(SendFileTest, SendTrivially) { + // Create temp files. 
+ constexpr char kData[] = "To be, or not to be, that is the question:"; + constexpr int kDataSize = sizeof(kData) - 1; + const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), kData, TempPath::kDefaultFileMode)); + const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + + // Open the input file as read only. + const FileDescriptor inf = + ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY)); + + // Open the output file as write only. + FileDescriptor outf; + outf = ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_WRONLY)); + + // Send data and verify that sendfile returns the correct value. + int bytes_sent; + EXPECT_THAT(bytes_sent = sendfile(outf.get(), inf.get(), nullptr, kDataSize), + SyscallSucceedsWithValue(kDataSize)); + + // Close outf to avoid leak. + outf.reset(); + + // Open the output file as read only. + outf = ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_RDONLY)); + + // Verify that the output file has the correct data. + char actual[kDataSize]; + ASSERT_THAT(read(outf.get(), &actual, bytes_sent), + SyscallSucceedsWithValue(kDataSize)); + EXPECT_EQ(kData, absl::string_view(actual, bytes_sent)); +} + +TEST(SendFileTest, SendTriviallyWithBothFilesReadWrite) { + // Create temp files. + constexpr char kData[] = "Whether 'tis nobler in the mind to suffer"; + constexpr int kDataSize = sizeof(kData) - 1; + const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), kData, TempPath::kDefaultFileMode)); + const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + + // Open the input file as readwrite. + const FileDescriptor inf = + ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDWR)); + + // Open the output file as readwrite. + FileDescriptor outf; + outf = ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_RDWR)); + + // Send data and verify that sendfile returns the correct value. 
+ int bytes_sent; + EXPECT_THAT(bytes_sent = sendfile(outf.get(), inf.get(), nullptr, kDataSize), + SyscallSucceedsWithValue(kDataSize)); + + // Close outf to avoid leak. + outf.reset(); + + // Open the output file as read only. + outf = ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_RDONLY)); + + // Verify that the output file has the correct data. + char actual[kDataSize]; + ASSERT_THAT(read(outf.get(), &actual, bytes_sent), + SyscallSucceedsWithValue(kDataSize)); + EXPECT_EQ(kData, absl::string_view(actual, bytes_sent)); +} + +TEST(SendFileTest, SendAndUpdateFileOffset) { + // Create temp files. + // Test input string length must be > 2 AND even. + constexpr char kData[] = "The slings and arrows of outrageous fortune,"; + constexpr int kDataSize = sizeof(kData) - 1; + constexpr int kHalfDataSize = kDataSize / 2; + const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), kData, TempPath::kDefaultFileMode)); + const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + + // Open the input file as read only. + const FileDescriptor inf = + ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY)); + + // Open the output file as write only. + FileDescriptor outf; + outf = ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_WRONLY)); + + // Send data and verify that sendfile returns the correct value. + int bytes_sent; + EXPECT_THAT( + bytes_sent = sendfile(outf.get(), inf.get(), nullptr, kHalfDataSize), + SyscallSucceedsWithValue(kHalfDataSize)); + + // Close outf to avoid leak. + outf.reset(); + + // Open the output file as read only. + outf = ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_RDONLY)); + + // Verify that the output file has the correct data. 
+ char actual[kHalfDataSize]; + ASSERT_THAT(read(outf.get(), &actual, bytes_sent), + SyscallSucceedsWithValue(kHalfDataSize)); + EXPECT_EQ(absl::string_view(kData, kHalfDataSize), + absl::string_view(actual, bytes_sent)); + + // Verify that the input file offset has been updated + ASSERT_THAT(read(inf.get(), &actual, kDataSize - bytes_sent), + SyscallSucceedsWithValue(kHalfDataSize)); + EXPECT_EQ( + absl::string_view(kData + kDataSize - bytes_sent, kDataSize - bytes_sent), + absl::string_view(actual, kHalfDataSize)); +} + +TEST(SendFileTest, SendAndUpdateFileOffsetFromNonzeroStartingPoint) { + // Create temp files. + // Test input string length must be > 2 AND divisible by 4. + constexpr char kData[] = "The slings and arrows of outrageous fortune,"; + constexpr int kDataSize = sizeof(kData) - 1; + constexpr int kHalfDataSize = kDataSize / 2; + constexpr int kQuarterDataSize = kHalfDataSize / 2; + const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), kData, TempPath::kDefaultFileMode)); + const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + + // Open the input file as read only. + const FileDescriptor inf = + ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY)); + + // Open the output file as write only. + FileDescriptor outf; + outf = ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_WRONLY)); + + // Read a quarter of the data from the infile which should update the file + // offset, we don't actually care about the data so it goes into the garbage. + char garbage[kQuarterDataSize]; + ASSERT_THAT(read(inf.get(), &garbage, kQuarterDataSize), + SyscallSucceedsWithValue(kQuarterDataSize)); + + // Send data and verify that sendfile returns the correct value. + int bytes_sent; + EXPECT_THAT( + bytes_sent = sendfile(outf.get(), inf.get(), nullptr, kHalfDataSize), + SyscallSucceedsWithValue(kHalfDataSize)); + + // Close out_fd to avoid leak. 
+ outf.reset(); + + // Open the output file as read only. + outf = ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_RDONLY)); + + // Verify that the output file has the correct data. + char actual[kHalfDataSize]; + ASSERT_THAT(read(outf.get(), &actual, bytes_sent), + SyscallSucceedsWithValue(kHalfDataSize)); + EXPECT_EQ(absl::string_view(kData + kQuarterDataSize, kHalfDataSize), + absl::string_view(actual, bytes_sent)); + + // Verify that the input file offset has been updated + ASSERT_THAT(read(inf.get(), &actual, kQuarterDataSize), + SyscallSucceedsWithValue(kQuarterDataSize)); + + EXPECT_EQ( + absl::string_view(kData + kDataSize - kQuarterDataSize, kQuarterDataSize), + absl::string_view(actual, kQuarterDataSize)); +} + +TEST(SendFileTest, SendAndUpdateGivenOffset) { + // Create temp files. + // Test input string length must be >= 4. + constexpr char kData[] = "Or to take Arms against a Sea of troubles,"; + // Number of bytes written to the input file: the string without its + // terminating NUL. + constexpr int kDataSize = sizeof(kData) - 1; + constexpr int kHalfDataSize = kDataSize / 2; + constexpr int kQuarterDataSize = kHalfDataSize / 2; + // Where the explicit offset must end up: the starting offset + // (kQuarterDataSize) plus the number of bytes sent (kHalfDataSize). + constexpr int kThreeFourthsDataSize = kQuarterDataSize + kHalfDataSize; + + const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), kData, TempPath::kDefaultFileMode)); + const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + + // Open the input file as read only. + const FileDescriptor inf = + ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY)); + + // Open the output file as write only. + FileDescriptor outf; + outf = ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_WRONLY)); + + // Create offset for sending. + off_t offset = kQuarterDataSize; + + // Send data and verify that sendfile returns the correct value. + int bytes_sent; + EXPECT_THAT( + bytes_sent = sendfile(outf.get(), inf.get(), &offset, kHalfDataSize), + SyscallSucceedsWithValue(kHalfDataSize)); + + // Close out_fd to avoid leak.
+ outf.reset(); + + // Open the output file as read only. + outf = ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_RDONLY)); + + // Verify that the output file has the correct data. + char actual[kHalfDataSize]; + ASSERT_THAT(read(outf.get(), &actual, bytes_sent), + SyscallSucceedsWithValue(kHalfDataSize)); + EXPECT_EQ(absl::string_view(kData + kQuarterDataSize, kHalfDataSize), + absl::string_view(actual, bytes_sent)); + + // Verify that the input file offset has NOT been updated. + ASSERT_THAT(read(inf.get(), &actual, kHalfDataSize), + SyscallSucceedsWithValue(kHalfDataSize)); + EXPECT_EQ(absl::string_view(kData, kHalfDataSize), + absl::string_view(actual, kHalfDataSize)); + + // Verify that the offset pointer has been updated. + EXPECT_EQ(offset, kThreeFourthsDataSize); +} + +TEST(SendFileTest, DoNotSendfileIfOutfileIsAppendOnly) { + // Create temp files. + constexpr char kData[] = "And by opposing end them: to die, to sleep"; + constexpr int kDataSize = sizeof(kData) - 1; + + const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), kData, TempPath::kDefaultFileMode)); + const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + + // Open the input file as read only. + const FileDescriptor inf = + ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY)); + + // Open the output file as append only. + const FileDescriptor outf = + ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_WRONLY | O_APPEND)); + + // Send data and verify that sendfile returns the correct errno. 
+ EXPECT_THAT(sendfile(outf.get(), inf.get(), nullptr, kDataSize), + SyscallFailsWithErrno(EINVAL)); +} + +TEST(SendFileTest, AppendCheckOrdering) { + constexpr char kData[] = "And by opposing end them: to die, to sleep"; + constexpr int kDataSize = sizeof(kData) - 1; + const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), kData, TempPath::kDefaultFileMode)); + + const FileDescriptor read = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY)); + const FileDescriptor write = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_WRONLY)); + const FileDescriptor append = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_APPEND)); + + // Check that read/write file mode is verified before append. + EXPECT_THAT(sendfile(append.get(), read.get(), nullptr, kDataSize), + SyscallFailsWithErrno(EBADF)); + EXPECT_THAT(sendfile(write.get(), write.get(), nullptr, kDataSize), + SyscallFailsWithErrno(EBADF)); +} + +TEST(SendFileTest, DoNotSendfileIfOutfileIsNotWritable) { + // Create temp files. + constexpr char kData[] = "No more; and by a sleep, to say we end"; + constexpr int kDataSize = sizeof(kData) - 1; + + const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), kData, TempPath::kDefaultFileMode)); + const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + + // Open the input file as read only. + const FileDescriptor inf = + ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY)); + + // Open the output file as read only. + const FileDescriptor outf = + ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_RDONLY)); + + // Send data and verify that sendfile returns the correct errno. + EXPECT_THAT(sendfile(outf.get(), inf.get(), nullptr, kDataSize), + SyscallFailsWithErrno(EBADF)); +} + +TEST(SendFileTest, DoNotSendfileIfInfileIsNotReadable) { + // Create temp files. 
+ constexpr char kData[] = "the heart-ache, and the thousand natural shocks"; + constexpr int kDataSize = sizeof(kData) - 1; + + const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), kData, TempPath::kDefaultFileMode)); + const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + + // Open the input file as write only. + const FileDescriptor inf = + ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_WRONLY)); + + // Open the output file as write only. + const FileDescriptor outf = + ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_WRONLY)); + + // Send data and verify that sendfile returns the correct errno. + EXPECT_THAT(sendfile(outf.get(), inf.get(), nullptr, kDataSize), + SyscallFailsWithErrno(EBADF)); +} + +TEST(SendFileTest, DoNotSendANegativeNumberOfBytes) { + // Create temp files. + constexpr char kData[] = "that Flesh is heir to? 'Tis a consummation"; + + const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), kData, TempPath::kDefaultFileMode)); + const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + + // Open the input file as read only. + const FileDescriptor inf = + ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY)); + + // Open the output file as write only. + const FileDescriptor outf = + ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_WRONLY)); + + // Send data and verify that sendfile returns the correct errno. + EXPECT_THAT(sendfile(outf.get(), inf.get(), nullptr, -1), + SyscallFailsWithErrno(EINVAL)); +} + +TEST(SendFileTest, SendTheCorrectNumberOfBytesEvenIfWeTryToSendTooManyBytes) { + // Create temp files. + constexpr char kData[] = "devoutly to be wished. 
To die, to sleep,"; + constexpr int kDataSize = sizeof(kData) - 1; + + const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), kData, TempPath::kDefaultFileMode)); + const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + + // Open the input file as read only. + const FileDescriptor inf = + ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY)); + + // Open the output file as write only. + FileDescriptor outf; + outf = ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_WRONLY)); + + // Send data and verify that sendfile returns the correct value. + int bytes_sent; + EXPECT_THAT( + bytes_sent = sendfile(outf.get(), inf.get(), nullptr, kDataSize + 100), + SyscallSucceedsWithValue(kDataSize)); + + // Close outf to avoid leak. + outf.reset(); + + // Open the output file as read only. + outf = ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_RDONLY)); + + // Verify that the output file has the correct data. + char actual[kDataSize]; + ASSERT_THAT(read(outf.get(), &actual, bytes_sent), + SyscallSucceedsWithValue(kDataSize)); + EXPECT_EQ(kData, absl::string_view(actual, bytes_sent)); +} + +TEST(SendFileTest, SendToNotARegularFile) { + // Make temp input directory and open as read only. + const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const FileDescriptor inf = + ASSERT_NO_ERRNO_AND_VALUE(Open(dir.path(), O_RDONLY)); + + // Make temp output file and open as write only. + const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const FileDescriptor outf = + ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_WRONLY)); + + // Receive an error since a directory is not a regular file. + EXPECT_THAT(sendfile(outf.get(), inf.get(), nullptr, 0), + SyscallFailsWithErrno(EINVAL)); +} + +TEST(SendFileTest, SendPipeWouldBlock) { + // Create temp file. 
+ constexpr char kData[] = + "The fool doth think he is wise, but the wise man knows himself to be a " + "fool."; + constexpr int kDataSize = sizeof(kData) - 1; + const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), kData, TempPath::kDefaultFileMode)); + + // Open the input file as read only. + const FileDescriptor inf = + ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY)); + + // Set up the output pipe: an anonymous pipe created with pipe2() and + // O_NONBLOCK. + int fds[2]; + ASSERT_THAT(pipe2(fds, O_NONBLOCK), SyscallSucceeds()); + const FileDescriptor rfd(fds[0]); + const FileDescriptor wfd(fds[1]); + + // Fill up the pipe's buffer: the write asks for twice the reported pipe + // size and is expected to accept exactly pipe_size bytes. + int pipe_size = -1; + ASSERT_THAT(pipe_size = fcntl(wfd.get(), F_GETPIPE_SZ), SyscallSucceeds()); + std::vector<char> buf(2 * pipe_size); + ASSERT_THAT(write(wfd.get(), buf.data(), buf.size()), + SyscallSucceedsWithValue(pipe_size)); + + // The pipe is full, so sendfile is expected to fail with EWOULDBLOCK. + EXPECT_THAT(sendfile(wfd.get(), inf.get(), nullptr, kDataSize), + SyscallFailsWithErrno(EWOULDBLOCK)); +} + +TEST(SendFileTest, SendPipeBlocks) { + // Create temp file. + constexpr char kData[] = + "The fault, dear Brutus, is not in our stars, but in ourselves."; + constexpr int kDataSize = sizeof(kData) - 1; + const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), kData, TempPath::kDefaultFileMode)); + + // Open the input file as read only. + const FileDescriptor inf = + ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY)); + + // Set up the output pipe: an anonymous pipe created with pipe(). + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + const FileDescriptor rfd(fds[0]); + const FileDescriptor wfd(fds[1]); + + // Fill up the pipe's buffer.
+ int pipe_size = -1; + ASSERT_THAT(pipe_size = fcntl(wfd.get(), F_GETPIPE_SZ), SyscallSucceeds()); + std::vector<char> buf(pipe_size); + ASSERT_THAT(write(wfd.get(), buf.data(), buf.size()), + SyscallSucceedsWithValue(pipe_size)); + + ScopedThread t([&]() { + absl::SleepFor(absl::Milliseconds(100)); + ASSERT_THAT(read(rfd.get(), buf.data(), buf.size()), + SyscallSucceedsWithValue(pipe_size)); + }); + + EXPECT_THAT(sendfile(wfd.get(), inf.get(), nullptr, kDataSize), + SyscallSucceedsWithValue(kDataSize)); +} + +TEST(SendFileTest, SendToSpecialFile) { + // Create temp file. + const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), "", TempPath::kDefaultFileMode)); + + const FileDescriptor inf = + ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDWR)); + constexpr int kSize = 0x7ff; + ASSERT_THAT(ftruncate(inf.get(), kSize), SyscallSucceeds()); + + auto eventfd = ASSERT_NO_ERRNO_AND_VALUE(NewEventFD()); + + // eventfd can accept a number of bytes which is a multiple of 8. + EXPECT_THAT(sendfile(eventfd.get(), inf.get(), nullptr, 0xfffff), + SyscallSucceedsWithValue(kSize & (~7))); +} + +TEST(SendFileTest, SendFileToPipe) { + // Create temp file. + constexpr char kData[] = "<insert-quote-here>"; + constexpr int kDataSize = sizeof(kData) - 1; + const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), kData, TempPath::kDefaultFileMode)); + const FileDescriptor inf = + ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY)); + + // Create a pipe for sending to a pipe. + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + const FileDescriptor rfd(fds[0]); + const FileDescriptor wfd(fds[1]); + + // Expect to read up to the given size. 
+ std::vector<char> buf(kDataSize); + ScopedThread t([&]() { + absl::SleepFor(absl::Milliseconds(100)); + ASSERT_THAT(read(rfd.get(), buf.data(), buf.size()), + SyscallSucceedsWithValue(kDataSize)); + }); + + // Send with twice the size of the file, which should hit EOF. + EXPECT_THAT(sendfile(wfd.get(), inf.get(), nullptr, kDataSize * 2), + SyscallSucceedsWithValue(kDataSize)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/sendfile_socket.cc b/test/syscalls/linux/sendfile_socket.cc new file mode 100644 index 000000000..c101fe9d2 --- /dev/null +++ b/test/syscalls/linux/sendfile_socket.cc @@ -0,0 +1,231 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <arpa/inet.h> +#include <netinet/in.h> +#include <sys/sendfile.h> +#include <sys/socket.h> +#include <unistd.h> + +#include <iostream> +#include <vector> + +#include "gtest/gtest.h" +#include "absl/strings/string_view.h" +#include "test/syscalls/linux/ip_socket_test_util.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { +namespace { + +class SendFileTest : public ::testing::TestWithParam<int> { + protected: + PosixErrorOr<std::unique_ptr<SocketPair>> Sockets(int type) { + // Bind a server socket. 
+ int family = GetParam(); + switch (family) { + case AF_INET: { + if (type == SOCK_STREAM) { + return SocketPairKind{ + "TCP", AF_INET, type, 0, + TCPAcceptBindSocketPairCreator(AF_INET, type, 0, false)} + .Create(); + } else { + return SocketPairKind{ + "UDP", AF_INET, type, 0, + UDPBidirectionalBindSocketPairCreator(AF_INET, type, 0, false)} + .Create(); + } + } + case AF_UNIX: { + if (type == SOCK_STREAM) { + return SocketPairKind{ + "UNIX", AF_UNIX, type, 0, + FilesystemAcceptBindSocketPairCreator(AF_UNIX, type, 0)} + .Create(); + } else { + return SocketPairKind{ + "UNIX", AF_UNIX, type, 0, + FilesystemBidirectionalBindSocketPairCreator(AF_UNIX, type, 0)} + .Create(); + } + } + default: + return PosixError(EINVAL); + } + } +}; + +// Sends large file to exercise the path that read and writes data multiple +// times, esp. when more data is read than can be written. +TEST_P(SendFileTest, SendMultiple) { + std::vector<char> data(5 * 1024 * 1024); + RandomizeBuffer(data.data(), data.size()); + + // Create temp files. + const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), absl::string_view(data.data(), data.size()), + TempPath::kDefaultFileMode)); + const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + + // Create sockets. + auto socks = ASSERT_NO_ERRNO_AND_VALUE(Sockets(SOCK_STREAM)); + + // Thread that reads data from socket and dumps to a file. + ScopedThread th([&] { + FileDescriptor outf = + ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_WRONLY)); + + // Read until socket is closed. + char buf[10240]; + for (int cnt = 0;; cnt++) { + int r = RetryEINTR(read)(socks->first_fd(), buf, sizeof(buf)); + // We cannot afford to save on every read() call. 
+ if (cnt % 1000 == 0) { + ASSERT_THAT(r, SyscallSucceeds()); + } else { + const DisableSave ds; + ASSERT_THAT(r, SyscallSucceeds()); + } + if (r == 0) { + // EOF + break; + } + int w = RetryEINTR(write)(outf.get(), buf, r); + // We cannot afford to save on every write() call. + if (cnt % 1010 == 0) { + ASSERT_THAT(w, SyscallSucceedsWithValue(r)); + } else { + const DisableSave ds; + ASSERT_THAT(w, SyscallSucceedsWithValue(r)); + } + } + }); + + // Open the input file as read only. + const FileDescriptor inf = + ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY)); + + int cnt = 0; + for (size_t sent = 0; sent < data.size(); cnt++) { + const size_t remain = data.size() - sent; + std::cout << "sendfile, size=" << data.size() << ", sent=" << sent + << ", remain=" << remain << std::endl; + + // Send data and verify that sendfile returns the correct value. + int res = sendfile(socks->second_fd(), inf.get(), nullptr, remain); + // We cannot afford to save on every sendfile() call. + if (cnt % 120 == 0) { + MaybeSave(); + } + if (res == 0) { + // EOF + break; + } + if (res > 0) { + sent += res; + } else { + ASSERT_TRUE(errno == EINTR || errno == EAGAIN) << "errno=" << errno; + } + } + + // Close socket to stop thread. + close(socks->release_second_fd()); + th.Join(); + + // Verify that the output file has the correct data. + const FileDescriptor outf = + ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_RDONLY)); + std::vector<char> actual(data.size(), '\0'); + ASSERT_THAT(RetryEINTR(read)(outf.get(), actual.data(), actual.size()), + SyscallSucceedsWithValue(actual.size())); + ASSERT_EQ(memcmp(data.data(), actual.data(), data.size()), 0); +} + +TEST_P(SendFileTest, Shutdown) { + // Create a socket. + auto socks = ASSERT_NO_ERRNO_AND_VALUE(Sockets(SOCK_STREAM)); + + // If this is a TCP socket, then turn off linger. 
+ if (GetParam() == AF_INET) { + struct linger sl; + sl.l_onoff = 1; + sl.l_linger = 0; + ASSERT_THAT( + setsockopt(socks->first_fd(), SOL_SOCKET, SO_LINGER, &sl, sizeof(sl)), + SyscallSucceeds()); + } + + // Create a 1m file with random data. + std::vector<char> data(1024 * 1024); + RandomizeBuffer(data.data(), data.size()); + const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), absl::string_view(data.data(), data.size()), + TempPath::kDefaultFileMode)); + const FileDescriptor inf = + ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY)); + + // Read some data, then shutdown the socket. We don't actually care about + // checking the contents (other tests do that), so we just re-use the same + // buffer as above. + ScopedThread t([&]() { + size_t done = 0; + while (done < data.size()) { + int n = RetryEINTR(read)(socks->first_fd(), data.data(), data.size()); + ASSERT_THAT(n, SyscallSucceeds()); + done += n; + } + // Close the server side socket. + close(socks->release_first_fd()); + }); + + // Continuously stream from the file to the socket. Note we do not assert + // that a specific amount of data has been written at any time, just that some + // data is written. Eventually, we should get a connection reset error. + while (1) { + off_t offset = 0; // Always read from the start. + int n = sendfile(socks->second_fd(), inf.get(), &offset, data.size()); + EXPECT_THAT(n, AnyOf(SyscallFailsWithErrno(ECONNRESET), + SyscallFailsWithErrno(EPIPE), SyscallSucceeds())); + if (n <= 0) { + break; + } + } +} + +TEST_P(SendFileTest, SendpageFromEmptyFileToUDP) { + auto socks = ASSERT_NO_ERRNO_AND_VALUE(Sockets(SOCK_DGRAM)); + + TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR)); + + // The value to the count argument has to be so that it is impossible to + // allocate a buffer of this size. 
In Linux, sendfile transfer at most + // 0x7ffff000 (MAX_RW_COUNT) bytes. + EXPECT_THAT(sendfile(socks->first_fd(), fd.get(), 0x0, 0x8000000000004), + SyscallSucceedsWithValue(0)); +} + +INSTANTIATE_TEST_SUITE_P(AddressFamily, SendFileTest, + ::testing::Values(AF_UNIX, AF_INET)); + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/shm.cc b/test/syscalls/linux/shm.cc new file mode 100644 index 000000000..c7fdbb924 --- /dev/null +++ b/test/syscalls/linux/shm.cc @@ -0,0 +1,508 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <stdio.h> +#include <sys/ipc.h> +#include <sys/mman.h> +#include <sys/shm.h> +#include <sys/types.h> + +#include "absl/time/clock.h" +#include "test/util/multiprocess_util.h" +#include "test/util/posix_error.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { +namespace { + +using ::testing::_; + +const uint64_t kAllocSize = kPageSize * 128ULL; + +PosixErrorOr<char*> Shmat(int shmid, const void* shmaddr, int shmflg) { + const intptr_t addr = + reinterpret_cast<intptr_t>(shmat(shmid, shmaddr, shmflg)); + if (addr == -1) { + return PosixError(errno, "shmat() failed"); + } + return reinterpret_cast<char*>(addr); +} + +PosixError Shmdt(const char* shmaddr) { + const int ret = shmdt(shmaddr); + if (ret == -1) { + return PosixError(errno, "shmdt() failed"); + } + return NoError(); +} + +template <typename T> +PosixErrorOr<int> Shmctl(int shmid, int cmd, T* buf) { + int ret = shmctl(shmid, cmd, reinterpret_cast<struct shmid_ds*>(buf)); + if (ret == -1) { + return PosixError(errno, "shmctl() failed"); + } + return ret; +} + +// ShmSegment is a RAII object for automatically cleaning up shm segments. 
+class ShmSegment { + public: + explicit ShmSegment(int id) : id_(id) {} + + ~ShmSegment() { + if (id_ >= 0) { + EXPECT_NO_ERRNO(Rmid()); + id_ = -1; + } + } + + ShmSegment(ShmSegment&& other) : id_(other.release()) {} + + ShmSegment& operator=(ShmSegment&& other) { + id_ = other.release(); + return *this; + } + + ShmSegment(ShmSegment const& other) = delete; + ShmSegment& operator=(ShmSegment const& other) = delete; + + int id() const { return id_; } + + int release() { + int id = id_; + id_ = -1; + return id; + } + + PosixErrorOr<int> Rmid() { + RETURN_IF_ERRNO(Shmctl<void>(id_, IPC_RMID, nullptr)); + return release(); + } + + private: + int id_ = -1; +}; + +PosixErrorOr<int> ShmgetRaw(key_t key, size_t size, int shmflg) { + int id = shmget(key, size, shmflg); + if (id == -1) { + return PosixError(errno, "shmget() failed"); + } + return id; +} + +PosixErrorOr<ShmSegment> Shmget(key_t key, size_t size, int shmflg) { + ASSIGN_OR_RETURN_ERRNO(int id, ShmgetRaw(key, size, shmflg)); + return ShmSegment(id); +} + +TEST(ShmTest, AttachDetach) { + const ShmSegment shm = ASSERT_NO_ERRNO_AND_VALUE( + Shmget(IPC_PRIVATE, kAllocSize, IPC_CREAT | 0777)); + struct shmid_ds attr; + ASSERT_NO_ERRNO(Shmctl(shm.id(), IPC_STAT, &attr)); + EXPECT_EQ(attr.shm_segsz, kAllocSize); + EXPECT_EQ(attr.shm_nattch, 0); + + const char* addr = ASSERT_NO_ERRNO_AND_VALUE(Shmat(shm.id(), nullptr, 0)); + ASSERT_NO_ERRNO(Shmctl(shm.id(), IPC_STAT, &attr)); + EXPECT_EQ(attr.shm_nattch, 1); + + const char* addr2 = ASSERT_NO_ERRNO_AND_VALUE(Shmat(shm.id(), nullptr, 0)); + ASSERT_NO_ERRNO(Shmctl(shm.id(), IPC_STAT, &attr)); + EXPECT_EQ(attr.shm_nattch, 2); + + ASSERT_NO_ERRNO(Shmdt(addr)); + ASSERT_NO_ERRNO(Shmctl(shm.id(), IPC_STAT, &attr)); + EXPECT_EQ(attr.shm_nattch, 1); + + ASSERT_NO_ERRNO(Shmdt(addr2)); + ASSERT_NO_ERRNO(Shmctl(shm.id(), IPC_STAT, &attr)); + EXPECT_EQ(attr.shm_nattch, 0); +} + +TEST(ShmTest, LookupByKey) { + const TempPath keyfile = 
ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const key_t key = ftok(keyfile.path().c_str(), 1); + const ShmSegment shm = + ASSERT_NO_ERRNO_AND_VALUE(Shmget(key, kAllocSize, IPC_CREAT | 0777)); + const int id2 = ASSERT_NO_ERRNO_AND_VALUE(ShmgetRaw(key, kAllocSize, 0777)); + EXPECT_EQ(shm.id(), id2); +} + +TEST(ShmTest, DetachedSegmentsPersist) { + const ShmSegment shm = ASSERT_NO_ERRNO_AND_VALUE( + Shmget(IPC_PRIVATE, kAllocSize, IPC_CREAT | 0777)); + char* addr = ASSERT_NO_ERRNO_AND_VALUE(Shmat(shm.id(), nullptr, 0)); + addr[0] = 'x'; + ASSERT_NO_ERRNO(Shmdt(addr)); + + // We should be able to re-attach to the same segment and get our data back. + addr = ASSERT_NO_ERRNO_AND_VALUE(Shmat(shm.id(), nullptr, 0)); + EXPECT_EQ(addr[0], 'x'); + ASSERT_NO_ERRNO(Shmdt(addr)); +} + +TEST(ShmTest, MultipleDetachFails) { + const ShmSegment shm = ASSERT_NO_ERRNO_AND_VALUE( + Shmget(IPC_PRIVATE, kAllocSize, IPC_CREAT | 0777)); + const char* addr = ASSERT_NO_ERRNO_AND_VALUE(Shmat(shm.id(), nullptr, 0)); + ASSERT_NO_ERRNO(Shmdt(addr)); + EXPECT_THAT(Shmdt(addr), PosixErrorIs(EINVAL, _)); +} + +TEST(ShmTest, IpcStat) { + const TempPath keyfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const key_t key = ftok(keyfile.path().c_str(), 1); + + const time_t start = time(nullptr); + + const ShmSegment shm = + ASSERT_NO_ERRNO_AND_VALUE(Shmget(key, kAllocSize, IPC_CREAT | 0777)); + + const uid_t uid = getuid(); + const gid_t gid = getgid(); + const pid_t pid = getpid(); + + struct shmid_ds attr; + ASSERT_NO_ERRNO(Shmctl(shm.id(), IPC_STAT, &attr)); + + EXPECT_EQ(attr.shm_perm.__key, key); + EXPECT_EQ(attr.shm_perm.uid, uid); + EXPECT_EQ(attr.shm_perm.gid, gid); + EXPECT_EQ(attr.shm_perm.cuid, uid); + EXPECT_EQ(attr.shm_perm.cgid, gid); + EXPECT_EQ(attr.shm_perm.mode, 0777); + + EXPECT_EQ(attr.shm_segsz, kAllocSize); + + EXPECT_EQ(attr.shm_atime, 0); + EXPECT_EQ(attr.shm_dtime, 0); + + // Change time is set on creation. 
+ EXPECT_GE(attr.shm_ctime, start); + + EXPECT_EQ(attr.shm_cpid, pid); + EXPECT_EQ(attr.shm_lpid, 0); + + EXPECT_EQ(attr.shm_nattch, 0); + + // The timestamps only have a resolution of seconds; slow down so we actually + // see the timestamps change. + absl::SleepFor(absl::Seconds(1)); + const time_t pre_attach = time(nullptr); + + const char* addr = ASSERT_NO_ERRNO_AND_VALUE(Shmat(shm.id(), nullptr, 0)); + ASSERT_NO_ERRNO(Shmctl(shm.id(), IPC_STAT, &attr)); + + EXPECT_GE(attr.shm_atime, pre_attach); + EXPECT_EQ(attr.shm_dtime, 0); + EXPECT_LT(attr.shm_ctime, pre_attach); + EXPECT_EQ(attr.shm_lpid, pid); + EXPECT_EQ(attr.shm_nattch, 1); + + absl::SleepFor(absl::Seconds(1)); + const time_t pre_detach = time(nullptr); + + ASSERT_NO_ERRNO(Shmdt(addr)); + ASSERT_NO_ERRNO(Shmctl(shm.id(), IPC_STAT, &attr)); + + EXPECT_LT(attr.shm_atime, pre_detach); + EXPECT_GE(attr.shm_dtime, pre_detach); + EXPECT_LT(attr.shm_ctime, pre_detach); + EXPECT_EQ(attr.shm_lpid, pid); + EXPECT_EQ(attr.shm_nattch, 0); +} + +TEST(ShmTest, ShmStat) { + // This test relies on the segment we create to be the first one on the + // system, causing it to occupy slot 1. We can't reasonably expect this on a + // general Linux host. + SKIP_IF(!IsRunningOnGvisor()); + + const ShmSegment shm = ASSERT_NO_ERRNO_AND_VALUE( + Shmget(IPC_PRIVATE, kAllocSize, IPC_CREAT | 0777)); + struct shmid_ds attr; + ASSERT_NO_ERRNO(Shmctl(1, SHM_STAT, &attr)); + // This does the same thing as IPC_STAT, so only test that the syscall + // succeeds here. +} + +TEST(ShmTest, IpcInfo) { + struct shminfo info; + ASSERT_NO_ERRNO(Shmctl(0, IPC_INFO, &info)); + + EXPECT_EQ(info.shmmin, 1); // This is always 1, according to the man page. 
+ EXPECT_GT(info.shmmax, info.shmmin); + EXPECT_GT(info.shmmni, 0); + EXPECT_GT(info.shmseg, 0); + EXPECT_GT(info.shmall, 0); +} + +TEST(ShmTest, ShmInfo) { + struct shm_info info; + + // We generally can't know what other processes on a Linux machine + // do with shared memory segments, so we can't test specific + // numbers on Linux. When running under gVisor, we're guaranteed to + // be the only ones using shm, so we can easily verify machine-wide + // numbers. + if (IsRunningOnGvisor()) { + ASSERT_NO_ERRNO(Shmctl(0, SHM_INFO, &info)); + EXPECT_EQ(info.used_ids, 0); + EXPECT_EQ(info.shm_tot, 0); + EXPECT_EQ(info.shm_rss, 0); + EXPECT_EQ(info.shm_swp, 0); + } + + const ShmSegment shm = ASSERT_NO_ERRNO_AND_VALUE( + Shmget(IPC_PRIVATE, kAllocSize, IPC_CREAT | 0777)); + const char* addr = ASSERT_NO_ERRNO_AND_VALUE(Shmat(shm.id(), nullptr, 0)); + + ASSERT_NO_ERRNO(Shmctl(1, SHM_INFO, &info)); + + if (IsRunningOnGvisor()) { + ASSERT_NO_ERRNO(Shmctl(shm.id(), SHM_INFO, &info)); + EXPECT_EQ(info.used_ids, 1); + EXPECT_EQ(info.shm_tot, kAllocSize / kPageSize); + EXPECT_EQ(info.shm_rss, kAllocSize / kPageSize); + EXPECT_EQ(info.shm_swp, 0); // gVisor currently never swaps. 
+ } + + ASSERT_NO_ERRNO(Shmdt(addr)); +} + +TEST(ShmTest, ShmCtlSet) { + const ShmSegment shm = ASSERT_NO_ERRNO_AND_VALUE( + Shmget(IPC_PRIVATE, kAllocSize, IPC_CREAT | 0777)); + const char* addr = ASSERT_NO_ERRNO_AND_VALUE(Shmat(shm.id(), nullptr, 0)); + + struct shmid_ds attr; + ASSERT_NO_ERRNO(Shmctl(shm.id(), IPC_STAT, &attr)); + ASSERT_EQ(attr.shm_perm.mode, 0777); + + attr.shm_perm.mode = 0766; + ASSERT_NO_ERRNO(Shmctl(shm.id(), IPC_SET, &attr)); + + ASSERT_NO_ERRNO(Shmctl(shm.id(), IPC_STAT, &attr)); + ASSERT_EQ(attr.shm_perm.mode, 0766); + + ASSERT_NO_ERRNO(Shmdt(addr)); +} + +TEST(ShmTest, RemovedSegmentsAreMarkedDeleted) { + ShmSegment shm = ASSERT_NO_ERRNO_AND_VALUE( + Shmget(IPC_PRIVATE, kAllocSize, IPC_CREAT | 0777)); + const char* addr = ASSERT_NO_ERRNO_AND_VALUE(Shmat(shm.id(), nullptr, 0)); + const int id = ASSERT_NO_ERRNO_AND_VALUE(shm.Rmid()); + struct shmid_ds attr; + ASSERT_NO_ERRNO(Shmctl(id, IPC_STAT, &attr)); + EXPECT_NE(attr.shm_perm.mode & SHM_DEST, 0); + ASSERT_NO_ERRNO(Shmdt(addr)); +} + +TEST(ShmTest, RemovedSegmentsAreDestroyed) { + ShmSegment shm = ASSERT_NO_ERRNO_AND_VALUE( + Shmget(IPC_PRIVATE, kAllocSize, IPC_CREAT | 0777)); + const char* addr = ASSERT_NO_ERRNO_AND_VALUE(Shmat(shm.id(), nullptr, 0)); + + const uint64_t alloc_pages = kAllocSize / kPageSize; + + struct shm_info info; + ASSERT_NO_ERRNO(Shmctl(0 /*ignored*/, SHM_INFO, &info)); + const uint64_t before = info.shm_tot; + + ASSERT_NO_ERRNO(shm.Rmid()); + ASSERT_NO_ERRNO(Shmdt(addr)); + + ASSERT_NO_ERRNO(Shmctl(0 /*ignored*/, SHM_INFO, &info)); + if (IsRunningOnGvisor()) { + // No guarantees on system-wide shm memory usage on a generic Linux host. 
+ const uint64_t after = info.shm_tot; + EXPECT_EQ(after, before - alloc_pages); + } +} + +TEST(ShmTest, AllowsAttachToRemovedSegmentWithRefs) { + ShmSegment shm = ASSERT_NO_ERRNO_AND_VALUE( + Shmget(IPC_PRIVATE, kAllocSize, IPC_CREAT | 0777)); + const char* addr = ASSERT_NO_ERRNO_AND_VALUE(Shmat(shm.id(), nullptr, 0)); + const int id = ASSERT_NO_ERRNO_AND_VALUE(shm.Rmid()); + const char* addr2 = ASSERT_NO_ERRNO_AND_VALUE(Shmat(id, nullptr, 0)); + ASSERT_NO_ERRNO(Shmdt(addr)); + ASSERT_NO_ERRNO(Shmdt(addr2)); +} + +TEST(ShmTest, RemovedSegmentsAreNotDiscoverable) { + const TempPath keyfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const key_t key = ftok(keyfile.path().c_str(), 1); + ShmSegment shm = + ASSERT_NO_ERRNO_AND_VALUE(Shmget(key, kAllocSize, IPC_CREAT | 0777)); + ASSERT_NO_ERRNO(shm.Rmid()); + EXPECT_THAT(Shmget(key, kAllocSize, 0777), PosixErrorIs(ENOENT, _)); +} + +TEST(ShmDeathTest, ReadonlySegment) { + SetupGvisorDeathTest(); + const ShmSegment shm = ASSERT_NO_ERRNO_AND_VALUE( + Shmget(IPC_PRIVATE, kAllocSize, IPC_CREAT | 0777)); + char* addr = ASSERT_NO_ERRNO_AND_VALUE(Shmat(shm.id(), nullptr, SHM_RDONLY)); + // Reading succeeds. + static_cast<void>(addr[0]); + // Writing fails. + EXPECT_EXIT(addr[0] = 'x', ::testing::KilledBySignal(SIGSEGV), ""); +} + +TEST(ShmDeathTest, SegmentNotAccessibleAfterDetach) { + // This test is susceptible to races with concurrent mmaps running in parallel + // gtest threads since the test relies on the address freed during a shm + // segment destruction to remain unused. We run the test body in a forked + // child to guarantee a single-threaded context to avoid this. + + SetupGvisorDeathTest(); + + const auto rest = [&] { + ShmSegment shm = ASSERT_NO_ERRNO_AND_VALUE( + Shmget(IPC_PRIVATE, kAllocSize, IPC_CREAT | 0777)); + char* addr = ASSERT_NO_ERRNO_AND_VALUE(Shmat(shm.id(), nullptr, 0)); + + // Mark the segment as destroyed so it's automatically cleaned up when we + // crash below. 
We can't rely on the standard cleanup since the destructor + // will not run after the SIGSEGV. Note that this doesn't destroy the + // segment immediately since we're still attached to it. + ASSERT_NO_ERRNO(shm.Rmid()); + + addr[0] = 'x'; + ASSERT_NO_ERRNO(Shmdt(addr)); + + // This access should cause a SIGSEGV. + addr[0] = 'x'; + }; + + EXPECT_THAT(InForkedProcess(rest), + IsPosixErrorOkAndHolds(W_EXITCODE(0, SIGSEGV))); +} + +TEST(ShmTest, RequestingSegmentSmallerThanSHMMINFails) { + struct shminfo info; + ASSERT_NO_ERRNO(Shmctl(0, IPC_INFO, &info)); + const uint64_t size = info.shmmin - 1; + EXPECT_THAT(Shmget(IPC_PRIVATE, size, IPC_CREAT | 0777), + PosixErrorIs(EINVAL, _)); +} + +TEST(ShmTest, RequestingSegmentLargerThanSHMMAXFails) { + struct shminfo info; + ASSERT_NO_ERRNO(Shmctl(0, IPC_INFO, &info)); + const uint64_t size = info.shmmax + kPageSize; + EXPECT_THAT(Shmget(IPC_PRIVATE, size, IPC_CREAT | 0777), + PosixErrorIs(EINVAL, _)); +} + +TEST(ShmTest, RequestingUnalignedSizeSucceeds) { + EXPECT_NO_ERRNO(Shmget(IPC_PRIVATE, 4097, IPC_CREAT | 0777)); +} + +TEST(ShmTest, RequestingDuplicateCreationFails) { + const TempPath keyfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const key_t key = ftok(keyfile.path().c_str(), 1); + const ShmSegment shm = ASSERT_NO_ERRNO_AND_VALUE( + Shmget(key, kAllocSize, IPC_CREAT | IPC_EXCL | 0777)); + EXPECT_THAT(Shmget(key, kAllocSize, IPC_CREAT | IPC_EXCL | 0777), + PosixErrorIs(EEXIST, _)); +} + +TEST(ShmTest, NonExistentSegmentsAreNotFound) { + const TempPath keyfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const key_t key = ftok(keyfile.path().c_str(), 1); + // Do not request creation. + EXPECT_THAT(Shmget(key, kAllocSize, 0777), PosixErrorIs(ENOENT, _)); +} + +TEST(ShmTest, SegmentsSizeFixedOnCreation) { + const TempPath keyfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const key_t key = ftok(keyfile.path().c_str(), 1); + + // Base segment. 
+ const ShmSegment shm = + ASSERT_NO_ERRNO_AND_VALUE(Shmget(key, kAllocSize, IPC_CREAT | 0777)); + + // Ask for the same segment at half size. This succeeds. + const int id2 = + ASSERT_NO_ERRNO_AND_VALUE(ShmgetRaw(key, kAllocSize / 2, 0777)); + + // Ask for the same segment at double size. + EXPECT_THAT(Shmget(key, kAllocSize * 2, 0777), PosixErrorIs(EINVAL, _)); + + char* addr = ASSERT_NO_ERRNO_AND_VALUE(Shmat(shm.id(), nullptr, 0)); + char* addr2 = ASSERT_NO_ERRNO_AND_VALUE(Shmat(id2, nullptr, 0)); + + // We have 2 different maps... + EXPECT_NE(addr, addr2); + + // ... And both maps are kAllocSize bytes; despite asking for a half-sized + // segment for the second map. + addr[kAllocSize - 1] = 'x'; + addr2[kAllocSize - 1] = 'x'; + + ASSERT_NO_ERRNO(Shmdt(addr)); + ASSERT_NO_ERRNO(Shmdt(addr2)); +} + +TEST(ShmTest, PartialUnmap) { + const ShmSegment shm = ASSERT_NO_ERRNO_AND_VALUE( + Shmget(IPC_PRIVATE, kAllocSize, IPC_CREAT | 0777)); + char* addr = ASSERT_NO_ERRNO_AND_VALUE(Shmat(shm.id(), nullptr, 0)); + EXPECT_THAT(munmap(addr + (kAllocSize / 4), kAllocSize / 2), + SyscallSucceeds()); + ASSERT_NO_ERRNO(Shmdt(addr)); +} + +// Check that sentry does not panic when asked for a zero-length private shm +// segment. Regression test for b/110694797. +TEST(ShmTest, GracefullyFailOnZeroLenSegmentCreation) { + EXPECT_THAT(Shmget(IPC_PRIVATE, 0, 0), PosixErrorIs(EINVAL, _)); +} + +TEST(ShmTest, NoDestructionOfAttachedSegmentWithMultipleRmid) { + ShmSegment shm = ASSERT_NO_ERRNO_AND_VALUE( + Shmget(IPC_PRIVATE, kAllocSize, IPC_CREAT | 0777)); + char* addr = ASSERT_NO_ERRNO_AND_VALUE(Shmat(shm.id(), nullptr, 0)); + char* addr2 = ASSERT_NO_ERRNO_AND_VALUE(Shmat(shm.id(), nullptr, 0)); + + // There should be 2 refs to the segment from the 2 attachments, and a single + // self-reference. Mark the segment as destroyed more than 3 times through + // shmctl(RMID). If there's a bug with the ref counting, this should cause the + // count to drop to zero. 
+ int id = shm.release(); + for (int i = 0; i < 6; ++i) { + ASSERT_NO_ERRNO(Shmctl<void>(id, IPC_RMID, nullptr)); + } + + // Segment should remain accessible. + addr[0] = 'x'; + ASSERT_NO_ERRNO(Shmdt(addr)); + + // Segment should remain accessible even after one of the two attachments are + // detached. + addr2[0] = 'x'; + ASSERT_NO_ERRNO(Shmdt(addr2)); +} + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/sigaction.cc b/test/syscalls/linux/sigaction.cc new file mode 100644 index 000000000..9d9dd57a8 --- /dev/null +++ b/test/syscalls/linux/sigaction.cc @@ -0,0 +1,79 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <signal.h> +#include <sys/syscall.h> + +#include "gtest/gtest.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +TEST(SigactionTest, GetLessThanOrEqualToZeroFails) { + struct sigaction act = {}; + ASSERT_THAT(sigaction(-1, nullptr, &act), SyscallFailsWithErrno(EINVAL)); + ASSERT_THAT(sigaction(0, nullptr, &act), SyscallFailsWithErrno(EINVAL)); +} + +TEST(SigactionTest, SetLessThanOrEqualToZeroFails) { + struct sigaction act = {}; + ASSERT_THAT(sigaction(-1, &act, nullptr), SyscallFailsWithErrno(EINVAL)); + ASSERT_THAT(sigaction(0, &act, nullptr), SyscallFailsWithErrno(EINVAL)); +} + +TEST(SigactionTest, GetGreaterThanMaxFails) { + struct sigaction act = {}; + ASSERT_THAT(sigaction(SIGRTMAX + 1, nullptr, &act), + SyscallFailsWithErrno(EINVAL)); +} + +TEST(SigactionTest, SetGreaterThanMaxFails) { + struct sigaction act = {}; + ASSERT_THAT(sigaction(SIGRTMAX + 1, &act, nullptr), + SyscallFailsWithErrno(EINVAL)); +} + +TEST(SigactionTest, SetSigkillFails) { + struct sigaction act = {}; + ASSERT_THAT(sigaction(SIGKILL, nullptr, &act), SyscallSucceeds()); + ASSERT_THAT(sigaction(SIGKILL, &act, nullptr), SyscallFailsWithErrno(EINVAL)); +} + +TEST(SigactionTest, SetSigstopFails) { + struct sigaction act = {}; + ASSERT_THAT(sigaction(SIGSTOP, nullptr, &act), SyscallSucceeds()); + ASSERT_THAT(sigaction(SIGSTOP, &act, nullptr), SyscallFailsWithErrno(EINVAL)); +} + +TEST(SigactionTest, BadSigsetFails) { + constexpr size_t kWrongSigSetSize = 43; + + struct sigaction act = {}; + + // The syscall itself (rather than the libc wrapper) takes the sigset_t size. 
+ ASSERT_THAT( + syscall(SYS_rt_sigaction, SIGTERM, nullptr, &act, kWrongSigSetSize), + SyscallFailsWithErrno(EINVAL)); + ASSERT_THAT( + syscall(SYS_rt_sigaction, SIGTERM, &act, nullptr, kWrongSigSetSize), + SyscallFailsWithErrno(EINVAL)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/sigaltstack.cc b/test/syscalls/linux/sigaltstack.cc new file mode 100644 index 000000000..24e7c4960 --- /dev/null +++ b/test/syscalls/linux/sigaltstack.cc @@ -0,0 +1,268 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <errno.h> +#include <signal.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> + +#include <functional> +#include <vector> + +#include "gtest/gtest.h" +#include "test/util/cleanup.h" +#include "test/util/fs_util.h" +#include "test/util/multiprocess_util.h" +#include "test/util/posix_error.h" +#include "test/util/signal_util.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +PosixErrorOr<Cleanup> ScopedSigaltstack(stack_t const& stack) { + stack_t old_stack; + int rc = sigaltstack(&stack, &old_stack); + MaybeSave(); + if (rc < 0) { + return PosixError(errno, "sigaltstack failed"); + } + return Cleanup([old_stack] { + EXPECT_THAT(sigaltstack(&old_stack, nullptr), SyscallSucceeds()); + }); +} + +volatile bool got_signal = false; +volatile int sigaltstack_errno = 0; +volatile int ss_flags = 0; + +void sigaltstack_handler(int sig, siginfo_t* siginfo, void* arg) { + got_signal = true; + + stack_t stack; + int ret = sigaltstack(nullptr, &stack); + MaybeSave(); + if (ret < 0) { + sigaltstack_errno = errno; + return; + } + ss_flags = stack.ss_flags; +} + +TEST(SigaltstackTest, Success) { + std::vector<char> stack_mem(SIGSTKSZ); + stack_t stack = {}; + stack.ss_sp = stack_mem.data(); + stack.ss_size = stack_mem.size(); + auto const cleanup_sigstack = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaltstack(stack)); + + struct sigaction sa = {}; + sa.sa_sigaction = sigaltstack_handler; + sigfillset(&sa.sa_mask); + sa.sa_flags = SA_SIGINFO | SA_ONSTACK; + auto const cleanup_sa = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGUSR1, sa)); + + // Send signal to this thread, as sigaltstack is per-thread. 
+ EXPECT_THAT(tgkill(getpid(), gettid(), SIGUSR1), SyscallSucceeds()); + + EXPECT_TRUE(got_signal); + EXPECT_EQ(sigaltstack_errno, 0); + EXPECT_NE(0, ss_flags & SS_ONSTACK); +} + +TEST(SigaltstackTest, ResetByExecve) { + std::vector<char> stack_mem(SIGSTKSZ); + stack_t stack = {}; + stack.ss_sp = stack_mem.data(); + stack.ss_size = stack_mem.size(); + auto const cleanup_sigstack = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaltstack(stack)); + + std::string full_path = RunfilePath("test/syscalls/linux/sigaltstack_check"); + + pid_t child_pid = -1; + int execve_errno = 0; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec(full_path, {"sigaltstack_check"}, {}, nullptr, &child_pid, + &execve_errno)); + + ASSERT_GT(child_pid, 0); + ASSERT_EQ(execve_errno, 0); + + int status = 0; + ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds()); + ASSERT_TRUE(WIFEXITED(status)); + ASSERT_EQ(WEXITSTATUS(status), 0); +} + +volatile bool badhandler_on_sigaltstack = true; // Set by the handler. +char* volatile badhandler_low_water_mark = nullptr; // Set by the handler. +volatile uint8_t badhandler_recursive_faults = 0; // Consumed by the handler. + +void badhandler(int sig, siginfo_t* siginfo, void* arg) { + char stack_var = 0; + char* current_ss = &stack_var; + + stack_t stack; + int ret = sigaltstack(nullptr, &stack); + if (ret < 0 || (stack.ss_flags & SS_ONSTACK) != SS_ONSTACK) { + // We should always be marked as being on the stack. Don't allow this to hit + // the bottom if this is ever not true (the main test will fail as a + // result, but we still need to unwind the recursive faults). + badhandler_on_sigaltstack = false; + } + if (current_ss < badhandler_low_water_mark) { + // Record the low point for the signal stack. We never expected this to be + // before stack bottom, but this is asserted in the actual test. 
+ badhandler_low_water_mark = current_ss; + } + if (badhandler_recursive_faults > 0) { + badhandler_recursive_faults--; + Fault(); + } + FixupFault(reinterpret_cast<ucontext_t*>(arg)); +} + +TEST(SigaltstackTest, WalksOffBottom) { + // This test marks the upper half of the stack_mem array as the signal stack. + // It asserts that when a fault occurs in the handler (already on the signal + // stack), we eventually continue to fault our way off the stack. We should + // not revert to the top of the signal stack when we fall off the bottom and + // the signal stack should remain "in use". When we fall off the signal stack, + // we should have an unconditional signal delivered and not start using the + // first part of the stack_mem array. + std::vector<char> stack_mem(SIGSTKSZ * 2); + stack_t stack = {}; + stack.ss_sp = stack_mem.data() + SIGSTKSZ; // See above: upper half. + stack.ss_size = SIGSTKSZ; // Only one half the array. + auto const cleanup_sigstack = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaltstack(stack)); + + // Setup the handler: this must be for SIGSEGV, and it must allow proper + // nesting (no signal mask, no defer) so that we can trigger multiple times. + // + // When we walk off the bottom of the signal stack and force signal delivery + // of a SIGSEGV, the handler will revert to the default behavior (kill). + struct sigaction sa = {}; + sa.sa_sigaction = badhandler; + sa.sa_flags = SA_SIGINFO | SA_ONSTACK | SA_NODEFER; + auto const cleanup_sa = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGSEGV, sa)); + + // Trigger a single fault. + badhandler_low_water_mark = + static_cast<char*>(stack.ss_sp) + SIGSTKSZ; // Expected top. + badhandler_recursive_faults = 0; // Disable refault. 
+ Fault(); + EXPECT_TRUE(badhandler_on_sigaltstack); + EXPECT_THAT(sigaltstack(nullptr, &stack), SyscallSucceeds()); + EXPECT_EQ(stack.ss_flags & SS_ONSTACK, 0); + EXPECT_LT(badhandler_low_water_mark, + reinterpret_cast<char*>(stack.ss_sp) + 2 * SIGSTKSZ); + EXPECT_GT(badhandler_low_water_mark, reinterpret_cast<char*>(stack.ss_sp)); + + // Trigger two faults. + char* prev_low_water_mark = badhandler_low_water_mark; // Previous top. + badhandler_recursive_faults = 1; // One refault. + Fault(); + ASSERT_TRUE(badhandler_on_sigaltstack); + EXPECT_THAT(sigaltstack(nullptr, &stack), SyscallSucceeds()); + EXPECT_EQ(stack.ss_flags & SS_ONSTACK, 0); + EXPECT_LT(badhandler_low_water_mark, prev_low_water_mark); + EXPECT_GT(badhandler_low_water_mark, reinterpret_cast<char*>(stack.ss_sp)); + + // Calculate the stack growth for a fault, and set the recursive faults to + // ensure that the signal handler stack required exceeds our marked stack area + // by a minimal amount. It should remain in the valid stack_mem area so that + // we can test the signal is forced merely by going out of the signal stack + // bounds, not by a genuine fault. + uintptr_t frame_size = + static_cast<uintptr_t>(prev_low_water_mark - badhandler_low_water_mark); + badhandler_recursive_faults = (SIGSTKSZ + frame_size) / frame_size; + EXPECT_EXIT(Fault(), ::testing::KilledBySignal(SIGSEGV), ""); +} + +volatile int setonstack_retval = 0; // Set by the handler. +volatile int setonstack_errno = 0; // Set by the handler. + +void setonstack(int sig, siginfo_t* siginfo, void* arg) { + char stack_mem[SIGSTKSZ]; + stack_t stack = {}; + stack.ss_sp = &stack_mem[0]; + stack.ss_size = SIGSTKSZ; + setonstack_retval = sigaltstack(&stack, nullptr); + setonstack_errno = errno; + FixupFault(reinterpret_cast<ucontext_t*>(arg)); +} + +TEST(SigaltstackTest, SetWhileOnStack) { + // Reserve twice as much stack here, since the handler will allocate an array + // of size SIGSTKSZ and attempt to set the sigaltstack to that array. 
+ std::vector<char> stack_mem(2 * SIGSTKSZ); + stack_t stack = {}; + stack.ss_sp = stack_mem.data(); + stack.ss_size = stack_mem.size(); + auto const cleanup_sigstack = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaltstack(stack)); + + // Install the SIGSEGV handler as in WalksOffBottom; no nesting needed here. + struct sigaction sa = {}; + sa.sa_sigaction = setonstack; + sa.sa_flags = SA_SIGINFO | SA_ONSTACK; + auto const cleanup_sa = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGSEGV, sa)); + + // Trigger a fault. + Fault(); + + // The set should have failed. + EXPECT_EQ(setonstack_retval, -1); + EXPECT_EQ(setonstack_errno, EPERM); +} + +TEST(SigaltstackTest, SetCurrentStack) { + // This is executed as an exit test because once the signal stack is set to + // the local stack, there's no good way to unwind. We don't want to taint the + // state of any other tests that might run within this process. + EXPECT_EXIT( + { + char stack_value = 0; + stack_t stack = {}; + stack.ss_sp = &stack_value - kPageSize; // Lower than current level. + stack.ss_size = 2 * kPageSize; // => &stack_value +/- kPageSize. + TEST_CHECK(sigaltstack(&stack, nullptr) == 0); + TEST_CHECK(sigaltstack(nullptr, &stack) == 0); + TEST_CHECK((stack.ss_flags & SS_ONSTACK) != 0); + + // Should not be able to change the stack (even no-op). + TEST_CHECK(sigaltstack(&stack, nullptr) == -1 && errno == EPERM); + + // Should not be able to disable the stack. + stack.ss_flags = SS_DISABLE; + TEST_CHECK(sigaltstack(&stack, nullptr) == -1 && errno == EPERM); + exit(0); + }, + ::testing::ExitedWithCode(0), ""); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/sigaltstack_check.cc b/test/syscalls/linux/sigaltstack_check.cc new file mode 100644 index 000000000..5ac1b661d --- /dev/null +++ b/test/syscalls/linux/sigaltstack_check.cc @@ -0,0 +1,33 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Checks that there is no alternate signal stack by default. +// +// Used by a test in sigaltstack.cc. +#include <errno.h> +#include <signal.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> + +#include "test/util/logging.h" + +int main(int /* argc */, char** /* argv */) { + stack_t stack; + TEST_CHECK(sigaltstack(nullptr, &stack) >= 0); + TEST_CHECK(stack.ss_flags == SS_DISABLE); + TEST_CHECK(stack.ss_sp == nullptr); + TEST_CHECK(stack.ss_size == 0); + return 0; +} diff --git a/test/syscalls/linux/sigiret.cc b/test/syscalls/linux/sigiret.cc new file mode 100644 index 000000000..6227774a4 --- /dev/null +++ b/test/syscalls/linux/sigiret.cc @@ -0,0 +1,136 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <signal.h> +#include <sys/types.h> +#include <sys/ucontext.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "test/util/logging.h" +#include "test/util/signal_util.h" +#include "test/util/test_util.h" +#include "test/util/timer_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +constexpr uint64_t kOrigRcx = 0xdeadbeeffacefeed; +constexpr uint64_t kOrigR11 = 0xfacefeedbaad1dea; + +volatile int gotvtalrm, ready; + +void sigvtalrm(int sig, siginfo_t* siginfo, void* _uc) { + ucontext_t* uc = reinterpret_cast<ucontext_t*>(_uc); + + // Verify that: + // - test is in the busy-wait loop waiting for signal. + // - %rcx and %r11 values in mcontext_t match kOrigRcx and kOrigR11. + if (ready && + static_cast<uint64_t>(uc->uc_mcontext.gregs[REG_RCX]) == kOrigRcx && + static_cast<uint64_t>(uc->uc_mcontext.gregs[REG_R11]) == kOrigR11) { + // Modify the values %rcx and %r11 in the ucontext. These are the + // values seen by the application after the signal handler returns. + uc->uc_mcontext.gregs[REG_RCX] = ~kOrigRcx; + uc->uc_mcontext.gregs[REG_R11] = ~kOrigR11; + gotvtalrm = 1; + } +} + +TEST(SigIretTest, CheckRcxR11) { + // Setup signal handler for SIGVTALRM. + struct sigaction sa = {}; + sigfillset(&sa.sa_mask); + sa.sa_sigaction = sigvtalrm; + sa.sa_flags = SA_SIGINFO; + auto const action_cleanup = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGVTALRM, sa)); + + auto const mask_cleanup = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, SIGVTALRM)); + + // Setup itimer to fire after 500 msecs. + struct itimerval itimer = {}; + itimer.it_value.tv_usec = 500 * 1000; // 500 msecs. + auto const timer_cleanup = + ASSERT_NO_ERRNO_AND_VALUE(ScopedItimer(ITIMER_VIRTUAL, itimer)); + + // Initialize %rcx and %r11 and spin until the signal handler returns. 
+ uint64_t rcx = kOrigRcx; + uint64_t r11 = kOrigR11; + asm volatile( + "movq %[rcx], %%rcx;" // %rcx = rcx + "movq %[r11], %%r11;" // %r11 = r11 + "movl $1, %[ready];" // ready = 1 + "1: pause; cmpl $0, %[gotvtalrm]; je 1b;" // while (!gotvtalrm); + "movq %%rcx, %[rcx];" // rcx = %rcx + "movq %%r11, %[r11];" // r11 = %r11 + : [ ready ] "=m"(ready), [ rcx ] "+m"(rcx), [ r11 ] "+m"(r11) + : [ gotvtalrm ] "m"(gotvtalrm) + : "cc", "memory", "rcx", "r11"); + + // If sigreturn(2) returns via 'sysret' then %rcx and %r11 will be + // clobbered and set to 'ptregs->rip' and 'ptregs->rflags' respectively. + // + // The following check verifies that %rcx and %r11 were not clobbered + // when returning from the signal handler (via sigreturn(2)). + EXPECT_EQ(rcx, ~kOrigRcx); + EXPECT_EQ(r11, ~kOrigR11); +} + +constexpr uint64_t kNonCanonicalRip = 0xCCCC000000000000; + +// Test that a non-canonical signal handler faults as expected. +TEST(SigIretTest, BadHandler) { + struct sigaction sa = {}; + sa.sa_sigaction = + reinterpret_cast<void (*)(int, siginfo_t*, void*)>(kNonCanonicalRip); + auto const cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGUSR1, sa)); + + pid_t pid = fork(); + if (pid == 0) { + // Child, wait for signal. + while (1) { + pause(); + } + } + ASSERT_THAT(pid, SyscallSucceeds()); + + EXPECT_THAT(kill(pid, SIGUSR1), SyscallSucceeds()); + + int status; + EXPECT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid)); + EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGSEGV) + << "status = " << status; +} + +} // namespace + +} // namespace testing +} // namespace gvisor + +int main(int argc, char** argv) { + // SigIretTest.CheckRcxR11 depends on delivering SIGVTALRM to the main thread. + // Block SIGVTALRM so that any other threads created by TestInit will also + // have SIGVTALRM blocked. 
+ sigset_t set; + sigemptyset(&set); + sigaddset(&set, SIGVTALRM); + TEST_PCHECK(sigprocmask(SIG_BLOCK, &set, nullptr) == 0); + + gvisor::testing::TestInit(&argc, &argv); + return gvisor::testing::RunAllTests(); +} diff --git a/test/syscalls/linux/signalfd.cc b/test/syscalls/linux/signalfd.cc new file mode 100644 index 000000000..389e5fca2 --- /dev/null +++ b/test/syscalls/linux/signalfd.cc @@ -0,0 +1,373 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <errno.h> +#include <poll.h> +#include <signal.h> +#include <stdio.h> +#include <string.h> +#include <sys/signalfd.h> +#include <unistd.h> + +#include <functional> +#include <vector> + +#include "gtest/gtest.h" +#include "absl/synchronization/mutex.h" +#include "test/util/file_descriptor.h" +#include "test/util/posix_error.h" +#include "test/util/signal_util.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +using ::testing::KilledBySignal; + +namespace gvisor { +namespace testing { + +namespace { + +constexpr int kSigno = SIGUSR1; +constexpr int kSignoMax = 64; // SIGRTMAX +constexpr int kSignoAlt = SIGUSR2; + +// Returns a new signalfd. 
+inline PosixErrorOr<FileDescriptor> NewSignalFD(sigset_t* mask, int flags = 0) { + int fd = signalfd(-1, mask, flags); + MaybeSave(); + if (fd < 0) { + return PosixError(errno, "signalfd"); + } + return FileDescriptor(fd); +} + +class SignalfdTest : public ::testing::TestWithParam<int> {}; + +TEST_P(SignalfdTest, Basic) { + int signo = GetParam(); + // Create the signalfd. + sigset_t mask; + sigemptyset(&mask); + sigaddset(&mask, signo); + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NewSignalFD(&mask, 0)); + + // Deliver the blocked signal. + const auto scoped_sigmask = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_BLOCK, signo)); + ASSERT_THAT(tgkill(getpid(), gettid(), signo), SyscallSucceeds()); + + // We should now read the signal. + struct signalfd_siginfo rbuf; + ASSERT_THAT(read(fd.get(), &rbuf, sizeof(rbuf)), + SyscallSucceedsWithValue(sizeof(rbuf))); + EXPECT_EQ(rbuf.ssi_signo, signo); +} + +TEST_P(SignalfdTest, MaskWorks) { + int signo = GetParam(); + // Create two signalfds with different masks. + sigset_t mask1, mask2; + sigemptyset(&mask1); + sigemptyset(&mask2); + sigaddset(&mask1, signo); + sigaddset(&mask2, kSignoAlt); + FileDescriptor fd1 = ASSERT_NO_ERRNO_AND_VALUE(NewSignalFD(&mask1, 0)); + FileDescriptor fd2 = ASSERT_NO_ERRNO_AND_VALUE(NewSignalFD(&mask2, 0)); + + // Deliver the two signals. + const auto scoped_sigmask1 = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_BLOCK, signo)); + const auto scoped_sigmask2 = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_BLOCK, kSignoAlt)); + ASSERT_THAT(tgkill(getpid(), gettid(), signo), SyscallSucceeds()); + ASSERT_THAT(tgkill(getpid(), gettid(), kSignoAlt), SyscallSucceeds()); + + // We should see the signals on the appropriate signalfds. + // + // We read in the opposite order as the signals deliver above, to ensure that + // we don't happen to read the correct signal from the correct signalfd. 
+ struct signalfd_siginfo rbuf1, rbuf2; + ASSERT_THAT(read(fd2.get(), &rbuf2, sizeof(rbuf2)), + SyscallSucceedsWithValue(sizeof(rbuf2))); + EXPECT_EQ(rbuf2.ssi_signo, kSignoAlt); + ASSERT_THAT(read(fd1.get(), &rbuf1, sizeof(rbuf1)), + SyscallSucceedsWithValue(sizeof(rbuf1))); + EXPECT_EQ(rbuf1.ssi_signo, signo); +} + +TEST(Signalfd, Cloexec) { + // Exec tests confirm that O_CLOEXEC has the intended effect. We just create a + // signalfd with the appropriate flag here and assert that the FD has it set. + sigset_t mask; + sigemptyset(&mask); + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(NewSignalFD(&mask, SFD_CLOEXEC)); + EXPECT_THAT(fcntl(fd.get(), F_GETFD), SyscallSucceedsWithValue(FD_CLOEXEC)); +} + +TEST_P(SignalfdTest, Blocking) { + int signo = GetParam(); + // Create the signalfd in blocking mode. + sigset_t mask; + sigemptyset(&mask); + sigaddset(&mask, signo); + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NewSignalFD(&mask, 0)); + + // Shared tid variable; has_tid guards tid and must start out false. + absl::Mutex mu; + bool has_tid = false; + pid_t tid; + + // Start a thread reading. + ScopedThread t([&] { + // Copy the tid and notify the caller. + { + absl::MutexLock ml(&mu); + tid = gettid(); + has_tid = true; + } + + // Read the signal from the signalfd. + struct signalfd_siginfo rbuf; + ASSERT_THAT(read(fd.get(), &rbuf, sizeof(rbuf)), + SyscallSucceedsWithValue(sizeof(rbuf))); + EXPECT_EQ(rbuf.ssi_signo, signo); + }); + + // Wait until the reader thread has published its tid. + absl::MutexLock ml(&mu); + mu.Await(absl::Condition(&has_tid)); + + // Deliver the signal to either the waiting thread, or + // to this thread. N.B. this is a bug in the core gVisor + // behavior for signalfd, and needs to be fixed. + // + // See gvisor.dev/issue/139. + if (IsRunningOnGvisor()) { + ASSERT_THAT(tgkill(getpid(), gettid(), signo), SyscallSucceeds()); + } else { + ASSERT_THAT(tgkill(getpid(), tid, signo), SyscallSucceeds()); + } + + // Ensure that it was received. 
+ t.Join(); +} + +TEST_P(SignalfdTest, ThreadGroup) { + int signo = GetParam(); + // Create the signalfd in blocking mode. + sigset_t mask; + sigemptyset(&mask); + sigaddset(&mask, signo); + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NewSignalFD(&mask, 0)); + + // Shared variable. + absl::Mutex mu; + bool first = false; + bool second = false; + + // Start a thread reading. + ScopedThread t([&] { + // Read the signal from the signalfd. + struct signalfd_siginfo rbuf; + ASSERT_THAT(read(fd.get(), &rbuf, sizeof(rbuf)), + SyscallSucceedsWithValue(sizeof(rbuf))); + EXPECT_EQ(rbuf.ssi_signo, signo); + + // Wait for the other thread. + absl::MutexLock ml(&mu); + first = true; + mu.Await(absl::Condition(&second)); + }); + + // Deliver the signal to the threadgroup. + ASSERT_THAT(kill(getpid(), signo), SyscallSucceeds()); + + // Wait for the first thread to process. + { + absl::MutexLock ml(&mu); + mu.Await(absl::Condition(&first)); + } + + // Deliver to the thread group again (other thread still exists). + ASSERT_THAT(kill(getpid(), signo), SyscallSucceeds()); + + // Ensure that we can also receive it. + struct signalfd_siginfo rbuf; + ASSERT_THAT(read(fd.get(), &rbuf, sizeof(rbuf)), + SyscallSucceedsWithValue(sizeof(rbuf))); + EXPECT_EQ(rbuf.ssi_signo, signo); + + // Mark the test as done. + { + absl::MutexLock ml(&mu); + second = true; + } + + // The other thread should be joinable. + t.Join(); +} + +TEST_P(SignalfdTest, Nonblock) { + int signo = GetParam(); + // Create the signalfd in non-blocking mode. + sigset_t mask; + sigemptyset(&mask); + sigaddset(&mask, signo); + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(NewSignalFD(&mask, SFD_NONBLOCK)); + + // We should return if we attempt to read. + struct signalfd_siginfo rbuf; + ASSERT_THAT(read(fd.get(), &rbuf, sizeof(rbuf)), + SyscallFailsWithErrno(EWOULDBLOCK)); + + // Block and deliver the signal. 
+ const auto scoped_sigmask = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_BLOCK, signo)); + ASSERT_THAT(tgkill(getpid(), gettid(), signo), SyscallSucceeds()); + + // Ensure that a read actually works. + ASSERT_THAT(read(fd.get(), &rbuf, sizeof(rbuf)), + SyscallSucceedsWithValue(sizeof(rbuf))); + EXPECT_EQ(rbuf.ssi_signo, signo); + + // Should block again. + EXPECT_THAT(read(fd.get(), &rbuf, sizeof(rbuf)), + SyscallFailsWithErrno(EWOULDBLOCK)); +} + +TEST_P(SignalfdTest, SetMask) { + int signo = GetParam(); + // Create the signalfd matching nothing. + sigset_t mask; + sigemptyset(&mask); + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(NewSignalFD(&mask, SFD_NONBLOCK)); + + // Block and deliver a signal. + const auto scoped_sigmask = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_BLOCK, signo)); + ASSERT_THAT(tgkill(getpid(), gettid(), signo), SyscallSucceeds()); + + // We should have nothing. + struct signalfd_siginfo rbuf; + ASSERT_THAT(read(fd.get(), &rbuf, sizeof(rbuf)), + SyscallFailsWithErrno(EWOULDBLOCK)); + + // Change the signal mask. + sigaddset(&mask, signo); + ASSERT_THAT(signalfd(fd.get(), &mask, 0), SyscallSucceeds()); + + // We should now have the signal. + ASSERT_THAT(read(fd.get(), &rbuf, sizeof(rbuf)), + SyscallSucceedsWithValue(sizeof(rbuf))); + EXPECT_EQ(rbuf.ssi_signo, signo); +} + +TEST_P(SignalfdTest, Poll) { + int signo = GetParam(); + // Create the signalfd. + sigset_t mask; + sigemptyset(&mask); + sigaddset(&mask, signo); + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NewSignalFD(&mask, 0)); + + // Block the signal, and start a thread to deliver it. + const auto scoped_sigmask = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_BLOCK, signo)); + pid_t orig_tid = gettid(); + ScopedThread t([&] { + absl::SleepFor(absl::Seconds(5)); + ASSERT_THAT(tgkill(getpid(), orig_tid, signo), SyscallSucceeds()); + }); + + // Start polling for the signal. 
We expect that it is not available at the + // outset, but then becomes available when the signal is sent. We give a + // timeout of 10000ms (or the delay above + 5 seconds of additional grace + // time). + struct pollfd poll_fd = {fd.get(), POLLIN, 0}; + EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, 10000), + SyscallSucceedsWithValue(1)); + + // Actually read the signal to prevent delivery. + struct signalfd_siginfo rbuf; + EXPECT_THAT(read(fd.get(), &rbuf, sizeof(rbuf)), + SyscallSucceedsWithValue(sizeof(rbuf))); +} + +std::string PrintSigno(::testing::TestParamInfo<int> info) { + switch (info.param) { + case kSigno: + return "kSigno"; + case kSignoMax: + return "kSignoMax"; + default: + return absl::StrCat(info.param); + } +} +INSTANTIATE_TEST_SUITE_P(Signalfd, SignalfdTest, + ::testing::Values(kSigno, kSignoMax), PrintSigno); + +TEST(Signalfd, Ppoll) { + sigset_t mask; + sigemptyset(&mask); + sigaddset(&mask, SIGKILL); + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(NewSignalFD(&mask, SFD_CLOEXEC)); + + // Ensure that the given ppoll blocks. + struct pollfd pfd = {}; + pfd.fd = fd.get(); + pfd.events = POLLIN; + struct timespec timeout = {}; + timeout.tv_sec = 1; + EXPECT_THAT(RetryEINTR(ppoll)(&pfd, 1, &timeout, &mask), + SyscallSucceedsWithValue(0)); +} + +TEST(Signalfd, KillStillKills) { + sigset_t mask; + sigemptyset(&mask); + sigaddset(&mask, SIGKILL); + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(NewSignalFD(&mask, SFD_CLOEXEC)); + + // Just because there is a signalfd, we shouldn't see any change in behavior + // for unblockable signals. It's easier to test this with SIGKILL. + const auto scoped_sigmask = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_BLOCK, SIGKILL)); + EXPECT_EXIT(tgkill(getpid(), gettid(), SIGKILL), KilledBySignal(SIGKILL), ""); +} + +} // namespace + +} // namespace testing +} // namespace gvisor + +int main(int argc, char** argv) { + // These tests depend on delivering signals. 
Block them up front so that all + // other threads created by TestInit will also have them blocked, and they + // will not interface with the rest of the test. + sigset_t set; + sigemptyset(&set); + sigaddset(&set, gvisor::testing::kSigno); + sigaddset(&set, gvisor::testing::kSignoMax); + sigaddset(&set, gvisor::testing::kSignoAlt); + TEST_PCHECK(sigprocmask(SIG_BLOCK, &set, nullptr) == 0); + + gvisor::testing::TestInit(&argc, &argv); + + return gvisor::testing::RunAllTests(); +} diff --git a/test/syscalls/linux/sigprocmask.cc b/test/syscalls/linux/sigprocmask.cc new file mode 100644 index 000000000..a603fc1d1 --- /dev/null +++ b/test/syscalls/linux/sigprocmask.cc @@ -0,0 +1,269 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <signal.h> +#include <stddef.h> +#include <sys/syscall.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "test/util/signal_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// Signals numbers used for testing. 
+static constexpr int kTestSignal1 = SIGUSR1;
+static constexpr int kTestSignal2 = SIGUSR2;
+
+// Issues the raw rt_sigprocmask syscall, passing _NSIG / 8 as the sigset size
+// argument.
+static int raw_sigprocmask(int how, const sigset_t* set, sigset_t* oldset) {
+  return syscall(SYS_rt_sigprocmask, how, set, oldset, _NSIG / 8);
+}
+
+// Count of the number of signals received, indexed by signal number.
+int signal_count[kMaxSignal + 1];
+
+// Signal handler: increments the counter for the received signal.
+void SigHandler(int sig, siginfo_t* info, void* context) {
+  TEST_CHECK(sig > 0 && sig <= kMaxSignal);
+  signal_count[sig] += 1;
+}
+
+// The test fixture saves and restores the signal mask and
+// sets up handlers for kTestSignal1 and kTestSignal2.
+class SigProcMaskTest : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    // Save the current signal mask.
+    EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, nullptr, &mask_),
+                SyscallSucceeds());
+
+    // Setup signal handlers for kTestSignal1 and kTestSignal2. The struct is
+    // zero-initialized so that no field handed to sigaction() is left
+    // indeterminate.
+    struct sigaction sa = {};
+    sa.sa_sigaction = SigHandler;
+    sigfillset(&sa.sa_mask);
+    sa.sa_flags = SA_SIGINFO;
+    EXPECT_THAT(sigaction(kTestSignal1, &sa, &sa_test_sig_1_),
+                SyscallSucceeds());
+    EXPECT_THAT(sigaction(kTestSignal2, &sa, &sa_test_sig_2_),
+                SyscallSucceeds());
+
+    // Clear the signal counters.
+    memset(signal_count, 0, sizeof(signal_count));
+  }
+
+  void TearDown() override {
+    // Restore the signal mask.
+    EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, &mask_, nullptr),
+                SyscallSucceeds());
+
+    // Restore the signal handlers for kTestSignal1 and kTestSignal2.
+    EXPECT_THAT(sigaction(kTestSignal1, &sa_test_sig_1_, nullptr),
+                SyscallSucceeds());
+    EXPECT_THAT(sigaction(kTestSignal2, &sa_test_sig_2_, nullptr),
+                SyscallSucceeds());
+  }
+
+ private:
+  // Signal mask and handlers captured in SetUp() and restored in TearDown().
+  sigset_t mask_;
+  struct sigaction sa_test_sig_1_;
+  struct sigaction sa_test_sig_2_;
+};
+
+// Both sigsets nullptr should succeed and do nothing.
+TEST_F(SigProcMaskTest, NullAddress) {
+  EXPECT_THAT(raw_sigprocmask(SIG_BLOCK, nullptr, nullptr), SyscallSucceeds());
+  EXPECT_THAT(raw_sigprocmask(SIG_UNBLOCK, nullptr, nullptr),
+              SyscallSucceeds());
+  EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, nullptr, nullptr),
+              SyscallSucceeds());
+}
+
+// Bad address for either sigset should fail with EFAULT.
+TEST_F(SigProcMaskTest, BadAddress) {
+  sigset_t* bad_addr = reinterpret_cast<sigset_t*>(-1);
+
+  EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, bad_addr, nullptr),
+              SyscallFailsWithErrno(EFAULT));
+
+  EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, nullptr, bad_addr),
+              SyscallFailsWithErrno(EFAULT));
+}
+
+// Bad value of the "how" parameter should fail with EINVAL.
+TEST_F(SigProcMaskTest, BadParameter) {
+  int bad_param_1 = -1;
+  int bad_param_2 = 42;
+
+  sigset_t set1;
+  sigemptyset(&set1);
+
+  EXPECT_THAT(raw_sigprocmask(bad_param_1, &set1, nullptr),
+              SyscallFailsWithErrno(EINVAL));
+
+  EXPECT_THAT(raw_sigprocmask(bad_param_2, &set1, nullptr),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+// Check that we can get the current signal mask.
+TEST_F(SigProcMaskTest, GetMask) {
+  sigset_t set1;
+  sigset_t set2;
+
+  // Start the two output buffers with different contents (empty vs. full) so
+  // that they can only compare equal if the syscall overwrote both.
+  sigemptyset(&set1);
+  sigfillset(&set2);
+  EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, nullptr, &set1), SyscallSucceeds());
+  EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, nullptr, &set2), SyscallSucceeds());
+  EXPECT_THAT(set1, EqualsSigset(set2));
+}
+
+// Check that we can set the signal mask.
+TEST_F(SigProcMaskTest, SetMask) {
+  sigset_t actual;
+  sigset_t expected;
+
+  // Try to mask all signals
+  sigfillset(&expected);
+  EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, &expected, nullptr),
+              SyscallSucceeds());
+  EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, nullptr, &actual),
+              SyscallSucceeds());
+  // sigprocmask() should have silently ignored SIGKILL and SIGSTOP.
+  sigdelset(&expected, SIGSTOP);
+  sigdelset(&expected, SIGKILL);
+  EXPECT_THAT(actual, EqualsSigset(expected));
+
+  // Try to clear the signal mask
+  sigemptyset(&expected);
+  EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, &expected, nullptr),
+              SyscallSucceeds());
+  EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, nullptr, &actual),
+              SyscallSucceeds());
+  EXPECT_THAT(actual, EqualsSigset(expected));
+
+  // Try to set a mask with one signal.
+  sigemptyset(&expected);
+  sigaddset(&expected, kTestSignal1);
+  EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, &expected, nullptr),
+              SyscallSucceeds());
+  EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, nullptr, &actual),
+              SyscallSucceeds());
+  EXPECT_THAT(actual, EqualsSigset(expected));
+}
+
+// Check that we can add and remove signals.
+TEST_F(SigProcMaskTest, BlockUnblock) {
+  sigset_t actual;
+  sigset_t expected;
+
+  // Try to set a mask with one signal.
+  sigemptyset(&expected);
+  sigaddset(&expected, kTestSignal1);
+  EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, &expected, nullptr),
+              SyscallSucceeds());
+  EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, nullptr, &actual),
+              SyscallSucceeds());
+  EXPECT_THAT(actual, EqualsSigset(expected));
+
+  // Try to add another signal.
+  sigset_t block;
+  sigemptyset(&block);
+  sigaddset(&block, kTestSignal2);
+  EXPECT_THAT(raw_sigprocmask(SIG_BLOCK, &block, nullptr), SyscallSucceeds());
+  EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, nullptr, &actual),
+              SyscallSucceeds());
+  sigaddset(&expected, kTestSignal2);
+  EXPECT_THAT(actual, EqualsSigset(expected));
+
+  // Try to remove a signal.
+  sigset_t unblock;
+  sigemptyset(&unblock);
+  sigaddset(&unblock, kTestSignal1);
+  EXPECT_THAT(raw_sigprocmask(SIG_UNBLOCK, &unblock, nullptr),
+              SyscallSucceeds());
+  EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, nullptr, &actual),
+              SyscallSucceeds());
+  sigdelset(&expected, kTestSignal1);
+  EXPECT_THAT(actual, EqualsSigset(expected));
+}
+
+// Test that the signal mask actually blocks signals.
+TEST_F(SigProcMaskTest, SignalHandler) {
+  sigset_t mask;
+
+  // clear the signal mask
+  sigemptyset(&mask);
+  EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, &mask, nullptr), SyscallSucceeds());
+
+  // Check the initial signal counts.
+  EXPECT_EQ(0, signal_count[kTestSignal1]);
+  EXPECT_EQ(0, signal_count[kTestSignal2]);
+
+  // Check that both kTestSignal1 and kTestSignal2 are not blocked.
+  raise(kTestSignal1);
+  raise(kTestSignal2);
+  EXPECT_EQ(1, signal_count[kTestSignal1]);
+  EXPECT_EQ(1, signal_count[kTestSignal2]);
+
+  // Block kTestSignal1.
+  sigaddset(&mask, kTestSignal1);
+  EXPECT_THAT(raw_sigprocmask(SIG_BLOCK, &mask, nullptr), SyscallSucceeds());
+
+  // Check that kTestSignal1 is blocked.
+  raise(kTestSignal1);
+  raise(kTestSignal2);
+  EXPECT_EQ(1, signal_count[kTestSignal1]);
+  EXPECT_EQ(2, signal_count[kTestSignal2]);
+
+  // Unblock kTestSignal1. |mask| still holds exactly kTestSignal1 from the
+  // block step above, so it is reused unchanged.
+  EXPECT_THAT(raw_sigprocmask(SIG_UNBLOCK, &mask, nullptr), SyscallSucceeds());
+
+  // Check that the unblocked kTestSignal1 has been delivered.
+  EXPECT_EQ(2, signal_count[kTestSignal1]);
+  EXPECT_EQ(2, signal_count[kTestSignal2]);
+}
+
+// Check that sigprocmask correctly handles aliasing of the set and oldset
+// pointers. Regression test for b/30502311.
+TEST_F(SigProcMaskTest, AliasedSets) {
+  sigset_t mask;
+
+  // Set a mask in which only kTestSignal1 is blocked.
+  sigset_t mask1;
+  sigemptyset(&mask1);
+  sigaddset(&mask1, kTestSignal1);
+  mask = mask1;
+  ASSERT_THAT(raw_sigprocmask(SIG_SETMASK, &mask, nullptr), SyscallSucceeds());
+
+  // Exchange it with a mask in which only kTestSignal2 is blocked.
+  sigset_t mask2;
+  sigemptyset(&mask2);
+  sigaddset(&mask2, kTestSignal2);
+  mask = mask2;
+  ASSERT_THAT(raw_sigprocmask(SIG_SETMASK, &mask, &mask), SyscallSucceeds());
+
+  // Check that the exchange succeeded:
+  // mask should now contain the previously-set mask blocking only kTestSignal1.
+  EXPECT_THAT(mask, EqualsSigset(mask1));
+  // The current mask should block only kTestSignal2.
+  ASSERT_THAT(raw_sigprocmask(0, nullptr, &mask), SyscallSucceeds());
+  EXPECT_THAT(mask, EqualsSigset(mask2));
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/sigstop.cc b/test/syscalls/linux/sigstop.cc
new file mode 100644
index 000000000..b2fcedd62
--- /dev/null
+++ b/test/syscalls/linux/sigstop.cc
@@ -0,0 +1,151 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <signal.h>
+#include <stdlib.h>
+#include <sys/select.h>
+#include <sys/wait.h>
+
+#include <algorithm>
+
+#include "gtest/gtest.h"
+#include "absl/flags/flag.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/util/multiprocess_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+ABSL_FLAG(bool, sigstop_test_child, false,
+          "If true, run the SigstopTest child workload.");
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+constexpr absl::Duration kChildStartupDelay = absl::Seconds(5);
+constexpr absl::Duration kChildMainThreadDelay = absl::Seconds(10);
+constexpr absl::Duration kChildExtraThreadDelay = absl::Seconds(15);
+constexpr absl::Duration kPostSIGSTOPDelay = absl::Seconds(20);
+
+// Comparisons on absl::Duration aren't yet constexpr (2017-07-14), so we
+// can't just use static_assert.
+TEST(SigstopTest, TimesAreRelativelyConsistent) {
+  EXPECT_LT(kChildStartupDelay, kChildMainThreadDelay)
+      << "Child process will exit before the parent process attempts to stop "
+         "it";
+  EXPECT_LT(kChildMainThreadDelay, kChildExtraThreadDelay)
+      << "Secondary thread in child process will exit before main thread, "
+         "causing it to exit with the wrong code";
+  EXPECT_LT(kChildExtraThreadDelay, kPostSIGSTOPDelay)
+      << "Parent process stops waiting before child process may exit if "
+         "improperly stopped, rendering the test ineffective";
+}
+
+// Exit codes communicated from the child workload to the parent test process.
+constexpr int kChildMainThreadExitCode = 10;
+constexpr int kChildExtraThreadExitCode = 11;
+
+TEST(SigstopTest, Correctness) {
+  pid_t child_pid = -1;
+  int execve_errno = 0;
+  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+      ForkAndExec("/proc/self/exe", {"/proc/self/exe", "--sigstop_test_child"},
+                  {}, nullptr, &child_pid, &execve_errno));
+
+  ASSERT_GT(child_pid, 0);
+  ASSERT_EQ(execve_errno, 0);
+
+  // Wait for the child subprocess to start the second thread before stopping
+  // it.
+  absl::SleepFor(kChildStartupDelay);
+  ASSERT_THAT(kill(child_pid, SIGSTOP), SyscallSucceeds());
+  int status;
+  EXPECT_THAT(RetryEINTR(waitpid)(child_pid, &status, WUNTRACED),
+              SyscallSucceedsWithValue(child_pid));
+  EXPECT_TRUE(WIFSTOPPED(status));
+  EXPECT_EQ(SIGSTOP, WSTOPSIG(status));
+
+  // Sleep for longer than either of the sleeps in the child subprocess,
+  // expecting the child to stay alive because it's stopped.
+  absl::SleepFor(kPostSIGSTOPDelay);
+  ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, WNOHANG),
+              SyscallSucceedsWithValue(0));
+
+  // Resume the child.
+  ASSERT_THAT(kill(child_pid, SIGCONT), SyscallSucceeds());
+
+  EXPECT_THAT(RetryEINTR(waitpid)(child_pid, &status, WCONTINUED),
+              SyscallSucceedsWithValue(child_pid));
+  EXPECT_TRUE(WIFCONTINUED(status));
+
+  // Expect it to die. Check the returned pid, like the earlier waits do, so
+  // that |status| is known to describe the child before it is inspected.
+  ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0),
+              SyscallSucceedsWithValue(child_pid));
+  ASSERT_TRUE(WIFEXITED(status));
+  ASSERT_EQ(WEXITSTATUS(status), kChildMainThreadExitCode);
+}
+
+// Like absl::SleepFor, but tries to avoid counting time spent stopped due to a
+// stop signal toward the sleep.
+//
+// This is required due to an inconsistency in how nanosleep(2) and stop signals
+// interact on Linux. When nanosleep is interrupted, it writes the remaining
+// time back to its second timespec argument, so that if nanosleep is
+// interrupted by a signal handler then userspace can immediately call nanosleep
+// again with that timespec. However, if nanosleep is automatically restarted
+// (because it's interrupted by a signal that is not delivered to a handler,
+// such as a stop signal), it's restarted based on the timer's former *absolute*
+// expiration time (via ERESTART_RESTARTBLOCK => SYS_restart_syscall =>
+// hrtimer_nanosleep_restart). This means that time spent stopped is effectively
+// counted as time spent sleeping, resulting in less time spent sleeping than
+// expected.
+//
+// Dividing the sleep into multiple smaller sleeps limits the impact of this
+// effect to the length of each sleep during which a stop occurs; for example,
+// if a sleeping process is only stopped once, SleepIgnoreStopped can
+// under-sleep by at most 100ms.
+void SleepIgnoreStopped(absl::Duration d) {
+  absl::Duration const max_sleep = absl::Milliseconds(100);
+  while (d > absl::ZeroDuration()) {
+    absl::Duration to_sleep = std::min(d, max_sleep);
+    absl::SleepFor(to_sleep);
+    d -= to_sleep;
+  }
+}
+
+// Child workload: the main thread and one extra thread each sleep and then
+// call exit() with a distinct code; the main thread's delay is the shorter one.
+void RunChild() {
+  // Start another thread that attempts to call exit_group with a different
+  // error code, in order to verify that SIGSTOP stops this thread as well.
+  ScopedThread t([] {
+    SleepIgnoreStopped(kChildExtraThreadDelay);
+    exit(kChildExtraThreadExitCode);
+  });
+  SleepIgnoreStopped(kChildMainThreadDelay);
+  exit(kChildMainThreadExitCode);
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
+
+int main(int argc, char** argv) {
+  gvisor::testing::TestInit(&argc, &argv);
+
+  // When re-executed with --sigstop_test_child, act as the child workload
+  // instead of running the test suite.
+  if (absl::GetFlag(FLAGS_sigstop_test_child)) {
+    gvisor::testing::RunChild();
+    return 1;
+  }
+
+  return gvisor::testing::RunAllTests();
+}
diff --git a/test/syscalls/linux/sigtimedwait.cc b/test/syscalls/linux/sigtimedwait.cc
new file mode 100644
index 000000000..4f8afff15
--- /dev/null
+++ b/test/syscalls/linux/sigtimedwait.cc
@@ -0,0 +1,323 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <signal.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/logging.h"
+#include "test/util/signal_util.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+#include "test/util/timer_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// N.B. main() blocks SIGALRM and SIGCHLD on all threads.
+
+constexpr int kAlarmSecs = 12;
+
+// Handler that deliberately does nothing with the signal it receives.
+void NoopHandler(int /*sig*/, siginfo_t* /*info*/, void* /*context*/) {}
+
+TEST(SigtimedwaitTest, InvalidTimeout) {
+  sigset_t empty_set;
+  sigemptyset(&empty_set);
+
+  // Each entry is rejected by sigtimedwait with EINVAL: nanoseconds above the
+  // one-second range, negative seconds, and negative nanoseconds.
+  const struct timespec kBadTimeouts[] = {
+      {0, 1000000001},
+      {-1, 0},
+      {0, -1},
+  };
+  for (const struct timespec& bad_timeout : kBadTimeouts) {
+    EXPECT_THAT(sigtimedwait(&empty_set, nullptr, &bad_timeout),
+                SyscallFailsWithErrno(EINVAL));
+  }
+}
+
+// Random save is disabled here because the test depends on alarm timing; the
+// cooperative save tests already exercise a save between alarm and wait.
+TEST(SigtimedwaitTest, AlarmReturnsAlarm_NoRandomSave) {
+  // Arm a one-shot real-time interval timer.
+  struct itimerval alarm_timer = {};
+  alarm_timer.it_value.tv_sec = kAlarmSecs;
+  const auto itimer_cleanup =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedItimer(ITIMER_REAL, alarm_timer));
+
+  // Wait, with no timeout, for SIGALRM only.
+  sigset_t wait_set;
+  sigemptyset(&wait_set);
+  sigaddset(&wait_set, SIGALRM);
+
+  siginfo_t received = {};
+  EXPECT_THAT(RetryEINTR(sigtimedwait)(&wait_set, &received, nullptr),
+              SyscallSucceedsWithValue(SIGALRM));
+  EXPECT_EQ(SIGALRM, received.si_signo);
+}
+
+// No random save as the test relies on alarm timing. Cooperative save tests
+// already cover the save between alarm and wait.
+TEST(SigtimedwaitTest, NullTimeoutReturnsEINTR_NoRandomSave) {
+  // Zero-initialize so no field handed to sigaction() is left indeterminate.
+  struct sigaction sa = {};
+  sa.sa_sigaction = NoopHandler;
+  sigfillset(&sa.sa_mask);
+  sa.sa_flags = SA_SIGINFO;
+  const auto action_cleanup =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGALRM, sa));
+
+  const auto mask_cleanup =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, SIGALRM));
+
+  struct itimerval itv = {};
+  itv.it_value.tv_sec = kAlarmSecs;
+  const auto itimer_cleanup =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedItimer(ITIMER_REAL, itv));
+
+  sigset_t mask;
+  sigemptyset(&mask);
+  EXPECT_THAT(sigtimedwait(&mask, nullptr, nullptr),
+              SyscallFailsWithErrno(EINTR));
+}
+
+TEST(SigtimedwaitTest, LegitTimeoutReturnsEAGAIN) {
+  sigset_t mask;
+  sigemptyset(&mask);
+  struct timespec timeout = {1, 0};  // 1 second
+  EXPECT_THAT(RetryEINTR(sigtimedwait)(&mask, nullptr, &timeout),
+              SyscallFailsWithErrno(EAGAIN));
+}
+
+TEST(SigtimedwaitTest, ZeroTimeoutReturnsEAGAIN) {
+  sigset_t mask;
+  sigemptyset(&mask);
+  struct timespec timeout = {0, 0};  // 0 second
+  EXPECT_THAT(sigtimedwait(&mask, nullptr, &timeout),
+              SyscallFailsWithErrno(EAGAIN));
+}
+
+TEST(SigtimedwaitTest, KillGeneratedSIGCHLD) {
+  EXPECT_THAT(kill(getpid(), SIGCHLD), SyscallSucceeds());
+
+  sigset_t mask;
+  sigemptyset(&mask);
+  sigaddset(&mask, SIGCHLD);
+  struct timespec ts = {5, 0};
+  EXPECT_THAT(RetryEINTR(sigtimedwait)(&mask, nullptr, &ts),
+              SyscallSucceedsWithValue(SIGCHLD));
+}
+
+TEST(SigtimedwaitTest, ChildExitGeneratedSIGCHLD) {
+  pid_t pid = fork();
+  if (pid == 0) {
+    _exit(0);
+  }
+  ASSERT_THAT(pid, SyscallSucceeds());
+
+  int status;
+  EXPECT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid));
+  EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) << status;
+
+  sigset_t mask;
+  sigemptyset(&mask);
+  sigaddset(&mask, SIGCHLD);
+  struct timespec ts = {5, 0};
+  EXPECT_THAT(RetryEINTR(sigtimedwait)(&mask, nullptr, &ts),
+              SyscallSucceedsWithValue(SIGCHLD));
+}
+
+TEST(SigtimedwaitTest, ChildExitGeneratedSIGCHLDWithHandler) {
+  // Setup handler for SIGCHLD, but don't unblock it.
+  struct sigaction sa = {};
+  sa.sa_sigaction = NoopHandler;
+  sigfillset(&sa.sa_mask);
+  sa.sa_flags = SA_SIGINFO;
+  const auto action_cleanup =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGCHLD, sa));
+
+  pid_t pid = fork();
+  if (pid == 0) {
+    _exit(0);
+  }
+  ASSERT_THAT(pid, SyscallSucceeds());
+
+  sigset_t mask;
+  sigemptyset(&mask);
+  sigaddset(&mask, SIGCHLD);
+  struct timespec ts = {5, 0};
+  EXPECT_THAT(RetryEINTR(sigtimedwait)(&mask, nullptr, &ts),
+              SyscallSucceedsWithValue(SIGCHLD));
+
+  int status;
+  EXPECT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid));
+  EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) << status;
+}
+
+// sigtimedwait cannot catch SIGKILL.
+TEST(SigtimedwaitTest, SIGKILLUncaught) {
+  // This is a regression test for sigtimedwait dequeuing SIGKILLs, thus
+  // preventing the task from exiting.
+  //
+  // The explanation below is specific to behavior in gVisor. The Linux behavior
+  // here is irrelevant because without a bug that prevents delivery of SIGKILL,
+  // none of this behavior is visible (in Linux or gVisor).
+  //
+  // SIGKILL is rather intrusive. Simply sending the SIGKILL marks
+  // ThreadGroup.exitStatus as exiting with SIGKILL, before the SIGKILL is even
+  // delivered.
+  //
+  // As a result, we cannot simply exit the child with a different exit code if
+  // it survives and expect to see that code in waitpid because:
+  //   1. PrepareGroupExit will override Task.exitStatus with
+  //      ThreadGroup.exitStatus.
+  //   2. waitpid(2) will always return ThreadGroup.exitStatus rather than
+  //      Task.exitStatus.
+  //
+  // We could use exit(2) to set Task.exitStatus without override, and a SIGCHLD
+  // handler to receive Task.exitStatus in the parent, but with that much
+  // test complexity, it is cleaner to simply use a pipe to notify the parent
+  // that we survived.
+  constexpr auto kSigtimedwaitSetupTime = absl::Seconds(2);
+
+  int pipe_fds[2];
+  ASSERT_THAT(pipe(pipe_fds), SyscallSucceeds());
+  FileDescriptor rfd(pipe_fds[0]);
+  FileDescriptor wfd(pipe_fds[1]);
+
+  pid_t pid = fork();
+  if (pid == 0) {
+    rfd.reset();
+
+    sigset_t mask;
+    sigemptyset(&mask);
+    sigaddset(&mask, SIGKILL);
+    RetryEINTR(sigtimedwait)(&mask, nullptr, nullptr);
+
+    // Survived.
+    char c = 'a';
+    TEST_PCHECK(WriteFd(wfd.get(), &c, 1) == 1);
+    _exit(1);
+  }
+  ASSERT_THAT(pid, SyscallSucceeds());
+
+  wfd.reset();
+
+  // Wait for child to block in sigtimedwait, then kill it.
+  absl::SleepFor(kSigtimedwaitSetupTime);
+
+  // Sending SIGKILL will attempt to enqueue the signal twice: once in the
+  // normal signal sending path, and once to all Tasks in the ThreadGroup when
+  // applying SIGKILL side-effects.
+  //
+  // If we use kill(2), the former will be on the ThreadGroup signal queue and
+  // the latter will be on the Task signal queue. sigtimedwait can only dequeue
+  // one signal, so the other would kill the Task, masking bugs.
+  //
+  // If we use tkill(2), the former will be on the Task signal queue and the
+  // latter will be dropped as a duplicate. Then sigtimedwait can theoretically
+  // dequeue the single SIGKILL.
+  EXPECT_THAT(syscall(SYS_tkill, pid, SIGKILL), SyscallSucceeds());
+
+  int status;
+  EXPECT_THAT(RetryEINTR(waitpid)(pid, &status, 0),
+              SyscallSucceedsWithValue(pid));
+  EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL) << status;
+
+  // Child shouldn't have survived.
+  char c;
+  EXPECT_THAT(ReadFd(rfd.get(), &c, 1), SyscallSucceedsWithValue(0));
+}
+
+TEST(SigtimedwaitTest, IgnoredUnmaskedSignal) {
+  constexpr int kSigno = SIGUSR1;
+  constexpr auto kSigtimedwaitSetupTime = absl::Seconds(2);
+  constexpr auto kSigtimedwaitTimeout = absl::Seconds(5);
+  ASSERT_GT(kSigtimedwaitTimeout, kSigtimedwaitSetupTime);
+
+  // Ensure that kSigno is ignored, and unmasked on this thread.
+  struct sigaction sa = {};
+  sa.sa_handler = SIG_IGN;
+  const auto scoped_sigaction =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(kSigno, sa));
+  sigset_t mask;
+  sigemptyset(&mask);
+  sigaddset(&mask, kSigno);
+  auto scoped_sigmask =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, mask));
+
+  // Create a thread which will send us kSigno while we are blocked in
+  // sigtimedwait.
+  pid_t tid = gettid();
+  ScopedThread sigthread([&] {
+    absl::SleepFor(kSigtimedwaitSetupTime);
+    EXPECT_THAT(tgkill(getpid(), tid, kSigno), SyscallSucceeds());
+  });
+
+  // sigtimedwait should not observe kSigno since it is ignored and already
+  // unmasked, causing it to be dropped before it is enqueued.
+  struct timespec timeout_ts = absl::ToTimespec(kSigtimedwaitTimeout);
+  EXPECT_THAT(RetryEINTR(sigtimedwait)(&mask, nullptr, &timeout_ts),
+              SyscallFailsWithErrno(EAGAIN));
+}
+
+TEST(SigtimedwaitTest, IgnoredMaskedSignal) {
+  constexpr int kSigno = SIGUSR1;
+  constexpr auto kSigtimedwaitSetupTime = absl::Seconds(2);
+  constexpr auto kSigtimedwaitTimeout = absl::Seconds(5);
+  ASSERT_GT(kSigtimedwaitTimeout, kSigtimedwaitSetupTime);
+
+  // Ensure that kSigno is ignored, and masked on this thread.
+  struct sigaction sa = {};
+  sa.sa_handler = SIG_IGN;
+  const auto scoped_sigaction =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(kSigno, sa));
+  sigset_t mask;
+  sigemptyset(&mask);
+  sigaddset(&mask, kSigno);
+  auto scoped_sigmask =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_BLOCK, mask));
+
+  // Create a thread which will send us kSigno while we are blocked in
+  // sigtimedwait.
+  pid_t tid = gettid();
+  ScopedThread sigthread([&] {
+    absl::SleepFor(kSigtimedwaitSetupTime);
+    EXPECT_THAT(tgkill(getpid(), tid, kSigno), SyscallSucceeds());
+  });
+
+  // sigtimedwait should observe kSigno since it is normally masked, causing it
+  // to be enqueued despite being ignored.
+  struct timespec timeout_ts = absl::ToTimespec(kSigtimedwaitTimeout);
+  EXPECT_THAT(RetryEINTR(sigtimedwait)(&mask, nullptr, &timeout_ts),
+              SyscallSucceedsWithValue(kSigno));
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
+
+int main(int argc, char** argv) {
+  // These tests depend on delivering SIGALRM/SIGCHLD to the main thread or in
+  // sigtimedwait. Block them so that any other threads created by TestInit will
+  // also have them blocked.
+  sigset_t set;
+  sigemptyset(&set);
+  sigaddset(&set, SIGALRM);
+  sigaddset(&set, SIGCHLD);
+  TEST_PCHECK(sigprocmask(SIG_BLOCK, &set, nullptr) == 0);
+
+  gvisor::testing::TestInit(&argc, &argv);
+  return gvisor::testing::RunAllTests();
+}
diff --git a/test/syscalls/linux/socket.cc b/test/syscalls/linux/socket.cc
new file mode 100644
index 000000000..c20cd3fcc
--- /dev/null
+++ b/test/syscalls/linux/socket.cc
@@ -0,0 +1,121 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/temp_umask.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +TEST(SocketTest, UnixSocketPairProtocol) { + int socks[2]; + ASSERT_THAT(socketpair(AF_UNIX, SOCK_STREAM, PF_UNIX, socks), + SyscallSucceeds()); + close(socks[0]); + close(socks[1]); +} + +TEST(SocketTest, ProtocolUnix) { + struct { + int domain, type, protocol; + } tests[] = { + {AF_UNIX, SOCK_STREAM, PF_UNIX}, + {AF_UNIX, SOCK_SEQPACKET, PF_UNIX}, + {AF_UNIX, SOCK_DGRAM, PF_UNIX}, + }; + for (int i = 0; i < ABSL_ARRAYSIZE(tests); i++) { + ASSERT_NO_ERRNO_AND_VALUE( + Socket(tests[i].domain, tests[i].type, tests[i].protocol)); + } +} + +TEST(SocketTest, ProtocolInet) { + struct { + int domain, type, protocol; + } tests[] = { + {AF_INET, SOCK_DGRAM, IPPROTO_UDP}, + {AF_INET, SOCK_STREAM, IPPROTO_TCP}, + }; + for (int i = 0; i < ABSL_ARRAYSIZE(tests); i++) { + ASSERT_NO_ERRNO_AND_VALUE( + Socket(tests[i].domain, tests[i].type, tests[i].protocol)); + } +} + +TEST(SocketTest, UnixSocketStat) { + SKIP_IF(IsRunningWithVFS1()); + + FileDescriptor bound = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_UNIX, SOCK_STREAM, PF_UNIX)); + + // The permissions of the file created with bind(2) should be defined by the + // permissions of the bound socket and the umask. + mode_t sock_perm = 0765, mask = 0123; + ASSERT_THAT(fchmod(bound.get(), sock_perm), SyscallSucceeds()); + TempUmask m(mask); + + struct sockaddr_un addr = + ASSERT_NO_ERRNO_AND_VALUE(UniqueUnixAddr(/*abstract=*/false, AF_UNIX)); + ASSERT_THAT(bind(bound.get(), reinterpret_cast<struct sockaddr*>(&addr), + sizeof(addr)), + SyscallSucceeds()); + + struct stat statbuf = {}; + ASSERT_THAT(stat(addr.sun_path, &statbuf), SyscallSucceeds()); + + // Mode should be S_IFSOCK. 
+ EXPECT_EQ(statbuf.st_mode, S_IFSOCK | sock_perm & ~mask); + + // Timestamps should be equal and non-zero. + // TODO(b/158882152): Sockets currently don't implement timestamps. + if (!IsRunningOnGvisor()) { + EXPECT_NE(statbuf.st_atime, 0); + EXPECT_EQ(statbuf.st_atime, statbuf.st_mtime); + EXPECT_EQ(statbuf.st_atime, statbuf.st_ctime); + } +} + +using SocketOpenTest = ::testing::TestWithParam<int>; + +// UDS cannot be opened. +TEST_P(SocketOpenTest, Unix) { + // FIXME(b/142001530): Open incorrectly succeeds on gVisor. + SKIP_IF(IsRunningWithVFS1()); + + FileDescriptor bound = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_UNIX, SOCK_STREAM, PF_UNIX)); + + struct sockaddr_un addr = + ASSERT_NO_ERRNO_AND_VALUE(UniqueUnixAddr(/*abstract=*/false, AF_UNIX)); + + ASSERT_THAT(bind(bound.get(), reinterpret_cast<struct sockaddr*>(&addr), + sizeof(addr)), + SyscallSucceeds()); + + EXPECT_THAT(open(addr.sun_path, GetParam()), SyscallFailsWithErrno(ENXIO)); +} + +INSTANTIATE_TEST_SUITE_P(OpenModes, SocketOpenTest, + ::testing::Values(O_RDONLY, O_RDWR)); + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_abstract.cc b/test/syscalls/linux/socket_abstract.cc new file mode 100644 index 000000000..00999f192 --- /dev/null +++ b/test/syscalls/linux/socket_abstract.cc @@ -0,0 +1,49 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <vector> + +#include "test/syscalls/linux/socket_generic.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/socket_unix.h" +#include "test/syscalls/linux/socket_unix_cmsg.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { +namespace { + +std::vector<SocketPairKind> GetSocketPairs() { + return ApplyVec<SocketPairKind>( + AbstractBoundUnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_STREAM, SOCK_DGRAM, SOCK_SEQPACKET}, + List<int>{0, SOCK_NONBLOCK})); +} + +INSTANTIATE_TEST_SUITE_P( + AbstractUnixSockets, AllSocketPairTest, + ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); + +INSTANTIATE_TEST_SUITE_P( + AbstractUnixSockets, UnixSocketPairTest, + ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); + +INSTANTIATE_TEST_SUITE_P( + AbstractUnixSockets, UnixSocketPairCmsgTest, + ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_bind_to_device.cc b/test/syscalls/linux/socket_bind_to_device.cc new file mode 100644 index 000000000..6b27f6eab --- /dev/null +++ b/test/syscalls/linux/socket_bind_to_device.cc @@ -0,0 +1,313 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <arpa/inet.h> +#include <linux/if_tun.h> +#include <net/if.h> +#include <netinet/in.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <sys/un.h> + +#include <cstdio> +#include <cstring> +#include <map> +#include <memory> +#include <unordered_map> +#include <unordered_set> +#include <utility> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "test/syscalls/linux/ip_socket_test_util.h" +#include "test/syscalls/linux/socket_bind_to_device_util.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/capability_util.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +using std::string; + +// Test fixture for SO_BINDTODEVICE tests. +class BindToDeviceTest : public ::testing::TestWithParam<SocketKind> { + protected: + void SetUp() override { + printf("Testing case: %s\n", GetParam().description.c_str()); + ASSERT_TRUE(ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) + << "CAP_NET_RAW is required to use SO_BINDTODEVICE"; + + interface_name_ = "eth1"; + auto interface_names = GetInterfaceNames(); + if (interface_names.find(interface_name_) == interface_names.end()) { + // Need a tunnel. + tunnel_ = ASSERT_NO_ERRNO_AND_VALUE(Tunnel::New()); + interface_name_ = tunnel_->GetName(); + ASSERT_FALSE(interface_name_.empty()); + } + socket_ = ASSERT_NO_ERRNO_AND_VALUE(GetParam().Create()); + } + + string interface_name() const { return interface_name_; } + + int socket_fd() const { return socket_->get(); } + + private: + std::unique_ptr<Tunnel> tunnel_; + string interface_name_; + std::unique_ptr<FileDescriptor> socket_; +}; + +constexpr char kIllegalIfnameChar = '/'; + +// Tests getsockopt of the default value. +TEST_P(BindToDeviceTest, GetsockoptDefault) { + char name_buffer[IFNAMSIZ * 2]; + char original_name_buffer[IFNAMSIZ * 2]; + socklen_t name_buffer_size; + + // Read the default SO_BINDTODEVICE. 
+ memset(original_name_buffer, kIllegalIfnameChar, sizeof(name_buffer)); + for (size_t i = 0; i <= sizeof(name_buffer); i++) { + memset(name_buffer, kIllegalIfnameChar, sizeof(name_buffer)); + name_buffer_size = i; + EXPECT_THAT(getsockopt(socket_fd(), SOL_SOCKET, SO_BINDTODEVICE, + name_buffer, &name_buffer_size), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(name_buffer_size, 0); + EXPECT_EQ(memcmp(name_buffer, original_name_buffer, sizeof(name_buffer)), + 0); + } +} + +// Tests setsockopt of invalid device name. +TEST_P(BindToDeviceTest, SetsockoptInvalidDeviceName) { + char name_buffer[IFNAMSIZ * 2]; + socklen_t name_buffer_size; + + // Set an invalid device name. + memset(name_buffer, kIllegalIfnameChar, 5); + name_buffer_size = 5; + EXPECT_THAT(setsockopt(socket_fd(), SOL_SOCKET, SO_BINDTODEVICE, name_buffer, + name_buffer_size), + SyscallFailsWithErrno(ENODEV)); +} + +// Tests setsockopt of a buffer with a valid device name but not +// null-terminated, with different sizes of buffer. +TEST_P(BindToDeviceTest, SetsockoptValidDeviceNameWithoutNullTermination) { + char name_buffer[IFNAMSIZ * 2]; + socklen_t name_buffer_size; + + strncpy(name_buffer, interface_name().c_str(), interface_name().size() + 1); + // Intentionally overwrite the null at the end. + memset(name_buffer + interface_name().size(), kIllegalIfnameChar, + sizeof(name_buffer) - interface_name().size()); + for (size_t i = 1; i <= sizeof(name_buffer); i++) { + name_buffer_size = i; + SCOPED_TRACE(absl::StrCat("Buffer size: ", i)); + // It should only work if the size provided is exactly right. 
+ if (name_buffer_size == interface_name().size()) { + EXPECT_THAT(setsockopt(socket_fd(), SOL_SOCKET, SO_BINDTODEVICE, + name_buffer, name_buffer_size), + SyscallSucceeds()); + } else { + EXPECT_THAT(setsockopt(socket_fd(), SOL_SOCKET, SO_BINDTODEVICE, + name_buffer, name_buffer_size), + SyscallFailsWithErrno(ENODEV)); + } + } +} + +// Tests setsockopt of a buffer with a valid device name and null-terminated, +// with different sizes of buffer. +TEST_P(BindToDeviceTest, SetsockoptValidDeviceNameWithNullTermination) { + char name_buffer[IFNAMSIZ * 2]; + socklen_t name_buffer_size; + + strncpy(name_buffer, interface_name().c_str(), interface_name().size() + 1); + // Don't overwrite the null at the end. + memset(name_buffer + interface_name().size() + 1, kIllegalIfnameChar, + sizeof(name_buffer) - interface_name().size() - 1); + for (size_t i = 1; i <= sizeof(name_buffer); i++) { + name_buffer_size = i; + SCOPED_TRACE(absl::StrCat("Buffer size: ", i)); + // It should only work if the size provided is at least the right size. + if (name_buffer_size >= interface_name().size()) { + EXPECT_THAT(setsockopt(socket_fd(), SOL_SOCKET, SO_BINDTODEVICE, + name_buffer, name_buffer_size), + SyscallSucceeds()); + } else { + EXPECT_THAT(setsockopt(socket_fd(), SOL_SOCKET, SO_BINDTODEVICE, + name_buffer, name_buffer_size), + SyscallFailsWithErrno(ENODEV)); + } + } +} + +// Tests that setsockopt of an invalid device name doesn't unset the previous +// valid setsockopt. +TEST_P(BindToDeviceTest, SetsockoptValidThenInvalid) { + char name_buffer[IFNAMSIZ * 2]; + socklen_t name_buffer_size; + + // Write successfully. + strncpy(name_buffer, interface_name().c_str(), sizeof(name_buffer)); + ASSERT_THAT(setsockopt(socket_fd(), SOL_SOCKET, SO_BINDTODEVICE, name_buffer, + sizeof(name_buffer)), + SyscallSucceeds()); + + // Read it back successfully. 
+ memset(name_buffer, kIllegalIfnameChar, sizeof(name_buffer)); + name_buffer_size = sizeof(name_buffer); + EXPECT_THAT(getsockopt(socket_fd(), SOL_SOCKET, SO_BINDTODEVICE, name_buffer, + &name_buffer_size), + SyscallSucceeds()); + EXPECT_EQ(name_buffer_size, interface_name().size() + 1); + EXPECT_STREQ(name_buffer, interface_name().c_str()); + + // Write unsuccessfully. + memset(name_buffer, kIllegalIfnameChar, sizeof(name_buffer)); + name_buffer_size = 5; + EXPECT_THAT(setsockopt(socket_fd(), SOL_SOCKET, SO_BINDTODEVICE, name_buffer, + sizeof(name_buffer)), + SyscallFailsWithErrno(ENODEV)); + + // Read it back successfully, it's unchanged. + memset(name_buffer, kIllegalIfnameChar, sizeof(name_buffer)); + name_buffer_size = sizeof(name_buffer); + EXPECT_THAT(getsockopt(socket_fd(), SOL_SOCKET, SO_BINDTODEVICE, name_buffer, + &name_buffer_size), + SyscallSucceeds()); + EXPECT_EQ(name_buffer_size, interface_name().size() + 1); + EXPECT_STREQ(name_buffer, interface_name().c_str()); +} + +// Tests that setsockopt of zero-length string correctly unsets the previous +// value. +TEST_P(BindToDeviceTest, SetsockoptValidThenClear) { + char name_buffer[IFNAMSIZ * 2]; + socklen_t name_buffer_size; + + // Write successfully. + strncpy(name_buffer, interface_name().c_str(), sizeof(name_buffer)); + EXPECT_THAT(setsockopt(socket_fd(), SOL_SOCKET, SO_BINDTODEVICE, name_buffer, + sizeof(name_buffer)), + SyscallSucceeds()); + + // Read it back successfully. + memset(name_buffer, kIllegalIfnameChar, sizeof(name_buffer)); + name_buffer_size = sizeof(name_buffer); + EXPECT_THAT(getsockopt(socket_fd(), SOL_SOCKET, SO_BINDTODEVICE, name_buffer, + &name_buffer_size), + SyscallSucceeds()); + EXPECT_EQ(name_buffer_size, interface_name().size() + 1); + EXPECT_STREQ(name_buffer, interface_name().c_str()); + + // Clear it successfully. 
+ name_buffer_size = 0; + EXPECT_THAT(setsockopt(socket_fd(), SOL_SOCKET, SO_BINDTODEVICE, name_buffer, + name_buffer_size), + SyscallSucceeds()); + + // Read it back successfully, it's cleared. + memset(name_buffer, kIllegalIfnameChar, sizeof(name_buffer)); + name_buffer_size = sizeof(name_buffer); + EXPECT_THAT(getsockopt(socket_fd(), SOL_SOCKET, SO_BINDTODEVICE, name_buffer, + &name_buffer_size), + SyscallSucceeds()); + EXPECT_EQ(name_buffer_size, 0); +} + +// Tests that setsockopt of empty string correctly unsets the previous +// value. +TEST_P(BindToDeviceTest, SetsockoptValidThenClearWithNull) { + char name_buffer[IFNAMSIZ * 2]; + socklen_t name_buffer_size; + + // Write successfully. + strncpy(name_buffer, interface_name().c_str(), sizeof(name_buffer)); + EXPECT_THAT(setsockopt(socket_fd(), SOL_SOCKET, SO_BINDTODEVICE, name_buffer, + sizeof(name_buffer)), + SyscallSucceeds()); + + // Read it back successfully. + memset(name_buffer, kIllegalIfnameChar, sizeof(name_buffer)); + name_buffer_size = sizeof(name_buffer); + EXPECT_THAT(getsockopt(socket_fd(), SOL_SOCKET, SO_BINDTODEVICE, name_buffer, + &name_buffer_size), + SyscallSucceeds()); + EXPECT_EQ(name_buffer_size, interface_name().size() + 1); + EXPECT_STREQ(name_buffer, interface_name().c_str()); + + // Clear it successfully. + memset(name_buffer, kIllegalIfnameChar, sizeof(name_buffer)); + name_buffer[0] = 0; + name_buffer_size = sizeof(name_buffer); + EXPECT_THAT(setsockopt(socket_fd(), SOL_SOCKET, SO_BINDTODEVICE, name_buffer, + name_buffer_size), + SyscallSucceeds()); + + // Read it back successfully, it's cleared. + memset(name_buffer, kIllegalIfnameChar, sizeof(name_buffer)); + name_buffer_size = sizeof(name_buffer); + EXPECT_THAT(getsockopt(socket_fd(), SOL_SOCKET, SO_BINDTODEVICE, name_buffer, + &name_buffer_size), + SyscallSucceeds()); + EXPECT_EQ(name_buffer_size, 0); +} + +// Tests getsockopt with different buffer sizes. 
+TEST_P(BindToDeviceTest, GetsockoptDevice) { + char name_buffer[IFNAMSIZ * 2]; + socklen_t name_buffer_size; + + // Write successfully. + strncpy(name_buffer, interface_name().c_str(), sizeof(name_buffer)); + ASSERT_THAT(setsockopt(socket_fd(), SOL_SOCKET, SO_BINDTODEVICE, name_buffer, + sizeof(name_buffer)), + SyscallSucceeds()); + + // Read it back at various buffer sizes. + for (size_t i = 0; i <= sizeof(name_buffer); i++) { + memset(name_buffer, kIllegalIfnameChar, sizeof(name_buffer)); + name_buffer_size = i; + SCOPED_TRACE(absl::StrCat("Buffer size: ", i)); + // Linux only allows a buffer at least IFNAMSIZ, even if less would suffice + // for this interface name. + if (name_buffer_size >= IFNAMSIZ) { + EXPECT_THAT(getsockopt(socket_fd(), SOL_SOCKET, SO_BINDTODEVICE, + name_buffer, &name_buffer_size), + SyscallSucceeds()); + EXPECT_EQ(name_buffer_size, interface_name().size() + 1); + EXPECT_STREQ(name_buffer, interface_name().c_str()); + } else { + EXPECT_THAT(getsockopt(socket_fd(), SOL_SOCKET, SO_BINDTODEVICE, + name_buffer, &name_buffer_size), + SyscallFailsWithErrno(EINVAL)); + EXPECT_EQ(name_buffer_size, i); + } + } +} + +INSTANTIATE_TEST_SUITE_P(BindToDeviceTest, BindToDeviceTest, + ::testing::Values(IPv4UDPUnboundSocket(0), + IPv4TCPUnboundSocket(0))); + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_bind_to_device_distribution.cc b/test/syscalls/linux/socket_bind_to_device_distribution.cc new file mode 100644 index 000000000..5ed57625c --- /dev/null +++ b/test/syscalls/linux/socket_bind_to_device_distribution.cc @@ -0,0 +1,401 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <arpa/inet.h> +#include <linux/if_tun.h> +#include <net/if.h> +#include <netinet/in.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <sys/un.h> + +#include <atomic> +#include <cstdio> +#include <cstring> +#include <map> +#include <memory> +#include <unordered_map> +#include <unordered_set> +#include <utility> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "test/syscalls/linux/ip_socket_test_util.h" +#include "test/syscalls/linux/socket_bind_to_device_util.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/capability_util.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +using std::string; +using std::vector; + +struct EndpointConfig { + std::string bind_to_device; + double expected_ratio; +}; + +struct DistributionTestCase { + std::string name; + std::vector<EndpointConfig> endpoints; +}; + +struct ListenerConnector { + TestAddress listener; + TestAddress connector; +}; + +// Test fixture for SO_BINDTODEVICE tests the distribution of packets received +// with varying SO_BINDTODEVICE settings. 
+class BindToDeviceDistributionTest + : public ::testing::TestWithParam< + ::testing::tuple<ListenerConnector, DistributionTestCase>> { + protected: + void SetUp() override { + printf("Testing case: %s, listener=%s, connector=%s\n", + ::testing::get<1>(GetParam()).name.c_str(), + ::testing::get<0>(GetParam()).listener.description.c_str(), + ::testing::get<0>(GetParam()).connector.description.c_str()); + ASSERT_TRUE(ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) + << "CAP_NET_RAW is required to use SO_BINDTODEVICE"; + } +}; + +PosixErrorOr<uint16_t> AddrPort(int family, sockaddr_storage const& addr) { + switch (family) { + case AF_INET: + return static_cast<uint16_t>( + reinterpret_cast<sockaddr_in const*>(&addr)->sin_port); + case AF_INET6: + return static_cast<uint16_t>( + reinterpret_cast<sockaddr_in6 const*>(&addr)->sin6_port); + default: + return PosixError(EINVAL, + absl::StrCat("unknown socket family: ", family)); + } +} + +PosixError SetAddrPort(int family, sockaddr_storage* addr, uint16_t port) { + switch (family) { + case AF_INET: + reinterpret_cast<sockaddr_in*>(addr)->sin_port = port; + return NoError(); + case AF_INET6: + reinterpret_cast<sockaddr_in6*>(addr)->sin6_port = port; + return NoError(); + default: + return PosixError(EINVAL, + absl::StrCat("unknown socket family: ", family)); + } +} + +// Binds sockets to different devices and then creates many TCP connections. +// Checks that the distribution of connections received on the sockets matches +// the expectation. +TEST_P(BindToDeviceDistributionTest, Tcp) { + auto const& [listener_connector, test] = GetParam(); + + TestAddress const& listener = listener_connector.listener; + TestAddress const& connector = listener_connector.connector; + sockaddr_storage listen_addr = listener.addr; + sockaddr_storage conn_addr = connector.addr; + + auto interface_names = GetInterfaceNames(); + + // Create the listening sockets. 
+ std::vector<FileDescriptor> listener_fds; + std::vector<std::unique_ptr<Tunnel>> all_tunnels; + for (auto const& endpoint : test.endpoints) { + if (!endpoint.bind_to_device.empty() && + interface_names.find(endpoint.bind_to_device) == + interface_names.end()) { + all_tunnels.push_back( + ASSERT_NO_ERRNO_AND_VALUE(Tunnel::New(endpoint.bind_to_device))); + interface_names.insert(endpoint.bind_to_device); + } + + listener_fds.push_back(ASSERT_NO_ERRNO_AND_VALUE( + Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP))); + int fd = listener_fds.back().get(); + + ASSERT_THAT(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT(setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, + endpoint.bind_to_device.c_str(), + endpoint.bind_to_device.size() + 1), + SyscallSucceeds()); + ASSERT_THAT( + bind(fd, reinterpret_cast<sockaddr*>(&listen_addr), listener.addr_len), + SyscallSucceeds()); + ASSERT_THAT(listen(fd, 40), SyscallSucceeds()); + + // On the first bind we need to determine which port was bound. + if (listener_fds.size() > 1) { + continue; + } + + // Get the port bound by the listening socket. 
+ socklen_t addrlen = listener.addr_len; + ASSERT_THAT( + getsockname(listener_fds[0].get(), + reinterpret_cast<sockaddr*>(&listen_addr), &addrlen), + SyscallSucceeds()); + uint16_t const port = + ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr)); + ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port)); + } + + constexpr int kConnectAttempts = 10000; + std::atomic<int> connects_received = ATOMIC_VAR_INIT(0); + std::vector<int> accept_counts(listener_fds.size(), 0); + std::vector<std::unique_ptr<ScopedThread>> listen_threads( + listener_fds.size()); + + for (int i = 0; i < listener_fds.size(); i++) { + listen_threads[i] = absl::make_unique<ScopedThread>( + [&listener_fds, &accept_counts, &connects_received, i, + kConnectAttempts]() { + do { + auto fd = Accept(listener_fds[i].get(), nullptr, nullptr); + if (!fd.ok()) { + // Another thread has shutdown our read side causing the accept to + // fail. + ASSERT_GE(connects_received, kConnectAttempts) + << "errno = " << fd.error(); + return; + } + // Receive some data from a socket to be sure that the connect() + // system call has been completed on another side. + // Do a short read and then close the socket to trigger a RST. This + // ensures that both ends of the connection are cleaned up and no + // goroutines hang around in TIME-WAIT. We do this so that this test + // does not timeout under gotsan runs where lots of goroutines can + // cause the test to use absurd amounts of memory. + // + // See: https://tools.ietf.org/html/rfc2525#page-50 section 2.17 + uint16_t data; + EXPECT_THAT( + RetryEINTR(recv)(fd.ValueOrDie().get(), &data, sizeof(data), 0), + SyscallSucceedsWithValue(sizeof(data))); + accept_counts[i]++; + } while (++connects_received < kConnectAttempts); + + // Shutdown all sockets to wake up other threads. 
+ for (auto const& listener_fd : listener_fds) { + shutdown(listener_fd.get(), SHUT_RDWR); + } + }); + } + + for (int i = 0; i < kConnectAttempts; i++) { + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE( + Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP)); + ASSERT_THAT( + RetryEINTR(connect)(fd.get(), reinterpret_cast<sockaddr*>(&conn_addr), + connector.addr_len), + SyscallSucceeds()); + + // Do two separate sends to ensure two segments are received. This is + // required for netstack where read is incorrectly assuming a whole + // segment is read when endpoint.Read() is called which is technically + // incorrect as the syscall that invoked endpoint.Read() may only + // consume it partially. This results in a case where a close() of + // such a socket does not trigger a RST in netstack due to the + // endpoint assuming that the endpoint has no unread data. + EXPECT_THAT(RetryEINTR(send)(fd.get(), &i, sizeof(i), 0), + SyscallSucceedsWithValue(sizeof(i))); + + // TODO(gvisor.dev/issue/1449): Remove this block once netstack correctly + // generates a RST. + if (IsRunningOnGvisor()) { + EXPECT_THAT(RetryEINTR(send)(fd.get(), &i, sizeof(i), 0), + SyscallSucceedsWithValue(sizeof(i))); + } + } + + // Join threads to be sure that all connections have been counted. + for (auto const& listen_thread : listen_threads) { + listen_thread->Join(); + } + // Check that connections are distributed correctly among listening sockets. + for (int i = 0; i < accept_counts.size(); i++) { + EXPECT_THAT( + accept_counts[i], + EquivalentWithin(static_cast<int>(kConnectAttempts * + test.endpoints[i].expected_ratio), + 0.10)) + << "endpoint " << i << " got the wrong number of packets"; + } +} + +// Binds sockets to different devices and then sends many UDP packets. Checks +// that the distribution of packets received on the sockets matches the +// expectation. 
+TEST_P(BindToDeviceDistributionTest, Udp) { + auto const& [listener_connector, test] = GetParam(); + + TestAddress const& listener = listener_connector.listener; + TestAddress const& connector = listener_connector.connector; + sockaddr_storage listen_addr = listener.addr; + sockaddr_storage conn_addr = connector.addr; + + auto interface_names = GetInterfaceNames(); + + // Create the listening socket. + std::vector<FileDescriptor> listener_fds; + std::vector<std::unique_ptr<Tunnel>> all_tunnels; + for (auto const& endpoint : test.endpoints) { + if (!endpoint.bind_to_device.empty() && + interface_names.find(endpoint.bind_to_device) == + interface_names.end()) { + all_tunnels.push_back( + ASSERT_NO_ERRNO_AND_VALUE(Tunnel::New(endpoint.bind_to_device))); + interface_names.insert(endpoint.bind_to_device); + } + + listener_fds.push_back( + ASSERT_NO_ERRNO_AND_VALUE(Socket(listener.family(), SOCK_DGRAM, 0))); + int fd = listener_fds.back().get(); + + ASSERT_THAT(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT(setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, + endpoint.bind_to_device.c_str(), + endpoint.bind_to_device.size() + 1), + SyscallSucceeds()); + ASSERT_THAT( + bind(fd, reinterpret_cast<sockaddr*>(&listen_addr), listener.addr_len), + SyscallSucceeds()); + + // On the first bind we need to determine which port was bound. + if (listener_fds.size() > 1) { + continue; + } + + // Get the port bound by the listening socket. 
+ socklen_t addrlen = listener.addr_len; + ASSERT_THAT( + getsockname(listener_fds[0].get(), + reinterpret_cast<sockaddr*>(&listen_addr), &addrlen), + SyscallSucceeds()); + uint16_t const port = + ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr)); + ASSERT_NO_ERRNO(SetAddrPort(listener.family(), &listen_addr, port)); + ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port)); + } + + constexpr int kConnectAttempts = 10000; + std::atomic<int> packets_received = ATOMIC_VAR_INIT(0); + std::vector<int> packets_per_socket(listener_fds.size(), 0); + std::vector<std::unique_ptr<ScopedThread>> receiver_threads( + listener_fds.size()); + + for (int i = 0; i < listener_fds.size(); i++) { + receiver_threads[i] = absl::make_unique<ScopedThread>( + [&listener_fds, &packets_per_socket, &packets_received, i]() { + do { + struct sockaddr_storage addr = {}; + socklen_t addrlen = sizeof(addr); + int data; + + auto ret = RetryEINTR(recvfrom)( + listener_fds[i].get(), &data, sizeof(data), 0, + reinterpret_cast<struct sockaddr*>(&addr), &addrlen); + + if (packets_received < kConnectAttempts) { + ASSERT_THAT(ret, SyscallSucceedsWithValue(sizeof(data))); + } + + if (ret != sizeof(data)) { + // Another thread may have shutdown our read side causing the + // recvfrom to fail. + break; + } + + packets_received++; + packets_per_socket[i]++; + + // A response is required to synchronize with the main thread, + // otherwise the main thread can send more than can fit into receive + // queues. + EXPECT_THAT(RetryEINTR(sendto)( + listener_fds[i].get(), &data, sizeof(data), 0, + reinterpret_cast<sockaddr*>(&addr), addrlen), + SyscallSucceedsWithValue(sizeof(data))); + } while (packets_received < kConnectAttempts); + + // Shutdown all sockets to wake up other threads. 
+ for (auto const& listener_fd : listener_fds) { + shutdown(listener_fd.get(), SHUT_RDWR); + } + }); + } + + for (int i = 0; i < kConnectAttempts; i++) { + FileDescriptor const fd = + ASSERT_NO_ERRNO_AND_VALUE(Socket(connector.family(), SOCK_DGRAM, 0)); + EXPECT_THAT(RetryEINTR(sendto)(fd.get(), &i, sizeof(i), 0, + reinterpret_cast<sockaddr*>(&conn_addr), + connector.addr_len), + SyscallSucceedsWithValue(sizeof(i))); + int data; + EXPECT_THAT(RetryEINTR(recv)(fd.get(), &data, sizeof(data), 0), + SyscallSucceedsWithValue(sizeof(data))); + } + + // Join threads to be sure that all connections have been counted. + for (auto const& receiver_thread : receiver_threads) { + receiver_thread->Join(); + } + // Check that packets are distributed correctly among listening sockets. + for (int i = 0; i < packets_per_socket.size(); i++) { + EXPECT_THAT( + packets_per_socket[i], + EquivalentWithin(static_cast<int>(kConnectAttempts * + test.endpoints[i].expected_ratio), + 0.10)) + << "endpoint " << i << " got the wrong number of packets"; + } +} + +std::vector<DistributionTestCase> GetDistributionTestCases() { + return std::vector<DistributionTestCase>{ + {"Even distribution among sockets not bound to device", + {{"", 1. / 3}, {"", 1. / 3}, {"", 1. / 3}}}, + {"Sockets bound to other interfaces get no packets", + {{"eth1", 0}, {"", 1. / 2}, {"", 1. / 2}}}, + {"Bound has priority over unbound", {{"eth1", 0}, {"", 0}, {"lo", 1}}}, + {"Even distribution among sockets bound to device", + {{"eth1", 0}, {"lo", 1. / 2}, {"lo", 1. / 2}}}, + }; +} + +INSTANTIATE_TEST_SUITE_P( + BindToDeviceTest, BindToDeviceDistributionTest, + ::testing::Combine(::testing::Values( + // Listeners bound to IPv4 addresses refuse + // connections using IPv6 addresses. 
+ ListenerConnector{V4Any(), V4Loopback()}, + ListenerConnector{V4Loopback(), V4MappedLoopback()}), + ::testing::ValuesIn(GetDistributionTestCases()))); + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_bind_to_device_sequence.cc b/test/syscalls/linux/socket_bind_to_device_sequence.cc new file mode 100644 index 000000000..d3cc71dbf --- /dev/null +++ b/test/syscalls/linux/socket_bind_to_device_sequence.cc @@ -0,0 +1,513 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <arpa/inet.h> +#include <linux/capability.h> +#include <linux/if_tun.h> +#include <net/if.h> +#include <netinet/in.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <sys/un.h> + +#include <cstdio> +#include <cstring> +#include <map> +#include <memory> +#include <unordered_map> +#include <unordered_set> +#include <utility> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/container/node_hash_map.h" +#include "test/syscalls/linux/ip_socket_test_util.h" +#include "test/syscalls/linux/socket_bind_to_device_util.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/capability_util.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +using std::string; +using std::vector; + +// Test fixture for SO_BINDTODEVICE tests the results of sequences of socket +// binding. +class BindToDeviceSequenceTest : public ::testing::TestWithParam<SocketKind> { + protected: + void SetUp() override { + printf("Testing case: %s\n", GetParam().description.c_str()); + ASSERT_TRUE(ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) + << "CAP_NET_RAW is required to use SO_BINDTODEVICE"; + socket_factory_ = GetParam(); + + interface_names_ = GetInterfaceNames(); + } + + PosixErrorOr<std::unique_ptr<FileDescriptor>> NewSocket() const { + return socket_factory_.Create(); + } + + // Gets a device by device_id. If the device_id has been seen before, returns + // the previously returned device. If not, finds or creates a new device. + // Returns an empty string on failure. + void GetDevice(int device_id, string* device_name) { + auto device = devices_.find(device_id); + if (device != devices_.end()) { + *device_name = device->second; + return; + } + + // Need to pick a new device. Try ethernet first. 
+ *device_name = absl::StrCat("eth", next_unused_eth_); + if (interface_names_.find(*device_name) != interface_names_.end()) { + devices_[device_id] = *device_name; + next_unused_eth_++; + return; + } + + // Need to make a new tunnel device. gVisor tests should have enough + // ethernet devices to never reach here. + ASSERT_FALSE(IsRunningOnGvisor()); + // Need a tunnel. + tunnels_.push_back(ASSERT_NO_ERRNO_AND_VALUE(Tunnel::New())); + devices_[device_id] = tunnels_.back()->GetName(); + *device_name = devices_[device_id]; + } + + // Release the socket + void ReleaseSocket(int socket_id) { + // Close the socket that was made in a previous action. The socket_id + // indicates which socket to close based on index into the list of actions. + sockets_to_close_.erase(socket_id); + } + + // SetDevice changes the bind_to_device option. It does not bind or re-bind. + void SetDevice(int socket_id, int device_id) { + auto socket_fd = sockets_to_close_[socket_id]->get(); + string device_name; + ASSERT_NO_FATAL_FAILURE(GetDevice(device_id, &device_name)); + EXPECT_THAT(setsockopt(socket_fd, SOL_SOCKET, SO_BINDTODEVICE, + device_name.c_str(), device_name.size() + 1), + SyscallSucceedsWithValue(0)); + } + + // Bind a socket with the reuse options and bind_to_device options. Checks + // that all steps succeed and that the bind command's error matches want. + // Sets the socket_id to uniquely identify the socket bound if it is not + // nullptr. + void BindSocket(bool reuse_port, bool reuse_addr, int device_id = 0, + int want = 0, int* socket_id = nullptr) { + next_socket_id_++; + sockets_to_close_[next_socket_id_] = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket_fd = sockets_to_close_[next_socket_id_]->get(); + if (socket_id != nullptr) { + *socket_id = next_socket_id_; + } + + // If reuse_port is indicated, do that. 
+ if (reuse_port) { + EXPECT_THAT(setsockopt(socket_fd, SOL_SOCKET, SO_REUSEPORT, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceedsWithValue(0)); + } + + // If reuse_addr is indicated, do that. + if (reuse_addr) { + EXPECT_THAT(setsockopt(socket_fd, SOL_SOCKET, SO_REUSEADDR, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceedsWithValue(0)); + } + + // If the device is non-zero, bind to that device. + if (device_id != 0) { + string device_name; + ASSERT_NO_FATAL_FAILURE(GetDevice(device_id, &device_name)); + EXPECT_THAT(setsockopt(socket_fd, SOL_SOCKET, SO_BINDTODEVICE, + device_name.c_str(), device_name.size() + 1), + SyscallSucceedsWithValue(0)); + char get_device[100]; + socklen_t get_device_size = 100; + EXPECT_THAT(getsockopt(socket_fd, SOL_SOCKET, SO_BINDTODEVICE, get_device, + &get_device_size), + SyscallSucceedsWithValue(0)); + } + + struct sockaddr_in addr = {}; + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + addr.sin_port = port_; + if (want == 0) { + ASSERT_THAT( + bind(socket_fd, reinterpret_cast<const struct sockaddr*>(&addr), + sizeof(addr)), + SyscallSucceeds()); + } else { + ASSERT_THAT( + bind(socket_fd, reinterpret_cast<const struct sockaddr*>(&addr), + sizeof(addr)), + SyscallFailsWithErrno(want)); + } + + if (port_ == 0) { + // We don't yet know what port we'll be using so we need to fetch it and + // remember it for future commands. + socklen_t addr_size = sizeof(addr); + ASSERT_THAT( + getsockname(socket_fd, reinterpret_cast<struct sockaddr*>(&addr), + &addr_size), + SyscallSucceeds()); + port_ = addr.sin_port; + } + } + + private: + SocketKind socket_factory_; + // devices maps from the device id in the test case to the name of the device. + absl::node_hash_map<int, string> devices_; + // These are the tunnels that were created for the test and will be destroyed + // by the destructor. + vector<std::unique_ptr<Tunnel>> tunnels_; + // A list of all interface names before the test started. 
+ std::unordered_set<string> interface_names_; + // The next ethernet device to use when requested a device. + int next_unused_eth_ = 1; + // The port for all tests. Originally 0 (any) and later set to the port that + // all further commands will use. + in_port_t port_ = 0; + // sockets_to_close_ is a map from action index to the socket that was + // created. + absl::node_hash_map<int, + std::unique_ptr<gvisor::testing::FileDescriptor>> + sockets_to_close_; + int next_socket_id_ = 0; +}; + +TEST_P(BindToDeviceSequenceTest, BindTwiceWithDeviceFails) { + ASSERT_NO_FATAL_FAILURE(BindSocket( + /* reuse_port */ false, /* reuse_addr */ false, /* bind_to_device */ 3)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ false, + /* bind_to_device */ 3, EADDRINUSE)); +} + +TEST_P(BindToDeviceSequenceTest, BindToDevice) { + ASSERT_NO_FATAL_FAILURE(BindSocket( + /* reuse_port */ false, /* reuse_addr */ false, /* bind_to_device */ 1)); + ASSERT_NO_FATAL_FAILURE(BindSocket( + /* reuse_port */ false, /* reuse_addr */ false, /* bind_to_device */ 2)); +} + +TEST_P(BindToDeviceSequenceTest, BindToDeviceAndThenWithoutDevice) { + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ false, + /* bind_to_device */ 123)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ false, + /* bind_to_device */ 0, EADDRINUSE)); +} + +TEST_P(BindToDeviceSequenceTest, BindWithoutDevice) { + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ false)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ false, + /* bind_to_device */ 123, EADDRINUSE)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true, + /* reuse_addr */ false, + /* bind_to_device */ 123, EADDRINUSE)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ false, + /* bind_to_device */ 0, EADDRINUSE)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true, + /* 
reuse_addr */ false, + /* bind_to_device */ 0, EADDRINUSE)); +} + +TEST_P(BindToDeviceSequenceTest, BindWithDevice) { + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ false, + /* bind_to_device */ 123, 0)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ false, + /* bind_to_device */ 123, EADDRINUSE)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true, + /* reuse_addr */ false, + /* bind_to_device */ 123, EADDRINUSE)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ false, + /* bind_to_device */ 0, EADDRINUSE)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true, + /* reuse_addr */ false, + /* bind_to_device */ 0, EADDRINUSE)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true, + /* reuse_addr */ false, + /* bind_to_device */ 456, 0)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ false, + /* bind_to_device */ 789, 0)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ false, + /* bind_to_device */ 0, EADDRINUSE)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true, + /* reuse_addr */ false, + /* bind_to_device */ 0, EADDRINUSE)); +} + +TEST_P(BindToDeviceSequenceTest, BindWithReuse) { + ASSERT_NO_FATAL_FAILURE( + BindSocket(/* reusePort */ true, /* reuse_addr */ false)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ false, + /* bind_to_device */ 123, EADDRINUSE)); + ASSERT_NO_FATAL_FAILURE(BindSocket( + /* reuse_port */ true, /* reuse_addr */ false, + /* bind_to_device */ 123)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ false, + /* bind_to_device */ 0, EADDRINUSE)); + ASSERT_NO_FATAL_FAILURE(BindSocket( + /* reuse_port */ true, /* reuse_addr */ false, /* bind_to_device */ 0)); +} + +TEST_P(BindToDeviceSequenceTest, BindingWithReuseAndDevice) { + ASSERT_NO_FATAL_FAILURE(BindSocket( + /* reuse_port */ 
true, /* reuse_addr */ false, /* bind_to_device */ 123)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ false, + /* bind_to_device */ 123, EADDRINUSE)); + ASSERT_NO_FATAL_FAILURE(BindSocket( + /* reuse_port */ true, /* reuse_addr */ false, /* bind_to_device */ 123)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ false, + /* bind_to_device */ 0, EADDRINUSE)); + ASSERT_NO_FATAL_FAILURE(BindSocket( + /* reuse_port */ true, /* reuse_addr */ false, /* bind_to_device */ 456)); + ASSERT_NO_FATAL_FAILURE( + BindSocket(/* reuse_port */ true, /* reuse_addr */ false)); + ASSERT_NO_FATAL_FAILURE(BindSocket( + /* reuse_port */ true, /* reuse_addr */ false, /* bind_to_device */ 789)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ false, + /* bind_to_device */ 999, EADDRINUSE)); +} + +TEST_P(BindToDeviceSequenceTest, MixingReuseAndNotReuseByBindingToDevice) { + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true, + /* reuse_addr */ false, + /* bind_to_device */ 123, 0)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ false, + /* bind_to_device */ 456, 0)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true, + /* reuse_addr */ false, + /* bind_to_device */ 789, 0)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ false, + /* bind_to_device */ 999, 0)); +} + +TEST_P(BindToDeviceSequenceTest, CannotBindTo0AfterMixingReuseAndNotReuse) { + ASSERT_NO_FATAL_FAILURE(BindSocket( + /* reuse_port */ true, /* reuse_addr */ false, /* bind_to_device */ 123)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ false, + /* bind_to_device */ 456)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true, + /* reuse_addr */ false, + /* bind_to_device */ 0, EADDRINUSE)); +} + +TEST_P(BindToDeviceSequenceTest, BindAndRelease) { + ASSERT_NO_FATAL_FAILURE(BindSocket( + /* reuse_port */ true, 
/* reuse_addr */ false, /* bind_to_device */ 123)); + int to_release; + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true, + /* reuse_addr */ false, + /* bind_to_device */ 0, 0, &to_release)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ false, + /* bind_to_device */ 345, EADDRINUSE)); + ASSERT_NO_FATAL_FAILURE(BindSocket( + /* reuse_port */ true, /* reuse_addr */ false, /* bind_to_device */ 789)); + // Release the bind to device 0 and try again. + ASSERT_NO_FATAL_FAILURE(ReleaseSocket(to_release)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ false, + /* bind_to_device */ 345)); +} + +TEST_P(BindToDeviceSequenceTest, BindTwiceWithReuseOnce) { + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ false, + /* bind_to_device */ 123)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true, + /* reuse_addr */ false, + /* bind_to_device */ 0, EADDRINUSE)); +} + +TEST_P(BindToDeviceSequenceTest, BindWithReuseAddr) { + ASSERT_NO_FATAL_FAILURE( + BindSocket(/* reusePort */ false, /* reuse_addr */ true)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ false, + /* bind_to_device */ 123, EADDRINUSE)); + ASSERT_NO_FATAL_FAILURE(BindSocket( + /* reuse_port */ false, /* reuse_addr */ true, /* bind_to_device */ 123)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ false, + /* bind_to_device */ 0, EADDRINUSE)); + ASSERT_NO_FATAL_FAILURE(BindSocket( + /* reuse_port */ false, /* reuse_addr */ true, /* bind_to_device */ 0)); +} + +TEST_P(BindToDeviceSequenceTest, + CannotBindTo0AfterMixingReuseAddrAndNotReuseAddr) { + ASSERT_NO_FATAL_FAILURE(BindSocket( + /* reuse_port */ true, /* reuse_addr */ false, /* bind_to_device */ 123)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ false, + /* bind_to_device */ 456)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* 
reuse_addr */ true, + /* bind_to_device */ 0, EADDRINUSE)); +} + +TEST_P(BindToDeviceSequenceTest, BindReuseAddrReusePortThenReusePort) { + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true, + /* reuse_addr */ true, + /* bind_to_device */ 0)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true, + /* reuse_addr */ false, + /* bind_to_device */ 0)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ true, + /* bind_to_device */ 0, EADDRINUSE)); +} + +TEST_P(BindToDeviceSequenceTest, BindReuseAddrReusePortThenReuseAddr) { + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true, + /* reuse_addr */ true, + /* bind_to_device */ 0)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ true, + /* bind_to_device */ 0)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true, + /* reuse_addr */ false, + /* bind_to_device */ 0, EADDRINUSE)); +} + +TEST_P(BindToDeviceSequenceTest, BindDoubleReuseAddrReusePortThenReusePort) { + ASSERT_NO_FATAL_FAILURE(BindSocket( + /* reuse_port */ true, /* reuse_addr */ true, /* bind_to_device */ 0)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true, + /* reuse_addr */ true, + /* bind_to_device */ 0)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true, + /* reuse_addr */ false, + /* bind_to_device */ 0)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ true, + /* bind_to_device */ 0, EADDRINUSE)); +} + +TEST_P(BindToDeviceSequenceTest, BindDoubleReuseAddrReusePortThenReuseAddr) { + ASSERT_NO_FATAL_FAILURE(BindSocket( + /* reuse_port */ true, /* reuse_addr */ true, /* bind_to_device */ 0)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true, + /* reuse_addr */ true, + /* bind_to_device */ 0)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ true, + /* bind_to_device */ 0)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true, + /* reuse_addr */ false, + /* bind_to_device 
*/ 0, EADDRINUSE)); +} + +TEST_P(BindToDeviceSequenceTest, BindReusePortThenReuseAddrReusePort) { + ASSERT_NO_FATAL_FAILURE(BindSocket( + /* reuse_port */ true, /* reuse_addr */ false, /* bind_to_device */ 0)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true, + /* reuse_addr */ true, + /* bind_to_device */ 0)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ true, + /* bind_to_device */ 0, EADDRINUSE)); +} + +TEST_P(BindToDeviceSequenceTest, BindReuseAddrThenReuseAddr) { + ASSERT_NO_FATAL_FAILURE(BindSocket( + /* reuse_port */ false, /* reuse_addr */ true, /* bind_to_device */ 0)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true, + /* reuse_addr */ false, + /* bind_to_device */ 0, EADDRINUSE)); +} + +TEST_P(BindToDeviceSequenceTest, + BindReuseAddrThenReuseAddrReusePortThenReuseAddr) { + // The behavior described in this test seems like a Linux bug. It doesn't + // make any sense and it is unlikely that any applications rely on it. + // + // Both SO_REUSEADDR and SO_REUSEPORT allow binding multiple UDP sockets to + // the same address and deliver each packet to exactly one of the bound + // sockets. If both are enabled, one of the strategies is selected to route + // packets. The strategy is selected dynamically based on the settings of the + // currently bound sockets. Usually, the strategy is selected based on the + // common setting (SO_REUSEADDR or SO_REUSEPORT) amongst the sockets, but for + // some reason, Linux allows binding sets of sockets with no overlapping + // settings in some situations. In this case, it is not obvious which strategy + // would be selected as the configured setting is a contradiction. 
+ SKIP_IF(IsRunningOnGvisor()); + + ASSERT_NO_FATAL_FAILURE(BindSocket( + /* reuse_port */ false, /* reuse_addr */ true, /* bind_to_device */ 0)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true, + /* reuse_addr */ true, + /* bind_to_device */ 0)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true, + /* reuse_addr */ false, + /* bind_to_device */ 0)); +} + +// Repro test for gvisor.dev/issue/1217. Not replicated in ports_test.go as this +// test is different from the others and wouldn't fit well there. +TEST_P(BindToDeviceSequenceTest, BindAndReleaseDifferentDevice) { + int to_release; + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ false, + /* bind_to_device */ 3, 0, &to_release)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ false, + /* bind_to_device */ 3, EADDRINUSE)); + // Change the device. Since the socket was already bound, this should have no + // effect. + SetDevice(to_release, 2); + // Release the bind to device 3 and try again. + ASSERT_NO_FATAL_FAILURE(ReleaseSocket(to_release)); + ASSERT_NO_FATAL_FAILURE(BindSocket( + /* reuse_port */ false, /* reuse_addr */ false, /* bind_to_device */ 3)); +} + +INSTANTIATE_TEST_SUITE_P(BindToDeviceTest, BindToDeviceSequenceTest, + ::testing::Values(IPv4UDPUnboundSocket(0), + IPv4TCPUnboundSocket(0))); + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_bind_to_device_util.cc b/test/syscalls/linux/socket_bind_to_device_util.cc new file mode 100644 index 000000000..f4ee775bd --- /dev/null +++ b/test/syscalls/linux/socket_bind_to_device_util.cc @@ -0,0 +1,75 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "test/syscalls/linux/socket_bind_to_device_util.h" + +#include <arpa/inet.h> +#include <fcntl.h> +#include <linux/if_tun.h> +#include <net/if.h> +#include <netinet/in.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <sys/un.h> +#include <unistd.h> + +#include <cstdio> +#include <cstring> +#include <map> +#include <memory> +#include <unordered_map> +#include <unordered_set> +#include <utility> +#include <vector> + +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +using std::string; + +PosixErrorOr<std::unique_ptr<Tunnel>> Tunnel::New(string tunnel_name) { + int fd; + RETURN_ERROR_IF_SYSCALL_FAIL(fd = open("/dev/net/tun", O_RDWR)); + + // Using `new` to access a non-public constructor. 
+ auto new_tunnel = absl::WrapUnique(new Tunnel(fd)); + + ifreq ifr = {}; + ifr.ifr_flags = IFF_TUN; + strncpy(ifr.ifr_name, tunnel_name.c_str(), sizeof(ifr.ifr_name)); + + RETURN_ERROR_IF_SYSCALL_FAIL(ioctl(fd, TUNSETIFF, &ifr)); + new_tunnel->name_ = ifr.ifr_name; + return new_tunnel; +} + +std::unordered_set<string> GetInterfaceNames() { + struct if_nameindex* interfaces = if_nameindex(); + std::unordered_set<string> names; + if (interfaces == nullptr) { + return names; + } + for (auto interface = interfaces; + interface->if_index != 0 || interface->if_name != nullptr; interface++) { + names.insert(interface->if_name); + } + if_freenameindex(interfaces); + return names; +} + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_bind_to_device_util.h b/test/syscalls/linux/socket_bind_to_device_util.h new file mode 100644 index 000000000..f941ccc86 --- /dev/null +++ b/test/syscalls/linux/socket_bind_to_device_util.h @@ -0,0 +1,67 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef GVISOR_TEST_SYSCALLS_SOCKET_BIND_TO_DEVICE_UTILS_H_ +#define GVISOR_TEST_SYSCALLS_SOCKET_BIND_TO_DEVICE_UTILS_H_ + +#include <arpa/inet.h> +#include <linux/if_tun.h> +#include <net/if.h> +#include <netinet/in.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <sys/un.h> +#include <unistd.h> + +#include <cstdio> +#include <cstring> +#include <map> +#include <memory> +#include <string> +#include <unordered_map> +#include <unordered_set> +#include <utility> +#include <vector> + +#include "absl/memory/memory.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +// Tunnel owns the file descriptor of a TUN device created by New() and closes +// it on destruction. +class Tunnel { + public: + // Opens a tunnel device; see socket_bind_to_device_util.cc. + static PosixErrorOr<std::unique_ptr<Tunnel>> New( + std::string tunnel_name = ""); + const std::string& GetName() const { return name_; } + + // The destructor closes fd_, so a copy would close the same descriptor + // twice. Tunnels are only handed out through std::unique_ptr. + Tunnel(const Tunnel&) = delete; + Tunnel& operator=(const Tunnel&) = delete; + + ~Tunnel() { + if (fd_ != -1) { + close(fd_); + } + } + + private: + explicit Tunnel(int fd) : fd_(fd) {} + int fd_ = -1; + std::string name_; +}; + +// Returns the names of the network interfaces currently present. +std::unordered_set<std::string> GetInterfaceNames(); + +} // namespace testing +} // namespace gvisor + +#endif // GVISOR_TEST_SYSCALLS_SOCKET_BIND_TO_DEVICE_UTILS_H_ diff --git a/test/syscalls/linux/socket_blocking.cc b/test/syscalls/linux/socket_blocking.cc new file mode 100644 index 000000000..7e88aa2d9 --- /dev/null +++ b/test/syscalls/linux/socket_blocking.cc @@ -0,0 +1,60 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "test/syscalls/linux/socket_blocking.h" + +#include <sys/socket.h> +#include <sys/types.h> +#include <sys/un.h> + +#include <cstdio> + +#include "gtest/gtest.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" +#include "test/util/timer_util.h" + +namespace gvisor { +namespace testing { + +// Checks that recv() on a blocking socket waits for data: the writer thread +// sends only after kDuration, so the receive must take at least that long. +TEST_P(BlockingSocketPairTest, RecvBlocks) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[100]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + constexpr auto kDuration = absl::Milliseconds(200); + auto before = Now(CLOCK_MONOTONIC); + + // Write from a second thread after a delay while this thread blocks in + // recv() below. + const ScopedThread t([&]() { + absl::SleepFor(kDuration); + ASSERT_THAT(write(sockets->first_fd(), sent_data, sizeof(sent_data)), + SyscallSucceedsWithValue(sizeof(sent_data))); + }); + + char received_data[sizeof(sent_data)] = {}; + ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data, + sizeof(received_data), 0), + SyscallSucceedsWithValue(sizeof(received_data))); + + auto after = Now(CLOCK_MONOTONIC); + EXPECT_GE(after - before, kDuration); +} + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_blocking.h b/test/syscalls/linux/socket_blocking.h new file mode 100644 index 000000000..db26e5ef5 --- /dev/null +++ b/test/syscalls/linux/socket_blocking.h @@ -0,0 +1,29 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_BLOCKING_H_ +#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_BLOCKING_H_ + +#include "test/syscalls/linux/socket_test_util.h" + +namespace gvisor { +namespace testing { + +// Test fixture for tests that apply to pairs of blocking connected sockets. +using BlockingSocketPairTest = SocketPairTest; + +} // namespace testing +} // namespace gvisor + +#endif // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_BLOCKING_H_ diff --git a/test/syscalls/linux/socket_capability.cc b/test/syscalls/linux/socket_capability.cc new file mode 100644 index 000000000..84b5b2b21 --- /dev/null +++ b/test/syscalls/linux/socket_capability.cc @@ -0,0 +1,61 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Subset of socket tests that need Linux-specific headers (compared to POSIX +// headers). 
+ +#include "gtest/gtest.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/capability_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +// Checks that connect() to a filesystem-bound unix socket requires write +// permission on the socket file. +TEST(SocketTest, UnixConnectNeedsWritePerm) { + SKIP_IF(IsRunningWithVFS1()); + + FileDescriptor bound = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_UNIX, SOCK_STREAM, PF_UNIX)); + + struct sockaddr_un addr = + ASSERT_NO_ERRNO_AND_VALUE(UniqueUnixAddr(/*abstract=*/false, AF_UNIX)); + ASSERT_THAT(bind(bound.get(), reinterpret_cast<struct sockaddr*>(&addr), + sizeof(addr)), + SyscallSucceeds()); + ASSERT_THAT(listen(bound.get(), 1), SyscallSucceeds()); + + // Drop capabilities that allow us to override permission checks. Otherwise if + // the test is run as root, the connect below will bypass permission checks + // and succeed unexpectedly. + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + + // Connect should fail without write perms. + ASSERT_THAT(chmod(addr.sun_path, 0500), SyscallSucceeds()); + FileDescriptor client = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_UNIX, SOCK_STREAM, PF_UNIX)); + ASSERT_THAT(connect(client.get(), reinterpret_cast<struct sockaddr*>(&addr), + sizeof(addr)), + SyscallFailsWithErrno(EACCES)); + + // Connect should succeed with write perms. + ASSERT_THAT(chmod(addr.sun_path, 0200), SyscallSucceeds()); + EXPECT_THAT(connect(client.get(), reinterpret_cast<struct sockaddr*>(&addr), + sizeof(addr)), + SyscallSucceeds()); +} + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_filesystem.cc b/test/syscalls/linux/socket_filesystem.cc new file mode 100644 index 000000000..287359363 --- /dev/null +++ b/test/syscalls/linux/socket_filesystem.cc @@ -0,0 +1,49 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <vector> + +#include "test/syscalls/linux/socket_generic.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/socket_unix.h" +#include "test/syscalls/linux/socket_unix_cmsg.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { +namespace { + +// Builds the socket pair kinds under test by applying +// FilesystemBoundUnixDomainSocketPair to the combinations formed from +// {SOCK_STREAM, SOCK_DGRAM, SOCK_SEQPACKET} and {0, SOCK_NONBLOCK}. +// NOTE(review): AllBitwiseCombinations presumably ORs one value from each +// list; confirm against socket_test_util.h. +std::vector<SocketPairKind> GetSocketPairs() { + return ApplyVec<SocketPairKind>( + FilesystemBoundUnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_STREAM, SOCK_DGRAM, SOCK_SEQPACKET}, + List<int>{0, SOCK_NONBLOCK})); +} + +// Each suite below runs over every pair kind plus its reversal. +INSTANTIATE_TEST_SUITE_P( + FilesystemUnixSockets, AllSocketPairTest, + ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); + +INSTANTIATE_TEST_SUITE_P( + FilesystemUnixSockets, UnixSocketPairTest, + ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); + +INSTANTIATE_TEST_SUITE_P( + FilesystemUnixSockets, UnixSocketPairCmsgTest, + ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_generic.cc b/test/syscalls/linux/socket_generic.cc new file mode 100644 index 000000000..f7d6139f1 --- /dev/null +++ b/test/syscalls/linux/socket_generic.cc @@ -0,0 +1,820 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "test/syscalls/linux/socket_generic.h" + +#include <stdio.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/un.h> + +#include "gtest/gtest.h" +#include "absl/strings/str_format.h" +#include "absl/strings/string_view.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" + +// This file is a generic socket test file. It must be built with another file +// that provides the test types. + +namespace gvisor { +namespace testing { + +TEST_P(AllSocketPairTest, BasicReadWrite) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + char buf[20]; + const std::string data = "abc"; + ASSERT_THAT(WriteFd(sockets->first_fd(), data.c_str(), 3), + SyscallSucceedsWithValue(3)); + ASSERT_THAT(ReadFd(sockets->second_fd(), buf, 3), + SyscallSucceedsWithValue(3)); + EXPECT_EQ(data, absl::string_view(buf, 3)); +} + +TEST_P(AllSocketPairTest, BasicSendRecv) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + char sent_data[512]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + ASSERT_THAT( + RetryEINTR(send)(sockets->first_fd(), sent_data, sizeof(sent_data), 0), + SyscallSucceedsWithValue(sizeof(sent_data))); + char received_data[sizeof(sent_data)]; + ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data, + sizeof(received_data), 0), + SyscallSucceedsWithValue(sizeof(received_data))); + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); +} + +TEST_P(AllSocketPairTest, BasicSendmmsg) { + auto 
sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + char sent_data[200]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + std::vector<struct mmsghdr> msgs(10); + std::vector<struct iovec> iovs(msgs.size()); + const int chunk_size = sizeof(sent_data) / msgs.size(); + for (size_t i = 0; i < msgs.size(); i++) { + iovs[i].iov_len = chunk_size; + iovs[i].iov_base = &sent_data[i * chunk_size]; + msgs[i].msg_hdr.msg_iov = &iovs[i]; + msgs[i].msg_hdr.msg_iovlen = 1; + } + + ASSERT_THAT( + RetryEINTR(sendmmsg)(sockets->first_fd(), &msgs[0], msgs.size(), 0), + SyscallSucceedsWithValue(msgs.size())); + + for (const struct mmsghdr& msg : msgs) { + EXPECT_EQ(chunk_size, msg.msg_len); + } + + char received_data[sizeof(sent_data)]; + for (size_t i = 0; i < msgs.size(); i++) { + ASSERT_THAT(ReadFd(sockets->second_fd(), &received_data[i * chunk_size], + chunk_size), + SyscallSucceedsWithValue(chunk_size)); + } + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); +} + +TEST_P(AllSocketPairTest, BasicRecvmmsg) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + char sent_data[200]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + char received_data[sizeof(sent_data)]; + std::vector<struct mmsghdr> msgs(10); + std::vector<struct iovec> iovs(msgs.size()); + const int chunk_size = sizeof(sent_data) / msgs.size(); + for (size_t i = 0; i < msgs.size(); i++) { + iovs[i].iov_len = chunk_size; + iovs[i].iov_base = &received_data[i * chunk_size]; + msgs[i].msg_hdr.msg_iov = &iovs[i]; + msgs[i].msg_hdr.msg_iovlen = 1; + } + + for (size_t i = 0; i < msgs.size(); i++) { + ASSERT_THAT( + WriteFd(sockets->first_fd(), &sent_data[i * chunk_size], chunk_size), + SyscallSucceedsWithValue(chunk_size)); + } + + ASSERT_THAT(RetryEINTR(recvmmsg)(sockets->second_fd(), &msgs[0], msgs.size(), + 0, nullptr), + SyscallSucceedsWithValue(msgs.size())); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); + + for (const struct mmsghdr& msg : msgs) { + 
EXPECT_EQ(chunk_size, msg.msg_len); + } +} + +TEST_P(AllSocketPairTest, SendmsgRecvmsg10KB) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + std::vector<char> sent_data(10 * 1024); + RandomizeBuffer(sent_data.data(), sent_data.size()); + ASSERT_NO_FATAL_FAILURE( + SendNullCmsg(sockets->first_fd(), sent_data.data(), sent_data.size())); + + std::vector<char> received_data(sent_data.size()); + ASSERT_NO_FATAL_FAILURE(RecvNoCmsg(sockets->second_fd(), received_data.data(), + received_data.size())); + + EXPECT_EQ(0, + memcmp(sent_data.data(), received_data.data(), sent_data.size())); +} + +// This test validates that a sendmsg/recvmsg w/ MSG_CTRUNC is a no-op on +// input flags. +TEST_P(AllSocketPairTest, SendmsgRecvmsgMsgCtruncNoop) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + std::vector<char> sent_data(10 * 1024); + RandomizeBuffer(sent_data.data(), sent_data.size()); + ASSERT_NO_FATAL_FAILURE( + SendNullCmsg(sockets->first_fd(), sent_data.data(), sent_data.size())); + + std::vector<char> received_data(sent_data.size()); + struct msghdr msg = {}; + char control[CMSG_SPACE(sizeof(int)) + CMSG_SPACE(sizeof(struct ucred))]; + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + struct iovec iov; + iov.iov_base = &received_data[0]; + iov.iov_len = received_data.size(); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + // MSG_CTRUNC should be a no-op. 
+ ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, MSG_CTRUNC), + SyscallSucceedsWithValue(received_data.size())); + struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg); + EXPECT_EQ(cmsg, nullptr); + EXPECT_EQ(msg.msg_controllen, 0); + EXPECT_EQ(0, + memcmp(sent_data.data(), received_data.data(), sent_data.size())); +} + +TEST_P(AllSocketPairTest, SendmsgRecvmsg16KB) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + std::vector<char> sent_data(16 * 1024); + RandomizeBuffer(sent_data.data(), sent_data.size()); + ASSERT_NO_FATAL_FAILURE( + SendNullCmsg(sockets->first_fd(), sent_data.data(), sent_data.size())); + + std::vector<char> received_data(sent_data.size()); + ASSERT_NO_FATAL_FAILURE(RecvNoCmsg(sockets->second_fd(), received_data.data(), + received_data.size())); + + EXPECT_EQ(0, + memcmp(sent_data.data(), received_data.data(), sent_data.size())); +} + +TEST_P(AllSocketPairTest, RecvmsgMsghdrFlagsNotClearedOnFailure) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char received_data[10] = {}; + + struct iovec iov; + iov.iov_base = received_data; + iov.iov_len = sizeof(received_data); + struct msghdr msg = {}; + msg.msg_flags = -1; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, MSG_DONTWAIT), + SyscallFailsWithErrno(EAGAIN)); + + // Check that msghdr flags were not changed. 
+ EXPECT_EQ(msg.msg_flags, -1); +} + +TEST_P(AllSocketPairTest, RecvmsgMsghdrFlagsCleared) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[10]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + ASSERT_THAT( + RetryEINTR(send)(sockets->first_fd(), sent_data, sizeof(sent_data), 0), + SyscallSucceedsWithValue(sizeof(sent_data))); + + char received_data[sizeof(sent_data)] = {}; + + struct iovec iov; + iov.iov_base = received_data; + iov.iov_len = sizeof(received_data); + struct msghdr msg = {}; + msg.msg_flags = -1; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, 0), + SyscallSucceedsWithValue(sizeof(sent_data))); + EXPECT_EQ(0, memcmp(received_data, sent_data, sizeof(sent_data))); + + // Check that msghdr flags were cleared. + EXPECT_EQ(msg.msg_flags, 0); +} + +TEST_P(AllSocketPairTest, RecvmsgPeekMsghdrFlagsCleared) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[10]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + ASSERT_THAT( + RetryEINTR(send)(sockets->first_fd(), sent_data, sizeof(sent_data), 0), + SyscallSucceedsWithValue(sizeof(sent_data))); + + char received_data[sizeof(sent_data)] = {}; + + struct iovec iov; + iov.iov_base = received_data; + iov.iov_len = sizeof(received_data); + struct msghdr msg = {}; + msg.msg_flags = -1; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, MSG_PEEK), + SyscallSucceedsWithValue(sizeof(sent_data))); + EXPECT_EQ(0, memcmp(received_data, sent_data, sizeof(sent_data))); + + // Check that msghdr flags were cleared. 
+ EXPECT_EQ(msg.msg_flags, 0); +} + +TEST_P(AllSocketPairTest, RecvmsgIovNotUpdated) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[10]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + ASSERT_THAT( + RetryEINTR(send)(sockets->first_fd(), sent_data, sizeof(sent_data), 0), + SyscallSucceedsWithValue(sizeof(sent_data))); + + char received_data[sizeof(sent_data) * 2] = {}; + + struct iovec iov; + iov.iov_base = received_data; + iov.iov_len = sizeof(received_data); + struct msghdr msg = {}; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, 0), + SyscallSucceedsWithValue(sizeof(sent_data))); + EXPECT_EQ(0, memcmp(received_data, sent_data, sizeof(sent_data))); + + // Check that the iovec length was not updated. + EXPECT_EQ(msg.msg_iov->iov_len, sizeof(received_data)); +} + +TEST_P(AllSocketPairTest, RecvmmsgInvalidTimeout) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + char buf[10]; + struct mmsghdr msg = {}; + struct iovec iov = {}; + iov.iov_len = sizeof(buf); + iov.iov_base = buf; + msg.msg_hdr.msg_iov = &iov; + msg.msg_hdr.msg_iovlen = 1; + struct timespec timeout = {-1, -1}; + ASSERT_THAT(RetryEINTR(recvmmsg)(sockets->first_fd(), &msg, 1, 0, &timeout), + SyscallFailsWithErrno(EINVAL)); +} + +TEST_P(AllSocketPairTest, RecvmmsgTimeoutBeforeRecv) { + // There is a known bug in the Linux recvmmsg(2) causing it to block forever + // if the timeout expires while blocking for the first message. 
+ SKIP_IF(!IsRunningOnGvisor()); + + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + char buf[10]; + struct mmsghdr msg = {}; + struct iovec iov = {}; + iov.iov_len = sizeof(buf); + iov.iov_base = buf; + msg.msg_hdr.msg_iov = &iov; + msg.msg_hdr.msg_iovlen = 1; + struct timespec timeout = {}; + ASSERT_THAT(RetryEINTR(recvmmsg)(sockets->first_fd(), &msg, 1, 0, &timeout), + SyscallFailsWithErrno(EAGAIN)); +} + +TEST_P(AllSocketPairTest, MsgPeek) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + char sent_data[50]; + memset(&sent_data, 0, sizeof(sent_data)); + ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data, sizeof(sent_data)), + SyscallSucceedsWithValue(sizeof(sent_data))); + + char received_data[sizeof(sent_data)]; + for (int i = 0; i < 3; i++) { + memset(received_data, 0, sizeof(received_data)); + EXPECT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data, + sizeof(received_data), MSG_PEEK), + SyscallSucceedsWithValue(sizeof(received_data))); + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(received_data))); + } + + ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data, + sizeof(received_data), 0), + SyscallSucceedsWithValue(sizeof(received_data))); + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(received_data))); +} + +TEST_P(AllSocketPairTest, LingerSocketOption) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + struct linger got_linger = {-1, -1}; + socklen_t length = sizeof(struct linger); + EXPECT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, SO_LINGER, + &got_linger, &length), + SyscallSucceedsWithValue(0)); + struct linger want_linger = {}; + EXPECT_EQ(0, memcmp(&want_linger, &got_linger, sizeof(struct linger))); + EXPECT_EQ(sizeof(struct linger), length); +} + +TEST_P(AllSocketPairTest, KeepAliveSocketOption) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + int keepalive = -1; + socklen_t length = sizeof(int); + EXPECT_THAT(getsockopt(sockets->first_fd(), 
SOL_SOCKET, SO_KEEPALIVE, + &keepalive, &length), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(0, keepalive); + EXPECT_EQ(sizeof(int), length); +} + +TEST_P(AllSocketPairTest, RcvBufSucceeds) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + int size = 0; + socklen_t size_size = sizeof(size); + EXPECT_THAT( + getsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVBUF, &size, &size_size), + SyscallSucceeds()); + EXPECT_GT(size, 0); +} + +TEST_P(AllSocketPairTest, SndBufSucceeds) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + int size = 0; + socklen_t size_size = sizeof(size); + EXPECT_THAT( + getsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDBUF, &size, &size_size), + SyscallSucceeds()); + EXPECT_GT(size, 0); +} + +TEST_P(AllSocketPairTest, RecvTimeoutReadSucceeds) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + struct timeval tv { + .tv_sec = 0, .tv_usec = 10 + }; + EXPECT_THAT( + setsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)), + SyscallSucceeds()); + + char buf[20] = {}; + EXPECT_THAT(RetryEINTR(read)(sockets->first_fd(), buf, sizeof(buf)), + SyscallFailsWithErrno(EAGAIN)); +} + +TEST_P(AllSocketPairTest, RecvTimeoutRecvSucceeds) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + struct timeval tv { + .tv_sec = 0, .tv_usec = 10 + }; + EXPECT_THAT( + setsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)), + SyscallSucceeds()); + + char buf[20] = {}; + EXPECT_THAT(RetryEINTR(recv)(sockets->first_fd(), buf, sizeof(buf), 0), + SyscallFailsWithErrno(EAGAIN)); +} + +TEST_P(AllSocketPairTest, RecvTimeoutRecvOneSecondSucceeds) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + struct timeval tv { + .tv_sec = 1, .tv_usec = 0 + }; + EXPECT_THAT( + setsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)), + SyscallSucceeds()); + + char buf[20] = {}; + EXPECT_THAT(RetryEINTR(recv)(sockets->first_fd(), buf, sizeof(buf), 0), + 
SyscallFailsWithErrno(EAGAIN)); +} + +TEST_P(AllSocketPairTest, RecvTimeoutRecvmsgSucceeds) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + struct timeval tv { + .tv_sec = 0, .tv_usec = 10 + }; + EXPECT_THAT( + setsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)), + SyscallSucceeds()); + + struct msghdr msg = {}; + char buf[20] = {}; + struct iovec iov; + iov.iov_base = buf; + iov.iov_len = sizeof(buf); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + EXPECT_THAT(RetryEINTR(recvmsg)(sockets->first_fd(), &msg, 0), + SyscallFailsWithErrno(EAGAIN)); +} + +TEST_P(AllSocketPairTest, SendTimeoutDefault) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + timeval actual_tv = {.tv_sec = -1, .tv_usec = -1}; + socklen_t len = sizeof(actual_tv); + EXPECT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDTIMEO, + &actual_tv, &len), + SyscallSucceeds()); + EXPECT_EQ(actual_tv.tv_sec, 0); + EXPECT_EQ(actual_tv.tv_usec, 0); +} + +TEST_P(AllSocketPairTest, SetGetSendTimeout) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + timeval tv = {.tv_sec = 89, .tv_usec = 42000}; + EXPECT_THAT( + setsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)), + SyscallSucceeds()); + + timeval actual_tv = {}; + socklen_t len = sizeof(actual_tv); + EXPECT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDTIMEO, + &actual_tv, &len), + SyscallSucceeds()); + EXPECT_EQ(actual_tv.tv_sec, 89); + EXPECT_EQ(actual_tv.tv_usec, 42000); +} + +TEST_P(AllSocketPairTest, SetGetSendTimeoutLargerArg) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + struct timeval_with_extra { + struct timeval tv; + int64_t extra_data; + } ABSL_ATTRIBUTE_PACKED; + + timeval_with_extra tv_extra = { + .tv = {.tv_sec = 0, .tv_usec = 123000}, + }; + + EXPECT_THAT(setsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDTIMEO, + &tv_extra, sizeof(tv_extra)), + SyscallSucceeds()); + + timeval_with_extra actual_tv = {}; + socklen_t 
len = sizeof(actual_tv); + EXPECT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDTIMEO, + &actual_tv, &len), + SyscallSucceeds()); + EXPECT_EQ(actual_tv.tv.tv_sec, 0); + EXPECT_EQ(actual_tv.tv.tv_usec, 123000); +} + +TEST_P(AllSocketPairTest, SendTimeoutAllowsWrite) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + struct timeval tv { + .tv_sec = 0, .tv_usec = 10 + }; + EXPECT_THAT( + setsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)), + SyscallSucceeds()); + + char buf[20] = {}; + ASSERT_THAT(RetryEINTR(write)(sockets->first_fd(), buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); +} + +TEST_P(AllSocketPairTest, SendTimeoutAllowsSend) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + struct timeval tv { + .tv_sec = 0, .tv_usec = 10 + }; + EXPECT_THAT( + setsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)), + SyscallSucceeds()); + + char buf[20] = {}; + ASSERT_THAT(RetryEINTR(send)(sockets->first_fd(), buf, sizeof(buf), 0), + SyscallSucceedsWithValue(sizeof(buf))); +} + +TEST_P(AllSocketPairTest, SendTimeoutAllowsSendmsg) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + struct timeval tv { + .tv_sec = 0, .tv_usec = 10 + }; + EXPECT_THAT( + setsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)), + SyscallSucceeds()); + + char buf[20] = {}; + ASSERT_NO_FATAL_FAILURE(SendNullCmsg(sockets->first_fd(), buf, sizeof(buf))); +} + +TEST_P(AllSocketPairTest, RecvTimeoutDefault) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + timeval actual_tv = {.tv_sec = -1, .tv_usec = -1}; + socklen_t len = sizeof(actual_tv); + EXPECT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVTIMEO, + &actual_tv, &len), + SyscallSucceeds()); + EXPECT_EQ(actual_tv.tv_sec, 0); + EXPECT_EQ(actual_tv.tv_usec, 0); +} + +TEST_P(AllSocketPairTest, SetGetRecvTimeout) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + timeval tv = 
{.tv_sec = 123, .tv_usec = 456000}; + EXPECT_THAT( + setsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)), + SyscallSucceeds()); + + timeval actual_tv = {}; + socklen_t len = sizeof(actual_tv); + EXPECT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVTIMEO, + &actual_tv, &len), + SyscallSucceeds()); + EXPECT_EQ(actual_tv.tv_sec, 123); + EXPECT_EQ(actual_tv.tv_usec, 456000); +} + +TEST_P(AllSocketPairTest, SetGetRecvTimeoutLargerArg) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + struct timeval_with_extra { + struct timeval tv; + int64_t extra_data; + } ABSL_ATTRIBUTE_PACKED; + + timeval_with_extra tv_extra = { + .tv = {.tv_sec = 0, .tv_usec = 432000}, + }; + + EXPECT_THAT(setsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVTIMEO, + &tv_extra, sizeof(tv_extra)), + SyscallSucceeds()); + + timeval_with_extra actual_tv = {}; + socklen_t len = sizeof(actual_tv); + EXPECT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVTIMEO, + &actual_tv, &len), + SyscallSucceeds()); + EXPECT_EQ(actual_tv.tv.tv_sec, 0); + EXPECT_EQ(actual_tv.tv.tv_usec, 432000); +} + +TEST_P(AllSocketPairTest, RecvTimeoutRecvmsgOneSecondSucceeds) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + struct timeval tv { + .tv_sec = 1, .tv_usec = 0 + }; + EXPECT_THAT( + setsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)), + SyscallSucceeds()); + + struct msghdr msg = {}; + char buf[20] = {}; + struct iovec iov; + iov.iov_base = buf; + iov.iov_len = sizeof(buf); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + EXPECT_THAT(RetryEINTR(recvmsg)(sockets->first_fd(), &msg, 0), + SyscallFailsWithErrno(EAGAIN)); +} + +TEST_P(AllSocketPairTest, RecvTimeoutUsecTooLarge) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + struct timeval tv { + .tv_sec = 0, .tv_usec = 2000000 // 2 seconds. 
+ }; + EXPECT_THAT( + setsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)), + SyscallFailsWithErrno(EDOM)); +} + +TEST_P(AllSocketPairTest, SendTimeoutUsecTooLarge) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + struct timeval tv { + .tv_sec = 0, .tv_usec = 2000000 // 2 seconds. + }; + EXPECT_THAT( + setsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)), + SyscallFailsWithErrno(EDOM)); +} + +TEST_P(AllSocketPairTest, RecvTimeoutUsecNeg) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + struct timeval tv { + .tv_sec = 0, .tv_usec = -1 + }; + EXPECT_THAT( + setsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)), + SyscallFailsWithErrno(EDOM)); +} + +TEST_P(AllSocketPairTest, SendTimeoutUsecNeg) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + struct timeval tv { + .tv_sec = 0, .tv_usec = -1 + }; + EXPECT_THAT( + setsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)), + SyscallFailsWithErrno(EDOM)); +} + +TEST_P(AllSocketPairTest, RecvTimeoutNegSecRead) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + struct timeval tv { + .tv_sec = -1, .tv_usec = 0 + }; + EXPECT_THAT( + setsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)), + SyscallSucceeds()); + + char buf[20] = {}; + EXPECT_THAT(RetryEINTR(read)(sockets->first_fd(), buf, sizeof(buf)), + SyscallFailsWithErrno(EAGAIN)); +} + +TEST_P(AllSocketPairTest, RecvTimeoutNegSecRecv) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + struct timeval tv { + .tv_sec = -1, .tv_usec = 0 + }; + EXPECT_THAT( + setsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)), + SyscallSucceeds()); + + char buf[20] = {}; + EXPECT_THAT(RetryEINTR(recv)(sockets->first_fd(), buf, sizeof(buf), 0), + SyscallFailsWithErrno(EAGAIN)); +} + +TEST_P(AllSocketPairTest, RecvTimeoutNegSecRecvmsg) { + auto sockets = 
ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + struct timeval tv { + .tv_sec = -1, .tv_usec = 0 + }; + EXPECT_THAT( + setsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)), + SyscallSucceeds()); + + struct msghdr msg = {}; + char buf[20] = {}; + struct iovec iov; + iov.iov_base = buf; + iov.iov_len = sizeof(buf); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + EXPECT_THAT(RetryEINTR(recvmsg)(sockets->first_fd(), &msg, 0), + SyscallFailsWithErrno(EAGAIN)); +} + +TEST_P(AllSocketPairTest, RecvWaitAll) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[100]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + ASSERT_THAT(write(sockets->first_fd(), sent_data, sizeof(sent_data)), + SyscallSucceedsWithValue(sizeof(sent_data))); + + char received_data[sizeof(sent_data)] = {}; + ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data, + sizeof(received_data), MSG_WAITALL), + SyscallSucceedsWithValue(sizeof(sent_data))); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); +} + +TEST_P(AllSocketPairTest, RecvWaitAllDontWait) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char data[100] = {}; + ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), data, sizeof(data), + MSG_WAITALL | MSG_DONTWAIT), + SyscallFailsWithErrno(EAGAIN)); +} + +TEST_P(AllSocketPairTest, RecvTimeoutWaitAll) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + struct timeval tv { + .tv_sec = 0, .tv_usec = 200000 // 200ms + }; + EXPECT_THAT(setsockopt(sockets->second_fd(), SOL_SOCKET, SO_RCVTIMEO, &tv, + sizeof(tv)), + SyscallSucceeds()); + + char sent_data[100]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + ASSERT_THAT(write(sockets->first_fd(), sent_data, sizeof(sent_data)), + SyscallSucceedsWithValue(sizeof(sent_data))); + + char received_data[sizeof(sent_data) * 2] = {}; + ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data, + sizeof(received_data), MSG_WAITALL), + 
SyscallSucceedsWithValue(sizeof(sent_data))); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); +} + +TEST_P(AllSocketPairTest, GetSockoptType) { + int type = GetParam().type; + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + for (const int fd : {sockets->first_fd(), sockets->second_fd()}) { + int opt; + socklen_t optlen = sizeof(opt); + EXPECT_THAT(getsockopt(fd, SOL_SOCKET, SO_TYPE, &opt, &optlen), + SyscallSucceeds()); + + // Type may have SOCK_NONBLOCK and SOCK_CLOEXEC ORed into it. Remove these + // before comparison. + type &= ~(SOCK_NONBLOCK | SOCK_CLOEXEC); + EXPECT_EQ(opt, type) << absl::StrFormat( + "getsockopt(%d, SOL_SOCKET, SO_TYPE, &opt, &optlen) => opt=%d was " + "unexpected", + fd, opt); + } +} + +TEST_P(AllSocketPairTest, GetSockoptDomain) { + const int domain = GetParam().domain; + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + for (const int fd : {sockets->first_fd(), sockets->second_fd()}) { + int opt; + socklen_t optlen = sizeof(opt); + EXPECT_THAT(getsockopt(fd, SOL_SOCKET, SO_DOMAIN, &opt, &optlen), + SyscallSucceeds()); + EXPECT_EQ(opt, domain) << absl::StrFormat( + "getsockopt(%d, SOL_SOCKET, SO_DOMAIN, &opt, &optlen) => opt=%d was " + "unexpected", + fd, opt); + } +} + +TEST_P(AllSocketPairTest, GetSockoptProtocol) { + const int protocol = GetParam().protocol; + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + for (const int fd : {sockets->first_fd(), sockets->second_fd()}) { + int opt; + socklen_t optlen = sizeof(opt); + EXPECT_THAT(getsockopt(fd, SOL_SOCKET, SO_PROTOCOL, &opt, &optlen), + SyscallSucceeds()); + EXPECT_EQ(opt, protocol) << absl::StrFormat( + "getsockopt(%d, SOL_SOCKET, SO_PROTOCOL, &opt, &optlen) => opt=%d was " + "unexpected", + fd, opt); + } +} + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_generic.h b/test/syscalls/linux/socket_generic.h new file mode 100644 index 000000000..00ae7bfc3 --- /dev/null +++ 
b/test/syscalls/linux/socket_generic.h @@ -0,0 +1,30 @@
// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_GENERIC_H_
#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_GENERIC_H_

#include "test/syscalls/linux/socket_test_util.h"

namespace gvisor {
namespace testing {

// Test fixture for tests that apply to pairs of blocking and non-blocking
// connected stream sockets.
//
// This is a plain alias of SocketPairTest: it adds no members or behavior of
// its own and only gives the generic socket-pair suite a distinct name.
using AllSocketPairTest = SocketPairTest;

}  // namespace testing
}  // namespace gvisor

#endif  // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_GENERIC_H_
diff --git a/test/syscalls/linux/socket_generic_stress.cc b/test/syscalls/linux/socket_generic_stress.cc new file mode 100644 index 000000000..6a232238d --- /dev/null +++ b/test/syscalls/linux/socket_generic_stress.cc @@ -0,0 +1,83 @@
// Copyright 2020 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
+ +#include <stdio.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/un.h> + +#include "gtest/gtest.h" +#include "test/syscalls/linux/ip_socket_test_util.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +// Test fixture for tests that apply to pairs of connected sockets. +using ConnectStressTest = SocketPairTest; + +TEST_P(ConnectStressTest, Reset65kTimes) { + for (int i = 0; i < 1 << 16; ++i) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + // Send some data to ensure that the connection gets reset and the port gets + // released immediately. This avoids either end entering TIME-WAIT. + char sent_data[100] = {}; + ASSERT_THAT(write(sockets->first_fd(), sent_data, sizeof(sent_data)), + SyscallSucceedsWithValue(sizeof(sent_data))); + } +} + +INSTANTIATE_TEST_SUITE_P( + AllConnectedSockets, ConnectStressTest, + ::testing::Values(IPv6UDPBidirectionalBindSocketPair(0), + IPv4UDPBidirectionalBindSocketPair(0), + DualStackUDPBidirectionalBindSocketPair(0), + + // Without REUSEADDR, we get port exhaustion on Linux. + SetSockOpt(SOL_SOCKET, SO_REUSEADDR, + &kSockOptOn)(IPv6TCPAcceptBindSocketPair(0)), + SetSockOpt(SOL_SOCKET, SO_REUSEADDR, + &kSockOptOn)(IPv4TCPAcceptBindSocketPair(0)), + SetSockOpt(SOL_SOCKET, SO_REUSEADDR, &kSockOptOn)( + DualStackTCPAcceptBindSocketPair(0)))); + +// Test fixture for tests that apply to pairs of connected sockets created with +// a persistent listener (if applicable). 
+using PersistentListenerConnectStressTest = SocketPairTest; + +TEST_P(PersistentListenerConnectStressTest, 65kTimes) { + for (int i = 0; i < 1 << 16; ++i) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + } +} + +INSTANTIATE_TEST_SUITE_P( + AllConnectedSockets, PersistentListenerConnectStressTest, + ::testing::Values( + IPv6UDPBidirectionalBindSocketPair(0), + IPv4UDPBidirectionalBindSocketPair(0), + DualStackUDPBidirectionalBindSocketPair(0), + + // Without REUSEADDR, we get port exhaustion on Linux. + SetSockOpt(SOL_SOCKET, SO_REUSEADDR, &kSockOptOn)( + IPv6TCPAcceptBindPersistentListenerSocketPair(0)), + SetSockOpt(SOL_SOCKET, SO_REUSEADDR, &kSockOptOn)( + IPv4TCPAcceptBindPersistentListenerSocketPair(0)), + SetSockOpt(SOL_SOCKET, SO_REUSEADDR, &kSockOptOn)( + DualStackTCPAcceptBindPersistentListenerSocketPair(0)))); + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_inet_loopback.cc b/test/syscalls/linux/socket_inet_loopback.cc new file mode 100644 index 000000000..18b9e4b70 --- /dev/null +++ b/test/syscalls/linux/socket_inet_loopback.cc @@ -0,0 +1,2566 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <arpa/inet.h> +#include <netinet/in.h> +#include <netinet/tcp.h> +#include <poll.h> +#include <string.h> +#include <sys/socket.h> + +#include <atomic> +#include <iostream> +#include <memory> +#include <string> +#include <tuple> +#include <utility> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/memory/memory.h" +#include "absl/strings/str_cat.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/syscalls/linux/ip_socket_test_util.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/posix_error.h" +#include "test/util/save_util.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +using ::testing::Gt; + +PosixErrorOr<uint16_t> AddrPort(int family, sockaddr_storage const& addr) { + switch (family) { + case AF_INET: + return static_cast<uint16_t>( + reinterpret_cast<sockaddr_in const*>(&addr)->sin_port); + case AF_INET6: + return static_cast<uint16_t>( + reinterpret_cast<sockaddr_in6 const*>(&addr)->sin6_port); + default: + return PosixError(EINVAL, + absl::StrCat("unknown socket family: ", family)); + } +} + +PosixError SetAddrPort(int family, sockaddr_storage* addr, uint16_t port) { + switch (family) { + case AF_INET: + reinterpret_cast<sockaddr_in*>(addr)->sin_port = port; + return NoError(); + case AF_INET6: + reinterpret_cast<sockaddr_in6*>(addr)->sin6_port = port; + return NoError(); + default: + return PosixError(EINVAL, + absl::StrCat("unknown socket family: ", family)); + } +} + +struct TestParam { + TestAddress listener; + TestAddress connector; +}; + +std::string DescribeTestParam(::testing::TestParamInfo<TestParam> const& info) { + return absl::StrCat("Listen", info.param.listener.description, "_Connect", + info.param.connector.description); +} + +using SocketInetLoopbackTest = ::testing::TestWithParam<TestParam>; + +TEST(BadSocketPairArgs, 
ValidateErrForBadCallsToSocketPair) { + int fd[2] = {}; + + // Valid AF but invalid for socketpair(2) return ESOCKTNOSUPPORT. + ASSERT_THAT(socketpair(AF_INET, 0, 0, fd), + SyscallFailsWithErrno(ESOCKTNOSUPPORT)); + ASSERT_THAT(socketpair(AF_INET6, 0, 0, fd), + SyscallFailsWithErrno(ESOCKTNOSUPPORT)); + + // Invalid AF will return ENOAFSUPPORT. + ASSERT_THAT(socketpair(AF_MAX, 0, 0, fd), + SyscallFailsWithErrno(EAFNOSUPPORT)); + ASSERT_THAT(socketpair(8675309, 0, 0, fd), + SyscallFailsWithErrno(EAFNOSUPPORT)); +} + +enum class Operation { + Bind, + Connect, + SendTo, +}; + +std::string OperationToString(Operation operation) { + switch (operation) { + case Operation::Bind: + return "Bind"; + case Operation::Connect: + return "Connect"; + case Operation::SendTo: + return "SendTo"; + } +} + +using OperationSequence = std::vector<Operation>; + +using DualStackSocketTest = + ::testing::TestWithParam<std::tuple<TestAddress, OperationSequence>>; + +TEST_P(DualStackSocketTest, AddressOperations) { + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET6, SOCK_DGRAM, 0)); + + const TestAddress& addr = std::get<0>(GetParam()); + const OperationSequence& operations = std::get<1>(GetParam()); + + auto addr_in = reinterpret_cast<const sockaddr*>(&addr.addr); + + // sockets may only be bound once. Both `connect` and `sendto` cause a socket + // to be bound. + bool bound = false; + for (const Operation& operation : operations) { + bool sockname = false; + bool peername = false; + switch (operation) { + case Operation::Bind: { + ASSERT_NO_ERRNO(SetAddrPort( + addr.family(), const_cast<sockaddr_storage*>(&addr.addr), 0)); + + int bind_ret = bind(fd.get(), addr_in, addr.addr_len); + + // Dual stack sockets may only be bound to AF_INET6. 
+ if (!bound && addr.family() == AF_INET6) { + EXPECT_THAT(bind_ret, SyscallSucceeds()); + bound = true; + + sockname = true; + } else { + EXPECT_THAT(bind_ret, SyscallFailsWithErrno(EINVAL)); + } + break; + } + case Operation::Connect: { + ASSERT_NO_ERRNO(SetAddrPort( + addr.family(), const_cast<sockaddr_storage*>(&addr.addr), 1337)); + + EXPECT_THAT(RetryEINTR(connect)(fd.get(), addr_in, addr.addr_len), + SyscallSucceeds()) + << GetAddrStr(addr_in); + bound = true; + + sockname = true; + peername = true; + + break; + } + case Operation::SendTo: { + const char payload[] = "hello"; + ASSERT_NO_ERRNO(SetAddrPort( + addr.family(), const_cast<sockaddr_storage*>(&addr.addr), 1337)); + + ssize_t sendto_ret = sendto(fd.get(), &payload, sizeof(payload), 0, + addr_in, addr.addr_len); + + EXPECT_THAT(sendto_ret, SyscallSucceedsWithValue(sizeof(payload))); + sockname = !bound; + bound = true; + break; + } + } + + if (sockname) { + sockaddr_storage sock_addr; + socklen_t addrlen = sizeof(sock_addr); + ASSERT_THAT(getsockname(fd.get(), reinterpret_cast<sockaddr*>(&sock_addr), + &addrlen), + SyscallSucceeds()); + ASSERT_EQ(addrlen, sizeof(struct sockaddr_in6)); + + auto sock_addr_in6 = reinterpret_cast<const sockaddr_in6*>(&sock_addr); + + if (operation == Operation::SendTo) { + EXPECT_EQ(sock_addr_in6->sin6_family, AF_INET6); + EXPECT_TRUE(IN6_IS_ADDR_UNSPECIFIED(sock_addr_in6->sin6_addr.s6_addr32)) + << OperationToString(operation) << " getsocknam=" + << GetAddrStr(reinterpret_cast<sockaddr*>(&sock_addr)); + + EXPECT_NE(sock_addr_in6->sin6_port, 0); + } else if (IN6_IS_ADDR_V4MAPPED( + reinterpret_cast<const sockaddr_in6*>(addr_in) + ->sin6_addr.s6_addr32)) { + EXPECT_TRUE(IN6_IS_ADDR_V4MAPPED(sock_addr_in6->sin6_addr.s6_addr32)) + << OperationToString(operation) << " getsocknam=" + << GetAddrStr(reinterpret_cast<sockaddr*>(&sock_addr)); + } + } + + if (peername) { + sockaddr_storage peer_addr; + socklen_t addrlen = sizeof(peer_addr); + ASSERT_THAT(getpeername(fd.get(), 
reinterpret_cast<sockaddr*>(&peer_addr), + &addrlen), + SyscallSucceeds()); + ASSERT_EQ(addrlen, sizeof(struct sockaddr_in6)); + + if (addr.family() == AF_INET || + IN6_IS_ADDR_V4MAPPED(reinterpret_cast<const sockaddr_in6*>(addr_in) + ->sin6_addr.s6_addr32)) { + EXPECT_TRUE(IN6_IS_ADDR_V4MAPPED( + reinterpret_cast<const sockaddr_in6*>(&peer_addr) + ->sin6_addr.s6_addr32)) + << OperationToString(operation) << " getpeername=" + << GetAddrStr(reinterpret_cast<sockaddr*>(&peer_addr)); + } + } + } +} + +// TODO(gvisor.dev/issue/1556): uncomment V4MappedAny. +INSTANTIATE_TEST_SUITE_P( + All, DualStackSocketTest, + ::testing::Combine( + ::testing::Values(V4Any(), V4Loopback(), /*V4MappedAny(),*/ + V4MappedLoopback(), V6Any(), V6Loopback()), + ::testing::ValuesIn<OperationSequence>( + {{Operation::Bind, Operation::Connect, Operation::SendTo}, + {Operation::Bind, Operation::SendTo, Operation::Connect}, + {Operation::Connect, Operation::Bind, Operation::SendTo}, + {Operation::Connect, Operation::SendTo, Operation::Bind}, + {Operation::SendTo, Operation::Bind, Operation::Connect}, + {Operation::SendTo, Operation::Connect, Operation::Bind}})), + [](::testing::TestParamInfo< + std::tuple<TestAddress, OperationSequence>> const& info) { + const TestAddress& addr = std::get<0>(info.param); + const OperationSequence& operations = std::get<1>(info.param); + std::string s = addr.description; + for (const Operation& operation : operations) { + absl::StrAppend(&s, OperationToString(operation)); + } + return s; + }); + +void tcpSimpleConnectTest(TestAddress const& listener, + TestAddress const& connector, bool unbound) { + // Create the listening socket. 
+ const FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE( + Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP)); + sockaddr_storage listen_addr = listener.addr; + if (!unbound) { + ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast<sockaddr*>(&listen_addr), + listener.addr_len), + SyscallSucceeds()); + } + ASSERT_THAT(listen(listen_fd.get(), SOMAXCONN), SyscallSucceeds()); + + // Get the port bound by the listening socket. + socklen_t addrlen = listener.addr_len; + ASSERT_THAT(getsockname(listen_fd.get(), + reinterpret_cast<sockaddr*>(&listen_addr), &addrlen), + SyscallSucceeds()); + uint16_t const port = + ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr)); + + // Connect to the listening socket. + const FileDescriptor conn_fd = ASSERT_NO_ERRNO_AND_VALUE( + Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP)); + sockaddr_storage conn_addr = connector.addr; + ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port)); + ASSERT_THAT(RetryEINTR(connect)(conn_fd.get(), + reinterpret_cast<sockaddr*>(&conn_addr), + connector.addr_len), + SyscallSucceeds()); + + // Accept the connection. + // + // We have to assign a name to the accepted socket, as unnamed temporary + // objects are destructed upon full evaluation of the expression they are in, + // potentially causing the connecting socket to fail to shutdown properly. 
+ auto accepted = + ASSERT_NO_ERRNO_AND_VALUE(Accept(listen_fd.get(), nullptr, nullptr)); + + ASSERT_THAT(shutdown(listen_fd.get(), SHUT_RDWR), SyscallSucceeds()); + + ASSERT_THAT(shutdown(conn_fd.get(), SHUT_RDWR), SyscallSucceeds()); +} + +// Connects to a listener that was explicitly bound before listen(). +TEST_P(SocketInetLoopbackTest, TCP) { + auto const& param = GetParam(); + TestAddress const& listener = param.listener; + TestAddress const& connector = param.connector; + + // tcpSimpleConnectTest only calls bind() when `unbound` is false. + tcpSimpleConnectTest(listener, connector, /*unbound=*/false); +} + +// Connects to a listener on which listen() was called without a prior bind(). +TEST_P(SocketInetLoopbackTest, TCPListenUnbound) { + auto const& param = GetParam(); + + TestAddress const& listener = param.listener; + TestAddress const& connector = param.connector; + + // `unbound` set: tcpSimpleConnectTest skips bind() and listens directly. + tcpSimpleConnectTest(listener, connector, /*unbound=*/true); +} + +TEST_P(SocketInetLoopbackTest, TCPListenShutdownListen) { + const auto& param = GetParam(); + + const TestAddress& listener = param.listener; + const TestAddress& connector = param.connector; + + constexpr int kBacklog = 5; + + // Create the listening socket. + FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE( + Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP)); + sockaddr_storage listen_addr = listener.addr; + ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast<sockaddr*>(&listen_addr), + listener.addr_len), + SyscallSucceeds()); + + ASSERT_THAT(listen(listen_fd.get(), kBacklog), SyscallSucceeds()); + ASSERT_THAT(shutdown(listen_fd.get(), SHUT_RD), SyscallSucceeds()); + ASSERT_THAT(listen(listen_fd.get(), kBacklog), SyscallSucceeds()); + + // Get the port bound by the listening socket. 
+ socklen_t addrlen = listener.addr_len; + ASSERT_THAT(getsockname(listen_fd.get(), + reinterpret_cast<sockaddr*>(&listen_addr), &addrlen), + SyscallSucceeds()); + const uint16_t port = + ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr)); + + sockaddr_storage conn_addr = connector.addr; + ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port)); + + for (int i = 0; i < kBacklog; i++) { + auto client = ASSERT_NO_ERRNO_AND_VALUE( + Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP)); + ASSERT_THAT(RetryEINTR(connect)(client.get(), + reinterpret_cast<sockaddr*>(&conn_addr), + connector.addr_len), + SyscallSucceeds()); + } + for (int i = 0; i < kBacklog; i++) { + ASSERT_THAT(accept(listen_fd.get(), nullptr, nullptr), SyscallSucceeds()); + } +} + +TEST_P(SocketInetLoopbackTest, TCPListenShutdown) { + auto const& param = GetParam(); + + TestAddress const& listener = param.listener; + TestAddress const& connector = param.connector; + + constexpr int kBacklog = 2; + constexpr int kFDs = kBacklog + 1; + + // Create the listening socket. + FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE( + Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP)); + sockaddr_storage listen_addr = listener.addr; + ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast<sockaddr*>(&listen_addr), + listener.addr_len), + SyscallSucceeds()); + ASSERT_THAT(listen(listen_fd.get(), kBacklog), SyscallSucceeds()); + + // Get the port bound by the listening socket. + socklen_t addrlen = listener.addr_len; + ASSERT_THAT(getsockname(listen_fd.get(), + reinterpret_cast<sockaddr*>(&listen_addr), &addrlen), + SyscallSucceeds()); + uint16_t const port = + ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr)); + + sockaddr_storage conn_addr = connector.addr; + ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port)); + + // Shutdown the write of the listener, expect to not have any effect. 
+ ASSERT_THAT(shutdown(listen_fd.get(), SHUT_WR), SyscallSucceeds()); + + for (int i = 0; i < kFDs; i++) { + auto client = ASSERT_NO_ERRNO_AND_VALUE( + Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP)); + ASSERT_THAT(RetryEINTR(connect)(client.get(), + reinterpret_cast<sockaddr*>(&conn_addr), + connector.addr_len), + SyscallSucceeds()); + ASSERT_THAT(accept(listen_fd.get(), nullptr, nullptr), SyscallSucceeds()); + } + + // Shutdown the read of the listener, expect to fail subsequent + // server accepts, binds and client connects. + ASSERT_THAT(shutdown(listen_fd.get(), SHUT_RD), SyscallSucceeds()); + + ASSERT_THAT(accept(listen_fd.get(), nullptr, nullptr), + SyscallFailsWithErrno(EINVAL)); + + // Check that shutdown did not release the port. + FileDescriptor new_listen_fd = ASSERT_NO_ERRNO_AND_VALUE( + Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP)); + ASSERT_THAT( + bind(new_listen_fd.get(), reinterpret_cast<sockaddr*>(&listen_addr), + listener.addr_len), + SyscallFailsWithErrno(EADDRINUSE)); + + // Check that subsequent connection attempts receive a RST. Each attempt + // uses a fresh socket created inside the loop. + for (int i = 0; i < kFDs; i++) { + auto client = ASSERT_NO_ERRNO_AND_VALUE( + Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP)); + ASSERT_THAT(RetryEINTR(connect)(client.get(), + reinterpret_cast<sockaddr*>(&conn_addr), + connector.addr_len), + SyscallFailsWithErrno(ECONNREFUSED)); + } +} + +TEST_P(SocketInetLoopbackTest, TCPListenClose) { + auto const& param = GetParam(); + + TestAddress const& listener = param.listener; + TestAddress const& connector = param.connector; + + constexpr int kAcceptCount = 2; + constexpr int kBacklog = kAcceptCount + 2; + constexpr int kFDs = kBacklog * 3; + + // Create the listening socket. 
+ FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE( + Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP)); + sockaddr_storage listen_addr = listener.addr; + ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast<sockaddr*>(&listen_addr), + listener.addr_len), + SyscallSucceeds()); + ASSERT_THAT(listen(listen_fd.get(), kBacklog), SyscallSucceeds()); + + // Get the port bound by the listening socket. + socklen_t addrlen = listener.addr_len; + ASSERT_THAT(getsockname(listen_fd.get(), + reinterpret_cast<sockaddr*>(&listen_addr), &addrlen), + SyscallSucceeds()); + uint16_t const port = + ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr)); + + sockaddr_storage conn_addr = connector.addr; + ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port)); + std::vector<FileDescriptor> clients; + for (int i = 0; i < kFDs; i++) { + auto client = ASSERT_NO_ERRNO_AND_VALUE( + Socket(connector.family(), SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP)); + int ret = connect(client.get(), reinterpret_cast<sockaddr*>(&conn_addr), + connector.addr_len); + if (ret != 0) { + EXPECT_THAT(ret, SyscallFailsWithErrno(EINPROGRESS)); + } + clients.push_back(std::move(client)); + } + for (int i = 0; i < kAcceptCount; i++) { + auto accepted = + ASSERT_NO_ERRNO_AND_VALUE(Accept(listen_fd.get(), nullptr, nullptr)); + } +} + +void TestListenWhileConnect(const TestParam& param, + void (*stopListen)(FileDescriptor&)) { + TestAddress const& listener = param.listener; + TestAddress const& connector = param.connector; + + constexpr int kBacklog = 2; + constexpr int kClients = kBacklog + 1; + + // Create the listening socket. 
+ FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE( + Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP)); + sockaddr_storage listen_addr = listener.addr; + ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast<sockaddr*>(&listen_addr), + listener.addr_len), + SyscallSucceeds()); + ASSERT_THAT(listen(listen_fd.get(), kBacklog), SyscallSucceeds()); + + // Get the port bound by the listening socket. + socklen_t addrlen = listener.addr_len; + ASSERT_THAT(getsockname(listen_fd.get(), + reinterpret_cast<sockaddr*>(&listen_addr), &addrlen), + SyscallSucceeds()); + uint16_t const port = + ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr)); + + sockaddr_storage conn_addr = connector.addr; + ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port)); + std::vector<FileDescriptor> clients; + for (int i = 0; i < kClients; i++) { + FileDescriptor client = ASSERT_NO_ERRNO_AND_VALUE( + Socket(connector.family(), SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP)); + int ret = connect(client.get(), reinterpret_cast<sockaddr*>(&conn_addr), + connector.addr_len); + if (ret != 0) { + EXPECT_THAT(ret, SyscallFailsWithErrno(EINPROGRESS)); + clients.push_back(std::move(client)); + } + } + + stopListen(listen_fd); + + for (auto& client : clients) { + const int kTimeout = 10000; + struct pollfd pfd = { + .fd = client.get(), + .events = POLLIN, + }; + // When the listening socket is closed, then we expect the remote to reset + // the connection. + ASSERT_THAT(poll(&pfd, 1, kTimeout), SyscallSucceedsWithValue(1)); + ASSERT_EQ(pfd.revents, POLLIN | POLLHUP | POLLERR); + char c; + // Subsequent read can fail with: + // ECONNRESET: If the client connection was established and was reset by the + // remote. + // ECONNREFUSED: If the client connection failed to be established. 
+ ASSERT_THAT(read(client.get(), &c, sizeof(c)), + AnyOf(SyscallFailsWithErrno(ECONNRESET), + SyscallFailsWithErrno(ECONNREFUSED))); + } +} + +TEST_P(SocketInetLoopbackTest, TCPListenCloseWhileConnect) { + TestListenWhileConnect(GetParam(), [](FileDescriptor& f) { + ASSERT_THAT(close(f.release()), SyscallSucceeds()); + }); +} + +TEST_P(SocketInetLoopbackTest, TCPListenShutdownWhileConnect) { + TestListenWhileConnect(GetParam(), [](FileDescriptor& f) { + ASSERT_THAT(shutdown(f.get(), SHUT_RD), SyscallSucceeds()); + }); +} + +TEST_P(SocketInetLoopbackTest, TCPbacklog) { + auto const& param = GetParam(); + + TestAddress const& listener = param.listener; + TestAddress const& connector = param.connector; + + // Create the listening socket. + const FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE( + Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP)); + sockaddr_storage listen_addr = listener.addr; + ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast<sockaddr*>(&listen_addr), + listener.addr_len), + SyscallSucceeds()); + ASSERT_THAT(listen(listen_fd.get(), 2), SyscallSucceeds()); + + // Get the port bound by the listening socket. + socklen_t addrlen = listener.addr_len; + ASSERT_THAT(getsockname(listen_fd.get(), + reinterpret_cast<sockaddr*>(&listen_addr), &addrlen), + SyscallSucceeds()); + uint16_t const port = + ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr)); + int i = 0; + while (1) { + int ret; + + // Connect to the listening socket. 
+ const FileDescriptor conn_fd = ASSERT_NO_ERRNO_AND_VALUE( + Socket(connector.family(), SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP)); + sockaddr_storage conn_addr = connector.addr; + ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port)); + ret = connect(conn_fd.get(), reinterpret_cast<sockaddr*>(&conn_addr), + connector.addr_len); + if (ret != 0) { + EXPECT_THAT(ret, SyscallFailsWithErrno(EINPROGRESS)); + struct pollfd pfd = { + .fd = conn_fd.get(), + .events = POLLOUT, + }; + ret = poll(&pfd, 1, 3000); + if (ret == 0) break; + EXPECT_THAT(ret, SyscallSucceedsWithValue(1)); + } + EXPECT_THAT(RetryEINTR(send)(conn_fd.get(), &i, sizeof(i), 0), + SyscallSucceedsWithValue(sizeof(i))); + ASSERT_THAT(shutdown(conn_fd.get(), SHUT_RDWR), SyscallSucceeds()); + i++; + } + + for (; i != 0; i--) { + // Accept the connection. + // + // We have to assign a name to the accepted socket, as unnamed temporary + // objects are destructed upon full evaluation of the expression they are in, + // potentially causing the connecting socket to fail to shutdown properly. + auto accepted = + ASSERT_NO_ERRNO_AND_VALUE(Accept(listen_fd.get(), nullptr, nullptr)); + } +} + +// TCPFinWait2Test creates a pair of connected sockets then closes one end to +// trigger FIN_WAIT2 state for the closed endpoint. Then it tries to bind the +// same local IP/port on a new socket, which should fail w/ an EADDRINUSE. +// Then we wait till the FIN_WAIT2 timeout is over and try the bind (skipped +// when running on gVisor) and connect on that same socket, and this time it +// should succeed. +// +// TCP timers are not S/R today, this can cause this test to be flaky when run +// under random S/R due to timer being reset on a restore. +TEST_P(SocketInetLoopbackTest, TCPFinWait2Test_NoRandomSave) { + auto const& param = GetParam(); + TestAddress const& listener = param.listener; + TestAddress const& connector = param.connector; + + // Create the listening socket. 
+ const FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE( + Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP)); + sockaddr_storage listen_addr = listener.addr; + ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast<sockaddr*>(&listen_addr), + listener.addr_len), + SyscallSucceeds()); + ASSERT_THAT(listen(listen_fd.get(), SOMAXCONN), SyscallSucceeds()); + + // Get the port bound by the listening socket. + socklen_t addrlen = listener.addr_len; + ASSERT_THAT(getsockname(listen_fd.get(), + reinterpret_cast<sockaddr*>(&listen_addr), &addrlen), + SyscallSucceeds()); + + uint16_t const port = + ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr)); + + // Connect to the listening socket. + FileDescriptor conn_fd = ASSERT_NO_ERRNO_AND_VALUE( + Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP)); + + // Lower FIN_WAIT2 state to 5 seconds for test. + constexpr int kTCPLingerTimeout = 5; + EXPECT_THAT(setsockopt(conn_fd.get(), IPPROTO_TCP, TCP_LINGER2, + &kTCPLingerTimeout, sizeof(kTCPLingerTimeout)), + SyscallSucceedsWithValue(0)); + + sockaddr_storage conn_addr = connector.addr; + ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port)); + ASSERT_THAT(RetryEINTR(connect)(conn_fd.get(), + reinterpret_cast<sockaddr*>(&conn_addr), + connector.addr_len), + SyscallSucceeds()); + + // Accept the connection. + auto accepted = + ASSERT_NO_ERRNO_AND_VALUE(Accept(listen_fd.get(), nullptr, nullptr)); + + // Get the address/port bound by the connecting socket. + sockaddr_storage conn_bound_addr; + socklen_t conn_addrlen = connector.addr_len; + ASSERT_THAT( + getsockname(conn_fd.get(), reinterpret_cast<sockaddr*>(&conn_bound_addr), + &conn_addrlen), + SyscallSucceeds()); + + // close the connecting FD to trigger FIN_WAIT2 on the connected fd. + conn_fd.reset(); + + // Now bind and connect a new socket. 
+ const FileDescriptor conn_fd2 = ASSERT_NO_ERRNO_AND_VALUE( + Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP)); + + // Disable cooperative saves after this point, since a save between the first + // bind/connect and the second one can cause the linger timeout timer to + // be restarted causing the final bind/connect to fail. + DisableSave ds; + + ASSERT_THAT(bind(conn_fd2.get(), + reinterpret_cast<sockaddr*>(&conn_bound_addr), conn_addrlen), + SyscallFailsWithErrno(EADDRINUSE)); + + // Sleep for a little over the linger timeout to reduce flakiness in + // save/restore tests. + absl::SleepFor(absl::Seconds(kTCPLingerTimeout + 2)); + + ds.reset(); + + if (!IsRunningOnGvisor()) { + ASSERT_THAT( + bind(conn_fd2.get(), reinterpret_cast<sockaddr*>(&conn_bound_addr), + conn_addrlen), + SyscallSucceeds()); + } + ASSERT_THAT(RetryEINTR(connect)(conn_fd2.get(), + reinterpret_cast<sockaddr*>(&conn_addr), + conn_addrlen), + SyscallSucceeds()); +} + +// TCPLinger2TimeoutAfterClose creates a pair of connected sockets +// then closes one end to trigger FIN_WAIT2 state for the closed endpoint. +// It then sleeps for the TCP_LINGER2 timeout and verifies that bind/ +// connecting the same address succeeds. +// +// TCP timers are not S/R today, this can cause this test to be flaky when run +// under random S/R due to timer being reset on a restore. +TEST_P(SocketInetLoopbackTest, TCPLinger2TimeoutAfterClose_NoRandomSave) { + auto const& param = GetParam(); + TestAddress const& listener = param.listener; + TestAddress const& connector = param.connector; + + // Create the listening socket. 
+ const FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE( + Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP)); + sockaddr_storage listen_addr = listener.addr; + ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast<sockaddr*>(&listen_addr), + listener.addr_len), + SyscallSucceeds()); + ASSERT_THAT(listen(listen_fd.get(), SOMAXCONN), SyscallSucceeds()); + + // Get the port bound by the listening socket. + socklen_t addrlen = listener.addr_len; + ASSERT_THAT(getsockname(listen_fd.get(), + reinterpret_cast<sockaddr*>(&listen_addr), &addrlen), + SyscallSucceeds()); + + uint16_t const port = + ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr)); + + // Connect to the listening socket. + FileDescriptor conn_fd = ASSERT_NO_ERRNO_AND_VALUE( + Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP)); + + sockaddr_storage conn_addr = connector.addr; + ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port)); + ASSERT_THAT(RetryEINTR(connect)(conn_fd.get(), + reinterpret_cast<sockaddr*>(&conn_addr), + connector.addr_len), + SyscallSucceeds()); + + // Accept the connection. + auto accepted = + ASSERT_NO_ERRNO_AND_VALUE(Accept(listen_fd.get(), nullptr, nullptr)); + + // Get the address/port bound by the connecting socket. + sockaddr_storage conn_bound_addr; + socklen_t conn_addrlen = connector.addr_len; + ASSERT_THAT( + getsockname(conn_fd.get(), reinterpret_cast<sockaddr*>(&conn_bound_addr), + &conn_addrlen), + SyscallSucceeds()); + + // Disable cooperative saves after this point as TCP timers are not restored + // across a S/R. + { + DisableSave ds; + constexpr int kTCPLingerTimeout = 5; + EXPECT_THAT(setsockopt(conn_fd.get(), IPPROTO_TCP, TCP_LINGER2, + &kTCPLingerTimeout, sizeof(kTCPLingerTimeout)), + SyscallSucceedsWithValue(0)); + + // close the connecting FD to trigger FIN_WAIT2 on the connected fd. 
+ conn_fd.reset(); + + absl::SleepFor(absl::Seconds(kTCPLingerTimeout + 1)); + + // ds going out of scope will Re-enable S/R's since at this point the timer + // must have fired and cleaned up the endpoint. + } + + // Now bind and connect a new socket and verify that we can immediately + // rebind the address bound by the conn_fd as it never entered TIME_WAIT. + const FileDescriptor conn_fd2 = ASSERT_NO_ERRNO_AND_VALUE( + Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP)); + + ASSERT_THAT(bind(conn_fd2.get(), + reinterpret_cast<sockaddr*>(&conn_bound_addr), conn_addrlen), + SyscallSucceeds()); + ASSERT_THAT(RetryEINTR(connect)(conn_fd2.get(), + reinterpret_cast<sockaddr*>(&conn_addr), + conn_addrlen), + SyscallSucceeds()); +} + +// TCPResetAfterClose creates a pair of connected sockets then closes +// one end to trigger FIN_WAIT2 state for the closed endpoint verifies +// that we generate RSTs for any new data after the socket is fully +// closed. +TEST_P(SocketInetLoopbackTest, TCPResetAfterClose) { + auto const& param = GetParam(); + TestAddress const& listener = param.listener; + TestAddress const& connector = param.connector; + + // Create the listening socket. + const FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE( + Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP)); + sockaddr_storage listen_addr = listener.addr; + ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast<sockaddr*>(&listen_addr), + listener.addr_len), + SyscallSucceeds()); + ASSERT_THAT(listen(listen_fd.get(), SOMAXCONN), SyscallSucceeds()); + + // Get the port bound by the listening socket. + socklen_t addrlen = listener.addr_len; + ASSERT_THAT(getsockname(listen_fd.get(), + reinterpret_cast<sockaddr*>(&listen_addr), &addrlen), + SyscallSucceeds()); + + uint16_t const port = + ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr)); + + // Connect to the listening socket. 
+ FileDescriptor conn_fd = ASSERT_NO_ERRNO_AND_VALUE( + Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP)); + + sockaddr_storage conn_addr = connector.addr; + ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port)); + ASSERT_THAT(RetryEINTR(connect)(conn_fd.get(), + reinterpret_cast<sockaddr*>(&conn_addr), + connector.addr_len), + SyscallSucceeds()); + + // Accept the connection. + auto accepted = + ASSERT_NO_ERRNO_AND_VALUE(Accept(listen_fd.get(), nullptr, nullptr)); + + // close the connecting FD to trigger FIN_WAIT2 on the connected fd. + conn_fd.reset(); + + int data = 1234; + + // Now send data which should trigger a RST as the other end should + // have timed out and closed the socket. + EXPECT_THAT(RetryEINTR(send)(accepted.get(), &data, sizeof(data), 0), + SyscallSucceeds()); + // Sleep for a short while to get a RST back. + absl::SleepFor(absl::Seconds(1)); + + // Try writing again and we should get an EPIPE back. + EXPECT_THAT(RetryEINTR(send)(accepted.get(), &data, sizeof(data), 0), + SyscallFailsWithErrno(EPIPE)); + + // Trying to read should return zero as the other end did send + // us a FIN. We do it twice to verify that the RST does not cause an + // ECONNRESET on the read after EOF has been read by the application. + EXPECT_THAT(RetryEINTR(recv)(accepted.get(), &data, sizeof(data), 0), + SyscallSucceedsWithValue(0)); + EXPECT_THAT(RetryEINTR(recv)(accepted.get(), &data, sizeof(data), 0), + SyscallSucceedsWithValue(0)); +} + +// This test is disabled under random save as the restore run +// results in the stack.Seed() being different which can cause +// sequence number of final connect to be one that is considered +// old and can cause the test to be flaky. +TEST_P(SocketInetLoopbackTest, TCPTimeWaitTest_NoRandomSave) { + auto const& param = GetParam(); + TestAddress const& listener = param.listener; + TestAddress const& connector = param.connector; + + // Create the listening socket. 
+ const FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE( + Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP)); + sockaddr_storage listen_addr = listener.addr; + ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast<sockaddr*>(&listen_addr), + listener.addr_len), + SyscallSucceeds()); + ASSERT_THAT(listen(listen_fd.get(), SOMAXCONN), SyscallSucceeds()); + + // Get the port bound by the listening socket. + socklen_t addrlen = listener.addr_len; + ASSERT_THAT(getsockname(listen_fd.get(), + reinterpret_cast<sockaddr*>(&listen_addr), &addrlen), + SyscallSucceeds()); + + uint16_t const port = + ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr)); + + // Connect to the listening socket. + FileDescriptor conn_fd = ASSERT_NO_ERRNO_AND_VALUE( + Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP)); + + // We disable saves after this point as a S/R causes the netstack seed + // to be regenerated which changes what ports/ISN is picked for a given + // tuple (src ip,src port, dst ip, dst port). This can cause the final + // SYN to use a sequence number that looks like one from the current + // connection in TIME_WAIT and will not be accepted causing the test + // to timeout. + // + // TODO(gvisor.dev/issue/940): S/R portSeed/portHint + DisableSave ds; + sockaddr_storage conn_addr = connector.addr; + ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port)); + ASSERT_THAT(RetryEINTR(connect)(conn_fd.get(), + reinterpret_cast<sockaddr*>(&conn_addr), + connector.addr_len), + SyscallSucceeds()); + + // Accept the connection. + auto accepted = + ASSERT_NO_ERRNO_AND_VALUE(Accept(listen_fd.get(), nullptr, nullptr)); + + // Get the address/port bound by the connecting socket. 
+ sockaddr_storage conn_bound_addr; + socklen_t conn_addrlen = connector.addr_len; + ASSERT_THAT( + getsockname(conn_fd.get(), reinterpret_cast<sockaddr*>(&conn_bound_addr), + &conn_addrlen), + SyscallSucceeds()); + + // close the accept FD to trigger TIME_WAIT on the accepted socket which + // should cause the conn_fd to follow CLOSE_WAIT->LAST_ACK->CLOSED instead of + // TIME_WAIT. + accepted.reset(); + absl::SleepFor(absl::Seconds(1)); + conn_fd.reset(); + absl::SleepFor(absl::Seconds(1)); + + // Now bind and connect a new socket and verify that we can immediately + // rebind the address bound by the conn_fd as it never entered TIME_WAIT. + const FileDescriptor conn_fd2 = ASSERT_NO_ERRNO_AND_VALUE( + Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP)); + + ASSERT_THAT(bind(conn_fd2.get(), + reinterpret_cast<sockaddr*>(&conn_bound_addr), conn_addrlen), + SyscallSucceeds()); + ASSERT_THAT(RetryEINTR(connect)(conn_fd2.get(), + reinterpret_cast<sockaddr*>(&conn_addr), + conn_addrlen), + SyscallSucceeds()); +} + +TEST_P(SocketInetLoopbackTest, AcceptedInheritsTCPUserTimeout) { + auto const& param = GetParam(); + TestAddress const& listener = param.listener; + TestAddress const& connector = param.connector; + + // Create the listening socket. + const FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE( + Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP)); + sockaddr_storage listen_addr = listener.addr; + ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast<sockaddr*>(&listen_addr), + listener.addr_len), + SyscallSucceeds()); + ASSERT_THAT(listen(listen_fd.get(), SOMAXCONN), SyscallSucceeds()); + + // Get the port bound by the listening socket. + socklen_t addrlen = listener.addr_len; + ASSERT_THAT(getsockname(listen_fd.get(), + reinterpret_cast<sockaddr*>(&listen_addr), &addrlen), + SyscallSucceeds()); + + const uint16_t port = + ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr)); + + // Set the userTimeout on the listening socket. 
+ constexpr int kUserTimeout = 10; + ASSERT_THAT(setsockopt(listen_fd.get(), IPPROTO_TCP, TCP_USER_TIMEOUT, + &kUserTimeout, sizeof(kUserTimeout)), + SyscallSucceeds()); + + // Connect to the listening socket. + FileDescriptor conn_fd = ASSERT_NO_ERRNO_AND_VALUE( + Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP)); + + sockaddr_storage conn_addr = connector.addr; + ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port)); + ASSERT_THAT(RetryEINTR(connect)(conn_fd.get(), + reinterpret_cast<sockaddr*>(&conn_addr), + connector.addr_len), + SyscallSucceeds()); + + // Accept the connection. + auto accepted = + ASSERT_NO_ERRNO_AND_VALUE(Accept(listen_fd.get(), nullptr, nullptr)); + // Verify that the accepted socket inherited the user timeout set on + // the listening socket. + int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT( + getsockopt(accepted.get(), IPPROTO_TCP, TCP_USER_TIMEOUT, &get, &get_len), + SyscallSucceeds()); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kUserTimeout); +} + +// TODO(gvisor.dev/issue/1688): Partially completed passive endpoints are not +// saved. Enable S/R once issue is fixed. +TEST_P(SocketInetLoopbackTest, TCPDeferAccept_NoRandomSave) { + // TODO(gvisor.dev/issue/1688): Partially completed passive endpoints are not + // saved. Enable S/R once issue is fixed. + DisableSave ds; + + auto const& param = GetParam(); + TestAddress const& listener = param.listener; + TestAddress const& connector = param.connector; + + // Create the listening socket. + const FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE( + Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP)); + sockaddr_storage listen_addr = listener.addr; + ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast<sockaddr*>(&listen_addr), + listener.addr_len), + SyscallSucceeds()); + ASSERT_THAT(listen(listen_fd.get(), SOMAXCONN), SyscallSucceeds()); + + // Get the port bound by the listening socket. 
+ socklen_t addrlen = listener.addr_len; + ASSERT_THAT(getsockname(listen_fd.get(), + reinterpret_cast<sockaddr*>(&listen_addr), &addrlen), + SyscallSucceeds()); + + const uint16_t port = + ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr)); + + // Set the TCP_DEFER_ACCEPT on the listening socket. + constexpr int kTCPDeferAccept = 3; + ASSERT_THAT(setsockopt(listen_fd.get(), IPPROTO_TCP, TCP_DEFER_ACCEPT, + &kTCPDeferAccept, sizeof(kTCPDeferAccept)), + SyscallSucceeds()); + + // Connect to the listening socket. + FileDescriptor conn_fd = ASSERT_NO_ERRNO_AND_VALUE( + Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP)); + + sockaddr_storage conn_addr = connector.addr; + ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port)); + ASSERT_THAT(RetryEINTR(connect)(conn_fd.get(), + reinterpret_cast<sockaddr*>(&conn_addr), + connector.addr_len), + SyscallSucceeds()); + + // Set the listening socket to nonblock so that we can verify that there is no + // connection in queue despite the connect above succeeding since the peer has + // sent no data and TCP_DEFER_ACCEPT is set on the listening socket. Set the + // FD to O_NONBLOCK. + int opts; + ASSERT_THAT(opts = fcntl(listen_fd.get(), F_GETFL), SyscallSucceeds()); + opts |= O_NONBLOCK; + ASSERT_THAT(fcntl(listen_fd.get(), F_SETFL, opts), SyscallSucceeds()); + + ASSERT_THAT(accept(listen_fd.get(), nullptr, nullptr), + SyscallFailsWithErrno(EWOULDBLOCK)); + + // Set FD back to blocking. + opts &= ~O_NONBLOCK; + ASSERT_THAT(fcntl(listen_fd.get(), F_SETFL, opts), SyscallSucceeds()); + + // Now write some data to the socket. + int data = 0; + ASSERT_THAT(RetryEINTR(write)(conn_fd.get(), &data, sizeof(data)), + SyscallSucceedsWithValue(sizeof(data))); + + // This should now cause the connection to complete and be delivered to the + // accept socket. + + // Accept the connection. 
+  auto accepted =
+      ASSERT_NO_ERRNO_AND_VALUE(Accept(listen_fd.get(), nullptr, nullptr));
+
+  // Verify that the accepted socket returns the data written.
+  int get = -1;
+  ASSERT_THAT(RetryEINTR(recv)(accepted.get(), &get, sizeof(get), 0),
+              SyscallSucceedsWithValue(sizeof(get)));
+
+  EXPECT_EQ(get, data);
+}
+
+// Checks that a connection held back by TCP_DEFER_ACCEPT cannot be accepted
+// shortly before the defer period ends, but can be accepted after it has
+// passed, even though the peer never writes any data.
+//
+// TODO(gvisor.dev/issue/1688): Partially completed passive endpoints are not
+// saved. Enable S/R once issue is fixed.
+TEST_P(SocketInetLoopbackTest, TCPDeferAcceptTimeout_NoRandomSave) {
+  // Save/restore stays disabled for the whole test body (see the TODO above).
+  DisableSave ds;
+
+  TestAddress const& listener = GetParam().listener;
+  TestAddress const& connector = GetParam().connector;
+
+  // Bind and listen on the address under test.
+  const FileDescriptor server_fd = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP));
+  sockaddr_storage server_addr = listener.addr;
+  ASSERT_THAT(bind(server_fd.get(), reinterpret_cast<sockaddr*>(&server_addr),
+                   listener.addr_len),
+              SyscallSucceeds());
+  ASSERT_THAT(listen(server_fd.get(), SOMAXCONN), SyscallSucceeds());
+
+  // Read back the port the listener ended up bound to.
+  socklen_t server_addr_len = listener.addr_len;
+  ASSERT_THAT(
+      getsockname(server_fd.get(), reinterpret_cast<sockaddr*>(&server_addr),
+                  &server_addr_len),
+      SyscallSucceeds());
+  const uint16_t bound_port =
+      ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), server_addr));
+
+  // Arm TCP_DEFER_ACCEPT on the listener. The first sleep below is derived
+  // from the same constant.
+  constexpr int kTCPDeferAccept = 3;
+  ASSERT_THAT(setsockopt(server_fd.get(), IPPROTO_TCP, TCP_DEFER_ACCEPT,
+                         &kTCPDeferAccept, sizeof(kTCPDeferAccept)),
+              SyscallSucceeds());
+
+  // Connect a client to the listener; nothing is ever written on it.
+  FileDescriptor client_fd = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP));
+  sockaddr_storage peer_addr = connector.addr;
+  ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &peer_addr, bound_port));
+  ASSERT_THAT(RetryEINTR(connect)(client_fd.get(),
+                                  reinterpret_cast<sockaddr*>(&peer_addr),
+                                  connector.addr_len),
+              SyscallSucceeds());
+
+  // Put the listener into O_NONBLOCK mode so that accept() reports an empty
+  // queue instead of blocking. The connect above succeeded, but because the
+  // peer has sent no data and TCP_DEFER_ACCEPT is set, no connection should be
+  // queued yet.
+  int fl_flags = 0;
+  ASSERT_THAT(fl_flags = fcntl(server_fd.get(), F_GETFL), SyscallSucceeds());
+  ASSERT_THAT(fcntl(server_fd.get(), F_SETFL, fl_flags | O_NONBLOCK),
+              SyscallSucceeds());
+
+  // One second short of the defer period there must be nothing to accept.
+  absl::SleepFor(absl::Seconds(kTCPDeferAccept - 1));
+  ASSERT_THAT(accept(server_fd.get(), nullptr, nullptr),
+              SyscallFailsWithErrno(EWOULDBLOCK));
+
+  // Restore blocking mode for the final accept.
+  ASSERT_THAT(fcntl(server_fd.get(), F_SETFL, fl_flags & ~O_NONBLOCK),
+              SyscallSucceeds());
+
+  // Sleep until a little past the TCP_DEFER_ACCEPT duration. Once the timeout
+  // is hit, the listener should retransmit a SYN-ACK as a last ditch attempt
+  // to complete the connection with or without data.
+  absl::SleepFor(absl::Seconds(2));
+
+  // The connection must now be acceptable even though no data was written.
+  auto accepted =
+      ASSERT_NO_ERRNO_AND_VALUE(Accept(server_fd.get(), nullptr, nullptr));
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    All, SocketInetLoopbackTest,
+    ::testing::Values(
+        // Listeners bound to IPv4 addresses refuse connections using IPv6
+        // addresses.
+        TestParam{V4Any(), V4Any()}, TestParam{V4Any(), V4Loopback()},
+        TestParam{V4Any(), V4MappedAny()},
+        TestParam{V4Any(), V4MappedLoopback()},
+        TestParam{V4Loopback(), V4Any()}, TestParam{V4Loopback(), V4Loopback()},
+        TestParam{V4Loopback(), V4MappedLoopback()},
+        TestParam{V4MappedAny(), V4Any()},
+        TestParam{V4MappedAny(), V4Loopback()},
+        TestParam{V4MappedAny(), V4MappedAny()},
+        TestParam{V4MappedAny(), V4MappedLoopback()},
+        TestParam{V4MappedLoopback(), V4Any()},
+        TestParam{V4MappedLoopback(), V4Loopback()},
+        TestParam{V4MappedLoopback(), V4MappedLoopback()},
+
+        // Listeners bound to IN6ADDR_ANY accept all connections.
+        TestParam{V6Any(), V4Any()}, TestParam{V6Any(), V4Loopback()},
+        TestParam{V6Any(), V4MappedAny()},
+        TestParam{V6Any(), V4MappedLoopback()}, TestParam{V6Any(), V6Any()},
+        TestParam{V6Any(), V6Loopback()},
+
+        // Listeners bound to IN6ADDR_LOOPBACK refuse connections using IPv4
+        // addresses.
+        TestParam{V6Loopback(), V6Any()},
+        TestParam{V6Loopback(), V6Loopback()}),
+    DescribeTestParam);
+
+// Fixture for the SO_REUSEPORT tests; parameterized by listener/connector
+// address pair.
+using SocketInetReusePortTest = ::testing::TestWithParam<TestParam>;
+
+// Opens kThreadCount SO_REUSEPORT listeners on a single port, accepts on each
+// of them from its own thread while another thread makes kConnectAttempts
+// connections, and then checks that every listener accepted roughly the same
+// number of connections.
+//
+// TODO(gvisor.dev/issue/940): Remove _NoRandomSave when portHint/stack.Seed is
+// saved/restored.
+TEST_P(SocketInetReusePortTest, TcpPortReuseMultiThread_NoRandomSave) {
+  constexpr int kThreadCount = 3;
+  constexpr int kConnectAttempts = 10000;
+
+  TestAddress const& listener = GetParam().listener;
+  TestAddress const& connector = GetParam().connector;
+  sockaddr_storage listen_addr = listener.addr;
+  sockaddr_storage conn_addr = connector.addr;
+
+  // Create the listening sockets, each with SO_REUSEPORT set before bind().
+  FileDescriptor listener_fds[kThreadCount];
+  for (int idx = 0; idx < kThreadCount; ++idx) {
+    listener_fds[idx] = ASSERT_NO_ERRNO_AND_VALUE(
+        Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP));
+    const int sock = listener_fds[idx].get();
+
+    ASSERT_THAT(setsockopt(sock, SOL_SOCKET, SO_REUSEPORT, &kSockOptOn,
+                           sizeof(kSockOptOn)),
+                SyscallSucceeds());
+    ASSERT_THAT(bind(sock, reinterpret_cast<sockaddr*>(&listen_addr),
+                     listener.addr_len),
+                SyscallSucceeds());
+    ASSERT_THAT(listen(sock, 40), SyscallSucceeds());
+
+    if (idx == 0) {
+      // The first bind determines the port. Store it in both addresses so
+      // that the remaining listeners and the connecting thread all use it.
+      socklen_t addrlen = listener.addr_len;
+      ASSERT_THAT(
+          getsockname(sock, reinterpret_cast<sockaddr*>(&listen_addr),
+                      &addrlen),
+          SyscallSucceeds());
+      uint16_t const port =
+          ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr));
+      ASSERT_NO_ERRNO(SetAddrPort(listener.family(), &listen_addr, port));
+      ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port));
+    }
+  }
+
+  // Total number of accepted connections, shared by all accepting threads.
+  std::atomic<int> connects_received{0};
+  std::unique_ptr<ScopedThread> listen_thread[kThreadCount];
+  // Per-listener tally; slot N is only written by accepting thread N.
+  int accept_counts[kThreadCount] = {};
+  // TODO(avagin): figure out how to not disable S/R for the whole test.
+  // We need to take into account that this test executes a lot of system
+  // calls from many threads.
+  DisableSave ds;
+
+  for (int idx = 0; idx < kThreadCount; ++idx) {
+    listen_thread[idx] = absl::make_unique<ScopedThread>(
+        [&listener_fds, &accept_counts, idx, &connects_received]() {
+          do {
+            auto conn = Accept(listener_fds[idx].get(), nullptr, nullptr);
+            if (!conn.ok()) {
+              if (connects_received < kConnectAttempts) {
+                // Connections are still outstanding, so this is a real
+                // failure; report it.
+                ASSERT_NO_ERRNO(conn);
+              } else {
+                // Another thread has shut down our read side, causing the
+                // accept to fail.
+                ASSERT_EQ(errno, EINVAL);
+              }
+              break;
+            }
+            // Receive some data from the socket to be sure that the connect()
+            // system call has completed on the other side.
+            //
+            // The read is deliberately short and the socket is then closed to
+            // trigger a RST. This ensures that both ends of the connection
+            // are cleaned up and no goroutines hang around in TIME-WAIT, so
+            // that this test does not time out under gotsan runs where lots
+            // of goroutines can cause the test to use absurd amounts of
+            // memory.
+            //
+            // See: https://tools.ietf.org/html/rfc2525#page-50 section 2.17
+            uint16_t data;
+            EXPECT_THAT(RetryEINTR(recv)(conn.ValueOrDie().get(), &data,
+                                         sizeof(data), 0),
+                        SyscallSucceedsWithValue(sizeof(data)));
+            ++accept_counts[idx];
+          } while (++connects_received < kConnectAttempts);
+
+          // Shut down every listener to wake up the other threads.
+          for (const auto& lfd : listener_fds) {
+            shutdown(lfd.get(), SHUT_RDWR);
+          }
+        });
+  }
+
+  ScopedThread connecting_thread([&connector, &conn_addr]() {
+    for (int attempt = 0; attempt < kConnectAttempts; ++attempt) {
+      const FileDescriptor client = ASSERT_NO_ERRNO_AND_VALUE(
+          Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP));
+      ASSERT_THAT(RetryEINTR(connect)(client.get(),
+                                      reinterpret_cast<sockaddr*>(&conn_addr),
+                                      connector.addr_len),
+                  SyscallSucceeds());
+
+      // Two separate sends are used so that two segments are received. This
+      // is required for netstack, where read incorrectly assumes a whole
+      // segment is consumed when endpoint.Read() is called, although the
+      // syscall that invoked endpoint.Read() may only consume it partially.
+      // As a result a close() of such a socket does not trigger a RST in
+      // netstack, because the endpoint believes it has no unread data.
+      EXPECT_THAT(
+          RetryEINTR(send)(client.get(), &attempt, sizeof(attempt), 0),
+          SyscallSucceedsWithValue(sizeof(attempt)));
+
+      // TODO(gvisor.dev/issue/1449): Remove this block once netstack correctly
+      // generates a RST.
+      if (IsRunningOnGvisor()) {
+        EXPECT_THAT(
+            RetryEINTR(send)(client.get(), &attempt, sizeof(attempt), 0),
+            SyscallSucceedsWithValue(sizeof(attempt)));
+      }
+    }
+  });
+
+  // Join all threads so that every connection has been counted.
+  connecting_thread.Join();
+  for (auto& acceptor : listen_thread) {
+    acceptor->Join();
+  }
+
+  // Connections must be spread fairly between the listening sockets.
+  for (const int accepted : accept_counts) {
+    EXPECT_THAT(accepted,
+                EquivalentWithin((kConnectAttempts / kThreadCount), 0.10));
+  }
+}
+
+TEST_P(SocketInetReusePortTest, UdpPortReuseMultiThread_NoRandomSave) {
+  auto const& param = GetParam();
+
+  TestAddress const& listener = param.listener;
+  TestAddress const& connector = param.connector;
+  sockaddr_storage listen_addr = listener.addr;
+  sockaddr_storage conn_addr = connector.addr;
+  constexpr int kThreadCount = 3;
+
+  // Create the listening socket.
+  FileDescriptor listener_fds[kThreadCount];
+  for (int i = 0; i < kThreadCount; i++) {
+    listener_fds[i] =
+        ASSERT_NO_ERRNO_AND_VALUE(Socket(listener.family(), SOCK_DGRAM, 0));
+    int fd = listener_fds[i].get();
+
+    ASSERT_THAT(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &kSockOptOn,
+                           sizeof(kSockOptOn)),
+                SyscallSucceeds());
+    ASSERT_THAT(
+        bind(fd, reinterpret_cast<sockaddr*>(&listen_addr), listener.addr_len),
+        SyscallSucceeds());
+
+    // On the first bind we need to determine which port was bound.
+    if (i != 0) {
+      continue;
+    }
+
+    // Get the port bound by the listening socket.
+    socklen_t addrlen = listener.addr_len;
+    ASSERT_THAT(
+        getsockname(listener_fds[0].get(),
+                    reinterpret_cast<sockaddr*>(&listen_addr), &addrlen),
+        SyscallSucceeds());
+    uint16_t const port =
+        ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr));
+    ASSERT_NO_ERRNO(SetAddrPort(listener.family(), &listen_addr, port));
+    ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port));
+  }
+
+  constexpr int kConnectAttempts = 10000;
+  std::atomic<int> packets_received = ATOMIC_VAR_INIT(0);
+  std::unique_ptr<ScopedThread> receiver_thread[kThreadCount];
+  int packets_per_socket[kThreadCount] = {};
+  // TODO(avagin): figure how to not disable S/R for the whole test.
+  DisableSave ds;  // Too expensive.
+
+  for (int i = 0; i < kThreadCount; i++) {
+    receiver_thread[i] = absl::make_unique<ScopedThread>(
+        [&listener_fds, &packets_per_socket, i, &packets_received]() {
+          do {
+            struct sockaddr_storage addr = {};
+            socklen_t addrlen = sizeof(addr);
+            int data;
+
+            auto ret = RetryEINTR(recvfrom)(
+                listener_fds[i].get(), &data, sizeof(data), 0,
+                reinterpret_cast<struct sockaddr*>(&addr), &addrlen);
+
+            if (packets_received < kConnectAttempts) {
+              ASSERT_THAT(ret, SyscallSucceedsWithValue(sizeof(data)));
+            }
+
+            if (ret != sizeof(data)) {
+              // Another thread may have shutdown our read side causing the
+              // recvfrom to fail.
+              break;
+            }
+
+            packets_received++;
+            packets_per_socket[i]++;
+
+            // A response is required to synchronize with the main thread,
+            // otherwise the main thread can send more than can fit into receive
+            // queues.
+            EXPECT_THAT(RetryEINTR(sendto)(
+                            listener_fds[i].get(), &data, sizeof(data), 0,
+                            reinterpret_cast<sockaddr*>(&addr), addrlen),
+                        SyscallSucceedsWithValue(sizeof(data)));
+          } while (packets_received < kConnectAttempts);
+
+          // Shutdown all sockets to wake up other threads.
+          for (int j = 0; j < kThreadCount; j++)
+            shutdown(listener_fds[j].get(), SHUT_RDWR);
+        });
+  }
+
+  ScopedThread main_thread([&connector, &conn_addr]() {
+    for (int i = 0; i < kConnectAttempts; i++) {
+      const FileDescriptor fd =
+          ASSERT_NO_ERRNO_AND_VALUE(Socket(connector.family(), SOCK_DGRAM, 0));
+      EXPECT_THAT(RetryEINTR(sendto)(fd.get(), &i, sizeof(i), 0,
+                                     reinterpret_cast<sockaddr*>(&conn_addr),
+                                     connector.addr_len),
+                  SyscallSucceedsWithValue(sizeof(i)));
+      int data;
+      EXPECT_THAT(RetryEINTR(recv)(fd.get(), &data, sizeof(data), 0),
+                  SyscallSucceedsWithValue(sizeof(data)));
+    }
+  });
+
+  main_thread.Join();
+
+  // Join threads to be sure that all connections have been counted
+  for (int i = 0; i < kThreadCount; i++) {
+    receiver_thread[i]->Join();
+  }
+  // Check that packets are distributed fairly between listening sockets.
+  for (int i = 0; i < kThreadCount; i++)
+    EXPECT_THAT(packets_per_socket[i],
+                EquivalentWithin((kConnectAttempts / kThreadCount), 0.10));
+}
+
+// Sends two datagrams from each of kConnectAttempts client sockets to a group
+// of kThreadCount SO_REUSEPORT UDP sockets sharing one port: the first round
+// while a local DisableSave is held, the second after that object has been
+// reset. Every datagram coming from a given client port must be delivered to
+// the same server socket.
+TEST_P(SocketInetReusePortTest, UdpPortReuseMultiThreadShort_NoRandomSave) {
+  constexpr int kThreadCount = 3;
+  constexpr int kConnectAttempts = 10;
+
+  TestAddress const& listener = GetParam().listener;
+  TestAddress const& connector = GetParam().connector;
+  sockaddr_storage listen_addr = listener.addr;
+  sockaddr_storage conn_addr = connector.addr;
+
+  // TODO(b/141211329): endpointsByNic.seed has to be saved/restored.
+  const DisableSave ds141211329;
+
+  // Create the server sockets, each with SO_REUSEPORT set before bind().
+  FileDescriptor listener_fds[kThreadCount];
+  for (int idx = 0; idx < kThreadCount; ++idx) {
+    listener_fds[idx] =
+        ASSERT_NO_ERRNO_AND_VALUE(Socket(listener.family(), SOCK_DGRAM, 0));
+    const int sock = listener_fds[idx].get();
+
+    ASSERT_THAT(setsockopt(sock, SOL_SOCKET, SO_REUSEPORT, &kSockOptOn,
+                           sizeof(kSockOptOn)),
+                SyscallSucceeds());
+    ASSERT_THAT(bind(sock, reinterpret_cast<sockaddr*>(&listen_addr),
+                     listener.addr_len),
+                SyscallSucceeds());
+
+    if (idx == 0) {
+      // The first bind determines the port. Store it in both addresses so
+      // that the remaining server sockets and the clients all use it.
+      socklen_t addrlen = listener.addr_len;
+      ASSERT_THAT(
+          getsockname(sock, reinterpret_cast<sockaddr*>(&listen_addr),
+                      &addrlen),
+          SyscallSucceeds());
+      uint16_t const port =
+          ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr));
+      ASSERT_NO_ERRNO(SetAddrPort(listener.family(), &listen_addr, port));
+      ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port));
+    }
+  }
+
+  FileDescriptor client_fds[kConnectAttempts];
+
+  // First round: create the clients and send one datagram from each, without
+  // save/restore.
+  DisableSave ds;
+  for (int n = 0; n < kConnectAttempts; ++n) {
+    client_fds[n] =
+        ASSERT_NO_ERRNO_AND_VALUE(Socket(connector.family(), SOCK_DGRAM, 0));
+    EXPECT_THAT(RetryEINTR(sendto)(client_fds[n].get(), &n, sizeof(n), 0,
+                                   reinterpret_cast<sockaddr*>(&conn_addr),
+                                   connector.addr_len),
+                SyscallSucceedsWithValue(sizeof(n)));
+  }
+  ds.reset();
+
+  // Second round: send again from the same clients to check that the mapping
+  // of client to server sockets has not changed after save/restore.
+  for (int n = 0; n < kConnectAttempts; ++n) {
+    EXPECT_THAT(RetryEINTR(sendto)(client_fds[n].get(), &n, sizeof(n), 0,
+                                   reinterpret_cast<sockaddr*>(&conn_addr),
+                                   connector.addr_len),
+                SyscallSucceedsWithValue(sizeof(n)));
+  }
+
+  struct pollfd pollfds[kThreadCount];
+  for (int idx = 0; idx < kThreadCount; ++idx) {
+    pollfds[idx].fd = listener_fds[idx].get();
+    pollfds[idx].events = POLLIN;
+  }
+
+  // Client source port -> server socket that first received from it.
+  std::map<uint16_t, int> port_to_fd;
+
+  int received = 0;
+  while (received < kConnectAttempts * 2) {
+    ASSERT_THAT(poll(pollfds, kThreadCount, -1),
+                SyscallSucceedsWithValue(Gt(0)));
+
+    for (const struct pollfd& pfd : pollfds) {
+      if ((pfd.revents & POLLIN) == 0) {
+        continue;
+      }
+
+      received++;
+
+      const int sock = pfd.fd;
+      struct sockaddr_storage from = {};
+      socklen_t from_len = sizeof(from);
+      int data;
+      EXPECT_THAT(RetryEINTR(recvfrom)(
+                      sock, &data, sizeof(data), 0,
+                      reinterpret_cast<struct sockaddr*>(&from), &from_len),
+                  SyscallSucceedsWithValue(sizeof(data)));
+      uint16_t const port =
+          ASSERT_NO_ERRNO_AND_VALUE(AddrPort(connector.family(), from));
+
+      // Record the server socket for a client port seen for the first time;
+      // otherwise the packet must have arrived on the socket recorded earlier.
+      const auto inserted = port_to_fd.emplace(port, sock);
+      if (!inserted.second) {
+        EXPECT_EQ(inserted.first->second, sock);
+      }
+    }
+  }
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    All, SocketInetReusePortTest,
+    ::testing::Values(
+        // Listeners bound to IPv4 addresses refuse connections using IPv6
+        // addresses.
+        TestParam{V4Any(), V4Loopback()},
+        TestParam{V4Loopback(), V4MappedLoopback()},
+
+        // Listeners bound to IN6ADDR_ANY accept all connections.
+        TestParam{V6Any(), V4Loopback()}, TestParam{V6Any(), V6Loopback()},
+
+        // Listeners bound to IN6ADDR_LOOPBACK refuse connections using IPv4
+        // addresses.
+        TestParam{V6Loopback(), V6Loopback()}),
+    DescribeTestParam);
+
+// Parameter for the tests that run once per socket type.
+struct ProtocolTestParam {
+  // Text returned by DescribeProtocolTestParam for this parameter.
+  std::string description;
+  // Passed as the `type` argument to Socket() by the tests below.
+  int type;
+};
+
+// Returns the description stored in the parameter carried by |info|.
+std::string DescribeProtocolTestParam(
+    ::testing::TestParamInfo<ProtocolTestParam> const& info) {
+  const ProtocolTestParam& protocol = info.param;
+  return protocol.description;
+}
+
+// Fixture for the per-socket-type port reservation tests.
+using SocketMultiProtocolInetLoopbackTest =
+    ::testing::TestWithParam<ProtocolTestParam>;
+
+// A dual stack socket bound to the v4-mapped loopback address must leave the
+// same port bindable on the v6 loopback, while a v4 socket binding the v4
+// loopback on that port must get EADDRINUSE.
+TEST_P(SocketMultiProtocolInetLoopbackTest, V4MappedLoopbackOnlyReservesV4) {
+  const int sock_type = GetParam().type;
+
+  for (int attempt = 0;; ++attempt) {
+    // Bind the v4 loopback on a dual stack socket.
+    TestAddress const& mapped = V4MappedLoopback();
+    sockaddr_storage mapped_addr = mapped.addr;
+    const FileDescriptor mapped_fd =
+        ASSERT_NO_ERRNO_AND_VALUE(Socket(mapped.family(), sock_type, 0));
+    ASSERT_THAT(
+        bind(mapped_fd.get(), reinterpret_cast<sockaddr*>(&mapped_addr),
+             mapped.addr_len),
+        SyscallSucceeds());
+
+    // Find out which port that bind produced.
+    socklen_t mapped_len = mapped.addr_len;
+    ASSERT_THAT(
+        getsockname(mapped_fd.get(), reinterpret_cast<sockaddr*>(&mapped_addr),
+                    &mapped_len),
+        SyscallSucceeds());
+    uint16_t const port =
+        ASSERT_NO_ERRNO_AND_VALUE(AddrPort(mapped.family(), mapped_addr));
+
+    // The v6 loopback must still be bindable on the same port.
+    TestAddress const& v6 = V6Loopback();
+    sockaddr_storage v6_addr = v6.addr;
+    ASSERT_NO_ERRNO(SetAddrPort(v6.family(), &v6_addr, port));
+    const FileDescriptor v6_fd =
+        ASSERT_NO_ERRNO_AND_VALUE(Socket(v6.family(), sock_type, 0));
+    const int rc =
+        bind(v6_fd.get(), reinterpret_cast<sockaddr*>(&v6_addr), v6.addr_len);
+    if (rc == -1 && errno == EADDRINUSE) {
+      // Port may have been in use. Start over with freshly created sockets,
+      // giving up after 100 tries.
+      ASSERT_LT(attempt, 100);
+      continue;
+    }
+    ASSERT_THAT(rc, SyscallSucceeds());
+
+    // Binding the v4 loopback with the same port on a v4 socket must fail.
+    TestAddress const& v4 = V4Loopback();
+    sockaddr_storage v4_addr = v4.addr;
+    ASSERT_NO_ERRNO(SetAddrPort(v4.family(), &v4_addr, port));
+    const FileDescriptor v4_fd =
+        ASSERT_NO_ERRNO_AND_VALUE(Socket(v4.family(), sock_type, 0));
+    ASSERT_THAT(
+        bind(v4_fd.get(), reinterpret_cast<sockaddr*>(&v4_addr), v4.addr_len),
+        SyscallFailsWithErrno(EADDRINUSE));
+
+    // All checks ran; no need to try again.
+    break;
+  }
+}
+
+// Same scenario as above, but the dual stack socket is bound to the v4-mapped
+// "any" address: the v6 loopback stays bindable on the port, and the v4
+// loopback on a v4 socket must get EADDRINUSE.
+TEST_P(SocketMultiProtocolInetLoopbackTest, V4MappedAnyOnlyReservesV4) {
+  const int sock_type = GetParam().type;
+
+  for (int attempt = 0;; ++attempt) {
+    // Bind the v4 any on a dual stack socket.
+    TestAddress const& mapped = V4MappedAny();
+    sockaddr_storage mapped_addr = mapped.addr;
+    const FileDescriptor mapped_fd =
+        ASSERT_NO_ERRNO_AND_VALUE(Socket(mapped.family(), sock_type, 0));
+    ASSERT_THAT(
+        bind(mapped_fd.get(), reinterpret_cast<sockaddr*>(&mapped_addr),
+             mapped.addr_len),
+        SyscallSucceeds());
+
+    // Find out which port that bind produced.
+    socklen_t mapped_len = mapped.addr_len;
+    ASSERT_THAT(
+        getsockname(mapped_fd.get(), reinterpret_cast<sockaddr*>(&mapped_addr),
+                    &mapped_len),
+        SyscallSucceeds());
+    uint16_t const port =
+        ASSERT_NO_ERRNO_AND_VALUE(AddrPort(mapped.family(), mapped_addr));
+
+    // The v6 loopback must still be bindable on the same port.
+    TestAddress const& v6 = V6Loopback();
+    sockaddr_storage v6_addr = v6.addr;
+    ASSERT_NO_ERRNO(SetAddrPort(v6.family(), &v6_addr, port));
+    const FileDescriptor v6_fd =
+        ASSERT_NO_ERRNO_AND_VALUE(Socket(v6.family(), sock_type, 0));
+    const int rc =
+        bind(v6_fd.get(), reinterpret_cast<sockaddr*>(&v6_addr), v6.addr_len);
+    if (rc == -1 && errno == EADDRINUSE) {
+      // Port may have been in use. Start over with freshly created sockets,
+      // giving up after 100 tries.
+      ASSERT_LT(attempt, 100);
+      continue;
+    }
+    ASSERT_THAT(rc, SyscallSucceeds());
+
+    // Binding the v4 loopback with the same port on a v4 socket must fail.
+    TestAddress const& v4 = V4Loopback();
+    sockaddr_storage v4_addr = v4.addr;
+    ASSERT_NO_ERRNO(SetAddrPort(v4.family(), &v4_addr, port));
+    const FileDescriptor v4_fd =
+        ASSERT_NO_ERRNO_AND_VALUE(Socket(v4.family(), sock_type, 0));
+    ASSERT_THAT(
+        bind(v4_fd.get(), reinterpret_cast<sockaddr*>(&v4_addr), v4.addr_len),
+        SyscallFailsWithErrno(EADDRINUSE));
+
+    // All checks ran; no need to try again.
+    break;
+  }
+}
+
+TEST_P(SocketMultiProtocolInetLoopbackTest, DualStackV6AnyReservesEverything) {
+  auto const& param = GetParam();
+
+  // Bind the v6 any on a dual stack socket.
+  TestAddress const& test_addr_dual = V6Any();
+  sockaddr_storage addr_dual = test_addr_dual.addr;
+  const FileDescriptor fd_dual =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr_dual.family(), param.type, 0));
+  ASSERT_THAT(bind(fd_dual.get(), reinterpret_cast<sockaddr*>(&addr_dual),
+                   test_addr_dual.addr_len),
+              SyscallSucceeds());
+
+  // Get the port that we bound.
+  socklen_t addrlen = test_addr_dual.addr_len;
+  ASSERT_THAT(getsockname(fd_dual.get(),
+                          reinterpret_cast<sockaddr*>(&addr_dual), &addrlen),
+              SyscallSucceeds());
+  uint16_t const port =
+      ASSERT_NO_ERRNO_AND_VALUE(AddrPort(test_addr_dual.family(), addr_dual));
+
+  // Verify that binding the v6 loopback with the same port fails.
+  TestAddress const& test_addr_v6 = V6Loopback();
+  sockaddr_storage addr_v6 = test_addr_v6.addr;
+  ASSERT_NO_ERRNO(SetAddrPort(test_addr_v6.family(), &addr_v6, port));
+  const FileDescriptor fd_v6 =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr_v6.family(), param.type, 0));
+  ASSERT_THAT(bind(fd_v6.get(), reinterpret_cast<sockaddr*>(&addr_v6),
+                   test_addr_v6.addr_len),
+              SyscallFailsWithErrno(EADDRINUSE));
+
+  // Verify that binding the v4 loopback on the same port with a v6 socket
+  // fails.
+  TestAddress const& test_addr_v4_mapped = V4MappedLoopback();
+  sockaddr_storage addr_v4_mapped = test_addr_v4_mapped.addr;
+  ASSERT_NO_ERRNO(
+      SetAddrPort(test_addr_v4_mapped.family(), &addr_v4_mapped, port));
+  const FileDescriptor fd_v4_mapped = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(test_addr_v4_mapped.family(), param.type, 0));
+  ASSERT_THAT(
+      bind(fd_v4_mapped.get(), reinterpret_cast<sockaddr*>(&addr_v4_mapped),
+           test_addr_v4_mapped.addr_len),
+      SyscallFailsWithErrno(EADDRINUSE));
+
+  // Verify that binding the v4 loopback on the same port with a v4 socket
+  // fails.
+  TestAddress const& test_addr_v4 = V4Loopback();
+  sockaddr_storage addr_v4 = test_addr_v4.addr;
+  ASSERT_NO_ERRNO(SetAddrPort(test_addr_v4.family(), &addr_v4, port));
+  const FileDescriptor fd_v4 =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr_v4.family(), param.type, 0));
+  ASSERT_THAT(bind(fd_v4.get(), reinterpret_cast<sockaddr*>(&addr_v4),
+                   test_addr_v4.addr_len),
+              SyscallFailsWithErrno(EADDRINUSE));
+
+  // Verify that binding the v4 any on the same port with a v4 socket
+  // fails.
+  TestAddress const& test_addr_v4_any = V4Any();
+  sockaddr_storage addr_v4_any = test_addr_v4_any.addr;
+  ASSERT_NO_ERRNO(SetAddrPort(test_addr_v4_any.family(), &addr_v4_any, port));
+  const FileDescriptor fd_v4_any = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(test_addr_v4_any.family(), param.type, 0));
+  ASSERT_THAT(bind(fd_v4_any.get(), reinterpret_cast<sockaddr*>(&addr_v4_any),
+                   test_addr_v4_any.addr_len),
+              SyscallFailsWithErrno(EADDRINUSE));
+}
+
+// With SO_REUSEADDR set on both sockets, and the dual stack v6-any socket
+// only bound (never listening), a v4 socket must be able to bind the v4 "any"
+// address on the same port.
+TEST_P(SocketMultiProtocolInetLoopbackTest,
+       DualStackV6AnyReuseAddrDoesNotReserveV4Any) {
+  auto const& param = GetParam();
+
+  // Bind the v6 any on a dual stack socket, with SO_REUSEADDR set first.
+  TestAddress const& test_addr_dual = V6Any();
+  sockaddr_storage addr_dual = test_addr_dual.addr;
+  const FileDescriptor fd_dual =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr_dual.family(), param.type, 0));
+  ASSERT_THAT(setsockopt(fd_dual.get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceeds());
+  ASSERT_THAT(bind(fd_dual.get(), reinterpret_cast<sockaddr*>(&addr_dual),
+                   test_addr_dual.addr_len),
+              SyscallSucceeds());
+
+  // Get the port that we bound.
+  socklen_t addrlen = test_addr_dual.addr_len;
+  ASSERT_THAT(getsockname(fd_dual.get(),
+                          reinterpret_cast<sockaddr*>(&addr_dual), &addrlen),
+              SyscallSucceeds());
+  uint16_t const port =
+      ASSERT_NO_ERRNO_AND_VALUE(AddrPort(test_addr_dual.family(), addr_dual));
+
+  // Verify that binding the v4 any on the same port with a v4 socket succeeds.
+  TestAddress const& test_addr_v4_any = V4Any();
+  sockaddr_storage addr_v4_any = test_addr_v4_any.addr;
+  ASSERT_NO_ERRNO(SetAddrPort(test_addr_v4_any.family(), &addr_v4_any, port));
+  const FileDescriptor fd_v4_any = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(test_addr_v4_any.family(), param.type, 0));
+  ASSERT_THAT(setsockopt(fd_v4_any.get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceeds());
+  ASSERT_THAT(bind(fd_v4_any.get(), reinterpret_cast<sockaddr*>(&addr_v4_any),
+                   test_addr_v4_any.addr_len),
+              SyscallSucceeds());
+}
+
+// Counterpart of the test above: once the dual stack v6-any socket is
+// listening, SO_REUSEADDR on both sockets is not enough, and binding the v4
+// "any" address on the same port must fail with EADDRINUSE.
+TEST_P(SocketMultiProtocolInetLoopbackTest,
+       DualStackV6AnyReuseAddrListenReservesV4Any) {
+  auto const& param = GetParam();
+
+  // Only TCP sockets are supported.
+  SKIP_IF((param.type & SOCK_STREAM) == 0);
+
+  // Bind the v6 any on a dual stack socket, with SO_REUSEADDR set first.
+  TestAddress const& test_addr_dual = V6Any();
+  sockaddr_storage addr_dual = test_addr_dual.addr;
+  const FileDescriptor fd_dual =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr_dual.family(), param.type, 0));
+  ASSERT_THAT(setsockopt(fd_dual.get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceeds());
+  ASSERT_THAT(bind(fd_dual.get(), reinterpret_cast<sockaddr*>(&addr_dual),
+                   test_addr_dual.addr_len),
+              SyscallSucceeds());
+
+  // Start listening; this is the only setup difference from the test above.
+  ASSERT_THAT(listen(fd_dual.get(), 5), SyscallSucceeds());
+
+  // Get the port that we bound.
+  socklen_t addrlen = test_addr_dual.addr_len;
+  ASSERT_THAT(getsockname(fd_dual.get(),
+                          reinterpret_cast<sockaddr*>(&addr_dual), &addrlen),
+              SyscallSucceeds());
+  uint16_t const port =
+      ASSERT_NO_ERRNO_AND_VALUE(AddrPort(test_addr_dual.family(), addr_dual));
+
+  // Verify that binding the v4 any on the same port with a v4 socket fails,
+  // even though SO_REUSEADDR is set on it.
+  TestAddress const& test_addr_v4_any = V4Any();
+  sockaddr_storage addr_v4_any = test_addr_v4_any.addr;
+  ASSERT_NO_ERRNO(SetAddrPort(test_addr_v4_any.family(), &addr_v4_any, port));
+  const FileDescriptor fd_v4_any = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(test_addr_v4_any.family(), param.type, 0));
+  ASSERT_THAT(setsockopt(fd_v4_any.get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceeds());
+
+  ASSERT_THAT(bind(fd_v4_any.get(), reinterpret_cast<sockaddr*>(&addr_v4_any),
+                   test_addr_v4_any.addr_len),
+              SyscallFailsWithErrno(EADDRINUSE));
+}
+
+TEST_P(SocketMultiProtocolInetLoopbackTest,
+       DualStackV6AnyWithListenReservesEverything) {
+  auto const& param = GetParam();
+
+  // Only TCP sockets are supported.
+  SKIP_IF((param.type & SOCK_STREAM) == 0);
+
+  // Bind the v6 any on a dual stack socket.
+  TestAddress const& test_addr_dual = V6Any();
+  sockaddr_storage addr_dual = test_addr_dual.addr;
+  const FileDescriptor fd_dual =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr_dual.family(), param.type, 0));
+  ASSERT_THAT(bind(fd_dual.get(), reinterpret_cast<sockaddr*>(&addr_dual),
+                   test_addr_dual.addr_len),
+              SyscallSucceeds());
+
+  ASSERT_THAT(listen(fd_dual.get(), 5), SyscallSucceeds());
+
+  // Get the port that we bound.
+  socklen_t addrlen = test_addr_dual.addr_len;
+  ASSERT_THAT(getsockname(fd_dual.get(),
+                          reinterpret_cast<sockaddr*>(&addr_dual), &addrlen),
+              SyscallSucceeds());
+  uint16_t const port =
+      ASSERT_NO_ERRNO_AND_VALUE(AddrPort(test_addr_dual.family(), addr_dual));
+
+  // Verify that binding the v6 loopback with the same port fails.
+  TestAddress const& test_addr_v6 = V6Loopback();
+  sockaddr_storage addr_v6 = test_addr_v6.addr;
+  ASSERT_NO_ERRNO(SetAddrPort(test_addr_v6.family(), &addr_v6, port));
+  const FileDescriptor fd_v6 =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr_v6.family(), param.type, 0));
+  ASSERT_THAT(bind(fd_v6.get(), reinterpret_cast<sockaddr*>(&addr_v6),
+                   test_addr_v6.addr_len),
+              SyscallFailsWithErrno(EADDRINUSE));
+
+  // Verify that binding the v4 loopback on the same port with a v6 socket
+  // fails.
+  TestAddress const& test_addr_v4_mapped = V4MappedLoopback();
+  sockaddr_storage addr_v4_mapped = test_addr_v4_mapped.addr;
+  ASSERT_NO_ERRNO(
+      SetAddrPort(test_addr_v4_mapped.family(), &addr_v4_mapped, port));
+  const FileDescriptor fd_v4_mapped = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(test_addr_v4_mapped.family(), param.type, 0));
+  ASSERT_THAT(
+      bind(fd_v4_mapped.get(), reinterpret_cast<sockaddr*>(&addr_v4_mapped),
+           test_addr_v4_mapped.addr_len),
+      SyscallFailsWithErrno(EADDRINUSE));
+
+  // Verify that binding the v4 loopback on the same port with a v4 socket
+  // fails.
+  TestAddress const& test_addr_v4 = V4Loopback();
+  sockaddr_storage addr_v4 = test_addr_v4.addr;
+  ASSERT_NO_ERRNO(SetAddrPort(test_addr_v4.family(), &addr_v4, port));
+  const FileDescriptor fd_v4 =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr_v4.family(), param.type, 0));
+  ASSERT_THAT(bind(fd_v4.get(), reinterpret_cast<sockaddr*>(&addr_v4),
+                   test_addr_v4.addr_len),
+              SyscallFailsWithErrno(EADDRINUSE));
+
+  // Verify that binding the v4 any on the same port with a v4 socket
+  // fails.
+  TestAddress const& test_addr_v4_any = V4Any();
+  sockaddr_storage addr_v4_any = test_addr_v4_any.addr;
+  ASSERT_NO_ERRNO(SetAddrPort(test_addr_v4_any.family(), &addr_v4_any, port));
+  const FileDescriptor fd_v4_any = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(test_addr_v4_any.family(), param.type, 0));
+  ASSERT_THAT(bind(fd_v4_any.get(), reinterpret_cast<sockaddr*>(&addr_v4_any),
+                   test_addr_v4_any.addr_len),
+              SyscallFailsWithErrno(EADDRINUSE));
+}
+
+// A v6-any socket with IPV6_V6ONLY set must reserve its port for v6 only:
+// binding the v6 loopback on that port fails with EADDRINUSE, while the
+// v4-mapped loopback can still be bound on it.
+TEST_P(SocketMultiProtocolInetLoopbackTest, V6OnlyV6AnyReservesV6) {
+  auto const& param = GetParam();
+
+  for (int i = 0; true; i++) {
+    // Bind the v6 any on a v6-only socket.
+    TestAddress const& test_addr_v6only = V6Any();
+    sockaddr_storage addr_v6only = test_addr_v6only.addr;
+    const FileDescriptor fd_v6only = ASSERT_NO_ERRNO_AND_VALUE(
+        Socket(test_addr_v6only.family(), param.type, 0));
+    // The rest of the test is meaningless if the socket is not v6-only, so
+    // stop here on failure rather than carrying on into the retry loop.
+    ASSERT_THAT(setsockopt(fd_v6only.get(), IPPROTO_IPV6, IPV6_V6ONLY,
+                           &kSockOptOn, sizeof(kSockOptOn)),
+                SyscallSucceeds());
+    ASSERT_THAT(
+        bind(fd_v6only.get(), reinterpret_cast<sockaddr*>(&addr_v6only),
+             test_addr_v6only.addr_len),
+        SyscallSucceeds());
+
+    // Get the port that we bound.
+    socklen_t addrlen = test_addr_v6only.addr_len;
+    ASSERT_THAT(
+        getsockname(fd_v6only.get(), reinterpret_cast<sockaddr*>(&addr_v6only),
+                    &addrlen),
+        SyscallSucceeds());
+    uint16_t const port = ASSERT_NO_ERRNO_AND_VALUE(
+        AddrPort(test_addr_v6only.family(), addr_v6only));
+
+    // Verify that binding the v6 loopback with the same port fails.
+    TestAddress const& test_addr_v6 = V6Loopback();
+    sockaddr_storage addr_v6 = test_addr_v6.addr;
+    ASSERT_NO_ERRNO(SetAddrPort(test_addr_v6.family(), &addr_v6, port));
+    const FileDescriptor fd_v6 =
+        ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr_v6.family(), param.type, 0));
+    ASSERT_THAT(bind(fd_v6.get(), reinterpret_cast<sockaddr*>(&addr_v6),
+                     test_addr_v6.addr_len),
+                SyscallFailsWithErrno(EADDRINUSE));
+
+    // Verify that we can still bind the v4 loopback on the same port.
+    TestAddress const& test_addr_v4_mapped = V4MappedLoopback();
+    sockaddr_storage addr_v4_mapped = test_addr_v4_mapped.addr;
+    ASSERT_NO_ERRNO(
+        SetAddrPort(test_addr_v4_mapped.family(), &addr_v4_mapped, port));
+    const FileDescriptor fd_v4_mapped = ASSERT_NO_ERRNO_AND_VALUE(
+        Socket(test_addr_v4_mapped.family(), param.type, 0));
+    int ret =
+        bind(fd_v4_mapped.get(), reinterpret_cast<sockaddr*>(&addr_v4_mapped),
+             test_addr_v4_mapped.addr_len);
+    if (ret == -1 && errno == EADDRINUSE) {
+      // Port may have been in use. Start over with freshly created sockets.
+      ASSERT_LT(i, 100);  // Give up after 100 tries.
+      continue;
+    }
+    ASSERT_THAT(ret, SyscallSucceeds());
+
+    // No need to try again.
+    break;
+  }
+}
+
+TEST_P(SocketMultiProtocolInetLoopbackTest, V6EphemeralPortReserved) {
+  auto const& param = GetParam();
+
+  for (int i = 0; true; i++) {
+    // Bind the v6 loopback on a dual stack socket.
+    TestAddress const& test_addr = V6Loopback();
+    sockaddr_storage bound_addr = test_addr.addr;
+    const FileDescriptor bound_fd =
+        ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0));
+    ASSERT_THAT(bind(bound_fd.get(), reinterpret_cast<sockaddr*>(&bound_addr),
+                     test_addr.addr_len),
+                SyscallSucceeds());
+
+    // Listen iff TCP.
+    if (param.type == SOCK_STREAM) {
+      ASSERT_THAT(listen(bound_fd.get(), SOMAXCONN), SyscallSucceeds());
+    }
+
+    // Get the port that we bound.
+    socklen_t bound_addr_len = test_addr.addr_len;
+    ASSERT_THAT(
+        getsockname(bound_fd.get(), reinterpret_cast<sockaddr*>(&bound_addr),
+                    &bound_addr_len),
+        SyscallSucceeds());
+
+    // Connect to bind an ephemeral port.
+    const FileDescriptor connected_fd =
+        ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0));
+    ASSERT_THAT(RetryEINTR(connect)(connected_fd.get(),
+                                    reinterpret_cast<sockaddr*>(&bound_addr),
+                                    bound_addr_len),
+                SyscallSucceeds());
+
+    // Get the ephemeral port.
+    sockaddr_storage connected_addr = {};
+    socklen_t connected_addr_len = sizeof(connected_addr);
+    ASSERT_THAT(getsockname(connected_fd.get(),
+                            reinterpret_cast<sockaddr*>(&connected_addr),
+                            &connected_addr_len),
+                SyscallSucceeds());
+    uint16_t const ephemeral_port =
+        ASSERT_NO_ERRNO_AND_VALUE(AddrPort(test_addr.family(), connected_addr));
+
+    // Verify that we actually got an ephemeral port.
+    ASSERT_NE(ephemeral_port, 0);
+
+    // Verify that the ephemeral port is reserved.
+    const FileDescriptor checking_fd =
+        ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0));
+    EXPECT_THAT(
+        bind(checking_fd.get(), reinterpret_cast<sockaddr*>(&connected_addr),
+             connected_addr_len),
+        SyscallFailsWithErrno(EADDRINUSE));
+
+    // Verify that binding the v6 loopback with the same port fails.
+    TestAddress const& test_addr_v6 = V6Loopback();
+    sockaddr_storage addr_v6 = test_addr_v6.addr;
+    ASSERT_NO_ERRNO(
+        SetAddrPort(test_addr_v6.family(), &addr_v6, ephemeral_port));
+    const FileDescriptor fd_v6 =
+        ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr_v6.family(), param.type, 0));
+    ASSERT_THAT(bind(fd_v6.get(), reinterpret_cast<sockaddr*>(&addr_v6),
+                     test_addr_v6.addr_len),
+                SyscallFailsWithErrno(EADDRINUSE));
+
+    // Verify that we can still bind the v4 loopback on the same port.
+    TestAddress const& test_addr_v4_mapped = V4MappedLoopback();
+    sockaddr_storage addr_v4_mapped = test_addr_v4_mapped.addr;
+    ASSERT_NO_ERRNO(SetAddrPort(test_addr_v4_mapped.family(), &addr_v4_mapped,
+                                ephemeral_port));
+    const FileDescriptor fd_v4_mapped = ASSERT_NO_ERRNO_AND_VALUE(
+        Socket(test_addr_v4_mapped.family(), param.type, 0));
+    int ret =
+        bind(fd_v4_mapped.get(), reinterpret_cast<sockaddr*>(&addr_v4_mapped),
+             test_addr_v4_mapped.addr_len);
+    if (ret == -1 && errno == EADDRINUSE) {
+      // Port may have been in use.
+      ASSERT_LT(i, 100);  // Give up after 100 tries.
+      continue;
+    }
+    EXPECT_THAT(ret, SyscallSucceeds());
+
+    // No need to try again.
+    break;
+  }
+}
+
+// When SO_REUSEADDR is set on every socket involved, the ephemeral port that
+// connect() assigned to a v6 loopback socket must not be reserved: a further
+// socket has to be able to bind that exact address and port.
+TEST_P(SocketMultiProtocolInetLoopbackTest, V6EphemeralPortReservedReuseAddr) {
+  const int sock_type = GetParam().type;
+
+  // Bind the v6 loopback, then enable SO_REUSEADDR on the bound socket.
+  TestAddress const& loopback = V6Loopback();
+  sockaddr_storage server_addr = loopback.addr;
+  const FileDescriptor server_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(loopback.family(), sock_type, 0));
+  ASSERT_THAT(bind(server_fd.get(), reinterpret_cast<sockaddr*>(&server_addr),
+                   loopback.addr_len),
+              SyscallSucceeds());
+  ASSERT_THAT(setsockopt(server_fd.get(), SOL_SOCKET, SO_REUSEADDR,
+                         &kSockOptOn, sizeof(kSockOptOn)),
+              SyscallSucceeds());
+
+  // Only stream sockets are put into the listening state.
+  if (sock_type == SOCK_STREAM) {
+    ASSERT_THAT(listen(server_fd.get(), SOMAXCONN), SyscallSucceeds());
+  }
+
+  // Read back the address, including the port, that was bound.
+  socklen_t server_addr_len = loopback.addr_len;
+  ASSERT_THAT(
+      getsockname(server_fd.get(), reinterpret_cast<sockaddr*>(&server_addr),
+                  &server_addr_len),
+      SyscallSucceeds());
+
+  // Connecting a second SO_REUSEADDR socket binds it to an ephemeral port.
+  const FileDescriptor client_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(loopback.family(), sock_type, 0));
+  ASSERT_THAT(setsockopt(client_fd.get(), SOL_SOCKET, SO_REUSEADDR,
+                         &kSockOptOn, sizeof(kSockOptOn)),
+              SyscallSucceeds());
+  ASSERT_THAT(RetryEINTR(connect)(client_fd.get(),
+                                  reinterpret_cast<sockaddr*>(&server_addr),
+                                  server_addr_len),
+              SyscallSucceeds());
+
+  // Look up the local address the client ended up with.
+  sockaddr_storage client_addr = {};
+  socklen_t client_addr_len = sizeof(client_addr);
+  ASSERT_THAT(
+      getsockname(client_fd.get(), reinterpret_cast<sockaddr*>(&client_addr),
+                  &client_addr_len),
+      SyscallSucceeds());
+  uint16_t const ephemeral_port =
+      ASSERT_NO_ERRNO_AND_VALUE(AddrPort(loopback.family(), client_addr));
+
+  // A port of zero would mean no ephemeral port was assigned.
+  ASSERT_NE(ephemeral_port, 0);
+
+  // The ephemeral port must not be reserved: a third SO_REUSEADDR socket can
+  // bind the client's address.
+  const FileDescriptor probe_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(loopback.family(), sock_type, 0));
+  ASSERT_THAT(setsockopt(probe_fd.get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceeds());
+  EXPECT_THAT(bind(probe_fd.get(), reinterpret_cast<sockaddr*>(&client_addr),
+                   client_addr_len),
+              SyscallSucceeds());
+}
+
+TEST_P(SocketMultiProtocolInetLoopbackTest, V4MappedEphemeralPortReserved) {
+  auto const& param = GetParam();
+
+  for (int i = 0; true; i++) {
+    // Bind the v4 loopback on a dual stack socket.
+    TestAddress const& test_addr = V4MappedLoopback();
+    sockaddr_storage bound_addr = test_addr.addr;
+    const FileDescriptor bound_fd =
+        ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0));
+    ASSERT_THAT(bind(bound_fd.get(), reinterpret_cast<sockaddr*>(&bound_addr),
+                     test_addr.addr_len),
+                SyscallSucceeds());
+
+    // Listen iff TCP.
+    if (param.type == SOCK_STREAM) {
+      ASSERT_THAT(listen(bound_fd.get(), SOMAXCONN), SyscallSucceeds());
+    }
+
+    // Get the port that we bound.
+ socklen_t bound_addr_len = test_addr.addr_len; + ASSERT_THAT( + getsockname(bound_fd.get(), reinterpret_cast<sockaddr*>(&bound_addr), + &bound_addr_len), + SyscallSucceeds()); + + // Connect to bind an ephemeral port. + const FileDescriptor connected_fd = + ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0)); + ASSERT_THAT(RetryEINTR(connect)(connected_fd.get(), + reinterpret_cast<sockaddr*>(&bound_addr), + bound_addr_len), + SyscallSucceeds()); + + // Get the ephemeral port. + sockaddr_storage connected_addr = {}; + socklen_t connected_addr_len = sizeof(connected_addr); + ASSERT_THAT(getsockname(connected_fd.get(), + reinterpret_cast<sockaddr*>(&connected_addr), + &connected_addr_len), + SyscallSucceeds()); + uint16_t const ephemeral_port = + ASSERT_NO_ERRNO_AND_VALUE(AddrPort(test_addr.family(), connected_addr)); + + // Verify that we actually got an ephemeral port. + ASSERT_NE(ephemeral_port, 0); + + // Verify that the ephemeral port is reserved. + const FileDescriptor checking_fd = + ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0)); + EXPECT_THAT( + bind(checking_fd.get(), reinterpret_cast<sockaddr*>(&connected_addr), + connected_addr_len), + SyscallFailsWithErrno(EADDRINUSE)); + + // Verify that binding the v4 loopback on the same port with a v4 socket + // fails. + TestAddress const& test_addr_v4 = V4Loopback(); + sockaddr_storage addr_v4 = test_addr_v4.addr; + ASSERT_NO_ERRNO( + SetAddrPort(test_addr_v4.family(), &addr_v4, ephemeral_port)); + const FileDescriptor fd_v4 = + ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr_v4.family(), param.type, 0)); + EXPECT_THAT(bind(fd_v4.get(), reinterpret_cast<sockaddr*>(&addr_v4), + test_addr_v4.addr_len), + SyscallFailsWithErrno(EADDRINUSE)); + + // Verify that binding the v6 any on the same port with a dual-stack socket + // fails. 
+ TestAddress const& test_addr_v6_any = V6Any(); + sockaddr_storage addr_v6_any = test_addr_v6_any.addr; + ASSERT_NO_ERRNO( + SetAddrPort(test_addr_v6_any.family(), &addr_v6_any, ephemeral_port)); + const FileDescriptor fd_v6_any = ASSERT_NO_ERRNO_AND_VALUE( + Socket(test_addr_v6_any.family(), param.type, 0)); + ASSERT_THAT(bind(fd_v6_any.get(), reinterpret_cast<sockaddr*>(&addr_v6_any), + test_addr_v6_any.addr_len), + SyscallFailsWithErrno(EADDRINUSE)); + + // For some reason, binding the TCP v6-only any is flaky on Linux. Maybe we + // tend to run out of ephemeral ports? Regardless, binding the v6 loopback + // seems pretty reliable. Only try to bind the v6-only any on UDP and + // gVisor. + + int ret = -1; + + if (!IsRunningOnGvisor() && param.type == SOCK_STREAM) { + // Verify that we can still bind the v6 loopback on the same port. + TestAddress const& test_addr_v6 = V6Loopback(); + sockaddr_storage addr_v6 = test_addr_v6.addr; + ASSERT_NO_ERRNO( + SetAddrPort(test_addr_v6.family(), &addr_v6, ephemeral_port)); + const FileDescriptor fd_v6 = ASSERT_NO_ERRNO_AND_VALUE( + Socket(test_addr_v6.family(), param.type, 0)); + ret = bind(fd_v6.get(), reinterpret_cast<sockaddr*>(&addr_v6), + test_addr_v6.addr_len); + } else { + // Verify that we can still bind the v6 any on the same port with a + // v6-only socket. + const FileDescriptor fd_v6_only_any = ASSERT_NO_ERRNO_AND_VALUE( + Socket(test_addr_v6_any.family(), param.type, 0)); + EXPECT_THAT(setsockopt(fd_v6_only_any.get(), IPPROTO_IPV6, IPV6_V6ONLY, + &kSockOptOn, sizeof(kSockOptOn)), + SyscallSucceeds()); + ret = + bind(fd_v6_only_any.get(), reinterpret_cast<sockaddr*>(&addr_v6_any), + test_addr_v6_any.addr_len); + } + + if (ret == -1 && errno == EADDRINUSE) { + // Port may have been in use. + ASSERT_LT(i, 100); // Give up after 100 tries. + continue; + } + EXPECT_THAT(ret, SyscallSucceeds()); + + // No need to try again. 
+ break; + } +} + +TEST_P(SocketMultiProtocolInetLoopbackTest, + V4MappedEphemeralPortReservedResueAddr) { + auto const& param = GetParam(); + + // Bind the v4 loopback on a dual stack socket. + TestAddress const& test_addr = V4MappedLoopback(); + sockaddr_storage bound_addr = test_addr.addr; + const FileDescriptor bound_fd = + ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0)); + ASSERT_THAT(bind(bound_fd.get(), reinterpret_cast<sockaddr*>(&bound_addr), + test_addr.addr_len), + SyscallSucceeds()); + + ASSERT_THAT(setsockopt(bound_fd.get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + + // Listen iff TCP. + if (param.type == SOCK_STREAM) { + ASSERT_THAT(listen(bound_fd.get(), SOMAXCONN), SyscallSucceeds()); + } + + // Get the port that we bound. + socklen_t bound_addr_len = test_addr.addr_len; + ASSERT_THAT( + getsockname(bound_fd.get(), reinterpret_cast<sockaddr*>(&bound_addr), + &bound_addr_len), + SyscallSucceeds()); + + // Connect to bind an ephemeral port. + const FileDescriptor connected_fd = + ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0)); + ASSERT_THAT(setsockopt(connected_fd.get(), SOL_SOCKET, SO_REUSEADDR, + &kSockOptOn, sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT(RetryEINTR(connect)(connected_fd.get(), + reinterpret_cast<sockaddr*>(&bound_addr), + bound_addr_len), + SyscallSucceeds()); + + // Get the ephemeral port. + sockaddr_storage connected_addr = {}; + socklen_t connected_addr_len = sizeof(connected_addr); + ASSERT_THAT(getsockname(connected_fd.get(), + reinterpret_cast<sockaddr*>(&connected_addr), + &connected_addr_len), + SyscallSucceeds()); + uint16_t const ephemeral_port = + ASSERT_NO_ERRNO_AND_VALUE(AddrPort(test_addr.family(), connected_addr)); + + // Verify that we actually got an ephemeral port. + ASSERT_NE(ephemeral_port, 0); + + // Verify that the ephemeral port is not reserved. 
+ const FileDescriptor checking_fd = + ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0)); + ASSERT_THAT(setsockopt(checking_fd.get(), SOL_SOCKET, SO_REUSEADDR, + &kSockOptOn, sizeof(kSockOptOn)), + SyscallSucceeds()); + EXPECT_THAT( + bind(checking_fd.get(), reinterpret_cast<sockaddr*>(&connected_addr), + connected_addr_len), + SyscallSucceeds()); +} + +TEST_P(SocketMultiProtocolInetLoopbackTest, V4EphemeralPortReserved) { + auto const& param = GetParam(); + + for (int i = 0; true; i++) { + // Bind the v4 loopback on a v4 socket. + TestAddress const& test_addr = V4Loopback(); + sockaddr_storage bound_addr = test_addr.addr; + const FileDescriptor bound_fd = + ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0)); + ASSERT_THAT(bind(bound_fd.get(), reinterpret_cast<sockaddr*>(&bound_addr), + test_addr.addr_len), + SyscallSucceeds()); + + // Listen iff TCP. + if (param.type == SOCK_STREAM) { + ASSERT_THAT(listen(bound_fd.get(), SOMAXCONN), SyscallSucceeds()); + } + + // Get the port that we bound. + socklen_t bound_addr_len = test_addr.addr_len; + ASSERT_THAT( + getsockname(bound_fd.get(), reinterpret_cast<sockaddr*>(&bound_addr), + &bound_addr_len), + SyscallSucceeds()); + + // Connect to bind an ephemeral port. + const FileDescriptor connected_fd = + ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0)); + ASSERT_THAT(RetryEINTR(connect)(connected_fd.get(), + reinterpret_cast<sockaddr*>(&bound_addr), + bound_addr_len), + SyscallSucceeds()); + + // Get the ephemeral port. + sockaddr_storage connected_addr = {}; + socklen_t connected_addr_len = sizeof(connected_addr); + ASSERT_THAT(getsockname(connected_fd.get(), + reinterpret_cast<sockaddr*>(&connected_addr), + &connected_addr_len), + SyscallSucceeds()); + uint16_t const ephemeral_port = + ASSERT_NO_ERRNO_AND_VALUE(AddrPort(test_addr.family(), connected_addr)); + + // Verify that we actually got an ephemeral port. 
+ ASSERT_NE(ephemeral_port, 0); + + // Verify that the ephemeral port is reserved. + const FileDescriptor checking_fd = + ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0)); + EXPECT_THAT( + bind(checking_fd.get(), reinterpret_cast<sockaddr*>(&connected_addr), + connected_addr_len), + SyscallFailsWithErrno(EADDRINUSE)); + + // Verify that binding the v4 loopback on the same port with a v6 socket + // fails. + TestAddress const& test_addr_v4_mapped = V4MappedLoopback(); + sockaddr_storage addr_v4_mapped = test_addr_v4_mapped.addr; + ASSERT_NO_ERRNO(SetAddrPort(test_addr_v4_mapped.family(), &addr_v4_mapped, + ephemeral_port)); + const FileDescriptor fd_v4_mapped = ASSERT_NO_ERRNO_AND_VALUE( + Socket(test_addr_v4_mapped.family(), param.type, 0)); + EXPECT_THAT( + bind(fd_v4_mapped.get(), reinterpret_cast<sockaddr*>(&addr_v4_mapped), + test_addr_v4_mapped.addr_len), + SyscallFailsWithErrno(EADDRINUSE)); + + // Verify that binding the v6 any on the same port with a dual-stack socket + // fails. + TestAddress const& test_addr_v6_any = V6Any(); + sockaddr_storage addr_v6_any = test_addr_v6_any.addr; + ASSERT_NO_ERRNO( + SetAddrPort(test_addr_v6_any.family(), &addr_v6_any, ephemeral_port)); + const FileDescriptor fd_v6_any = ASSERT_NO_ERRNO_AND_VALUE( + Socket(test_addr_v6_any.family(), param.type, 0)); + ASSERT_THAT(bind(fd_v6_any.get(), reinterpret_cast<sockaddr*>(&addr_v6_any), + test_addr_v6_any.addr_len), + SyscallFailsWithErrno(EADDRINUSE)); + + // For some reason, binding the TCP v6-only any is flaky on Linux. Maybe we + // tend to run out of ephemeral ports? Regardless, binding the v6 loopback + // seems pretty reliable. Only try to bind the v6-only any on UDP and + // gVisor. + + int ret = -1; + + if (!IsRunningOnGvisor() && param.type == SOCK_STREAM) { + // Verify that we can still bind the v6 loopback on the same port. 
+ TestAddress const& test_addr_v6 = V6Loopback(); + sockaddr_storage addr_v6 = test_addr_v6.addr; + ASSERT_NO_ERRNO( + SetAddrPort(test_addr_v6.family(), &addr_v6, ephemeral_port)); + const FileDescriptor fd_v6 = ASSERT_NO_ERRNO_AND_VALUE( + Socket(test_addr_v6.family(), param.type, 0)); + ret = bind(fd_v6.get(), reinterpret_cast<sockaddr*>(&addr_v6), + test_addr_v6.addr_len); + } else { + // Verify that we can still bind the v6 any on the same port with a + // v6-only socket. + const FileDescriptor fd_v6_only_any = ASSERT_NO_ERRNO_AND_VALUE( + Socket(test_addr_v6_any.family(), param.type, 0)); + EXPECT_THAT(setsockopt(fd_v6_only_any.get(), IPPROTO_IPV6, IPV6_V6ONLY, + &kSockOptOn, sizeof(kSockOptOn)), + SyscallSucceeds()); + ret = + bind(fd_v6_only_any.get(), reinterpret_cast<sockaddr*>(&addr_v6_any), + test_addr_v6_any.addr_len); + } + + if (ret == -1 && errno == EADDRINUSE) { + // Port may have been in use. + ASSERT_LT(i, 100); // Give up after 100 tries. + continue; + } + EXPECT_THAT(ret, SyscallSucceeds()); + + // No need to try again. + break; + } +} + +TEST_P(SocketMultiProtocolInetLoopbackTest, V4EphemeralPortReservedReuseAddr) { + auto const& param = GetParam(); + + // Bind the v4 loopback on a v4 socket. + TestAddress const& test_addr = V4Loopback(); + sockaddr_storage bound_addr = test_addr.addr; + const FileDescriptor bound_fd = + ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0)); + + ASSERT_THAT(setsockopt(bound_fd.get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + + ASSERT_THAT(bind(bound_fd.get(), reinterpret_cast<sockaddr*>(&bound_addr), + test_addr.addr_len), + SyscallSucceeds()); + + // Listen iff TCP. + if (param.type == SOCK_STREAM) { + ASSERT_THAT(listen(bound_fd.get(), SOMAXCONN), SyscallSucceeds()); + } + + // Get the port that we bound. 
+ socklen_t bound_addr_len = test_addr.addr_len; + ASSERT_THAT( + getsockname(bound_fd.get(), reinterpret_cast<sockaddr*>(&bound_addr), + &bound_addr_len), + SyscallSucceeds()); + + // Connect to bind an ephemeral port. + const FileDescriptor connected_fd = + ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0)); + + ASSERT_THAT(setsockopt(connected_fd.get(), SOL_SOCKET, SO_REUSEADDR, + &kSockOptOn, sizeof(kSockOptOn)), + SyscallSucceeds()); + + ASSERT_THAT(RetryEINTR(connect)(connected_fd.get(), + reinterpret_cast<sockaddr*>(&bound_addr), + bound_addr_len), + SyscallSucceeds()); + + // Get the ephemeral port. + sockaddr_storage connected_addr = {}; + socklen_t connected_addr_len = sizeof(connected_addr); + ASSERT_THAT(getsockname(connected_fd.get(), + reinterpret_cast<sockaddr*>(&connected_addr), + &connected_addr_len), + SyscallSucceeds()); + uint16_t const ephemeral_port = + ASSERT_NO_ERRNO_AND_VALUE(AddrPort(test_addr.family(), connected_addr)); + + // Verify that we actually got an ephemeral port. + ASSERT_NE(ephemeral_port, 0); + + // Verify that the ephemeral port is not reserved. 
+ const FileDescriptor checking_fd = + ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0)); + ASSERT_THAT(setsockopt(checking_fd.get(), SOL_SOCKET, SO_REUSEADDR, + &kSockOptOn, sizeof(kSockOptOn)), + SyscallSucceeds()); + EXPECT_THAT( + bind(checking_fd.get(), reinterpret_cast<sockaddr*>(&connected_addr), + connected_addr_len), + SyscallSucceeds()); +} + +// Checks SO_REUSEPORT on the v4 loopback: a second socket may bind an address +// that is already bound only if SO_REUSEPORT is enabled on both sockets. +TEST_P(SocketMultiProtocolInetLoopbackTest, PortReuseTwoSockets) { + auto const& param = GetParam(); + TestAddress const& test_addr = V4Loopback(); + sockaddr_storage addr = test_addr.addr; + + for (int i = 0; i < 2; i++) { + const int portreuse1 = i % 2; + auto s1 = + ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0)); + int fd1 = s1.get(); + socklen_t addrlen = test_addr.addr_len; + + EXPECT_THAT(setsockopt(fd1, SOL_SOCKET, SO_REUSEPORT, &portreuse1, + sizeof(portreuse1)), + SyscallSucceeds()); + + ASSERT_THAT(bind(fd1, reinterpret_cast<sockaddr*>(&addr), addrlen), + SyscallSucceeds()); + + // getsockname() writes the bound address back into addr, so every later + // bind() in this test targets that same address. + ASSERT_THAT(getsockname(fd1, reinterpret_cast<sockaddr*>(&addr), &addrlen), + SyscallSucceeds()); + if (param.type == SOCK_STREAM) { + ASSERT_THAT(listen(fd1, 1), SyscallSucceeds()); + } + + // j is less than 4 to check that the port reuse logic works correctly after + // closing bound sockets. + for (int j = 0; j < 4; j++) { + const int portreuse2 = j % 2; + auto s2 = + ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0)); + int fd2 = s2.get(); + + EXPECT_THAT(setsockopt(fd2, SOL_SOCKET, SO_REUSEPORT, &portreuse2, + sizeof(portreuse2)), + SyscallSucceeds()); + + // Attach the option values to any failure reported below rather than + // printing them unconditionally on every iteration. + SCOPED_TRACE(::testing::Message() << "portreuse1=" << portreuse1 + << " portreuse2=" << portreuse2); + int ret = bind(fd2, reinterpret_cast<sockaddr*>(&addr), addrlen); + + // Verify that two sockets can be bound to the same port only if + // SO_REUSEPORT is set for both of them.
+ if (!portreuse1 || !portreuse2) { + ASSERT_THAT(ret, SyscallFailsWithErrno(EADDRINUSE)); + } else { + ASSERT_THAT(ret, SyscallSucceeds()); + } + } + } +} + +// Check that when a socket was bound to an address with REUSEPORT and then +// closed, we can bind a different socket to the same address without needing +// REUSEPORT. +TEST_P(SocketMultiProtocolInetLoopbackTest, NoReusePortFollowingReusePort) { + auto const& param = GetParam(); + TestAddress const& test_addr = V4Loopback(); + sockaddr_storage addr = test_addr.addr; + + auto s = ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0)); + int fd = s.get(); + socklen_t addrlen = test_addr.addr_len; + int portreuse = 1; + ASSERT_THAT( + setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &portreuse, sizeof(portreuse)), + SyscallSucceeds()); + ASSERT_THAT(bind(fd, reinterpret_cast<sockaddr*>(&addr), addrlen), + SyscallSucceeds()); + ASSERT_THAT(getsockname(fd, reinterpret_cast<sockaddr*>(&addr), &addrlen), + SyscallSucceeds()); + ASSERT_EQ(addrlen, test_addr.addr_len); + + s.reset(); + + // Open a new socket and bind to the same address, but w/o REUSEPORT.
+ s = ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0)); + fd = s.get(); + portreuse = 0; + ASSERT_THAT( + setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &portreuse, sizeof(portreuse)), + SyscallSucceeds()); + ASSERT_THAT(bind(fd, reinterpret_cast<sockaddr*>(&addr), addrlen), + SyscallSucceeds()); +} + +INSTANTIATE_TEST_SUITE_P( + AllFamilies, SocketMultiProtocolInetLoopbackTest, + ::testing::Values(ProtocolTestParam{"TCP", SOCK_STREAM}, + ProtocolTestParam{"UDP", SOCK_DGRAM}), + DescribeProtocolTestParam); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_inet_loopback_nogotsan.cc b/test/syscalls/linux/socket_inet_loopback_nogotsan.cc new file mode 100644 index 000000000..2324c7f6a --- /dev/null +++ b/test/syscalls/linux/socket_inet_loopback_nogotsan.cc @@ -0,0 +1,171 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <arpa/inet.h> +#include <netinet/in.h> +#include <netinet/tcp.h> +#include <string.h> + +#include <iostream> +#include <memory> +#include <string> +#include <utility> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/strings/str_cat.h" +#include "test/syscalls/linux/ip_socket_test_util.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/posix_error.h" +#include "test/util/save_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +using ::testing::Gt; + +PosixErrorOr<uint16_t> AddrPort(int family, sockaddr_storage const& addr) { + switch (family) { + case AF_INET: + return static_cast<uint16_t>( + reinterpret_cast<sockaddr_in const*>(&addr)->sin_port); + case AF_INET6: + return static_cast<uint16_t>( + reinterpret_cast<sockaddr_in6 const*>(&addr)->sin6_port); + default: + return PosixError(EINVAL, + absl::StrCat("unknown socket family: ", family)); + } +} + +PosixError SetAddrPort(int family, sockaddr_storage* addr, uint16_t port) { + switch (family) { + case AF_INET: + reinterpret_cast<sockaddr_in*>(addr)->sin_port = port; + return NoError(); + case AF_INET6: + reinterpret_cast<sockaddr_in6*>(addr)->sin6_port = port; + return NoError(); + default: + return PosixError(EINVAL, + absl::StrCat("unknown socket family: ", family)); + } +} + +struct TestParam { + TestAddress listener; + TestAddress connector; +}; + +std::string DescribeTestParam(::testing::TestParamInfo<TestParam> const& info) { + return absl::StrCat("Listen", info.param.listener.description, "_Connect", + info.param.connector.description); +} + +using SocketInetLoopbackTest = ::testing::TestWithParam<TestParam>; + +// This test verifies that connect returns EADDRNOTAVAIL if all local ephemeral +// ports are already in use for a given destination ip/port. +// We disable S/R because this test creates a large number of sockets. 
+TEST_P(SocketInetLoopbackTest, TestTCPPortExhaustion_NoRandomSave) { + auto const& param = GetParam(); + TestAddress const& listener = param.listener; + TestAddress const& connector = param.connector; + + constexpr int kBacklog = 10; + constexpr int kClients = 65536; + + // Create the listening socket. + auto listen_fd = ASSERT_NO_ERRNO_AND_VALUE( + Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP)); + sockaddr_storage listen_addr = listener.addr; + ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast<sockaddr*>(&listen_addr), + listener.addr_len), + SyscallSucceeds()); + ASSERT_THAT(listen(listen_fd.get(), kBacklog), SyscallSucceeds()); + + // Get the port bound by the listening socket. + socklen_t addrlen = listener.addr_len; + ASSERT_THAT(getsockname(listen_fd.get(), + reinterpret_cast<sockaddr*>(&listen_addr), &addrlen), + SyscallSucceeds()); + uint16_t const port = + ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr)); + + // Disable cooperative S/R as we are making too many syscalls. + DisableSave ds; + + // Now we keep opening connections till we run out of local ephemeral ports. + // and assert the error we get back. 
+ sockaddr_storage conn_addr = connector.addr; + ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port)); + std::vector<FileDescriptor> clients; + std::vector<FileDescriptor> servers; + + for (int i = 0; i < kClients; i++) { + FileDescriptor client = ASSERT_NO_ERRNO_AND_VALUE( + Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP)); + int ret = connect(client.get(), reinterpret_cast<sockaddr*>(&conn_addr), + connector.addr_len); + if (ret == 0) { + clients.push_back(std::move(client)); + FileDescriptor server = + ASSERT_NO_ERRNO_AND_VALUE(Accept(listen_fd.get(), nullptr, nullptr)); + servers.push_back(std::move(server)); + continue; + } + ASSERT_THAT(ret, SyscallFailsWithErrno(EADDRNOTAVAIL)); + break; + } +} + +INSTANTIATE_TEST_SUITE_P( + All, SocketInetLoopbackTest, + ::testing::Values( + // Listeners bound to IPv4 addresses refuse connections using IPv6 + // addresses. + TestParam{V4Any(), V4Any()}, TestParam{V4Any(), V4Loopback()}, + TestParam{V4Any(), V4MappedAny()}, + TestParam{V4Any(), V4MappedLoopback()}, + TestParam{V4Loopback(), V4Any()}, TestParam{V4Loopback(), V4Loopback()}, + TestParam{V4Loopback(), V4MappedLoopback()}, + TestParam{V4MappedAny(), V4Any()}, + TestParam{V4MappedAny(), V4Loopback()}, + TestParam{V4MappedAny(), V4MappedAny()}, + TestParam{V4MappedAny(), V4MappedLoopback()}, + TestParam{V4MappedLoopback(), V4Any()}, + TestParam{V4MappedLoopback(), V4Loopback()}, + TestParam{V4MappedLoopback(), V4MappedLoopback()}, + + // Listeners bound to IN6ADDR_ANY accept all connections. + TestParam{V6Any(), V4Any()}, TestParam{V6Any(), V4Loopback()}, + TestParam{V6Any(), V4MappedAny()}, + TestParam{V6Any(), V4MappedLoopback()}, TestParam{V6Any(), V6Any()}, + TestParam{V6Any(), V6Loopback()}, + + // Listeners bound to IN6ADDR_LOOPBACK refuse connections using IPv4 + // addresses. 
+ TestParam{V6Loopback(), V6Any()}, + TestParam{V6Loopback(), V6Loopback()}), + DescribeTestParam); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_ip_loopback_blocking.cc b/test/syscalls/linux/socket_ip_loopback_blocking.cc new file mode 100644 index 000000000..fda252dd7 --- /dev/null +++ b/test/syscalls/linux/socket_ip_loopback_blocking.cc @@ -0,0 +1,49 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <netinet/tcp.h> + +#include <vector> + +#include "test/syscalls/linux/ip_socket_test_util.h" +#include "test/syscalls/linux/socket_blocking.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { +namespace { + +// Builds the list of socket pair kinds that BlockingSocketPairTest is +// instantiated with below: the IPv6/IPv4 UDP pairs followed by the IPv6/IPv4 +// TCP pairs run through the NoOp and TCP_NODELAY middleware. +std::vector<SocketPairKind> GetSocketPairs() { + // UDP pairs are passed through unchanged. + std::vector<SocketPairKind> udp_kinds{ + IPv6UDPBidirectionalBindSocketPair(0), + IPv4UDPBidirectionalBindSocketPair(0), + }; + + // TCP pairs are combined with the middleware list by ApplyVecToVec + // (presumably one variant per middleware/pair combination; confirm against + // its definition). + std::vector<Middleware> tcp_middleware{ + NoOp, SetSockOpt(IPPROTO_TCP, TCP_NODELAY, &kSockOptOn)}; + std::vector<SocketPairKind> tcp_kinds{ + IPv6TCPAcceptBindSocketPair(0), + IPv4TCPAcceptBindSocketPair(0), + }; + + return VecCat<SocketPairKind>( + udp_kinds, ApplyVecToVec<SocketPairKind>(tcp_middleware, tcp_kinds)); +} + +INSTANTIATE_TEST_SUITE_P( + BlockingIPSockets, BlockingSocketPairTest, + ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_ip_tcp_generic.cc b/test/syscalls/linux/socket_ip_tcp_generic.cc new file mode 100644 index 000000000..c2ecb639f --- /dev/null +++ b/test/syscalls/linux/socket_ip_tcp_generic.cc @@ -0,0 +1,1054 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+ +#include "test/syscalls/linux/socket_ip_tcp_generic.h" + +#include <netinet/in.h> +#include <netinet/tcp.h> +#include <poll.h> +#include <stdio.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <sys/un.h> + +#include "gtest/gtest.h" +#include "absl/memory/memory.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +TEST_P(TCPSocketPairTest, TcpInfoSucceeds) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + struct tcp_info opt = {}; + socklen_t optLen = sizeof(opt); + EXPECT_THAT(getsockopt(sockets->first_fd(), SOL_TCP, TCP_INFO, &opt, &optLen), + SyscallSucceeds()); +} + +TEST_P(TCPSocketPairTest, ShortTcpInfoSucceeds) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + struct tcp_info opt = {}; + socklen_t optLen = 1; + EXPECT_THAT(getsockopt(sockets->first_fd(), SOL_TCP, TCP_INFO, &opt, &optLen), + SyscallSucceeds()); +} + +TEST_P(TCPSocketPairTest, ZeroTcpInfoSucceeds) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + struct tcp_info opt = {}; + socklen_t optLen = 0; + EXPECT_THAT(getsockopt(sockets->first_fd(), SOL_TCP, TCP_INFO, &opt, &optLen), + SyscallSucceeds()); +} + +// This test validates that an RST is sent instead of a FIN when data is +// unread on calls to close(2). +TEST_P(TCPSocketPairTest, RSTSentOnCloseWithUnreadData) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char buf[10] = {}; + ASSERT_THAT(RetryEINTR(write)(sockets->first_fd(), buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + // Wait until the second socket sees the data on its side but don't read it. + struct pollfd poll_fd = {sockets->second_fd(), POLLIN | POLLHUP, 0}; + constexpr int kPollTimeoutMs = 20000; // Wait up to 20 seconds for the data.
+ ASSERT_THAT(RetryEINTR(poll)(&poll_fd, 1, kPollTimeoutMs), + SyscallSucceedsWithValue(1)); + + // Now close the second socket without reading the data. + ASSERT_THAT(close(sockets->release_second_fd()), SyscallSucceeds()); + + // Wait for the other end to receive the RST (up to 20 seconds). + struct pollfd poll_fd2 = {sockets->first_fd(), POLLIN | POLLHUP, 0}; + ASSERT_THAT(RetryEINTR(poll)(&poll_fd2, 1, kPollTimeoutMs), + SyscallSucceedsWithValue(1)); + + // A close with unread data will cause a RST to be sent instead + // of a FIN, per RFC 2525 section 2.17; this is also what Linux does. + ASSERT_THAT(RetryEINTR(read)(sockets->first_fd(), buf, sizeof(buf)), + SyscallFailsWithErrno(ECONNRESET)); +} + +// This test will validate that a RST will cause POLLHUP to trigger. +TEST_P(TCPSocketPairTest, RSTCausesPollHUP) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char buf[10] = {}; + ASSERT_THAT(RetryEINTR(write)(sockets->first_fd(), buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + // Wait until second sees the data on its side but don't read it. + struct pollfd poll_fd = {sockets->second_fd(), POLLIN, 0}; + constexpr int kPollTimeoutMs = 20000; // Wait up to 20 seconds for the data. + ASSERT_THAT(RetryEINTR(poll)(&poll_fd, 1, kPollTimeoutMs), + SyscallSucceedsWithValue(1)); + EXPECT_EQ(poll_fd.revents & POLLIN, POLLIN); + + // Confirm we at least have one unread byte. + int bytes_available = 0; + ASSERT_THAT( + RetryEINTR(ioctl)(sockets->second_fd(), FIONREAD, &bytes_available), + SyscallSucceeds()); + EXPECT_GT(bytes_available, 0); + + // Now close the connected socket without reading the data from the second, + // this will cause a RST and we should see that with POLLHUP. + ASSERT_THAT(close(sockets->release_second_fd()), SyscallSucceeds()); + + // Wait for the other end to receive the RST (up to 20 seconds).
+ struct pollfd poll_fd3 = {sockets->first_fd(), POLLHUP, 0}; + ASSERT_THAT(RetryEINTR(poll)(&poll_fd3, 1, kPollTimeoutMs), + SyscallSucceedsWithValue(1)); + ASSERT_NE(poll_fd3.revents & POLLHUP, 0); +} + +// This test validates that even if a RST is sent the other end will not +// get an ECONNRESET until it's read all data. +TEST_P(TCPSocketPairTest, RSTSentOnCloseWithUnreadDataAllowsReadBuffered) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char buf[10] = {}; + ASSERT_THAT(RetryEINTR(write)(sockets->first_fd(), buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + ASSERT_THAT(RetryEINTR(write)(sockets->second_fd(), buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + // Wait until second sees the data on its side but don't read it. + struct pollfd poll_fd = {sockets->second_fd(), POLLIN, 0}; + constexpr int kPollTimeoutMs = 30000; // Wait up to 30 seconds for the data. + ASSERT_THAT(RetryEINTR(poll)(&poll_fd, 1, kPollTimeoutMs), + SyscallSucceedsWithValue(1)); + + // Wait until first sees the data on its side but don't read it. + struct pollfd poll_fd2 = {sockets->first_fd(), POLLIN, 0}; + ASSERT_THAT(RetryEINTR(poll)(&poll_fd2, 1, kPollTimeoutMs), + SyscallSucceedsWithValue(1)); + + // Now close the connected socket without reading the data from the second. + ASSERT_THAT(close(sockets->release_second_fd()), SyscallSucceeds()); + + // Wait for the other end to receive the RST (up to 30 seconds). + struct pollfd poll_fd3 = {sockets->first_fd(), POLLHUP, 0}; + ASSERT_THAT(RetryEINTR(poll)(&poll_fd3, 1, kPollTimeoutMs), + SyscallSucceedsWithValue(1)); + + // Since we also have data buffered we should be able to read it before + // the syscall will fail with ECONNRESET. + ASSERT_THAT(RetryEINTR(read)(sockets->first_fd(), buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + // A shutdown with unread data will cause a RST to be sent instead + // of a FIN, per RFC 2525 section 2.17; this is also what Linux does. 
+ ASSERT_THAT(RetryEINTR(read)(sockets->first_fd(), buf, sizeof(buf)), + SyscallFailsWithErrno(ECONNRESET)); +} + +// This test will verify that a clean shutdown (FIN) is preformed when there +// is unread data but only the write side is closed. +TEST_P(TCPSocketPairTest, FINSentOnShutdownWrWithUnreadData) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char buf[10] = {}; + ASSERT_THAT(RetryEINTR(write)(sockets->first_fd(), buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + // Wait until t_ sees the data on its side but don't read it. + struct pollfd poll_fd = {sockets->second_fd(), POLLIN | POLLHUP, 0}; + constexpr int kPollTimeoutMs = 20000; // Wait up to 20 seconds for the data. + ASSERT_THAT(RetryEINTR(poll)(&poll_fd, 1, kPollTimeoutMs), + SyscallSucceedsWithValue(1)); + + // Now shutdown the write end leaving the read end open. + ASSERT_THAT(shutdown(sockets->second_fd(), SHUT_WR), SyscallSucceeds()); + + // Wait for the other end to receive the FIN (up to 20 seconds). + struct pollfd poll_fd2 = {sockets->first_fd(), POLLIN | POLLHUP, 0}; + ASSERT_THAT(RetryEINTR(poll)(&poll_fd2, 1, kPollTimeoutMs), + SyscallSucceedsWithValue(1)); + + // Since we didn't shutdown the read end this will be a clean close. + ASSERT_THAT(RetryEINTR(read)(sockets->first_fd(), buf, sizeof(buf)), + SyscallSucceedsWithValue(0)); +} + +// This test will verify that when data is received by a socket, even if it's +// not read SHUT_RD will not cause any packets to be generated. +TEST_P(TCPSocketPairTest, ShutdownRdShouldCauseNoPacketsWithUnreadData) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char buf[10] = {}; + ASSERT_THAT(RetryEINTR(write)(sockets->first_fd(), buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + // Wait until t_ sees the data on its side but don't read it. 
+ struct pollfd poll_fd = {sockets->second_fd(), POLLIN | POLLHUP, 0}; + constexpr int kPollTimeoutMs = 20000; // Wait up to 20 seconds for the data. + ASSERT_THAT(RetryEINTR(poll)(&poll_fd, 1, kPollTimeoutMs), + SyscallSucceedsWithValue(1)); + + // Now shutdown the read end, this will generate no packets to the other end. + ASSERT_THAT(shutdown(sockets->second_fd(), SHUT_RD), SyscallSucceeds()); + + // We should not receive any events on the other side of the socket. + struct pollfd poll_fd2 = {sockets->first_fd(), POLLIN | POLLHUP, 0}; + constexpr int kPollNoResponseTimeoutMs = 3000; + ASSERT_THAT(RetryEINTR(poll)(&poll_fd2, 1, kPollNoResponseTimeoutMs), + SyscallSucceedsWithValue(0)); // Timeout. +} + +// This test will verify that a socket which has unread data will still allow +// the data to be read after shutting down the read side, and once there is no +// unread data left, then read will return an EOF. +TEST_P(TCPSocketPairTest, ShutdownRdAllowsReadOfReceivedDataBeforeEOF) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char buf[10] = {}; + ASSERT_THAT(RetryEINTR(write)(sockets->first_fd(), buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + // Wait until t_ sees the data on its side but don't read it. + struct pollfd poll_fd = {sockets->second_fd(), POLLIN | POLLHUP, 0}; + constexpr int kPollTimeoutMs = 20000; // Wait up to 20 seconds for the data. + ASSERT_THAT(RetryEINTR(poll)(&poll_fd, 1, kPollTimeoutMs), + SyscallSucceedsWithValue(1)); + + // Now shutdown the read end. + ASSERT_THAT(shutdown(sockets->second_fd(), SHUT_RD), SyscallSucceeds()); + + // Even though we did a SHUT_RD on the read end we can still read the data. + ASSERT_THAT(RetryEINTR(read)(sockets->second_fd(), buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + // After reading all of the data, reading the closed read end returns EOF. 
+ ASSERT_THAT(RetryEINTR(poll)(&poll_fd, 1, kPollTimeoutMs), + SyscallSucceedsWithValue(1)); + ASSERT_THAT(RetryEINTR(read)(sockets->second_fd(), buf, sizeof(buf)), + SyscallSucceedsWithValue(0)); +} + +// This test verifies that a shutdown(wr) by the server after sending +// data allows the client to still read() the queued data and a client +// close after sending response allows server to read the incoming +// response. +TEST_P(TCPSocketPairTest, ShutdownWrServerClientClose) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + char buf[10] = {}; + ScopedThread t([&]() { + ASSERT_THAT(RetryEINTR(read)(sockets->first_fd(), buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + ASSERT_THAT(RetryEINTR(write)(sockets->first_fd(), buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + ASSERT_THAT(close(sockets->release_first_fd()), + SyscallSucceedsWithValue(0)); + }); + ASSERT_THAT(RetryEINTR(write)(sockets->second_fd(), buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + ASSERT_THAT(RetryEINTR(shutdown)(sockets->second_fd(), SHUT_WR), + SyscallSucceedsWithValue(0)); + t.Join(); + + ASSERT_THAT(RetryEINTR(read)(sockets->second_fd(), buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); +} + +TEST_P(TCPSocketPairTest, ClosedReadNonBlockingSocket) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + // Set the read end to O_NONBLOCK. + int opts = 0; + ASSERT_THAT(opts = fcntl(sockets->second_fd(), F_GETFL), SyscallSucceeds()); + ASSERT_THAT(fcntl(sockets->second_fd(), F_SETFL, opts | O_NONBLOCK), + SyscallSucceeds()); + + char buf[10] = {}; + ASSERT_THAT(RetryEINTR(send)(sockets->first_fd(), buf, sizeof(buf), 0), + SyscallSucceedsWithValue(sizeof(buf))); + + // Wait until second_fd sees the data and then recv it. + struct pollfd poll_fd = {sockets->second_fd(), POLLIN, 0}; + constexpr int kPollTimeoutMs = 2000; // Wait up to 2 seconds for the data. 
+ ASSERT_THAT(RetryEINTR(poll)(&poll_fd, 1, kPollTimeoutMs), + SyscallSucceedsWithValue(1)); + + ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), buf, sizeof(buf), 0), + SyscallSucceedsWithValue(sizeof(buf))); + + // Now shutdown the write end leaving the read end open. + ASSERT_THAT(close(sockets->release_first_fd()), SyscallSucceeds()); + + // Wait for close notification and recv again. + struct pollfd poll_fd2 = {sockets->second_fd(), POLLIN, 0}; + ASSERT_THAT(RetryEINTR(poll)(&poll_fd2, 1, kPollTimeoutMs), + SyscallSucceedsWithValue(1)); + + ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), buf, sizeof(buf), 0), + SyscallSucceedsWithValue(0)); +} + +TEST_P(TCPSocketPairTest, + ShutdownRdUnreadDataShouldCauseNoPacketsUnlessClosed) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char buf[10] = {}; + ASSERT_THAT(RetryEINTR(write)(sockets->first_fd(), buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + // Wait until t_ sees the data on its side but don't read it. + struct pollfd poll_fd = {sockets->second_fd(), POLLIN | POLLHUP, 0}; + constexpr int kPollTimeoutMs = 20000; // Wait up to 20 seconds for the data. + ASSERT_THAT(RetryEINTR(poll)(&poll_fd, 1, kPollTimeoutMs), + SyscallSucceedsWithValue(1)); + + // Now shutdown the read end, this will generate no packets to the other end. + ASSERT_THAT(shutdown(sockets->second_fd(), SHUT_RD), SyscallSucceeds()); + + // We should not receive any events on the other side of the socket. + struct pollfd poll_fd2 = {sockets->first_fd(), POLLIN | POLLHUP, 0}; + constexpr int kPollNoResponseTimeoutMs = 3000; + ASSERT_THAT(RetryEINTR(poll)(&poll_fd2, 1, kPollNoResponseTimeoutMs), + SyscallSucceedsWithValue(0)); // Timeout. + + // Now since we've fully closed the connection it will generate a RST. + ASSERT_THAT(close(sockets->release_second_fd()), SyscallSucceeds()); + ASSERT_THAT(RetryEINTR(poll)(&poll_fd2, 1, kPollTimeoutMs), + SyscallSucceedsWithValue(1)); // The other end has closed. 
+ + // A shutdown with unread data will cause a RST to be sent instead + // of a FIN, per RFC 2525 section 2.17; this is also what Linux does. + ASSERT_THAT(RetryEINTR(read)(sockets->first_fd(), buf, sizeof(buf)), + SyscallFailsWithErrno(ECONNRESET)); +} + +TEST_P(TCPSocketPairTest, TCPCorkDefault) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + int get = -1; + socklen_t get_len = sizeof(get); + EXPECT_THAT( + getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_CORK, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kSockOptOff); +} + +TEST_P(TCPSocketPairTest, SetTCPCork) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_CORK, + &kSockOptOn, sizeof(kSockOptOn)), + SyscallSucceeds()); + + int get = -1; + socklen_t get_len = sizeof(get); + EXPECT_THAT( + getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_CORK, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kSockOptOn); + + ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_CORK, + &kSockOptOff, sizeof(kSockOptOff)), + SyscallSucceeds()); + + EXPECT_THAT( + getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_CORK, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kSockOptOff); +} + +TEST_P(TCPSocketPairTest, TCPCork) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_CORK, + &kSockOptOn, sizeof(kSockOptOn)), + SyscallSucceeds()); + + constexpr char kData[] = "abc"; + ASSERT_THAT(WriteFd(sockets->first_fd(), kData, sizeof(kData)), + SyscallSucceedsWithValue(sizeof(kData))); + + ASSERT_NO_FATAL_FAILURE(RecvNoData(sockets->second_fd())); + + EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_CORK, + &kSockOptOff, sizeof(kSockOptOff)), + SyscallSucceeds()); + + // Create a receive 
buffer larger than kData. + char buf[(sizeof(kData) + 1) * 2] = {}; + ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), buf, sizeof(buf), 0), + SyscallSucceedsWithValue(sizeof(kData))); + EXPECT_EQ(absl::string_view(kData, sizeof(kData)), + absl::string_view(buf, sizeof(kData))); +} + +TEST_P(TCPSocketPairTest, TCPQuickAckDefault) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + int get = -1; + socklen_t get_len = sizeof(get); + EXPECT_THAT(getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_QUICKACK, &get, + &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kSockOptOn); +} + +TEST_P(TCPSocketPairTest, SetTCPQuickAck) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_QUICKACK, + &kSockOptOff, sizeof(kSockOptOff)), + SyscallSucceeds()); + + int get = -1; + socklen_t get_len = sizeof(get); + EXPECT_THAT(getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_QUICKACK, &get, + &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kSockOptOff); + + ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_QUICKACK, + &kSockOptOn, sizeof(kSockOptOn)), + SyscallSucceeds()); + + EXPECT_THAT(getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_QUICKACK, &get, + &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kSockOptOn); +} + +TEST_P(TCPSocketPairTest, SoKeepaliveDefault) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + int get = -1; + socklen_t get_len = sizeof(get); + EXPECT_THAT( + getsockopt(sockets->first_fd(), SOL_SOCKET, SO_KEEPALIVE, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kSockOptOff); +} + +TEST_P(TCPSocketPairTest, SetSoKeepalive) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(setsockopt(sockets->first_fd(), SOL_SOCKET, SO_KEEPALIVE, + 
&kSockOptOn, sizeof(kSockOptOn)), + SyscallSucceeds()); + + int get = -1; + socklen_t get_len = sizeof(get); + EXPECT_THAT( + getsockopt(sockets->first_fd(), SOL_SOCKET, SO_KEEPALIVE, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kSockOptOn); + + ASSERT_THAT(setsockopt(sockets->first_fd(), SOL_SOCKET, SO_KEEPALIVE, + &kSockOptOff, sizeof(kSockOptOff)), + SyscallSucceeds()); + + EXPECT_THAT( + getsockopt(sockets->first_fd(), SOL_SOCKET, SO_KEEPALIVE, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kSockOptOff); +} + +TEST_P(TCPSocketPairTest, TCPKeepidleDefault) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + int get = -1; + socklen_t get_len = sizeof(get); + EXPECT_THAT(getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_KEEPIDLE, &get, + &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, 2 * 60 * 60); // 2 hours. +} + +TEST_P(TCPSocketPairTest, TCPKeepintvlDefault) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + int get = -1; + socklen_t get_len = sizeof(get); + EXPECT_THAT(getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_KEEPINTVL, &get, + &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, 75); // 75 seconds. +} + +TEST_P(TCPSocketPairTest, SetTCPKeepidleZero) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + constexpr int kZero = 0; + EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_KEEPIDLE, &kZero, + sizeof(kZero)), + SyscallFailsWithErrno(EINVAL)); +} + +TEST_P(TCPSocketPairTest, SetTCPKeepintvlZero) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + constexpr int kZero = 0; + EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_KEEPINTVL, + &kZero, sizeof(kZero)), + SyscallFailsWithErrno(EINVAL)); +} + +// Copied from include/net/tcp.h. 
+constexpr int MAX_TCP_KEEPIDLE = 32767; +constexpr int MAX_TCP_KEEPINTVL = 32767; +constexpr int MAX_TCP_KEEPCNT = 127; + +TEST_P(TCPSocketPairTest, SetTCPKeepidleAboveMax) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + constexpr int kAboveMax = MAX_TCP_KEEPIDLE + 1; + EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_KEEPIDLE, + &kAboveMax, sizeof(kAboveMax)), + SyscallFailsWithErrno(EINVAL)); +} + +TEST_P(TCPSocketPairTest, SetTCPKeepintvlAboveMax) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + constexpr int kAboveMax = MAX_TCP_KEEPINTVL + 1; + EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_KEEPINTVL, + &kAboveMax, sizeof(kAboveMax)), + SyscallFailsWithErrno(EINVAL)); +} + +TEST_P(TCPSocketPairTest, SetTCPKeepidleToMax) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_KEEPIDLE, + &MAX_TCP_KEEPIDLE, sizeof(MAX_TCP_KEEPIDLE)), + SyscallSucceedsWithValue(0)); + + int get = -1; + socklen_t get_len = sizeof(get); + EXPECT_THAT(getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_KEEPIDLE, &get, + &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, MAX_TCP_KEEPIDLE); +} + +TEST_P(TCPSocketPairTest, SetTCPKeepintvlToMax) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_KEEPINTVL, + &MAX_TCP_KEEPINTVL, sizeof(MAX_TCP_KEEPINTVL)), + SyscallSucceedsWithValue(0)); + + int get = -1; + socklen_t get_len = sizeof(get); + EXPECT_THAT(getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_KEEPINTVL, &get, + &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, MAX_TCP_KEEPINTVL); +} + +TEST_P(TCPSocketPairTest, TCPKeepcountDefault) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + int get = -1; + socklen_t get_len = sizeof(get); + EXPECT_THAT( + 
getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_KEEPCNT, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, 9); // 9 keepalive probes. +} + +TEST_P(TCPSocketPairTest, SetTCPKeepcountZero) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + constexpr int kZero = 0; + EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_KEEPCNT, &kZero, + sizeof(kZero)), + SyscallFailsWithErrno(EINVAL)); +} + +TEST_P(TCPSocketPairTest, SetTCPKeepcountAboveMax) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + constexpr int kAboveMax = MAX_TCP_KEEPCNT + 1; + EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_KEEPCNT, + &kAboveMax, sizeof(kAboveMax)), + SyscallFailsWithErrno(EINVAL)); +} + +TEST_P(TCPSocketPairTest, SetTCPKeepcountToMax) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_KEEPCNT, + &MAX_TCP_KEEPCNT, sizeof(MAX_TCP_KEEPCNT)), + SyscallSucceedsWithValue(0)); + + int get = -1; + socklen_t get_len = sizeof(get); + EXPECT_THAT( + getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_KEEPCNT, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, MAX_TCP_KEEPCNT); +} + +TEST_P(TCPSocketPairTest, SetTCPKeepcountToOne) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + int keepaliveCount = 1; + EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_KEEPCNT, + &keepaliveCount, sizeof(keepaliveCount)), + SyscallSucceedsWithValue(0)); + + int get = -1; + socklen_t get_len = sizeof(get); + EXPECT_THAT( + getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_KEEPCNT, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, keepaliveCount); +} + +TEST_P(TCPSocketPairTest, SetTCPKeepcountToNegative) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + int keepaliveCount = -5; + 
EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_KEEPCNT, + &keepaliveCount, sizeof(keepaliveCount)), + SyscallFailsWithErrno(EINVAL)); +} + +TEST_P(TCPSocketPairTest, SetOOBInline) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + EXPECT_THAT(setsockopt(sockets->first_fd(), SOL_SOCKET, SO_OOBINLINE, + &kSockOptOn, sizeof(kSockOptOn)), + SyscallSucceeds()); + + int get = -1; + socklen_t get_len = sizeof(get); + EXPECT_THAT( + getsockopt(sockets->first_fd(), SOL_SOCKET, SO_OOBINLINE, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kSockOptOn); +} + +TEST_P(TCPSocketPairTest, MsgTruncMsgPeek) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[512]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + ASSERT_THAT( + RetryEINTR(send)(sockets->first_fd(), sent_data, sizeof(sent_data), 0), + SyscallSucceedsWithValue(sizeof(sent_data))); + + // Read half of the data with MSG_TRUNC | MSG_PEEK. This way there will still + // be some data left to read in the next step even if the data gets consumed. + char received_data1[sizeof(sent_data) / 2] = {}; + ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data1, + sizeof(received_data1), MSG_TRUNC | MSG_PEEK), + SyscallSucceedsWithValue(sizeof(received_data1))); + + // Check that we didn't get anything. + char zeros[sizeof(received_data1)] = {}; + EXPECT_EQ(0, memcmp(zeros, received_data1, sizeof(received_data1))); + + // Check that all of the data is still there. + char received_data2[sizeof(sent_data)] = {}; + ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data2, + sizeof(received_data2), 0), + SyscallSucceedsWithValue(sizeof(sent_data))); + + EXPECT_EQ(0, memcmp(received_data2, sent_data, sizeof(sent_data))); +} + +TEST_P(TCPSocketPairTest, SetCongestionControlSucceedsForSupported) { + // This is Linux's net/tcp.h TCP_CA_NAME_MAX. 
+ const int kTcpCaNameMax = 16; + + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + // Netstack only supports reno & cubic so we only test these two values here. + { + const char kSetCC[kTcpCaNameMax] = "reno"; + ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_CONGESTION, + &kSetCC, strlen(kSetCC)), + SyscallSucceedsWithValue(0)); + + char got_cc[kTcpCaNameMax]; + memset(got_cc, '1', sizeof(got_cc)); + socklen_t optlen = sizeof(got_cc); + ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_CONGESTION, + &got_cc, &optlen), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(0, memcmp(got_cc, kSetCC, sizeof(kSetCC))); + } + { + const char kSetCC[kTcpCaNameMax] = "cubic"; + ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_CONGESTION, + &kSetCC, strlen(kSetCC)), + SyscallSucceedsWithValue(0)); + + char got_cc[kTcpCaNameMax]; + memset(got_cc, '1', sizeof(got_cc)); + socklen_t optlen = sizeof(got_cc); + ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_CONGESTION, + &got_cc, &optlen), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(0, memcmp(got_cc, kSetCC, sizeof(kSetCC))); + } +} + +TEST_P(TCPSocketPairTest, SetGetTCPCongestionShortReadBuffer) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + { + // Verify that getsockopt/setsockopt work with buffers smaller than + // kTcpCaNameMax. + const char kSetCC[] = "cubic"; + ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_CONGESTION, + &kSetCC, strlen(kSetCC)), + SyscallSucceedsWithValue(0)); + + char got_cc[sizeof(kSetCC)]; + socklen_t optlen = sizeof(got_cc); + ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_CONGESTION, + &got_cc, &optlen), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(0, memcmp(got_cc, kSetCC, sizeof(got_cc))); + } +} + +TEST_P(TCPSocketPairTest, SetGetTCPCongestionLargeReadBuffer) { + // This is Linux's net/tcp.h TCP_CA_NAME_MAX. 
+ const int kTcpCaNameMax = 16; + + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + { + // Verify that getsockopt works with buffers larger than + // kTcpCaNameMax. + const char kSetCC[] = "cubic"; + ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_CONGESTION, + &kSetCC, strlen(kSetCC)), + SyscallSucceedsWithValue(0)); + + char got_cc[kTcpCaNameMax + 5]; + socklen_t optlen = sizeof(got_cc); + ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_CONGESTION, + &got_cc, &optlen), + SyscallSucceedsWithValue(0)); + // Linux copies the minimum of kTcpCaNameMax or the length of the passed in + // buffer and sets optlen to the number of bytes actually copied + // irrespective of the actual length of the congestion control name. + EXPECT_EQ(kTcpCaNameMax, optlen); + EXPECT_EQ(0, memcmp(got_cc, kSetCC, sizeof(kSetCC))); + } +} + +TEST_P(TCPSocketPairTest, SetCongestionControlFailsForUnsupported) { + // This is Linux's net/tcp.h TCP_CA_NAME_MAX. + const int kTcpCaNameMax = 16; + + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + char old_cc[kTcpCaNameMax]; + socklen_t optlen = sizeof(old_cc); + ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_CONGESTION, + &old_cc, &optlen), + SyscallSucceedsWithValue(0)); + + const char kSetCC[] = "invalid_ca_cc"; + ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_CONGESTION, + &kSetCC, strlen(kSetCC)), + SyscallFailsWithErrno(ENOENT)); + + char got_cc[kTcpCaNameMax]; + optlen = sizeof(got_cc); + ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_CONGESTION, + &got_cc, &optlen), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(0, memcmp(got_cc, old_cc, sizeof(old_cc))); +} + +// Linux and Netstack both default to a 60s TCP_LINGER2 timeout. 
+constexpr int kDefaultTCPLingerTimeout = 60; + +TEST_P(TCPSocketPairTest, TCPLingerTimeoutDefault) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + int get = -1; + socklen_t get_len = sizeof(get); + EXPECT_THAT( + getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_LINGER2, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kDefaultTCPLingerTimeout); +} + +TEST_P(TCPSocketPairTest, SetTCPLingerTimeoutZeroOrLess) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + constexpr int kZero = 0; + EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_LINGER2, &kZero, + sizeof(kZero)), + SyscallSucceedsWithValue(0)); + + constexpr int kNegative = -1234; + EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_LINGER2, + &kNegative, sizeof(kNegative)), + SyscallSucceedsWithValue(0)); +} + +TEST_P(TCPSocketPairTest, SetTCPLingerTimeoutAboveDefault) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + // Values above the net.ipv4.tcp_fin_timeout are capped to tcp_fin_timeout + // on linux (defaults to 60 seconds on linux). + constexpr int kAboveDefault = kDefaultTCPLingerTimeout + 1; + EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_LINGER2, + &kAboveDefault, sizeof(kAboveDefault)), + SyscallSucceedsWithValue(0)); + + int get = -1; + socklen_t get_len = sizeof(get); + EXPECT_THAT( + getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_LINGER2, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kDefaultTCPLingerTimeout); +} + +TEST_P(TCPSocketPairTest, SetTCPLingerTimeout) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + // Values above the net.ipv4.tcp_fin_timeout are capped to tcp_fin_timeout + // on linux (defaults to 60 seconds on linux). 
+ constexpr int kTCPLingerTimeout = kDefaultTCPLingerTimeout - 1; + EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_LINGER2, + &kTCPLingerTimeout, sizeof(kTCPLingerTimeout)), + SyscallSucceedsWithValue(0)); + + int get = -1; + socklen_t get_len = sizeof(get); + EXPECT_THAT( + getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_LINGER2, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kTCPLingerTimeout); +} + +TEST_P(TCPSocketPairTest, TestTCPCloseWithData) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ScopedThread t([&]() { + // Close one end to trigger sending of a FIN. + ASSERT_THAT(shutdown(sockets->second_fd(), SHUT_WR), SyscallSucceeds()); + char buf[3]; + ASSERT_THAT(read(sockets->second_fd(), buf, 3), + SyscallSucceedsWithValue(3)); + absl::SleepFor(absl::Milliseconds(50)); + ASSERT_THAT(close(sockets->release_second_fd()), SyscallSucceeds()); + }); + + absl::SleepFor(absl::Milliseconds(50)); + // Send some data then close. + constexpr char kStr[] = "abc"; + ASSERT_THAT(write(sockets->first_fd(), kStr, 3), SyscallSucceedsWithValue(3)); + t.Join(); + ASSERT_THAT(close(sockets->release_first_fd()), SyscallSucceeds()); +} + +TEST_P(TCPSocketPairTest, TCPUserTimeoutDefault) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_USER_TIMEOUT, + &get, &get_len), + SyscallSucceeds()); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, 0); // 0 ms (disabled). 
+} + +TEST_P(TCPSocketPairTest, SetTCPUserTimeoutZero) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + constexpr int kZero = 0; + ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_USER_TIMEOUT, + &kZero, sizeof(kZero)), + SyscallSucceeds()); + + int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_USER_TIMEOUT, + &get, &get_len), + SyscallSucceeds()); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, 0); // 0 ms (disabled). +} + +TEST_P(TCPSocketPairTest, SetTCPUserTimeoutBelowZero) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + constexpr int kNeg = -10; + EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_USER_TIMEOUT, + &kNeg, sizeof(kNeg)), + SyscallFailsWithErrno(EINVAL)); + + int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_USER_TIMEOUT, + &get, &get_len), + SyscallSucceeds()); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, 0); // 0 ms (disabled). +} + +TEST_P(TCPSocketPairTest, SetTCPUserTimeoutAboveZero) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + constexpr int kAbove = 10; + ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_USER_TIMEOUT, + &kAbove, sizeof(kAbove)), + SyscallSucceeds()); + + int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_USER_TIMEOUT, + &get, &get_len), + SyscallSucceeds()); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kAbove); +} + +TEST_P(TCPSocketPairTest, SetTCPWindowClampBelowMinRcvBufConnectedSocket) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + // Discover minimum receive buf by setting a really low value + // for the receive buffer. 
+ constexpr int kZero = 0; + EXPECT_THAT(setsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVBUF, &kZero, + sizeof(kZero)), + SyscallSucceeds()); + + // Now retrieve the minimum value for SO_RCVBUF as the set above should + // have caused SO_RCVBUF for the socket to be set to the minimum. + int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT( + getsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVBUF, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + int min_so_rcvbuf = get; + + { + // Setting TCP_WINDOW_CLAMP to zero for a connected socket is not permitted. + constexpr int kZero = 0; + EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_WINDOW_CLAMP, + &kZero, sizeof(kZero)), + SyscallFailsWithErrno(EINVAL)); + + // Non-zero clamp values below MIN_SO_RCVBUF/2 should result in the clamp + // being set to MIN_SO_RCVBUF/2. + int below_half_min_so_rcvbuf = min_so_rcvbuf / 2 - 1; + EXPECT_THAT( + setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_WINDOW_CLAMP, + &below_half_min_so_rcvbuf, sizeof(below_half_min_so_rcvbuf)), + SyscallSucceeds()); + + int get = -1; + socklen_t get_len = sizeof(get); + + ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_WINDOW_CLAMP, + &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(min_so_rcvbuf / 2, get); + } +} + +TEST_P(TCPSocketPairTest, IpMulticastTtlDefault) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + int get = -1; + socklen_t get_len = sizeof(get); + EXPECT_THAT(getsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_TTL, + &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_GT(get, 0); +} + +TEST_P(TCPSocketPairTest, IpMulticastLoopDefault) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + int get = -1; + socklen_t get_len = sizeof(get); + EXPECT_THAT(getsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_LOOP, + &get, &get_len), + 
SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, 1); +} + +TEST_P(TCPSocketPairTest, TCPResetDuringClose_NoRandomSave) { + DisableSave ds; // Too many syscalls. + constexpr int kThreadCount = 1000; + std::unique_ptr<ScopedThread> instances[kThreadCount]; + for (int i = 0; i < kThreadCount; i++) { + instances[i] = absl::make_unique<ScopedThread>([&]() { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ScopedThread t([&]() { + // Close one end to trigger sending of a FIN. + struct pollfd poll_fd = {sockets->second_fd(), POLLIN | POLLHUP, 0}; + // Wait up to 20 seconds for the data. + constexpr int kPollTimeoutMs = 20000; + ASSERT_THAT(RetryEINTR(poll)(&poll_fd, 1, kPollTimeoutMs), + SyscallSucceedsWithValue(1)); + ASSERT_THAT(close(sockets->release_second_fd()), SyscallSucceeds()); + }); + + // Send some data then close. + constexpr char kStr[] = "abc"; + ASSERT_THAT(write(sockets->first_fd(), kStr, 3), + SyscallSucceedsWithValue(3)); + absl::SleepFor(absl::Milliseconds(10)); + ASSERT_THAT(close(sockets->release_first_fd()), SyscallSucceeds()); + t.Join(); + }); + } + for (int i = 0; i < kThreadCount; i++) { + instances[i]->Join(); + } +} + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_ip_tcp_generic.h b/test/syscalls/linux/socket_ip_tcp_generic.h new file mode 100644 index 000000000..a3eff3c73 --- /dev/null +++ b/test/syscalls/linux/socket_ip_tcp_generic.h @@ -0,0 +1,29 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IP_TCP_GENERIC_H_ +#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IP_TCP_GENERIC_H_ + +#include "test/syscalls/linux/socket_test_util.h" + +namespace gvisor { +namespace testing { + +// Test fixture for tests that apply to pairs of connected TCP sockets. +using TCPSocketPairTest = SocketPairTest; + +} // namespace testing +} // namespace gvisor + +#endif // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IP_TCP_GENERIC_H_ diff --git a/test/syscalls/linux/socket_ip_tcp_generic_loopback.cc b/test/syscalls/linux/socket_ip_tcp_generic_loopback.cc new file mode 100644 index 000000000..4e79d21f4 --- /dev/null +++ b/test/syscalls/linux/socket_ip_tcp_generic_loopback.cc @@ -0,0 +1,45 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <netinet/tcp.h> + +#include <vector> + +#include "test/syscalls/linux/ip_socket_test_util.h" +#include "test/syscalls/linux/socket_ip_tcp_generic.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { +namespace { + +std::vector<SocketPairKind> GetSocketPairs() { + return ApplyVecToVec<SocketPairKind>( + std::vector<Middleware>{ + NoOp, SetSockOpt(IPPROTO_TCP, TCP_NODELAY, &kSockOptOn)}, + std::vector<SocketPairKind>{ + IPv6TCPAcceptBindSocketPair(0), + IPv4TCPAcceptBindSocketPair(0), + DualStackTCPAcceptBindSocketPair(0), + }); +} + +INSTANTIATE_TEST_SUITE_P( + AllTCPSockets, TCPSocketPairTest, + ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_ip_tcp_loopback.cc b/test/syscalls/linux/socket_ip_tcp_loopback.cc new file mode 100644 index 000000000..9db3037bc --- /dev/null +++ b/test/syscalls/linux/socket_ip_tcp_loopback.cc @@ -0,0 +1,40 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <vector> + +#include "test/syscalls/linux/ip_socket_test_util.h" +#include "test/syscalls/linux/socket_generic.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { +namespace { + +std::vector<SocketPairKind> GetSocketPairs() { // TCP pair kinds under test. + return { + IPv6TCPAcceptBindSocketPair(0), + IPv4TCPAcceptBindSocketPair(0), + DualStackTCPAcceptBindSocketPair(0), + }; +} + +INSTANTIATE_TEST_SUITE_P( + AllTCPSockets, AllSocketPairTest, // Prefix names the TCP pairs above. + ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_ip_tcp_loopback_blocking.cc b/test/syscalls/linux/socket_ip_tcp_loopback_blocking.cc new file mode 100644 index 000000000..f996b93d2 --- /dev/null +++ b/test/syscalls/linux/socket_ip_tcp_loopback_blocking.cc @@ -0,0 +1,45 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <netinet/tcp.h> + +#include <vector> + +#include "test/syscalls/linux/ip_socket_test_util.h" +#include "test/syscalls/linux/socket_stream_blocking.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { +namespace { + +std::vector<SocketPairKind> GetSocketPairs() { + return ApplyVecToVec<SocketPairKind>( + std::vector<Middleware>{ + NoOp, SetSockOpt(IPPROTO_TCP, TCP_NODELAY, &kSockOptOn)}, + std::vector<SocketPairKind>{ + IPv6TCPAcceptBindSocketPair(0), + IPv4TCPAcceptBindSocketPair(0), + DualStackTCPAcceptBindSocketPair(0), + }); +} + +INSTANTIATE_TEST_SUITE_P( + BlockingTCPSockets, BlockingStreamSocketPairTest, + ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_ip_tcp_loopback_nonblock.cc b/test/syscalls/linux/socket_ip_tcp_loopback_nonblock.cc new file mode 100644 index 000000000..ffa377210 --- /dev/null +++ b/test/syscalls/linux/socket_ip_tcp_loopback_nonblock.cc @@ -0,0 +1,44 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <netinet/tcp.h> + +#include <vector> + +#include "test/syscalls/linux/ip_socket_test_util.h" +#include "test/syscalls/linux/socket_non_blocking.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { +namespace { + +std::vector<SocketPairKind> GetSocketPairs() { + return ApplyVecToVec<SocketPairKind>( + std::vector<Middleware>{ + NoOp, SetSockOpt(IPPROTO_TCP, TCP_NODELAY, &kSockOptOn)}, + std::vector<SocketPairKind>{ + IPv6TCPAcceptBindSocketPair(SOCK_NONBLOCK), + IPv4TCPAcceptBindSocketPair(SOCK_NONBLOCK), + }); +} + +INSTANTIATE_TEST_SUITE_P( + NonBlockingTCPSockets, NonBlockingSocketPairTest, + ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_ip_tcp_udp_generic.cc b/test/syscalls/linux/socket_ip_tcp_udp_generic.cc new file mode 100644 index 000000000..f178f1af9 --- /dev/null +++ b/test/syscalls/linux/socket_ip_tcp_udp_generic.cc @@ -0,0 +1,77 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <netinet/in.h> +#include <netinet/tcp.h> +#include <poll.h> +#include <stdio.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <sys/un.h> + +#include "gtest/gtest.h" +#include "test/syscalls/linux/ip_socket_test_util.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// Test fixture for tests that apply to pairs of TCP and UDP sockets. +using TcpUdpSocketPairTest = SocketPairTest; + +TEST_P(TcpUdpSocketPairTest, ShutdownWrFollowedBySendIsError) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + // Now shutdown the write end of the first. + ASSERT_THAT(shutdown(sockets->first_fd(), SHUT_WR), SyscallSucceeds()); + + char buf[10] = {}; + ASSERT_THAT(RetryEINTR(send)(sockets->first_fd(), buf, sizeof(buf), 0), + SyscallFailsWithErrno(EPIPE)); +} + +std::vector<SocketPairKind> GetSocketPairs() { + return VecCat<SocketPairKind>( + ApplyVec<SocketPairKind>( + IPv6UDPBidirectionalBindSocketPair, + AllBitwiseCombinations(List<int>{0, SOCK_NONBLOCK})), + ApplyVec<SocketPairKind>( + IPv4UDPBidirectionalBindSocketPair, + AllBitwiseCombinations(List<int>{0, SOCK_NONBLOCK})), + ApplyVec<SocketPairKind>( + DualStackUDPBidirectionalBindSocketPair, + AllBitwiseCombinations(List<int>{0, SOCK_NONBLOCK})), + ApplyVec<SocketPairKind>( + IPv6TCPAcceptBindSocketPair, + AllBitwiseCombinations(List<int>{0, SOCK_NONBLOCK})), + ApplyVec<SocketPairKind>( + IPv4TCPAcceptBindSocketPair, + AllBitwiseCombinations(List<int>{0, SOCK_NONBLOCK})), + ApplyVec<SocketPairKind>( + DualStackTCPAcceptBindSocketPair, + AllBitwiseCombinations(List<int>{0, SOCK_NONBLOCK}))); +} + +INSTANTIATE_TEST_SUITE_P( + AllIPSockets, TcpUdpSocketPairTest, + ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_ip_udp_generic.cc 
b/test/syscalls/linux/socket_ip_udp_generic.cc new file mode 100644 index 000000000..edb86aded --- /dev/null +++ b/test/syscalls/linux/socket_ip_udp_generic.cc @@ -0,0 +1,452 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "test/syscalls/linux/socket_ip_udp_generic.h" + +#include <errno.h> +#include <netinet/in.h> +#include <netinet/tcp.h> +#include <poll.h> +#include <stdio.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <sys/un.h> + +#include "gtest/gtest.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +TEST_P(UDPSocketPairTest, MulticastTTLDefault) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_TTL, + &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, 1); +} + +TEST_P(UDPSocketPairTest, SetUDPMulticastTTLMin) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + constexpr int kMin = 0; + EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_TTL, + &kMin, sizeof(kMin)), + SyscallSucceeds()); + + int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_TTL, + &get, &get_len), + SyscallSucceedsWithValue(0)); 
+ EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kMin); +} + +TEST_P(UDPSocketPairTest, SetUDPMulticastTTLMax) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + constexpr int kMax = 255; + EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_TTL, + &kMax, sizeof(kMax)), + SyscallSucceeds()); + + int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_TTL, + &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kMax); +} + +TEST_P(UDPSocketPairTest, SetUDPMulticastTTLNegativeOne) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + constexpr int kArbitrary = 6; + EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_TTL, + &kArbitrary, sizeof(kArbitrary)), + SyscallSucceeds()); + + constexpr int kNegOne = -1; + EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_TTL, + &kNegOne, sizeof(kNegOne)), + SyscallSucceeds()); + + int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_TTL, + &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, 1); +} + +TEST_P(UDPSocketPairTest, SetUDPMulticastTTLBelowMin) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + constexpr int kBelowMin = -2; + EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_TTL, + &kBelowMin, sizeof(kBelowMin)), + SyscallFailsWithErrno(EINVAL)); +} + +TEST_P(UDPSocketPairTest, SetUDPMulticastTTLAboveMax) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + constexpr int kAboveMax = 256; + EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_TTL, + &kAboveMax, sizeof(kAboveMax)), + SyscallFailsWithErrno(EINVAL)); +} + +TEST_P(UDPSocketPairTest, SetUDPMulticastTTLChar) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + constexpr char 
kArbitrary = 6; + EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_TTL, + &kArbitrary, sizeof(kArbitrary)), + SyscallSucceeds()); + + int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_TTL, + &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kArbitrary); +} + +TEST_P(UDPSocketPairTest, SetEmptyIPAddMembership) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + struct ip_mreqn req = {}; + EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_ADD_MEMBERSHIP, + &req, sizeof(req)), + SyscallFailsWithErrno(EINVAL)); +} + +TEST_P(UDPSocketPairTest, MulticastLoopDefault) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_LOOP, + &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kSockOptOn); +} + +TEST_P(UDPSocketPairTest, SetMulticastLoop) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_LOOP, + &kSockOptOff, sizeof(kSockOptOff)), + SyscallSucceeds()); + + int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_LOOP, + &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kSockOptOff); + + ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_LOOP, + &kSockOptOn, sizeof(kSockOptOn)), + SyscallSucceeds()); + + ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_LOOP, + &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kSockOptOn); +} + +TEST_P(UDPSocketPairTest, SetMulticastLoopChar) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + 
constexpr char kSockOptOnChar = kSockOptOn; + constexpr char kSockOptOffChar = kSockOptOff; + + ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_LOOP, + &kSockOptOffChar, sizeof(kSockOptOffChar)), + SyscallSucceeds()); + + int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_LOOP, + &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kSockOptOff); + + ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_LOOP, + &kSockOptOnChar, sizeof(kSockOptOnChar)), + SyscallSucceeds()); + + ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_LOOP, + &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kSockOptOn); +} + +TEST_P(UDPSocketPairTest, ReuseAddrDefault) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT( + getsockopt(sockets->first_fd(), SOL_SOCKET, SO_REUSEADDR, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kSockOptOff); +} + +TEST_P(UDPSocketPairTest, SetReuseAddr) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(setsockopt(sockets->first_fd(), SOL_SOCKET, SO_REUSEADDR, + &kSockOptOn, sizeof(kSockOptOn)), + SyscallSucceeds()); + + int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT( + getsockopt(sockets->first_fd(), SOL_SOCKET, SO_REUSEADDR, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kSockOptOn); + + ASSERT_THAT(setsockopt(sockets->first_fd(), SOL_SOCKET, SO_REUSEADDR, + &kSockOptOff, sizeof(kSockOptOff)), + SyscallSucceeds()); + + ASSERT_THAT( + getsockopt(sockets->first_fd(), SOL_SOCKET, SO_REUSEADDR, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kSockOptOff); +} + 
+TEST_P(UDPSocketPairTest, ReusePortDefault) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT( + getsockopt(sockets->first_fd(), SOL_SOCKET, SO_REUSEPORT, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kSockOptOff); +} + +TEST_P(UDPSocketPairTest, SetReusePort) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(setsockopt(sockets->first_fd(), SOL_SOCKET, SO_REUSEPORT, + &kSockOptOn, sizeof(kSockOptOn)), + SyscallSucceeds()); + + int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT( + getsockopt(sockets->first_fd(), SOL_SOCKET, SO_REUSEPORT, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kSockOptOn); + + ASSERT_THAT(setsockopt(sockets->first_fd(), SOL_SOCKET, SO_REUSEPORT, + &kSockOptOff, sizeof(kSockOptOff)), + SyscallSucceeds()); + + ASSERT_THAT( + getsockopt(sockets->first_fd(), SOL_SOCKET, SO_REUSEPORT, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kSockOptOff); +} + +TEST_P(UDPSocketPairTest, SetReuseAddrReusePort) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(setsockopt(sockets->first_fd(), SOL_SOCKET, SO_REUSEADDR, + &kSockOptOn, sizeof(kSockOptOn)), + SyscallSucceeds()); + + ASSERT_THAT(setsockopt(sockets->first_fd(), SOL_SOCKET, SO_REUSEPORT, + &kSockOptOn, sizeof(kSockOptOn)), + SyscallSucceeds()); + + int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT( + getsockopt(sockets->first_fd(), SOL_SOCKET, SO_REUSEADDR, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kSockOptOn); + + ASSERT_THAT( + getsockopt(sockets->first_fd(), SOL_SOCKET, SO_REUSEPORT, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kSockOptOn); +} + +// Test getsockopt 
for a socket which is not set with IP_PKTINFO option. +TEST_P(UDPSocketPairTest, IPPKTINFODefault) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + int get = -1; + socklen_t get_len = sizeof(get); + + ASSERT_THAT( + getsockopt(sockets->first_fd(), SOL_IP, IP_PKTINFO, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kSockOptOff); +} + +// Test setsockopt and getsockopt for a socket with IP_PKTINFO option. +TEST_P(UDPSocketPairTest, SetAndGetIPPKTINFO) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + int level = SOL_IP; + int type = IP_PKTINFO; + + // Storage for the getsockopt read-back done after each setsockopt below. + int get = -1; + socklen_t get_len = sizeof(get); + + ASSERT_THAT(setsockopt(sockets->first_fd(), level, type, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceedsWithValue(0)); + + ASSERT_THAT(getsockopt(sockets->first_fd(), level, type, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get, kSockOptOn); + EXPECT_EQ(get_len, sizeof(get)); + + ASSERT_THAT(setsockopt(sockets->first_fd(), level, type, &kSockOptOff, + sizeof(kSockOptOff)), + SyscallSucceedsWithValue(0)); + + ASSERT_THAT(getsockopt(sockets->first_fd(), level, type, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get, kSockOptOff); + EXPECT_EQ(get_len, sizeof(get)); +} + +// Socket option level/name for receiving TOS (IPv4) or TClass (IPv6). +struct RecvTosOption { + int level; + int option; +}; + +RecvTosOption GetRecvTosOption(int domain) { + TEST_CHECK(domain == AF_INET || domain == AF_INET6); + RecvTosOption opt; // Filled by the switch; the check above admits only these two domains. + switch (domain) { + case AF_INET: + opt.level = IPPROTO_IP; + opt.option = IP_RECVTOS; + break; + case AF_INET6: + opt.level = IPPROTO_IPV6; + opt.option = IPV6_RECVTCLASS; + break; + } + return opt; +} + +// Ensure that Receiving TOS or TCLASS is off by default. 
+TEST_P(UDPSocketPairTest, RecvTosDefault) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + RecvTosOption t = GetRecvTosOption(GetParam().domain); + int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT( + getsockopt(sockets->first_fd(), t.level, t.option, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kSockOptOff); +} + +// Test that setting and getting IP_RECVTOS or IPV6_RECVTCLASS works as +// expected. +TEST_P(UDPSocketPairTest, SetRecvTos) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + RecvTosOption t = GetRecvTosOption(GetParam().domain); + + ASSERT_THAT(setsockopt(sockets->first_fd(), t.level, t.option, &kSockOptOff, + sizeof(kSockOptOff)), + SyscallSucceeds()); + + int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT( + getsockopt(sockets->first_fd(), t.level, t.option, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kSockOptOff); + + ASSERT_THAT(setsockopt(sockets->first_fd(), t.level, t.option, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + + ASSERT_THAT( + getsockopt(sockets->first_fd(), t.level, t.option, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kSockOptOn); +} + +// Test that any socket (including IPv6 only) accepts the IPv4 TOS option: this +// mirrors behavior in linux. +TEST_P(UDPSocketPairTest, TOSRecvMismatch) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + RecvTosOption t = GetRecvTosOption(AF_INET); + int get = -1; + socklen_t get_len = sizeof(get); + + ASSERT_THAT( + getsockopt(sockets->first_fd(), t.level, t.option, &get, &get_len), + SyscallSucceedsWithValue(0)); +} + +// Test that an IPv4 socket does not support the IPv6 TClass option. +TEST_P(UDPSocketPairTest, TClassRecvMismatch) { + // This should only test AF_INET sockets for the mismatch behavior. 
+ SKIP_IF(GetParam().domain != AF_INET); + + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + int get = -1; + socklen_t get_len = sizeof(get); + + ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_IPV6, IPV6_RECVTCLASS, + &get, &get_len), + SyscallFailsWithErrno(EOPNOTSUPP)); +} + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_ip_udp_generic.h b/test/syscalls/linux/socket_ip_udp_generic.h new file mode 100644 index 000000000..106c54e9f --- /dev/null +++ b/test/syscalls/linux/socket_ip_udp_generic.h @@ -0,0 +1,29 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IP_UDP_GENERIC_H_ +#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IP_UDP_GENERIC_H_ + +#include "test/syscalls/linux/socket_test_util.h" + +namespace gvisor { +namespace testing { + +// Test fixture for tests that apply to pairs of connected UDP sockets. +using UDPSocketPairTest = SocketPairTest; + +} // namespace testing +} // namespace gvisor + +#endif // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IP_UDP_GENERIC_H_ diff --git a/test/syscalls/linux/socket_ip_udp_loopback.cc b/test/syscalls/linux/socket_ip_udp_loopback.cc new file mode 100644 index 000000000..c7fa44884 --- /dev/null +++ b/test/syscalls/linux/socket_ip_udp_loopback.cc @@ -0,0 +1,50 @@ +// Copyright 2018 The gVisor Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <vector> + +#include "test/syscalls/linux/ip_socket_test_util.h" +#include "test/syscalls/linux/socket_generic.h" +#include "test/syscalls/linux/socket_ip_udp_generic.h" +#include "test/syscalls/linux/socket_non_stream.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { +namespace { + +std::vector<SocketPairKind> GetSocketPairs() { + return { + IPv6UDPBidirectionalBindSocketPair(0), + IPv4UDPBidirectionalBindSocketPair(0), + DualStackUDPBidirectionalBindSocketPair(0), + }; +} + +INSTANTIATE_TEST_SUITE_P( + AllUDPSockets, AllSocketPairTest, + ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); + +INSTANTIATE_TEST_SUITE_P( + AllUDPSockets, NonStreamSocketPairTest, + ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); + +INSTANTIATE_TEST_SUITE_P( + AllUDPSockets, UDPSocketPairTest, + ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_ip_udp_loopback_blocking.cc b/test/syscalls/linux/socket_ip_udp_loopback_blocking.cc new file mode 100644 index 000000000..d6925a8df --- /dev/null +++ b/test/syscalls/linux/socket_ip_udp_loopback_blocking.cc @@ -0,0 +1,39 @@ +// Copyright 2018 The gVisor Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <vector> + +#include "test/syscalls/linux/ip_socket_test_util.h" +#include "test/syscalls/linux/socket_non_stream_blocking.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { +namespace { + +std::vector<SocketPairKind> GetSocketPairs() { + return { + IPv6UDPBidirectionalBindSocketPair(0), + IPv4UDPBidirectionalBindSocketPair(0), + }; +} + +INSTANTIATE_TEST_SUITE_P( + BlockingUDPSockets, BlockingNonStreamSocketPairTest, + ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_ip_udp_loopback_nonblock.cc b/test/syscalls/linux/socket_ip_udp_loopback_nonblock.cc new file mode 100644 index 000000000..d675eddc6 --- /dev/null +++ b/test/syscalls/linux/socket_ip_udp_loopback_nonblock.cc @@ -0,0 +1,39 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include <vector> + +#include "test/syscalls/linux/ip_socket_test_util.h" +#include "test/syscalls/linux/socket_non_blocking.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { +namespace { + +std::vector<SocketPairKind> GetSocketPairs() { + return { + IPv6UDPBidirectionalBindSocketPair(SOCK_NONBLOCK), + IPv4UDPBidirectionalBindSocketPair(SOCK_NONBLOCK), + }; +} + +INSTANTIATE_TEST_SUITE_P( + NonBlockingUDPSockets, NonBlockingSocketPairTest, + ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_ip_unbound.cc b/test/syscalls/linux/socket_ip_unbound.cc new file mode 100644 index 000000000..1c7b0cf90 --- /dev/null +++ b/test/syscalls/linux/socket_ip_unbound.cc @@ -0,0 +1,474 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <arpa/inet.h> +#include <netinet/in.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <sys/un.h> + +#include <cstdio> +#include <cstring> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "test/syscalls/linux/ip_socket_test_util.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +// Test fixture for tests that apply to pairs of IP sockets. +using IPUnboundSocketTest = SimpleSocketTest; + +TEST_P(IPUnboundSocketTest, TtlDefault) { + auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + int get = -1; + socklen_t get_sz = sizeof(get); + EXPECT_THAT(getsockopt(socket->get(), IPPROTO_IP, IP_TTL, &get, &get_sz), + SyscallSucceedsWithValue(0)); + EXPECT_TRUE(get == 64 || get == 127); + EXPECT_EQ(get_sz, sizeof(get)); +} + +TEST_P(IPUnboundSocketTest, SetTtl) { + auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + int get1 = -1; + socklen_t get1_sz = sizeof(get1); + EXPECT_THAT(getsockopt(socket->get(), IPPROTO_IP, IP_TTL, &get1, &get1_sz), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get1_sz, sizeof(get1)); + + int set = 100; + if (set == get1) { + set += 1; + } + socklen_t set_sz = sizeof(set); + EXPECT_THAT(setsockopt(socket->get(), IPPROTO_IP, IP_TTL, &set, set_sz), + SyscallSucceedsWithValue(0)); + + int get2 = -1; + socklen_t get2_sz = sizeof(get2); + EXPECT_THAT(getsockopt(socket->get(), IPPROTO_IP, IP_TTL, &get2, &get2_sz), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get2_sz, sizeof(get2)); + EXPECT_EQ(get2, set); +} + +TEST_P(IPUnboundSocketTest, ResetTtlToDefault) { + auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + int get1 = -1; + socklen_t get1_sz = sizeof(get1); + EXPECT_THAT(getsockopt(socket->get(), IPPROTO_IP, IP_TTL, &get1, &get1_sz), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get1_sz, sizeof(get1)); + + int set1 = 100; + if (set1 == get1) { + set1 += 1; + } + socklen_t set1_sz = sizeof(set1); + 
EXPECT_THAT(setsockopt(socket->get(), IPPROTO_IP, IP_TTL, &set1, set1_sz), + SyscallSucceedsWithValue(0)); + + int set2 = -1; + socklen_t set2_sz = sizeof(set2); + EXPECT_THAT(setsockopt(socket->get(), IPPROTO_IP, IP_TTL, &set2, set2_sz), + SyscallSucceedsWithValue(0)); + + int get2 = -1; + socklen_t get2_sz = sizeof(get2); + EXPECT_THAT(getsockopt(socket->get(), IPPROTO_IP, IP_TTL, &get2, &get2_sz), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get2_sz, sizeof(get2)); + EXPECT_EQ(get2, get1); +} + +TEST_P(IPUnboundSocketTest, ZeroTtl) { + auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + int set = 0; + socklen_t set_sz = sizeof(set); + EXPECT_THAT(setsockopt(socket->get(), IPPROTO_IP, IP_TTL, &set, set_sz), + SyscallFailsWithErrno(EINVAL)); +} + +TEST_P(IPUnboundSocketTest, InvalidLargeTtl) { + auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + int set = 256; + socklen_t set_sz = sizeof(set); + EXPECT_THAT(setsockopt(socket->get(), IPPROTO_IP, IP_TTL, &set, set_sz), + SyscallFailsWithErrno(EINVAL)); +} + +TEST_P(IPUnboundSocketTest, InvalidNegativeTtl) { + auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + int set = -2; + socklen_t set_sz = sizeof(set); + EXPECT_THAT(setsockopt(socket->get(), IPPROTO_IP, IP_TTL, &set, set_sz), + SyscallFailsWithErrno(EINVAL)); +} + +struct TOSOption { + int level; + int option; + int cmsg_level; +}; + +constexpr int INET_ECN_MASK = 3; + +static TOSOption GetTOSOption(int domain) { + TOSOption opt; + switch (domain) { + case AF_INET: + opt.level = IPPROTO_IP; + opt.option = IP_TOS; + opt.cmsg_level = SOL_IP; + break; + case AF_INET6: + opt.level = IPPROTO_IPV6; + opt.option = IPV6_TCLASS; + opt.cmsg_level = SOL_IPV6; + break; + } + return opt; +} + +TEST_P(IPUnboundSocketTest, TOSDefault) { + auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + TOSOption t = GetTOSOption(GetParam().domain); + int get = -1; + socklen_t get_sz = sizeof(get); + constexpr int kDefaultTOS = 0; + 
ASSERT_THAT(getsockopt(socket->get(), t.level, t.option, &get, &get_sz), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_sz, sizeof(get)); + EXPECT_EQ(get, kDefaultTOS); +} + +TEST_P(IPUnboundSocketTest, SetTOS) { + auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + int set = 0xC0; + socklen_t set_sz = sizeof(set); + TOSOption t = GetTOSOption(GetParam().domain); + EXPECT_THAT(setsockopt(socket->get(), t.level, t.option, &set, set_sz), + SyscallSucceedsWithValue(0)); + + int get = -1; + socklen_t get_sz = sizeof(get); + ASSERT_THAT(getsockopt(socket->get(), t.level, t.option, &get, &get_sz), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_sz, sizeof(get)); + EXPECT_EQ(get, set); +} + +TEST_P(IPUnboundSocketTest, ZeroTOS) { + auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + int set = 0; + socklen_t set_sz = sizeof(set); + TOSOption t = GetTOSOption(GetParam().domain); + EXPECT_THAT(setsockopt(socket->get(), t.level, t.option, &set, set_sz), + SyscallSucceedsWithValue(0)); + int get = -1; + socklen_t get_sz = sizeof(get); + ASSERT_THAT(getsockopt(socket->get(), t.level, t.option, &get, &get_sz), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_sz, sizeof(get)); + EXPECT_EQ(get, set); +} + +TEST_P(IPUnboundSocketTest, InvalidLargeTOS) { + auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + // Test with exceeding the byte space. 
+ int set = 256; + constexpr int kDefaultTOS = 0; + socklen_t set_sz = sizeof(set); + TOSOption t = GetTOSOption(GetParam().domain); + if (GetParam().domain == AF_INET) { + EXPECT_THAT(setsockopt(socket->get(), t.level, t.option, &set, set_sz), + SyscallSucceedsWithValue(0)); + } else { + EXPECT_THAT(setsockopt(socket->get(), t.level, t.option, &set, set_sz), + SyscallFailsWithErrno(EINVAL)); + } + int get = -1; + socklen_t get_sz = sizeof(get); + ASSERT_THAT(getsockopt(socket->get(), t.level, t.option, &get, &get_sz), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_sz, sizeof(get)); + EXPECT_EQ(get, kDefaultTOS); +} + +TEST_P(IPUnboundSocketTest, CheckSkipECN) { + auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + int set = 0xFF; + socklen_t set_sz = sizeof(set); + TOSOption t = GetTOSOption(GetParam().domain); + EXPECT_THAT(setsockopt(socket->get(), t.level, t.option, &set, set_sz), + SyscallSucceedsWithValue(0)); + int expect = static_cast<uint8_t>(set); + if (GetParam().protocol == IPPROTO_TCP) { + expect &= ~INET_ECN_MASK; + } + int get = -1; + socklen_t get_sz = sizeof(get); + ASSERT_THAT(getsockopt(socket->get(), t.level, t.option, &get, &get_sz), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_sz, sizeof(get)); + EXPECT_EQ(get, expect); +} + +TEST_P(IPUnboundSocketTest, ZeroTOSOptionSize) { + auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + int set = 0xC0; + socklen_t set_sz = 0; + TOSOption t = GetTOSOption(GetParam().domain); + if (GetParam().domain == AF_INET) { + EXPECT_THAT(setsockopt(socket->get(), t.level, t.option, &set, set_sz), + SyscallSucceedsWithValue(0)); + } else { + EXPECT_THAT(setsockopt(socket->get(), t.level, t.option, &set, set_sz), + SyscallFailsWithErrno(EINVAL)); + } + int get = -1; + socklen_t get_sz = 0; + ASSERT_THAT(getsockopt(socket->get(), t.level, t.option, &get, &get_sz), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_sz, 0); + EXPECT_EQ(get, -1); +} + +TEST_P(IPUnboundSocketTest, SmallTOSOptionSize) { + auto 
socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + int set = 0xC0; + constexpr int kDefaultTOS = 0; + TOSOption t = GetTOSOption(GetParam().domain); + for (socklen_t i = 1; i < sizeof(int); i++) { + int expect_tos; + socklen_t expect_sz; + if (GetParam().domain == AF_INET) { + EXPECT_THAT(setsockopt(socket->get(), t.level, t.option, &set, i), + SyscallSucceedsWithValue(0)); + expect_tos = set; + expect_sz = sizeof(uint8_t); + } else { + EXPECT_THAT(setsockopt(socket->get(), t.level, t.option, &set, i), + SyscallFailsWithErrno(EINVAL)); + expect_tos = kDefaultTOS; + expect_sz = i; + } + uint get = -1; + socklen_t get_sz = i; + ASSERT_THAT(getsockopt(socket->get(), t.level, t.option, &get, &get_sz), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_sz, expect_sz); + // Account for partial copies by getsockopt, retrieve the lower + // bits specified by get_sz, while comparing against expect_tos. + EXPECT_EQ(get & ~(~0 << (get_sz * 8)), expect_tos); + } +} + +TEST_P(IPUnboundSocketTest, LargeTOSOptionSize) { + auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + int set = 0xC0; + TOSOption t = GetTOSOption(GetParam().domain); + for (socklen_t i = sizeof(int); i < 10; i++) { + EXPECT_THAT(setsockopt(socket->get(), t.level, t.option, &set, i), + SyscallSucceedsWithValue(0)); + int get = -1; + socklen_t get_sz = i; + // We expect the system call handler to only copy atmost sizeof(int) bytes + // as asserted by the check below. Hence, we do not expect the copy to + // overflow in getsockopt. 
+ ASSERT_THAT(getsockopt(socket->get(), t.level, t.option, &get, &get_sz), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_sz, sizeof(int)); + EXPECT_EQ(get, set); + } +} + +TEST_P(IPUnboundSocketTest, NegativeTOS) { + auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + int set = -1; + socklen_t set_sz = sizeof(set); + TOSOption t = GetTOSOption(GetParam().domain); + EXPECT_THAT(setsockopt(socket->get(), t.level, t.option, &set, set_sz), + SyscallSucceedsWithValue(0)); + int expect; + if (GetParam().domain == AF_INET) { + expect = static_cast<uint8_t>(set); + if (GetParam().protocol == IPPROTO_TCP) { + expect &= ~INET_ECN_MASK; + } + } else { + // On IPv6 TCLASS, setting -1 has the effect of resetting the + // TrafficClass. + expect = 0; + } + int get = -1; + socklen_t get_sz = sizeof(get); + ASSERT_THAT(getsockopt(socket->get(), t.level, t.option, &get, &get_sz), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_sz, sizeof(get)); + EXPECT_EQ(get, expect); +} + +TEST_P(IPUnboundSocketTest, InvalidNegativeTOS) { + auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + int set = -2; + socklen_t set_sz = sizeof(set); + TOSOption t = GetTOSOption(GetParam().domain); + int expect; + if (GetParam().domain == AF_INET) { + ASSERT_THAT(setsockopt(socket->get(), t.level, t.option, &set, set_sz), + SyscallSucceedsWithValue(0)); + expect = static_cast<uint8_t>(set); + if (GetParam().protocol == IPPROTO_TCP) { + expect &= ~INET_ECN_MASK; + } + } else { + ASSERT_THAT(setsockopt(socket->get(), t.level, t.option, &set, set_sz), + SyscallFailsWithErrno(EINVAL)); + expect = 0; + } + int get = 0; + socklen_t get_sz = sizeof(get); + ASSERT_THAT(getsockopt(socket->get(), t.level, t.option, &get, &get_sz), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_sz, sizeof(get)); + EXPECT_EQ(get, expect); +} + +TEST_P(IPUnboundSocketTest, NullTOS) { + auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + TOSOption t = GetTOSOption(GetParam().domain); + int set_sz = sizeof(int); + if 
(GetParam().domain == AF_INET) { + EXPECT_THAT(setsockopt(socket->get(), t.level, t.option, nullptr, set_sz), + SyscallFailsWithErrno(EFAULT)); + } else { // AF_INET6 + // The AF_INET6 behavior is not yet compatible. gVisor will try to read + // optval from user memory at syscall handler, it needs substantial + // refactoring to implement this behavior just for IPv6. + if (IsRunningOnGvisor()) { + EXPECT_THAT(setsockopt(socket->get(), t.level, t.option, nullptr, set_sz), + SyscallFailsWithErrno(EFAULT)); + } else { + // Linux's IPv6 stack treats nullptr optval as input of 0, so the call + // succeeds. (net/ipv6/ipv6_sockglue.c, do_ipv6_setsockopt()) + // + // Linux's implementation would need fixing as passing a nullptr as optval + // and non-zero optlen may not be valid. + // TODO(b/158666797): Combine the gVisor and linux cases for IPv6. + // Some kernel versions return EFAULT, so we handle both. + EXPECT_THAT( + setsockopt(socket->get(), t.level, t.option, nullptr, set_sz), + AnyOf(SyscallFailsWithErrno(EFAULT), SyscallSucceedsWithValue(0))); + } + } + socklen_t get_sz = sizeof(int); + EXPECT_THAT(getsockopt(socket->get(), t.level, t.option, nullptr, &get_sz), + SyscallFailsWithErrno(EFAULT)); + int get = -1; + EXPECT_THAT(getsockopt(socket->get(), t.level, t.option, &get, nullptr), + SyscallFailsWithErrno(EFAULT)); +} + +TEST_P(IPUnboundSocketTest, InsufficientBufferTOS) { + SKIP_IF(GetParam().protocol == IPPROTO_TCP); + + auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + TOSOption t = GetTOSOption(GetParam().domain); + + in_addr addr4; + in6_addr addr6; + ASSERT_THAT(inet_pton(AF_INET, "127.0.0.1", &addr4), ::testing::Eq(1)); + ASSERT_THAT(inet_pton(AF_INET6, "fe80::", &addr6), ::testing::Eq(1)); + + cmsghdr cmsg = {}; + cmsg.cmsg_len = sizeof(cmsg); + cmsg.cmsg_level = t.cmsg_level; + cmsg.cmsg_type = t.option; + + msghdr msg = {}; + msg.msg_control = &cmsg; + msg.msg_controllen = sizeof(cmsg); + if (GetParam().domain == AF_INET) { + msg.msg_name = 
&addr4; + msg.msg_namelen = sizeof(addr4); + } else { + msg.msg_name = &addr6; + msg.msg_namelen = sizeof(addr6); + } + + EXPECT_THAT(sendmsg(socket->get(), &msg, 0), SyscallFailsWithErrno(EINVAL)); +} + +TEST_P(IPUnboundSocketTest, ReuseAddrDefault) { + auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + int get = -1; + socklen_t get_sz = sizeof(get); + ASSERT_THAT( + getsockopt(socket->get(), SOL_SOCKET, SO_REUSEADDR, &get, &get_sz), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get, kSockOptOff); + EXPECT_EQ(get_sz, sizeof(get)); +} + +TEST_P(IPUnboundSocketTest, SetReuseAddr) { + auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + ASSERT_THAT(setsockopt(socket->get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceedsWithValue(0)); + + int get = -1; + socklen_t get_sz = sizeof(get); + ASSERT_THAT( + getsockopt(socket->get(), SOL_SOCKET, SO_REUSEADDR, &get, &get_sz), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get, kSockOptOn); + EXPECT_EQ(get_sz, sizeof(get)); +} + +INSTANTIATE_TEST_SUITE_P( + IPUnboundSockets, IPUnboundSocketTest, + ::testing::ValuesIn(VecCat<SocketKind>(VecCat<SocketKind>( + ApplyVec<SocketKind>(IPv4UDPUnboundSocket, + AllBitwiseCombinations(List<int>{SOCK_DGRAM}, + List<int>{0, + SOCK_NONBLOCK})), + ApplyVec<SocketKind>(IPv6UDPUnboundSocket, + AllBitwiseCombinations(List<int>{SOCK_DGRAM}, + List<int>{0, + SOCK_NONBLOCK})), + ApplyVec<SocketKind>(IPv4TCPUnboundSocket, + AllBitwiseCombinations(List<int>{SOCK_STREAM}, + List<int>{0, + SOCK_NONBLOCK})), + ApplyVec<SocketKind>(IPv6TCPUnboundSocket, + AllBitwiseCombinations(List<int>{SOCK_STREAM}, + List<int>{ + 0, SOCK_NONBLOCK})))))); + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_ipv4_tcp_unbound_external_networking.cc b/test/syscalls/linux/socket_ipv4_tcp_unbound_external_networking.cc new file mode 100644 index 000000000..80f12b0a9 --- /dev/null +++ 
b/test/syscalls/linux/socket_ipv4_tcp_unbound_external_networking.cc @@ -0,0 +1,66 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "test/syscalls/linux/socket_ipv4_tcp_unbound_external_networking.h" + +#include <netinet/in.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <sys/un.h> + +#include <cstdio> +#include <cstring> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +// Verifies that a newly instantiated TCP socket does not have the +// broadcast socket option enabled. +TEST_P(IPv4TCPUnboundExternalNetworkingSocketTest, TCPBroadcastDefault) { + auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + int get = -1; + socklen_t get_sz = sizeof(get); + EXPECT_THAT( + getsockopt(socket->get(), SOL_SOCKET, SO_BROADCAST, &get, &get_sz), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get, kSockOptOff); + EXPECT_EQ(get_sz, sizeof(get)); +} + +// Verifies that a newly instantiated TCP socket returns true after enabling +// the broadcast socket option. 
+TEST_P(IPv4TCPUnboundExternalNetworkingSocketTest, SetTCPBroadcast) { + auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + EXPECT_THAT(setsockopt(socket->get(), SOL_SOCKET, SO_BROADCAST, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceedsWithValue(0)); + + int get = -1; + socklen_t get_sz = sizeof(get); + EXPECT_THAT( + getsockopt(socket->get(), SOL_SOCKET, SO_BROADCAST, &get, &get_sz), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get, kSockOptOn); + EXPECT_EQ(get_sz, sizeof(get)); +} + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_ipv4_tcp_unbound_external_networking.h b/test/syscalls/linux/socket_ipv4_tcp_unbound_external_networking.h new file mode 100644 index 000000000..fb582b224 --- /dev/null +++ b/test/syscalls/linux/socket_ipv4_tcp_unbound_external_networking.h @@ -0,0 +1,30 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IPV4_TCP_UNBOUND_EXTERNAL_NETWORKING_H_ +#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IPV4_TCP_UNBOUND_EXTERNAL_NETWORKING_H_ + +#include "test/syscalls/linux/socket_test_util.h" + +namespace gvisor { +namespace testing { + +// Test fixture for tests that apply to unbound IPv4 TCP sockets in a sandbox +// with external networking support. 
+using IPv4TCPUnboundExternalNetworkingSocketTest = SimpleSocketTest; + +} // namespace testing +} // namespace gvisor + +#endif // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IPV4_TCP_UNBOUND_EXTERNAL_NETWORKING_H_ diff --git a/test/syscalls/linux/socket_ipv4_tcp_unbound_external_networking_test.cc b/test/syscalls/linux/socket_ipv4_tcp_unbound_external_networking_test.cc new file mode 100644 index 000000000..797c4174e --- /dev/null +++ b/test/syscalls/linux/socket_ipv4_tcp_unbound_external_networking_test.cc @@ -0,0 +1,39 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "test/syscalls/linux/socket_ipv4_tcp_unbound_external_networking.h" + +#include <vector> + +#include "test/syscalls/linux/ip_socket_test_util.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { +namespace { + +std::vector<SocketKind> GetSockets() { + return ApplyVec<SocketKind>( + IPv4TCPUnboundSocket, + AllBitwiseCombinations(List<int>{0, SOCK_NONBLOCK})); +} + +INSTANTIATE_TEST_SUITE_P(IPv4TCPUnboundSockets, + IPv4TCPUnboundExternalNetworkingSocketTest, + ::testing::ValuesIn(GetSockets())); + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_ipv4_udp_unbound.cc b/test/syscalls/linux/socket_ipv4_udp_unbound.cc new file mode 100644 index 000000000..de0f5f01b --- /dev/null +++ b/test/syscalls/linux/socket_ipv4_udp_unbound.cc @@ -0,0 +1,2456 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "test/syscalls/linux/socket_ipv4_udp_unbound.h" + +#include <arpa/inet.h> +#include <net/if.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <sys/un.h> + +#include <cstdio> + +#include "gtest/gtest.h" +#include "absl/memory/memory.h" +#include "test/syscalls/linux/ip_socket_test_util.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +// Check that packets are not received without a group membership. Default send +// interface configured by bind. +TEST_P(IPv4UDPUnboundSocketTest, IpMulticastLoopbackNoGroup) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + // Bind the first FD to the loopback. This is an alternative to + // IP_MULTICAST_IF for setting the default send interface. + auto sender_addr = V4Loopback(); + EXPECT_THAT( + bind(socket1->get(), reinterpret_cast<sockaddr*>(&sender_addr.addr), + sender_addr.addr_len), + SyscallSucceeds()); + + // Bind the second FD to the v4 any address. If multicast worked like unicast, + // this would ensure that we get the packet. + auto receiver_addr = V4Any(); + EXPECT_THAT( + bind(socket2->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr), + receiver_addr.addr_len), + SyscallSucceeds()); + socklen_t receiver_addr_len = receiver_addr.addr_len; + ASSERT_THAT(getsockname(socket2->get(), + reinterpret_cast<sockaddr*>(&receiver_addr.addr), + &receiver_addr_len), + SyscallSucceeds()); + EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len); + + // Send the multicast packet. 
+ auto send_addr = V4Multicast(); + reinterpret_cast<sockaddr_in*>(&send_addr.addr)->sin_port = + reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port; + char send_buf[200]; + RandomizeBuffer(send_buf, sizeof(send_buf)); + EXPECT_THAT(RetryEINTR(sendto)(socket1->get(), send_buf, sizeof(send_buf), 0, + reinterpret_cast<sockaddr*>(&send_addr.addr), + send_addr.addr_len), + SyscallSucceedsWithValue(sizeof(send_buf))); + + // Check that we did not receive the multicast packet. + char recv_buf[sizeof(send_buf)] = {}; + EXPECT_THAT(RetryEINTR(recv)(socket2->get(), recv_buf, sizeof(recv_buf), + MSG_DONTWAIT), + SyscallFailsWithErrno(EAGAIN)); +} + +// Check that not setting a default send interface prevents multicast packets +// from being sent. Group membership interface configured by address. +TEST_P(IPv4UDPUnboundSocketTest, IpMulticastLoopbackAddrNoDefaultSendIf) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + // Bind the second FD to the v4 any address to ensure that we can receive any + // unicast packet. + auto receiver_addr = V4Any(); + EXPECT_THAT( + bind(socket2->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr), + receiver_addr.addr_len), + SyscallSucceeds()); + socklen_t receiver_addr_len = receiver_addr.addr_len; + ASSERT_THAT(getsockname(socket2->get(), + reinterpret_cast<sockaddr*>(&receiver_addr.addr), + &receiver_addr_len), + SyscallSucceeds()); + EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len); + + // Register to receive multicast packets. + ip_mreq group = {}; + group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress); + group.imr_interface.s_addr = htonl(INADDR_LOOPBACK); + EXPECT_THAT(setsockopt(socket2->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group, + sizeof(group)), + SyscallSucceeds()); + + // Send a multicast packet. 
+ auto send_addr = V4Multicast(); + reinterpret_cast<sockaddr_in*>(&send_addr.addr)->sin_port = + reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port; + char send_buf[200]; + RandomizeBuffer(send_buf, sizeof(send_buf)); + EXPECT_THAT(RetryEINTR(sendto)(socket1->get(), send_buf, sizeof(send_buf), 0, + reinterpret_cast<sockaddr*>(&send_addr.addr), + send_addr.addr_len), + SyscallFailsWithErrno(ENETUNREACH)); +} + +// Check that not setting a default send interface prevents multicast packets +// from being sent. Group membership interface configured by NIC ID. +TEST_P(IPv4UDPUnboundSocketTest, IpMulticastLoopbackNicNoDefaultSendIf) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + // Bind the second FD to the v4 any address to ensure that we can receive any + // unicast packet. + auto receiver_addr = V4Any(); + ASSERT_THAT( + bind(socket2->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr), + receiver_addr.addr_len), + SyscallSucceeds()); + socklen_t receiver_addr_len = receiver_addr.addr_len; + ASSERT_THAT(getsockname(socket2->get(), + reinterpret_cast<sockaddr*>(&receiver_addr.addr), + &receiver_addr_len), + SyscallSucceeds()); + EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len); + + // Register to receive multicast packets. + ip_mreqn group = {}; + group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress); + group.imr_ifindex = ASSERT_NO_ERRNO_AND_VALUE(InterfaceIndex("lo")); + EXPECT_THAT(setsockopt(socket2->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group, + sizeof(group)), + SyscallSucceeds()); + + // Send a multicast packet. 
+ auto send_addr = V4Multicast(); + reinterpret_cast<sockaddr_in*>(&send_addr.addr)->sin_port = + reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port; + char send_buf[200]; + RandomizeBuffer(send_buf, sizeof(send_buf)); + EXPECT_THAT(RetryEINTR(sendto)(socket1->get(), send_buf, sizeof(send_buf), 0, + reinterpret_cast<sockaddr*>(&send_addr.addr), + send_addr.addr_len), + SyscallFailsWithErrno(ENETUNREACH)); +} + +// Check that multicast works when the default send interface is configured by +// bind and the group membership is configured by address. +TEST_P(IPv4UDPUnboundSocketTest, IpMulticastLoopbackAddr) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + // Bind the first FD to the loopback. This is an alternative to + // IP_MULTICAST_IF for setting the default send interface. + auto sender_addr = V4Loopback(); + ASSERT_THAT( + bind(socket1->get(), reinterpret_cast<sockaddr*>(&sender_addr.addr), + sender_addr.addr_len), + SyscallSucceeds()); + + // Bind the second FD to the v4 any address to ensure that we can receive the + // multicast packet. + auto receiver_addr = V4Any(); + ASSERT_THAT( + bind(socket2->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr), + receiver_addr.addr_len), + SyscallSucceeds()); + socklen_t receiver_addr_len = receiver_addr.addr_len; + ASSERT_THAT(getsockname(socket2->get(), + reinterpret_cast<sockaddr*>(&receiver_addr.addr), + &receiver_addr_len), + SyscallSucceeds()); + EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len); + + // Register to receive multicast packets. + ip_mreq group = {}; + group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress); + group.imr_interface.s_addr = htonl(INADDR_LOOPBACK); + ASSERT_THAT(setsockopt(socket2->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group, + sizeof(group)), + SyscallSucceeds()); + + // Send a multicast packet. 
+ auto send_addr = V4Multicast(); + reinterpret_cast<sockaddr_in*>(&send_addr.addr)->sin_port = + reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port; + char send_buf[200]; + RandomizeBuffer(send_buf, sizeof(send_buf)); + ASSERT_THAT(RetryEINTR(sendto)(socket1->get(), send_buf, sizeof(send_buf), 0, + reinterpret_cast<sockaddr*>(&send_addr.addr), + send_addr.addr_len), + SyscallSucceedsWithValue(sizeof(send_buf))); + + // Check that we received the multicast packet. + char recv_buf[sizeof(send_buf)] = {}; + ASSERT_THAT(RetryEINTR(recv)(socket2->get(), recv_buf, sizeof(recv_buf), 0), + SyscallSucceedsWithValue(sizeof(recv_buf))); + + EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf))); +} + +// Check that multicast works when the default send interface is configured by +// bind and the group membership is configured by NIC ID. +TEST_P(IPv4UDPUnboundSocketTest, IpMulticastLoopbackNic) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + // Bind the first FD to the loopback. This is an alternative to + // IP_MULTICAST_IF for setting the default send interface. + auto sender_addr = V4Loopback(); + ASSERT_THAT( + bind(socket1->get(), reinterpret_cast<sockaddr*>(&sender_addr.addr), + sender_addr.addr_len), + SyscallSucceeds()); + + // Bind the second FD to the v4 any address to ensure that we can receive the + // multicast packet. + auto receiver_addr = V4Any(); + ASSERT_THAT( + bind(socket2->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr), + receiver_addr.addr_len), + SyscallSucceeds()); + socklen_t receiver_addr_len = receiver_addr.addr_len; + ASSERT_THAT(getsockname(socket2->get(), + reinterpret_cast<sockaddr*>(&receiver_addr.addr), + &receiver_addr_len), + SyscallSucceeds()); + EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len); + + // Register to receive multicast packets. 
+ ip_mreqn group = {}; + group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress); + group.imr_ifindex = ASSERT_NO_ERRNO_AND_VALUE(InterfaceIndex("lo")); + ASSERT_THAT(setsockopt(socket2->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group, + sizeof(group)), + SyscallSucceeds()); + + // Send a multicast packet. + auto send_addr = V4Multicast(); + reinterpret_cast<sockaddr_in*>(&send_addr.addr)->sin_port = + reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port; + char send_buf[200]; + RandomizeBuffer(send_buf, sizeof(send_buf)); + ASSERT_THAT(RetryEINTR(sendto)(socket1->get(), send_buf, sizeof(send_buf), 0, + reinterpret_cast<sockaddr*>(&send_addr.addr), + send_addr.addr_len), + SyscallSucceedsWithValue(sizeof(send_buf))); + + // Check that we received the multicast packet. + char recv_buf[sizeof(send_buf)] = {}; + ASSERT_THAT(RetryEINTR(recv)(socket2->get(), recv_buf, sizeof(recv_buf), 0), + SyscallSucceedsWithValue(sizeof(recv_buf))); + + EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf))); +} + +// Check that multicast works when the default send interface is configured by +// IP_MULTICAST_IF, the send address is specified in sendto, and the group +// membership is configured by address. +TEST_P(IPv4UDPUnboundSocketTest, IpMulticastLoopbackIfAddr) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + // Set the default send interface. + ip_mreq iface = {}; + iface.imr_interface.s_addr = htonl(INADDR_LOOPBACK); + ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &iface, + sizeof(iface)), + SyscallSucceeds()); + + // Bind the second FD to the v4 any address to ensure that we can receive the + // multicast packet. 
+ auto receiver_addr = V4Any(); + ASSERT_THAT( + bind(socket2->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr), + receiver_addr.addr_len), + SyscallSucceeds()); + socklen_t receiver_addr_len = receiver_addr.addr_len; + ASSERT_THAT(getsockname(socket2->get(), + reinterpret_cast<sockaddr*>(&receiver_addr.addr), + &receiver_addr_len), + SyscallSucceeds()); + EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len); + + // Register to receive multicast packets. + ip_mreq group = {}; + group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress); + group.imr_interface.s_addr = htonl(INADDR_LOOPBACK); + ASSERT_THAT(setsockopt(socket2->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group, + sizeof(group)), + SyscallSucceeds()); + + // Send a multicast packet. + auto send_addr = V4Multicast(); + reinterpret_cast<sockaddr_in*>(&send_addr.addr)->sin_port = + reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port; + char send_buf[200]; + RandomizeBuffer(send_buf, sizeof(send_buf)); + ASSERT_THAT(RetryEINTR(sendto)(socket1->get(), send_buf, sizeof(send_buf), 0, + reinterpret_cast<sockaddr*>(&send_addr.addr), + send_addr.addr_len), + SyscallSucceedsWithValue(sizeof(send_buf))); + + // Check that we received the multicast packet. + char recv_buf[sizeof(send_buf)] = {}; + ASSERT_THAT(RetryEINTR(recv)(socket2->get(), recv_buf, sizeof(recv_buf), 0), + SyscallSucceedsWithValue(sizeof(recv_buf))); + + EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf))); +} + +// Check that multicast works when the default send interface is configured by +// IP_MULTICAST_IF, the send address is specified in sendto, and the group +// membership is configured by NIC ID. +TEST_P(IPv4UDPUnboundSocketTest, IpMulticastLoopbackIfNic) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + // Set the default send interface. 
+ ip_mreqn iface = {}; + iface.imr_ifindex = ASSERT_NO_ERRNO_AND_VALUE(InterfaceIndex("lo")); + ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &iface, + sizeof(iface)), + SyscallSucceeds()); + + // Bind the second FD to the v4 any address to ensure that we can receive the + // multicast packet. + auto receiver_addr = V4Any(); + ASSERT_THAT( + bind(socket2->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr), + receiver_addr.addr_len), + SyscallSucceeds()); + socklen_t receiver_addr_len = receiver_addr.addr_len; + ASSERT_THAT(getsockname(socket2->get(), + reinterpret_cast<sockaddr*>(&receiver_addr.addr), + &receiver_addr_len), + SyscallSucceeds()); + EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len); + + // Register to receive multicast packets. + ip_mreqn group = {}; + group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress); + group.imr_ifindex = ASSERT_NO_ERRNO_AND_VALUE(InterfaceIndex("lo")); + ASSERT_THAT(setsockopt(socket2->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group, + sizeof(group)), + SyscallSucceeds()); + + // Send a multicast packet. + auto send_addr = V4Multicast(); + reinterpret_cast<sockaddr_in*>(&send_addr.addr)->sin_port = + reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port; + char send_buf[200]; + RandomizeBuffer(send_buf, sizeof(send_buf)); + ASSERT_THAT(RetryEINTR(sendto)(socket1->get(), send_buf, sizeof(send_buf), 0, + reinterpret_cast<sockaddr*>(&send_addr.addr), + send_addr.addr_len), + SyscallSucceedsWithValue(sizeof(send_buf))); + + // Check that we received the multicast packet. 
+ char recv_buf[sizeof(send_buf)] = {}; + ASSERT_THAT(RetryEINTR(recv)(socket2->get(), recv_buf, sizeof(recv_buf), 0), + SyscallSucceedsWithValue(sizeof(recv_buf))); + + EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf))); +} + +// Check that multicast works when the default send interface is configured by +// IP_MULTICAST_IF, the send address is specified in connect, and the group +// membership is configured by address. +TEST_P(IPv4UDPUnboundSocketTest, IpMulticastLoopbackIfAddrConnect) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + // Set the default send interface. + ip_mreq iface = {}; + iface.imr_interface.s_addr = htonl(INADDR_LOOPBACK); + ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &iface, + sizeof(iface)), + SyscallSucceeds()); + + // Bind the second FD to the v4 any address to ensure that we can receive the + // multicast packet. + auto receiver_addr = V4Any(); + ASSERT_THAT( + bind(socket2->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr), + receiver_addr.addr_len), + SyscallSucceeds()); + socklen_t receiver_addr_len = receiver_addr.addr_len; + ASSERT_THAT(getsockname(socket2->get(), + reinterpret_cast<sockaddr*>(&receiver_addr.addr), + &receiver_addr_len), + SyscallSucceeds()); + EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len); + + // Register to receive multicast packets. + ip_mreq group = {}; + group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress); + group.imr_interface.s_addr = htonl(INADDR_LOOPBACK); + ASSERT_THAT(setsockopt(socket2->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group, + sizeof(group)), + SyscallSucceeds()); + + // Send a multicast packet. 
+ auto connect_addr = V4Multicast(); + reinterpret_cast<sockaddr_in*>(&connect_addr.addr)->sin_port = + reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port; + ASSERT_THAT( + RetryEINTR(connect)(socket1->get(), + reinterpret_cast<sockaddr*>(&connect_addr.addr), + connect_addr.addr_len), + SyscallSucceeds()); + + char send_buf[200]; + RandomizeBuffer(send_buf, sizeof(send_buf)); + ASSERT_THAT(RetryEINTR(send)(socket1->get(), send_buf, sizeof(send_buf), 0), + SyscallSucceedsWithValue(sizeof(send_buf))); + + // Check that we received the multicast packet. + char recv_buf[sizeof(send_buf)] = {}; + ASSERT_THAT(RetryEINTR(recv)(socket2->get(), recv_buf, sizeof(recv_buf), 0), + SyscallSucceedsWithValue(sizeof(recv_buf))); + + EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf))); +} + +// Check that multicast works when the default send interface is configured by +// IP_MULTICAST_IF, the send address is specified in connect, and the group +// membership is configured by NIC ID. +TEST_P(IPv4UDPUnboundSocketTest, IpMulticastLoopbackIfNicConnect) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + // Set the default send interface. + ip_mreqn iface = {}; + iface.imr_ifindex = ASSERT_NO_ERRNO_AND_VALUE(InterfaceIndex("lo")); + ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &iface, + sizeof(iface)), + SyscallSucceeds()); + + // Bind the second FD to the v4 any address to ensure that we can receive the + // multicast packet. 
+ auto receiver_addr = V4Any(); + ASSERT_THAT( + bind(socket2->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr), + receiver_addr.addr_len), + SyscallSucceeds()); + socklen_t receiver_addr_len = receiver_addr.addr_len; + ASSERT_THAT(getsockname(socket2->get(), + reinterpret_cast<sockaddr*>(&receiver_addr.addr), + &receiver_addr_len), + SyscallSucceeds()); + EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len); + + // Register to receive multicast packets. + ip_mreqn group = {}; + group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress); + group.imr_ifindex = ASSERT_NO_ERRNO_AND_VALUE(InterfaceIndex("lo")); + ASSERT_THAT(setsockopt(socket2->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group, + sizeof(group)), + SyscallSucceeds()); + + // Send a multicast packet. + auto connect_addr = V4Multicast(); + reinterpret_cast<sockaddr_in*>(&connect_addr.addr)->sin_port = + reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port; + ASSERT_THAT( + RetryEINTR(connect)(socket1->get(), + reinterpret_cast<sockaddr*>(&connect_addr.addr), + connect_addr.addr_len), + SyscallSucceeds()); + + char send_buf[200]; + RandomizeBuffer(send_buf, sizeof(send_buf)); + ASSERT_THAT(RetryEINTR(send)(socket1->get(), send_buf, sizeof(send_buf), 0), + SyscallSucceedsWithValue(sizeof(send_buf))); + + // Check that we received the multicast packet. + char recv_buf[sizeof(send_buf)] = {}; + ASSERT_THAT(RetryEINTR(recv)(socket2->get(), recv_buf, sizeof(recv_buf), 0), + SyscallSucceedsWithValue(sizeof(recv_buf))); + + EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf))); +} + +// Check that multicast works when the default send interface is configured by +// IP_MULTICAST_IF, the send address is specified in sendto, and the group +// membership is configured by address. +TEST_P(IPv4UDPUnboundSocketTest, IpMulticastLoopbackIfAddrSelf) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + // Set the default send interface. 
+ ip_mreq iface = {}; + iface.imr_interface.s_addr = htonl(INADDR_LOOPBACK); + ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &iface, + sizeof(iface)), + SyscallSucceeds()); + + // Bind the first FD to the v4 any address to ensure that we can receive the + // multicast packet. + auto receiver_addr = V4Any(); + ASSERT_THAT( + bind(socket1->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr), + receiver_addr.addr_len), + SyscallSucceeds()); + socklen_t receiver_addr_len = receiver_addr.addr_len; + ASSERT_THAT(getsockname(socket1->get(), + reinterpret_cast<sockaddr*>(&receiver_addr.addr), + &receiver_addr_len), + SyscallSucceeds()); + EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len); + + // Register to receive multicast packets. + ip_mreq group = {}; + group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress); + group.imr_interface.s_addr = htonl(INADDR_LOOPBACK); + ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group, + sizeof(group)), + SyscallSucceeds()); + + // Send a multicast packet. + auto send_addr = V4Multicast(); + reinterpret_cast<sockaddr_in*>(&send_addr.addr)->sin_port = + reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port; + char send_buf[200]; + RandomizeBuffer(send_buf, sizeof(send_buf)); + ASSERT_THAT(RetryEINTR(sendto)(socket1->get(), send_buf, sizeof(send_buf), 0, + reinterpret_cast<sockaddr*>(&send_addr.addr), + send_addr.addr_len), + SyscallSucceedsWithValue(sizeof(send_buf))); + + // Check that we received the multicast packet. + char recv_buf[sizeof(send_buf)] = {}; + ASSERT_THAT(RetryEINTR(recv)(socket1->get(), recv_buf, sizeof(recv_buf), 0), + SyscallSucceedsWithValue(sizeof(recv_buf))); + + EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf))); +} + +// Check that multicast works when the default send interface is configured by +// IP_MULTICAST_IF, the send address is specified in sendto, and the group +// membership is configured by NIC ID. 
+TEST_P(IPv4UDPUnboundSocketTest, IpMulticastLoopbackIfNicSelf) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + // Set the default send interface. + ip_mreqn iface = {}; + iface.imr_ifindex = ASSERT_NO_ERRNO_AND_VALUE(InterfaceIndex("lo")); + ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &iface, + sizeof(iface)), + SyscallSucceeds()); + + // Bind the first FD to the v4 any address to ensure that we can receive the + // multicast packet. + auto receiver_addr = V4Any(); + ASSERT_THAT( + bind(socket1->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr), + receiver_addr.addr_len), + SyscallSucceeds()); + socklen_t receiver_addr_len = receiver_addr.addr_len; + ASSERT_THAT(getsockname(socket1->get(), + reinterpret_cast<sockaddr*>(&receiver_addr.addr), + &receiver_addr_len), + SyscallSucceeds()); + EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len); + + // Register to receive multicast packets. + ip_mreqn group = {}; + group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress); + group.imr_ifindex = ASSERT_NO_ERRNO_AND_VALUE(InterfaceIndex("lo")); + ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group, + sizeof(group)), + SyscallSucceeds()); + + // Send a multicast packet. + auto send_addr = V4Multicast(); + reinterpret_cast<sockaddr_in*>(&send_addr.addr)->sin_port = + reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port; + char send_buf[200]; + RandomizeBuffer(send_buf, sizeof(send_buf)); + ASSERT_THAT(RetryEINTR(sendto)(socket1->get(), send_buf, sizeof(send_buf), 0, + reinterpret_cast<sockaddr*>(&send_addr.addr), + send_addr.addr_len), + SyscallSucceedsWithValue(sizeof(send_buf))); + + // Check that we received the multicast packet. 
+ char recv_buf[sizeof(send_buf)] = {}; + ASSERT_THAT(RetryEINTR(recv)(socket1->get(), recv_buf, sizeof(recv_buf), 0), + SyscallSucceedsWithValue(sizeof(recv_buf))); + + EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf))); +} + +// Check that multicast works when the default send interface is configured by +// IP_MULTICAST_IF, the send address is specified in connect, and the group +// membership is configured by address. +TEST_P(IPv4UDPUnboundSocketTest, IpMulticastLoopbackIfAddrSelfConnect) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + // Set the default send interface. + ip_mreq iface = {}; + iface.imr_interface.s_addr = htonl(INADDR_LOOPBACK); + ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &iface, + sizeof(iface)), + SyscallSucceeds()); + + // Bind the first FD to the v4 any address to ensure that we can receive the + // multicast packet. + auto receiver_addr = V4Any(); + ASSERT_THAT( + bind(socket1->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr), + receiver_addr.addr_len), + SyscallSucceeds()); + socklen_t receiver_addr_len = receiver_addr.addr_len; + ASSERT_THAT(getsockname(socket1->get(), + reinterpret_cast<sockaddr*>(&receiver_addr.addr), + &receiver_addr_len), + SyscallSucceeds()); + EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len); + + // Register to receive multicast packets. + ip_mreq group = {}; + group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress); + group.imr_interface.s_addr = htonl(INADDR_LOOPBACK); + EXPECT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group, + sizeof(group)), + SyscallSucceeds()); + + // Send a multicast packet. 
+ auto connect_addr = V4Multicast(); + reinterpret_cast<sockaddr_in*>(&connect_addr.addr)->sin_port = + reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port; + EXPECT_THAT( + RetryEINTR(connect)(socket1->get(), + reinterpret_cast<sockaddr*>(&connect_addr.addr), + connect_addr.addr_len), + SyscallSucceeds()); + + char send_buf[200]; + RandomizeBuffer(send_buf, sizeof(send_buf)); + ASSERT_THAT(RetryEINTR(send)(socket1->get(), send_buf, sizeof(send_buf), 0), + SyscallSucceedsWithValue(sizeof(send_buf))); + + // Check that we did not receive the multicast packet. + char recv_buf[sizeof(send_buf)] = {}; + EXPECT_THAT(RetryEINTR(recv)(socket1->get(), recv_buf, sizeof(recv_buf), + MSG_DONTWAIT), + SyscallFailsWithErrno(EAGAIN)); +} + +// Check that multicast works when the default send interface is configured by +// IP_MULTICAST_IF, the send address is specified in connect, and the group +// membership is configured by NIC ID. +TEST_P(IPv4UDPUnboundSocketTest, IpMulticastLoopbackIfNicSelfConnect) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + // Set the default send interface. + ip_mreqn iface = {}; + iface.imr_ifindex = ASSERT_NO_ERRNO_AND_VALUE(InterfaceIndex("lo")); + ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &iface, + sizeof(iface)), + SyscallSucceeds()); + + // Bind the first FD to the v4 any address to ensure that we can receive the + // multicast packet. + auto receiver_addr = V4Any(); + ASSERT_THAT( + bind(socket1->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr), + receiver_addr.addr_len), + SyscallSucceeds()); + socklen_t receiver_addr_len = receiver_addr.addr_len; + ASSERT_THAT(getsockname(socket1->get(), + reinterpret_cast<sockaddr*>(&receiver_addr.addr), + &receiver_addr_len), + SyscallSucceeds()); + EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len); + + // Register to receive multicast packets. 
+ ip_mreqn group = {}; + group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress); + group.imr_ifindex = ASSERT_NO_ERRNO_AND_VALUE(InterfaceIndex("lo")); + ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group, + sizeof(group)), + SyscallSucceeds()); + + // Send a multicast packet. + auto connect_addr = V4Multicast(); + reinterpret_cast<sockaddr_in*>(&connect_addr.addr)->sin_port = + reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port; + ASSERT_THAT( + RetryEINTR(connect)(socket1->get(), + reinterpret_cast<sockaddr*>(&connect_addr.addr), + connect_addr.addr_len), + SyscallSucceeds()); + + char send_buf[200]; + RandomizeBuffer(send_buf, sizeof(send_buf)); + ASSERT_THAT(RetryEINTR(send)(socket1->get(), send_buf, sizeof(send_buf), 0), + SyscallSucceedsWithValue(sizeof(send_buf))); + + // Check that we did not receive the multicast packet. + char recv_buf[sizeof(send_buf)] = {}; + EXPECT_THAT(RetryEINTR(recv)(socket1->get(), recv_buf, sizeof(recv_buf), + MSG_DONTWAIT), + SyscallFailsWithErrno(EAGAIN)); +} + +// Check that multicast works when the default send interface is configured by +// IP_MULTICAST_IF, the send address is specified in sendto, and the group +// membership is configured by address. +TEST_P(IPv4UDPUnboundSocketTest, IpMulticastLoopbackIfAddrSelfNoLoop) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + // Set the default send interface. + ip_mreq iface = {}; + iface.imr_interface.s_addr = htonl(INADDR_LOOPBACK); + EXPECT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &iface, + sizeof(iface)), + SyscallSucceeds()); + + ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_LOOP, + &kSockOptOff, sizeof(kSockOptOff)), + SyscallSucceeds()); + + // Bind the first FD to the v4 any address to ensure that we can receive the + // multicast packet. 
+ auto receiver_addr = V4Any(); + ASSERT_THAT( + bind(socket1->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr), + receiver_addr.addr_len), + SyscallSucceeds()); + socklen_t receiver_addr_len = receiver_addr.addr_len; + ASSERT_THAT(getsockname(socket1->get(), + reinterpret_cast<sockaddr*>(&receiver_addr.addr), + &receiver_addr_len), + SyscallSucceeds()); + EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len); + + // Register to receive multicast packets. + ip_mreq group = {}; + group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress); + group.imr_interface.s_addr = htonl(INADDR_LOOPBACK); + ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group, + sizeof(group)), + SyscallSucceeds()); + + // Send a multicast packet. + auto send_addr = V4Multicast(); + reinterpret_cast<sockaddr_in*>(&send_addr.addr)->sin_port = + reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port; + char send_buf[200]; + RandomizeBuffer(send_buf, sizeof(send_buf)); + ASSERT_THAT(RetryEINTR(sendto)(socket1->get(), send_buf, sizeof(send_buf), 0, + reinterpret_cast<sockaddr*>(&send_addr.addr), + send_addr.addr_len), + SyscallSucceedsWithValue(sizeof(send_buf))); + + // Check that we received the multicast packet. + char recv_buf[sizeof(send_buf)] = {}; + ASSERT_THAT(RetryEINTR(recv)(socket1->get(), recv_buf, sizeof(recv_buf), 0), + SyscallSucceedsWithValue(sizeof(recv_buf))); + + EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf))); +} + +// Check that multicast works when the default send interface is configured by +// IP_MULTICAST_IF, the send address is specified in sendto, and the group +// membership is configured by NIC ID. +TEST_P(IPv4UDPUnboundSocketTest, IpMulticastLoopbackIfNicSelfNoLoop) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + // Set the default send interface. 
+ ip_mreqn iface = {}; + iface.imr_ifindex = ASSERT_NO_ERRNO_AND_VALUE(InterfaceIndex("lo")); + ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &iface, + sizeof(iface)), + SyscallSucceeds()); + + ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_LOOP, + &kSockOptOff, sizeof(kSockOptOff)), + SyscallSucceeds()); + + // Bind the second FD to the v4 any address to ensure that we can receive the + // multicast packet. + auto receiver_addr = V4Any(); + ASSERT_THAT( + bind(socket1->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr), + receiver_addr.addr_len), + SyscallSucceeds()); + socklen_t receiver_addr_len = receiver_addr.addr_len; + ASSERT_THAT(getsockname(socket1->get(), + reinterpret_cast<sockaddr*>(&receiver_addr.addr), + &receiver_addr_len), + SyscallSucceeds()); + EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len); + + // Register to receive multicast packets. + ip_mreqn group = {}; + group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress); + group.imr_ifindex = ASSERT_NO_ERRNO_AND_VALUE(InterfaceIndex("lo")); + EXPECT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group, + sizeof(group)), + SyscallSucceeds()); + + // Send a multicast packet. + auto send_addr = V4Multicast(); + reinterpret_cast<sockaddr_in*>(&send_addr.addr)->sin_port = + reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port; + char send_buf[200]; + RandomizeBuffer(send_buf, sizeof(send_buf)); + ASSERT_THAT(RetryEINTR(sendto)(socket1->get(), send_buf, sizeof(send_buf), 0, + reinterpret_cast<sockaddr*>(&send_addr.addr), + send_addr.addr_len), + SyscallSucceedsWithValue(sizeof(send_buf))); + + // Check that we received the multicast packet. 
+ char recv_buf[sizeof(send_buf)] = {}; + ASSERT_THAT(RetryEINTR(recv)(socket1->get(), recv_buf, sizeof(recv_buf), 0), + SyscallSucceedsWithValue(sizeof(recv_buf))); + + EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf))); +} + +// Check that dropping a group membership that does not exist fails. +TEST_P(IPv4UDPUnboundSocketTest, IpMulticastInvalidDrop) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + // Unregister from a membership that we didn't have. + ip_mreq group = {}; + group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress); + group.imr_interface.s_addr = htonl(INADDR_LOOPBACK); + EXPECT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_DROP_MEMBERSHIP, &group, + sizeof(group)), + SyscallFailsWithErrno(EADDRNOTAVAIL)); +} + +// Check that dropping a group membership prevents multicast packets from being +// delivered. Default send address configured by bind and group membership +// interface configured by address. +TEST_P(IPv4UDPUnboundSocketTest, IpMulticastDropAddr) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + // Bind the first FD to the loopback. This is an alternative to + // IP_MULTICAST_IF for setting the default send interface. + auto sender_addr = V4Loopback(); + EXPECT_THAT( + bind(socket1->get(), reinterpret_cast<sockaddr*>(&sender_addr.addr), + sender_addr.addr_len), + SyscallSucceeds()); + + // Bind the second FD to the v4 any address to ensure that we can receive the + // multicast packet. 
+ auto receiver_addr = V4Any(); + EXPECT_THAT( + bind(socket2->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr), + receiver_addr.addr_len), + SyscallSucceeds()); + socklen_t receiver_addr_len = receiver_addr.addr_len; + ASSERT_THAT(getsockname(socket2->get(), + reinterpret_cast<sockaddr*>(&receiver_addr.addr), + &receiver_addr_len), + SyscallSucceeds()); + EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len); + + // Register and unregister to receive multicast packets. + ip_mreq group = {}; + group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress); + group.imr_interface.s_addr = htonl(INADDR_LOOPBACK); + EXPECT_THAT(setsockopt(socket2->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group, + sizeof(group)), + SyscallSucceeds()); + EXPECT_THAT(setsockopt(socket2->get(), IPPROTO_IP, IP_DROP_MEMBERSHIP, &group, + sizeof(group)), + SyscallSucceeds()); + + // Send a multicast packet. + auto send_addr = V4Multicast(); + reinterpret_cast<sockaddr_in*>(&send_addr.addr)->sin_port = + reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port; + char send_buf[200]; + RandomizeBuffer(send_buf, sizeof(send_buf)); + EXPECT_THAT(RetryEINTR(sendto)(socket1->get(), send_buf, sizeof(send_buf), 0, + reinterpret_cast<sockaddr*>(&send_addr.addr), + send_addr.addr_len), + SyscallSucceedsWithValue(sizeof(send_buf))); + + // Check that we did not receive the multicast packet. + char recv_buf[sizeof(send_buf)] = {}; + EXPECT_THAT(RetryEINTR(recv)(socket2->get(), recv_buf, sizeof(recv_buf), + MSG_DONTWAIT), + SyscallFailsWithErrno(EAGAIN)); +} + +// Check that dropping a group membership prevents multicast packets from being +// delivered. Default send address configured by bind and group membership +// interface configured by NIC ID. +TEST_P(IPv4UDPUnboundSocketTest, IpMulticastDropNic) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + // Bind the first FD to the loopback. 
This is an alternative to + // IP_MULTICAST_IF for setting the default send interface. + auto sender_addr = V4Loopback(); + EXPECT_THAT( + bind(socket1->get(), reinterpret_cast<sockaddr*>(&sender_addr.addr), + sender_addr.addr_len), + SyscallSucceeds()); + + // Bind the second FD to the v4 any address to ensure that we can receive the + // multicast packet. + auto receiver_addr = V4Any(); + EXPECT_THAT( + bind(socket2->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr), + receiver_addr.addr_len), + SyscallSucceeds()); + socklen_t receiver_addr_len = receiver_addr.addr_len; + ASSERT_THAT(getsockname(socket2->get(), + reinterpret_cast<sockaddr*>(&receiver_addr.addr), + &receiver_addr_len), + SyscallSucceeds()); + EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len); + + // Register and unregister to receive multicast packets. + ip_mreqn group = {}; + group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress); + group.imr_ifindex = ASSERT_NO_ERRNO_AND_VALUE(InterfaceIndex("lo")); + EXPECT_THAT(setsockopt(socket2->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group, + sizeof(group)), + SyscallSucceeds()); + EXPECT_THAT(setsockopt(socket2->get(), IPPROTO_IP, IP_DROP_MEMBERSHIP, &group, + sizeof(group)), + SyscallSucceeds()); + + // Send a multicast packet. + auto send_addr = V4Multicast(); + reinterpret_cast<sockaddr_in*>(&send_addr.addr)->sin_port = + reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port; + char send_buf[200]; + RandomizeBuffer(send_buf, sizeof(send_buf)); + EXPECT_THAT(RetryEINTR(sendto)(socket1->get(), send_buf, sizeof(send_buf), 0, + reinterpret_cast<sockaddr*>(&send_addr.addr), + send_addr.addr_len), + SyscallSucceedsWithValue(sizeof(send_buf))); + + // Check that we did not receive the multicast packet. 
+ char recv_buf[sizeof(send_buf)] = {}; + EXPECT_THAT(RetryEINTR(recv)(socket2->get(), recv_buf, sizeof(recv_buf), + MSG_DONTWAIT), + SyscallFailsWithErrno(EAGAIN)); +} + +TEST_P(IPv4UDPUnboundSocketTest, IpMulticastIfZero) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + ip_mreqn iface = {}; + EXPECT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &iface, + sizeof(iface)), + SyscallSucceeds()); +} + +TEST_P(IPv4UDPUnboundSocketTest, IpMulticastIfInvalidNic) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + ip_mreqn iface = {}; + iface.imr_ifindex = -1; + EXPECT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &iface, + sizeof(iface)), + SyscallFailsWithErrno(EADDRNOTAVAIL)); +} + +TEST_P(IPv4UDPUnboundSocketTest, IpMulticastIfInvalidAddr) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + ip_mreq iface = {}; + iface.imr_interface.s_addr = inet_addr("255.255.255"); + EXPECT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &iface, + sizeof(iface)), + SyscallFailsWithErrno(EADDRNOTAVAIL)); +} + +TEST_P(IPv4UDPUnboundSocketTest, IpMulticastIfSetShort) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + // Create a valid full-sized request. + ip_mreqn iface = {}; + iface.imr_ifindex = ASSERT_NO_ERRNO_AND_VALUE(InterfaceIndex("lo")); + + // Send an optlen of 1 to check that optlen is enforced. 
+ EXPECT_THAT( + setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &iface, 1), + SyscallFailsWithErrno(EINVAL)); +} + +TEST_P(IPv4UDPUnboundSocketTest, IpMulticastIfDefault) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + in_addr get = {}; + socklen_t size = sizeof(get); + ASSERT_THAT( + getsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &get, &size), + SyscallSucceeds()); + EXPECT_EQ(size, sizeof(get)); + EXPECT_EQ(get.s_addr, 0); +} + +TEST_P(IPv4UDPUnboundSocketTest, IpMulticastIfDefaultReqn) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + ip_mreqn get = {}; + socklen_t size = sizeof(get); + ASSERT_THAT( + getsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &get, &size), + SyscallSucceeds()); + + // getsockopt(IP_MULTICAST_IF) can only return an in_addr, so it treats the + // first sizeof(struct in_addr) bytes of struct ip_mreqn as a struct in_addr. + // Conveniently, this corresponds to the field ip_mreqn::imr_multiaddr. + EXPECT_EQ(size, sizeof(in_addr)); + + // getsockopt(IP_MULTICAST_IF) will only return the interface address which + // hasn't been set. 
+ EXPECT_EQ(get.imr_multiaddr.s_addr, 0); + EXPECT_EQ(get.imr_address.s_addr, 0); + EXPECT_EQ(get.imr_ifindex, 0); +} + +TEST_P(IPv4UDPUnboundSocketTest, IpMulticastIfSetAddrGetReqn) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + in_addr set = {}; + set.s_addr = htonl(INADDR_LOOPBACK); + ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &set, + sizeof(set)), + SyscallSucceeds()); + + ip_mreqn get = {}; + socklen_t size = sizeof(get); + ASSERT_THAT( + getsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &get, &size), + SyscallSucceeds()); + + // getsockopt(IP_MULTICAST_IF) can only return an in_addr, so it treats the + // first sizeof(struct in_addr) bytes of struct ip_mreqn as a struct in_addr. + // Conveniently, this corresponds to the field ip_mreqn::imr_multiaddr. + EXPECT_EQ(size, sizeof(in_addr)); + EXPECT_EQ(get.imr_multiaddr.s_addr, set.s_addr); + EXPECT_EQ(get.imr_address.s_addr, 0); + EXPECT_EQ(get.imr_ifindex, 0); +} + +TEST_P(IPv4UDPUnboundSocketTest, IpMulticastIfSetReqAddrGetReqn) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + ip_mreq set = {}; + set.imr_interface.s_addr = htonl(INADDR_LOOPBACK); + ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &set, + sizeof(set)), + SyscallSucceeds()); + + ip_mreqn get = {}; + socklen_t size = sizeof(get); + ASSERT_THAT( + getsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &get, &size), + SyscallSucceeds()); + + // getsockopt(IP_MULTICAST_IF) can only return an in_addr, so it treats the + // first sizeof(struct in_addr) bytes of struct ip_mreqn as a struct in_addr. + // Conveniently, this corresponds to the field ip_mreqn::imr_multiaddr. 
+ EXPECT_EQ(size, sizeof(in_addr)); + EXPECT_EQ(get.imr_multiaddr.s_addr, set.imr_interface.s_addr); + EXPECT_EQ(get.imr_address.s_addr, 0); + EXPECT_EQ(get.imr_ifindex, 0); +} + +TEST_P(IPv4UDPUnboundSocketTest, IpMulticastIfSetNicGetReqn) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + ip_mreqn set = {}; + set.imr_ifindex = ASSERT_NO_ERRNO_AND_VALUE(InterfaceIndex("lo")); + ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &set, + sizeof(set)), + SyscallSucceeds()); + + ip_mreqn get = {}; + socklen_t size = sizeof(get); + ASSERT_THAT( + getsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &get, &size), + SyscallSucceeds()); + EXPECT_EQ(size, sizeof(in_addr)); + EXPECT_EQ(get.imr_multiaddr.s_addr, 0); + EXPECT_EQ(get.imr_address.s_addr, 0); + EXPECT_EQ(get.imr_ifindex, 0); +} + +TEST_P(IPv4UDPUnboundSocketTest, IpMulticastIfSetAddr) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + in_addr set = {}; + set.s_addr = htonl(INADDR_LOOPBACK); + ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &set, + sizeof(set)), + SyscallSucceeds()); + + in_addr get = {}; + socklen_t size = sizeof(get); + ASSERT_THAT( + getsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &get, &size), + SyscallSucceeds()); + + EXPECT_EQ(size, sizeof(get)); + EXPECT_EQ(get.s_addr, set.s_addr); +} + +TEST_P(IPv4UDPUnboundSocketTest, IpMulticastIfSetReqAddr) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + ip_mreq set = {}; + set.imr_interface.s_addr = htonl(INADDR_LOOPBACK); + ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &set, + sizeof(set)), + SyscallSucceeds()); + + in_addr get = {}; + socklen_t size = sizeof(get); + ASSERT_THAT( + getsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &get, &size), + SyscallSucceeds()); + + 
EXPECT_EQ(size, sizeof(get)); + EXPECT_EQ(get.s_addr, set.imr_interface.s_addr); +} + +TEST_P(IPv4UDPUnboundSocketTest, IpMulticastIfSetNic) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + ip_mreqn set = {}; + set.imr_ifindex = ASSERT_NO_ERRNO_AND_VALUE(InterfaceIndex("lo")); + ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &set, + sizeof(set)), + SyscallSucceeds()); + + in_addr get = {}; + socklen_t size = sizeof(get); + ASSERT_THAT( + getsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &get, &size), + SyscallSucceeds()); + EXPECT_EQ(size, sizeof(get)); + EXPECT_EQ(get.s_addr, 0); +} + +TEST_P(IPv4UDPUnboundSocketTest, TestJoinGroupNoIf) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + ip_mreqn group = {}; + group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress); + EXPECT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group, + sizeof(group)), + SyscallFailsWithErrno(ENODEV)); +} + +TEST_P(IPv4UDPUnboundSocketTest, TestJoinGroupInvalidIf) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + ip_mreqn group = {}; + group.imr_address.s_addr = inet_addr("255.255.255"); + group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress); + EXPECT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group, + sizeof(group)), + SyscallFailsWithErrno(ENODEV)); +} + +// Check that multiple memberships are not allowed on the same socket. 
+TEST_P(IPv4UDPUnboundSocketTest, TestMultipleJoinsOnSingleSocket) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto fd = socket1->get(); + ip_mreqn group = {}; + group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress); + group.imr_ifindex = ASSERT_NO_ERRNO_AND_VALUE(InterfaceIndex("lo")); + + EXPECT_THAT( + setsockopt(fd, IPPROTO_IP, IP_ADD_MEMBERSHIP, &group, sizeof(group)), + SyscallSucceeds()); + + EXPECT_THAT( + setsockopt(fd, IPPROTO_IP, IP_ADD_MEMBERSHIP, &group, sizeof(group)), + SyscallFailsWithErrno(EADDRINUSE)); +} + +// Check that two sockets can join the same multicast group at the same time. +TEST_P(IPv4UDPUnboundSocketTest, TestTwoSocketsJoinSameMulticastGroup) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + ip_mreqn group = {}; + group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress); + group.imr_ifindex = ASSERT_NO_ERRNO_AND_VALUE(InterfaceIndex("lo")); + EXPECT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group, + sizeof(group)), + SyscallSucceeds()); + EXPECT_THAT(setsockopt(socket2->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group, + sizeof(group)), + SyscallSucceeds()); + + // Drop the membership twice on each socket, the second call for each socket + // should fail. 
+ EXPECT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_DROP_MEMBERSHIP, &group, + sizeof(group)), + SyscallSucceeds()); + EXPECT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_DROP_MEMBERSHIP, &group, + sizeof(group)), + SyscallFailsWithErrno(EADDRNOTAVAIL)); + EXPECT_THAT(setsockopt(socket2->get(), IPPROTO_IP, IP_DROP_MEMBERSHIP, &group, + sizeof(group)), + SyscallSucceeds()); + EXPECT_THAT(setsockopt(socket2->get(), IPPROTO_IP, IP_DROP_MEMBERSHIP, &group, + sizeof(group)), + SyscallFailsWithErrno(EADDRNOTAVAIL)); +} + +// Check that two sockets can join the same multicast group at the same time, +// and both will receive data on it. +TEST_P(IPv4UDPUnboundSocketTest, TestMcastReceptionOnTwoSockets) { + std::unique_ptr<SocketPair> socket_pairs[2] = { + absl::make_unique<FDSocketPair>(ASSERT_NO_ERRNO_AND_VALUE(NewSocket()), + ASSERT_NO_ERRNO_AND_VALUE(NewSocket())), + absl::make_unique<FDSocketPair>(ASSERT_NO_ERRNO_AND_VALUE(NewSocket()), + ASSERT_NO_ERRNO_AND_VALUE(NewSocket()))}; + + ip_mreq iface = {}, group = {}; + iface.imr_interface.s_addr = htonl(INADDR_LOOPBACK); + group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress); + group.imr_interface.s_addr = htonl(INADDR_LOOPBACK); + auto receiver_addr = V4Any(); + int bound_port = 0; + + // Create two socketpairs with the exact same configuration. + for (auto& sockets : socket_pairs) { + ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_IF, + &iface, sizeof(iface)), + SyscallSucceeds()); + ASSERT_THAT(setsockopt(sockets->second_fd(), SOL_SOCKET, SO_REUSEPORT, + &kSockOptOn, sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT(setsockopt(sockets->second_fd(), IPPROTO_IP, IP_ADD_MEMBERSHIP, + &group, sizeof(group)), + SyscallSucceeds()); + ASSERT_THAT(bind(sockets->second_fd(), + reinterpret_cast<sockaddr*>(&receiver_addr.addr), + receiver_addr.addr_len), + SyscallSucceeds()); + // Get the port assigned. 
+ socklen_t receiver_addr_len = receiver_addr.addr_len; + ASSERT_THAT(getsockname(sockets->second_fd(), + reinterpret_cast<sockaddr*>(&receiver_addr.addr), + &receiver_addr_len), + SyscallSucceeds()); + EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len); + // On the first iteration, save the port we are bound to. On the second + // iteration, verify the port is the same as the one from the first + // iteration. In other words, both sockets listen on the same port. + if (bound_port == 0) { + bound_port = + reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port; + } else { + EXPECT_EQ(bound_port, + reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port); + } + } + + // Send a multicast packet to the group from two different sockets and verify + // it is received by both sockets that joined that group. + auto send_addr = V4Multicast(); + reinterpret_cast<sockaddr_in*>(&send_addr.addr)->sin_port = bound_port; + for (auto& sockets : socket_pairs) { + char send_buf[200]; + RandomizeBuffer(send_buf, sizeof(send_buf)); + ASSERT_THAT( + RetryEINTR(sendto)(sockets->first_fd(), send_buf, sizeof(send_buf), 0, + reinterpret_cast<sockaddr*>(&send_addr.addr), + send_addr.addr_len), + SyscallSucceedsWithValue(sizeof(send_buf))); + + // Check that we received the multicast packet on both sockets. + for (auto& sockets : socket_pairs) { + char recv_buf[sizeof(send_buf)] = {}; + ASSERT_THAT( + RetryEINTR(recv)(sockets->second_fd(), recv_buf, sizeof(recv_buf), 0), + SyscallSucceedsWithValue(sizeof(recv_buf))); + EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf))); + } + } +} + +// Check that on two sockets that joined a group and listen on ANY, dropping +// memberships one by one will continue to deliver packets to both sockets until +// both memberships have been dropped. 
+TEST_P(IPv4UDPUnboundSocketTest, TestMcastReceptionWhenDroppingMemberships) { + std::unique_ptr<SocketPair> socket_pairs[2] = { + absl::make_unique<FDSocketPair>(ASSERT_NO_ERRNO_AND_VALUE(NewSocket()), + ASSERT_NO_ERRNO_AND_VALUE(NewSocket())), + absl::make_unique<FDSocketPair>(ASSERT_NO_ERRNO_AND_VALUE(NewSocket()), + ASSERT_NO_ERRNO_AND_VALUE(NewSocket()))}; + + ip_mreq iface = {}, group = {}; + iface.imr_interface.s_addr = htonl(INADDR_LOOPBACK); + group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress); + group.imr_interface.s_addr = htonl(INADDR_LOOPBACK); + auto receiver_addr = V4Any(); + int bound_port = 0; + + // Create two socketpairs with the exact same configuration. + for (auto& sockets : socket_pairs) { + ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_IF, + &iface, sizeof(iface)), + SyscallSucceeds()); + ASSERT_THAT(setsockopt(sockets->second_fd(), SOL_SOCKET, SO_REUSEPORT, + &kSockOptOn, sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT(setsockopt(sockets->second_fd(), IPPROTO_IP, IP_ADD_MEMBERSHIP, + &group, sizeof(group)), + SyscallSucceeds()); + ASSERT_THAT(bind(sockets->second_fd(), + reinterpret_cast<sockaddr*>(&receiver_addr.addr), + receiver_addr.addr_len), + SyscallSucceeds()); + // Get the port assigned. + socklen_t receiver_addr_len = receiver_addr.addr_len; + ASSERT_THAT(getsockname(sockets->second_fd(), + reinterpret_cast<sockaddr*>(&receiver_addr.addr), + &receiver_addr_len), + SyscallSucceeds()); + EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len); + // On the first iteration, save the port we are bound to. On the second + // iteration, verify the port is the same as the one from the first + // iteration. In other words, both sockets listen on the same port. 
+ if (bound_port == 0) { + bound_port = + reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port; + } else { + EXPECT_EQ(bound_port, + reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port); + } + } + + // Drop the membership of the first socket pair and verify data is still + // received. + ASSERT_THAT(setsockopt(socket_pairs[0]->second_fd(), IPPROTO_IP, + IP_DROP_MEMBERSHIP, &group, sizeof(group)), + SyscallSucceeds()); + // Send a packet from each socket_pair. + auto send_addr = V4Multicast(); + reinterpret_cast<sockaddr_in*>(&send_addr.addr)->sin_port = bound_port; + for (auto& sockets : socket_pairs) { + char send_buf[200]; + RandomizeBuffer(send_buf, sizeof(send_buf)); + ASSERT_THAT( + RetryEINTR(sendto)(sockets->first_fd(), send_buf, sizeof(send_buf), 0, + reinterpret_cast<sockaddr*>(&send_addr.addr), + send_addr.addr_len), + SyscallSucceedsWithValue(sizeof(send_buf))); + + // Check that we received the multicast packet on both sockets. + for (auto& sockets : socket_pairs) { + char recv_buf[sizeof(send_buf)] = {}; + ASSERT_THAT( + RetryEINTR(recv)(sockets->second_fd(), recv_buf, sizeof(recv_buf), 0), + SyscallSucceedsWithValue(sizeof(recv_buf))); + EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf))); + } + } + + // Drop the membership of the second socket pair and verify data stops being + // received. + ASSERT_THAT(setsockopt(socket_pairs[1]->second_fd(), IPPROTO_IP, + IP_DROP_MEMBERSHIP, &group, sizeof(group)), + SyscallSucceeds()); + // Send a packet from each socket_pair. 
+ for (auto& sockets : socket_pairs) { + char send_buf[200]; + ASSERT_THAT( + RetryEINTR(sendto)(sockets->first_fd(), send_buf, sizeof(send_buf), 0, + reinterpret_cast<sockaddr*>(&send_addr.addr), + send_addr.addr_len), + SyscallSucceedsWithValue(sizeof(send_buf))); + + char recv_buf[sizeof(send_buf)] = {}; + for (auto& sockets : socket_pairs) { + ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), recv_buf, + sizeof(recv_buf), MSG_DONTWAIT), + SyscallFailsWithErrno(EAGAIN)); + } + } +} + +// Check that a receiving socket can bind to the multicast address before +// joining the group and receive data once the group has been joined. +TEST_P(IPv4UDPUnboundSocketTest, TestBindToMcastThenJoinThenReceive) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + // Bind second socket (receiver) to the multicast address. + auto receiver_addr = V4Multicast(); + ASSERT_THAT( + bind(socket2->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr), + receiver_addr.addr_len), + SyscallSucceeds()); + // Update receiver_addr with the correct port number. + socklen_t receiver_addr_len = receiver_addr.addr_len; + ASSERT_THAT(getsockname(socket2->get(), + reinterpret_cast<sockaddr*>(&receiver_addr.addr), + &receiver_addr_len), + SyscallSucceeds()); + EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len); + + // Register to receive multicast packets. + ip_mreqn group = {}; + group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress); + group.imr_ifindex = ASSERT_NO_ERRNO_AND_VALUE(InterfaceIndex("lo")); + ASSERT_THAT(setsockopt(socket2->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group, + sizeof(group)), + SyscallSucceeds()); + + // Send a multicast packet on the first socket out the loopback interface. 
+ ip_mreq iface = {}; + iface.imr_interface.s_addr = htonl(INADDR_LOOPBACK); + ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &iface, + sizeof(iface)), + SyscallSucceeds()); + auto sendto_addr = V4Multicast(); + reinterpret_cast<sockaddr_in*>(&sendto_addr.addr)->sin_port = + reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port; + char send_buf[200]; + RandomizeBuffer(send_buf, sizeof(send_buf)); + ASSERT_THAT(RetryEINTR(sendto)(socket1->get(), send_buf, sizeof(send_buf), 0, + reinterpret_cast<sockaddr*>(&sendto_addr.addr), + sendto_addr.addr_len), + SyscallSucceedsWithValue(sizeof(send_buf))); + + // Check that we received the multicast packet. + char recv_buf[sizeof(send_buf)] = {}; + ASSERT_THAT(RetryEINTR(recv)(socket2->get(), recv_buf, sizeof(recv_buf), + MSG_DONTWAIT), + SyscallSucceedsWithValue(sizeof(recv_buf))); + EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf))); +} + +// Check that a receiving socket can bind to the multicast address and won't +// receive multicast data if it hasn't joined the group. +TEST_P(IPv4UDPUnboundSocketTest, TestBindToMcastThenNoJoinThenNoReceive) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + // Bind second socket (receiver) to the multicast address. + auto receiver_addr = V4Multicast(); + ASSERT_THAT( + bind(socket2->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr), + receiver_addr.addr_len), + SyscallSucceeds()); + // Update receiver_addr with the correct port number. + socklen_t receiver_addr_len = receiver_addr.addr_len; + ASSERT_THAT(getsockname(socket2->get(), + reinterpret_cast<sockaddr*>(&receiver_addr.addr), + &receiver_addr_len), + SyscallSucceeds()); + EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len); + + // Send a multicast packet on the first socket out the loopback interface. 
+ ip_mreq iface = {}; + iface.imr_interface.s_addr = htonl(INADDR_LOOPBACK); + ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &iface, + sizeof(iface)), + SyscallSucceeds()); + auto sendto_addr = V4Multicast(); + reinterpret_cast<sockaddr_in*>(&sendto_addr.addr)->sin_port = + reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port; + char send_buf[200]; + RandomizeBuffer(send_buf, sizeof(send_buf)); + ASSERT_THAT(RetryEINTR(sendto)(socket1->get(), send_buf, sizeof(send_buf), 0, + reinterpret_cast<sockaddr*>(&sendto_addr.addr), + sendto_addr.addr_len), + SyscallSucceedsWithValue(sizeof(send_buf))); + + // Check that we don't receive the multicast packet. + char recv_buf[sizeof(send_buf)] = {}; + ASSERT_THAT(RetryEINTR(recv)(socket2->get(), recv_buf, sizeof(recv_buf), + MSG_DONTWAIT), + SyscallFailsWithErrno(EAGAIN)); +} + +// Check that a socket can bind to a multicast address and still send out +// packets. +TEST_P(IPv4UDPUnboundSocketTest, TestBindToMcastThenSend) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + // Bind second socket (receiver) to the ANY address. + auto receiver_addr = V4Any(); + ASSERT_THAT( + bind(socket2->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr), + receiver_addr.addr_len), + SyscallSucceeds()); + socklen_t receiver_addr_len = receiver_addr.addr_len; + ASSERT_THAT(getsockname(socket2->get(), + reinterpret_cast<sockaddr*>(&receiver_addr.addr), + &receiver_addr_len), + SyscallSucceeds()); + EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len); + + // Bind the first socket (sender) to the multicast address. 
+ auto sender_addr = V4Multicast(); + ASSERT_THAT( + bind(socket1->get(), reinterpret_cast<sockaddr*>(&sender_addr.addr), + sender_addr.addr_len), + SyscallSucceeds()); + socklen_t sender_addr_len = sender_addr.addr_len; + ASSERT_THAT(getsockname(socket1->get(), + reinterpret_cast<sockaddr*>(&sender_addr.addr), + &sender_addr_len), + SyscallSucceeds()); + EXPECT_EQ(sender_addr_len, sender_addr.addr_len); + + // Send a packet on the first socket to the loopback address. + auto sendto_addr = V4Loopback(); + reinterpret_cast<sockaddr_in*>(&sendto_addr.addr)->sin_port = + reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port; + char send_buf[200]; + RandomizeBuffer(send_buf, sizeof(send_buf)); + ASSERT_THAT(RetryEINTR(sendto)(socket1->get(), send_buf, sizeof(send_buf), 0, + reinterpret_cast<sockaddr*>(&sendto_addr.addr), + sendto_addr.addr_len), + SyscallSucceedsWithValue(sizeof(send_buf))); + + // Check that we received the packet. + char recv_buf[sizeof(send_buf)] = {}; + ASSERT_THAT(RetryEINTR(recv)(socket2->get(), recv_buf, sizeof(recv_buf), + MSG_DONTWAIT), + SyscallSucceedsWithValue(sizeof(recv_buf))); + EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf))); +} + +// Check that a receiving socket can bind to the broadcast address and receive +// broadcast packets. +TEST_P(IPv4UDPUnboundSocketTest, TestBindToBcastThenReceive) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + // Bind second socket (receiver) to the broadcast address. 
+ auto receiver_addr = V4Broadcast(); + ASSERT_THAT( + bind(socket2->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr), + receiver_addr.addr_len), + SyscallSucceeds()); + socklen_t receiver_addr_len = receiver_addr.addr_len; + ASSERT_THAT(getsockname(socket2->get(), + reinterpret_cast<sockaddr*>(&receiver_addr.addr), + &receiver_addr_len), + SyscallSucceeds()); + EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len); + + // Send a broadcast packet on the first socket out the loopback interface. + EXPECT_THAT(setsockopt(socket1->get(), SOL_SOCKET, SO_BROADCAST, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceedsWithValue(0)); + // Note: Binding to the loopback interface makes the broadcast go out of it. + auto sender_bind_addr = V4Loopback(); + ASSERT_THAT( + bind(socket1->get(), reinterpret_cast<sockaddr*>(&sender_bind_addr.addr), + sender_bind_addr.addr_len), + SyscallSucceeds()); + auto sendto_addr = V4Broadcast(); + reinterpret_cast<sockaddr_in*>(&sendto_addr.addr)->sin_port = + reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port; + char send_buf[200]; + RandomizeBuffer(send_buf, sizeof(send_buf)); + ASSERT_THAT(RetryEINTR(sendto)(socket1->get(), send_buf, sizeof(send_buf), 0, + reinterpret_cast<sockaddr*>(&sendto_addr.addr), + sendto_addr.addr_len), + SyscallSucceedsWithValue(sizeof(send_buf))); + + // Check that we received the multicast packet. + char recv_buf[sizeof(send_buf)] = {}; + ASSERT_THAT(RetryEINTR(recv)(socket2->get(), recv_buf, sizeof(recv_buf), + MSG_DONTWAIT), + SyscallSucceedsWithValue(sizeof(recv_buf))); + EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf))); +} + +// Check that a socket can bind to the broadcast address and still send out +// packets. +TEST_P(IPv4UDPUnboundSocketTest, TestBindToBcastThenSend) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + // Bind second socket (receiver) to the ANY address. 
+ auto receiver_addr = V4Any(); + ASSERT_THAT( + bind(socket2->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr), + receiver_addr.addr_len), + SyscallSucceeds()); + socklen_t receiver_addr_len = receiver_addr.addr_len; + ASSERT_THAT(getsockname(socket2->get(), + reinterpret_cast<sockaddr*>(&receiver_addr.addr), + &receiver_addr_len), + SyscallSucceeds()); + EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len); + + // Bind the first socket (sender) to the broadcast address. + auto sender_addr = V4Broadcast(); + ASSERT_THAT( + bind(socket1->get(), reinterpret_cast<sockaddr*>(&sender_addr.addr), + sender_addr.addr_len), + SyscallSucceeds()); + socklen_t sender_addr_len = sender_addr.addr_len; + ASSERT_THAT(getsockname(socket1->get(), + reinterpret_cast<sockaddr*>(&sender_addr.addr), + &sender_addr_len), + SyscallSucceeds()); + EXPECT_EQ(sender_addr_len, sender_addr.addr_len); + + // Send a packet on the first socket to the loopback address. + auto sendto_addr = V4Loopback(); + reinterpret_cast<sockaddr_in*>(&sendto_addr.addr)->sin_port = + reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port; + char send_buf[200]; + RandomizeBuffer(send_buf, sizeof(send_buf)); + ASSERT_THAT(RetryEINTR(sendto)(socket1->get(), send_buf, sizeof(send_buf), 0, + reinterpret_cast<sockaddr*>(&sendto_addr.addr), + sendto_addr.addr_len), + SyscallSucceedsWithValue(sizeof(send_buf))); + + // Check that we received the packet. + char recv_buf[sizeof(send_buf)] = {}; + ASSERT_THAT(RetryEINTR(recv)(socket2->get(), recv_buf, sizeof(recv_buf), + MSG_DONTWAIT), + SyscallSucceedsWithValue(sizeof(recv_buf))); + EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf))); +} + +// Check that SO_REUSEADDR always delivers to the most recently bound socket. +// +// FIXME(gvisor.dev/issue/873): Endpoint order is not restored correctly. Enable +// random and co-op save (below) once that is fixed. 
+TEST_P(IPv4UDPUnboundSocketTest, ReuseAddrDistribution_NoRandomSave) { + std::vector<std::unique_ptr<FileDescriptor>> sockets; + sockets.emplace_back(ASSERT_NO_ERRNO_AND_VALUE(NewSocket())); + + ASSERT_THAT(setsockopt(sockets[0]->get(), SOL_SOCKET, SO_REUSEADDR, + &kSockOptOn, sizeof(kSockOptOn)), + SyscallSucceeds()); + + // Bind the first socket to the loopback and take note of the selected port. + auto addr = V4Loopback(); + ASSERT_THAT(bind(sockets[0]->get(), reinterpret_cast<sockaddr*>(&addr.addr), + addr.addr_len), + SyscallSucceeds()); + socklen_t addr_len = addr.addr_len; + ASSERT_THAT(getsockname(sockets[0]->get(), + reinterpret_cast<sockaddr*>(&addr.addr), &addr_len), + SyscallSucceeds()); + EXPECT_EQ(addr_len, addr.addr_len); + + constexpr int kMessageSize = 200; + + // FIXME(gvisor.dev/issue/873): Endpoint order is not restored correctly. + const DisableSave ds; + + for (int i = 0; i < 10; i++) { + // Add a new receiver. + sockets.emplace_back(ASSERT_NO_ERRNO_AND_VALUE(NewSocket())); + auto& last = sockets.back(); + ASSERT_THAT(setsockopt(last->get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT(bind(last->get(), reinterpret_cast<sockaddr*>(&addr.addr), + addr.addr_len), + SyscallSucceeds()); + + // Send a new message to the SO_REUSEADDR group. We use a new socket each + // time so that a new ephemeral port will be used each time. This ensures + // that we aren't doing REUSEPORT-like hash load blancing. + auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + char send_buf[kMessageSize]; + RandomizeBuffer(send_buf, sizeof(send_buf)); + EXPECT_THAT(RetryEINTR(sendto)(sender->get(), send_buf, sizeof(send_buf), 0, + reinterpret_cast<sockaddr*>(&addr.addr), + addr.addr_len), + SyscallSucceedsWithValue(sizeof(send_buf))); + + // Verify that the most recent socket got the message. We don't expect any + // of the other sockets to have received it, but we will check that later. 
+ char recv_buf[sizeof(send_buf)] = {}; + EXPECT_THAT( + RetryEINTR(recv)(last->get(), recv_buf, sizeof(recv_buf), MSG_DONTWAIT), + SyscallSucceedsWithValue(sizeof(send_buf))); + EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf))); + } + + // Verify that no other messages were received. + for (auto& socket : sockets) { + char recv_buf[kMessageSize] = {}; + EXPECT_THAT(RetryEINTR(recv)(socket->get(), recv_buf, sizeof(recv_buf), + MSG_DONTWAIT), + SyscallFailsWithErrno(EAGAIN)); + } +} + +TEST_P(IPv4UDPUnboundSocketTest, BindReuseAddrThenReusePort) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + // Bind socket1 with REUSEADDR. + ASSERT_THAT(setsockopt(socket1->get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + + // Bind the first socket to the loopback and take note of the selected port. + auto addr = V4Loopback(); + ASSERT_THAT(bind(socket1->get(), reinterpret_cast<sockaddr*>(&addr.addr), + addr.addr_len), + SyscallSucceeds()); + socklen_t addr_len = addr.addr_len; + ASSERT_THAT(getsockname(socket1->get(), + reinterpret_cast<sockaddr*>(&addr.addr), &addr_len), + SyscallSucceeds()); + EXPECT_EQ(addr_len, addr.addr_len); + + // Bind socket2 to the same address as socket1, only with REUSEPORT. + ASSERT_THAT(setsockopt(socket2->get(), SOL_SOCKET, SO_REUSEPORT, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT(bind(socket2->get(), reinterpret_cast<sockaddr*>(&addr.addr), + addr.addr_len), + SyscallFailsWithErrno(EADDRINUSE)); +} + +TEST_P(IPv4UDPUnboundSocketTest, BindReusePortThenReuseAddr) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + // Bind socket1 with REUSEPORT. 
+ ASSERT_THAT(setsockopt(socket1->get(), SOL_SOCKET, SO_REUSEPORT, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + + // Bind the first socket to the loopback and take note of the selected port. + auto addr = V4Loopback(); + ASSERT_THAT(bind(socket1->get(), reinterpret_cast<sockaddr*>(&addr.addr), + addr.addr_len), + SyscallSucceeds()); + socklen_t addr_len = addr.addr_len; + ASSERT_THAT(getsockname(socket1->get(), + reinterpret_cast<sockaddr*>(&addr.addr), &addr_len), + SyscallSucceeds()); + EXPECT_EQ(addr_len, addr.addr_len); + + // Bind socket2 to the same address as socket1, only with REUSEADDR. + ASSERT_THAT(setsockopt(socket2->get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT(bind(socket2->get(), reinterpret_cast<sockaddr*>(&addr.addr), + addr.addr_len), + SyscallFailsWithErrno(EADDRINUSE)); +} + +TEST_P(IPv4UDPUnboundSocketTest, BindReuseAddrReusePortConvertibleToReusePort) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket3 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + // Bind socket1 with REUSEADDR and REUSEPORT. + ASSERT_THAT(setsockopt(socket1->get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT(setsockopt(socket1->get(), SOL_SOCKET, SO_REUSEPORT, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + + // Bind the first socket to the loopback and take note of the selected port. + auto addr = V4Loopback(); + ASSERT_THAT(bind(socket1->get(), reinterpret_cast<sockaddr*>(&addr.addr), + addr.addr_len), + SyscallSucceeds()); + socklen_t addr_len = addr.addr_len; + ASSERT_THAT(getsockname(socket1->get(), + reinterpret_cast<sockaddr*>(&addr.addr), &addr_len), + SyscallSucceeds()); + EXPECT_EQ(addr_len, addr.addr_len); + + // Bind socket2 to the same address as socket1, only with REUSEPORT. 
+ ASSERT_THAT(setsockopt(socket2->get(), SOL_SOCKET, SO_REUSEPORT, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT(bind(socket2->get(), reinterpret_cast<sockaddr*>(&addr.addr), + addr.addr_len), + SyscallSucceeds()); + + // Bind socket3 to the same address as socket1, only with REUSEADDR. + ASSERT_THAT(setsockopt(socket3->get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT(bind(socket3->get(), reinterpret_cast<sockaddr*>(&addr.addr), + addr.addr_len), + SyscallFailsWithErrno(EADDRINUSE)); +} + +TEST_P(IPv4UDPUnboundSocketTest, BindReuseAddrReusePortConvertibleToReuseAddr) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket3 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + // Bind socket1 with REUSEADDR and REUSEPORT. + ASSERT_THAT(setsockopt(socket1->get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT(setsockopt(socket1->get(), SOL_SOCKET, SO_REUSEPORT, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + + // Bind the first socket to the loopback and take note of the selected port. + auto addr = V4Loopback(); + ASSERT_THAT(bind(socket1->get(), reinterpret_cast<sockaddr*>(&addr.addr), + addr.addr_len), + SyscallSucceeds()); + socklen_t addr_len = addr.addr_len; + ASSERT_THAT(getsockname(socket1->get(), + reinterpret_cast<sockaddr*>(&addr.addr), &addr_len), + SyscallSucceeds()); + EXPECT_EQ(addr_len, addr.addr_len); + + // Bind socket2 to the same address as socket1, only with REUSEADDR. + ASSERT_THAT(setsockopt(socket2->get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT(bind(socket2->get(), reinterpret_cast<sockaddr*>(&addr.addr), + addr.addr_len), + SyscallSucceeds()); + + // Bind socket3 to the same address as socket1, only with REUSEPORT. 
+ ASSERT_THAT(setsockopt(socket3->get(), SOL_SOCKET, SO_REUSEPORT, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT(bind(socket3->get(), reinterpret_cast<sockaddr*>(&addr.addr), + addr.addr_len), + SyscallFailsWithErrno(EADDRINUSE)); +} + +TEST_P(IPv4UDPUnboundSocketTest, BindReuseAddrReusePortConversionReversable1) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket3 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + // Bind socket1 with REUSEADDR and REUSEPORT. + ASSERT_THAT(setsockopt(socket1->get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT(setsockopt(socket1->get(), SOL_SOCKET, SO_REUSEPORT, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + + // Bind the first socket to the loopback and take note of the selected port. + auto addr = V4Loopback(); + ASSERT_THAT(bind(socket1->get(), reinterpret_cast<sockaddr*>(&addr.addr), + addr.addr_len), + SyscallSucceeds()); + socklen_t addr_len = addr.addr_len; + ASSERT_THAT(getsockname(socket1->get(), + reinterpret_cast<sockaddr*>(&addr.addr), &addr_len), + SyscallSucceeds()); + EXPECT_EQ(addr_len, addr.addr_len); + + // Bind socket2 to the same address as socket1, only with REUSEPORT. + ASSERT_THAT(setsockopt(socket2->get(), SOL_SOCKET, SO_REUSEPORT, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT(bind(socket2->get(), reinterpret_cast<sockaddr*>(&addr.addr), + addr.addr_len), + SyscallSucceeds()); + + // Close socket2 to revert to just socket1 with REUSEADDR and REUSEPORT. + socket2->reset(); + + // Bind socket3 to the same address as socket1, only with REUSEADDR. 
+ ASSERT_THAT(setsockopt(socket3->get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT(bind(socket3->get(), reinterpret_cast<sockaddr*>(&addr.addr), + addr.addr_len), + SyscallSucceeds()); +} + +TEST_P(IPv4UDPUnboundSocketTest, BindReuseAddrReusePortConversionReversable2) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket3 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + // Bind socket1 with REUSEADDR and REUSEPORT. + ASSERT_THAT(setsockopt(socket1->get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT(setsockopt(socket1->get(), SOL_SOCKET, SO_REUSEPORT, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + + // Bind the first socket to the loopback and take note of the selected port. + auto addr = V4Loopback(); + ASSERT_THAT(bind(socket1->get(), reinterpret_cast<sockaddr*>(&addr.addr), + addr.addr_len), + SyscallSucceeds()); + socklen_t addr_len = addr.addr_len; + ASSERT_THAT(getsockname(socket1->get(), + reinterpret_cast<sockaddr*>(&addr.addr), &addr_len), + SyscallSucceeds()); + EXPECT_EQ(addr_len, addr.addr_len); + + // Bind socket2 to the same address as socket1, only with REUSEADDR. + ASSERT_THAT(setsockopt(socket2->get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT(bind(socket2->get(), reinterpret_cast<sockaddr*>(&addr.addr), + addr.addr_len), + SyscallSucceeds()); + + // Close socket2 to revert to just socket1 with REUSEADDR and REUSEPORT. + socket2->reset(); + + // Bind socket3 to the same address as socket1, only with REUSEPORT. 
+ ASSERT_THAT(setsockopt(socket3->get(), SOL_SOCKET, SO_REUSEPORT, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT(bind(socket3->get(), reinterpret_cast<sockaddr*>(&addr.addr), + addr.addr_len), + SyscallSucceeds()); +} + +TEST_P(IPv4UDPUnboundSocketTest, BindDoubleReuseAddrReusePortThenReusePort) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket3 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + // Bind socket1 with REUSEADDR and REUSEPORT. + ASSERT_THAT(setsockopt(socket1->get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT(setsockopt(socket1->get(), SOL_SOCKET, SO_REUSEPORT, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + + // Bind the first socket to the loopback and take note of the selected port. + auto addr = V4Loopback(); + ASSERT_THAT(bind(socket1->get(), reinterpret_cast<sockaddr*>(&addr.addr), + addr.addr_len), + SyscallSucceeds()); + socklen_t addr_len = addr.addr_len; + ASSERT_THAT(getsockname(socket1->get(), + reinterpret_cast<sockaddr*>(&addr.addr), &addr_len), + SyscallSucceeds()); + EXPECT_EQ(addr_len, addr.addr_len); + + // Bind socket2 to the same address as socket1, also with REUSEADDR and + // REUSEPORT. + ASSERT_THAT(setsockopt(socket2->get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT(setsockopt(socket2->get(), SOL_SOCKET, SO_REUSEPORT, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + + ASSERT_THAT(bind(socket2->get(), reinterpret_cast<sockaddr*>(&addr.addr), + addr.addr_len), + SyscallSucceeds()); + + // Bind socket3 to the same address as socket1, only with REUSEPORT. 
+ ASSERT_THAT(setsockopt(socket3->get(), SOL_SOCKET, SO_REUSEPORT, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT(bind(socket3->get(), reinterpret_cast<sockaddr*>(&addr.addr), + addr.addr_len), + SyscallSucceeds()); +} + +TEST_P(IPv4UDPUnboundSocketTest, BindDoubleReuseAddrReusePortThenReuseAddr) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket3 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + // Bind socket1 with REUSEADDR and REUSEPORT. + ASSERT_THAT(setsockopt(socket1->get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT(setsockopt(socket1->get(), SOL_SOCKET, SO_REUSEPORT, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + + // Bind the first socket to the loopback and take note of the selected port. + auto addr = V4Loopback(); + ASSERT_THAT(bind(socket1->get(), reinterpret_cast<sockaddr*>(&addr.addr), + addr.addr_len), + SyscallSucceeds()); + socklen_t addr_len = addr.addr_len; + ASSERT_THAT(getsockname(socket1->get(), + reinterpret_cast<sockaddr*>(&addr.addr), &addr_len), + SyscallSucceeds()); + EXPECT_EQ(addr_len, addr.addr_len); + + // Bind socket2 to the same address as socket1, also with REUSEADDR and + // REUSEPORT. + ASSERT_THAT(setsockopt(socket2->get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT(setsockopt(socket2->get(), SOL_SOCKET, SO_REUSEPORT, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + + ASSERT_THAT(bind(socket2->get(), reinterpret_cast<sockaddr*>(&addr.addr), + addr.addr_len), + SyscallSucceeds()); + + // Bind socket3 to the same address as socket1, only with REUSEADDR. 
+ ASSERT_THAT(setsockopt(socket3->get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT(bind(socket3->get(), reinterpret_cast<sockaddr*>(&addr.addr), + addr.addr_len), + SyscallSucceeds()); +} + +// Check that REUSEPORT takes precedence over REUSEADDR. +TEST_P(IPv4UDPUnboundSocketTest, ReuseAddrReusePortDistribution) { + auto receiver1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto receiver2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + ASSERT_THAT(setsockopt(receiver1->get(), SOL_SOCKET, SO_REUSEADDR, + &kSockOptOn, sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT(setsockopt(receiver1->get(), SOL_SOCKET, SO_REUSEPORT, + &kSockOptOn, sizeof(kSockOptOn)), + SyscallSucceeds()); + + // Bind the first socket to the loopback and take note of the selected port. + auto addr = V4Loopback(); + ASSERT_THAT(bind(receiver1->get(), reinterpret_cast<sockaddr*>(&addr.addr), + addr.addr_len), + SyscallSucceeds()); + socklen_t addr_len = addr.addr_len; + ASSERT_THAT(getsockname(receiver1->get(), + reinterpret_cast<sockaddr*>(&addr.addr), &addr_len), + SyscallSucceeds()); + EXPECT_EQ(addr_len, addr.addr_len); + + // Bind receiver2 to the same address as receiver1, also with REUSEADDR and + // REUSEPORT. + ASSERT_THAT(setsockopt(receiver2->get(), SOL_SOCKET, SO_REUSEADDR, + &kSockOptOn, sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT(setsockopt(receiver2->get(), SOL_SOCKET, SO_REUSEPORT, + &kSockOptOn, sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT(bind(receiver2->get(), reinterpret_cast<sockaddr*>(&addr.addr), + addr.addr_len), + SyscallSucceeds()); + + constexpr int kMessageSize = 10; + + for (int i = 0; i < 100; ++i) { + // Send a new message to the REUSEADDR/REUSEPORT group. We use a new socket + // each time so that a new ephemeral port will be used each time. This + // ensures that we cycle through hashes. 
+ auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + char send_buf[kMessageSize] = {}; + EXPECT_THAT(RetryEINTR(sendto)(sender->get(), send_buf, sizeof(send_buf), 0, + reinterpret_cast<sockaddr*>(&addr.addr), + addr.addr_len), + SyscallSucceedsWithValue(sizeof(send_buf))); + } + + // Check that both receivers got messages. This checks that we are using load + // balancing (REUSEPORT) instead of the most recently bound socket + // (REUSEADDR). + char recv_buf[kMessageSize] = {}; + EXPECT_THAT(RetryEINTR(recv)(receiver1->get(), recv_buf, sizeof(recv_buf), + MSG_DONTWAIT), + SyscallSucceedsWithValue(kMessageSize)); + EXPECT_THAT(RetryEINTR(recv)(receiver2->get(), recv_buf, sizeof(recv_buf), + MSG_DONTWAIT), + SyscallSucceedsWithValue(kMessageSize)); +} + +// Check that connect returns EAGAIN when out of local ephemeral ports. +// We disable S/R because this test creates a large number of sockets. +TEST_P(IPv4UDPUnboundSocketTest, UDPConnectPortExhaustion_NoRandomSave) { + auto receiver1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + constexpr int kClients = 65536; + // Bind the first socket to the loopback and take note of the selected port. + auto addr = V4Loopback(); + ASSERT_THAT(bind(receiver1->get(), reinterpret_cast<sockaddr*>(&addr.addr), + addr.addr_len), + SyscallSucceeds()); + socklen_t addr_len = addr.addr_len; + ASSERT_THAT(getsockname(receiver1->get(), + reinterpret_cast<sockaddr*>(&addr.addr), &addr_len), + SyscallSucceeds()); + EXPECT_EQ(addr_len, addr.addr_len); + + // Disable cooperative S/R as we are making too many syscalls. 
+ DisableSave ds; + std::vector<std::unique_ptr<FileDescriptor>> sockets; + for (int i = 0; i < kClients; i++) { + auto s = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + int ret = connect(s->get(), reinterpret_cast<sockaddr*>(&addr.addr), + addr.addr_len); + if (ret == 0) { + sockets.push_back(std::move(s)); + continue; + } + ASSERT_THAT(ret, SyscallFailsWithErrno(EAGAIN)); + break; + } +} + +// Test that socket will receive packet info control message. +TEST_P(IPv4UDPUnboundSocketTest, SetAndReceiveIPPKTINFO) { + // TODO(gvisor.dev/issue/1202): ioctl() is not supported by hostinet. + SKIP_IF((IsRunningWithHostinet())); + + auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto receiver = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto sender_addr = V4Loopback(); + int level = SOL_IP; + int type = IP_PKTINFO; + + ASSERT_THAT( + bind(receiver->get(), reinterpret_cast<sockaddr*>(&sender_addr.addr), + sender_addr.addr_len), + SyscallSucceeds()); + socklen_t sender_addr_len = sender_addr.addr_len; + ASSERT_THAT(getsockname(receiver->get(), + reinterpret_cast<sockaddr*>(&sender_addr.addr), + &sender_addr_len), + SyscallSucceeds()); + EXPECT_EQ(sender_addr_len, sender_addr.addr_len); + + auto receiver_addr = V4Loopback(); + reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port = + reinterpret_cast<sockaddr_in*>(&sender_addr.addr)->sin_port; + ASSERT_THAT( + connect(sender->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr), + receiver_addr.addr_len), + SyscallSucceeds()); + + // Allow socket to receive control message. + ASSERT_THAT( + setsockopt(receiver->get(), level, type, &kSockOptOn, sizeof(kSockOptOn)), + SyscallSucceeds()); + + // Prepare message to send. 
+ constexpr size_t kDataLength = 1024; + msghdr sent_msg = {}; + iovec sent_iov = {}; + char sent_data[kDataLength]; + sent_iov.iov_base = sent_data; + sent_iov.iov_len = kDataLength; + sent_msg.msg_iov = &sent_iov; + sent_msg.msg_iovlen = 1; + sent_msg.msg_flags = 0; + + ASSERT_THAT(RetryEINTR(sendmsg)(sender->get(), &sent_msg, 0), + SyscallSucceedsWithValue(kDataLength)); + + msghdr received_msg = {}; + iovec received_iov = {}; + char received_data[kDataLength]; + char received_cmsg_buf[CMSG_SPACE(sizeof(in_pktinfo))] = {}; + size_t cmsg_data_len = sizeof(in_pktinfo); + received_iov.iov_base = received_data; + received_iov.iov_len = kDataLength; + received_msg.msg_iov = &received_iov; + received_msg.msg_iovlen = 1; + received_msg.msg_controllen = CMSG_LEN(cmsg_data_len); + received_msg.msg_control = received_cmsg_buf; + + ASSERT_THAT(RetryEINTR(recvmsg)(receiver->get(), &received_msg, 0), + SyscallSucceedsWithValue(kDataLength)); + + cmsghdr* cmsg = CMSG_FIRSTHDR(&received_msg); + ASSERT_NE(cmsg, nullptr); + EXPECT_EQ(cmsg->cmsg_len, CMSG_LEN(cmsg_data_len)); + EXPECT_EQ(cmsg->cmsg_level, level); + EXPECT_EQ(cmsg->cmsg_type, type); + + // Get loopback index. + ifreq ifr = {}; + absl::SNPrintF(ifr.ifr_name, IFNAMSIZ, "lo"); + ASSERT_THAT(ioctl(sender->get(), SIOCGIFINDEX, &ifr), SyscallSucceeds()); + ASSERT_NE(ifr.ifr_ifindex, 0); + + // Check the data + in_pktinfo received_pktinfo = {}; + memcpy(&received_pktinfo, CMSG_DATA(cmsg), sizeof(in_pktinfo)); + EXPECT_EQ(received_pktinfo.ipi_ifindex, ifr.ifr_ifindex); + EXPECT_EQ(received_pktinfo.ipi_spec_dst.s_addr, htonl(INADDR_LOOPBACK)); + EXPECT_EQ(received_pktinfo.ipi_addr.s_addr, htonl(INADDR_LOOPBACK)); +} + +// Check that setting SO_RCVBUF below min is clamped to the minimum +// receive buffer size. +TEST_P(IPv4UDPUnboundSocketTest, SetSocketRecvBufBelowMin) { + auto s = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + // Discover minimum buffer size by setting it to zero. 
+ constexpr int kRcvBufSz = 0; + ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, + sizeof(kRcvBufSz)), + SyscallSucceeds()); + + int min = 0; + socklen_t min_len = sizeof(min); + ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &min, &min_len), + SyscallSucceeds()); + + // Linux doubles the value so let's use a value that when doubled will still + // be smaller than min. + int below_min = min / 2 - 1; + ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &below_min, + sizeof(below_min)), + SyscallSucceeds()); + + int val = 0; + socklen_t val_len = sizeof(val); + ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &val, &val_len), + SyscallSucceeds()); + + ASSERT_EQ(min, val); +} + +// Check that setting SO_RCVBUF above max is clamped to the maximum +// receive buffer size. +TEST_P(IPv4UDPUnboundSocketTest, SetSocketRecvBufAboveMax) { + auto s = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + // Discover maximum buffer size by setting to a really large value. + constexpr int kRcvBufSz = 0xffffffff; + ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, + sizeof(kRcvBufSz)), + SyscallSucceeds()); + + int max = 0; + socklen_t max_len = sizeof(max); + ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &max, &max_len), + SyscallSucceeds()); + + int above_max = max + 1; + ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &above_max, + sizeof(above_max)), + SyscallSucceeds()); + + int val = 0; + socklen_t val_len = sizeof(val); + ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &val, &val_len), + SyscallSucceeds()); + ASSERT_EQ(max, val); +} + +// Check that setting SO_RCVBUF min <= rcvBufSz <= max is honored. +TEST_P(IPv4UDPUnboundSocketTest, SetSocketRecvBuf) { + auto s = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + int max = 0; + int min = 0; + { + // Discover maximum buffer size by setting to a really large value. 
+ constexpr int kRcvBufSz = 0xffffffff; + ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, + sizeof(kRcvBufSz)), + SyscallSucceeds()); + + max = 0; + socklen_t max_len = sizeof(max); + ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &max, &max_len), + SyscallSucceeds()); + } + + { + // Discover minimum buffer size by setting it to zero. + constexpr int kRcvBufSz = 0; + ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, + sizeof(kRcvBufSz)), + SyscallSucceeds()); + + socklen_t min_len = sizeof(min); + ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &min, &min_len), + SyscallSucceeds()); + } + + int quarter_sz = min + (max - min) / 4; + ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &quarter_sz, + sizeof(quarter_sz)), + SyscallSucceeds()); + + int val = 0; + socklen_t val_len = sizeof(val); + ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &val, &val_len), + SyscallSucceeds()); + + // Linux doubles the value set by SO_SNDBUF/SO_RCVBUF. + if (!IsRunningOnGvisor()) { + quarter_sz *= 2; + } + ASSERT_EQ(quarter_sz, val); +} + +// Check that setting SO_SNDBUF below min is clamped to the minimum +// send buffer size. +TEST_P(IPv4UDPUnboundSocketTest, SetSocketSendBufBelowMin) { + auto s = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + // Discover minimum buffer size by setting it to zero. + constexpr int kSndBufSz = 0; + ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &kSndBufSz, + sizeof(kSndBufSz)), + SyscallSucceeds()); + + int min = 0; + socklen_t min_len = sizeof(min); + ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &min, &min_len), + SyscallSucceeds()); + + // Linux doubles the value so let's use a value that when doubled will still + // be smaller than min. 
+ int below_min = min / 2 - 1; + ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &below_min, + sizeof(below_min)), + SyscallSucceeds()); + + int val = 0; + socklen_t val_len = sizeof(val); + ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &val, &val_len), + SyscallSucceeds()); + + ASSERT_EQ(min, val); +} + +// Check that setting SO_SNDBUF above max is clamped to the maximum +// send buffer size. +TEST_P(IPv4UDPUnboundSocketTest, SetSocketSendBufAboveMax) { + auto s = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + // Discover maximum buffer size by setting to a really large value. + constexpr int kSndBufSz = 0xffffffff; + ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &kSndBufSz, + sizeof(kSndBufSz)), + SyscallSucceeds()); + + int max = 0; + socklen_t max_len = sizeof(max); + ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &max, &max_len), + SyscallSucceeds()); + + int above_max = max + 1; + ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &above_max, + sizeof(above_max)), + SyscallSucceeds()); + + int val = 0; + socklen_t val_len = sizeof(val); + ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &val, &val_len), + SyscallSucceeds()); + ASSERT_EQ(max, val); +} + +// Check that setting SO_SNDBUF min <= kSndBufSz <= max is honored. +TEST_P(IPv4UDPUnboundSocketTest, SetSocketSendBuf) { + auto s = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + int max = 0; + int min = 0; + { + // Discover maximum buffer size by setting to a really large value. + constexpr int kSndBufSz = 0xffffffff; + ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &kSndBufSz, + sizeof(kSndBufSz)), + SyscallSucceeds()); + + max = 0; + socklen_t max_len = sizeof(max); + ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &max, &max_len), + SyscallSucceeds()); + } + + { + // Discover minimum buffer size by setting it to zero. 
+ constexpr int kSndBufSz = 0; + ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &kSndBufSz, + sizeof(kSndBufSz)), + SyscallSucceeds()); + + socklen_t min_len = sizeof(min); + ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &min, &min_len), + SyscallSucceeds()); + } + + int quarter_sz = min + (max - min) / 4; + ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &quarter_sz, + sizeof(quarter_sz)), + SyscallSucceeds()); + + int val = 0; + socklen_t val_len = sizeof(val); + ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &val, &val_len), + SyscallSucceeds()); + + // Linux doubles the value set by SO_SNDBUF/SO_RCVBUF. + if (!IsRunningOnGvisor()) { + quarter_sz *= 2; + } + + ASSERT_EQ(quarter_sz, val); +} +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_ipv4_udp_unbound.h b/test/syscalls/linux/socket_ipv4_udp_unbound.h new file mode 100644 index 000000000..f64c57645 --- /dev/null +++ b/test/syscalls/linux/socket_ipv4_udp_unbound.h @@ -0,0 +1,29 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IPV4_UDP_UNBOUND_H_ +#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IPV4_UDP_UNBOUND_H_ + +#include "test/syscalls/linux/socket_test_util.h" + +namespace gvisor { +namespace testing { + +// Test fixture for tests that apply to IPv4 UDP sockets. 
+using IPv4UDPUnboundSocketTest = SimpleSocketTest; + +} // namespace testing +} // namespace gvisor + +#endif // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IPV4_UDP_UNBOUND_H_ diff --git a/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking.cc b/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking.cc new file mode 100644 index 000000000..d690d9564 --- /dev/null +++ b/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking.cc @@ -0,0 +1,1099 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "test/syscalls/linux/socket_ipv4_udp_unbound_external_networking.h" + +#include <arpa/inet.h> +#include <ifaddrs.h> +#include <netinet/in.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <sys/un.h> + +#include <cstdint> +#include <cstdio> +#include <cstring> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "test/syscalls/linux/ip_socket_test_util.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +TestAddress V4EmptyAddress() { + TestAddress t("V4Empty"); + t.addr.ss_family = AF_INET; + t.addr_len = sizeof(sockaddr_in); + return t; +} + +void IPv4UDPUnboundExternalNetworkingSocketTest::SetUp() { + got_if_infos_ = false; + + // Get interface list. 
+ ASSERT_NO_ERRNO(if_helper_.Load()); + std::vector<std::string> if_names = if_helper_.InterfaceList(AF_INET); + if (if_names.size() != 2) { + return; + } + + // Figure out which interface is where. + std::string lo = if_names[0]; + std::string eth = if_names[1]; + if (lo != "lo") std::swap(lo, eth); + if (lo != "lo") return; + + lo_if_idx_ = ASSERT_NO_ERRNO_AND_VALUE(if_helper_.GetIndex(lo)); + auto lo_if_addr = if_helper_.GetAddr(AF_INET, lo); + if (lo_if_addr == nullptr) { + return; + } + lo_if_addr_ = *reinterpret_cast<const sockaddr_in*>(lo_if_addr); + + eth_if_idx_ = ASSERT_NO_ERRNO_AND_VALUE(if_helper_.GetIndex(eth)); + auto eth_if_addr = if_helper_.GetAddr(AF_INET, eth); + if (eth_if_addr == nullptr) { + return; + } + eth_if_addr_ = *reinterpret_cast<const sockaddr_in*>(eth_if_addr); + + got_if_infos_ = true; +} + +// Verifies that a newly instantiated UDP socket does not have the +// broadcast socket option enabled. +TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, UDPBroadcastDefault) { + auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + int get = -1; + socklen_t get_sz = sizeof(get); + EXPECT_THAT( + getsockopt(socket->get(), SOL_SOCKET, SO_BROADCAST, &get, &get_sz), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get, kSockOptOff); + EXPECT_EQ(get_sz, sizeof(get)); +} + +// Verifies that a newly instantiated UDP socket returns true after enabling +// the broadcast socket option. 
+TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, SetUDPBroadcast) { + auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + EXPECT_THAT(setsockopt(socket->get(), SOL_SOCKET, SO_BROADCAST, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceedsWithValue(0)); + + int get = -1; + socklen_t get_sz = sizeof(get); + EXPECT_THAT( + getsockopt(socket->get(), SOL_SOCKET, SO_BROADCAST, &get, &get_sz), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get, kSockOptOn); + EXPECT_EQ(get_sz, sizeof(get)); +} + +// Verifies that a broadcast UDP packet will arrive at all UDP sockets with +// the destination port number. +TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, + UDPBroadcastReceivedOnExpectedPort) { + auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto rcvr1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto rcvr2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto norcv = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + // Enable SO_BROADCAST on the sending socket. + ASSERT_THAT(setsockopt(sender->get(), SOL_SOCKET, SO_BROADCAST, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceedsWithValue(0)); + + // Enable SO_REUSEPORT on the receiving sockets so that they may both be bound + // to the broadcast messages destination port. + ASSERT_THAT(setsockopt(rcvr1->get(), SOL_SOCKET, SO_REUSEPORT, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceedsWithValue(0)); + ASSERT_THAT(setsockopt(rcvr2->get(), SOL_SOCKET, SO_REUSEPORT, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceedsWithValue(0)); + + // Bind the first socket to the ANY address and let the system assign a port. + auto rcv1_addr = V4Any(); + ASSERT_THAT(bind(rcvr1->get(), reinterpret_cast<sockaddr*>(&rcv1_addr.addr), + rcv1_addr.addr_len), + SyscallSucceedsWithValue(0)); + // Retrieve port number from first socket so that it can be bound to the + // second socket. 
+ socklen_t rcv_addr_sz = rcv1_addr.addr_len; + ASSERT_THAT( + getsockname(rcvr1->get(), reinterpret_cast<sockaddr*>(&rcv1_addr.addr), + &rcv_addr_sz), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(rcv_addr_sz, rcv1_addr.addr_len); + auto port = reinterpret_cast<sockaddr_in*>(&rcv1_addr.addr)->sin_port; + + // Bind the second socket to the same address:port as the first. + ASSERT_THAT(bind(rcvr2->get(), reinterpret_cast<sockaddr*>(&rcv1_addr.addr), + rcv_addr_sz), + SyscallSucceedsWithValue(0)); + + // Bind the non-receiving socket to an ephemeral port. + auto norecv_addr = V4Any(); + ASSERT_THAT(bind(norcv->get(), reinterpret_cast<sockaddr*>(&norecv_addr.addr), + norecv_addr.addr_len), + SyscallSucceedsWithValue(0)); + + // Broadcast a test message. + auto dst_addr = V4Broadcast(); + reinterpret_cast<sockaddr_in*>(&dst_addr.addr)->sin_port = port; + constexpr char kTestMsg[] = "hello, world"; + EXPECT_THAT( + sendto(sender->get(), kTestMsg, sizeof(kTestMsg), 0, + reinterpret_cast<sockaddr*>(&dst_addr.addr), dst_addr.addr_len), + SyscallSucceedsWithValue(sizeof(kTestMsg))); + + // Verify that the receiving sockets received the test message. + char buf[sizeof(kTestMsg)] = {}; + EXPECT_THAT(recv(rcvr1->get(), buf, sizeof(buf), 0), + SyscallSucceedsWithValue(sizeof(kTestMsg))); + EXPECT_EQ(0, memcmp(buf, kTestMsg, sizeof(kTestMsg))); + memset(buf, 0, sizeof(buf)); + EXPECT_THAT(recv(rcvr2->get(), buf, sizeof(buf), 0), + SyscallSucceedsWithValue(sizeof(kTestMsg))); + EXPECT_EQ(0, memcmp(buf, kTestMsg, sizeof(kTestMsg))); + + // Verify that the non-receiving socket did not receive the test message. + memset(buf, 0, sizeof(buf)); + EXPECT_THAT(RetryEINTR(recv)(norcv->get(), buf, sizeof(buf), MSG_DONTWAIT), + SyscallFailsWithErrno(EAGAIN)); +} + +// Verifies that a broadcast UDP packet will arrive at all UDP sockets bound to +// the destination port number and either INADDR_ANY or INADDR_BROADCAST, but +// not a unicast address. 
+TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, + UDPBroadcastReceivedOnExpectedAddresses) { + // FIXME(b/137899561): Linux instance for syscall tests sometimes misses its + // IPv4 address on eth0. + SKIP_IF(!got_if_infos_); + + auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto rcvr1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto rcvr2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto norcv = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + // Enable SO_BROADCAST on the sending socket. + ASSERT_THAT(setsockopt(sender->get(), SOL_SOCKET, SO_BROADCAST, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceedsWithValue(0)); + + // Enable SO_REUSEPORT on all sockets so that they may all be bound to the + // broadcast messages destination port. + ASSERT_THAT(setsockopt(rcvr1->get(), SOL_SOCKET, SO_REUSEPORT, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceedsWithValue(0)); + ASSERT_THAT(setsockopt(rcvr2->get(), SOL_SOCKET, SO_REUSEPORT, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceedsWithValue(0)); + ASSERT_THAT(setsockopt(norcv->get(), SOL_SOCKET, SO_REUSEPORT, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceedsWithValue(0)); + + // Bind the first socket the ANY address and let the system assign a port. + auto rcv1_addr = V4Any(); + ASSERT_THAT(bind(rcvr1->get(), reinterpret_cast<sockaddr*>(&rcv1_addr.addr), + rcv1_addr.addr_len), + SyscallSucceedsWithValue(0)); + // Retrieve port number from first socket so that it can be bound to the + // second socket. + socklen_t rcv_addr_sz = rcv1_addr.addr_len; + ASSERT_THAT( + getsockname(rcvr1->get(), reinterpret_cast<sockaddr*>(&rcv1_addr.addr), + &rcv_addr_sz), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(rcv_addr_sz, rcv1_addr.addr_len); + auto port = reinterpret_cast<sockaddr_in*>(&rcv1_addr.addr)->sin_port; + + // Bind the second socket to the broadcast address. 
+ auto rcv2_addr = V4Broadcast(); + reinterpret_cast<sockaddr_in*>(&rcv2_addr.addr)->sin_port = port; + ASSERT_THAT(bind(rcvr2->get(), reinterpret_cast<sockaddr*>(&rcv2_addr.addr), + rcv2_addr.addr_len), + SyscallSucceedsWithValue(0)); + + // Bind the non-receiving socket to the unicast ethernet address. + auto norecv_addr = rcv1_addr; + reinterpret_cast<sockaddr_in*>(&norecv_addr.addr)->sin_addr = + eth_if_addr_.sin_addr; + ASSERT_THAT(bind(norcv->get(), reinterpret_cast<sockaddr*>(&norecv_addr.addr), + norecv_addr.addr_len), + SyscallSucceedsWithValue(0)); + + // Broadcast a test message. + auto dst_addr = V4Broadcast(); + reinterpret_cast<sockaddr_in*>(&dst_addr.addr)->sin_port = port; + constexpr char kTestMsg[] = "hello, world"; + EXPECT_THAT( + sendto(sender->get(), kTestMsg, sizeof(kTestMsg), 0, + reinterpret_cast<sockaddr*>(&dst_addr.addr), dst_addr.addr_len), + SyscallSucceedsWithValue(sizeof(kTestMsg))); + + // Verify that the receiving sockets received the test message. + char buf[sizeof(kTestMsg)] = {}; + EXPECT_THAT(recv(rcvr1->get(), buf, sizeof(buf), 0), + SyscallSucceedsWithValue(sizeof(kTestMsg))); + EXPECT_EQ(0, memcmp(buf, kTestMsg, sizeof(kTestMsg))); + memset(buf, 0, sizeof(buf)); + EXPECT_THAT(recv(rcvr2->get(), buf, sizeof(buf), 0), + SyscallSucceedsWithValue(sizeof(kTestMsg))); + EXPECT_EQ(0, memcmp(buf, kTestMsg, sizeof(kTestMsg))); + + // Verify that the non-receiving socket did not receive the test message. + memset(buf, 0, sizeof(buf)); + EXPECT_THAT(RetryEINTR(recv)(norcv->get(), buf, sizeof(buf), MSG_DONTWAIT), + SyscallFailsWithErrno(EAGAIN)); +} + +// Verifies that a UDP broadcast can be sent and then received back on the same +// socket that is bound to the broadcast address (255.255.255.255). +// FIXME(b/141938460): This can be combined with the next test +// (UDPBroadcastSendRecvOnSocketBoundToAny). 
+TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, + UDPBroadcastSendRecvOnSocketBoundToBroadcast) { + auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + // Enable SO_BROADCAST. + ASSERT_THAT(setsockopt(sender->get(), SOL_SOCKET, SO_BROADCAST, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceedsWithValue(0)); + + // Bind the sender to the broadcast address. + auto src_addr = V4Broadcast(); + ASSERT_THAT(bind(sender->get(), reinterpret_cast<sockaddr*>(&src_addr.addr), + src_addr.addr_len), + SyscallSucceedsWithValue(0)); + socklen_t src_sz = src_addr.addr_len; + ASSERT_THAT(getsockname(sender->get(), + reinterpret_cast<sockaddr*>(&src_addr.addr), &src_sz), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(src_sz, src_addr.addr_len); + + // Send the message. + auto dst_addr = V4Broadcast(); + reinterpret_cast<sockaddr_in*>(&dst_addr.addr)->sin_port = + reinterpret_cast<sockaddr_in*>(&src_addr.addr)->sin_port; + constexpr char kTestMsg[] = "hello, world"; + EXPECT_THAT( + sendto(sender->get(), kTestMsg, sizeof(kTestMsg), 0, + reinterpret_cast<sockaddr*>(&dst_addr.addr), dst_addr.addr_len), + SyscallSucceedsWithValue(sizeof(kTestMsg))); + + // Verify that the message was received. + char buf[sizeof(kTestMsg)] = {}; + EXPECT_THAT(RetryEINTR(recv)(sender->get(), buf, sizeof(buf), 0), + SyscallSucceedsWithValue(sizeof(kTestMsg))); + EXPECT_EQ(0, memcmp(buf, kTestMsg, sizeof(kTestMsg))); +} + +// Verifies that a UDP broadcast can be sent and then received back on the same +// socket that is bound to the ANY address (0.0.0.0). +// FIXME(b/141938460): This can be combined with the previous test +// (UDPBroadcastSendRecvOnSocketBoundToBroadcast). +TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, + UDPBroadcastSendRecvOnSocketBoundToAny) { + auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + // Enable SO_BROADCAST. 
+ ASSERT_THAT(setsockopt(sender->get(), SOL_SOCKET, SO_BROADCAST, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceedsWithValue(0)); + + // Bind the sender to the ANY address. + auto src_addr = V4Any(); + ASSERT_THAT(bind(sender->get(), reinterpret_cast<sockaddr*>(&src_addr.addr), + src_addr.addr_len), + SyscallSucceedsWithValue(0)); + socklen_t src_sz = src_addr.addr_len; + ASSERT_THAT(getsockname(sender->get(), + reinterpret_cast<sockaddr*>(&src_addr.addr), &src_sz), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(src_sz, src_addr.addr_len); + + // Send the message. + auto dst_addr = V4Broadcast(); + reinterpret_cast<sockaddr_in*>(&dst_addr.addr)->sin_port = + reinterpret_cast<sockaddr_in*>(&src_addr.addr)->sin_port; + constexpr char kTestMsg[] = "hello, world"; + EXPECT_THAT( + sendto(sender->get(), kTestMsg, sizeof(kTestMsg), 0, + reinterpret_cast<sockaddr*>(&dst_addr.addr), dst_addr.addr_len), + SyscallSucceedsWithValue(sizeof(kTestMsg))); + + // Verify that the message was received. + char buf[sizeof(kTestMsg)] = {}; + EXPECT_THAT(RetryEINTR(recv)(sender->get(), buf, sizeof(buf), 0), + SyscallSucceedsWithValue(sizeof(kTestMsg))); + EXPECT_EQ(0, memcmp(buf, kTestMsg, sizeof(kTestMsg))); +} + +// Verifies that a UDP broadcast fails to send on a socket with SO_BROADCAST +// disabled. +TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, TestSendBroadcast) { + auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + // Broadcast a test message without having enabled SO_BROADCAST on the sending + // socket. + auto addr = V4Broadcast(); + reinterpret_cast<sockaddr_in*>(&addr.addr)->sin_port = htons(12345); + constexpr char kTestMsg[] = "hello, world"; + + EXPECT_THAT(sendto(sender->get(), kTestMsg, sizeof(kTestMsg), 0, + reinterpret_cast<sockaddr*>(&addr.addr), addr.addr_len), + SyscallFailsWithErrno(EACCES)); +} + +// Verifies that a UDP unicast on an unbound socket reaches its destination. 
+// Check that a datagram sent from a socket that was never bound reaches a
+// receiver bound to the ANY address.
+TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, TestSendUnicastOnUnbound) {
+  auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto rcvr = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  // Bind the receiver and retrieve its address and port number.
+  sockaddr_in addr = {};
+  addr.sin_family = AF_INET;
+  addr.sin_addr.s_addr = htonl(INADDR_ANY);
+  addr.sin_port = htons(0);
+  ASSERT_THAT(bind(rcvr->get(), reinterpret_cast<struct sockaddr*>(&addr),
+                   sizeof(addr)),
+              SyscallSucceedsWithValue(0));
+  // Clear the address so everything read below comes from getsockname().
+  memset(&addr, 0, sizeof(addr));
+  socklen_t addr_sz = sizeof(addr);
+  ASSERT_THAT(getsockname(rcvr->get(),
+                          reinterpret_cast<struct sockaddr*>(&addr), &addr_sz),
+              SyscallSucceedsWithValue(0));
+
+  // Send a test message to the receiver.
+  constexpr char kTestMsg[] = "hello, world";
+  ASSERT_THAT(sendto(sender->get(), kTestMsg, sizeof(kTestMsg), 0,
+                     reinterpret_cast<struct sockaddr*>(&addr), addr_sz),
+              SyscallSucceedsWithValue(sizeof(kTestMsg)));
+  char buf[sizeof(kTestMsg)] = {};
+  ASSERT_THAT(recv(rcvr->get(), buf, sizeof(buf), 0),
+              SyscallSucceedsWithValue(sizeof(kTestMsg)));
+}
+
+// Check that multicast packets won't be delivered to the sending socket with no
+// set interface or group membership.
+TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest,
+       TestSendMulticastSelfNoGroup) {
+  // FIXME(b/125485338): A group membership is not required for external
+  // multicast on gVisor.
+  SKIP_IF(IsRunningOnGvisor());
+
+  auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  // Bind, then read the bound address back; its port becomes the destination
+  // port of the multicast packet sent below.
+  auto bind_addr = V4Any();
+  ASSERT_THAT(bind(socket->get(), reinterpret_cast<sockaddr*>(&bind_addr.addr),
+                   bind_addr.addr_len),
+              SyscallSucceeds());
+  socklen_t bind_addr_len = bind_addr.addr_len;
+  ASSERT_THAT(
+      getsockname(socket->get(), reinterpret_cast<sockaddr*>(&bind_addr.addr),
+                  &bind_addr_len),
+      SyscallSucceeds());
+  EXPECT_EQ(bind_addr_len, bind_addr.addr_len);
+
+  // Send a multicast packet.
+  auto send_addr = V4Multicast();
+  reinterpret_cast<sockaddr_in*>(&send_addr.addr)->sin_port =
+      reinterpret_cast<sockaddr_in*>(&bind_addr.addr)->sin_port;
+  char send_buf[200];
+  RandomizeBuffer(send_buf, sizeof(send_buf));
+  ASSERT_THAT(RetryEINTR(sendto)(socket->get(), send_buf, sizeof(send_buf), 0,
+                                 reinterpret_cast<sockaddr*>(&send_addr.addr),
+                                 send_addr.addr_len),
+              SyscallSucceedsWithValue(sizeof(send_buf)));
+
+  // Check that we did not receive the multicast packet. The receive is
+  // non-blocking (MSG_DONTWAIT), so an empty queue is reported as EAGAIN.
+  char recv_buf[sizeof(send_buf)] = {};
+  ASSERT_THAT(
+      RetryEINTR(recv)(socket->get(), recv_buf, sizeof(recv_buf), MSG_DONTWAIT),
+      SyscallFailsWithErrno(EAGAIN));
+}
+
+// Check that multicast packets will be delivered to the sending socket without
+// setting an interface.
+TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, TestSendMulticastSelf) {
+  auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  // Bind, then read the bound address back; its port becomes the destination
+  // port of the multicast packet sent below.
+  auto bind_addr = V4Any();
+  ASSERT_THAT(bind(socket->get(), reinterpret_cast<sockaddr*>(&bind_addr.addr),
+                   bind_addr.addr_len),
+              SyscallSucceeds());
+  socklen_t bind_addr_len = bind_addr.addr_len;
+  ASSERT_THAT(
+      getsockname(socket->get(), reinterpret_cast<sockaddr*>(&bind_addr.addr),
+                  &bind_addr_len),
+      SyscallSucceeds());
+  EXPECT_EQ(bind_addr_len, bind_addr.addr_len);
+
+  // Register to receive multicast packets.
+  ip_mreq group = {};
+  group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress);
+  ASSERT_THAT(setsockopt(socket->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group,
+                         sizeof(group)),
+              SyscallSucceeds());
+
+  // Send a multicast packet.
+  auto send_addr = V4Multicast();
+  reinterpret_cast<sockaddr_in*>(&send_addr.addr)->sin_port =
+      reinterpret_cast<sockaddr_in*>(&bind_addr.addr)->sin_port;
+  char send_buf[200];
+  RandomizeBuffer(send_buf, sizeof(send_buf));
+  ASSERT_THAT(RetryEINTR(sendto)(socket->get(), send_buf, sizeof(send_buf), 0,
+                                 reinterpret_cast<sockaddr*>(&send_addr.addr),
+                                 send_addr.addr_len),
+              SyscallSucceedsWithValue(sizeof(send_buf)));
+
+  // Check that we received the multicast packet.
+  char recv_buf[sizeof(send_buf)] = {};
+  ASSERT_THAT(RetryEINTR(recv)(socket->get(), recv_buf, sizeof(recv_buf), 0),
+              SyscallSucceedsWithValue(sizeof(recv_buf)));
+
+  EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf)));
+}
+
+// Check that multicast packets won't be delivered to the sending socket with no
+// set interface and IP_MULTICAST_LOOP disabled.
+TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest,
+       TestSendMulticastSelfLoopOff) {
+  auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  // Bind, then read the bound address back; its port becomes the destination
+  // port of the multicast packet sent below.
+  auto bind_addr = V4Any();
+  ASSERT_THAT(bind(socket->get(), reinterpret_cast<sockaddr*>(&bind_addr.addr),
+                   bind_addr.addr_len),
+              SyscallSucceeds());
+  socklen_t bind_addr_len = bind_addr.addr_len;
+  ASSERT_THAT(
+      getsockname(socket->get(), reinterpret_cast<sockaddr*>(&bind_addr.addr),
+                  &bind_addr_len),
+      SyscallSucceeds());
+  EXPECT_EQ(bind_addr_len, bind_addr.addr_len);
+
+  // Disable multicast looping.
+  EXPECT_THAT(setsockopt(socket->get(), IPPROTO_IP, IP_MULTICAST_LOOP,
+                         &kSockOptOff, sizeof(kSockOptOff)),
+              SyscallSucceeds());
+
+  // Register to receive multicast packets.
+  ip_mreq group = {};
+  group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress);
+  EXPECT_THAT(setsockopt(socket->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group,
+                         sizeof(group)),
+              SyscallSucceeds());
+
+  // Send a multicast packet.
+  auto send_addr = V4Multicast();
+  reinterpret_cast<sockaddr_in*>(&send_addr.addr)->sin_port =
+      reinterpret_cast<sockaddr_in*>(&bind_addr.addr)->sin_port;
+  char send_buf[200];
+  RandomizeBuffer(send_buf, sizeof(send_buf));
+  ASSERT_THAT(RetryEINTR(sendto)(socket->get(), send_buf, sizeof(send_buf), 0,
+                                 reinterpret_cast<sockaddr*>(&send_addr.addr),
+                                 send_addr.addr_len),
+              SyscallSucceedsWithValue(sizeof(send_buf)));
+
+  // Check that we did not receive the multicast packet.
+  char recv_buf[sizeof(send_buf)] = {};
+  EXPECT_THAT(
+      RetryEINTR(recv)(socket->get(), recv_buf, sizeof(recv_buf), MSG_DONTWAIT),
+      SyscallFailsWithErrno(EAGAIN));
+}
+
+// Check that multicast packets won't be delivered to another socket with no
+// set interface or group membership.
+TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, TestSendMulticastNoGroup) {
+  // FIXME(b/125485338): A group membership is not required for external
+  // multicast on gVisor.
+  SKIP_IF(IsRunningOnGvisor());
+
+  auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto receiver = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  // Bind the second FD to the v4 any address to ensure that we can receive the
+  // multicast packet.
+  auto receiver_addr = V4Any();
+  ASSERT_THAT(
+      bind(receiver->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+           receiver_addr.addr_len),
+      SyscallSucceeds());
+  socklen_t receiver_addr_len = receiver_addr.addr_len;
+  ASSERT_THAT(getsockname(receiver->get(),
+                          reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+                          &receiver_addr_len),
+              SyscallSucceeds());
+  EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len);
+
+  // Send a multicast packet.
+  auto send_addr = V4Multicast();
+  reinterpret_cast<sockaddr_in*>(&send_addr.addr)->sin_port =
+      reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port;
+  char send_buf[200];
+  RandomizeBuffer(send_buf, sizeof(send_buf));
+  ASSERT_THAT(RetryEINTR(sendto)(sender->get(), send_buf, sizeof(send_buf), 0,
+                                 reinterpret_cast<sockaddr*>(&send_addr.addr),
+                                 send_addr.addr_len),
+              SyscallSucceedsWithValue(sizeof(send_buf)));
+
+  // Check that we did not receive the multicast packet.
+  char recv_buf[sizeof(send_buf)] = {};
+  ASSERT_THAT(RetryEINTR(recv)(receiver->get(), recv_buf, sizeof(recv_buf),
+                               MSG_DONTWAIT),
+              SyscallFailsWithErrno(EAGAIN));
+}
+
+// Check that multicast packets will be delivered to another socket without
+// setting an interface.
+TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, TestSendMulticast) {
+  auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto receiver = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  // Bind the second FD to the v4 any address to ensure that we can receive the
+  // multicast packet.
+  auto receiver_addr = V4Any();
+  ASSERT_THAT(
+      bind(receiver->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+           receiver_addr.addr_len),
+      SyscallSucceeds());
+  socklen_t receiver_addr_len = receiver_addr.addr_len;
+  ASSERT_THAT(getsockname(receiver->get(),
+                          reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+                          &receiver_addr_len),
+              SyscallSucceeds());
+  EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len);
+
+  // Register to receive multicast packets.
+  ip_mreqn group = {};
+  group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress);
+  ASSERT_THAT(setsockopt(receiver->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group,
+                         sizeof(group)),
+              SyscallSucceeds());
+
+  // Send a multicast packet.
+  auto send_addr = V4Multicast();
+  reinterpret_cast<sockaddr_in*>(&send_addr.addr)->sin_port =
+      reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port;
+  char send_buf[200];
+  RandomizeBuffer(send_buf, sizeof(send_buf));
+  ASSERT_THAT(RetryEINTR(sendto)(sender->get(), send_buf, sizeof(send_buf), 0,
+                                 reinterpret_cast<sockaddr*>(&send_addr.addr),
+                                 send_addr.addr_len),
+              SyscallSucceedsWithValue(sizeof(send_buf)));
+
+  // Check that we received the multicast packet.
+  char recv_buf[sizeof(send_buf)] = {};
+  ASSERT_THAT(RetryEINTR(recv)(receiver->get(), recv_buf, sizeof(recv_buf), 0),
+              SyscallSucceedsWithValue(sizeof(recv_buf)));
+
+  EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf)));
+}
+
+// Check that multicast packets won't be delivered to another socket with no
+// set interface and IP_MULTICAST_LOOP disabled on the sending socket.
+TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest,
+       TestSendMulticastSenderNoLoop) {
+  auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto receiver = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  // Bind the second FD to the v4 any address to ensure that we can receive the
+  // multicast packet.
+  auto receiver_addr = V4Any();
+  ASSERT_THAT(
+      bind(receiver->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+           receiver_addr.addr_len),
+      SyscallSucceeds());
+  socklen_t receiver_addr_len = receiver_addr.addr_len;
+  ASSERT_THAT(getsockname(receiver->get(),
+                          reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+                          &receiver_addr_len),
+              SyscallSucceeds());
+  EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len);
+
+  // Disable multicast looping on the sender.
+  EXPECT_THAT(setsockopt(sender->get(), IPPROTO_IP, IP_MULTICAST_LOOP,
+                         &kSockOptOff, sizeof(kSockOptOff)),
+              SyscallSucceeds());
+
+  // Register to receive multicast packets.
+  ip_mreqn group = {};
+  group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress);
+  EXPECT_THAT(setsockopt(receiver->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group,
+                         sizeof(group)),
+              SyscallSucceeds());
+
+  // Send a multicast packet.
+  auto send_addr = V4Multicast();
+  reinterpret_cast<sockaddr_in*>(&send_addr.addr)->sin_port =
+      reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port;
+  char send_buf[200];
+  RandomizeBuffer(send_buf, sizeof(send_buf));
+  ASSERT_THAT(RetryEINTR(sendto)(sender->get(), send_buf, sizeof(send_buf), 0,
+                                 reinterpret_cast<sockaddr*>(&send_addr.addr),
+                                 send_addr.addr_len),
+              SyscallSucceedsWithValue(sizeof(send_buf)));
+
+  // Check that we did not receive the multicast packet.
+  char recv_buf[sizeof(send_buf)] = {};
+  ASSERT_THAT(RetryEINTR(recv)(receiver->get(), recv_buf, sizeof(recv_buf),
+                               MSG_DONTWAIT),
+              SyscallFailsWithErrno(EAGAIN));
+}
+
+// Check that multicast packets will be delivered to another socket without
+// setting an interface, even when IP_MULTICAST_LOOP is disabled on the
+// receiving socket (the option is left untouched on the sender).
+TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest,
+       TestSendMulticastReceiverNoLoop) {
+  auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto receiver = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  // Bind the second FD to the v4 any address to ensure that we can receive the
+  // multicast packet.
+  auto receiver_addr = V4Any();
+  ASSERT_THAT(
+      bind(receiver->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+           receiver_addr.addr_len),
+      SyscallSucceeds());
+  socklen_t receiver_addr_len = receiver_addr.addr_len;
+  ASSERT_THAT(getsockname(receiver->get(),
+                          reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+                          &receiver_addr_len),
+              SyscallSucceeds());
+  EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len);
+
+  // Disable multicast looping on the receiver.
+  ASSERT_THAT(setsockopt(receiver->get(), IPPROTO_IP, IP_MULTICAST_LOOP,
+                         &kSockOptOff, sizeof(kSockOptOff)),
+              SyscallSucceeds());
+
+  // Register to receive multicast packets.
+  ip_mreqn group = {};
+  group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress);
+  ASSERT_THAT(setsockopt(receiver->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group,
+                         sizeof(group)),
+              SyscallSucceeds());
+
+  // Send a multicast packet.
+  auto send_addr = V4Multicast();
+  reinterpret_cast<sockaddr_in*>(&send_addr.addr)->sin_port =
+      reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port;
+  char send_buf[200];
+  RandomizeBuffer(send_buf, sizeof(send_buf));
+  ASSERT_THAT(RetryEINTR(sendto)(sender->get(), send_buf, sizeof(send_buf), 0,
+                                 reinterpret_cast<sockaddr*>(&send_addr.addr),
+                                 send_addr.addr_len),
+              SyscallSucceedsWithValue(sizeof(send_buf)));
+
+  // Check that we received the multicast packet.
+  char recv_buf[sizeof(send_buf)] = {};
+  ASSERT_THAT(RetryEINTR(recv)(receiver->get(), recv_buf, sizeof(recv_buf), 0),
+              SyscallSucceedsWithValue(sizeof(recv_buf)));
+
+  EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf)));
+}
+
+// Check that two sockets can join the same multicast group at the same time,
+// and both will receive data on it when bound to the ANY address.
+TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest,
+       TestSendMulticastToTwoBoundToAny) {
+  auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  std::unique_ptr<FileDescriptor> receivers[2] = {
+      ASSERT_NO_ERRNO_AND_VALUE(NewSocket()),
+      ASSERT_NO_ERRNO_AND_VALUE(NewSocket())};
+
+  ip_mreq group = {};
+  group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress);
+  // receiver_addr is shared by both iterations: getsockname() overwrites it in
+  // the first pass, so the second socket binds to whatever it then holds.
+  auto receiver_addr = V4Any();
+  int bound_port = 0;
+  for (auto& receiver : receivers) {
+    ASSERT_THAT(setsockopt(receiver->get(), SOL_SOCKET, SO_REUSEPORT,
+                           &kSockOptOn, sizeof(kSockOptOn)),
+                SyscallSucceeds());
+    // Bind to ANY to receive multicast packets.
+    ASSERT_THAT(
+        bind(receiver->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+             receiver_addr.addr_len),
+        SyscallSucceeds());
+    socklen_t receiver_addr_len = receiver_addr.addr_len;
+    ASSERT_THAT(getsockname(receiver->get(),
+                            reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+                            &receiver_addr_len),
+                SyscallSucceeds());
+    EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len);
+    EXPECT_EQ(
+        htonl(INADDR_ANY),
+        reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_addr.s_addr);
+    // On the first iteration, save the port we are bound to. On the second
+    // iteration, verify the port is the same as the one from the first
+    // iteration. In other words, both sockets listen on the same port.
+    if (bound_port == 0) {
+      bound_port =
+          reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port;
+    } else {
+      EXPECT_EQ(bound_port,
+                reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port);
+    }
+
+    // Register to receive multicast packets.
+    ASSERT_THAT(setsockopt(receiver->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP,
+                           &group, sizeof(group)),
+                SyscallSucceeds());
+  }
+
+  // Send a multicast packet to the group and verify both receivers get it.
+  auto send_addr = V4Multicast();
+  reinterpret_cast<sockaddr_in*>(&send_addr.addr)->sin_port = bound_port;
+  char send_buf[200];
+  RandomizeBuffer(send_buf, sizeof(send_buf));
+  ASSERT_THAT(RetryEINTR(sendto)(sender->get(), send_buf, sizeof(send_buf), 0,
+                                 reinterpret_cast<sockaddr*>(&send_addr.addr),
+                                 send_addr.addr_len),
+              SyscallSucceedsWithValue(sizeof(send_buf)));
+  for (auto& receiver : receivers) {
+    char recv_buf[sizeof(send_buf)] = {};
+    ASSERT_THAT(
+        RetryEINTR(recv)(receiver->get(), recv_buf, sizeof(recv_buf), 0),
+        SyscallSucceedsWithValue(sizeof(recv_buf)));
+    EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf)));
+  }
+}
+
+// Check that two sockets can join the same multicast group at the same time,
+// and both will receive data on it when bound to the multicast address.
+TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, + TestSendMulticastToTwoBoundToMulticastAddress) { + auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + std::unique_ptr<FileDescriptor> receivers[2] = { + ASSERT_NO_ERRNO_AND_VALUE(NewSocket()), + ASSERT_NO_ERRNO_AND_VALUE(NewSocket())}; + + ip_mreq group = {}; + group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress); + auto receiver_addr = V4Multicast(); + int bound_port = 0; + for (auto& receiver : receivers) { + ASSERT_THAT(setsockopt(receiver->get(), SOL_SOCKET, SO_REUSEPORT, + &kSockOptOn, sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT( + bind(receiver->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr), + receiver_addr.addr_len), + SyscallSucceeds()); + socklen_t receiver_addr_len = receiver_addr.addr_len; + ASSERT_THAT(getsockname(receiver->get(), + reinterpret_cast<sockaddr*>(&receiver_addr.addr), + &receiver_addr_len), + SyscallSucceeds()); + EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len); + EXPECT_EQ( + inet_addr(kMulticastAddress), + reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_addr.s_addr); + // On the first iteration, save the port we are bound to. On the second + // iteration, verify the port is the same as the one from the first + // iteration. In other words, both sockets listen on the same port. + if (bound_port == 0) { + bound_port = + reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port; + } else { + EXPECT_EQ( + inet_addr(kMulticastAddress), + reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_addr.s_addr); + EXPECT_EQ(bound_port, + reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port); + } + + // Register to receive multicast packets. + ASSERT_THAT(setsockopt(receiver->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, + &group, sizeof(group)), + SyscallSucceeds()); + } + + // Send a multicast packet to the group and verify both receivers get it. 
+ auto send_addr = V4Multicast(); + reinterpret_cast<sockaddr_in*>(&send_addr.addr)->sin_port = bound_port; + char send_buf[200]; + RandomizeBuffer(send_buf, sizeof(send_buf)); + ASSERT_THAT(RetryEINTR(sendto)(sender->get(), send_buf, sizeof(send_buf), 0, + reinterpret_cast<sockaddr*>(&send_addr.addr), + send_addr.addr_len), + SyscallSucceedsWithValue(sizeof(send_buf))); + for (auto& receiver : receivers) { + char recv_buf[sizeof(send_buf)] = {}; + ASSERT_THAT( + RetryEINTR(recv)(receiver->get(), recv_buf, sizeof(recv_buf), 0), + SyscallSucceedsWithValue(sizeof(recv_buf))); + EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf))); + } +} + +// Check that two sockets can join the same multicast group at the same time, +// and with one bound to the wildcard address and the other bound to the +// multicast address, both will receive data. +TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, + TestSendMulticastToTwoBoundToAnyAndMulticastAddress) { + auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + std::unique_ptr<FileDescriptor> receivers[2] = { + ASSERT_NO_ERRNO_AND_VALUE(NewSocket()), + ASSERT_NO_ERRNO_AND_VALUE(NewSocket())}; + + ip_mreq group = {}; + group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress); + // The first receiver binds to the wildcard address. 
+ auto receiver_addr = V4Any(); + int bound_port = 0; + for (auto& receiver : receivers) { + ASSERT_THAT(setsockopt(receiver->get(), SOL_SOCKET, SO_REUSEPORT, + &kSockOptOn, sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT( + bind(receiver->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr), + receiver_addr.addr_len), + SyscallSucceeds()); + socklen_t receiver_addr_len = receiver_addr.addr_len; + ASSERT_THAT(getsockname(receiver->get(), + reinterpret_cast<sockaddr*>(&receiver_addr.addr), + &receiver_addr_len), + SyscallSucceeds()); + EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len); + // On the first iteration, save the port we are bound to and change the + // receiver address from V4Any to V4Multicast so the second receiver binds + // to that. On the second iteration, verify the port is the same as the one + // from the first iteration but the address is different. + if (bound_port == 0) { + EXPECT_EQ( + htonl(INADDR_ANY), + reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_addr.s_addr); + bound_port = + reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port; + receiver_addr = V4Multicast(); + reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port = + bound_port; + } else { + EXPECT_EQ( + inet_addr(kMulticastAddress), + reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_addr.s_addr); + EXPECT_EQ(bound_port, + reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port); + } + + // Register to receive multicast packets. + ASSERT_THAT(setsockopt(receiver->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, + &group, sizeof(group)), + SyscallSucceeds()); + } + + // Send a multicast packet to the group and verify both receivers get it. 
+ auto send_addr = V4Multicast(); + reinterpret_cast<sockaddr_in*>(&send_addr.addr)->sin_port = bound_port; + char send_buf[200]; + RandomizeBuffer(send_buf, sizeof(send_buf)); + ASSERT_THAT(RetryEINTR(sendto)(sender->get(), send_buf, sizeof(send_buf), 0, + reinterpret_cast<sockaddr*>(&send_addr.addr), + send_addr.addr_len), + SyscallSucceedsWithValue(sizeof(send_buf))); + for (auto& receiver : receivers) { + char recv_buf[sizeof(send_buf)] = {}; + ASSERT_THAT( + RetryEINTR(recv)(receiver->get(), recv_buf, sizeof(recv_buf), 0), + SyscallSucceedsWithValue(sizeof(recv_buf))); + EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf))); + } +} + +// Check that when receiving a looped-back multicast packet, its source address +// is not a multicast address. +TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, + IpMulticastLoopbackFromAddr) { + auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto receiver = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + auto receiver_addr = V4Any(); + ASSERT_THAT( + bind(receiver->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr), + receiver_addr.addr_len), + SyscallSucceeds()); + socklen_t receiver_addr_len = receiver_addr.addr_len; + ASSERT_THAT(getsockname(receiver->get(), + reinterpret_cast<sockaddr*>(&receiver_addr.addr), + &receiver_addr_len), + SyscallSucceeds()); + EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len); + int receiver_port = + reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port; + + ip_mreq group = {}; + group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress); + ASSERT_THAT(setsockopt(receiver->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group, + sizeof(group)), + SyscallSucceeds()); + + // Connect to the multicast address. This binds us to the outgoing interface + // and allows us to get its IP (to be compared against the src-IP on the + // receiver side). 
+ auto sendto_addr = V4Multicast(); + reinterpret_cast<sockaddr_in*>(&sendto_addr.addr)->sin_port = receiver_port; + ASSERT_THAT(RetryEINTR(connect)( + sender->get(), reinterpret_cast<sockaddr*>(&sendto_addr.addr), + sendto_addr.addr_len), + SyscallSucceeds()); + auto sender_addr = V4EmptyAddress(); + ASSERT_THAT( + getsockname(sender->get(), reinterpret_cast<sockaddr*>(&sender_addr.addr), + &sender_addr.addr_len), + SyscallSucceeds()); + ASSERT_EQ(sizeof(struct sockaddr_in), sender_addr.addr_len); + sockaddr_in* sender_addr_in = + reinterpret_cast<sockaddr_in*>(&sender_addr.addr); + + // Send a multicast packet. + char send_buf[4] = {}; + ASSERT_THAT(RetryEINTR(send)(sender->get(), send_buf, sizeof(send_buf), 0), + SyscallSucceedsWithValue(sizeof(send_buf))); + + // Receive a multicast packet. + char recv_buf[sizeof(send_buf)] = {}; + auto src_addr = V4EmptyAddress(); + ASSERT_THAT( + RetryEINTR(recvfrom)(receiver->get(), recv_buf, sizeof(recv_buf), 0, + reinterpret_cast<sockaddr*>(&src_addr.addr), + &src_addr.addr_len), + SyscallSucceedsWithValue(sizeof(recv_buf))); + ASSERT_EQ(sizeof(struct sockaddr_in), src_addr.addr_len); + sockaddr_in* src_addr_in = reinterpret_cast<sockaddr_in*>(&src_addr.addr); + + // Verify that the received source IP:port matches the sender one. + EXPECT_EQ(sender_addr_in->sin_port, src_addr_in->sin_port); + EXPECT_EQ(sender_addr_in->sin_addr.s_addr, src_addr_in->sin_addr.s_addr); +} + +// Check that when setting the IP_MULTICAST_IF option to both an index pointing +// to the loopback interface and an address pointing to the non-loopback +// interface, a multicast packet sent out uses the latter as its source address. +TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, + IpMulticastLoopbackIfNicAndAddr) { + // FIXME(b/137899561): Linux instance for syscall tests sometimes misses its + // IPv4 address on eth0. + SKIP_IF(!got_if_infos_); + + // Create receiver, bind to ANY and join the multicast group. 
+ auto receiver = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto receiver_addr = V4Any(); + ASSERT_THAT( + bind(receiver->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr), + receiver_addr.addr_len), + SyscallSucceeds()); + socklen_t receiver_addr_len = receiver_addr.addr_len; + ASSERT_THAT(getsockname(receiver->get(), + reinterpret_cast<sockaddr*>(&receiver_addr.addr), + &receiver_addr_len), + SyscallSucceeds()); + EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len); + int receiver_port = + reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port; + ip_mreqn group = {}; + group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress); + group.imr_ifindex = lo_if_idx_; + ASSERT_THAT(setsockopt(receiver->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group, + sizeof(group)), + SyscallSucceeds()); + + // Set outgoing multicast interface config, with NIC and addr pointing to + // different interfaces. + auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + ip_mreqn iface = {}; + iface.imr_ifindex = lo_if_idx_; + iface.imr_address = eth_if_addr_.sin_addr; + ASSERT_THAT(setsockopt(sender->get(), IPPROTO_IP, IP_MULTICAST_IF, &iface, + sizeof(iface)), + SyscallSucceeds()); + + // Send a multicast packet. + auto sendto_addr = V4Multicast(); + reinterpret_cast<sockaddr_in*>(&sendto_addr.addr)->sin_port = receiver_port; + char send_buf[4] = {}; + ASSERT_THAT(RetryEINTR(sendto)(sender->get(), send_buf, sizeof(send_buf), 0, + reinterpret_cast<sockaddr*>(&sendto_addr.addr), + sendto_addr.addr_len), + SyscallSucceedsWithValue(sizeof(send_buf))); + + // Receive a multicast packet. 
+ char recv_buf[sizeof(send_buf)] = {}; + auto src_addr = V4EmptyAddress(); + ASSERT_THAT( + RetryEINTR(recvfrom)(receiver->get(), recv_buf, sizeof(recv_buf), 0, + reinterpret_cast<sockaddr*>(&src_addr.addr), + &src_addr.addr_len), + SyscallSucceedsWithValue(sizeof(recv_buf))); + ASSERT_EQ(sizeof(struct sockaddr_in), src_addr.addr_len); + sockaddr_in* src_addr_in = reinterpret_cast<sockaddr_in*>(&src_addr.addr); + + // FIXME (b/137781162): When sending a multicast packet use the proper logic + // to determine the packet's src-IP. + SKIP_IF(IsRunningOnGvisor()); + + // Verify the received source address. + EXPECT_EQ(eth_if_addr_.sin_addr.s_addr, src_addr_in->sin_addr.s_addr); +} + +// Check that when we are bound to one interface we can set IP_MULTICAST_IF to +// another interface. +TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, + IpMulticastLoopbackBindToOneIfSetMcastIfToAnother) { + // FIXME(b/137899561): Linux instance for syscall tests sometimes misses its + // IPv4 address on eth0. + SKIP_IF(!got_if_infos_); + + // FIXME (b/137790511): When bound to one interface it is not possible to set + // IP_MULTICAST_IF to a different interface. + SKIP_IF(IsRunningOnGvisor()); + + // Create sender and bind to eth interface. + auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + ASSERT_THAT(bind(sender->get(), reinterpret_cast<sockaddr*>(ð_if_addr_), + sizeof(eth_if_addr_)), + SyscallSucceeds()); + + // Run through all possible combinations of index and address for + // IP_MULTICAST_IF that selects the loopback interface. 
+ struct { + int imr_ifindex; + struct in_addr imr_address; + } test_data[] = { + {lo_if_idx_, {}}, + {0, lo_if_addr_.sin_addr}, + {lo_if_idx_, lo_if_addr_.sin_addr}, + {lo_if_idx_, eth_if_addr_.sin_addr}, + }; + for (auto t : test_data) { + ip_mreqn iface = {}; + iface.imr_ifindex = t.imr_ifindex; + iface.imr_address = t.imr_address; + EXPECT_THAT(setsockopt(sender->get(), IPPROTO_IP, IP_MULTICAST_IF, &iface, + sizeof(iface)), + SyscallSucceeds()) + << "imr_index=" << iface.imr_ifindex + << " imr_address=" << GetAddr4Str(&iface.imr_address); + } +} +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking.h b/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking.h new file mode 100644 index 000000000..10b90b1e0 --- /dev/null +++ b/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking.h @@ -0,0 +1,46 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IPV4_UDP_UNBOUND_EXTERNAL_NETWORKING_H_ +#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IPV4_UDP_UNBOUND_EXTERNAL_NETWORKING_H_ + +#include "test/syscalls/linux/ip_socket_test_util.h" +#include "test/syscalls/linux/socket_test_util.h" + +namespace gvisor { +namespace testing { + +// Test fixture for tests that apply to unbound IPv4 UDP sockets in a sandbox +// with external networking support. 
+class IPv4UDPUnboundExternalNetworkingSocketTest : public SimpleSocketTest { + protected: + void SetUp(); + + IfAddrHelper if_helper_; + + // got_if_infos_ is set to false if SetUp() could not obtain all interface + // infos that we need. + bool got_if_infos_; + + // Interface infos. + int lo_if_idx_; + int eth_if_idx_; + sockaddr_in lo_if_addr_; + sockaddr_in eth_if_addr_; +}; + +} // namespace testing +} // namespace gvisor + +#endif // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IPV4_UDP_UNBOUND_EXTERNAL_NETWORKING_H_ diff --git a/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking_test.cc b/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking_test.cc new file mode 100644 index 000000000..f6e64c157 --- /dev/null +++ b/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking_test.cc @@ -0,0 +1,39 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#include "test/syscalls/linux/socket_ipv4_udp_unbound_external_networking.h"
+
+#include <vector>
+
+#include "test/syscalls/linux/ip_socket_test_util.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+// Returns the socket kinds the suite below is instantiated with: the result of
+// applying IPv4UDPUnboundSocket to every bitwise combination of the flag list
+// {0, SOCK_NONBLOCK}.
+std::vector<SocketKind> GetSockets() {
+  return ApplyVec<SocketKind>(
+      IPv4UDPUnboundSocket,
+      AllBitwiseCombinations(List<int>{0, SOCK_NONBLOCK}));
+}
+
+// Instantiate every IPv4UDPUnboundExternalNetworkingSocketTest case once per
+// socket kind returned by GetSockets().
+INSTANTIATE_TEST_SUITE_P(IPv4UDPUnboundSockets,
+                         IPv4UDPUnboundExternalNetworkingSocketTest,
+                         ::testing::ValuesIn(GetSockets()));
+
+}  // namespace
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_ipv4_udp_unbound_loopback.cc b/test/syscalls/linux/socket_ipv4_udp_unbound_loopback.cc
new file mode 100644
index 000000000..f121c044d
--- /dev/null
+++ b/test/syscalls/linux/socket_ipv4_udp_unbound_loopback.cc
@@ -0,0 +1,32 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <vector>
+
+#include "test/syscalls/linux/ip_socket_test_util.h"
+#include "test/syscalls/linux/socket_ipv4_udp_unbound.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// Instantiate every IPv4UDPUnboundSocketTest case once per socket kind
+// produced by applying IPv4UDPUnboundSocket to each bitwise combination of the
+// flag list {0, SOCK_NONBLOCK}.
+INSTANTIATE_TEST_SUITE_P(
+    IPv4UDPSockets, IPv4UDPUnboundSocketTest,
+    ::testing::ValuesIn(ApplyVec<SocketKind>(IPv4UDPUnboundSocket,
+                                             AllBitwiseCombinations(List<int>{
+                                                 0, SOCK_NONBLOCK}))));
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_netdevice.cc b/test/syscalls/linux/socket_netdevice.cc
new file mode 100644
index 000000000..15d4b85a7
--- /dev/null
+++ b/test/syscalls/linux/socket_netdevice.cc
@@ -0,0 +1,184 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <linux/sockios.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+
+#include "gtest/gtest.h"
+#include "absl/base/internal/endian.h"
+#include "test/syscalls/linux/socket_netlink_util.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/test_util.h"
+
+// Tests for netdevice queries.
+ +namespace gvisor { +namespace testing { + +namespace { + +using ::testing::AnyOf; +using ::testing::Eq; + +TEST(NetdeviceTest, Loopback) { + FileDescriptor sock = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, 0)); + + // Prepare the request. + struct ifreq ifr; + snprintf(ifr.ifr_name, IFNAMSIZ, "lo"); + + // Check for a non-zero interface index. + ASSERT_THAT(ioctl(sock.get(), SIOCGIFINDEX, &ifr), SyscallSucceeds()); + EXPECT_NE(ifr.ifr_ifindex, 0); + + // Check that the loopback is zero hardware address. + ASSERT_THAT(ioctl(sock.get(), SIOCGIFHWADDR, &ifr), SyscallSucceeds()); + EXPECT_EQ(ifr.ifr_hwaddr.sa_data[0], 0); + EXPECT_EQ(ifr.ifr_hwaddr.sa_data[1], 0); + EXPECT_EQ(ifr.ifr_hwaddr.sa_data[2], 0); + EXPECT_EQ(ifr.ifr_hwaddr.sa_data[3], 0); + EXPECT_EQ(ifr.ifr_hwaddr.sa_data[4], 0); + EXPECT_EQ(ifr.ifr_hwaddr.sa_data[5], 0); +} + +TEST(NetdeviceTest, Netmask) { + // We need an interface index to identify the loopback device. + FileDescriptor sock = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, 0)); + struct ifreq ifr; + snprintf(ifr.ifr_name, IFNAMSIZ, "lo"); + ASSERT_THAT(ioctl(sock.get(), SIOCGIFINDEX, &ifr), SyscallSucceeds()); + EXPECT_NE(ifr.ifr_ifindex, 0); + + // Use a netlink socket to get the netmask, which we'll then compare to the + // netmask obtained via ioctl. + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); + uint32_t port = ASSERT_NO_ERRNO_AND_VALUE(NetlinkPortID(fd.get())); + + struct request { + struct nlmsghdr hdr; + struct rtgenmsg rgm; + }; + + constexpr uint32_t kSeq = 12345; + + struct request req; + req.hdr.nlmsg_len = sizeof(req); + req.hdr.nlmsg_type = RTM_GETADDR; + req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP; + req.hdr.nlmsg_seq = kSeq; + req.rgm.rtgen_family = AF_UNSPEC; + + // Iterate through messages until we find the one containing the prefix length + // (i.e. netmask) for the loopback device. 
+ int prefixlen = -1; + ASSERT_NO_ERRNO(NetlinkRequestResponse( + fd, &req, sizeof(req), + [&](const struct nlmsghdr* hdr) { + EXPECT_THAT(hdr->nlmsg_type, AnyOf(Eq(RTM_NEWADDR), Eq(NLMSG_DONE))); + + EXPECT_TRUE((hdr->nlmsg_flags & NLM_F_MULTI) == NLM_F_MULTI) + << std::hex << hdr->nlmsg_flags; + + EXPECT_EQ(hdr->nlmsg_seq, kSeq); + EXPECT_EQ(hdr->nlmsg_pid, port); + + if (hdr->nlmsg_type != RTM_NEWADDR) { + return; + } + + // RTM_NEWADDR contains at least the header and ifaddrmsg. + EXPECT_GE(hdr->nlmsg_len, sizeof(*hdr) + sizeof(struct ifaddrmsg)); + + struct ifaddrmsg* ifaddrmsg = + reinterpret_cast<struct ifaddrmsg*>(NLMSG_DATA(hdr)); + if (ifaddrmsg->ifa_index == static_cast<uint32_t>(ifr.ifr_ifindex) && + ifaddrmsg->ifa_family == AF_INET) { + prefixlen = ifaddrmsg->ifa_prefixlen; + } + }, + false)); + + ASSERT_GE(prefixlen, 0); + + // Netmask is stored big endian in struct sockaddr_in, so we do the same for + // comparison. + uint32_t mask = 0xffffffff << (32 - prefixlen); + mask = absl::gbswap_32(mask); + + // Check that the loopback interface has the correct subnet mask. + snprintf(ifr.ifr_name, IFNAMSIZ, "lo"); + ASSERT_THAT(ioctl(sock.get(), SIOCGIFNETMASK, &ifr), SyscallSucceeds()); + EXPECT_EQ(ifr.ifr_netmask.sa_family, AF_INET); + struct sockaddr_in* sin = + reinterpret_cast<struct sockaddr_in*>(&ifr.ifr_netmask); + EXPECT_EQ(sin->sin_addr.s_addr, mask); +} + +TEST(NetdeviceTest, InterfaceName) { + FileDescriptor sock = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, 0)); + + // Prepare the request. + struct ifreq ifr; + snprintf(ifr.ifr_name, IFNAMSIZ, "lo"); + + // Check for a non-zero interface index. + ASSERT_THAT(ioctl(sock.get(), SIOCGIFINDEX, &ifr), SyscallSucceeds()); + EXPECT_NE(ifr.ifr_ifindex, 0); + + // Check that SIOCGIFNAME finds the loopback interface. 
+ snprintf(ifr.ifr_name, IFNAMSIZ, "foo"); + ASSERT_THAT(ioctl(sock.get(), SIOCGIFNAME, &ifr), SyscallSucceeds()); + EXPECT_STREQ(ifr.ifr_name, "lo"); +} + +TEST(NetdeviceTest, InterfaceFlags) { + FileDescriptor sock = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, 0)); + + // Prepare the request. + struct ifreq ifr; + snprintf(ifr.ifr_name, IFNAMSIZ, "lo"); + + // Check that SIOCGIFFLAGS marks the interface with IFF_LOOPBACK, IFF_UP, and + // IFF_RUNNING. + ASSERT_THAT(ioctl(sock.get(), SIOCGIFFLAGS, &ifr), SyscallSucceeds()); + EXPECT_EQ(ifr.ifr_flags & IFF_UP, IFF_UP); + EXPECT_EQ(ifr.ifr_flags & IFF_RUNNING, IFF_RUNNING); +} + +TEST(NetdeviceTest, InterfaceMTU) { + FileDescriptor sock = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, 0)); + + // Prepare the request. + struct ifreq ifr = {}; + snprintf(ifr.ifr_name, IFNAMSIZ, "lo"); + + // Check that SIOCGIFMTU returns a nonzero MTU. + ASSERT_THAT(ioctl(sock.get(), SIOCGIFMTU, &ifr), SyscallSucceeds()); + EXPECT_GT(ifr.ifr_mtu, 0); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_netlink.cc b/test/syscalls/linux/socket_netlink.cc new file mode 100644 index 000000000..4ec0fd4fa --- /dev/null +++ b/test/syscalls/linux/socket_netlink.cc @@ -0,0 +1,153 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <linux/netlink.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/test_util.h" + +// Tests for all netlink socket protocols. + +namespace gvisor { +namespace testing { + +namespace { + +// NetlinkTest parameter is the protocol to test. +using NetlinkTest = ::testing::TestWithParam<int>; + +// Netlink sockets must be SOCK_DGRAM or SOCK_RAW. +TEST_P(NetlinkTest, Types) { + const int protocol = GetParam(); + + EXPECT_THAT(socket(AF_NETLINK, SOCK_STREAM, protocol), + SyscallFailsWithErrno(ESOCKTNOSUPPORT)); + EXPECT_THAT(socket(AF_NETLINK, SOCK_SEQPACKET, protocol), + SyscallFailsWithErrno(ESOCKTNOSUPPORT)); + EXPECT_THAT(socket(AF_NETLINK, SOCK_RDM, protocol), + SyscallFailsWithErrno(ESOCKTNOSUPPORT)); + EXPECT_THAT(socket(AF_NETLINK, SOCK_DCCP, protocol), + SyscallFailsWithErrno(ESOCKTNOSUPPORT)); + EXPECT_THAT(socket(AF_NETLINK, SOCK_PACKET, protocol), + SyscallFailsWithErrno(ESOCKTNOSUPPORT)); + + int fd; + EXPECT_THAT(fd = socket(AF_NETLINK, SOCK_DGRAM, protocol), SyscallSucceeds()); + EXPECT_THAT(close(fd), SyscallSucceeds()); + + EXPECT_THAT(fd = socket(AF_NETLINK, SOCK_RAW, protocol), SyscallSucceeds()); + EXPECT_THAT(close(fd), SyscallSucceeds()); +} + +TEST_P(NetlinkTest, AutomaticPort) { + const int protocol = GetParam(); + + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_NETLINK, SOCK_RAW, protocol)); + + struct sockaddr_nl addr = {}; + addr.nl_family = AF_NETLINK; + + EXPECT_THAT( + bind(fd.get(), reinterpret_cast<struct sockaddr*>(&addr), sizeof(addr)), + SyscallSucceeds()); + + socklen_t addrlen = sizeof(addr); + EXPECT_THAT(getsockname(fd.get(), reinterpret_cast<struct sockaddr*>(&addr), + &addrlen), + SyscallSucceeds()); + EXPECT_EQ(addrlen, sizeof(addr)); + // This is the only netlink socket in the process, so it should get the PID as + // the port id. + // + // N.B. 
Another process could theoretically have explicitly reserved our pid + // as a port ID, but that is very unlikely. + EXPECT_EQ(addr.nl_pid, getpid()); +} + +// Calling connect automatically binds to an automatic port. +TEST_P(NetlinkTest, ConnectBinds) { + const int protocol = GetParam(); + + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_NETLINK, SOCK_RAW, protocol)); + + struct sockaddr_nl addr = {}; + addr.nl_family = AF_NETLINK; + + EXPECT_THAT(connect(fd.get(), reinterpret_cast<struct sockaddr*>(&addr), + sizeof(addr)), + SyscallSucceeds()); + + socklen_t addrlen = sizeof(addr); + EXPECT_THAT(getsockname(fd.get(), reinterpret_cast<struct sockaddr*>(&addr), + &addrlen), + SyscallSucceeds()); + EXPECT_EQ(addrlen, sizeof(addr)); + + // Each test is running in a pid namespace, so another process can explicitly + // reserve our pid as a port ID. In this case, a negative portid value will be + // set. + if (static_cast<pid_t>(addr.nl_pid) > 0) { + EXPECT_EQ(addr.nl_pid, getpid()); + } + + memset(&addr, 0, sizeof(addr)); + addr.nl_family = AF_NETLINK; + + // Connecting again is allowed, but keeps the same port. + EXPECT_THAT(connect(fd.get(), reinterpret_cast<struct sockaddr*>(&addr), + sizeof(addr)), + SyscallSucceeds()); + + addrlen = sizeof(addr); + EXPECT_THAT(getsockname(fd.get(), reinterpret_cast<struct sockaddr*>(&addr), + &addrlen), + SyscallSucceeds()); + EXPECT_EQ(addrlen, sizeof(addr)); + EXPECT_EQ(addr.nl_pid, getpid()); +} + +TEST_P(NetlinkTest, GetPeerName) { + const int protocol = GetParam(); + + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_NETLINK, SOCK_RAW, protocol)); + + struct sockaddr_nl addr = {}; + socklen_t addrlen = sizeof(addr); + + EXPECT_THAT(getpeername(fd.get(), reinterpret_cast<struct sockaddr*>(&addr), + &addrlen), + SyscallSucceeds()); + + EXPECT_EQ(addrlen, sizeof(addr)); + EXPECT_EQ(addr.nl_family, AF_NETLINK); + // Peer is the kernel if we didn't connect elsewhere. 
+ EXPECT_EQ(addr.nl_pid, 0); +} + +INSTANTIATE_TEST_SUITE_P(ProtocolTest, NetlinkTest, + ::testing::Values(NETLINK_ROUTE, + NETLINK_KOBJECT_UEVENT)); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_netlink_route.cc b/test/syscalls/linux/socket_netlink_route.cc new file mode 100644 index 000000000..e6647a1c3 --- /dev/null +++ b/test/syscalls/linux/socket_netlink_route.cc @@ -0,0 +1,935 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <arpa/inet.h> +#include <ifaddrs.h> +#include <linux/if.h> +#include <linux/netlink.h> +#include <linux/rtnetlink.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <unistd.h> + +#include <iostream> +#include <vector> + +#include "gtest/gtest.h" +#include "absl/strings/str_format.h" +#include "test/syscalls/linux/socket_netlink_route_util.h" +#include "test/syscalls/linux/socket_netlink_util.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/capability_util.h" +#include "test/util/cleanup.h" +#include "test/util/file_descriptor.h" +#include "test/util/test_util.h" + +// Tests for NETLINK_ROUTE sockets. + +namespace gvisor { +namespace testing { + +namespace { + +constexpr uint32_t kSeq = 12345; + +using ::testing::AnyOf; +using ::testing::Eq; + +// Parameters for SockOptTest. They are: +// 0: Socket option to query. +// 1: A predicate to run on the returned sockopt value. 
Should return true if +// the value is considered ok. +// 2: A description of what the sockopt value is expected to be. Should complete +// the sentence "<value> was unexpected, expected <description>" +using SockOptTest = ::testing::TestWithParam< + std::tuple<int, std::function<bool(int)>, std::string>>; + +TEST_P(SockOptTest, GetSockOpt) { + int sockopt = std::get<0>(GetParam()); + auto verifier = std::get<1>(GetParam()); + std::string verifier_description = std::get<2>(GetParam()); + + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE)); + + int res; + socklen_t len = sizeof(res); + + EXPECT_THAT(getsockopt(fd.get(), SOL_SOCKET, sockopt, &res, &len), + SyscallSucceeds()); + + EXPECT_EQ(len, sizeof(res)); + EXPECT_TRUE(verifier(res)) << absl::StrFormat( + "getsockopt(%d, SOL_SOCKET, %d, &res, &len) => res=%d was unexpected, " + "expected %s", + fd.get(), sockopt, res, verifier_description); +} + +std::function<bool(int)> IsPositive() { + return [](int val) { return val > 0; }; +} + +std::function<bool(int)> IsEqual(int target) { + return [target](int val) { return val == target; }; +} + +INSTANTIATE_TEST_SUITE_P( + NetlinkRouteTest, SockOptTest, + ::testing::Values( + std::make_tuple(SO_SNDBUF, IsPositive(), "positive send buffer size"), + std::make_tuple(SO_RCVBUF, IsPositive(), + "positive receive buffer size"), + std::make_tuple(SO_TYPE, IsEqual(SOCK_RAW), + absl::StrFormat("SOCK_RAW (%d)", SOCK_RAW)), + std::make_tuple(SO_DOMAIN, IsEqual(AF_NETLINK), + absl::StrFormat("AF_NETLINK (%d)", AF_NETLINK)), + std::make_tuple(SO_PROTOCOL, IsEqual(NETLINK_ROUTE), + absl::StrFormat("NETLINK_ROUTE (%d)", NETLINK_ROUTE)), + std::make_tuple(SO_PASSCRED, IsEqual(0), "0"))); + +// Validates the reponses to RTM_GETLINK + NLM_F_DUMP. 
+void CheckGetLinkResponse(const struct nlmsghdr* hdr, int seq, int port) { + EXPECT_THAT(hdr->nlmsg_type, AnyOf(Eq(RTM_NEWLINK), Eq(NLMSG_DONE))); + + EXPECT_TRUE((hdr->nlmsg_flags & NLM_F_MULTI) == NLM_F_MULTI) + << std::hex << hdr->nlmsg_flags; + + EXPECT_EQ(hdr->nlmsg_seq, seq); + EXPECT_EQ(hdr->nlmsg_pid, port); + + if (hdr->nlmsg_type != RTM_NEWLINK) { + return; + } + + // RTM_NEWLINK contains at least the header and ifinfomsg. + EXPECT_GE(hdr->nlmsg_len, NLMSG_SPACE(sizeof(struct ifinfomsg))); + + // TODO(mpratt): Check ifinfomsg contents and following attrs. +} + +TEST(NetlinkRouteTest, GetLinkDump) { + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); + uint32_t port = ASSERT_NO_ERRNO_AND_VALUE(NetlinkPortID(fd.get())); + + // Loopback is common among all tests, check that it's found. + bool loopbackFound = false; + ASSERT_NO_ERRNO(DumpLinks(fd, kSeq, [&](const struct nlmsghdr* hdr) { + CheckGetLinkResponse(hdr, kSeq, port); + if (hdr->nlmsg_type != RTM_NEWLINK) { + return; + } + ASSERT_GE(hdr->nlmsg_len, NLMSG_SPACE(sizeof(struct ifinfomsg))); + const struct ifinfomsg* msg = + reinterpret_cast<const struct ifinfomsg*>(NLMSG_DATA(hdr)); + std::cout << "Found interface idx=" << msg->ifi_index + << ", type=" << std::hex << msg->ifi_type << std::endl; + if (msg->ifi_type == ARPHRD_LOOPBACK) { + loopbackFound = true; + EXPECT_NE(msg->ifi_flags & IFF_LOOPBACK, 0); + } + })); + EXPECT_TRUE(loopbackFound); +} + +// CheckLinkMsg checks a netlink message against an expected link. 
+void CheckLinkMsg(const struct nlmsghdr* hdr, const Link& link) { + ASSERT_THAT(hdr->nlmsg_type, Eq(RTM_NEWLINK)); + ASSERT_GE(hdr->nlmsg_len, NLMSG_SPACE(sizeof(struct ifinfomsg))); + const struct ifinfomsg* msg = + reinterpret_cast<const struct ifinfomsg*>(NLMSG_DATA(hdr)); + EXPECT_EQ(msg->ifi_index, link.index); + + const struct rtattr* rta = FindRtAttr(hdr, msg, IFLA_IFNAME); + EXPECT_NE(nullptr, rta) << "IFLA_IFNAME not found in message."; + if (rta != nullptr) { + std::string name(reinterpret_cast<const char*>(RTA_DATA(rta))); + EXPECT_EQ(name, link.name); + } +} + +TEST(NetlinkRouteTest, GetLinkByIndex) { + Link loopback_link = ASSERT_NO_ERRNO_AND_VALUE(LoopbackLink()); + + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); + + struct request { + struct nlmsghdr hdr; + struct ifinfomsg ifm; + }; + + struct request req = {}; + req.hdr.nlmsg_len = sizeof(req); + req.hdr.nlmsg_type = RTM_GETLINK; + req.hdr.nlmsg_flags = NLM_F_REQUEST; + req.hdr.nlmsg_seq = kSeq; + req.ifm.ifi_family = AF_UNSPEC; + req.ifm.ifi_index = loopback_link.index; + + bool found = false; + ASSERT_NO_ERRNO(NetlinkRequestResponse( + fd, &req, sizeof(req), + [&](const struct nlmsghdr* hdr) { + CheckLinkMsg(hdr, loopback_link); + found = true; + }, + false)); + EXPECT_TRUE(found) << "Netlink response does not contain any links."; +} + +TEST(NetlinkRouteTest, GetLinkByName) { + Link loopback_link = ASSERT_NO_ERRNO_AND_VALUE(LoopbackLink()); + + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); + + struct request { + struct nlmsghdr hdr; + struct ifinfomsg ifm; + struct rtattr rtattr; + char ifname[IFNAMSIZ]; + char pad[NLMSG_ALIGNTO + RTA_ALIGNTO]; + }; + + struct request req = {}; + req.hdr.nlmsg_type = RTM_GETLINK; + req.hdr.nlmsg_flags = NLM_F_REQUEST; + req.hdr.nlmsg_seq = kSeq; + req.ifm.ifi_family = AF_UNSPEC; + req.rtattr.rta_type = IFLA_IFNAME; + req.rtattr.rta_len = RTA_LENGTH(loopback_link.name.size() + 1); + 
strncpy(req.ifname, loopback_link.name.c_str(), sizeof(req.ifname)); + req.hdr.nlmsg_len = + NLMSG_LENGTH(sizeof(req.ifm)) + NLMSG_ALIGN(req.rtattr.rta_len); + + bool found = false; + ASSERT_NO_ERRNO(NetlinkRequestResponse( + fd, &req, sizeof(req), + [&](const struct nlmsghdr* hdr) { + CheckLinkMsg(hdr, loopback_link); + found = true; + }, + false)); + EXPECT_TRUE(found) << "Netlink response does not contain any links."; +} + +TEST(NetlinkRouteTest, GetLinkByIndexNotFound) { + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); + + struct request { + struct nlmsghdr hdr; + struct ifinfomsg ifm; + }; + + struct request req = {}; + req.hdr.nlmsg_len = sizeof(req); + req.hdr.nlmsg_type = RTM_GETLINK; + req.hdr.nlmsg_flags = NLM_F_REQUEST; + req.hdr.nlmsg_seq = kSeq; + req.ifm.ifi_family = AF_UNSPEC; + req.ifm.ifi_index = 1234590; + + EXPECT_THAT(NetlinkRequestAckOrError(fd, kSeq, &req, sizeof(req)), + PosixErrorIs(ENODEV, ::testing::_)); +} + +TEST(NetlinkRouteTest, GetLinkByNameNotFound) { + const std::string name = "nodevice?!"; + + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); + + struct request { + struct nlmsghdr hdr; + struct ifinfomsg ifm; + struct rtattr rtattr; + char ifname[IFNAMSIZ]; + char pad[NLMSG_ALIGNTO + RTA_ALIGNTO]; + }; + + struct request req = {}; + req.hdr.nlmsg_type = RTM_GETLINK; + req.hdr.nlmsg_flags = NLM_F_REQUEST; + req.hdr.nlmsg_seq = kSeq; + req.ifm.ifi_family = AF_UNSPEC; + req.rtattr.rta_type = IFLA_IFNAME; + req.rtattr.rta_len = RTA_LENGTH(name.size() + 1); + strncpy(req.ifname, name.c_str(), sizeof(req.ifname)); + req.hdr.nlmsg_len = + NLMSG_LENGTH(sizeof(req.ifm)) + NLMSG_ALIGN(req.rtattr.rta_len); + + EXPECT_THAT(NetlinkRequestAckOrError(fd, kSeq, &req, sizeof(req)), + PosixErrorIs(ENODEV, ::testing::_)); +} + +TEST(NetlinkRouteTest, MsgHdrMsgUnsuppType) { + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); + + struct request { + 
struct nlmsghdr hdr; + struct ifinfomsg ifm; + }; + + struct request req = {}; + req.hdr.nlmsg_len = sizeof(req); + // If type & 0x3 is equal to 0x2, this means a get request + // which doesn't require CAP_SYS_ADMIN. + req.hdr.nlmsg_type = ((__RTM_MAX + 1024) & (~0x3)) | 0x2; + req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP; + req.hdr.nlmsg_seq = kSeq; + req.ifm.ifi_family = AF_UNSPEC; + + EXPECT_THAT(NetlinkRequestAckOrError(fd, kSeq, &req, sizeof(req)), + PosixErrorIs(EOPNOTSUPP, ::testing::_)); +} + +TEST(NetlinkRouteTest, MsgHdrMsgTrunc) { + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); + + struct request { + struct nlmsghdr hdr; + struct ifinfomsg ifm; + }; + + struct request req = {}; + req.hdr.nlmsg_len = sizeof(req); + req.hdr.nlmsg_type = RTM_GETLINK; + req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP; + req.hdr.nlmsg_seq = kSeq; + req.ifm.ifi_family = AF_UNSPEC; + + struct iovec iov = {}; + iov.iov_base = &req; + iov.iov_len = sizeof(req); + + struct msghdr msg = {}; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + // No destination required; it defaults to pid 0, the kernel. + + ASSERT_THAT(RetryEINTR(sendmsg)(fd.get(), &msg, 0), SyscallSucceeds()); + + // Small enough to ensure that the response doesn't fit. 
+ constexpr size_t kBufferSize = 10; + std::vector<char> buf(kBufferSize); + iov.iov_base = buf.data(); + iov.iov_len = buf.size(); + + ASSERT_THAT(RetryEINTR(recvmsg)(fd.get(), &msg, 0), + SyscallSucceedsWithValue(kBufferSize)); + EXPECT_EQ((msg.msg_flags & MSG_TRUNC), MSG_TRUNC); +} + +TEST(NetlinkRouteTest, MsgTruncMsgHdrMsgTrunc) { + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); + + struct request { + struct nlmsghdr hdr; + struct ifinfomsg ifm; + }; + + struct request req = {}; + req.hdr.nlmsg_len = sizeof(req); + req.hdr.nlmsg_type = RTM_GETLINK; + req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP; + req.hdr.nlmsg_seq = kSeq; + req.ifm.ifi_family = AF_UNSPEC; + + struct iovec iov = {}; + iov.iov_base = &req; + iov.iov_len = sizeof(req); + + struct msghdr msg = {}; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + // No destination required; it defaults to pid 0, the kernel. + + ASSERT_THAT(RetryEINTR(sendmsg)(fd.get(), &msg, 0), SyscallSucceeds()); + + // Small enough to ensure that the response doesn't fit. + constexpr size_t kBufferSize = 10; + std::vector<char> buf(kBufferSize); + iov.iov_base = buf.data(); + iov.iov_len = buf.size(); + + int res = 0; + ASSERT_THAT(res = RetryEINTR(recvmsg)(fd.get(), &msg, MSG_TRUNC), + SyscallSucceeds()); + EXPECT_GT(res, kBufferSize); + EXPECT_EQ((msg.msg_flags & MSG_TRUNC), MSG_TRUNC); +} + +TEST(NetlinkRouteTest, ControlMessageIgnored) { + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); + uint32_t port = ASSERT_NO_ERRNO_AND_VALUE(NetlinkPortID(fd.get())); + + struct request { + struct nlmsghdr control_hdr; + struct nlmsghdr message_hdr; + struct ifinfomsg ifm; + }; + + struct request req = {}; + + // This control message is ignored. We still receive a response for the + // following RTM_GETLINK. 
+ req.control_hdr.nlmsg_len = sizeof(req.control_hdr); + req.control_hdr.nlmsg_type = NLMSG_DONE; + req.control_hdr.nlmsg_seq = kSeq; + + req.message_hdr.nlmsg_len = sizeof(req.message_hdr) + sizeof(req.ifm); + req.message_hdr.nlmsg_type = RTM_GETLINK; + req.message_hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP; + req.message_hdr.nlmsg_seq = kSeq; + + req.ifm.ifi_family = AF_UNSPEC; + + ASSERT_NO_ERRNO(NetlinkRequestResponse( + fd, &req, sizeof(req), + [&](const struct nlmsghdr* hdr) { + CheckGetLinkResponse(hdr, kSeq, port); + }, + false)); +} + +TEST(NetlinkRouteTest, GetAddrDump) { + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); + uint32_t port = ASSERT_NO_ERRNO_AND_VALUE(NetlinkPortID(fd.get())); + + struct request { + struct nlmsghdr hdr; + struct rtgenmsg rgm; + }; + + struct request req; + req.hdr.nlmsg_len = sizeof(req); + req.hdr.nlmsg_type = RTM_GETADDR; + req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP; + req.hdr.nlmsg_seq = kSeq; + req.rgm.rtgen_family = AF_UNSPEC; + + ASSERT_NO_ERRNO(NetlinkRequestResponse( + fd, &req, sizeof(req), + [&](const struct nlmsghdr* hdr) { + EXPECT_THAT(hdr->nlmsg_type, AnyOf(Eq(RTM_NEWADDR), Eq(NLMSG_DONE))); + + EXPECT_TRUE((hdr->nlmsg_flags & NLM_F_MULTI) == NLM_F_MULTI) + << std::hex << hdr->nlmsg_flags; + + EXPECT_EQ(hdr->nlmsg_seq, kSeq); + EXPECT_EQ(hdr->nlmsg_pid, port); + + if (hdr->nlmsg_type != RTM_NEWADDR) { + return; + } + + // RTM_NEWADDR contains at least the header and ifaddrmsg. + EXPECT_GE(hdr->nlmsg_len, sizeof(*hdr) + sizeof(struct ifaddrmsg)); + + // TODO(mpratt): Check ifaddrmsg contents and following attrs. + }, + false)); +} + +TEST(NetlinkRouteTest, LookupAll) { + struct ifaddrs* if_addr_list = nullptr; + auto cleanup = Cleanup([&if_addr_list]() { freeifaddrs(if_addr_list); }); + + // Not a syscall but we can use the syscall matcher as glibc sets errno. 
+ ASSERT_THAT(getifaddrs(&if_addr_list), SyscallSucceeds()); + + int count = 0; + for (struct ifaddrs* i = if_addr_list; i; i = i->ifa_next) { + if (!i->ifa_addr || (i->ifa_addr->sa_family != AF_INET && + i->ifa_addr->sa_family != AF_INET6)) { + continue; + } + count++; + } + ASSERT_GT(count, 0); +} + +TEST(NetlinkRouteTest, AddAddr) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN))); + + Link loopback_link = ASSERT_NO_ERRNO_AND_VALUE(LoopbackLink()); + + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); + + struct request { + struct nlmsghdr hdr; + struct ifaddrmsg ifa; + struct rtattr rtattr; + struct in_addr addr; + char pad[NLMSG_ALIGNTO + RTA_ALIGNTO]; + }; + + struct request req = {}; + req.hdr.nlmsg_type = RTM_NEWADDR; + req.hdr.nlmsg_seq = kSeq; + req.ifa.ifa_family = AF_INET; + req.ifa.ifa_prefixlen = 24; + req.ifa.ifa_flags = 0; + req.ifa.ifa_scope = 0; + req.ifa.ifa_index = loopback_link.index; + req.rtattr.rta_type = IFA_LOCAL; + req.rtattr.rta_len = RTA_LENGTH(sizeof(req.addr)); + inet_pton(AF_INET, "10.0.0.1", &req.addr); + req.hdr.nlmsg_len = + NLMSG_LENGTH(sizeof(req.ifa)) + NLMSG_ALIGN(req.rtattr.rta_len); + + // Create should succeed, as no such address in kernel. + req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_ACK; + EXPECT_NO_ERRNO( + NetlinkRequestAckOrError(fd, req.hdr.nlmsg_seq, &req, req.hdr.nlmsg_len)); + + // Replace an existing address should succeed. + req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_REPLACE | NLM_F_ACK; + req.hdr.nlmsg_seq++; + EXPECT_NO_ERRNO( + NetlinkRequestAckOrError(fd, req.hdr.nlmsg_seq, &req, req.hdr.nlmsg_len)); + + // Create exclusive should fail, as we created the address above. 
+ req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK; + req.hdr.nlmsg_seq++; + EXPECT_THAT( + NetlinkRequestAckOrError(fd, req.hdr.nlmsg_seq, &req, req.hdr.nlmsg_len), + PosixErrorIs(EEXIST, ::testing::_)); +} + +// GetRouteDump tests a RTM_GETROUTE + NLM_F_DUMP request. +TEST(NetlinkRouteTest, GetRouteDump) { + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); + uint32_t port = ASSERT_NO_ERRNO_AND_VALUE(NetlinkPortID(fd.get())); + + struct request { + struct nlmsghdr hdr; + struct rtmsg rtm; + }; + + struct request req = {}; + req.hdr.nlmsg_len = sizeof(req); + req.hdr.nlmsg_type = RTM_GETROUTE; + req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP; + req.hdr.nlmsg_seq = kSeq; + req.rtm.rtm_family = AF_UNSPEC; + + bool routeFound = false; + bool dstFound = true; + ASSERT_NO_ERRNO(NetlinkRequestResponse( + fd, &req, sizeof(req), + [&](const struct nlmsghdr* hdr) { + // Validate the reponse to RTM_GETROUTE + NLM_F_DUMP. + EXPECT_THAT(hdr->nlmsg_type, AnyOf(Eq(RTM_NEWROUTE), Eq(NLMSG_DONE))); + + EXPECT_TRUE((hdr->nlmsg_flags & NLM_F_MULTI) == NLM_F_MULTI) + << std::hex << hdr->nlmsg_flags; + + EXPECT_EQ(hdr->nlmsg_seq, kSeq); + EXPECT_EQ(hdr->nlmsg_pid, port); + + // The test should not proceed if it's not a RTM_NEWROUTE message. + if (hdr->nlmsg_type != RTM_NEWROUTE) { + return; + } + + // RTM_NEWROUTE contains at least the header and rtmsg. + ASSERT_GE(hdr->nlmsg_len, NLMSG_SPACE(sizeof(struct rtmsg))); + const struct rtmsg* msg = + reinterpret_cast<const struct rtmsg*>(NLMSG_DATA(hdr)); + // NOTE: rtmsg fields are char fields. 
+ std::cout << "Found route table=" << static_cast<int>(msg->rtm_table) + << ", protocol=" << static_cast<int>(msg->rtm_protocol) + << ", scope=" << static_cast<int>(msg->rtm_scope) + << ", type=" << static_cast<int>(msg->rtm_type); + + int len = RTM_PAYLOAD(hdr); + bool rtDstFound = false; + for (struct rtattr* attr = RTM_RTA(msg); RTA_OK(attr, len); + attr = RTA_NEXT(attr, len)) { + if (attr->rta_type == RTA_DST) { + char address[INET_ADDRSTRLEN] = {}; + inet_ntop(AF_INET, RTA_DATA(attr), address, sizeof(address)); + std::cout << ", dst=" << address; + rtDstFound = true; + } + } + + std::cout << std::endl; + + if (msg->rtm_table == RT_TABLE_MAIN) { + routeFound = true; + dstFound = rtDstFound && dstFound; + } + }, + false)); + // At least one route found in main route table. + EXPECT_TRUE(routeFound); + // Found RTA_DST for each route in main table. + EXPECT_TRUE(dstFound); +} + +// GetRouteRequest tests a RTM_GETROUTE request with RTM_F_LOOKUP_TABLE flag. +TEST(NetlinkRouteTest, GetRouteRequest) { + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); + uint32_t port = ASSERT_NO_ERRNO_AND_VALUE(NetlinkPortID(fd.get())); + + struct request { + struct nlmsghdr hdr; + struct rtmsg rtm; + struct nlattr nla; + struct in_addr sin_addr; + }; + + constexpr uint32_t kSeq = 12345; + + struct request req = {}; + req.hdr.nlmsg_len = sizeof(req); + req.hdr.nlmsg_type = RTM_GETROUTE; + req.hdr.nlmsg_flags = NLM_F_REQUEST; + req.hdr.nlmsg_seq = kSeq; + + req.rtm.rtm_family = AF_INET; + req.rtm.rtm_dst_len = 32; + req.rtm.rtm_src_len = 0; + req.rtm.rtm_tos = 0; + req.rtm.rtm_table = RT_TABLE_UNSPEC; + req.rtm.rtm_protocol = RTPROT_UNSPEC; + req.rtm.rtm_scope = RT_SCOPE_UNIVERSE; + req.rtm.rtm_type = RTN_UNSPEC; + req.rtm.rtm_flags = RTM_F_LOOKUP_TABLE; + + req.nla.nla_len = 8; + req.nla.nla_type = RTA_DST; + inet_aton("127.0.0.2", &req.sin_addr); + + bool rtDstFound = false; + ASSERT_NO_ERRNO(NetlinkRequestResponseSingle( + fd, &req, sizeof(req), 
[&](const struct nlmsghdr* hdr) { + // Validate the reponse to RTM_GETROUTE request with RTM_F_LOOKUP_TABLE + // flag. + EXPECT_THAT(hdr->nlmsg_type, RTM_NEWROUTE); + + EXPECT_TRUE(hdr->nlmsg_flags == 0) << std::hex << hdr->nlmsg_flags; + + EXPECT_EQ(hdr->nlmsg_seq, kSeq); + EXPECT_EQ(hdr->nlmsg_pid, port); + + // RTM_NEWROUTE contains at least the header and rtmsg. + ASSERT_GE(hdr->nlmsg_len, NLMSG_SPACE(sizeof(struct rtmsg))); + const struct rtmsg* msg = + reinterpret_cast<const struct rtmsg*>(NLMSG_DATA(hdr)); + + // NOTE: rtmsg fields are char fields. + std::cout << "Found route table=" << static_cast<int>(msg->rtm_table) + << ", protocol=" << static_cast<int>(msg->rtm_protocol) + << ", scope=" << static_cast<int>(msg->rtm_scope) + << ", type=" << static_cast<int>(msg->rtm_type); + + EXPECT_EQ(msg->rtm_family, AF_INET); + EXPECT_EQ(msg->rtm_dst_len, 32); + EXPECT_TRUE((msg->rtm_flags & RTM_F_CLONED) == RTM_F_CLONED) + << std::hex << msg->rtm_flags; + + int len = RTM_PAYLOAD(hdr); + std::cout << ", len=" << len; + for (struct rtattr* attr = RTM_RTA(msg); RTA_OK(attr, len); + attr = RTA_NEXT(attr, len)) { + if (attr->rta_type == RTA_DST) { + char address[INET_ADDRSTRLEN] = {}; + inet_ntop(AF_INET, RTA_DATA(attr), address, sizeof(address)); + std::cout << ", dst=" << address; + rtDstFound = true; + } else if (attr->rta_type == RTA_OIF) { + const char* oif = reinterpret_cast<const char*>(RTA_DATA(attr)); + std::cout << ", oif=" << oif; + } + } + + std::cout << std::endl; + })); + // Found RTA_DST for RTM_F_LOOKUP_TABLE. + EXPECT_TRUE(rtDstFound); +} + +// RecvmsgTrunc tests the recvmsg MSG_TRUNC flag with zero length output +// buffer. MSG_TRUNC with a zero length buffer should consume subsequent +// messages off the socket. 
+TEST(NetlinkRouteTest, RecvmsgTrunc) { + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); + + struct request { + struct nlmsghdr hdr; + struct rtgenmsg rgm; + }; + + struct request req; + req.hdr.nlmsg_len = sizeof(req); + req.hdr.nlmsg_type = RTM_GETADDR; + req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP; + req.hdr.nlmsg_seq = kSeq; + req.rgm.rtgen_family = AF_UNSPEC; + + struct iovec iov = {}; + iov.iov_base = &req; + iov.iov_len = sizeof(req); + + struct msghdr msg = {}; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + ASSERT_THAT(RetryEINTR(sendmsg)(fd.get(), &msg, 0), SyscallSucceeds()); + + iov.iov_base = NULL; + iov.iov_len = 0; + + int trunclen, trunclen2; + + // Note: This test assumes at least two messages are returned by the + // RTM_GETADDR request. That means at least one RTM_NEWLINK message and one + // NLMSG_DONE message. We cannot read all the messages without blocking + // because we would need to read the message into a buffer and check the + // nlmsg_type for NLMSG_DONE. However, the test depends on reading into a + // zero-length buffer. + + // First, call recvmsg with MSG_TRUNC. This will read the full message from + // the socket and return it's full length. Subsequent calls to recvmsg will + // read the next messages from the socket. + ASSERT_THAT(trunclen = RetryEINTR(recvmsg)(fd.get(), &msg, MSG_TRUNC), + SyscallSucceeds()); + + // Message should always be truncated. However, While the destination iov is + // zero length, MSG_TRUNC returns the size of the next message so it should + // not be zero. + ASSERT_EQ(msg.msg_flags & MSG_TRUNC, MSG_TRUNC); + ASSERT_NE(trunclen, 0); + // Returned length is at least the header and ifaddrmsg. + EXPECT_GE(trunclen, sizeof(struct nlmsghdr) + sizeof(struct ifaddrmsg)); + + // Reset the msg_flags to make sure that the recvmsg call is setting them + // properly. + msg.msg_flags = 0; + + // Make a second recvvmsg call to get the next message. 
+ ASSERT_THAT(trunclen2 = RetryEINTR(recvmsg)(fd.get(), &msg, MSG_TRUNC), + SyscallSucceeds()); + ASSERT_EQ(msg.msg_flags & MSG_TRUNC, MSG_TRUNC); + ASSERT_NE(trunclen2, 0); + + // Assert that the received messages are not the same. + // + // We are calling recvmsg with a zero length buffer so we have no way to + // inspect the messages to make sure they are not equal in value. The best + // we can do is to compare their lengths. + ASSERT_NE(trunclen, trunclen2); +} + +// RecvmsgTruncPeek tests recvmsg with the combination of the MSG_TRUNC and +// MSG_PEEK flags and a zero length output buffer. This is normally used to +// read the full length of the next message on the socket without consuming +// it, so a properly sized buffer can be allocated to store the message. This +// test tests that scenario. +TEST(NetlinkRouteTest, RecvmsgTruncPeek) { + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); + + struct request { + struct nlmsghdr hdr; + struct rtgenmsg rgm; + }; + + struct request req; + req.hdr.nlmsg_len = sizeof(req); + req.hdr.nlmsg_type = RTM_GETADDR; + req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP; + req.hdr.nlmsg_seq = kSeq; + req.rgm.rtgen_family = AF_UNSPEC; + + struct iovec iov = {}; + iov.iov_base = &req; + iov.iov_len = sizeof(req); + + struct msghdr msg = {}; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + ASSERT_THAT(RetryEINTR(sendmsg)(fd.get(), &msg, 0), SyscallSucceeds()); + + int type = -1; + do { + int peeklen; + int len; + + iov.iov_base = NULL; + iov.iov_len = 0; + + // Call recvmsg with MSG_PEEK and MSG_TRUNC. This will peek at the message + // and return it's full length. + // See: MSG_TRUNC http://man7.org/linux/man-pages/man2/recv.2.html + ASSERT_THAT( + peeklen = RetryEINTR(recvmsg)(fd.get(), &msg, MSG_PEEK | MSG_TRUNC), + SyscallSucceeds()); + + // Message should always be truncated. 
+ ASSERT_EQ(msg.msg_flags & MSG_TRUNC, MSG_TRUNC); + ASSERT_NE(peeklen, 0); + + // Reset the message flags for the next call. + msg.msg_flags = 0; + + // Make the actual call to recvmsg to get the actual data. We will use + // the length returned from the peek call for the allocated buffer size.. + std::vector<char> buf(peeklen); + iov.iov_base = buf.data(); + iov.iov_len = buf.size(); + ASSERT_THAT(len = RetryEINTR(recvmsg)(fd.get(), &msg, 0), + SyscallSucceeds()); + + // Message should not be truncated since we allocated the correct buffer + // size. + EXPECT_NE(msg.msg_flags & MSG_TRUNC, MSG_TRUNC); + + // MSG_PEEK should have left data on the socket and the subsequent call + // with should have retrieved the same data. Both calls should have + // returned the message's full length so they should be equal. + ASSERT_NE(len, 0); + ASSERT_EQ(peeklen, len); + + for (struct nlmsghdr* hdr = reinterpret_cast<struct nlmsghdr*>(buf.data()); + NLMSG_OK(hdr, len); hdr = NLMSG_NEXT(hdr, len)) { + type = hdr->nlmsg_type; + } + } while (type != NLMSG_DONE && type != NLMSG_ERROR); +} + +// No SCM_CREDENTIALS are received without SO_PASSCRED set. 
+TEST(NetlinkRouteTest, NoPasscredNoCreds) { + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); + + ASSERT_THAT(setsockopt(fd.get(), SOL_SOCKET, SO_PASSCRED, &kSockOptOff, + sizeof(kSockOptOff)), + SyscallSucceeds()); + + struct request { + struct nlmsghdr hdr; + struct rtgenmsg rgm; + }; + + struct request req; + req.hdr.nlmsg_len = sizeof(req); + req.hdr.nlmsg_type = RTM_GETADDR; + req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP; + req.hdr.nlmsg_seq = kSeq; + req.rgm.rtgen_family = AF_UNSPEC; + + struct iovec iov = {}; + iov.iov_base = &req; + iov.iov_len = sizeof(req); + + struct msghdr msg = {}; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + ASSERT_THAT(RetryEINTR(sendmsg)(fd.get(), &msg, 0), SyscallSucceeds()); + + iov.iov_base = NULL; + iov.iov_len = 0; + + char control[CMSG_SPACE(sizeof(struct ucred))] = {}; + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + // Note: This test assumes at least one message is returned by the + // RTM_GETADDR request. + ASSERT_THAT(RetryEINTR(recvmsg)(fd.get(), &msg, 0), SyscallSucceeds()); + + // No control messages. + EXPECT_EQ(CMSG_FIRSTHDR(&msg), nullptr); +} + +// SCM_CREDENTIALS are received with SO_PASSCRED set. 
+TEST(NetlinkRouteTest, PasscredCreds) { + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); + + ASSERT_THAT(setsockopt(fd.get(), SOL_SOCKET, SO_PASSCRED, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + + struct request { + struct nlmsghdr hdr; + struct rtgenmsg rgm; + }; + + struct request req; + req.hdr.nlmsg_len = sizeof(req); + req.hdr.nlmsg_type = RTM_GETADDR; + req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP; + req.hdr.nlmsg_seq = kSeq; + req.rgm.rtgen_family = AF_UNSPEC; + + struct iovec iov = {}; + iov.iov_base = &req; + iov.iov_len = sizeof(req); + + struct msghdr msg = {}; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + ASSERT_THAT(RetryEINTR(sendmsg)(fd.get(), &msg, 0), SyscallSucceeds()); + + iov.iov_base = NULL; + iov.iov_len = 0; + + char control[CMSG_SPACE(sizeof(struct ucred))] = {}; + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + // Note: This test assumes at least one message is returned by the + // RTM_GETADDR request. + ASSERT_THAT(RetryEINTR(recvmsg)(fd.get(), &msg, 0), SyscallSucceeds()); + + struct ucred creds; + struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg); + ASSERT_NE(cmsg, nullptr); + ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(creds))); + ASSERT_EQ(cmsg->cmsg_level, SOL_SOCKET); + ASSERT_EQ(cmsg->cmsg_type, SCM_CREDENTIALS); + + memcpy(&creds, CMSG_DATA(cmsg), sizeof(creds)); + + // The peer is the kernel, which is "PID" 0. + EXPECT_EQ(creds.pid, 0); + // The kernel identifies as root. Also allow nobody in case this test is + // running in a userns without root mapped. 
+ EXPECT_THAT(creds.uid, AnyOf(Eq(0), Eq(65534))); + EXPECT_THAT(creds.gid, AnyOf(Eq(0), Eq(65534))); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_netlink_route_util.cc b/test/syscalls/linux/socket_netlink_route_util.cc new file mode 100644 index 000000000..bde1dbb4d --- /dev/null +++ b/test/syscalls/linux/socket_netlink_route_util.cc @@ -0,0 +1,162 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "test/syscalls/linux/socket_netlink_route_util.h" + +#include <linux/if.h> +#include <linux/netlink.h> +#include <linux/rtnetlink.h> + +#include "test/syscalls/linux/socket_netlink_util.h" + +namespace gvisor { +namespace testing { +namespace { + +constexpr uint32_t kSeq = 12345; + +} // namespace + +PosixError DumpLinks( + const FileDescriptor& fd, uint32_t seq, + const std::function<void(const struct nlmsghdr* hdr)>& fn) { + struct request { + struct nlmsghdr hdr; + struct ifinfomsg ifm; + }; + + struct request req = {}; + req.hdr.nlmsg_len = sizeof(req); + req.hdr.nlmsg_type = RTM_GETLINK; + req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP; + req.hdr.nlmsg_seq = seq; + req.ifm.ifi_family = AF_UNSPEC; + + return NetlinkRequestResponse(fd, &req, sizeof(req), fn, false); +} + +PosixErrorOr<std::vector<Link>> DumpLinks() { + ASSIGN_OR_RETURN_ERRNO(FileDescriptor fd, NetlinkBoundSocket(NETLINK_ROUTE)); + + std::vector<Link> links; + RETURN_IF_ERRNO(DumpLinks(fd, kSeq, [&](const struct nlmsghdr* hdr) { + if (hdr->nlmsg_type != RTM_NEWLINK || + hdr->nlmsg_len < NLMSG_SPACE(sizeof(struct ifinfomsg))) { + return; + } + const struct ifinfomsg* msg = + reinterpret_cast<const struct ifinfomsg*>(NLMSG_DATA(hdr)); + const auto* rta = FindRtAttr(hdr, msg, IFLA_IFNAME); + if (rta == nullptr) { + // Ignore links that do not have a name. 
+ return; + } + + links.emplace_back(); + links.back().index = msg->ifi_index; + links.back().type = msg->ifi_type; + links.back().name = + std::string(reinterpret_cast<const char*>(RTA_DATA(rta))); + })); + return links; +} + +PosixErrorOr<Link> LoopbackLink() { + ASSIGN_OR_RETURN_ERRNO(auto links, DumpLinks()); + for (const auto& link : links) { + if (link.type == ARPHRD_LOOPBACK) { + return link; + } + } + return PosixError(ENOENT, "loopback link not found"); +} + +PosixError LinkAddLocalAddr(int index, int family, int prefixlen, + const void* addr, int addrlen) { + ASSIGN_OR_RETURN_ERRNO(FileDescriptor fd, NetlinkBoundSocket(NETLINK_ROUTE)); + + struct request { + struct nlmsghdr hdr; + struct ifaddrmsg ifaddr; + char attrbuf[512]; + }; + + struct request req = {}; + req.hdr.nlmsg_len = NLMSG_LENGTH(sizeof(req.ifaddr)); + req.hdr.nlmsg_type = RTM_NEWADDR; + req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.hdr.nlmsg_seq = kSeq; + req.ifaddr.ifa_index = index; + req.ifaddr.ifa_family = family; + req.ifaddr.ifa_prefixlen = prefixlen; + + struct rtattr* rta = reinterpret_cast<struct rtattr*>( + reinterpret_cast<int8_t*>(&req) + NLMSG_ALIGN(req.hdr.nlmsg_len)); + rta->rta_type = IFA_LOCAL; + rta->rta_len = RTA_LENGTH(addrlen); + req.hdr.nlmsg_len = NLMSG_ALIGN(req.hdr.nlmsg_len) + RTA_LENGTH(addrlen); + memcpy(RTA_DATA(rta), addr, addrlen); + + return NetlinkRequestAckOrError(fd, kSeq, &req, req.hdr.nlmsg_len); +} + +PosixError LinkChangeFlags(int index, unsigned int flags, unsigned int change) { + ASSIGN_OR_RETURN_ERRNO(FileDescriptor fd, NetlinkBoundSocket(NETLINK_ROUTE)); + + struct request { + struct nlmsghdr hdr; + struct ifinfomsg ifinfo; + char pad[NLMSG_ALIGNTO]; + }; + + struct request req = {}; + req.hdr.nlmsg_len = NLMSG_LENGTH(sizeof(req.ifinfo)); + req.hdr.nlmsg_type = RTM_NEWLINK; + req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.hdr.nlmsg_seq = kSeq; + req.ifinfo.ifi_index = index; + req.ifinfo.ifi_flags = flags; + req.ifinfo.ifi_change = 
change; + + return NetlinkRequestAckOrError(fd, kSeq, &req, req.hdr.nlmsg_len); +} + +PosixError LinkSetMacAddr(int index, const void* addr, int addrlen) { + ASSIGN_OR_RETURN_ERRNO(FileDescriptor fd, NetlinkBoundSocket(NETLINK_ROUTE)); + + struct request { + struct nlmsghdr hdr; + struct ifinfomsg ifinfo; + char attrbuf[512]; + }; + + struct request req = {}; + req.hdr.nlmsg_len = NLMSG_LENGTH(sizeof(req.ifinfo)); + req.hdr.nlmsg_type = RTM_NEWLINK; + req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.hdr.nlmsg_seq = kSeq; + req.ifinfo.ifi_index = index; + + struct rtattr* rta = reinterpret_cast<struct rtattr*>( + reinterpret_cast<int8_t*>(&req) + NLMSG_ALIGN(req.hdr.nlmsg_len)); + rta->rta_type = IFLA_ADDRESS; + rta->rta_len = RTA_LENGTH(addrlen); + req.hdr.nlmsg_len = NLMSG_ALIGN(req.hdr.nlmsg_len) + RTA_LENGTH(addrlen); + memcpy(RTA_DATA(rta), addr, addrlen); + + return NetlinkRequestAckOrError(fd, kSeq, &req, req.hdr.nlmsg_len); +} + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_netlink_route_util.h b/test/syscalls/linux/socket_netlink_route_util.h new file mode 100644 index 000000000..149c4a7f6 --- /dev/null +++ b/test/syscalls/linux/socket_netlink_route_util.h @@ -0,0 +1,55 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_NETLINK_ROUTE_UTIL_H_ +#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_NETLINK_ROUTE_UTIL_H_ + +#include <linux/netlink.h> +#include <linux/rtnetlink.h> + +#include <vector> + +#include "test/syscalls/linux/socket_netlink_util.h" + +namespace gvisor { +namespace testing { + +struct Link { + int index; + int16_t type; + std::string name; +}; + +PosixError DumpLinks(const FileDescriptor& fd, uint32_t seq, + const std::function<void(const struct nlmsghdr* hdr)>& fn); + +PosixErrorOr<std::vector<Link>> DumpLinks(); + +// Returns the loopback link on the system. ENOENT if not found. +PosixErrorOr<Link> LoopbackLink(); + +// LinkAddLocalAddr sets IFA_LOCAL attribute on the interface. +PosixError LinkAddLocalAddr(int index, int family, int prefixlen, + const void* addr, int addrlen); + +// LinkChangeFlags changes interface flags. E.g. IFF_UP. +PosixError LinkChangeFlags(int index, unsigned int flags, unsigned int change); + +// LinkSetMacAddr sets IFLA_ADDRESS attribute of the interface. +PosixError LinkSetMacAddr(int index, const void* addr, int addrlen); + +} // namespace testing +} // namespace gvisor + +#endif // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_NETLINK_ROUTE_UTIL_H_ diff --git a/test/syscalls/linux/socket_netlink_uevent.cc b/test/syscalls/linux/socket_netlink_uevent.cc new file mode 100644 index 000000000..da425bed4 --- /dev/null +++ b/test/syscalls/linux/socket_netlink_uevent.cc @@ -0,0 +1,83 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include <linux/filter.h> +#include <linux/netlink.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "test/syscalls/linux/socket_netlink_util.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/test_util.h" + +// Tests for NETLINK_KOBJECT_UEVENT sockets. +// +// gVisor never sends any messages on these sockets, so we don't test the events +// themselves. + +namespace gvisor { +namespace testing { + +namespace { + +// SO_PASSCRED can be enabled. Since no messages are sent in gVisor, we don't +// actually test receiving credentials. +TEST(NetlinkUeventTest, PassCred) { + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_KOBJECT_UEVENT)); + + EXPECT_THAT(setsockopt(fd.get(), SOL_SOCKET, SO_PASSCRED, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); +} + +// SO_DETACH_FILTER fails without a filter already installed. +TEST(NetlinkUeventTest, DetachNoFilter) { + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_KOBJECT_UEVENT)); + + int opt; + EXPECT_THAT( + setsockopt(fd.get(), SOL_SOCKET, SO_DETACH_FILTER, &opt, sizeof(opt)), + SyscallFailsWithErrno(ENOENT)); +} + +// We can attach a BPF filter. +TEST(NetlinkUeventTest, AttachFilter) { + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_KOBJECT_UEVENT)); + + // Minimal BPF program: a single ret. 
+ struct sock_filter filter = {0x6, 0, 0, 0}; + struct sock_fprog prog = {}; + prog.len = 1; + prog.filter = &filter; + + EXPECT_THAT( + setsockopt(fd.get(), SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)), + SyscallSucceeds()); + + int opt; + EXPECT_THAT( + setsockopt(fd.get(), SOL_SOCKET, SO_DETACH_FILTER, &opt, sizeof(opt)), + SyscallSucceeds()); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_netlink_util.cc b/test/syscalls/linux/socket_netlink_util.cc new file mode 100644 index 000000000..952eecfe8 --- /dev/null +++ b/test/syscalls/linux/socket_netlink_util.cc @@ -0,0 +1,187 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "test/syscalls/linux/socket_netlink_util.h" + +#include <linux/if_arp.h> +#include <linux/netlink.h> +#include <linux/rtnetlink.h> +#include <sys/socket.h> + +#include <vector> + +#include "absl/strings/str_cat.h" +#include "test/syscalls/linux/socket_test_util.h" + +namespace gvisor { +namespace testing { + +PosixErrorOr<FileDescriptor> NetlinkBoundSocket(int protocol) { + FileDescriptor fd; + ASSIGN_OR_RETURN_ERRNO(fd, Socket(AF_NETLINK, SOCK_RAW, protocol)); + + struct sockaddr_nl addr = {}; + addr.nl_family = AF_NETLINK; + + RETURN_ERROR_IF_SYSCALL_FAIL( + bind(fd.get(), reinterpret_cast<struct sockaddr*>(&addr), sizeof(addr))); + MaybeSave(); + + return std::move(fd); +} + +PosixErrorOr<uint32_t> NetlinkPortID(int fd) { + struct sockaddr_nl addr; + socklen_t addrlen = sizeof(addr); + + RETURN_ERROR_IF_SYSCALL_FAIL( + getsockname(fd, reinterpret_cast<struct sockaddr*>(&addr), &addrlen)); + MaybeSave(); + + return static_cast<uint32_t>(addr.nl_pid); +} + +PosixError NetlinkRequestResponse( + const FileDescriptor& fd, void* request, size_t len, + const std::function<void(const struct nlmsghdr* hdr)>& fn, + bool expect_nlmsgerr) { + struct iovec iov = {}; + iov.iov_base = request; + iov.iov_len = len; + + struct msghdr msg = {}; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + // No destination required; it defaults to pid 0, the kernel. + + RETURN_ERROR_IF_SYSCALL_FAIL(RetryEINTR(sendmsg)(fd.get(), &msg, 0)); + + constexpr size_t kBufferSize = 4096; + std::vector<char> buf(kBufferSize); + iov.iov_base = buf.data(); + iov.iov_len = buf.size(); + + // If NLM_F_MULTI is set, response is a series of messages that ends with a + // NLMSG_DONE message. + int type = -1; + int flags = 0; + do { + int len; + RETURN_ERROR_IF_SYSCALL_FAIL(len = RetryEINTR(recvmsg)(fd.get(), &msg, 0)); + + // We don't bother with the complexity of dealing with truncated messages. + // We must allocate a large enough buffer up front. 
+ if ((msg.msg_flags & MSG_TRUNC) == MSG_TRUNC) { + return PosixError(EIO, + absl::StrCat("Received truncated message with flags: ", + msg.msg_flags)); + } + + for (struct nlmsghdr* hdr = reinterpret_cast<struct nlmsghdr*>(buf.data()); + NLMSG_OK(hdr, len); hdr = NLMSG_NEXT(hdr, len)) { + fn(hdr); + flags = hdr->nlmsg_flags; + type = hdr->nlmsg_type; + // Done should include an integer payload for dump_done_errno. + // See net/netlink/af_netlink.c:netlink_dump + // Some tools like the 'ip' tool check the minimum length of the + // NLMSG_DONE message. + if (type == NLMSG_DONE) { + EXPECT_GE(hdr->nlmsg_len, NLMSG_LENGTH(sizeof(int))); + } + } + } while ((flags & NLM_F_MULTI) && type != NLMSG_DONE && type != NLMSG_ERROR); + + if (expect_nlmsgerr) { + EXPECT_EQ(type, NLMSG_ERROR); + } else if (flags & NLM_F_MULTI) { + EXPECT_EQ(type, NLMSG_DONE); + } + return NoError(); +} + +PosixError NetlinkRequestResponseSingle( + const FileDescriptor& fd, void* request, size_t len, + const std::function<void(const struct nlmsghdr* hdr)>& fn) { + struct iovec iov = {}; + iov.iov_base = request; + iov.iov_len = len; + + struct msghdr msg = {}; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + // No destination required; it defaults to pid 0, the kernel. + + RETURN_ERROR_IF_SYSCALL_FAIL(RetryEINTR(sendmsg)(fd.get(), &msg, 0)); + + constexpr size_t kBufferSize = 4096; + std::vector<char> buf(kBufferSize); + iov.iov_base = buf.data(); + iov.iov_len = buf.size(); + + int ret; + RETURN_ERROR_IF_SYSCALL_FAIL(ret = RetryEINTR(recvmsg)(fd.get(), &msg, 0)); + + // We don't bother with the complexity of dealing with truncated messages. + // We must allocate a large enough buffer up front. 
+ if ((msg.msg_flags & MSG_TRUNC) == MSG_TRUNC) { + return PosixError( + EIO, + absl::StrCat("Received truncated message with flags: ", msg.msg_flags)); + } + + for (struct nlmsghdr* hdr = reinterpret_cast<struct nlmsghdr*>(buf.data()); + NLMSG_OK(hdr, ret); hdr = NLMSG_NEXT(hdr, ret)) { + fn(hdr); + } + + return NoError(); +} + +PosixError NetlinkRequestAckOrError(const FileDescriptor& fd, uint32_t seq, + void* request, size_t len) { + // Dummy negative number for no error message received. + // We won't get a negative error number so there will be no confusion. + int err = -42; + RETURN_IF_ERRNO(NetlinkRequestResponse( + fd, request, len, + [&](const struct nlmsghdr* hdr) { + EXPECT_EQ(NLMSG_ERROR, hdr->nlmsg_type); + EXPECT_EQ(hdr->nlmsg_seq, seq); + EXPECT_GE(hdr->nlmsg_len, sizeof(*hdr) + sizeof(struct nlmsgerr)); + + const struct nlmsgerr* msg = + reinterpret_cast<const struct nlmsgerr*>(NLMSG_DATA(hdr)); + err = -msg->error; + }, + true)); + return PosixError(err); +} + +const struct rtattr* FindRtAttr(const struct nlmsghdr* hdr, + const struct ifinfomsg* msg, int16_t attr) { + const int ifi_space = NLMSG_SPACE(sizeof(*msg)); + int attrlen = hdr->nlmsg_len - ifi_space; + const struct rtattr* rta = reinterpret_cast<const struct rtattr*>( + reinterpret_cast<const uint8_t*>(hdr) + NLMSG_ALIGN(ifi_space)); + for (; RTA_OK(rta, attrlen); rta = RTA_NEXT(rta, attrlen)) { + if (rta->rta_type == attr) { + return rta; + } + } + return nullptr; +} + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_netlink_util.h b/test/syscalls/linux/socket_netlink_util.h new file mode 100644 index 000000000..e13ead406 --- /dev/null +++ b/test/syscalls/linux/socket_netlink_util.h @@ -0,0 +1,62 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GVISOR_TEST_SYSCALLS_SOCKET_NETLINK_UTIL_H_ +#define GVISOR_TEST_SYSCALLS_SOCKET_NETLINK_UTIL_H_ + +#include <sys/socket.h> +// socket.h has to be included before if_arp.h. +#include <linux/if_arp.h> +#include <linux/netlink.h> +#include <linux/rtnetlink.h> + +#include "test/util/file_descriptor.h" +#include "test/util/posix_error.h" + +namespace gvisor { +namespace testing { + +// Returns a bound netlink socket. +PosixErrorOr<FileDescriptor> NetlinkBoundSocket(int protocol); + +// Returns the port ID of the passed socket. +PosixErrorOr<uint32_t> NetlinkPortID(int fd); + +// Send the passed request and call fn on all response netlink messages. +// +// To be used on requests with NLM_F_MULTI reponses. +PosixError NetlinkRequestResponse( + const FileDescriptor& fd, void* request, size_t len, + const std::function<void(const struct nlmsghdr* hdr)>& fn, + bool expect_nlmsgerr); + +// Send the passed request and call fn on all response netlink messages. +// +// To be used on requests without NLM_F_MULTI reponses. +PosixError NetlinkRequestResponseSingle( + const FileDescriptor& fd, void* request, size_t len, + const std::function<void(const struct nlmsghdr* hdr)>& fn); + +// Send the passed request then expect and return an ack or error. +PosixError NetlinkRequestAckOrError(const FileDescriptor& fd, uint32_t seq, + void* request, size_t len); + +// Find rtnetlink attribute in message. 
+const struct rtattr* FindRtAttr(const struct nlmsghdr* hdr, + const struct ifinfomsg* msg, int16_t attr); + +} // namespace testing +} // namespace gvisor + +#endif // GVISOR_TEST_SYSCALLS_SOCKET_NETLINK_UTIL_H_ diff --git a/test/syscalls/linux/socket_non_blocking.cc b/test/syscalls/linux/socket_non_blocking.cc new file mode 100644 index 000000000..c3520cadd --- /dev/null +++ b/test/syscalls/linux/socket_non_blocking.cc @@ -0,0 +1,62 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "test/syscalls/linux/socket_non_blocking.h" + +#include <stdio.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <sys/un.h> + +#include "gtest/gtest.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { +namespace { + +TEST_P(NonBlockingSocketPairTest, ReadNothingAvailable) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + char buf[20] = {}; + ASSERT_THAT(ReadFd(sockets->first_fd(), buf, sizeof(buf)), + SyscallFailsWithErrno(EAGAIN)); +} + +TEST_P(NonBlockingSocketPairTest, RecvNothingAvailable) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + char buf[20] = {}; + ASSERT_THAT(RetryEINTR(recv)(sockets->first_fd(), buf, sizeof(buf), 0), + SyscallFailsWithErrno(EAGAIN)); +} + +TEST_P(NonBlockingSocketPairTest, RecvMsgNothingAvailable) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + struct iovec iov; + char buf[20] = {}; + iov.iov_base = buf; + iov.iov_len = sizeof(buf); + struct msghdr msg = {}; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + ASSERT_THAT(RetryEINTR(recvmsg)(sockets->first_fd(), &msg, 0), + SyscallFailsWithErrno(EAGAIN)); +} + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_non_blocking.h b/test/syscalls/linux/socket_non_blocking.h new file mode 100644 index 000000000..bd3e02fd2 --- /dev/null +++ b/test/syscalls/linux/socket_non_blocking.h @@ -0,0 +1,29 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_NON_BLOCKING_H_ +#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_NON_BLOCKING_H_ + +#include "test/syscalls/linux/socket_test_util.h" + +namespace gvisor { +namespace testing { + +// Test fixture for tests that apply to pairs of connected non-blocking sockets. +using NonBlockingSocketPairTest = SocketPairTest; + +} // namespace testing +} // namespace gvisor + +#endif // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_NON_BLOCKING_H_ diff --git a/test/syscalls/linux/socket_non_stream.cc b/test/syscalls/linux/socket_non_stream.cc new file mode 100644 index 000000000..c61817f14 --- /dev/null +++ b/test/syscalls/linux/socket_non_stream.cc @@ -0,0 +1,337 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "test/syscalls/linux/socket_non_stream.h" + +#include <stdio.h> +#include <sys/socket.h> +#include <sys/un.h> + +#include "gtest/gtest.h" +#include "test/syscalls/linux/ip_socket_test_util.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +TEST_P(NonStreamSocketPairTest, SendMsgTooLarge) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + int sndbuf; + socklen_t length = sizeof(sndbuf); + ASSERT_THAT( + getsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDBUF, &sndbuf, &length), + SyscallSucceeds()); + + // Make the call too large to fit in the send buffer. + const int buffer_size = 3 * sndbuf; + + EXPECT_THAT(SendLargeSendMsg(sockets, buffer_size, false /* reader */), + SyscallFailsWithErrno(EMSGSIZE)); +} + +// Stream sockets allow data sent with a single (e.g. write, sendmsg) syscall +// to be read in pieces with multiple (e.g. read, recvmsg) syscalls. +// +// SplitRecv checks that control messages can only be read on the first (e.g. +// read, recvmsg) syscall, even if it doesn't provide space for the control +// message. 
+TEST_P(NonStreamSocketPairTest, SplitRecv) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + char sent_data[512]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + ASSERT_THAT( + RetryEINTR(send)(sockets->first_fd(), sent_data, sizeof(sent_data), 0), + SyscallSucceedsWithValue(sizeof(sent_data))); + char received_data[sizeof(sent_data) / 2]; + ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data, + sizeof(received_data), 0), + SyscallSucceedsWithValue(sizeof(received_data))); + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(received_data))); + ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data, + sizeof(received_data), MSG_DONTWAIT), + SyscallFailsWithErrno(EWOULDBLOCK)); +} + +// Stream sockets allow data sent with multiple sends to be read in a single +// recv. Datagram sockets do not. +// +// SingleRecv checks that only a single message is readable in a single recv. +TEST_P(NonStreamSocketPairTest, SingleRecv) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + char sent_data1[20]; + RandomizeBuffer(sent_data1, sizeof(sent_data1)); + ASSERT_THAT( + RetryEINTR(send)(sockets->first_fd(), sent_data1, sizeof(sent_data1), 0), + SyscallSucceedsWithValue(sizeof(sent_data1))); + char sent_data2[20]; + RandomizeBuffer(sent_data2, sizeof(sent_data2)); + ASSERT_THAT( + RetryEINTR(send)(sockets->first_fd(), sent_data2, sizeof(sent_data2), 0), + SyscallSucceedsWithValue(sizeof(sent_data2))); + char received_data[sizeof(sent_data1) + sizeof(sent_data2)]; + ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data, + sizeof(received_data), 0), + SyscallSucceedsWithValue(sizeof(sent_data1))); + EXPECT_EQ(0, memcmp(sent_data1, received_data, sizeof(sent_data1))); +} + +TEST_P(NonStreamSocketPairTest, RecvmsgMsghdrFlagMsgTrunc) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[10]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + ASSERT_THAT( + 
RetryEINTR(send)(sockets->first_fd(), sent_data, sizeof(sent_data), 0), + SyscallSucceedsWithValue(sizeof(sent_data))); + + char received_data[sizeof(sent_data) / 2] = {}; + + struct iovec iov; + iov.iov_base = received_data; + iov.iov_len = sizeof(received_data); + struct msghdr msg = {}; + msg.msg_flags = -1; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, 0), + SyscallSucceedsWithValue(sizeof(received_data))); + EXPECT_EQ(0, memcmp(received_data, sent_data, sizeof(received_data))); + + // Check that msghdr flags were updated. + EXPECT_EQ(msg.msg_flags & MSG_TRUNC, MSG_TRUNC); +} + +// Stream sockets allow data sent with multiple sends to be peeked at in a +// single recv. Datagram sockets (except for unix sockets) do not. +// +// SinglePeek checks that only a single message is peekable in a single recv. +TEST_P(NonStreamSocketPairTest, SinglePeek) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + char sent_data1[20]; + RandomizeBuffer(sent_data1, sizeof(sent_data1)); + ASSERT_THAT( + RetryEINTR(send)(sockets->first_fd(), sent_data1, sizeof(sent_data1), 0), + SyscallSucceedsWithValue(sizeof(sent_data1))); + char sent_data2[20]; + RandomizeBuffer(sent_data2, sizeof(sent_data2)); + ASSERT_THAT( + RetryEINTR(send)(sockets->first_fd(), sent_data2, sizeof(sent_data2), 0), + SyscallSucceedsWithValue(sizeof(sent_data2))); + char received_data[sizeof(sent_data1) + sizeof(sent_data2)]; + for (int i = 0; i < 3; i++) { + memset(received_data, 0, sizeof(received_data)); + ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data, + sizeof(received_data), MSG_PEEK), + SyscallSucceedsWithValue(sizeof(sent_data1))); + EXPECT_EQ(0, memcmp(sent_data1, received_data, sizeof(sent_data1))); + } + ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data, + sizeof(sent_data1), 0), + SyscallSucceedsWithValue(sizeof(sent_data1))); + EXPECT_EQ(0, memcmp(sent_data1, received_data, 
sizeof(sent_data1))); + ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data, + sizeof(sent_data2), 0), + SyscallSucceedsWithValue(sizeof(sent_data2))); + EXPECT_EQ(0, memcmp(sent_data2, received_data, sizeof(sent_data2))); +} + +TEST_P(NonStreamSocketPairTest, MsgTruncTruncation) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + char sent_data[512]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + ASSERT_THAT( + RetryEINTR(send)(sockets->first_fd(), sent_data, sizeof(sent_data), 0), + SyscallSucceedsWithValue(sizeof(sent_data))); + char received_data[sizeof(sent_data)] = {}; + ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data, + sizeof(received_data) / 2, MSG_TRUNC), + SyscallSucceedsWithValue(sizeof(sent_data))); + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data) / 2)); + + // Check that we didn't get any extra data. + EXPECT_NE(0, memcmp(sent_data + sizeof(sent_data) / 2, + received_data + sizeof(received_data) / 2, + sizeof(sent_data) / 2)); +} + +TEST_P(NonStreamSocketPairTest, MsgTruncTruncationRecvmsgMsghdrFlagMsgTrunc) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[10]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + ASSERT_THAT( + RetryEINTR(send)(sockets->first_fd(), sent_data, sizeof(sent_data), 0), + SyscallSucceedsWithValue(sizeof(sent_data))); + + char received_data[sizeof(sent_data) / 2] = {}; + + struct iovec iov; + iov.iov_base = received_data; + iov.iov_len = sizeof(received_data); + struct msghdr msg = {}; + msg.msg_flags = -1; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, MSG_TRUNC), + SyscallSucceedsWithValue(sizeof(sent_data))); + EXPECT_EQ(0, memcmp(received_data, sent_data, sizeof(received_data))); + + // Check that msghdr flags were updated. 
+ EXPECT_EQ(msg.msg_flags & MSG_TRUNC, MSG_TRUNC); +} + +TEST_P(NonStreamSocketPairTest, MsgTruncSameSize) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + char sent_data[512]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + ASSERT_THAT( + RetryEINTR(send)(sockets->first_fd(), sent_data, sizeof(sent_data), 0), + SyscallSucceedsWithValue(sizeof(sent_data))); + char received_data[sizeof(sent_data)]; + ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data, + sizeof(received_data), MSG_TRUNC), + SyscallSucceedsWithValue(sizeof(received_data))); + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); +} + +TEST_P(NonStreamSocketPairTest, MsgTruncNotFull) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + char sent_data[512]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + ASSERT_THAT( + RetryEINTR(send)(sockets->first_fd(), sent_data, sizeof(sent_data), 0), + SyscallSucceedsWithValue(sizeof(sent_data))); + char received_data[2 * sizeof(sent_data)]; + ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data, + sizeof(received_data), MSG_TRUNC), + SyscallSucceedsWithValue(sizeof(sent_data))); + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); +} + +// This test tests reading from a socket with MSG_TRUNC and a zero length +// receive buffer. The user should be able to get the message length. +TEST_P(NonStreamSocketPairTest, RecvmsgMsgTruncZeroLen) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[10]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + ASSERT_THAT( + RetryEINTR(send)(sockets->first_fd(), sent_data, sizeof(sent_data), 0), + SyscallSucceedsWithValue(sizeof(sent_data))); + + // The receive buffer is of zero length. 
+ char received_data[0] = {}; + + struct iovec iov; + iov.iov_base = received_data; + iov.iov_len = sizeof(received_data); + struct msghdr msg = {}; + msg.msg_flags = -1; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + // The syscall succeeds returning the full size of the message on the socket. + ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, MSG_TRUNC), + SyscallSucceedsWithValue(sizeof(sent_data))); + + // Check that MSG_TRUNC is set on msghdr flags. + EXPECT_EQ(msg.msg_flags & MSG_TRUNC, MSG_TRUNC); +} + +// This test tests reading from a socket with MSG_TRUNC | MSG_PEEK and a zero +// length receive buffer. The user should be able to get the message length +// without reading data off the socket. +TEST_P(NonStreamSocketPairTest, RecvmsgMsgTruncMsgPeekZeroLen) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[10]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + ASSERT_THAT( + RetryEINTR(send)(sockets->first_fd(), sent_data, sizeof(sent_data), 0), + SyscallSucceedsWithValue(sizeof(sent_data))); + + // The receive buffer is of zero length. + char peek_data[0] = {}; + + struct iovec peek_iov; + peek_iov.iov_base = peek_data; + peek_iov.iov_len = sizeof(peek_data); + struct msghdr peek_msg = {}; + peek_msg.msg_flags = -1; + peek_msg.msg_iov = &peek_iov; + peek_msg.msg_iovlen = 1; + + // The syscall succeeds returning the full size of the message on the socket. + ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &peek_msg, + MSG_TRUNC | MSG_PEEK), + SyscallSucceedsWithValue(sizeof(sent_data))); + + // Check that MSG_TRUNC is set on msghdr flags because the receive buffer is + // smaller than the message size. 
+ EXPECT_EQ(peek_msg.msg_flags & MSG_TRUNC, MSG_TRUNC); + + char received_data[sizeof(sent_data)] = {}; + + struct iovec received_iov; + received_iov.iov_base = received_data; + received_iov.iov_len = sizeof(received_data); + struct msghdr received_msg = {}; + received_msg.msg_flags = -1; + received_msg.msg_iov = &received_iov; + received_msg.msg_iovlen = 1; + + // Next we can read the actual data. + ASSERT_THAT( + RetryEINTR(recvmsg)(sockets->second_fd(), &received_msg, MSG_TRUNC), + SyscallSucceedsWithValue(sizeof(sent_data))); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); + + // Check that MSG_TRUNC is not set on msghdr flags because we read the whole + // message. + EXPECT_EQ(received_msg.msg_flags & MSG_TRUNC, 0); +} + +// This test tests reading from a socket with MSG_TRUNC | MSG_PEEK and a zero +// length receive buffer and MSG_DONTWAIT. The user should be able to get an +// EAGAIN or EWOULDBLOCK error response. +TEST_P(NonStreamSocketPairTest, RecvmsgTruncPeekDontwaitZeroLen) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + // NOTE: We don't send any data on the socket. + + // The receive buffer is of zero length. + char peek_data[0] = {}; + + struct iovec peek_iov; + peek_iov.iov_base = peek_data; + peek_iov.iov_len = sizeof(peek_data); + struct msghdr peek_msg = {}; + peek_msg.msg_flags = -1; + peek_msg.msg_iov = &peek_iov; + peek_msg.msg_iovlen = 1; + + // recvmsg fails with EAGAIN because no data is available on the socket. + ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &peek_msg, + MSG_TRUNC | MSG_PEEK | MSG_DONTWAIT), + SyscallFailsWithErrno(EAGAIN)); +} + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_non_stream.h b/test/syscalls/linux/socket_non_stream.h new file mode 100644 index 000000000..469fbe6a2 --- /dev/null +++ b/test/syscalls/linux/socket_non_stream.h @@ -0,0 +1,29 @@ +// Copyright 2018 The gVisor Authors. 
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_NON_STREAM_H_
#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_NON_STREAM_H_

#include "test/syscalls/linux/socket_test_util.h"

namespace gvisor {
namespace testing {

// Test fixture for tests that apply to pairs of connected non-stream sockets.
//
// This is a plain alias of SocketPairTest; no additional members are declared
// here.
using NonStreamSocketPairTest = SocketPairTest;

}  // namespace testing
}  // namespace gvisor

#endif  // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_NON_STREAM_H_
diff --git a/test/syscalls/linux/socket_non_stream_blocking.cc b/test/syscalls/linux/socket_non_stream_blocking.cc new file mode 100644 index 000000000..b052f6e61 --- /dev/null +++ b/test/syscalls/linux/socket_non_stream_blocking.cc @@ -0,0 +1,85 @@
// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
+ +#include "test/syscalls/linux/socket_non_stream_blocking.h" + +#include <stdio.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <sys/un.h> + +#include "gtest/gtest.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +TEST_P(BlockingNonStreamSocketPairTest, RecvLessThanBufferWaitAll) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[100]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + ASSERT_THAT(write(sockets->first_fd(), sent_data, sizeof(sent_data)), + SyscallSucceedsWithValue(sizeof(sent_data))); + + char received_data[sizeof(sent_data) * 2] = {}; + ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data, + sizeof(received_data), MSG_WAITALL), + SyscallSucceedsWithValue(sizeof(sent_data))); +} + +// This test tests reading from a socket with MSG_TRUNC | MSG_PEEK and a zero +// length receive buffer and MSG_DONTWAIT. The recvmsg call should block on +// reading the data. +TEST_P(BlockingNonStreamSocketPairTest, + RecvmsgTruncPeekDontwaitZeroLenBlocking) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + // NOTE: We don't initially send any data on the socket. + const int data_size = 10; + char sent_data[data_size]; + RandomizeBuffer(sent_data, data_size); + + // The receive buffer is of zero length. + char peek_data[0] = {}; + + struct iovec peek_iov; + peek_iov.iov_base = peek_data; + peek_iov.iov_len = sizeof(peek_data); + struct msghdr peek_msg = {}; + peek_msg.msg_flags = -1; + peek_msg.msg_iov = &peek_iov; + peek_msg.msg_iovlen = 1; + + ScopedThread t([&]() { + // The syscall succeeds returning the full size of the message on the + // socket. This should block until there is data on the socket. 
+ ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &peek_msg, + MSG_TRUNC | MSG_PEEK), + SyscallSucceedsWithValue(data_size)); + }); + + absl::SleepFor(absl::Seconds(1)); + ASSERT_THAT(RetryEINTR(send)(sockets->first_fd(), sent_data, data_size, 0), + SyscallSucceedsWithValue(data_size)); +} + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_non_stream_blocking.h b/test/syscalls/linux/socket_non_stream_blocking.h new file mode 100644 index 000000000..6e205a039 --- /dev/null +++ b/test/syscalls/linux/socket_non_stream_blocking.h @@ -0,0 +1,30 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_NON_STREAM_BLOCKING_H_ +#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_NON_STREAM_BLOCKING_H_ + +#include "test/syscalls/linux/socket_test_util.h" + +namespace gvisor { +namespace testing { + +// Test fixture for tests that apply to pairs of blocking connected non-stream +// sockets. +using BlockingNonStreamSocketPairTest = SocketPairTest; + +} // namespace testing +} // namespace gvisor + +#endif // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_NON_STREAM_BLOCKING_H_ diff --git a/test/syscalls/linux/socket_stream.cc b/test/syscalls/linux/socket_stream.cc new file mode 100644 index 000000000..6522b2e01 --- /dev/null +++ b/test/syscalls/linux/socket_stream.cc @@ -0,0 +1,178 @@ +// Copyright 2018 The gVisor Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "test/syscalls/linux/socket_stream.h" + +#include <stdio.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <sys/un.h> + +#include "gtest/gtest.h" +#include "absl/time/clock.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +TEST_P(StreamSocketPairTest, SplitRecv) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + char sent_data[512]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + ASSERT_THAT( + RetryEINTR(send)(sockets->first_fd(), sent_data, sizeof(sent_data), 0), + SyscallSucceedsWithValue(sizeof(sent_data))); + char received_data[sizeof(sent_data) / 2]; + ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data, + sizeof(received_data), 0), + SyscallSucceedsWithValue(sizeof(received_data))); + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(received_data))); + ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data, + sizeof(received_data), 0), + SyscallSucceedsWithValue(sizeof(received_data))); + EXPECT_EQ(0, memcmp(sent_data + sizeof(received_data), received_data, + sizeof(received_data))); +} + +// Stream sockets allow data sent with multiple sends to be read in a single +// recv. +// +// CoalescedRecv checks that multiple messages are readable in a single recv. 
+TEST_P(StreamSocketPairTest, CoalescedRecv) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + char sent_data1[20]; + RandomizeBuffer(sent_data1, sizeof(sent_data1)); + ASSERT_THAT( + RetryEINTR(send)(sockets->first_fd(), sent_data1, sizeof(sent_data1), 0), + SyscallSucceedsWithValue(sizeof(sent_data1))); + char sent_data2[20]; + RandomizeBuffer(sent_data2, sizeof(sent_data2)); + ASSERT_THAT( + RetryEINTR(send)(sockets->first_fd(), sent_data2, sizeof(sent_data2), 0), + SyscallSucceedsWithValue(sizeof(sent_data2))); + char received_data[sizeof(sent_data1) + sizeof(sent_data2)]; + ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data, + sizeof(received_data), 0), + SyscallSucceedsWithValue(sizeof(received_data))); + EXPECT_EQ(0, memcmp(sent_data1, received_data, sizeof(sent_data1))); + EXPECT_EQ(0, memcmp(sent_data2, received_data + sizeof(sent_data1), + sizeof(sent_data2))); +} + +TEST_P(StreamSocketPairTest, WriteOneSideClosed) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + ASSERT_THAT(close(sockets->release_first_fd()), SyscallSucceeds()); + const char str[] = "abc"; + ASSERT_THAT(write(sockets->second_fd(), str, 3), + SyscallFailsWithErrno(EPIPE)); +} + +TEST_P(StreamSocketPairTest, RecvmsgMsghdrFlagsNoMsgTrunc) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[10]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + ASSERT_THAT( + RetryEINTR(send)(sockets->first_fd(), sent_data, sizeof(sent_data), 0), + SyscallSucceedsWithValue(sizeof(sent_data))); + + char received_data[sizeof(sent_data) / 2] = {}; + + struct iovec iov; + iov.iov_base = received_data; + iov.iov_len = sizeof(received_data); + struct msghdr msg = {}; + msg.msg_flags = -1; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, 0), + SyscallSucceedsWithValue(sizeof(received_data))); + EXPECT_EQ(0, memcmp(received_data, sent_data, sizeof(received_data))); + + // Check that 
msghdr flags were cleared (MSG_TRUNC was not set). + ASSERT_EQ(msg.msg_flags & MSG_TRUNC, 0); +} + +TEST_P(StreamSocketPairTest, RecvmsgTruncZeroLen) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[10]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + ASSERT_THAT( + RetryEINTR(send)(sockets->first_fd(), sent_data, sizeof(sent_data), 0), + SyscallSucceedsWithValue(sizeof(sent_data))); + + char received_data[0] = {}; + + struct iovec iov; + iov.iov_base = received_data; + iov.iov_len = sizeof(received_data); + struct msghdr msg = {}; + msg.msg_flags = -1; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, MSG_TRUNC), + SyscallSucceedsWithValue(0)); + + // Check that msghdr flags were cleared (MSG_TRUNC was not set). + ASSERT_EQ(msg.msg_flags & MSG_TRUNC, 0); +} + +TEST_P(StreamSocketPairTest, RecvmsgTruncPeekZeroLen) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[10]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + ASSERT_THAT( + RetryEINTR(send)(sockets->first_fd(), sent_data, sizeof(sent_data), 0), + SyscallSucceedsWithValue(sizeof(sent_data))); + + char received_data[0] = {}; + + struct iovec iov; + iov.iov_base = received_data; + iov.iov_len = sizeof(received_data); + struct msghdr msg = {}; + msg.msg_flags = -1; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + ASSERT_THAT( + RetryEINTR(recvmsg)(sockets->second_fd(), &msg, MSG_TRUNC | MSG_PEEK), + SyscallSucceedsWithValue(0)); + + // Check that msghdr flags were cleared (MSG_TRUNC was not set). 
+ ASSERT_EQ(msg.msg_flags & MSG_TRUNC, 0); +} + +TEST_P(StreamSocketPairTest, MsgTrunc) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + char sent_data[512]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + ASSERT_THAT( + RetryEINTR(send)(sockets->first_fd(), sent_data, sizeof(sent_data), 0), + SyscallSucceedsWithValue(sizeof(sent_data))); + char received_data[sizeof(sent_data)]; + ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data, + sizeof(received_data) / 2, MSG_TRUNC), + SyscallSucceedsWithValue(sizeof(sent_data) / 2)); + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data) / 2)); +} + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_stream.h b/test/syscalls/linux/socket_stream.h new file mode 100644 index 000000000..b837b8f8c --- /dev/null +++ b/test/syscalls/linux/socket_stream.h @@ -0,0 +1,30 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_STREAM_H_ +#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_STREAM_H_ + +#include "test/syscalls/linux/socket_test_util.h" + +namespace gvisor { +namespace testing { + +// Test fixture for tests that apply to pairs of blocking and non-blocking +// connected stream sockets. 
+using StreamSocketPairTest = SocketPairTest; + +} // namespace testing +} // namespace gvisor + +#endif // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_STREAM_H_ diff --git a/test/syscalls/linux/socket_stream_blocking.cc b/test/syscalls/linux/socket_stream_blocking.cc new file mode 100644 index 000000000..538ee2268 --- /dev/null +++ b/test/syscalls/linux/socket_stream_blocking.cc @@ -0,0 +1,163 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "test/syscalls/linux/socket_stream_blocking.h" + +#include <stdio.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <sys/un.h> + +#include "gtest/gtest.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" +#include "test/util/timer_util.h" + +namespace gvisor { +namespace testing { + +TEST_P(BlockingStreamSocketPairTest, BlockPartialWriteClosed) { + // FIXME(b/35921550): gVisor doesn't support SO_SNDBUF on UDS, nor does it + // enforce any limit; it will write arbitrary amounts of data without + // blocking. 
+ SKIP_IF(IsRunningOnGvisor()); + + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + int buffer_size; + socklen_t length = sizeof(buffer_size); + ASSERT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDBUF, + &buffer_size, &length), + SyscallSucceeds()); + + int wfd = sockets->first_fd(); + ScopedThread t([wfd, buffer_size]() { + std::vector<char> buf(2 * buffer_size); + // Write more than fits in the buffer. Blocks then returns partial write + // when the other end is closed. The next call returns EPIPE. + // + // N.B. writes occur in chunks, so we may see less than buffer_size from + // the first call. + ASSERT_THAT(write(wfd, buf.data(), buf.size()), + SyscallSucceedsWithValue(::testing::Gt(0))); + ASSERT_THAT(write(wfd, buf.data(), buf.size()), + ::testing::AnyOf(SyscallFailsWithErrno(EPIPE), + SyscallFailsWithErrno(ECONNRESET))); + }); + + // Leave time for write to become blocked. + absl::SleepFor(absl::Seconds(1)); + + ASSERT_THAT(close(sockets->release_second_fd()), SyscallSucceeds()); +} + +// Random save may interrupt the call to sendmsg() in SendLargeSendMsg(), +// causing the write to be incomplete and the test to hang. +TEST_P(BlockingStreamSocketPairTest, SendMsgTooLarge_NoRandomSave) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + int sndbuf; + socklen_t length = sizeof(sndbuf); + ASSERT_THAT( + getsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDBUF, &sndbuf, &length), + SyscallSucceeds()); + + // Make the call too large to fit in the send buffer. 
+ const int buffer_size = 3 * sndbuf; + + EXPECT_THAT(SendLargeSendMsg(sockets, buffer_size, true /* reader */), + SyscallSucceedsWithValue(buffer_size)); +} + +TEST_P(BlockingStreamSocketPairTest, RecvLessThanBuffer) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[100]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + ASSERT_THAT(write(sockets->first_fd(), sent_data, sizeof(sent_data)), + SyscallSucceedsWithValue(sizeof(sent_data))); + + char received_data[200] = {}; + ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data, + sizeof(received_data), 0), + SyscallSucceedsWithValue(sizeof(sent_data))); +} + +// Test that MSG_WAITALL causes recv to block until all requested data is +// received. Random save can interrupt blocking and cause received data to be +// returned, even if the amount received is less than the full requested amount. +TEST_P(BlockingStreamSocketPairTest, RecvLessThanBufferWaitAll_NoRandomSave) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[100]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + ASSERT_THAT(write(sockets->first_fd(), sent_data, sizeof(sent_data)), + SyscallSucceedsWithValue(sizeof(sent_data))); + + constexpr auto kDuration = absl::Milliseconds(200); + auto before = Now(CLOCK_MONOTONIC); + + const ScopedThread t([&]() { + absl::SleepFor(kDuration); + + // Don't let saving after the write interrupt the blocking recv. 
+ const DisableSave ds; + + ASSERT_THAT(write(sockets->first_fd(), sent_data, sizeof(sent_data)), + SyscallSucceedsWithValue(sizeof(sent_data))); + }); + + char received_data[sizeof(sent_data) * 2] = {}; + ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data, + sizeof(received_data), MSG_WAITALL), + SyscallSucceedsWithValue(sizeof(received_data))); + + auto after = Now(CLOCK_MONOTONIC); + EXPECT_GE(after - before, kDuration); +} + +TEST_P(BlockingStreamSocketPairTest, SendTimeout) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + struct timeval tv { + .tv_sec = 0, .tv_usec = 10 + }; + EXPECT_THAT( + setsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)), + SyscallSucceeds()); + + std::vector<char> buf(kPageSize); + // We don't know how much data the socketpair will buffer, so we may do an + // arbitrarily large number of writes; saving after each write causes this + // test's time to explode. + const DisableSave ds; + for (;;) { + int ret; + ASSERT_THAT( + ret = RetryEINTR(send)(sockets->first_fd(), buf.data(), buf.size(), 0), + ::testing::AnyOf(SyscallSucceeds(), SyscallFailsWithErrno(EAGAIN))); + if (ret == -1) { + break; + } + } +} + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_stream_blocking.h b/test/syscalls/linux/socket_stream_blocking.h new file mode 100644 index 000000000..9fd19ff90 --- /dev/null +++ b/test/syscalls/linux/socket_stream_blocking.h @@ -0,0 +1,30 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_STREAM_BLOCKING_H_
#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_STREAM_BLOCKING_H_

#include "test/syscalls/linux/socket_test_util.h"

namespace gvisor {
namespace testing {

// Test fixture for tests that apply to pairs of blocking connected stream
// sockets.
//
// This is a plain alias of SocketPairTest; no additional members are declared
// here.
using BlockingStreamSocketPairTest = SocketPairTest;

}  // namespace testing
}  // namespace gvisor

#endif  // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_STREAM_BLOCKING_H_
diff --git a/test/syscalls/linux/socket_stream_nonblock.cc b/test/syscalls/linux/socket_stream_nonblock.cc new file mode 100644 index 000000000..74d608741 --- /dev/null +++ b/test/syscalls/linux/socket_stream_nonblock.cc @@ -0,0 +1,49 @@
// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
+ +#include "test/syscalls/linux/socket_stream_nonblock.h" + +#include <sys/socket.h> +#include <sys/types.h> +#include <sys/uio.h> +#include <sys/un.h> + +#include "gtest/gtest.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +using ::testing::Le; + +TEST_P(NonBlockingStreamSocketPairTest, SendMsgTooLarge) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + int sndbuf; + socklen_t length = sizeof(sndbuf); + ASSERT_THAT( + getsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDBUF, &sndbuf, &length), + SyscallSucceeds()); + + // Make the call too large to fit in the send buffer. + const int buffer_size = 3 * sndbuf; + + EXPECT_THAT(SendLargeSendMsg(sockets, buffer_size, false /* reader */), + SyscallSucceedsWithValue(Le(buffer_size))); +} + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_stream_nonblock.h b/test/syscalls/linux/socket_stream_nonblock.h new file mode 100644 index 000000000..c3b7fad91 --- /dev/null +++ b/test/syscalls/linux/socket_stream_nonblock.h @@ -0,0 +1,30 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 

#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_STREAM_NONBLOCK_H_
#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_STREAM_NONBLOCK_H_

#include "test/syscalls/linux/socket_test_util.h"

namespace gvisor {
namespace testing {

// Test fixture for tests that apply to pairs of non-blocking connected stream
// sockets.
//
// This is a plain alias of SocketPairTest; no additional members are declared
// here.
using NonBlockingStreamSocketPairTest = SocketPairTest;

}  // namespace testing
}  // namespace gvisor

#endif  // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_STREAM_NONBLOCK_H_
diff --git a/test/syscalls/linux/socket_test_util.cc b/test/syscalls/linux/socket_test_util.cc new file mode 100644 index 000000000..53b678e94 --- /dev/null +++ b/test/syscalls/linux/socket_test_util.cc @@ -0,0 +1,907 @@
// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
+ +#include "test/syscalls/linux/socket_test_util.h" + +#include <arpa/inet.h> +#include <poll.h> +#include <sys/socket.h> + +#include <memory> + +#include "gtest/gtest.h" +#include "absl/memory/memory.h" +#include "absl/strings/str_cat.h" +#include "absl/time/clock.h" +#include "absl/types/optional.h" +#include "test/util/file_descriptor.h" +#include "test/util/posix_error.h" +#include "test/util/temp_path.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +Creator<SocketPair> SyscallSocketPairCreator(int domain, int type, + int protocol) { + return [=]() -> PosixErrorOr<std::unique_ptr<FDSocketPair>> { + int pair[2]; + RETURN_ERROR_IF_SYSCALL_FAIL(socketpair(domain, type, protocol, pair)); + MaybeSave(); // Save on successful creation. + return absl::make_unique<FDSocketPair>(pair[0], pair[1]); + }; +} + +Creator<FileDescriptor> SyscallSocketCreator(int domain, int type, + int protocol) { + return [=]() -> PosixErrorOr<std::unique_ptr<FileDescriptor>> { + int fd = 0; + RETURN_ERROR_IF_SYSCALL_FAIL(fd = socket(domain, type, protocol)); + MaybeSave(); // Save on successful creation. + return absl::make_unique<FileDescriptor>(fd); + }; +} + +PosixErrorOr<struct sockaddr_un> UniqueUnixAddr(bool abstract, int domain) { + struct sockaddr_un addr = {}; + std::string path = NewTempAbsPathInDir("/tmp"); + if (path.size() >= sizeof(addr.sun_path)) { + return PosixError(EINVAL, + "Unable to generate a temp path of appropriate length"); + } + + if (abstract) { + // Indicate that the path is in the abstract namespace. 
+ path[0] = 0; + } + memcpy(addr.sun_path, path.c_str(), path.length()); + addr.sun_family = domain; + return addr; +} + +Creator<SocketPair> AcceptBindSocketPairCreator(bool abstract, int domain, + int type, int protocol) { + return [=]() -> PosixErrorOr<std::unique_ptr<AddrFDSocketPair>> { + ASSIGN_OR_RETURN_ERRNO(struct sockaddr_un bind_addr, + UniqueUnixAddr(abstract, domain)); + ASSIGN_OR_RETURN_ERRNO(struct sockaddr_un extra_addr, + UniqueUnixAddr(abstract, domain)); + + int bound; + RETURN_ERROR_IF_SYSCALL_FAIL(bound = socket(domain, type, protocol)); + MaybeSave(); // Successful socket creation. + RETURN_ERROR_IF_SYSCALL_FAIL( + bind(bound, reinterpret_cast<struct sockaddr*>(&bind_addr), + sizeof(bind_addr))); + MaybeSave(); // Successful bind. + RETURN_ERROR_IF_SYSCALL_FAIL(listen(bound, /* backlog = */ 5)); + MaybeSave(); // Successful listen. + + int connected; + RETURN_ERROR_IF_SYSCALL_FAIL(connected = socket(domain, type, protocol)); + MaybeSave(); // Successful socket creation. + RETURN_ERROR_IF_SYSCALL_FAIL( + connect(connected, reinterpret_cast<struct sockaddr*>(&bind_addr), + sizeof(bind_addr))); + MaybeSave(); // Successful connect. + + int accepted; + RETURN_ERROR_IF_SYSCALL_FAIL( + accepted = accept4(bound, nullptr, nullptr, + type & (SOCK_NONBLOCK | SOCK_CLOEXEC))); + MaybeSave(); // Successful connect. + + // Cleanup no longer needed resources. + RETURN_ERROR_IF_SYSCALL_FAIL(close(bound)); + MaybeSave(); // Dropped original socket. + + // Only unlink if path is not in abstract namespace. + if (bind_addr.sun_path[0] != 0) { + RETURN_ERROR_IF_SYSCALL_FAIL(unlink(bind_addr.sun_path)); + MaybeSave(); // Unlinked path. + } + + // accepted is before connected to destruct connected before accepted. + // Destructors for nonstatic member objects are called in the reverse order + // in which they appear in the class declaration. 
+ return absl::make_unique<AddrFDSocketPair>(accepted, connected, bind_addr, + extra_addr); + }; +} + +Creator<SocketPair> FilesystemAcceptBindSocketPairCreator(int domain, int type, + int protocol) { + return AcceptBindSocketPairCreator(/* abstract= */ false, domain, type, + protocol); +} + +Creator<SocketPair> AbstractAcceptBindSocketPairCreator(int domain, int type, + int protocol) { + return AcceptBindSocketPairCreator(/* abstract= */ true, domain, type, + protocol); +} + +Creator<SocketPair> BidirectionalBindSocketPairCreator(bool abstract, + int domain, int type, + int protocol) { + return [=]() -> PosixErrorOr<std::unique_ptr<FDSocketPair>> { + ASSIGN_OR_RETURN_ERRNO(struct sockaddr_un addr1, + UniqueUnixAddr(abstract, domain)); + ASSIGN_OR_RETURN_ERRNO(struct sockaddr_un addr2, + UniqueUnixAddr(abstract, domain)); + + int sock1; + RETURN_ERROR_IF_SYSCALL_FAIL(sock1 = socket(domain, type, protocol)); + MaybeSave(); // Successful socket creation. + RETURN_ERROR_IF_SYSCALL_FAIL( + bind(sock1, reinterpret_cast<struct sockaddr*>(&addr1), sizeof(addr1))); + MaybeSave(); // Successful bind. + + int sock2; + RETURN_ERROR_IF_SYSCALL_FAIL(sock2 = socket(domain, type, protocol)); + MaybeSave(); // Successful socket creation. + RETURN_ERROR_IF_SYSCALL_FAIL( + bind(sock2, reinterpret_cast<struct sockaddr*>(&addr2), sizeof(addr2))); + MaybeSave(); // Successful bind. + + RETURN_ERROR_IF_SYSCALL_FAIL(connect( + sock1, reinterpret_cast<struct sockaddr*>(&addr2), sizeof(addr2))); + MaybeSave(); // Successful connect. + + RETURN_ERROR_IF_SYSCALL_FAIL(connect( + sock2, reinterpret_cast<struct sockaddr*>(&addr1), sizeof(addr1))); + MaybeSave(); // Successful connect. + + // Cleanup no longer needed resources. + + // Only unlink if path is not in abstract namespace. + if (addr1.sun_path[0] != 0) { + RETURN_ERROR_IF_SYSCALL_FAIL(unlink(addr1.sun_path)); + MaybeSave(); // Successful unlink. + } + + // Only unlink if path is not in abstract namespace. 
+ if (addr2.sun_path[0] != 0) { + RETURN_ERROR_IF_SYSCALL_FAIL(unlink(addr2.sun_path)); + MaybeSave(); // Successful unlink. + } + + return absl::make_unique<FDSocketPair>(sock1, sock2); + }; +} + +Creator<SocketPair> FilesystemBidirectionalBindSocketPairCreator(int domain, + int type, + int protocol) { + return BidirectionalBindSocketPairCreator(/* abstract= */ false, domain, type, + protocol); +} + +Creator<SocketPair> AbstractBidirectionalBindSocketPairCreator(int domain, + int type, + int protocol) { + return BidirectionalBindSocketPairCreator(/* abstract= */ true, domain, type, + protocol); +} + +Creator<SocketPair> SocketpairGoferSocketPairCreator(int domain, int type, + int protocol) { + return [=]() -> PosixErrorOr<std::unique_ptr<FDSocketPair>> { + struct sockaddr_un addr = {}; + constexpr char kSocketGoferPath[] = "/socket"; + memcpy(addr.sun_path, kSocketGoferPath, sizeof(kSocketGoferPath)); + addr.sun_family = domain; + + int sock1; + RETURN_ERROR_IF_SYSCALL_FAIL(sock1 = socket(domain, type, protocol)); + MaybeSave(); // Successful socket creation. + RETURN_ERROR_IF_SYSCALL_FAIL(connect( + sock1, reinterpret_cast<struct sockaddr*>(&addr), sizeof(addr))); + MaybeSave(); // Successful connect. + + int sock2; + RETURN_ERROR_IF_SYSCALL_FAIL(sock2 = socket(domain, type, protocol)); + MaybeSave(); // Successful socket creation. + RETURN_ERROR_IF_SYSCALL_FAIL(connect( + sock2, reinterpret_cast<struct sockaddr*>(&addr), sizeof(addr))); + MaybeSave(); // Successful connect. + + // Make and close another socketpair to ensure that the duped ends of the + // first socketpair get closed. + // + // The problem is that there is no way to atomically send and close an FD. + // The closest that we can do is send and then immediately close the FD, + // which is what we do in the gofer. The gofer won't respond to another + // request until the reply is sent and the FD is closed, so forcing the + // gofer to handle another request will ensure that this has happened. 
+ for (int i = 0; i < 2; i++) { + int sock; + RETURN_ERROR_IF_SYSCALL_FAIL(sock = socket(domain, type, protocol)); + RETURN_ERROR_IF_SYSCALL_FAIL(connect( + sock, reinterpret_cast<struct sockaddr*>(&addr), sizeof(addr))); + RETURN_ERROR_IF_SYSCALL_FAIL(close(sock)); + } + + return absl::make_unique<FDSocketPair>(sock1, sock2); + }; +} + +Creator<SocketPair> SocketpairGoferFileSocketPairCreator(int flags) { + return [=]() -> PosixErrorOr<std::unique_ptr<FDSocketPair>> { + constexpr char kSocketGoferPath[] = "/socket"; + + int sock1; + RETURN_ERROR_IF_SYSCALL_FAIL(sock1 = + open(kSocketGoferPath, O_RDWR | flags)); + MaybeSave(); // Successful socket creation. + + int sock2; + RETURN_ERROR_IF_SYSCALL_FAIL(sock2 = + open(kSocketGoferPath, O_RDWR | flags)); + MaybeSave(); // Successful socket creation. + + return absl::make_unique<FDSocketPair>(sock1, sock2); + }; +} + +Creator<SocketPair> UnboundSocketPairCreator(bool abstract, int domain, + int type, int protocol) { + return [=]() -> PosixErrorOr<std::unique_ptr<AddrFDSocketPair>> { + ASSIGN_OR_RETURN_ERRNO(struct sockaddr_un addr1, + UniqueUnixAddr(abstract, domain)); + ASSIGN_OR_RETURN_ERRNO(struct sockaddr_un addr2, + UniqueUnixAddr(abstract, domain)); + + int sock1; + RETURN_ERROR_IF_SYSCALL_FAIL(sock1 = socket(domain, type, protocol)); + MaybeSave(); // Successful socket creation. + int sock2; + RETURN_ERROR_IF_SYSCALL_FAIL(sock2 = socket(domain, type, protocol)); + MaybeSave(); // Successful socket creation. 
+ return absl::make_unique<AddrFDSocketPair>(sock1, sock2, addr1, addr2); + }; +} + +Creator<SocketPair> FilesystemUnboundSocketPairCreator(int domain, int type, + int protocol) { + return UnboundSocketPairCreator(/* abstract= */ false, domain, type, + protocol); +} + +Creator<SocketPair> AbstractUnboundSocketPairCreator(int domain, int type, + int protocol) { + return UnboundSocketPairCreator(/* abstract= */ true, domain, type, protocol); +} + +void LocalhostAddr(struct sockaddr_in* addr, bool dual_stack) { + addr->sin_family = AF_INET; + addr->sin_port = htons(0); + inet_pton(AF_INET, "127.0.0.1", + reinterpret_cast<void*>(&addr->sin_addr.s_addr)); +} + +void LocalhostAddr(struct sockaddr_in6* addr, bool dual_stack) { + addr->sin6_family = AF_INET6; + addr->sin6_port = htons(0); + if (dual_stack) { + inet_pton(AF_INET6, "::ffff:127.0.0.1", + reinterpret_cast<void*>(&addr->sin6_addr.s6_addr)); + } else { + inet_pton(AF_INET6, "::1", + reinterpret_cast<void*>(&addr->sin6_addr.s6_addr)); + } + addr->sin6_scope_id = 0; +} + +template <typename T> +PosixErrorOr<T> BindIP(int fd, bool dual_stack) { + T addr = {}; + LocalhostAddr(&addr, dual_stack); + RETURN_ERROR_IF_SYSCALL_FAIL( + bind(fd, reinterpret_cast<struct sockaddr*>(&addr), sizeof(addr))); + socklen_t addrlen = sizeof(addr); + RETURN_ERROR_IF_SYSCALL_FAIL( + getsockname(fd, reinterpret_cast<struct sockaddr*>(&addr), &addrlen)); + return addr; +} + +template <typename T> +PosixErrorOr<T> TCPBindAndListen(int fd, bool dual_stack) { + ASSIGN_OR_RETURN_ERRNO(T addr, BindIP<T>(fd, dual_stack)); + RETURN_ERROR_IF_SYSCALL_FAIL(listen(fd, /* backlog = */ 5)); + return addr; +} + +template <typename T> +PosixErrorOr<std::unique_ptr<AddrFDSocketPair>> +CreateTCPConnectAcceptSocketPair(int bound, int connected, int type, + bool dual_stack, T bind_addr) { + int connect_result = 0; + RETURN_ERROR_IF_SYSCALL_FAIL( + (connect_result = RetryEINTR(connect)( + connected, reinterpret_cast<struct sockaddr*>(&bind_addr), + 
sizeof(bind_addr))) == -1 && + errno == EINPROGRESS + ? 0 + : connect_result); + MaybeSave(); // Successful connect. + + if (connect_result == -1) { + struct pollfd connect_poll = {connected, POLLOUT | POLLERR | POLLHUP, 0}; + RETURN_ERROR_IF_SYSCALL_FAIL(RetryEINTR(poll)(&connect_poll, 1, 0)); + int error = 0; + socklen_t errorlen = sizeof(error); + RETURN_ERROR_IF_SYSCALL_FAIL( + getsockopt(connected, SOL_SOCKET, SO_ERROR, &error, &errorlen)); + errno = error; + RETURN_ERROR_IF_SYSCALL_FAIL( + /* connect */ error == 0 ? 0 : -1); + } + + int accepted = -1; + struct pollfd accept_poll = {bound, POLLIN, 0}; + while (accepted == -1) { + RETURN_ERROR_IF_SYSCALL_FAIL(RetryEINTR(poll)(&accept_poll, 1, 0)); + + RETURN_ERROR_IF_SYSCALL_FAIL( + (accepted = RetryEINTR(accept4)( + bound, nullptr, nullptr, type & (SOCK_NONBLOCK | SOCK_CLOEXEC))) == + -1 && + errno == EAGAIN + ? 0 + : accepted); + } + MaybeSave(); // Successful accept. + + T extra_addr = {}; + LocalhostAddr(&extra_addr, dual_stack); + return absl::make_unique<AddrFDSocketPair>(connected, accepted, bind_addr, + extra_addr); +} + +template <typename T> +PosixErrorOr<std::unique_ptr<AddrFDSocketPair>> CreateTCPAcceptBindSocketPair( + int bound, int connected, int type, bool dual_stack) { + ASSIGN_OR_RETURN_ERRNO(T bind_addr, TCPBindAndListen<T>(bound, dual_stack)); + + auto result = CreateTCPConnectAcceptSocketPair(bound, connected, type, + dual_stack, bind_addr); + + // Cleanup no longer needed resources. + RETURN_ERROR_IF_SYSCALL_FAIL(close(bound)); + MaybeSave(); // Successful close. + + return result; +} + +Creator<SocketPair> TCPAcceptBindSocketPairCreator(int domain, int type, + int protocol, + bool dual_stack) { + return [=]() -> PosixErrorOr<std::unique_ptr<AddrFDSocketPair>> { + int bound; + RETURN_ERROR_IF_SYSCALL_FAIL(bound = socket(domain, type, protocol)); + MaybeSave(); // Successful socket creation. 
+ + int connected; + RETURN_ERROR_IF_SYSCALL_FAIL(connected = socket(domain, type, protocol)); + MaybeSave(); // Successful socket creation. + + if (domain == AF_INET) { + return CreateTCPAcceptBindSocketPair<sockaddr_in>(bound, connected, type, + dual_stack); + } + return CreateTCPAcceptBindSocketPair<sockaddr_in6>(bound, connected, type, + dual_stack); + }; +} + +Creator<SocketPair> TCPAcceptBindPersistentListenerSocketPairCreator( + int domain, int type, int protocol, bool dual_stack) { + // These are lazily initialized below, on the first call to the returned + // lambda. These values are private to each returned lambda, but shared across + // invocations of a specific lambda. + // + // The sharing allows pairs created with the same parameters to share a + // listener. This prevents future connects from failing if the connecting + // socket selects a port which had previously been used by a listening socket + // that still has some connections in TIME-WAIT. + // + // The lazy initialization is to avoid creating sockets during parameter + // enumeration. This is important because parameters are enumerated during the + // build process where networking may not be available. + auto listener = std::make_shared<absl::optional<int>>(absl::optional<int>()); + auto addr4 = std::make_shared<absl::optional<sockaddr_in>>( + absl::optional<sockaddr_in>()); + auto addr6 = std::make_shared<absl::optional<sockaddr_in6>>( + absl::optional<sockaddr_in6>()); + + return [=]() -> PosixErrorOr<std::unique_ptr<AddrFDSocketPair>> { + int connected; + RETURN_ERROR_IF_SYSCALL_FAIL(connected = socket(domain, type, protocol)); + MaybeSave(); // Successful socket creation. + + // Share the listener across invocations. + if (!listener->has_value()) { + int fd = socket(domain, type, protocol); + if (fd < 0) { + return PosixError(errno, absl::StrCat("socket(", domain, ", ", type, + ", ", protocol, ")")); + } + listener->emplace(fd); + MaybeSave(); // Successful socket creation. 
+ } + + // Bind the listener once, but create a new connect/accept pair each + // time. + if (domain == AF_INET) { + if (!addr4->has_value()) { + addr4->emplace( + TCPBindAndListen<sockaddr_in>(listener->value(), dual_stack) + .ValueOrDie()); + } + return CreateTCPConnectAcceptSocketPair(listener->value(), connected, + type, dual_stack, addr4->value()); + } + if (!addr6->has_value()) { + addr6->emplace( + TCPBindAndListen<sockaddr_in6>(listener->value(), dual_stack) + .ValueOrDie()); + } + return CreateTCPConnectAcceptSocketPair(listener->value(), connected, type, + dual_stack, addr6->value()); + }; +} + +template <typename T> +PosixErrorOr<std::unique_ptr<AddrFDSocketPair>> CreateUDPBoundSocketPair( + int sock1, int sock2, int type, bool dual_stack) { + ASSIGN_OR_RETURN_ERRNO(T addr1, BindIP<T>(sock1, dual_stack)); + ASSIGN_OR_RETURN_ERRNO(T addr2, BindIP<T>(sock2, dual_stack)); + + return absl::make_unique<AddrFDSocketPair>(sock1, sock2, addr1, addr2); +} + +template <typename T> +PosixErrorOr<std::unique_ptr<AddrFDSocketPair>> +CreateUDPBidirectionalBindSocketPair(int sock1, int sock2, int type, + bool dual_stack) { + ASSIGN_OR_RETURN_ERRNO( + auto socks, CreateUDPBoundSocketPair<T>(sock1, sock2, type, dual_stack)); + + // Connect sock1 to sock2. + RETURN_ERROR_IF_SYSCALL_FAIL(connect(socks->first_fd(), socks->second_addr(), + socks->second_addr_size())); + MaybeSave(); // Successful connection. + + // Connect sock2 to sock1. + RETURN_ERROR_IF_SYSCALL_FAIL(connect(socks->second_fd(), socks->first_addr(), + socks->first_addr_size())); + MaybeSave(); // Successful connection. + + return socks; +} + +Creator<SocketPair> UDPBidirectionalBindSocketPairCreator(int domain, int type, + int protocol, + bool dual_stack) { + return [=]() -> PosixErrorOr<std::unique_ptr<AddrFDSocketPair>> { + int sock1; + RETURN_ERROR_IF_SYSCALL_FAIL(sock1 = socket(domain, type, protocol)); + MaybeSave(); // Successful socket creation. 
+ + int sock2; + RETURN_ERROR_IF_SYSCALL_FAIL(sock2 = socket(domain, type, protocol)); + MaybeSave(); // Successful socket creation. + + if (domain == AF_INET) { + return CreateUDPBidirectionalBindSocketPair<sockaddr_in>( + sock1, sock2, type, dual_stack); + } + return CreateUDPBidirectionalBindSocketPair<sockaddr_in6>(sock1, sock2, + type, dual_stack); + }; +} + +Creator<SocketPair> UDPUnboundSocketPairCreator(int domain, int type, + int protocol, bool dual_stack) { + return [=]() -> PosixErrorOr<std::unique_ptr<FDSocketPair>> { + int sock1; + RETURN_ERROR_IF_SYSCALL_FAIL(sock1 = socket(domain, type, protocol)); + MaybeSave(); // Successful socket creation. + + int sock2; + RETURN_ERROR_IF_SYSCALL_FAIL(sock2 = socket(domain, type, protocol)); + MaybeSave(); // Successful socket creation. + + return absl::make_unique<FDSocketPair>(sock1, sock2); + }; +} + +SocketPairKind Reversed(SocketPairKind const& base) { + auto const& creator = base.creator; + return SocketPairKind{ + absl::StrCat("reversed ", base.description), base.domain, base.type, + base.protocol, + [creator]() -> PosixErrorOr<std::unique_ptr<ReversedSocketPair>> { + ASSIGN_OR_RETURN_ERRNO(auto creator_value, creator()); + return absl::make_unique<ReversedSocketPair>(std::move(creator_value)); + }}; +} + +Creator<FileDescriptor> UnboundSocketCreator(int domain, int type, + int protocol) { + return [=]() -> PosixErrorOr<std::unique_ptr<FileDescriptor>> { + int sock; + RETURN_ERROR_IF_SYSCALL_FAIL(sock = socket(domain, type, protocol)); + MaybeSave(); // Successful socket creation. 
+ + return absl::make_unique<FileDescriptor>(sock); + }; +} + +std::vector<SocketPairKind> IncludeReversals(std::vector<SocketPairKind> vec) { + return ApplyVecToVec<SocketPairKind>(std::vector<Middleware>{NoOp, Reversed}, + vec); +} + +SocketPairKind NoOp(SocketPairKind const& base) { return base; } + +void TransferTest(int fd1, int fd2) { + char buf1[20]; + RandomizeBuffer(buf1, sizeof(buf1)); + ASSERT_THAT(WriteFd(fd1, buf1, sizeof(buf1)), + SyscallSucceedsWithValue(sizeof(buf1))); + + char buf2[20]; + ASSERT_THAT(ReadFd(fd2, buf2, sizeof(buf2)), + SyscallSucceedsWithValue(sizeof(buf2))); + + EXPECT_EQ(0, memcmp(buf1, buf2, sizeof(buf1))); + + RandomizeBuffer(buf1, sizeof(buf1)); + ASSERT_THAT(WriteFd(fd2, buf1, sizeof(buf1)), + SyscallSucceedsWithValue(sizeof(buf1))); + + ASSERT_THAT(ReadFd(fd1, buf2, sizeof(buf2)), + SyscallSucceedsWithValue(sizeof(buf2))); + + EXPECT_EQ(0, memcmp(buf1, buf2, sizeof(buf1))); +} + +// Initializes the given buffer with random data. +void RandomizeBuffer(char* ptr, size_t len) { + uint32_t seed = time(nullptr); + for (size_t i = 0; i < len; ++i) { + ptr[i] = static_cast<char>(rand_r(&seed)); + } +} + +size_t CalculateUnixSockAddrLen(const char* sun_path) { + // Abstract addresses always return the full length. + if (sun_path[0] == 0) { + return sizeof(sockaddr_un); + } + // Filesystem addresses use the address length plus the 2 byte sun_family + // and null terminator. 
+ return strlen(sun_path) + 3; +} + +struct sockaddr_storage AddrFDSocketPair::to_storage(const sockaddr_un& addr) { + struct sockaddr_storage addr_storage = {}; + memcpy(&addr_storage, &addr, sizeof(addr)); + return addr_storage; +} + +struct sockaddr_storage AddrFDSocketPair::to_storage(const sockaddr_in& addr) { + struct sockaddr_storage addr_storage = {}; + memcpy(&addr_storage, &addr, sizeof(addr)); + return addr_storage; +} + +struct sockaddr_storage AddrFDSocketPair::to_storage(const sockaddr_in6& addr) { + struct sockaddr_storage addr_storage = {}; + memcpy(&addr_storage, &addr, sizeof(addr)); + return addr_storage; +} + +SocketKind SimpleSocket(int fam, int type, int proto) { + return SocketKind{ + absl::StrCat("Family ", fam, ", type ", type, ", proto ", proto), fam, + type, proto, SyscallSocketCreator(fam, type, proto)}; +} + +ssize_t SendLargeSendMsg(const std::unique_ptr<SocketPair>& sockets, + size_t size, bool reader) { + const int rfd = sockets->second_fd(); + ScopedThread t([rfd, size, reader] { + if (!reader) { + return; + } + + // Potentially too many syscalls in the loop. + const DisableSave ds; + + std::vector<char> buf(size); + size_t total = 0; + + while (total < size) { + int ret = read(rfd, buf.data(), buf.size()); + if (ret == -1 && errno == EAGAIN) { + continue; + } + if (ret > 0) { + total += ret; + } + + // Assert to return on first failure. + ASSERT_THAT(ret, SyscallSucceeds()); + } + }); + + std::vector<char> buf(size); + + struct iovec iov = {}; + iov.iov_base = buf.data(); + iov.iov_len = buf.size(); + + struct msghdr msg = {}; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + return RetryEINTR(sendmsg)(sockets->first_fd(), &msg, 0); +} + +namespace internal { +PosixErrorOr<int> TryPortAvailable(int port, AddressFamily family, + SocketType type, bool reuse_addr) { + if (port < 0) { + return PosixError(EINVAL, "Invalid port"); + } + + // Both Ipv6 and Dualstack are AF_INET6. + int sock_fam = (family == AddressFamily::kIpv4 ? 
AF_INET : AF_INET6); + int sock_type = (type == SocketType::kTcp ? SOCK_STREAM : SOCK_DGRAM); + ASSIGN_OR_RETURN_ERRNO(auto fd, Socket(sock_fam, sock_type, 0)); + + if (reuse_addr) { + int one = 1; + RETURN_ERROR_IF_SYSCALL_FAIL( + setsockopt(fd.get(), SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one))); + } + + // Try to bind. + sockaddr_storage storage = {}; + int storage_size = 0; + if (family == AddressFamily::kIpv4) { + sockaddr_in* addr = reinterpret_cast<sockaddr_in*>(&storage); + storage_size = sizeof(*addr); + addr->sin_family = AF_INET; + addr->sin_port = htons(port); + addr->sin_addr.s_addr = htonl(INADDR_ANY); + } else { + sockaddr_in6* addr = reinterpret_cast<sockaddr_in6*>(&storage); + storage_size = sizeof(*addr); + addr->sin6_family = AF_INET6; + addr->sin6_port = htons(port); + if (family == AddressFamily::kDualStack) { + inet_pton(AF_INET6, "::ffff:0.0.0.0", + reinterpret_cast<void*>(&addr->sin6_addr.s6_addr)); + } else { + addr->sin6_addr = in6addr_any; + } + } + + RETURN_ERROR_IF_SYSCALL_FAIL( + bind(fd.get(), reinterpret_cast<sockaddr*>(&storage), storage_size)); + + // If the user specified 0 as the port, we will return the port that the + // kernel gave us, otherwise we will validate that this socket bound to the + // requested port. 
+ sockaddr_storage bound_storage = {}; + socklen_t bound_storage_size = sizeof(bound_storage); + RETURN_ERROR_IF_SYSCALL_FAIL( + getsockname(fd.get(), reinterpret_cast<sockaddr*>(&bound_storage), + &bound_storage_size)); + + int available_port = -1; + if (bound_storage.ss_family == AF_INET) { + sockaddr_in* addr = reinterpret_cast<sockaddr_in*>(&bound_storage); + available_port = ntohs(addr->sin_port); + } else if (bound_storage.ss_family == AF_INET6) { + sockaddr_in6* addr = reinterpret_cast<sockaddr_in6*>(&bound_storage); + available_port = ntohs(addr->sin6_port); + } else { + return PosixError(EPROTOTYPE, "Getsockname returned invalid family"); + } + + // If we requested a specific port make sure our bound port is that port. + if (port != 0 && available_port != port) { + return PosixError(EINVAL, + absl::StrCat("Bound port ", available_port, + " was not equal to requested port ", port)); + } + + // If we're trying to do a TCP socket, let's also try to listen. + if (type == SocketType::kTcp) { + RETURN_ERROR_IF_SYSCALL_FAIL(listen(fd.get(), 1)); + } + + return available_port; +} +} // namespace internal + +PosixErrorOr<int> SendMsg(int sock, msghdr* msg, char buf[], int buf_size) { + struct iovec iov; + iov.iov_base = buf; + iov.iov_len = buf_size; + msg->msg_iov = &iov; + msg->msg_iovlen = 1; + + int ret; + RETURN_ERROR_IF_SYSCALL_FAIL(ret = RetryEINTR(sendmsg)(sock, msg, 0)); + return ret; +} + +void RecvNoData(int sock) { + char data = 0; + struct iovec iov; + iov.iov_base = &data; + iov.iov_len = 1; + struct msghdr msg = {}; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + ASSERT_THAT(RetryEINTR(recvmsg)(sock, &msg, MSG_DONTWAIT), + SyscallFailsWithErrno(EAGAIN)); +} + +TestAddress V4Any() { + TestAddress t("V4Any"); + t.addr.ss_family = AF_INET; + t.addr_len = sizeof(sockaddr_in); + reinterpret_cast<sockaddr_in*>(&t.addr)->sin_addr.s_addr = htonl(INADDR_ANY); + return t; +} + +TestAddress V4Loopback() { + TestAddress t("V4Loopback"); + t.addr.ss_family = 
AF_INET; + t.addr_len = sizeof(sockaddr_in); + reinterpret_cast<sockaddr_in*>(&t.addr)->sin_addr.s_addr = + htonl(INADDR_LOOPBACK); + return t; +} + +TestAddress V4MappedAny() { + TestAddress t("V4MappedAny"); + t.addr.ss_family = AF_INET6; + t.addr_len = sizeof(sockaddr_in6); + inet_pton(AF_INET6, "::ffff:0.0.0.0", + reinterpret_cast<sockaddr_in6*>(&t.addr)->sin6_addr.s6_addr); + return t; +} + +TestAddress V4MappedLoopback() { + TestAddress t("V4MappedLoopback"); + t.addr.ss_family = AF_INET6; + t.addr_len = sizeof(sockaddr_in6); + inet_pton(AF_INET6, "::ffff:127.0.0.1", + reinterpret_cast<sockaddr_in6*>(&t.addr)->sin6_addr.s6_addr); + return t; +} + +TestAddress V4Multicast() { + TestAddress t("V4Multicast"); + t.addr.ss_family = AF_INET; + t.addr_len = sizeof(sockaddr_in); + reinterpret_cast<sockaddr_in*>(&t.addr)->sin_addr.s_addr = + inet_addr(kMulticastAddress); + return t; +} + +TestAddress V4Broadcast() { + TestAddress t("V4Broadcast"); + t.addr.ss_family = AF_INET; + t.addr_len = sizeof(sockaddr_in); + reinterpret_cast<sockaddr_in*>(&t.addr)->sin_addr.s_addr = + htonl(INADDR_BROADCAST); + return t; +} + +TestAddress V6Any() { + TestAddress t("V6Any"); + t.addr.ss_family = AF_INET6; + t.addr_len = sizeof(sockaddr_in6); + reinterpret_cast<sockaddr_in6*>(&t.addr)->sin6_addr = in6addr_any; + return t; +} + +TestAddress V6Loopback() { + TestAddress t("V6Loopback"); + t.addr.ss_family = AF_INET6; + t.addr_len = sizeof(sockaddr_in6); + reinterpret_cast<sockaddr_in6*>(&t.addr)->sin6_addr = in6addr_loopback; + return t; +} + +// Checksum computes the internet checksum of a buffer. +uint16_t Checksum(uint16_t* buf, ssize_t buf_size) { + // Add up the 16-bit values in the buffer. + uint32_t total = 0; + for (unsigned int i = 0; i < buf_size; i += sizeof(*buf)) { + total += *buf; + buf++; + } + + // If buf has an odd size, add the remaining byte. 
+ if (buf_size % 2) { + total += *(reinterpret_cast<unsigned char*>(buf) - 1); + } + + // This carries any bits past the lower 16 until everything fits in 16 bits. + while (total >> 16) { + uint16_t lower = total & 0xffff; + uint16_t upper = total >> 16; + total = lower + upper; + } + + return ~total; +} + +uint16_t IPChecksum(struct iphdr ip) { + return Checksum(reinterpret_cast<uint16_t*>(&ip), sizeof(ip)); +} + +// The pseudo-header defined in RFC 768 for calculating the UDP checksum. +struct udp_pseudo_hdr { + uint32_t srcip; + uint32_t destip; + char zero; + char protocol; + uint16_t udplen; +}; + +uint16_t UDPChecksum(struct iphdr iphdr, struct udphdr udphdr, + const char* payload, ssize_t payload_len) { + struct udp_pseudo_hdr phdr = {}; + phdr.srcip = iphdr.saddr; + phdr.destip = iphdr.daddr; + phdr.zero = 0; + phdr.protocol = IPPROTO_UDP; + phdr.udplen = udphdr.len; + + ssize_t buf_size = sizeof(phdr) + sizeof(udphdr) + payload_len; + char* buf = static_cast<char*>(malloc(buf_size)); + memcpy(buf, &phdr, sizeof(phdr)); + memcpy(buf + sizeof(phdr), &udphdr, sizeof(udphdr)); + memcpy(buf + sizeof(phdr) + sizeof(udphdr), payload, payload_len); + + uint16_t csum = Checksum(reinterpret_cast<uint16_t*>(buf), buf_size); + free(buf); + return csum; +} + +uint16_t ICMPChecksum(struct icmphdr icmphdr, const char* payload, + ssize_t payload_len) { + ssize_t buf_size = sizeof(icmphdr) + payload_len; + char* buf = static_cast<char*>(malloc(buf_size)); + memcpy(buf, &icmphdr, sizeof(icmphdr)); + memcpy(buf + sizeof(icmphdr), payload, payload_len); + + uint16_t csum = Checksum(reinterpret_cast<uint16_t*>(buf), buf_size); + free(buf); + return csum; +} + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_test_util.h b/test/syscalls/linux/socket_test_util.h new file mode 100644 index 000000000..734b48b96 --- /dev/null +++ b/test/syscalls/linux/socket_test_util.h @@ -0,0 +1,518 @@ +// Copyright 2018 The gVisor Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GVISOR_TEST_SYSCALLS_SOCKET_TEST_UTIL_H_ +#define GVISOR_TEST_SYSCALLS_SOCKET_TEST_UTIL_H_ + +#include <errno.h> +#include <netinet/ip.h> +#include <netinet/ip_icmp.h> +#include <netinet/udp.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <sys/un.h> + +#include <functional> +#include <memory> +#include <string> +#include <utility> +#include <vector> + +#include "gtest/gtest.h" +#include "absl/strings/str_format.h" +#include "test/util/file_descriptor.h" +#include "test/util/posix_error.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +// Wrapper for socket(2) that returns a FileDescriptor. +inline PosixErrorOr<FileDescriptor> Socket(int family, int type, int protocol) { + int fd = socket(family, type, protocol); + MaybeSave(); + if (fd < 0) { + return PosixError( + errno, absl::StrFormat("socket(%d, %d, %d)", family, type, protocol)); + } + return FileDescriptor(fd); +} + +// Wrapper for accept(2) that returns a FileDescriptor. +inline PosixErrorOr<FileDescriptor> Accept(int sockfd, sockaddr* addr, + socklen_t* addrlen) { + int fd = RetryEINTR(accept)(sockfd, addr, addrlen); + MaybeSave(); + if (fd < 0) { + return PosixError( + errno, absl::StrFormat("accept(%d, %p, %p)", sockfd, addr, addrlen)); + } + return FileDescriptor(fd); +} + +// Wrapper for accept4(2) that returns a FileDescriptor. 
+inline PosixErrorOr<FileDescriptor> Accept4(int sockfd, sockaddr* addr, + socklen_t* addrlen, int flags) { + int fd = RetryEINTR(accept4)(sockfd, addr, addrlen, flags); + MaybeSave(); + if (fd < 0) { + return PosixError(errno, absl::StrFormat("accept4(%d, %p, %p, %#x)", sockfd, + addr, addrlen, flags)); + } + return FileDescriptor(fd); +} + +inline ssize_t SendFd(int fd, void* buf, size_t count, int flags) { + return internal::ApplyFileIoSyscall( + [&](size_t completed) { + return sendto(fd, static_cast<char*>(buf) + completed, + count - completed, flags, nullptr, 0); + }, + count); +} + +PosixErrorOr<struct sockaddr_un> UniqueUnixAddr(bool abstract, int domain); + +// A Creator<T> is a function that attempts to create and return a new T. (This +// is copy/pasted from cloud/gvisor/api/sandbox_util.h and is just duplicated +// here for clarity.) +template <typename T> +using Creator = std::function<PosixErrorOr<std::unique_ptr<T>>()>; + +// A SocketPair represents a pair of socket file descriptors owned by the +// SocketPair. +class SocketPair { + public: + virtual ~SocketPair() = default; + + virtual int first_fd() const = 0; + virtual int second_fd() const = 0; + virtual int release_first_fd() = 0; + virtual int release_second_fd() = 0; + virtual const struct sockaddr* first_addr() const = 0; + virtual const struct sockaddr* second_addr() const = 0; + virtual size_t first_addr_size() const = 0; + virtual size_t second_addr_size() const = 0; + virtual size_t first_addr_len() const = 0; + virtual size_t second_addr_len() const = 0; +}; + +// A FDSocketPair is a SocketPair that consists of only a pair of file +// descriptors. 
class FDSocketPair : public SocketPair {
 public:
  // Wraps two raw descriptors in FileDescriptor members.
  FDSocketPair(int first_fd, int second_fd)
      : first_(first_fd), second_(second_fd) {}
  // Takes the descriptors out of the given FileDescriptor objects via
  // release() and stores them in this pair's own members.
  FDSocketPair(std::unique_ptr<FileDescriptor> first_fd,
               std::unique_ptr<FileDescriptor> second_fd)
      : first_(first_fd->release()), second_(second_fd->release()) {}

  int first_fd() const override { return first_.get(); }
  int second_fd() const override { return second_.get(); }
  int release_first_fd() override { return first_.release(); }
  int release_second_fd() override { return second_.release(); }
  // This pair carries no address information, so the address accessors return
  // nullptr and the size/length accessors return 0.
  const struct sockaddr* first_addr() const override { return nullptr; }
  const struct sockaddr* second_addr() const override { return nullptr; }
  size_t first_addr_size() const override { return 0; }
  size_t second_addr_size() const override { return 0; }
  size_t first_addr_len() const override { return 0; }
  size_t second_addr_len() const override { return 0; }

 private:
  FileDescriptor first_;
  FileDescriptor second_;
};

// CalculateUnixSockAddrLen calculates the length returned by recvfrom(2) and
// recvmsg(2) for Unix sockets.
size_t CalculateUnixSockAddrLen(const char* sun_path);

// An AddrFDSocketPair is a SocketPair that consists of a pair of file
// descriptors in addition to a pair of socket addresses.
+class AddrFDSocketPair : public SocketPair { + public: + AddrFDSocketPair(int first_fd, int second_fd, + const struct sockaddr_un& first_address, + const struct sockaddr_un& second_address) + : first_(first_fd), + second_(second_fd), + first_addr_(to_storage(first_address)), + second_addr_(to_storage(second_address)), + first_len_(CalculateUnixSockAddrLen(first_address.sun_path)), + second_len_(CalculateUnixSockAddrLen(second_address.sun_path)), + first_size_(sizeof(first_address)), + second_size_(sizeof(second_address)) {} + + AddrFDSocketPair(int first_fd, int second_fd, + const struct sockaddr_in& first_address, + const struct sockaddr_in& second_address) + : first_(first_fd), + second_(second_fd), + first_addr_(to_storage(first_address)), + second_addr_(to_storage(second_address)), + first_len_(sizeof(first_address)), + second_len_(sizeof(second_address)), + first_size_(sizeof(first_address)), + second_size_(sizeof(second_address)) {} + + AddrFDSocketPair(int first_fd, int second_fd, + const struct sockaddr_in6& first_address, + const struct sockaddr_in6& second_address) + : first_(first_fd), + second_(second_fd), + first_addr_(to_storage(first_address)), + second_addr_(to_storage(second_address)), + first_len_(sizeof(first_address)), + second_len_(sizeof(second_address)), + first_size_(sizeof(first_address)), + second_size_(sizeof(second_address)) {} + + int first_fd() const override { return first_.get(); } + int second_fd() const override { return second_.get(); } + int release_first_fd() override { return first_.release(); } + int release_second_fd() override { return second_.release(); } + const struct sockaddr* first_addr() const override { + return reinterpret_cast<const struct sockaddr*>(&first_addr_); + } + const struct sockaddr* second_addr() const override { + return reinterpret_cast<const struct sockaddr*>(&second_addr_); + } + size_t first_addr_size() const override { return first_size_; } + size_t second_addr_size() const override { return 
second_size_; } + size_t first_addr_len() const override { return first_len_; } + size_t second_addr_len() const override { return second_len_; } + + private: + // to_storage coverts a sockaddr_* to a sockaddr_storage. + static struct sockaddr_storage to_storage(const sockaddr_un& addr); + static struct sockaddr_storage to_storage(const sockaddr_in& addr); + static struct sockaddr_storage to_storage(const sockaddr_in6& addr); + + FileDescriptor first_; + FileDescriptor second_; + const struct sockaddr_storage first_addr_; + const struct sockaddr_storage second_addr_; + const size_t first_len_; + const size_t second_len_; + const size_t first_size_; + const size_t second_size_; +}; + +// SyscallSocketPairCreator returns a Creator<SocketPair> that obtains file +// descriptors by invoking the socketpair() syscall. +Creator<SocketPair> SyscallSocketPairCreator(int domain, int type, + int protocol); + +// SyscallSocketCreator returns a Creator<FileDescriptor> that obtains a file +// descriptor by invoking the socket() syscall. +Creator<FileDescriptor> SyscallSocketCreator(int domain, int type, + int protocol); + +// FilesystemBidirectionalBindSocketPairCreator returns a Creator<SocketPair> +// that obtains file descriptors by invoking the bind() and connect() syscalls +// on filesystem paths. Only works for DGRAM sockets. +Creator<SocketPair> FilesystemBidirectionalBindSocketPairCreator(int domain, + int type, + int protocol); + +// AbstractBidirectionalBindSocketPairCreator returns a Creator<SocketPair> that +// obtains file descriptors by invoking the bind() and connect() syscalls on +// abstract namespace paths. Only works for DGRAM sockets. +Creator<SocketPair> AbstractBidirectionalBindSocketPairCreator(int domain, + int type, + int protocol); + +// SocketpairGoferSocketPairCreator returns a Creator<SocketPair> that +// obtains file descriptors by connect() syscalls on two sockets with socketpair +// gofer paths. 
+Creator<SocketPair> SocketpairGoferSocketPairCreator(int domain, int type, + int protocol); + +// SocketpairGoferFileSocketPairCreator returns a Creator<SocketPair> that +// obtains file descriptors by open() syscalls on socketpair gofer paths. +Creator<SocketPair> SocketpairGoferFileSocketPairCreator(int flags); + +// FilesystemAcceptBindSocketPairCreator returns a Creator<SocketPair> that +// obtains file descriptors by invoking the accept() and bind() syscalls on +// a filesystem path. Only works for STREAM and SEQPACKET sockets. +Creator<SocketPair> FilesystemAcceptBindSocketPairCreator(int domain, int type, + int protocol); + +// AbstractAcceptBindSocketPairCreator returns a Creator<SocketPair> that +// obtains file descriptors by invoking the accept() and bind() syscalls on a +// abstract namespace path. Only works for STREAM and SEQPACKET sockets. +Creator<SocketPair> AbstractAcceptBindSocketPairCreator(int domain, int type, + int protocol); + +// FilesystemUnboundSocketPairCreator returns a Creator<SocketPair> that obtains +// file descriptors by invoking the socket() syscall and generates a filesystem +// path for binding. +Creator<SocketPair> FilesystemUnboundSocketPairCreator(int domain, int type, + int protocol); + +// AbstractUnboundSocketPairCreator returns a Creator<SocketPair> that obtains +// file descriptors by invoking the socket() syscall and generates an abstract +// path for binding. +Creator<SocketPair> AbstractUnboundSocketPairCreator(int domain, int type, + int protocol); + +// TCPAcceptBindSocketPairCreator returns a Creator<SocketPair> that obtains +// file descriptors by invoking the accept() and bind() syscalls on TCP sockets. +Creator<SocketPair> TCPAcceptBindSocketPairCreator(int domain, int type, + int protocol, + bool dual_stack); + +// TCPAcceptBindPersistentListenerSocketPairCreator is like +// TCPAcceptBindSocketPairCreator, except it uses the same listening socket to +// create all SocketPairs. 
+Creator<SocketPair> TCPAcceptBindPersistentListenerSocketPairCreator( + int domain, int type, int protocol, bool dual_stack); + +// UDPBidirectionalBindSocketPairCreator returns a Creator<SocketPair> that +// obtains file descriptors by invoking the bind() and connect() syscalls on UDP +// sockets. +Creator<SocketPair> UDPBidirectionalBindSocketPairCreator(int domain, int type, + int protocol, + bool dual_stack); + +// UDPUnboundSocketPairCreator returns a Creator<SocketPair> that obtains file +// descriptors by creating UDP sockets. +Creator<SocketPair> UDPUnboundSocketPairCreator(int domain, int type, + int protocol, bool dual_stack); + +// UnboundSocketCreator returns a Creator<FileDescriptor> that obtains a file +// descriptor by creating a socket. +Creator<FileDescriptor> UnboundSocketCreator(int domain, int type, + int protocol); + +// A SocketPairKind couples a human-readable description of a socket pair with +// a function that creates such a socket pair. +struct SocketPairKind { + std::string description; + int domain; + int type; + int protocol; + Creator<SocketPair> creator; + + // Create creates a socket pair of this kind. + PosixErrorOr<std::unique_ptr<SocketPair>> Create() const { return creator(); } +}; + +// A SocketKind couples a human-readable description of a socket with +// a function that creates such a socket. +struct SocketKind { + std::string description; + int domain; + int type; + int protocol; + Creator<FileDescriptor> creator; + + // Create creates a socket pair of this kind. + PosixErrorOr<std::unique_ptr<FileDescriptor>> Create() const { + return creator(); + } +}; + +// A ReversedSocketPair wraps another SocketPair but flips the first and second +// file descriptors. ReversedSocketPair is used to test socket pairs that +// should be symmetric. 
+class ReversedSocketPair : public SocketPair { + public: + explicit ReversedSocketPair(std::unique_ptr<SocketPair> base) + : base_(std::move(base)) {} + + int first_fd() const override { return base_->second_fd(); } + int second_fd() const override { return base_->first_fd(); } + int release_first_fd() override { return base_->release_second_fd(); } + int release_second_fd() override { return base_->release_first_fd(); } + const struct sockaddr* first_addr() const override { + return base_->second_addr(); + } + const struct sockaddr* second_addr() const override { + return base_->first_addr(); + } + size_t first_addr_size() const override { return base_->second_addr_size(); } + size_t second_addr_size() const override { return base_->first_addr_size(); } + size_t first_addr_len() const override { return base_->second_addr_len(); } + size_t second_addr_len() const override { return base_->first_addr_len(); } + + private: + std::unique_ptr<SocketPair> base_; +}; + +// Reversed returns a SocketPairKind that represents SocketPairs created by +// flipping the file descriptors provided by another SocketPair. +SocketPairKind Reversed(SocketPairKind const& base); + +// IncludeReversals returns a vector<SocketPairKind> that returns all +// SocketPairKinds in `vec` as well as all SocketPairKinds obtained by flipping +// the file descriptors provided by the kinds in `vec`. +std::vector<SocketPairKind> IncludeReversals(std::vector<SocketPairKind> vec); + +// A Middleware is a function wraps a SocketPairKind. +using Middleware = std::function<SocketPairKind(SocketPairKind)>; + +// Reversed returns a SocketPairKind that represents SocketPairs created by +// flipping the file descriptors provided by another SocketPair. 
+template <typename T> +Middleware SetSockOpt(int level, int optname, T* value) { + return [=](SocketPairKind const& base) { + auto const& creator = base.creator; + return SocketPairKind{ + absl::StrCat("setsockopt(", level, ", ", optname, ", ", *value, ") ", + base.description), + base.domain, base.type, base.protocol, + [creator, level, optname, + value]() -> PosixErrorOr<std::unique_ptr<SocketPair>> { + ASSIGN_OR_RETURN_ERRNO(auto creator_value, creator()); + if (creator_value->first_fd() >= 0) { + RETURN_ERROR_IF_SYSCALL_FAIL(setsockopt( + creator_value->first_fd(), level, optname, value, sizeof(T))); + } + if (creator_value->second_fd() >= 0) { + RETURN_ERROR_IF_SYSCALL_FAIL(setsockopt( + creator_value->second_fd(), level, optname, value, sizeof(T))); + } + return creator_value; + }}; + }; +} + +constexpr int kSockOptOn = 1; +constexpr int kSockOptOff = 0; + +// NoOp returns the same SocketPairKind that it is passed. +SocketPairKind NoOp(SocketPairKind const& base); + +// TransferTest tests that data can be send back and fourth between two +// specified FDs. Note that calls to this function should be wrapped in +// ASSERT_NO_FATAL_FAILURE(). +void TransferTest(int fd1, int fd2); + +// Fills [buf, buf+len) with random bytes. +void RandomizeBuffer(char* buf, size_t len); + +// Base test fixture for tests that operate on pairs of connected sockets. +class SocketPairTest : public ::testing::TestWithParam<SocketPairKind> { + protected: + SocketPairTest() { + // gUnit uses printf, so so will we. + printf("Testing with %s\n", GetParam().description.c_str()); + fflush(stdout); + } + + PosixErrorOr<std::unique_ptr<SocketPair>> NewSocketPair() const { + return GetParam().Create(); + } +}; + +// Base test fixture for tests that operate on simple Sockets. +class SimpleSocketTest : public ::testing::TestWithParam<SocketKind> { + protected: + SimpleSocketTest() { + // gUnit uses printf, so so will we. 
+ printf("Testing with %s\n", GetParam().description.c_str()); + } + + PosixErrorOr<std::unique_ptr<FileDescriptor>> NewSocket() const { + return GetParam().Create(); + } +}; + +SocketKind SimpleSocket(int fam, int type, int proto); + +// Send a buffer of size 'size' to sockets->first_fd(), returning the result of +// sendmsg. +// +// If reader, read from second_fd() until size bytes have been read. +ssize_t SendLargeSendMsg(const std::unique_ptr<SocketPair>& sockets, + size_t size, bool reader); + +// Initializes the given buffer with random data. +void RandomizeBuffer(char* ptr, size_t len); + +enum class AddressFamily { kIpv4 = 1, kIpv6 = 2, kDualStack = 3 }; +enum class SocketType { kUdp = 1, kTcp = 2 }; + +// Returns a PosixError or a port that is available. If 0 is specified as the +// port it will bind port 0 (and allow the kernel to select any free port). +// Otherwise, it will try to bind the specified port and validate that it can be +// used for the requested family and socket type. The final option is +// reuse_addr. This specifies whether SO_REUSEADDR should be applied before a +// bind(2) attempt. SO_REUSEADDR means that sockets in TIME_WAIT states or other +// bound UDP sockets would not cause an error on bind(2). This option should be +// set if subsequent calls to bind on the returned port will also use +// SO_REUSEADDR. +// +// Note: That this test will attempt to bind the ANY address for the respective +// protocol. +PosixErrorOr<int> PortAvailable(int port, AddressFamily family, SocketType type, + bool reuse_addr); + +// FreeAvailablePort is used to return a port that was obtained by using +// the PortAvailable helper with port 0. +PosixError FreeAvailablePort(int port); + +// SendMsg converts a buffer to an iovec and adds it to msg before sending it. +PosixErrorOr<int> SendMsg(int sock, msghdr* msg, char buf[], int buf_size); + +// RecvNoData checks that no data is receivable on sock. 
+void RecvNoData(int sock); + +// Base test fixture for tests that apply to all kinds of pairs of connected +// sockets. +using AllSocketPairTest = SocketPairTest; + +struct TestAddress { + std::string description; + sockaddr_storage addr; + socklen_t addr_len; + + int family() const { return addr.ss_family; } + explicit TestAddress(std::string description = "") + : description(std::move(description)), addr(), addr_len() {} +}; + +constexpr char kMulticastAddress[] = "224.0.2.1"; +constexpr char kBroadcastAddress[] = "255.255.255.255"; + +TestAddress V4Any(); +TestAddress V4Broadcast(); +TestAddress V4Loopback(); +TestAddress V4MappedAny(); +TestAddress V4MappedLoopback(); +TestAddress V4Multicast(); +TestAddress V6Any(); +TestAddress V6Loopback(); + +// Compute the internet checksum of an IP header. +uint16_t IPChecksum(struct iphdr ip); + +// Compute the internet checksum of a UDP header. +uint16_t UDPChecksum(struct iphdr iphdr, struct udphdr udphdr, + const char* payload, ssize_t payload_len); + +// Compute the internet checksum of an ICMP header. +uint16_t ICMPChecksum(struct icmphdr icmphdr, const char* payload, + ssize_t payload_len); + +namespace internal { +PosixErrorOr<int> TryPortAvailable(int port, AddressFamily family, + SocketType type, bool reuse_addr); +} // namespace internal + +} // namespace testing +} // namespace gvisor + +#endif // GVISOR_TEST_SYSCALLS_SOCKET_TEST_UTIL_H_ diff --git a/test/syscalls/linux/socket_test_util_impl.cc b/test/syscalls/linux/socket_test_util_impl.cc new file mode 100644 index 000000000..ef661a0e3 --- /dev/null +++ b/test/syscalls/linux/socket_test_util_impl.cc @@ -0,0 +1,28 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "test/syscalls/linux/socket_test_util.h" + +namespace gvisor { +namespace testing { + +PosixErrorOr<int> PortAvailable(int port, AddressFamily family, SocketType type, + bool reuse_addr) { + return internal::TryPortAvailable(port, family, type, reuse_addr); +} + +PosixError FreeAvailablePort(int port) { return NoError(); } + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_unix.cc b/test/syscalls/linux/socket_unix.cc new file mode 100644 index 000000000..591cab3fd --- /dev/null +++ b/test/syscalls/linux/socket_unix.cc @@ -0,0 +1,274 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "test/syscalls/linux/socket_unix.h" + +#include <errno.h> +#include <net/if.h> +#include <stdio.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <sys/un.h> + +#include <vector> + +#include "gtest/gtest.h" +#include "absl/strings/string_view.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +// This file contains tests specific to Unix domain sockets. It does not contain +// tests for UDS control messages. Those belong in socket_unix_cmsg.cc. +// +// This file is a generic socket test file. It must be built with another file +// that provides the test types. + +namespace gvisor { +namespace testing { + +namespace { + +TEST_P(UnixSocketPairTest, InvalidGetSockOpt) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + int opt; + socklen_t optlen = sizeof(opt); + EXPECT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, -1, &opt, &optlen), + SyscallFailsWithErrno(ENOPROTOOPT)); +} + +TEST_P(UnixSocketPairTest, BindToBadName) { + auto pair = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + constexpr char kBadName[] = "/some/path/that/does/not/exist"; + sockaddr_un sockaddr; + sockaddr.sun_family = AF_LOCAL; + memcpy(sockaddr.sun_path, kBadName, sizeof(kBadName)); + + EXPECT_THAT( + bind(pair->first_fd(), reinterpret_cast<struct sockaddr*>(&sockaddr), + sizeof(sockaddr)), + SyscallFailsWithErrno(ENOENT)); +} + +TEST_P(UnixSocketPairTest, BindToBadFamily) { + auto pair = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + constexpr char kBadName[] = "/some/path/that/does/not/exist"; + sockaddr_un sockaddr; + sockaddr.sun_family = AF_INET; + memcpy(sockaddr.sun_path, kBadName, sizeof(kBadName)); + + EXPECT_THAT( + bind(pair->first_fd(), reinterpret_cast<struct sockaddr*>(&sockaddr), + sizeof(sockaddr)), + 
SyscallFailsWithErrno(EINVAL)); +} + +TEST_P(UnixSocketPairTest, RecvmmsgTimeoutAfterRecv) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + char sent_data[10]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + char received_data[sizeof(sent_data) * 2]; + std::vector<struct mmsghdr> msgs(2); + std::vector<struct iovec> iovs(msgs.size()); + const int chunk_size = sizeof(received_data) / msgs.size(); + for (size_t i = 0; i < msgs.size(); i++) { + iovs[i].iov_len = chunk_size; + iovs[i].iov_base = &received_data[i * chunk_size]; + msgs[i].msg_hdr.msg_iov = &iovs[i]; + msgs[i].msg_hdr.msg_iovlen = 1; + } + + ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data, sizeof(sent_data)), + SyscallSucceedsWithValue(sizeof(sent_data))); + + struct timespec timeout = {0, 1}; + ASSERT_THAT(RetryEINTR(recvmmsg)(sockets->second_fd(), &msgs[0], msgs.size(), + 0, &timeout), + SyscallSucceedsWithValue(1)); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); + + EXPECT_EQ(chunk_size, msgs[0].msg_len); +} + +TEST_P(UnixSocketPairTest, TIOCINQSucceeds) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + if (IsRunningOnGvisor()) { + // TODO(gvisor.dev/issue/273): Inherited host UDS don't support TIOCINQ. + // Skip the test. + int size = -1; + int ret = ioctl(sockets->first_fd(), TIOCINQ, &size); + SKIP_IF(ret == -1 && errno == ENOTTY); + } + + int size = -1; + EXPECT_THAT(ioctl(sockets->first_fd(), TIOCINQ, &size), SyscallSucceeds()); + EXPECT_EQ(size, 0); + + const char some_data[] = "dangerzone"; + ASSERT_THAT( + RetryEINTR(send)(sockets->second_fd(), &some_data, sizeof(some_data), 0), + SyscallSucceeds()); + EXPECT_THAT(ioctl(sockets->first_fd(), TIOCINQ, &size), SyscallSucceeds()); + EXPECT_EQ(size, sizeof(some_data)); + + // Linux only reports the first message's size, which is wrong. We test for + // the behavior described in the man page. 
+ SKIP_IF(!IsRunningOnGvisor()); + + ASSERT_THAT( + RetryEINTR(send)(sockets->second_fd(), &some_data, sizeof(some_data), 0), + SyscallSucceeds()); + EXPECT_THAT(ioctl(sockets->first_fd(), TIOCINQ, &size), SyscallSucceeds()); + EXPECT_EQ(size, sizeof(some_data) * 2); +} + +TEST_P(UnixSocketPairTest, TIOCOUTQSucceeds) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + if (IsRunningOnGvisor()) { + // TODO(gvisor.dev/issue/273): Inherited host UDS don't support TIOCOUTQ. + // Skip the test. + int size = -1; + int ret = ioctl(sockets->second_fd(), TIOCOUTQ, &size); + SKIP_IF(ret == -1 && errno == ENOTTY); + } + + int size = -1; + EXPECT_THAT(ioctl(sockets->second_fd(), TIOCOUTQ, &size), SyscallSucceeds()); + EXPECT_EQ(size, 0); + + // Linux reports bogus numbers which are related to its internal allocations. + // We test for the behavior described in the man page. + SKIP_IF(!IsRunningOnGvisor()); + + const char some_data[] = "dangerzone"; + ASSERT_THAT( + RetryEINTR(send)(sockets->second_fd(), &some_data, sizeof(some_data), 0), + SyscallSucceeds()); + EXPECT_THAT(ioctl(sockets->second_fd(), TIOCOUTQ, &size), SyscallSucceeds()); + EXPECT_EQ(size, sizeof(some_data)); + + ASSERT_THAT( + RetryEINTR(send)(sockets->second_fd(), &some_data, sizeof(some_data), 0), + SyscallSucceeds()); + EXPECT_THAT(ioctl(sockets->second_fd(), TIOCOUTQ, &size), SyscallSucceeds()); + EXPECT_EQ(size, sizeof(some_data) * 2); +} + +TEST_P(UnixSocketPairTest, NetdeviceIoctlsSucceed) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + // Prepare the request. + struct ifreq ifr; + snprintf(ifr.ifr_name, IFNAMSIZ, "lo"); + + // Check that the ioctl either succeeds or fails with ENODEV. 
+ int err = ioctl(sockets->first_fd(), SIOCGIFINDEX, &ifr); + if (err < 0) { + ASSERT_EQ(errno, ENODEV); + } +} + +TEST_P(UnixSocketPairTest, Shutdown) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + const std::string data = "abc"; + ASSERT_THAT(WriteFd(sockets->first_fd(), data.c_str(), data.size()), + SyscallSucceedsWithValue(data.size())); + + ASSERT_THAT(shutdown(sockets->first_fd(), SHUT_RDWR), SyscallSucceeds()); + ASSERT_THAT(shutdown(sockets->second_fd(), SHUT_RDWR), SyscallSucceeds()); + + // Shutting down a socket does not clear the buffer. + char buf[3]; + ASSERT_THAT(ReadFd(sockets->second_fd(), buf, data.size()), + SyscallSucceedsWithValue(data.size())); + EXPECT_EQ(data, absl::string_view(buf, data.size())); +} + +TEST_P(UnixSocketPairTest, ShutdownRead) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(shutdown(sockets->first_fd(), SHUT_RD), SyscallSucceeds()); + + // When the socket is shutdown for read, read behavior varies between + // different socket types. This is covered by the various ReadOneSideClosed + // test cases. + + // ... and the peer cannot write. + const std::string data = "abc"; + EXPECT_THAT(WriteFd(sockets->second_fd(), data.c_str(), data.size()), + SyscallFailsWithErrno(EPIPE)); + + // ... but the socket can still write. + ASSERT_THAT(WriteFd(sockets->first_fd(), data.c_str(), data.size()), + SyscallSucceedsWithValue(data.size())); + + // ... and the peer can still read. + char buf[3]; + EXPECT_THAT(ReadFd(sockets->second_fd(), buf, data.size()), + SyscallSucceedsWithValue(data.size())); + EXPECT_EQ(data, absl::string_view(buf, data.size())); +} + +TEST_P(UnixSocketPairTest, ShutdownWrite) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(shutdown(sockets->first_fd(), SHUT_WR), SyscallSucceeds()); + + // When the socket is shutdown for write, it cannot write. 
+ const std::string data = "abc"; + EXPECT_THAT(WriteFd(sockets->first_fd(), data.c_str(), data.size()), + SyscallFailsWithErrno(EPIPE)); + + // ... and the peer read behavior varies between different socket types. This + // is covered by the various ReadOneSideClosed test cases. + + // ... but the peer can still write. + char buf[3]; + ASSERT_THAT(WriteFd(sockets->second_fd(), data.c_str(), data.size()), + SyscallSucceedsWithValue(data.size())); + + // ... and the socket can still read. + EXPECT_THAT(ReadFd(sockets->first_fd(), buf, data.size()), + SyscallSucceedsWithValue(data.size())); + EXPECT_EQ(data, absl::string_view(buf, data.size())); +} + +TEST_P(UnixSocketPairTest, SocketReopenFromProcfs) { + // TODO(gvisor.dev/issue/1624): In VFS1, we return EIO instead of ENXIO (see + // b/122310852). Remove this skip once VFS1 is deleted. + SKIP_IF(IsRunningWithVFS1()); + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + // Opening a socket pair via /proc/self/fd/X is a ENXIO. + for (const int fd : {sockets->first_fd(), sockets->second_fd()}) { + ASSERT_THAT(Open(absl::StrCat("/proc/self/fd/", fd), O_WRONLY), + PosixErrorIs(ENXIO, ::testing::_)); + } +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_unix.h b/test/syscalls/linux/socket_unix.h new file mode 100644 index 000000000..3625cc404 --- /dev/null +++ b/test/syscalls/linux/socket_unix.h @@ -0,0 +1,29 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_H_ +#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_H_ + +#include "test/syscalls/linux/socket_test_util.h" + +namespace gvisor { +namespace testing { + +// Test fixture for tests that apply to pairs of connected unix sockets. +using UnixSocketPairTest = SocketPairTest; + +} // namespace testing +} // namespace gvisor + +#endif // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_H_ diff --git a/test/syscalls/linux/socket_unix_abstract_nonblock.cc b/test/syscalls/linux/socket_unix_abstract_nonblock.cc new file mode 100644 index 000000000..8bef76b67 --- /dev/null +++ b/test/syscalls/linux/socket_unix_abstract_nonblock.cc @@ -0,0 +1,39 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <vector> + +#include "test/syscalls/linux/socket_non_blocking.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { +namespace { + +std::vector<SocketPairKind> GetSocketPairs() { + return ApplyVec<SocketPairKind>( + AbstractBoundUnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_STREAM, SOCK_DGRAM, SOCK_SEQPACKET}, + List<int>{SOCK_NONBLOCK})); +} + +INSTANTIATE_TEST_SUITE_P( + NonBlockingAbstractUnixSockets, NonBlockingSocketPairTest, + ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_unix_blocking_local.cc b/test/syscalls/linux/socket_unix_blocking_local.cc new file mode 100644 index 000000000..77cb8c6d6 --- /dev/null +++ b/test/syscalls/linux/socket_unix_blocking_local.cc @@ -0,0 +1,45 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <vector> + +#include "test/syscalls/linux/socket_blocking.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { +namespace { + +std::vector<SocketPairKind> GetSocketPairs() { + return VecCat<SocketPairKind>( + ApplyVec<SocketPairKind>( + UnixDomainSocketPair, + std::vector<int>{SOCK_STREAM, SOCK_SEQPACKET, SOCK_DGRAM}), + ApplyVec<SocketPairKind>( + FilesystemBoundUnixDomainSocketPair, + std::vector<int>{SOCK_STREAM, SOCK_SEQPACKET, SOCK_DGRAM}), + ApplyVec<SocketPairKind>( + AbstractBoundUnixDomainSocketPair, + std::vector<int>{SOCK_STREAM, SOCK_SEQPACKET, SOCK_DGRAM})); +} + +INSTANTIATE_TEST_SUITE_P( + NonBlockingUnixDomainSockets, BlockingSocketPairTest, + ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_unix_cmsg.cc b/test/syscalls/linux/socket_unix_cmsg.cc new file mode 100644 index 000000000..a16899493 --- /dev/null +++ b/test/syscalls/linux/socket_unix_cmsg.cc @@ -0,0 +1,1501 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "test/syscalls/linux/socket_unix_cmsg.h" + +#include <errno.h> +#include <net/if.h> +#include <stdio.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <sys/un.h> + +#include <vector> + +#include "gtest/gtest.h" +#include "absl/strings/string_view.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +// This file contains tests for control message in Unix domain sockets. +// +// This file is a generic socket test file. It must be built with another file +// that provides the test types. + +namespace gvisor { +namespace testing { + +namespace { + +TEST_P(UnixSocketPairCmsgTest, BasicFDPass) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + auto pair = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(), + sent_data, sizeof(sent_data))); + + char received_data[20]; + int fd = -1; + ASSERT_NO_FATAL_FAILURE(RecvSingleFD(sockets->second_fd(), &fd, received_data, + sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); + + ASSERT_NO_FATAL_FAILURE(TransferTest(fd, pair->first_fd())); +} + +TEST_P(UnixSocketPairCmsgTest, BasicTwoFDPass) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + auto pair1 = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + auto pair2 = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + int sent_fds[] = {pair1->second_fd(), pair2->second_fd()}; + + ASSERT_NO_FATAL_FAILURE( + SendFDs(sockets->first_fd(), sent_fds, 2, sent_data, sizeof(sent_data))); + + char received_data[20]; + int received_fds[] = 
{-1, -1}; + + ASSERT_NO_FATAL_FAILURE(RecvFDs(sockets->second_fd(), received_fds, 2, + received_data, sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); + + ASSERT_NO_FATAL_FAILURE(TransferTest(received_fds[0], pair1->first_fd())); + ASSERT_NO_FATAL_FAILURE(TransferTest(received_fds[1], pair2->first_fd())); +} + +TEST_P(UnixSocketPairCmsgTest, BasicThreeFDPass) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + auto pair1 = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + auto pair2 = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + auto pair3 = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + int sent_fds[] = {pair1->second_fd(), pair2->second_fd(), pair3->second_fd()}; + + ASSERT_NO_FATAL_FAILURE( + SendFDs(sockets->first_fd(), sent_fds, 3, sent_data, sizeof(sent_data))); + + char received_data[20]; + int received_fds[] = {-1, -1, -1}; + + ASSERT_NO_FATAL_FAILURE(RecvFDs(sockets->second_fd(), received_fds, 3, + received_data, sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); + + ASSERT_NO_FATAL_FAILURE(TransferTest(received_fds[0], pair1->first_fd())); + ASSERT_NO_FATAL_FAILURE(TransferTest(received_fds[1], pair2->first_fd())); + ASSERT_NO_FATAL_FAILURE(TransferTest(received_fds[2], pair3->first_fd())); +} + +TEST_P(UnixSocketPairCmsgTest, BadFDPass) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + int sent_fd = -1; + + struct msghdr msg = {}; + char control[CMSG_SPACE(sizeof(sent_fd))]; + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_len = CMSG_LEN(sizeof(sent_fd)); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; 
+ memcpy(CMSG_DATA(cmsg), &sent_fd, sizeof(sent_fd)); + + struct iovec iov; + iov.iov_base = sent_data; + iov.iov_len = sizeof(sent_data); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + ASSERT_THAT(RetryEINTR(sendmsg)(sockets->first_fd(), &msg, 0), + SyscallFailsWithErrno(EBADF)); +} + +TEST_P(UnixSocketPairCmsgTest, ShortCmsg) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + int sent_fd = -1; + + struct msghdr msg = {}; + char control[CMSG_SPACE(sizeof(sent_fd))]; + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_len = 1; + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + memcpy(CMSG_DATA(cmsg), &sent_fd, sizeof(sent_fd)); + + struct iovec iov; + iov.iov_base = sent_data; + iov.iov_len = sizeof(sent_data); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + ASSERT_THAT(RetryEINTR(sendmsg)(sockets->first_fd(), &msg, 0), + SyscallFailsWithErrno(EINVAL)); +} + +// BasicFDPassNoSpace starts off by sending a single FD just like BasicFDPass. +// The difference is that when calling recvmsg, no space for FDs is provided, +// only space for the cmsg header. 
+TEST_P(UnixSocketPairCmsgTest, BasicFDPassNoSpace) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + auto pair = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(), + sent_data, sizeof(sent_data))); + + char received_data[20]; + + struct msghdr msg = {}; + std::vector<char> control(CMSG_SPACE(0)); + msg.msg_control = &control[0]; + msg.msg_controllen = control.size(); + + struct iovec iov; + iov.iov_base = received_data; + iov.iov_len = sizeof(received_data); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, 0), + SyscallSucceedsWithValue(sizeof(received_data))); + + EXPECT_EQ(msg.msg_controllen, 0); + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); +} + +// BasicFDPassNoSpaceMsgCtrunc sends an FD, but does not provide any space to +// receive it. It then verifies that the MSG_CTRUNC flag is set in the msghdr. 
+TEST_P(UnixSocketPairCmsgTest, BasicFDPassNoSpaceMsgCtrunc) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + auto pair = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(), + sent_data, sizeof(sent_data))); + + struct msghdr msg = {}; + std::vector<char> control(CMSG_SPACE(0)); + msg.msg_control = &control[0]; + msg.msg_controllen = control.size(); + + char received_data[sizeof(sent_data)]; + struct iovec iov; + iov.iov_base = received_data; + iov.iov_len = sizeof(received_data); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, 0), + SyscallSucceedsWithValue(sizeof(received_data))); + + EXPECT_EQ(msg.msg_controllen, 0); + EXPECT_EQ(msg.msg_flags, MSG_CTRUNC); +} + +// BasicFDPassNullControlMsgCtrunc sends an FD and sets contradictory values for +// msg_controllen and msg_control. msg_controllen is set to the correct size to +// accommodate the FD, but msg_control is set to NULL. In this case, msg_control +// should override msg_controllen. +TEST_P(UnixSocketPairCmsgTest, BasicFDPassNullControlMsgCtrunc) { + // FIXME(gvisor.dev/issue/207): Fix handling of NULL msg_control. 
+ SKIP_IF(IsRunningOnGvisor()); + + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + auto pair = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(), + sent_data, sizeof(sent_data))); + + struct msghdr msg = {}; + msg.msg_controllen = CMSG_SPACE(1); + + char received_data[sizeof(sent_data)]; + struct iovec iov; + iov.iov_base = received_data; + iov.iov_len = sizeof(received_data); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, 0), + SyscallSucceedsWithValue(sizeof(received_data))); + + EXPECT_EQ(msg.msg_controllen, 0); + EXPECT_EQ(msg.msg_flags, MSG_CTRUNC); +} + +// BasicFDPassNotEnoughSpaceMsgCtrunc sends an FD, but does not provide enough +// space to receive it. It then verifies that the MSG_CTRUNC flag is set in the +// msghdr. +TEST_P(UnixSocketPairCmsgTest, BasicFDPassNotEnoughSpaceMsgCtrunc) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + auto pair = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(), + sent_data, sizeof(sent_data))); + + struct msghdr msg = {}; + std::vector<char> control(CMSG_SPACE(0) + 1); + msg.msg_control = &control[0]; + msg.msg_controllen = control.size(); + + char received_data[sizeof(sent_data)]; + struct iovec iov; + iov.iov_base = received_data; + iov.iov_len = sizeof(received_data); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, 0), + SyscallSucceedsWithValue(sizeof(received_data))); + + EXPECT_EQ(msg.msg_controllen, 0); + EXPECT_EQ(msg.msg_flags, MSG_CTRUNC); +} + +// BasicThreeFDPassTruncationMsgCtrunc sends three FDs, but 
only provides enough +// space to receive two of them. It then verifies that the MSG_CTRUNC flag is +// set in the msghdr. +TEST_P(UnixSocketPairCmsgTest, BasicThreeFDPassTruncationMsgCtrunc) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + auto pair1 = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + auto pair2 = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + auto pair3 = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + int sent_fds[] = {pair1->second_fd(), pair2->second_fd(), pair3->second_fd()}; + + ASSERT_NO_FATAL_FAILURE( + SendFDs(sockets->first_fd(), sent_fds, 3, sent_data, sizeof(sent_data))); + + struct msghdr msg = {}; + std::vector<char> control(CMSG_SPACE(2 * sizeof(int))); + msg.msg_control = &control[0]; + msg.msg_controllen = control.size(); + + char received_data[sizeof(sent_data)]; + struct iovec iov; + iov.iov_base = received_data; + iov.iov_len = sizeof(received_data); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, 0), + SyscallSucceedsWithValue(sizeof(received_data))); + + EXPECT_EQ(msg.msg_flags, MSG_CTRUNC); + + struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg); + ASSERT_NE(cmsg, nullptr); + EXPECT_EQ(cmsg->cmsg_len, CMSG_LEN(2 * sizeof(int))); + EXPECT_EQ(cmsg->cmsg_level, SOL_SOCKET); + EXPECT_EQ(cmsg->cmsg_type, SCM_RIGHTS); +} + +// BasicFDPassUnalignedRecv starts off by sending a single FD just like +// BasicFDPass. The difference is that when calling recvmsg, the length of the +// receive data is only aligned on a 4 byte boundry instead of the normal 8. 
+TEST_P(UnixSocketPairCmsgTest, BasicFDPassUnalignedRecv) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + auto pair = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(), + sent_data, sizeof(sent_data))); + + char received_data[20]; + int fd = -1; + ASSERT_NO_FATAL_FAILURE(RecvSingleFDUnaligned( + sockets->second_fd(), &fd, received_data, sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); + + ASSERT_NO_FATAL_FAILURE(TransferTest(fd, pair->first_fd())); +} + +// BasicFDPassUnalignedRecvNoMsgTrunc sends one FD and only provides enough +// space to receive just it. (Normally the minimum amount of space one would +// provide would be enough space for two FDs.) It then verifies that the +// MSG_CTRUNC flag is not set in the msghdr. +TEST_P(UnixSocketPairCmsgTest, BasicFDPassUnalignedRecvNoMsgTrunc) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + auto pair = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(), + sent_data, sizeof(sent_data))); + + struct msghdr msg = {}; + char control[CMSG_SPACE(sizeof(int)) - sizeof(int)]; + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + char received_data[sizeof(sent_data)] = {}; + struct iovec iov; + iov.iov_base = received_data; + iov.iov_len = sizeof(received_data); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, 0), + SyscallSucceedsWithValue(sizeof(received_data))); + + EXPECT_EQ(msg.msg_flags, 0); + + struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg); + ASSERT_NE(cmsg, nullptr); + EXPECT_EQ(cmsg->cmsg_len, 
CMSG_LEN(sizeof(int))); + EXPECT_EQ(cmsg->cmsg_level, SOL_SOCKET); + EXPECT_EQ(cmsg->cmsg_type, SCM_RIGHTS); +} + +// BasicTwoFDPassUnalignedRecvTruncationMsgTrunc sends two FDs, but only +// provides enough space to receive one of them. It then verifies that the +// MSG_CTRUNC flag is set in the msghdr. +TEST_P(UnixSocketPairCmsgTest, BasicTwoFDPassUnalignedRecvTruncationMsgTrunc) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + auto pair = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + int sent_fds[] = {pair->first_fd(), pair->second_fd()}; + + ASSERT_NO_FATAL_FAILURE( + SendFDs(sockets->first_fd(), sent_fds, 2, sent_data, sizeof(sent_data))); + + struct msghdr msg = {}; + // CMSG_SPACE rounds up to two FDs, we only want one. + char control[CMSG_SPACE(sizeof(int)) - sizeof(int)]; + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + char received_data[sizeof(sent_data)] = {}; + struct iovec iov; + iov.iov_base = received_data; + iov.iov_len = sizeof(received_data); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, 0), + SyscallSucceedsWithValue(sizeof(received_data))); + + EXPECT_EQ(msg.msg_flags, MSG_CTRUNC); + + struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg); + ASSERT_NE(cmsg, nullptr); + EXPECT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(int))); + EXPECT_EQ(cmsg->cmsg_level, SOL_SOCKET); + EXPECT_EQ(cmsg->cmsg_type, SCM_RIGHTS); +} + +TEST_P(UnixSocketPairCmsgTest, ConcurrentBasicFDPass) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + int sockfd1 = sockets->first_fd(); + auto recv_func = [sockfd1, sent_data]() { + char received_data[20]; + int fd = -1; + RecvSingleFD(sockfd1, &fd, received_data, sizeof(received_data)); + ASSERT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); 
+ char buf[20]; + ASSERT_THAT(ReadFd(fd, buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + ASSERT_THAT(WriteFd(fd, buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + }; + + auto pair = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->second_fd(), pair->second_fd(), + sent_data, sizeof(sent_data))); + + ScopedThread t(recv_func); + + RandomizeBuffer(sent_data, sizeof(sent_data)); + ASSERT_THAT(WriteFd(pair->first_fd(), sent_data, sizeof(sent_data)), + SyscallSucceedsWithValue(sizeof(sent_data))); + + char received_data[20]; + ASSERT_THAT(ReadFd(pair->first_fd(), received_data, sizeof(received_data)), + SyscallSucceedsWithValue(sizeof(received_data))); + + t.Join(); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); +} + +// FDPassNoRecv checks that the control message can be safely ignored by using +// read(2) instead of recvmsg(2). +TEST_P(UnixSocketPairCmsgTest, FDPassNoRecv) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + auto pair = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(), + sent_data, sizeof(sent_data))); + + // Read while ignoring the passed FD. + char received_data[20]; + ASSERT_THAT( + ReadFd(sockets->second_fd(), received_data, sizeof(received_data)), + SyscallSucceedsWithValue(sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); + + // Check that the socket still works for reads and writes. + ASSERT_NO_FATAL_FAILURE( + TransferTest(sockets->first_fd(), sockets->second_fd())); +} + +// FDPassInterspersed1 checks that sent control messages cannot be read before +// their associated data has been read. 
+TEST_P(UnixSocketPairCmsgTest, FDPassInterspersed1) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char written_data[20]; + RandomizeBuffer(written_data, sizeof(written_data)); + + ASSERT_THAT(WriteFd(sockets->first_fd(), written_data, sizeof(written_data)), + SyscallSucceedsWithValue(sizeof(written_data))); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + auto pair = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(), + sent_data, sizeof(sent_data))); + + // Check that we don't get a control message, but do get the data. + char received_data[20]; + RecvNoCmsg(sockets->second_fd(), received_data, sizeof(received_data)); + EXPECT_EQ(0, memcmp(written_data, received_data, sizeof(written_data))); +} + +// FDPassInterspersed2 checks that sent control messages cannot be read after +// their associated data has been read while ignoring the control message by +// using read(2) instead of recvmsg(2). 
+TEST_P(UnixSocketPairCmsgTest, FDPassInterspersed2) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + auto pair = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(), + sent_data, sizeof(sent_data))); + + char written_data[20]; + RandomizeBuffer(written_data, sizeof(written_data)); + ASSERT_THAT(WriteFd(sockets->first_fd(), written_data, sizeof(written_data)), + SyscallSucceedsWithValue(sizeof(written_data))); + + char received_data[20]; + ASSERT_THAT( + ReadFd(sockets->second_fd(), received_data, sizeof(received_data)), + SyscallSucceedsWithValue(sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); + + ASSERT_NO_FATAL_FAILURE( + RecvNoCmsg(sockets->second_fd(), received_data, sizeof(received_data))); + EXPECT_EQ(0, memcmp(written_data, received_data, sizeof(written_data))); +} + +TEST_P(UnixSocketPairCmsgTest, FDPassNotCoalesced) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data1[20]; + RandomizeBuffer(sent_data1, sizeof(sent_data1)); + + auto pair1 = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair1->second_fd(), + sent_data1, sizeof(sent_data1))); + + char sent_data2[20]; + RandomizeBuffer(sent_data2, sizeof(sent_data2)); + + auto pair2 = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair2->second_fd(), + sent_data2, sizeof(sent_data2))); + + char received_data1[sizeof(sent_data1) + sizeof(sent_data2)]; + int received_fd1 = -1; + + RecvSingleFD(sockets->second_fd(), &received_fd1, received_data1, + sizeof(received_data1), sizeof(sent_data1)); + + EXPECT_EQ(0, memcmp(sent_data1, received_data1, sizeof(sent_data1))); 
+ TransferTest(pair1->first_fd(), pair1->second_fd()); + + char received_data2[sizeof(sent_data1) + sizeof(sent_data2)]; + int received_fd2 = -1; + + RecvSingleFD(sockets->second_fd(), &received_fd2, received_data2, + sizeof(received_data2), sizeof(sent_data2)); + + EXPECT_EQ(0, memcmp(sent_data2, received_data2, sizeof(sent_data2))); + TransferTest(pair2->first_fd(), pair2->second_fd()); +} + +TEST_P(UnixSocketPairCmsgTest, FDPassPeek) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + auto pair = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(), + sent_data, sizeof(sent_data))); + + char peek_data[20]; + int peek_fd = -1; + PeekSingleFD(sockets->second_fd(), &peek_fd, peek_data, sizeof(peek_data)); + EXPECT_EQ(0, memcmp(sent_data, peek_data, sizeof(sent_data))); + TransferTest(peek_fd, pair->first_fd()); + EXPECT_THAT(close(peek_fd), SyscallSucceeds()); + + char received_data[20]; + int received_fd = -1; + RecvSingleFD(sockets->second_fd(), &received_fd, received_data, + sizeof(received_data)); + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); + TransferTest(received_fd, pair->first_fd()); + EXPECT_THAT(close(received_fd), SyscallSucceeds()); +} + +TEST_P(UnixSocketPairCmsgTest, BasicCredPass) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + struct ucred sent_creds; + + ASSERT_THAT(sent_creds.pid = getpid(), SyscallSucceeds()); + ASSERT_THAT(sent_creds.uid = getuid(), SyscallSucceeds()); + ASSERT_THAT(sent_creds.gid = getgid(), SyscallSucceeds()); + + ASSERT_NO_FATAL_FAILURE( + SendCreds(sockets->first_fd(), sent_creds, sent_data, sizeof(sent_data))); + + SetSoPassCred(sockets->second_fd()); + + char received_data[20]; + struct ucred received_creds; + 
ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds, + received_data, sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); + EXPECT_EQ(sent_creds.pid, received_creds.pid); + EXPECT_EQ(sent_creds.uid, received_creds.uid); + EXPECT_EQ(sent_creds.gid, received_creds.gid); +} + +TEST_P(UnixSocketPairCmsgTest, SendNullCredsBeforeSoPassCredRecvEnd) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + ASSERT_NO_FATAL_FAILURE( + SendNullCmsg(sockets->first_fd(), sent_data, sizeof(sent_data))); + + SetSoPassCred(sockets->second_fd()); + + char received_data[20]; + struct ucred received_creds; + ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds, + received_data, sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); + + struct ucred want_creds { + 0, 65534, 65534 + }; + + EXPECT_EQ(want_creds.pid, received_creds.pid); + EXPECT_EQ(want_creds.uid, received_creds.uid); + EXPECT_EQ(want_creds.gid, received_creds.gid); +} + +TEST_P(UnixSocketPairCmsgTest, SendNullCredsAfterSoPassCredRecvEnd) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + SetSoPassCred(sockets->second_fd()); + + ASSERT_NO_FATAL_FAILURE( + SendNullCmsg(sockets->first_fd(), sent_data, sizeof(sent_data))); + + char received_data[20]; + struct ucred received_creds; + ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds, + received_data, sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); + + struct ucred want_creds; + ASSERT_THAT(want_creds.pid = getpid(), SyscallSucceeds()); + ASSERT_THAT(want_creds.uid = getuid(), SyscallSucceeds()); + ASSERT_THAT(want_creds.gid = getgid(), SyscallSucceeds()); + + EXPECT_EQ(want_creds.pid, received_creds.pid); + 
EXPECT_EQ(want_creds.uid, received_creds.uid); + EXPECT_EQ(want_creds.gid, received_creds.gid); +} + +TEST_P(UnixSocketPairCmsgTest, SendNullCredsBeforeSoPassCredSendEnd) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + ASSERT_NO_FATAL_FAILURE( + SendNullCmsg(sockets->first_fd(), sent_data, sizeof(sent_data))); + + SetSoPassCred(sockets->first_fd()); + + char received_data[20]; + ASSERT_NO_FATAL_FAILURE( + RecvNoCmsg(sockets->second_fd(), received_data, sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); +} + +TEST_P(UnixSocketPairCmsgTest, SendNullCredsAfterSoPassCredSendEnd) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + SetSoPassCred(sockets->first_fd()); + + ASSERT_NO_FATAL_FAILURE( + SendNullCmsg(sockets->first_fd(), sent_data, sizeof(sent_data))); + + char received_data[20]; + ASSERT_NO_FATAL_FAILURE( + RecvNoCmsg(sockets->second_fd(), received_data, sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); +} + +TEST_P(UnixSocketPairCmsgTest, + SendNullCredsBeforeSoPassCredRecvEndAfterSendEnd) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + SetSoPassCred(sockets->first_fd()); + + ASSERT_NO_FATAL_FAILURE( + SendNullCmsg(sockets->first_fd(), sent_data, sizeof(sent_data))); + + SetSoPassCred(sockets->second_fd()); + + char received_data[20]; + struct ucred received_creds; + ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds, + received_data, sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); + + struct ucred want_creds; + ASSERT_THAT(want_creds.pid = getpid(), SyscallSucceeds()); + ASSERT_THAT(want_creds.uid = getuid(), SyscallSucceeds()); + 
ASSERT_THAT(want_creds.gid = getgid(), SyscallSucceeds()); + + EXPECT_EQ(want_creds.pid, received_creds.pid); + EXPECT_EQ(want_creds.uid, received_creds.uid); + EXPECT_EQ(want_creds.gid, received_creds.gid); +} + +TEST_P(UnixSocketPairCmsgTest, WriteBeforeSoPassCredRecvEnd) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data, sizeof(sent_data)), + SyscallSucceedsWithValue(sizeof(sent_data))); + + SetSoPassCred(sockets->second_fd()); + + char received_data[20]; + + struct ucred received_creds; + ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds, + received_data, sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); + + struct ucred want_creds { + 0, 65534, 65534 + }; + + EXPECT_EQ(want_creds.pid, received_creds.pid); + EXPECT_EQ(want_creds.uid, received_creds.uid); + EXPECT_EQ(want_creds.gid, received_creds.gid); +} + +TEST_P(UnixSocketPairCmsgTest, WriteAfterSoPassCredRecvEnd) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + SetSoPassCred(sockets->second_fd()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data, sizeof(sent_data)), + SyscallSucceedsWithValue(sizeof(sent_data))); + + char received_data[20]; + + struct ucred received_creds; + ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds, + received_data, sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); + + struct ucred want_creds; + ASSERT_THAT(want_creds.pid = getpid(), SyscallSucceeds()); + ASSERT_THAT(want_creds.uid = getuid(), SyscallSucceeds()); + ASSERT_THAT(want_creds.gid = getgid(), SyscallSucceeds()); + + EXPECT_EQ(want_creds.pid, received_creds.pid); + EXPECT_EQ(want_creds.uid, received_creds.uid); + EXPECT_EQ(want_creds.gid, 
received_creds.gid); +} + +TEST_P(UnixSocketPairCmsgTest, WriteBeforeSoPassCredSendEnd) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data, sizeof(sent_data)), + SyscallSucceedsWithValue(sizeof(sent_data))); + + SetSoPassCred(sockets->first_fd()); + + char received_data[20]; + ASSERT_NO_FATAL_FAILURE( + RecvNoCmsg(sockets->second_fd(), received_data, sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); +} + +TEST_P(UnixSocketPairCmsgTest, WriteAfterSoPassCredSendEnd) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + SetSoPassCred(sockets->first_fd()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data, sizeof(sent_data)), + SyscallSucceedsWithValue(sizeof(sent_data))); + + char received_data[20]; + ASSERT_NO_FATAL_FAILURE( + RecvNoCmsg(sockets->second_fd(), received_data, sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); +} + +TEST_P(UnixSocketPairCmsgTest, WriteBeforeSoPassCredRecvEndAfterSendEnd) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + SetSoPassCred(sockets->first_fd()); + + ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data, sizeof(sent_data)), + SyscallSucceedsWithValue(sizeof(sent_data))); + + SetSoPassCred(sockets->second_fd()); + + char received_data[20]; + + struct ucred received_creds; + ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds, + received_data, sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); + + struct ucred want_creds; + ASSERT_THAT(want_creds.pid = getpid(), SyscallSucceeds()); + ASSERT_THAT(want_creds.uid = getuid(), SyscallSucceeds()); + 
ASSERT_THAT(want_creds.gid = getgid(), SyscallSucceeds()); + + EXPECT_EQ(want_creds.pid, received_creds.pid); + EXPECT_EQ(want_creds.uid, received_creds.uid); + EXPECT_EQ(want_creds.gid, received_creds.gid); +} + +TEST_P(UnixSocketPairCmsgTest, CredPassTruncated) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + struct ucred sent_creds; + + ASSERT_THAT(sent_creds.pid = getpid(), SyscallSucceeds()); + ASSERT_THAT(sent_creds.uid = getuid(), SyscallSucceeds()); + ASSERT_THAT(sent_creds.gid = getgid(), SyscallSucceeds()); + + ASSERT_NO_FATAL_FAILURE( + SendCreds(sockets->first_fd(), sent_creds, sent_data, sizeof(sent_data))); + + SetSoPassCred(sockets->second_fd()); + + struct msghdr msg = {}; + char control[CMSG_SPACE(0) + sizeof(pid_t)]; + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + char received_data[sizeof(sent_data)] = {}; + struct iovec iov; + iov.iov_base = received_data; + iov.iov_len = sizeof(received_data); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, 0), + SyscallSucceedsWithValue(sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); + + EXPECT_EQ(msg.msg_controllen, sizeof(control)); + + struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg); + ASSERT_NE(cmsg, nullptr); + EXPECT_EQ(cmsg->cmsg_len, sizeof(control)); + EXPECT_EQ(cmsg->cmsg_level, SOL_SOCKET); + EXPECT_EQ(cmsg->cmsg_type, SCM_CREDENTIALS); + + pid_t pid = 0; + memcpy(&pid, CMSG_DATA(cmsg), sizeof(pid)); + EXPECT_EQ(pid, sent_creds.pid); +} + +// CredPassNoMsgCtrunc passes a full set of credentials. It then verifies that +// receiving the full set does not result in MSG_CTRUNC being set in the msghdr. 
+TEST_P(UnixSocketPairCmsgTest, CredPassNoMsgCtrunc) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + struct ucred sent_creds; + + ASSERT_THAT(sent_creds.pid = getpid(), SyscallSucceeds()); + ASSERT_THAT(sent_creds.uid = getuid(), SyscallSucceeds()); + ASSERT_THAT(sent_creds.gid = getgid(), SyscallSucceeds()); + + ASSERT_NO_FATAL_FAILURE( + SendCreds(sockets->first_fd(), sent_creds, sent_data, sizeof(sent_data))); + + SetSoPassCred(sockets->second_fd()); + + struct msghdr msg = {}; + char control[CMSG_SPACE(sizeof(struct ucred))]; + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + char received_data[sizeof(sent_data)] = {}; + struct iovec iov; + iov.iov_base = received_data; + iov.iov_len = sizeof(received_data); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, 0), + SyscallSucceedsWithValue(sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); + + // The control message should not be truncated. + EXPECT_EQ(msg.msg_flags, 0); + EXPECT_EQ(msg.msg_controllen, sizeof(control)); + + struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg); + ASSERT_NE(cmsg, nullptr); + EXPECT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(struct ucred))); + EXPECT_EQ(cmsg->cmsg_level, SOL_SOCKET); + EXPECT_EQ(cmsg->cmsg_type, SCM_CREDENTIALS); +} + +// CredPassNoSpaceMsgCtrunc passes a full set of credentials. It then receives +// the data without providing space for any credentials and verifies that +// MSG_CTRUNC is set in the msghdr. 
+TEST_P(UnixSocketPairCmsgTest, CredPassNoSpaceMsgCtrunc) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + struct ucred sent_creds; + + ASSERT_THAT(sent_creds.pid = getpid(), SyscallSucceeds()); + ASSERT_THAT(sent_creds.uid = getuid(), SyscallSucceeds()); + ASSERT_THAT(sent_creds.gid = getgid(), SyscallSucceeds()); + + ASSERT_NO_FATAL_FAILURE( + SendCreds(sockets->first_fd(), sent_creds, sent_data, sizeof(sent_data))); + + SetSoPassCred(sockets->second_fd()); + + struct msghdr msg = {}; + char control[CMSG_SPACE(0)]; + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + char received_data[sizeof(sent_data)] = {}; + struct iovec iov; + iov.iov_base = received_data; + iov.iov_len = sizeof(received_data); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, 0), + SyscallSucceedsWithValue(sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); + + // The control message should be truncated. + EXPECT_EQ(msg.msg_flags, MSG_CTRUNC); + EXPECT_EQ(msg.msg_controllen, sizeof(control)); + + struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg); + ASSERT_NE(cmsg, nullptr); + EXPECT_EQ(cmsg->cmsg_len, sizeof(control)); + EXPECT_EQ(cmsg->cmsg_level, SOL_SOCKET); + EXPECT_EQ(cmsg->cmsg_type, SCM_CREDENTIALS); +} + +// CredPassTruncatedMsgCtrunc passes a full set of credentials. It then receives +// the data while providing enough space for only the first field of the +// credentials and verifies that MSG_CTRUNC is set in the msghdr. 
+TEST_P(UnixSocketPairCmsgTest, CredPassTruncatedMsgCtrunc) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + struct ucred sent_creds; + + ASSERT_THAT(sent_creds.pid = getpid(), SyscallSucceeds()); + ASSERT_THAT(sent_creds.uid = getuid(), SyscallSucceeds()); + ASSERT_THAT(sent_creds.gid = getgid(), SyscallSucceeds()); + + ASSERT_NO_FATAL_FAILURE( + SendCreds(sockets->first_fd(), sent_creds, sent_data, sizeof(sent_data))); + + SetSoPassCred(sockets->second_fd()); + + struct msghdr msg = {}; + char control[CMSG_SPACE(0) + sizeof(pid_t)]; + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + char received_data[sizeof(sent_data)] = {}; + struct iovec iov; + iov.iov_base = received_data; + iov.iov_len = sizeof(received_data); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, 0), + SyscallSucceedsWithValue(sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); + + // The control message should be truncated. 
+ EXPECT_EQ(msg.msg_flags, MSG_CTRUNC); + EXPECT_EQ(msg.msg_controllen, sizeof(control)); + + struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg); + ASSERT_NE(cmsg, nullptr); + EXPECT_EQ(cmsg->cmsg_len, sizeof(control)); + EXPECT_EQ(cmsg->cmsg_level, SOL_SOCKET); + EXPECT_EQ(cmsg->cmsg_type, SCM_CREDENTIALS); +} + +TEST_P(UnixSocketPairCmsgTest, SoPassCred) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + int opt; + socklen_t optLen = sizeof(opt); + EXPECT_THAT( + getsockopt(sockets->first_fd(), SOL_SOCKET, SO_PASSCRED, &opt, &optLen), + SyscallSucceeds()); + EXPECT_FALSE(opt); + + optLen = sizeof(opt); + EXPECT_THAT( + getsockopt(sockets->second_fd(), SOL_SOCKET, SO_PASSCRED, &opt, &optLen), + SyscallSucceeds()); + EXPECT_FALSE(opt); + + SetSoPassCred(sockets->first_fd()); + + optLen = sizeof(opt); + EXPECT_THAT( + getsockopt(sockets->first_fd(), SOL_SOCKET, SO_PASSCRED, &opt, &optLen), + SyscallSucceeds()); + EXPECT_TRUE(opt); + + optLen = sizeof(opt); + EXPECT_THAT( + getsockopt(sockets->second_fd(), SOL_SOCKET, SO_PASSCRED, &opt, &optLen), + SyscallSucceeds()); + EXPECT_FALSE(opt); + + int zero = 0; + EXPECT_THAT(setsockopt(sockets->first_fd(), SOL_SOCKET, SO_PASSCRED, &zero, + sizeof(zero)), + SyscallSucceeds()); + + optLen = sizeof(opt); + EXPECT_THAT( + getsockopt(sockets->first_fd(), SOL_SOCKET, SO_PASSCRED, &opt, &optLen), + SyscallSucceeds()); + EXPECT_FALSE(opt); + + optLen = sizeof(opt); + EXPECT_THAT( + getsockopt(sockets->second_fd(), SOL_SOCKET, SO_PASSCRED, &opt, &optLen), + SyscallSucceeds()); + EXPECT_FALSE(opt); +} + +TEST_P(UnixSocketPairCmsgTest, NoDataCredPass) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + struct msghdr msg = {}; + + struct iovec iov; + iov.iov_base = sent_data; + iov.iov_len = sizeof(sent_data); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + char control[CMSG_SPACE(0)]; + msg.msg_control = control; + msg.msg_controllen = 
sizeof(control); + + struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_CREDENTIALS; + cmsg->cmsg_len = CMSG_LEN(0); + + ASSERT_THAT(RetryEINTR(sendmsg)(sockets->first_fd(), &msg, 0), + SyscallFailsWithErrno(EINVAL)); +} + +TEST_P(UnixSocketPairCmsgTest, NoPassCred) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + struct ucred sent_creds; + + ASSERT_THAT(sent_creds.pid = getpid(), SyscallSucceeds()); + ASSERT_THAT(sent_creds.uid = getuid(), SyscallSucceeds()); + ASSERT_THAT(sent_creds.gid = getgid(), SyscallSucceeds()); + + ASSERT_NO_FATAL_FAILURE( + SendCreds(sockets->first_fd(), sent_creds, sent_data, sizeof(sent_data))); + + char received_data[20]; + + ASSERT_NO_FATAL_FAILURE( + RecvNoCmsg(sockets->second_fd(), received_data, sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); +} + +TEST_P(UnixSocketPairCmsgTest, CredAndFDPass) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + struct ucred sent_creds; + + ASSERT_THAT(sent_creds.pid = getpid(), SyscallSucceeds()); + ASSERT_THAT(sent_creds.uid = getuid(), SyscallSucceeds()); + ASSERT_THAT(sent_creds.gid = getgid(), SyscallSucceeds()); + + auto pair = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + ASSERT_NO_FATAL_FAILURE(SendCredsAndFD(sockets->first_fd(), sent_creds, + pair->second_fd(), sent_data, + sizeof(sent_data))); + + SetSoPassCred(sockets->second_fd()); + + char received_data[20]; + struct ucred received_creds; + int fd = -1; + ASSERT_NO_FATAL_FAILURE(RecvCredsAndFD(sockets->second_fd(), &received_creds, + &fd, received_data, + sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); + + EXPECT_EQ(sent_creds.pid, received_creds.pid); + EXPECT_EQ(sent_creds.uid, 
received_creds.uid); + EXPECT_EQ(sent_creds.gid, received_creds.gid); + + ASSERT_NO_FATAL_FAILURE(TransferTest(fd, pair->first_fd())); +} + +TEST_P(UnixSocketPairCmsgTest, FDPassBeforeSoPassCred) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + auto pair = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(), + sent_data, sizeof(sent_data))); + + SetSoPassCred(sockets->second_fd()); + + char received_data[20]; + struct ucred received_creds; + int fd = -1; + ASSERT_NO_FATAL_FAILURE(RecvCredsAndFD(sockets->second_fd(), &received_creds, + &fd, received_data, + sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); + + struct ucred want_creds { + 0, 65534, 65534 + }; + + EXPECT_EQ(want_creds.pid, received_creds.pid); + EXPECT_EQ(want_creds.uid, received_creds.uid); + EXPECT_EQ(want_creds.gid, received_creds.gid); + + ASSERT_NO_FATAL_FAILURE(TransferTest(fd, pair->first_fd())); +} + +TEST_P(UnixSocketPairCmsgTest, FDPassAfterSoPassCred) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + auto pair = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + SetSoPassCred(sockets->second_fd()); + + ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(), + sent_data, sizeof(sent_data))); + + char received_data[20]; + struct ucred received_creds; + int fd = -1; + ASSERT_NO_FATAL_FAILURE(RecvCredsAndFD(sockets->second_fd(), &received_creds, + &fd, received_data, + sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); + + struct ucred want_creds; + ASSERT_THAT(want_creds.pid = getpid(), SyscallSucceeds()); + ASSERT_THAT(want_creds.uid = getuid(), SyscallSucceeds()); + 
ASSERT_THAT(want_creds.gid = getgid(), SyscallSucceeds()); + + EXPECT_EQ(want_creds.pid, received_creds.pid); + EXPECT_EQ(want_creds.uid, received_creds.uid); + EXPECT_EQ(want_creds.gid, received_creds.gid); + + ASSERT_NO_FATAL_FAILURE(TransferTest(fd, pair->first_fd())); +} + +TEST_P(UnixSocketPairCmsgTest, CloexecDroppedWhenFDPassed) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + auto pair = ASSERT_NO_ERRNO_AND_VALUE( + UnixDomainSocketPair(SOCK_SEQPACKET | SOCK_CLOEXEC).Create()); + + ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(), + sent_data, sizeof(sent_data))); + + char received_data[20]; + int fd = -1; + ASSERT_NO_FATAL_FAILURE(RecvSingleFD(sockets->second_fd(), &fd, received_data, + sizeof(received_data))); + + EXPECT_THAT(fcntl(fd, F_GETFD), SyscallSucceedsWithValue(0)); +} + +TEST_P(UnixSocketPairCmsgTest, CloexecRecvFDPass) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + auto pair = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(), + sent_data, sizeof(sent_data))); + + struct msghdr msg = {}; + char control[CMSG_SPACE(sizeof(int))]; + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + struct iovec iov; + char received_data[20]; + iov.iov_base = received_data; + iov.iov_len = sizeof(received_data); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, MSG_CMSG_CLOEXEC), + SyscallSucceedsWithValue(sizeof(received_data))); + struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg); + ASSERT_NE(cmsg, nullptr); + ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(int))); + ASSERT_EQ(cmsg->cmsg_level, SOL_SOCKET); + ASSERT_EQ(cmsg->cmsg_type, SCM_RIGHTS); + + int fd = -1; + memcpy(&fd, 
CMSG_DATA(cmsg), sizeof(int)); + + EXPECT_THAT(fcntl(fd, F_GETFD), SyscallSucceedsWithValue(FD_CLOEXEC)); +} + +TEST_P(UnixSocketPairCmsgTest, FDPassAfterSoPassCredWithoutCredSpace) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + auto pair = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + SetSoPassCred(sockets->second_fd()); + + ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(), + sent_data, sizeof(sent_data))); + + struct msghdr msg = {}; + char control[CMSG_LEN(0)]; + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + char received_data[20]; + struct iovec iov; + iov.iov_base = received_data; + iov.iov_len = sizeof(received_data); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, 0), + SyscallSucceedsWithValue(sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); + + EXPECT_EQ(msg.msg_controllen, sizeof(control)); + + struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg); + ASSERT_NE(cmsg, nullptr); + EXPECT_EQ(cmsg->cmsg_len, sizeof(control)); + EXPECT_EQ(cmsg->cmsg_level, SOL_SOCKET); + EXPECT_EQ(cmsg->cmsg_type, SCM_CREDENTIALS); +} + +// This test will validate that MSG_CTRUNC as an input flag to recvmsg will +// not appear as an output flag on the control message when truncation doesn't +// happen. 
+TEST_P(UnixSocketPairCmsgTest, MsgCtruncInputIsNoop) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + auto pair = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(), + sent_data, sizeof(sent_data))); + + struct msghdr msg = {}; + char control[CMSG_SPACE(sizeof(int)) /* we're passing a single fd */]; + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + struct iovec iov; + char received_data[20]; + iov.iov_base = received_data; + iov.iov_len = sizeof(received_data); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, MSG_CTRUNC), + SyscallSucceedsWithValue(sizeof(received_data))); + struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg); + ASSERT_NE(cmsg, nullptr); + ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(int))); + ASSERT_EQ(cmsg->cmsg_level, SOL_SOCKET); + ASSERT_EQ(cmsg->cmsg_type, SCM_RIGHTS); + + // Now we should verify that MSG_CTRUNC wasn't set as an output flag. 
+ EXPECT_EQ(msg.msg_flags & MSG_CTRUNC, 0); +} + +TEST_P(UnixSocketPairCmsgTest, FDPassAfterSoPassCredWithoutCredHeaderSpace) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + auto pair = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + SetSoPassCred(sockets->second_fd()); + + ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(), + sent_data, sizeof(sent_data))); + + struct msghdr msg = {}; + char control[CMSG_LEN(0) / 2]; + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + char received_data[20]; + struct iovec iov; + iov.iov_base = received_data; + iov.iov_len = sizeof(received_data); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, 0), + SyscallSucceedsWithValue(sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); + EXPECT_EQ(msg.msg_controllen, 0); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_unix_cmsg.h b/test/syscalls/linux/socket_unix_cmsg.h new file mode 100644 index 000000000..431606903 --- /dev/null +++ b/test/syscalls/linux/socket_unix_cmsg.h @@ -0,0 +1,30 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_CMSG_H_ +#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_CMSG_H_ + +#include "test/syscalls/linux/socket_test_util.h" + +namespace gvisor { +namespace testing { + +// Test fixture for tests that apply to pairs of connected unix sockets about +// control messages. +using UnixSocketPairCmsgTest = SocketPairTest; + +} // namespace testing +} // namespace gvisor + +#endif // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_CMSG_H_ diff --git a/test/syscalls/linux/socket_unix_dgram.cc b/test/syscalls/linux/socket_unix_dgram.cc new file mode 100644 index 000000000..af0df4fb4 --- /dev/null +++ b/test/syscalls/linux/socket_unix_dgram.cc @@ -0,0 +1,45 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "test/syscalls/linux/socket_unix_dgram.h" + +#include <stdio.h> +#include <sys/un.h> + +#include "gtest/gtest.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +TEST_P(DgramUnixSocketPairTest, WriteOneSideClosed) { + // FIXME(b/35925052): gVisor datagram sockets return EPIPE instead of + // ECONNREFUSED. 
+ SKIP_IF(IsRunningOnGvisor()); + + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + ASSERT_THAT(close(sockets->release_first_fd()), SyscallSucceeds()); + constexpr char kStr[] = "abc"; + ASSERT_THAT(write(sockets->second_fd(), kStr, 3), + SyscallFailsWithErrno(ECONNREFUSED)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_unix_dgram.h b/test/syscalls/linux/socket_unix_dgram.h new file mode 100644 index 000000000..0764ef85b --- /dev/null +++ b/test/syscalls/linux/socket_unix_dgram.h @@ -0,0 +1,29 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_DGRAM_H_ +#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_DGRAM_H_ + +#include "test/syscalls/linux/socket_test_util.h" + +namespace gvisor { +namespace testing { + +// Test fixture for tests that apply to pairs of connected dgram unix sockets. +using DgramUnixSocketPairTest = SocketPairTest; + +} // namespace testing +} // namespace gvisor + +#endif // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_DGRAM_H_ diff --git a/test/syscalls/linux/socket_unix_dgram_local.cc b/test/syscalls/linux/socket_unix_dgram_local.cc new file mode 100644 index 000000000..31d2d5216 --- /dev/null +++ b/test/syscalls/linux/socket_unix_dgram_local.cc @@ -0,0 +1,58 @@ +// Copyright 2018 The gVisor Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <vector> + +#include "test/syscalls/linux/socket_non_stream.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/socket_unix_dgram.h" +#include "test/syscalls/linux/socket_unix_non_stream.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { +namespace { + +std::vector<SocketPairKind> GetSocketPairs() { + return VecCat<SocketPairKind>(VecCat<SocketPairKind>( + ApplyVec<SocketPairKind>( + UnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_DGRAM, SOCK_RAW}, + List<int>{0, SOCK_NONBLOCK})), + ApplyVec<SocketPairKind>( + FilesystemBoundUnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_DGRAM, SOCK_RAW}, + List<int>{0, SOCK_NONBLOCK})), + ApplyVec<SocketPairKind>( + AbstractBoundUnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_DGRAM, SOCK_RAW}, + List<int>{0, SOCK_NONBLOCK})))); +} + +INSTANTIATE_TEST_SUITE_P( + DgramUnixSockets, DgramUnixSocketPairTest, + ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); + +INSTANTIATE_TEST_SUITE_P( + DgramUnixSockets, UnixNonStreamSocketPairTest, + ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); + +INSTANTIATE_TEST_SUITE_P( + DgramUnixSockets, NonStreamSocketPairTest, + ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); + +} // namespace +} // namespace testing +} // namespace gvisor diff 
--git a/test/syscalls/linux/socket_unix_dgram_non_blocking.cc b/test/syscalls/linux/socket_unix_dgram_non_blocking.cc new file mode 100644 index 000000000..2db8b68d3 --- /dev/null +++ b/test/syscalls/linux/socket_unix_dgram_non_blocking.cc @@ -0,0 +1,57 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <stdio.h> +#include <sys/un.h> + +#include "gtest/gtest.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// Test fixture for tests that apply to pairs of connected non-blocking dgram +// unix sockets. +using NonBlockingDgramUnixSocketPairTest = SocketPairTest; + +TEST_P(NonBlockingDgramUnixSocketPairTest, ReadOneSideClosed) { + if (IsRunningOnGvisor()) { + // FIXME(b/70803293): gVisor datagram sockets return 0 instead of + // EAGAIN. 
+ return; + } + + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + ASSERT_THAT(close(sockets->release_first_fd()), SyscallSucceeds()); + char data[10] = {}; + ASSERT_THAT(read(sockets->second_fd(), data, sizeof(data)), + SyscallFailsWithErrno(EAGAIN)); +} + +INSTANTIATE_TEST_SUITE_P( + NonBlockingDgramUnixSockets, NonBlockingDgramUnixSocketPairTest, + ::testing::ValuesIn(IncludeReversals(std::vector<SocketPairKind>{ + UnixDomainSocketPair(SOCK_DGRAM | SOCK_NONBLOCK), + FilesystemBoundUnixDomainSocketPair(SOCK_DGRAM | SOCK_NONBLOCK), + AbstractBoundUnixDomainSocketPair(SOCK_DGRAM | SOCK_NONBLOCK), + }))); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_unix_domain.cc b/test/syscalls/linux/socket_unix_domain.cc new file mode 100644 index 000000000..f7dff8b4d --- /dev/null +++ b/test/syscalls/linux/socket_unix_domain.cc @@ -0,0 +1,39 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <vector> + +#include "test/syscalls/linux/socket_generic.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { +namespace { + +std::vector<SocketPairKind> GetSocketPairs() { + return ApplyVec<SocketPairKind>( + UnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_STREAM, SOCK_DGRAM, SOCK_SEQPACKET}, + List<int>{0, SOCK_NONBLOCK})); +} + +INSTANTIATE_TEST_SUITE_P( + AllUnixDomainSockets, AllSocketPairTest, + ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_unix_filesystem_nonblock.cc b/test/syscalls/linux/socket_unix_filesystem_nonblock.cc new file mode 100644 index 000000000..6700b4d90 --- /dev/null +++ b/test/syscalls/linux/socket_unix_filesystem_nonblock.cc @@ -0,0 +1,39 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <vector> + +#include "test/syscalls/linux/socket_non_blocking.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { +namespace { + +std::vector<SocketPairKind> GetSocketPairs() { + return ApplyVec<SocketPairKind>( + FilesystemBoundUnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_STREAM, SOCK_DGRAM, SOCK_SEQPACKET}, + List<int>{SOCK_NONBLOCK})); +} + +INSTANTIATE_TEST_SUITE_P( + NonBlockingFilesystemUnixSockets, NonBlockingSocketPairTest, + ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_unix_non_stream.cc b/test/syscalls/linux/socket_unix_non_stream.cc new file mode 100644 index 000000000..884319e1d --- /dev/null +++ b/test/syscalls/linux/socket_unix_non_stream.cc @@ -0,0 +1,256 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "test/syscalls/linux/socket_unix_non_stream.h" + +#include <stdio.h> +#include <sys/mman.h> +#include <sys/un.h> + +#include "gtest/gtest.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/memory_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +TEST_P(UnixNonStreamSocketPairTest, RecvMsgTooLarge) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + int rcvbuf; + socklen_t length = sizeof(rcvbuf); + ASSERT_THAT( + getsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVBUF, &rcvbuf, &length), + SyscallSucceeds()); + + // Make the call larger than the receive buffer. + const int recv_size = 3 * rcvbuf; + + // Write a message that does fit in the receive buffer. + const int write_size = rcvbuf - kPageSize; + + std::vector<char> write_buf(write_size, 'a'); + const int ret = RetryEINTR(write)(sockets->second_fd(), write_buf.data(), + write_buf.size()); + if (ret < 0 && errno == ENOBUFS) { + // NOTE(b/116636318): Linux may stall the write for a long time and + // ultimately return ENOBUFS. Allow this error, since a retry will likely + // result in the same error. + return; + } + ASSERT_THAT(ret, SyscallSucceeds()); + + std::vector<char> recv_buf(recv_size); + + ASSERT_NO_FATAL_FAILURE(RecvNoCmsg(sockets->first_fd(), recv_buf.data(), + recv_buf.size(), write_size)); + + recv_buf.resize(write_size); + EXPECT_EQ(recv_buf, write_buf); +} + +// Create a region of anonymous memory of size 'size', which is fragmented in +// FileMem. +// +// ptr contains the start address of the region. The returned vector contains +// all of the mappings to be unmapped when done. 
+PosixErrorOr<std::vector<Mapping>> CreateFragmentedRegion(const int size, + void** ptr) { + Mapping region; + ASSIGN_OR_RETURN_ERRNO(region, Mmap(nullptr, size, PROT_NONE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0)); + + *ptr = region.ptr(); + + // Don't save hundreds of times for all of these mmaps. + DisableSave ds; + + std::vector<Mapping> pages; + + // Map and commit a single page at a time, mapping and committing an unrelated + // page between each call to force FileMem fragmentation. + for (uintptr_t addr = region.addr(); addr < region.endaddr(); + addr += kPageSize) { + Mapping page; + ASSIGN_OR_RETURN_ERRNO( + page, + Mmap(reinterpret_cast<void*>(addr), kPageSize, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0)); + *reinterpret_cast<volatile char*>(page.ptr()) = 42; + + pages.emplace_back(std::move(page)); + + // Unrelated page elsewhere. + ASSIGN_OR_RETURN_ERRNO(page, + Mmap(nullptr, kPageSize, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0)); + *reinterpret_cast<volatile char*>(page.ptr()) = 42; + + pages.emplace_back(std::move(page)); + } + + // The mappings above have taken ownership of the region. + region.release(); + + return std::move(pages); +} + +// A contiguous iov that is heavily fragmented in FileMem can still be sent +// successfully. See b/115833655. +TEST_P(UnixNonStreamSocketPairTest, FragmentedSendMsg) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + const int buffer_size = UIO_MAXIOV * kPageSize; + // Extra page for message header overhead. + const int sndbuf = buffer_size + kPageSize; + // N.B. setsockopt(SO_SNDBUF) doubles the passed value. 
+ const int set_sndbuf = sndbuf / 2; + + EXPECT_THAT(setsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDBUF, + &set_sndbuf, sizeof(set_sndbuf)), + SyscallSucceeds()); + + int actual_sndbuf = 0; + socklen_t length = sizeof(actual_sndbuf); + ASSERT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDBUF, + &actual_sndbuf, &length), + SyscallSucceeds()); + + if (actual_sndbuf != sndbuf) { + // Unable to get the sndbuf we want. + // + // N.B. At minimum, the socketpair gofer should provide a socket that is + // already the correct size. + // + // TODO(b/35921550): When internal UDS support SO_SNDBUF, we can assert that + // we always get the right SO_SNDBUF on gVisor. + GTEST_SKIP() << "SO_SNDBUF = " << actual_sndbuf << ", want " << sndbuf; + } + + // Create a contiguous region of memory of 2*UIO_MAXIOV*PAGE_SIZE. We'll call + // sendmsg with a single iov, but the goal is to get the sentry to split this + // into > UIO_MAXIOV iovs when calling the kernel. + void* ptr; + std::vector<Mapping> pages = + ASSERT_NO_ERRNO_AND_VALUE(CreateFragmentedRegion(buffer_size, &ptr)); + + struct iovec iov = {}; + iov.iov_base = ptr; + iov.iov_len = buffer_size; + + struct msghdr msg = {}; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + // NOTE(b/116636318,b/115833655): Linux has poor behavior in the presence of + // physical memory fragmentation. As a result, this may stall for a long time + // and ultimately return ENOBUFS. Allow this error, since it means that we + // made it to the host kernel and started the sendmsg. + EXPECT_THAT(RetryEINTR(sendmsg)(sockets->first_fd(), &msg, 0), + AnyOf(SyscallSucceedsWithValue(buffer_size), + SyscallFailsWithErrno(ENOBUFS))); +} + +// A contiguous iov that is heavily fragmented in FileMem can still be received +// into successfully. Regression test for b/115833655. 
+TEST_P(UnixNonStreamSocketPairTest, FragmentedRecvMsg) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + const int buffer_size = UIO_MAXIOV * kPageSize; + // Extra page for message header overhead. + const int sndbuf = buffer_size + kPageSize; + // N.B. setsockopt(SO_SNDBUF) doubles the passed value. + const int set_sndbuf = sndbuf / 2; + + EXPECT_THAT(setsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDBUF, + &set_sndbuf, sizeof(set_sndbuf)), + SyscallSucceeds()); + + int actual_sndbuf = 0; + socklen_t length = sizeof(actual_sndbuf); + ASSERT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDBUF, + &actual_sndbuf, &length), + SyscallSucceeds()); + + if (actual_sndbuf != sndbuf) { + // Unable to get the sndbuf we want. + // + // N.B. At minimum, the socketpair gofer should provide a socket that is + // already the correct size. + // + // TODO(b/35921550): When internal UDS support SO_SNDBUF, we can assert that + // we always get the right SO_SNDBUF on gVisor. + GTEST_SKIP() << "SO_SNDBUF = " << actual_sndbuf << ", want " << sndbuf; + } + + std::vector<char> write_buf(buffer_size, 'a'); + const int ret = RetryEINTR(write)(sockets->first_fd(), write_buf.data(), + write_buf.size()); + if (ret < 0 && errno == ENOBUFS) { + // NOTE(b/116636318): Linux may stall the write for a long time and + // ultimately return ENOBUFS. Allow this error, since a retry will likely + // result in the same error. + return; + } + ASSERT_THAT(ret, SyscallSucceeds()); + + // Create a contiguous region of memory of 2*UIO_MAXIOV*PAGE_SIZE. We'll call + // sendmsg with a single iov, but the goal is to get the sentry to split this + // into > UIO_MAXIOV iovs when calling the kernel. 
+ void* ptr; + std::vector<Mapping> pages = + ASSERT_NO_ERRNO_AND_VALUE(CreateFragmentedRegion(buffer_size, &ptr)); + + ASSERT_NO_FATAL_FAILURE(RecvNoCmsg( + sockets->second_fd(), reinterpret_cast<char*>(ptr), buffer_size)); + + EXPECT_EQ(0, memcmp(write_buf.data(), ptr, buffer_size)); +} + +TEST_P(UnixNonStreamSocketPairTest, SendTimeout) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + struct timeval tv { + .tv_sec = 0, .tv_usec = 10 + }; + EXPECT_THAT( + setsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)), + SyscallSucceeds()); + + const int buf_size = 5 * kPageSize; + EXPECT_THAT(setsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDBUF, &buf_size, + sizeof(buf_size)), + SyscallSucceeds()); + EXPECT_THAT(setsockopt(sockets->second_fd(), SOL_SOCKET, SO_RCVBUF, &buf_size, + sizeof(buf_size)), + SyscallSucceeds()); + + // The buffer size should be big enough to avoid many iterations in the next + // loop. Otherwise, this will slow down cooperative_save tests. + std::vector<char> buf(kPageSize); + for (;;) { + int ret; + ASSERT_THAT( + ret = RetryEINTR(send)(sockets->first_fd(), buf.data(), buf.size(), 0), + ::testing::AnyOf(SyscallSucceeds(), SyscallFailsWithErrno(EAGAIN))); + if (ret == -1) { + break; + } + } +} + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_unix_non_stream.h b/test/syscalls/linux/socket_unix_non_stream.h new file mode 100644 index 000000000..7478ab172 --- /dev/null +++ b/test/syscalls/linux/socket_unix_non_stream.h @@ -0,0 +1,30 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_NON_STREAM_H_ +#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_NON_STREAM_H_ + +#include "test/syscalls/linux/socket_test_util.h" + +namespace gvisor { +namespace testing { + +// Test fixture for tests that apply to pairs of connected non-stream +// unix-domain sockets. +using UnixNonStreamSocketPairTest = SocketPairTest; + +} // namespace testing +} // namespace gvisor + +#endif // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_NON_STREAM_H_ diff --git a/test/syscalls/linux/socket_unix_non_stream_blocking_local.cc b/test/syscalls/linux/socket_unix_non_stream_blocking_local.cc new file mode 100644 index 000000000..fddcdf1c5 --- /dev/null +++ b/test/syscalls/linux/socket_unix_non_stream_blocking_local.cc @@ -0,0 +1,42 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <vector> + +#include "test/syscalls/linux/socket_non_stream_blocking.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { +namespace { + +std::vector<SocketPairKind> GetSocketPairs() { + return VecCat<SocketPairKind>( + ApplyVec<SocketPairKind>(UnixDomainSocketPair, + std::vector<int>{SOCK_DGRAM, SOCK_SEQPACKET}), + ApplyVec<SocketPairKind>(FilesystemBoundUnixDomainSocketPair, + std::vector<int>{SOCK_DGRAM, SOCK_SEQPACKET}), + ApplyVec<SocketPairKind>(AbstractBoundUnixDomainSocketPair, + std::vector<int>{SOCK_DGRAM, SOCK_SEQPACKET})); +} + +INSTANTIATE_TEST_SUITE_P( + BlockingNonStreamUnixSockets, BlockingNonStreamSocketPairTest, + ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_unix_pair.cc b/test/syscalls/linux/socket_unix_pair.cc new file mode 100644 index 000000000..85999db04 --- /dev/null +++ b/test/syscalls/linux/socket_unix_pair.cc @@ -0,0 +1,44 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <vector> + +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/socket_unix.h" +#include "test/syscalls/linux/socket_unix_cmsg.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { +namespace { + +std::vector<SocketPairKind> GetSocketPairs() { + return VecCat<SocketPairKind>(ApplyVec<SocketPairKind>( + UnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_STREAM, SOCK_DGRAM, SOCK_SEQPACKET}, + List<int>{0, SOCK_NONBLOCK}))); +} + +INSTANTIATE_TEST_SUITE_P( + AllUnixDomainSockets, UnixSocketPairTest, + ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); + +INSTANTIATE_TEST_SUITE_P( + AllUnixDomainSockets, UnixSocketPairCmsgTest, + ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_unix_pair_nonblock.cc b/test/syscalls/linux/socket_unix_pair_nonblock.cc new file mode 100644 index 000000000..281410a9a --- /dev/null +++ b/test/syscalls/linux/socket_unix_pair_nonblock.cc @@ -0,0 +1,39 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <vector> + +#include "test/syscalls/linux/socket_non_blocking.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { +namespace { + +std::vector<SocketPairKind> GetSocketPairs() { + return ApplyVec<SocketPairKind>( + UnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_STREAM, SOCK_DGRAM, SOCK_SEQPACKET}, + List<int>{SOCK_NONBLOCK})); +} + +INSTANTIATE_TEST_SUITE_P( + NonBlockingUnixSockets, NonBlockingSocketPairTest, + ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_unix_seqpacket.cc b/test/syscalls/linux/socket_unix_seqpacket.cc new file mode 100644 index 000000000..6d03df4d9 --- /dev/null +++ b/test/syscalls/linux/socket_unix_seqpacket.cc @@ -0,0 +1,67 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "test/syscalls/linux/socket_unix_seqpacket.h" + +#include <stdio.h> +#include <sys/un.h> + +#include "gtest/gtest.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +TEST_P(SeqpacketUnixSocketPairTest, WriteOneSideClosed) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + ASSERT_THAT(close(sockets->release_first_fd()), SyscallSucceeds()); + constexpr char kStr[] = "abc"; + ASSERT_THAT(write(sockets->second_fd(), kStr, 3), + SyscallFailsWithErrno(EPIPE)); +} + +TEST_P(SeqpacketUnixSocketPairTest, ReadOneSideClosed) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + ASSERT_THAT(close(sockets->release_first_fd()), SyscallSucceeds()); + char data[10] = {}; + ASSERT_THAT(read(sockets->second_fd(), data, sizeof(data)), + SyscallSucceedsWithValue(0)); +} + +TEST_P(SeqpacketUnixSocketPairTest, Sendto) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + struct sockaddr_un addr = {}; + addr.sun_family = AF_UNIX; + constexpr char kPath[] = "\0nonexistent"; + memcpy(addr.sun_path, kPath, sizeof(kPath)); + + constexpr char kStr[] = "abc"; + ASSERT_THAT(sendto(sockets->second_fd(), kStr, 3, 0, (struct sockaddr*)&addr, + sizeof(addr)), + SyscallSucceedsWithValue(3)); + + char data[10] = {}; + ASSERT_THAT(read(sockets->first_fd(), data, sizeof(data)), + SyscallSucceedsWithValue(3)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_unix_seqpacket.h b/test/syscalls/linux/socket_unix_seqpacket.h new file mode 100644 index 000000000..30d9b9edf --- /dev/null +++ b/test/syscalls/linux/socket_unix_seqpacket.h @@ -0,0 +1,30 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_SEQPACKET_H_ +#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_SEQPACKET_H_ + +#include "test/syscalls/linux/socket_test_util.h" + +namespace gvisor { +namespace testing { + +// Test fixture for tests that apply to pairs of connected seqpacket unix +// sockets. +using SeqpacketUnixSocketPairTest = SocketPairTest; + +} // namespace testing +} // namespace gvisor + +#endif // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_SEQPACKET_H_ diff --git a/test/syscalls/linux/socket_unix_seqpacket_local.cc b/test/syscalls/linux/socket_unix_seqpacket_local.cc new file mode 100644 index 000000000..69a5f150d --- /dev/null +++ b/test/syscalls/linux/socket_unix_seqpacket_local.cc @@ -0,0 +1,58 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <vector> + +#include "test/syscalls/linux/socket_non_stream.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/socket_unix_non_stream.h" +#include "test/syscalls/linux/socket_unix_seqpacket.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { +namespace { + +std::vector<SocketPairKind> GetSocketPairs() { + return VecCat<SocketPairKind>(VecCat<SocketPairKind>( + ApplyVec<SocketPairKind>( + UnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_SEQPACKET}, + List<int>{0, SOCK_NONBLOCK})), + ApplyVec<SocketPairKind>( + FilesystemBoundUnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_SEQPACKET}, + List<int>{0, SOCK_NONBLOCK})), + ApplyVec<SocketPairKind>( + AbstractBoundUnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_SEQPACKET}, + List<int>{0, SOCK_NONBLOCK})))); +} + +INSTANTIATE_TEST_SUITE_P( + SeqpacketUnixSockets, NonStreamSocketPairTest, + ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); + +INSTANTIATE_TEST_SUITE_P( + SeqpacketUnixSockets, SeqpacketUnixSocketPairTest, + ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); + +INSTANTIATE_TEST_SUITE_P( + SeqpacketUnixSockets, UnixNonStreamSocketPairTest, + ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_unix_stream.cc b/test/syscalls/linux/socket_unix_stream.cc new file mode 100644 index 000000000..99e77b89e --- /dev/null +++ b/test/syscalls/linux/socket_unix_stream.cc @@ -0,0 +1,125 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <poll.h> +#include <stdio.h> +#include <sys/un.h> + +#include "gtest/gtest.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// Test fixture for tests that apply to pairs of connected stream unix sockets. +using StreamUnixSocketPairTest = SocketPairTest; + +TEST_P(StreamUnixSocketPairTest, WriteOneSideClosed) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + ASSERT_THAT(close(sockets->release_first_fd()), SyscallSucceeds()); + constexpr char kStr[] = "abc"; + ASSERT_THAT(write(sockets->second_fd(), kStr, 3), + SyscallFailsWithErrno(EPIPE)); +} + +TEST_P(StreamUnixSocketPairTest, ReadOneSideClosed) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + ASSERT_THAT(close(sockets->release_first_fd()), SyscallSucceeds()); + char data[10] = {}; + ASSERT_THAT(read(sockets->second_fd(), data, sizeof(data)), + SyscallSucceedsWithValue(0)); +} + +TEST_P(StreamUnixSocketPairTest, RecvmsgOneSideClosed) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + // Set timeout so that it will not wait for ever. 
+ struct timeval tv { + .tv_sec = 0, .tv_usec = 10 + }; + EXPECT_THAT(setsockopt(sockets->second_fd(), SOL_SOCKET, SO_RCVTIMEO, &tv, + sizeof(tv)), + SyscallSucceeds()); + + ASSERT_THAT(close(sockets->release_first_fd()), SyscallSucceeds()); + + char received_data[10] = {}; + struct iovec iov; + iov.iov_base = received_data; + iov.iov_len = sizeof(received_data); + struct msghdr msg = {}; + msg.msg_flags = -1; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + ASSERT_THAT(recvmsg(sockets->second_fd(), &msg, MSG_WAITALL), + SyscallSucceedsWithValue(0)); +} + +TEST_P(StreamUnixSocketPairTest, ReadOneSideClosedWithUnreadData) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char buf[10] = {}; + ASSERT_THAT(RetryEINTR(write)(sockets->second_fd(), buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + ASSERT_THAT(shutdown(sockets->first_fd(), SHUT_RDWR), SyscallSucceeds()); + + ASSERT_THAT(RetryEINTR(read)(sockets->second_fd(), buf, sizeof(buf)), + SyscallSucceedsWithValue(0)); + + ASSERT_THAT(close(sockets->release_first_fd()), SyscallSucceeds()); + + ASSERT_THAT(RetryEINTR(read)(sockets->second_fd(), buf, sizeof(buf)), + SyscallFailsWithErrno(ECONNRESET)); +} + +TEST_P(StreamUnixSocketPairTest, Sendto) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + struct sockaddr_un addr = {}; + addr.sun_family = AF_UNIX; + constexpr char kPath[] = "\0nonexistent"; + memcpy(addr.sun_path, kPath, sizeof(kPath)); + + constexpr char kStr[] = "abc"; + ASSERT_THAT(sendto(sockets->second_fd(), kStr, 3, 0, (struct sockaddr*)&addr, + sizeof(addr)), + SyscallFailsWithErrno(EISCONN)); +} + +INSTANTIATE_TEST_SUITE_P( + AllUnixDomainSockets, StreamUnixSocketPairTest, + ::testing::ValuesIn(IncludeReversals(VecCat<SocketPairKind>( + ApplyVec<SocketPairKind>(UnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_STREAM}, + List<int>{ + 0, SOCK_NONBLOCK})), + ApplyVec<SocketPairKind>(FilesystemBoundUnixDomainSocketPair, + 
AllBitwiseCombinations(List<int>{SOCK_STREAM}, + List<int>{ + 0, SOCK_NONBLOCK})), + ApplyVec<SocketPairKind>( + AbstractBoundUnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_STREAM}, + List<int>{0, SOCK_NONBLOCK})))))); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_unix_stream_blocking_local.cc b/test/syscalls/linux/socket_unix_stream_blocking_local.cc new file mode 100644 index 000000000..8429bd429 --- /dev/null +++ b/test/syscalls/linux/socket_unix_stream_blocking_local.cc @@ -0,0 +1,40 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <vector> + +#include "test/syscalls/linux/socket_stream_blocking.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { +namespace { + +std::vector<SocketPairKind> GetSocketPairs() { + return { + UnixDomainSocketPair(SOCK_STREAM), + FilesystemBoundUnixDomainSocketPair(SOCK_STREAM), + AbstractBoundUnixDomainSocketPair(SOCK_STREAM), + }; +} + +INSTANTIATE_TEST_SUITE_P( + BlockingStreamUnixSockets, BlockingStreamSocketPairTest, + ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_unix_stream_local.cc b/test/syscalls/linux/socket_unix_stream_local.cc new file mode 100644 index 000000000..a7e3449a9 --- /dev/null +++ b/test/syscalls/linux/socket_unix_stream_local.cc @@ -0,0 +1,48 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <vector> + +#include "test/syscalls/linux/socket_stream.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { +namespace { + +std::vector<SocketPairKind> GetSocketPairs() { + return VecCat<SocketPairKind>( + ApplyVec<SocketPairKind>( + UnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_STREAM}, + List<int>{0, SOCK_NONBLOCK})), + ApplyVec<SocketPairKind>( + FilesystemBoundUnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_STREAM}, + List<int>{0, SOCK_NONBLOCK})), + ApplyVec<SocketPairKind>( + AbstractBoundUnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_STREAM}, + List<int>{0, SOCK_NONBLOCK}))); +} + +INSTANTIATE_TEST_SUITE_P( + StreamUnixSockets, StreamSocketPairTest, + ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_unix_stream_nonblock_local.cc b/test/syscalls/linux/socket_unix_stream_nonblock_local.cc new file mode 100644 index 000000000..4b763c8e2 --- /dev/null +++ b/test/syscalls/linux/socket_unix_stream_nonblock_local.cc @@ -0,0 +1,39 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#include <vector> + +#include "test/syscalls/linux/socket_stream_nonblock.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { +namespace { + +std::vector<SocketPairKind> GetSocketPairs() { + return { + UnixDomainSocketPair(SOCK_STREAM | SOCK_NONBLOCK), + FilesystemBoundUnixDomainSocketPair(SOCK_STREAM | SOCK_NONBLOCK), + AbstractBoundUnixDomainSocketPair(SOCK_STREAM | SOCK_NONBLOCK), + }; +} + +INSTANTIATE_TEST_SUITE_P( + NonBlockingStreamUnixSockets, NonBlockingStreamSocketPairTest, + ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_unix_unbound_abstract.cc b/test/syscalls/linux/socket_unix_unbound_abstract.cc new file mode 100644 index 000000000..8b1762000 --- /dev/null +++ b/test/syscalls/linux/socket_unix_unbound_abstract.cc @@ -0,0 +1,116 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <stdio.h> +#include <sys/un.h> + +#include "gtest/gtest.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// Test fixture for tests that apply to pairs of unbound abstract unix sockets. 
+using UnboundAbstractUnixSocketPairTest = SocketPairTest; + +TEST_P(UnboundAbstractUnixSocketPairTest, AddressAfterNull) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + struct sockaddr_un addr = + *reinterpret_cast<const struct sockaddr_un*>(sockets->first_addr()); + ASSERT_EQ(addr.sun_path[sizeof(addr.sun_path) - 1], 0); + SKIP_IF(addr.sun_path[sizeof(addr.sun_path) - 2] != 0 || + addr.sun_path[sizeof(addr.sun_path) - 3] != 0); + + addr.sun_path[sizeof(addr.sun_path) - 2] = 'a'; + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(bind(sockets->second_fd(), + reinterpret_cast<struct sockaddr*>(&addr), sizeof(addr)), + SyscallSucceeds()); +} + +TEST_P(UnboundAbstractUnixSocketPairTest, ShortAddressNotExtended) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + struct sockaddr_un addr = + *reinterpret_cast<const struct sockaddr_un*>(sockets->first_addr()); + ASSERT_EQ(addr.sun_path[sizeof(addr.sun_path) - 1], 0); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size() - 1), + SyscallSucceeds()); + + ASSERT_THAT(bind(sockets->second_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); +} + +TEST_P(UnboundAbstractUnixSocketPairTest, BindNothing) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + struct sockaddr_un addr = {.sun_family = AF_UNIX}; + ASSERT_THAT(bind(sockets->first_fd(), + reinterpret_cast<struct sockaddr*>(&addr), sizeof(addr)), + SyscallSucceeds()); +} + +TEST_P(UnboundAbstractUnixSocketPairTest, GetSockNameFullLength) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + sockaddr_storage addr = {}; + socklen_t addr_len = sizeof(addr); + ASSERT_THAT(getsockname(sockets->first_fd(), + reinterpret_cast<struct sockaddr*>(&addr), 
&addr_len), + SyscallSucceeds()); + EXPECT_EQ(addr_len, sockets->first_addr_size()); +} + +TEST_P(UnboundAbstractUnixSocketPairTest, GetSockNamePartialLength) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size() - 1), + SyscallSucceeds()); + + sockaddr_storage addr = {}; + socklen_t addr_len = sizeof(addr); + ASSERT_THAT(getsockname(sockets->first_fd(), + reinterpret_cast<struct sockaddr*>(&addr), &addr_len), + SyscallSucceeds()); + EXPECT_EQ(addr_len, sockets->first_addr_size() - 1); +} + +INSTANTIATE_TEST_SUITE_P( + AllUnixDomainSockets, UnboundAbstractUnixSocketPairTest, + ::testing::ValuesIn(ApplyVec<SocketPairKind>( + AbstractUnboundUnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_STREAM, SOCK_SEQPACKET, + SOCK_DGRAM}, + List<int>{0, SOCK_NONBLOCK})))); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_unix_unbound_dgram.cc b/test/syscalls/linux/socket_unix_unbound_dgram.cc new file mode 100644 index 000000000..907dca0f1 --- /dev/null +++ b/test/syscalls/linux/socket_unix_unbound_dgram.cc @@ -0,0 +1,183 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <stdio.h> +#include <sys/socket.h> +#include <sys/un.h> + +#include "gtest/gtest.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// Test fixture for tests that apply to pairs of unbound dgram unix sockets. +using UnboundDgramUnixSocketPairTest = SocketPairTest; + +TEST_P(UnboundDgramUnixSocketPairTest, BindConnect) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); +} + +TEST_P(UnboundDgramUnixSocketPairTest, SelfConnect) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + ASSERT_THAT(connect(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); +} + +TEST_P(UnboundDgramUnixSocketPairTest, DoubleConnect) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); +} + +TEST_P(UnboundDgramUnixSocketPairTest, GetRemoteAddress) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + 
socklen_t addressLength = sockets->first_addr_size(); + struct sockaddr_storage address = {}; + ASSERT_THAT(getpeername(sockets->second_fd(), (struct sockaddr*)(&address), + &addressLength), + SyscallSucceeds()); + EXPECT_EQ( + 0, memcmp(&address, sockets->first_addr(), sockets->first_addr_size())); +} + +TEST_P(UnboundDgramUnixSocketPairTest, Sendto) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + ASSERT_THAT(sendto(sockets->second_fd(), sent_data, sizeof(sent_data), 0, + sockets->first_addr(), sockets->first_addr_size()), + SyscallSucceedsWithValue(sizeof(sent_data))); + + char received_data[sizeof(sent_data)]; + ASSERT_THAT(ReadFd(sockets->first_fd(), received_data, sizeof(received_data)), + SyscallSucceedsWithValue(sizeof(received_data))); + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(received_data))); +} + +TEST_P(UnboundDgramUnixSocketPairTest, ZeroWriteAllowed) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + char sent_data[3]; + // Send a zero length packet. + ASSERT_THAT(write(sockets->second_fd(), sent_data, 0), + SyscallSucceedsWithValue(0)); + // Receive the packet. 
+ char received_data[sizeof(sent_data)]; + ASSERT_THAT(read(sockets->first_fd(), received_data, sizeof(received_data)), + SyscallSucceedsWithValue(0)); +} + +TEST_P(UnboundDgramUnixSocketPairTest, Listen) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + ASSERT_THAT(listen(sockets->first_fd(), 0), SyscallFailsWithErrno(ENOTSUP)); +} + +TEST_P(UnboundDgramUnixSocketPairTest, Accept) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + ASSERT_THAT(accept(sockets->first_fd(), nullptr, nullptr), + SyscallFailsWithErrno(ENOTSUP)); +} + +TEST_P(UnboundDgramUnixSocketPairTest, SendtoWithoutConnect) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + char data = 'a'; + ASSERT_THAT( + RetryEINTR(sendto)(sockets->second_fd(), &data, sizeof(data), 0, + sockets->first_addr(), sockets->first_addr_size()), + SyscallSucceedsWithValue(sizeof(data))); +} + +TEST_P(UnboundDgramUnixSocketPairTest, SendtoWithoutConnectPassCreds) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + SetSoPassCred(sockets->first_fd()); + char data = 'a'; + ASSERT_THAT( + RetryEINTR(sendto)(sockets->second_fd(), &data, sizeof(data), 0, + sockets->first_addr(), sockets->first_addr_size()), + SyscallSucceedsWithValue(sizeof(data))); + ucred creds; + creds.pid = -1; + char buf[sizeof(data) + 1]; + ASSERT_NO_FATAL_FAILURE( + RecvCreds(sockets->first_fd(), &creds, buf, sizeof(buf), sizeof(data))); + EXPECT_EQ(0, memcmp(&data, buf, sizeof(data))); + EXPECT_THAT(getpid(), SyscallSucceedsWithValue(creds.pid)); +} + +INSTANTIATE_TEST_SUITE_P( + AllUnixDomainSockets, UnboundDgramUnixSocketPairTest, + ::testing::ValuesIn(VecCat<SocketPairKind>( + ApplyVec<SocketPairKind>(FilesystemUnboundUnixDomainSocketPair, + 
AllBitwiseCombinations(List<int>{SOCK_DGRAM}, + List<int>{ + 0, SOCK_NONBLOCK})), + ApplyVec<SocketPairKind>( + AbstractUnboundUnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_DGRAM}, + List<int>{0, SOCK_NONBLOCK}))))); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_unix_unbound_filesystem.cc b/test/syscalls/linux/socket_unix_unbound_filesystem.cc new file mode 100644 index 000000000..cab912152 --- /dev/null +++ b/test/syscalls/linux/socket_unix_unbound_filesystem.cc @@ -0,0 +1,84 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <stdio.h> +#include <sys/un.h> + +#include "gtest/gtest.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// Test fixture for tests that apply to pairs of unbound filesystem unix +// sockets. 
+using UnboundFilesystemUnixSocketPairTest = SocketPairTest; + +TEST_P(UnboundFilesystemUnixSocketPairTest, AddressAfterNull) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + struct sockaddr_un addr = + *reinterpret_cast<const struct sockaddr_un*>(sockets->first_addr()); + ASSERT_EQ(addr.sun_path[sizeof(addr.sun_path) - 1], 0); + SKIP_IF(addr.sun_path[sizeof(addr.sun_path) - 2] != 0 || + addr.sun_path[sizeof(addr.sun_path) - 3] != 0); + + addr.sun_path[sizeof(addr.sun_path) - 2] = 'a'; + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(bind(sockets->second_fd(), + reinterpret_cast<struct sockaddr*>(&addr), sizeof(addr)), + SyscallFailsWithErrno(EADDRINUSE)); +} + +TEST_P(UnboundFilesystemUnixSocketPairTest, GetSockNameLength) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + sockaddr_storage got_addr = {}; + socklen_t got_addr_len = sizeof(got_addr); + ASSERT_THAT( + getsockname(sockets->first_fd(), + reinterpret_cast<struct sockaddr*>(&got_addr), &got_addr_len), + SyscallSucceeds()); + + sockaddr_un want_addr = + *reinterpret_cast<const struct sockaddr_un*>(sockets->first_addr()); + + EXPECT_EQ(got_addr_len, + strlen(want_addr.sun_path) + 1 + sizeof(want_addr.sun_family)); +} + +INSTANTIATE_TEST_SUITE_P( + AllUnixDomainSockets, UnboundFilesystemUnixSocketPairTest, + ::testing::ValuesIn(ApplyVec<SocketPairKind>( + FilesystemUnboundUnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_STREAM, SOCK_SEQPACKET, + SOCK_DGRAM}, + List<int>{0, SOCK_NONBLOCK})))); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_unix_unbound_seqpacket.cc b/test/syscalls/linux/socket_unix_unbound_seqpacket.cc new file mode 100644 index 000000000..cb99030f5 --- /dev/null +++ 
b/test/syscalls/linux/socket_unix_unbound_seqpacket.cc @@ -0,0 +1,89 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <stdio.h> +#include <sys/un.h> + +#include "gtest/gtest.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// Test fixture for tests that apply to pairs of unbound seqpacket unix sockets. +using UnboundUnixSeqpacketSocketPairTest = SocketPairTest; + +TEST_P(UnboundUnixSeqpacketSocketPairTest, SendtoWithoutConnect) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + char data = 'a'; + ASSERT_THAT(sendto(sockets->second_fd(), &data, sizeof(data), 0, + sockets->first_addr(), sockets->first_addr_size()), + SyscallFailsWithErrno(ENOTCONN)); +} + +TEST_P(UnboundUnixSeqpacketSocketPairTest, SendtoWithoutConnectIgnoresAddr) { + // FIXME(b/68223466): gVisor tries to find /foo/bar and thus returns ENOENT. + if (IsRunningOnGvisor()) { + return; + } + + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + // Even a bogus address is completely ignored. 
+ constexpr char kPath[] = "/foo/bar"; + + // Sanity check that kPath doesn't exist. + struct stat s; + ASSERT_THAT(stat(kPath, &s), SyscallFailsWithErrno(ENOENT)); + + struct sockaddr_un addr = {}; + addr.sun_family = AF_UNIX; + memcpy(addr.sun_path, kPath, sizeof(kPath)); + + char data = 'a'; + ASSERT_THAT( + sendto(sockets->second_fd(), &data, sizeof(data), 0, + reinterpret_cast<const struct sockaddr*>(&addr), sizeof(addr)), + SyscallFailsWithErrno(ENOTCONN)); +} + +INSTANTIATE_TEST_SUITE_P( + AllUnixDomainSockets, UnboundUnixSeqpacketSocketPairTest, + ::testing::ValuesIn(IncludeReversals(VecCat<SocketPairKind>( + ApplyVec<SocketPairKind>( + FilesystemUnboundUnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_SEQPACKET}, + List<int>{0, SOCK_NONBLOCK})), + ApplyVec<SocketPairKind>( + AbstractUnboundUnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_SEQPACKET}, + List<int>{0, SOCK_NONBLOCK})))))); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_unix_unbound_stream.cc b/test/syscalls/linux/socket_unix_unbound_stream.cc new file mode 100644 index 000000000..f185dded3 --- /dev/null +++ b/test/syscalls/linux/socket_unix_unbound_stream.cc @@ -0,0 +1,733 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <stdio.h> +#include <sys/un.h> + +#include "gtest/gtest.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// Test fixture for tests that apply to pairs of connected unix stream sockets. +using UnixStreamSocketPairTest = SocketPairTest; + +// FDPassPartialRead checks that sent control messages cannot be read after +// any of their associated data has been read while ignoring the control message +// by using read(2) instead of recvmsg(2). +TEST_P(UnixStreamSocketPairTest, FDPassPartialRead) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + auto pair = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(), + sent_data, sizeof(sent_data))); + + char received_data[sizeof(sent_data) / 2]; + ASSERT_THAT( + ReadFd(sockets->second_fd(), received_data, sizeof(received_data)), + SyscallSucceedsWithValue(sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(received_data))); + + RecvNoCmsg(sockets->second_fd(), received_data, sizeof(received_data)); + EXPECT_EQ(0, memcmp(sent_data + sizeof(received_data), received_data, + sizeof(received_data))); +} + +TEST_P(UnixStreamSocketPairTest, FDPassCoalescedRead) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data1[20]; + RandomizeBuffer(sent_data1, sizeof(sent_data1)); + + auto pair1 = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair1->second_fd(), + sent_data1, sizeof(sent_data1))); + + char sent_data2[20]; + RandomizeBuffer(sent_data2, sizeof(sent_data2)); + + auto pair2 = + 
ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair2->second_fd(), + sent_data2, sizeof(sent_data2))); + + char received_data[sizeof(sent_data1) + sizeof(sent_data2)]; + ASSERT_THAT( + ReadFd(sockets->second_fd(), received_data, sizeof(received_data)), + SyscallSucceedsWithValue(sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data1, received_data, sizeof(sent_data1))); + EXPECT_EQ(0, memcmp(sent_data2, received_data + sizeof(sent_data1), + sizeof(sent_data2))); +} + +// ZeroLengthMessageFDDiscarded checks that control messages associated with +// zero length messages are discarded. +TEST_P(UnixStreamSocketPairTest, ZeroLengthMessageFDDiscarded) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + // Zero length arrays are invalid in ISO C++, so allocate one of size 1 and + // send a length of 0. + char sent_data1[1] = {}; + + auto pair = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + ASSERT_NO_FATAL_FAILURE( + SendSingleFD(sockets->first_fd(), pair->second_fd(), sent_data1, 0)); + + char sent_data2[20]; + RandomizeBuffer(sent_data2, sizeof(sent_data2)); + + ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data2, sizeof(sent_data2)), + SyscallSucceedsWithValue(sizeof(sent_data2))); + + char received_data[sizeof(sent_data2)] = {}; + + RecvNoCmsg(sockets->second_fd(), received_data, sizeof(received_data)); + EXPECT_EQ(0, memcmp(sent_data2, received_data, sizeof(received_data))); +} + +// FDPassCoalescedRecv checks that control messages not in the first message are +// preserved in a coalesced recv. 
+TEST_P(UnixStreamSocketPairTest, FDPassCoalescedRecv) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data, sizeof(sent_data) / 2), + SyscallSucceedsWithValue(sizeof(sent_data) / 2)); + + auto pair = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(), + sent_data + sizeof(sent_data) / 2, + sizeof(sent_data) / 2)); + + char received_data[sizeof(sent_data)]; + + int fd = -1; + ASSERT_NO_FATAL_FAILURE(RecvSingleFD(sockets->second_fd(), &fd, received_data, + sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); + + ASSERT_NO_FATAL_FAILURE(TransferTest(fd, pair->first_fd())); +} + +// ReadsNotCoalescedAfterFDPass checks that messages after a message containing +// an FD control message are not coalesced. +TEST_P(UnixStreamSocketPairTest, ReadsNotCoalescedAfterFDPass) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + auto pair = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(), + sent_data, sizeof(sent_data) / 2)); + + ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data + sizeof(sent_data) / 2, + sizeof(sent_data) / 2), + SyscallSucceedsWithValue(sizeof(sent_data) / 2)); + + char received_data[sizeof(sent_data)]; + + int fd = -1; + ASSERT_NO_FATAL_FAILURE(RecvSingleFD(sockets->second_fd(), &fd, received_data, + sizeof(received_data), + sizeof(sent_data) / 2)); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data) / 2)); + + ASSERT_NO_FATAL_FAILURE(TransferTest(fd, pair->first_fd())); + EXPECT_THAT(close(fd), SyscallSucceeds()); + + ASSERT_NO_FATAL_FAILURE( + 
RecvNoCmsg(sockets->second_fd(), received_data, sizeof(sent_data) / 2)); + + EXPECT_EQ(0, memcmp(sent_data + sizeof(sent_data) / 2, received_data, + sizeof(sent_data) / 2)); +} + +// FDPassNotCombined checks that FD control messages are not combined in a +// coalesced read. +TEST_P(UnixStreamSocketPairTest, FDPassNotCombined) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + auto pair1 = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair1->second_fd(), + sent_data, sizeof(sent_data) / 2)); + + auto pair2 = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair2->second_fd(), + sent_data + sizeof(sent_data) / 2, + sizeof(sent_data) / 2)); + + char received_data[sizeof(sent_data)]; + + int fd = -1; + ASSERT_NO_FATAL_FAILURE(RecvSingleFD(sockets->second_fd(), &fd, received_data, + sizeof(received_data), + sizeof(sent_data) / 2)); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data) / 2)); + + ASSERT_NO_FATAL_FAILURE(TransferTest(fd, pair1->first_fd())); + + EXPECT_THAT(close(fd), SyscallSucceeds()); + fd = -1; + + ASSERT_NO_FATAL_FAILURE(RecvSingleFD(sockets->second_fd(), &fd, received_data, + sizeof(received_data), + sizeof(sent_data) / 2)); + + EXPECT_EQ(0, memcmp(sent_data + sizeof(sent_data) / 2, received_data, + sizeof(sent_data) / 2)); + + ASSERT_NO_FATAL_FAILURE(TransferTest(fd, pair2->first_fd())); + EXPECT_THAT(close(fd), SyscallSucceeds()); +} + +TEST_P(UnixStreamSocketPairTest, CredPassPartialRead) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + struct ucred sent_creds; + + ASSERT_THAT(sent_creds.pid = getpid(), SyscallSucceeds()); + ASSERT_THAT(sent_creds.uid = getuid(), 
SyscallSucceeds()); + ASSERT_THAT(sent_creds.gid = getgid(), SyscallSucceeds()); + + ASSERT_NO_FATAL_FAILURE( + SendCreds(sockets->first_fd(), sent_creds, sent_data, sizeof(sent_data))); + + int one = 1; + ASSERT_THAT(setsockopt(sockets->second_fd(), SOL_SOCKET, SO_PASSCRED, &one, + sizeof(one)), + SyscallSucceeds()); + + for (int i = 0; i < 2; i++) { + char received_data[10]; + struct ucred received_creds; + ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds, + received_data, sizeof(received_data), + sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data + i * sizeof(received_data), received_data, + sizeof(received_data))); + EXPECT_EQ(sent_creds.pid, received_creds.pid); + EXPECT_EQ(sent_creds.uid, received_creds.uid); + EXPECT_EQ(sent_creds.gid, received_creds.gid); + } +} + +// Unix stream sockets peek in the same way as datagram sockets. +// +// SinglePeek checks that only a single message is peekable in a single recv. +TEST_P(UnixStreamSocketPairTest, SinglePeek) { + if (!IsRunningOnGvisor()) { + // Don't run this test on linux kernels newer than 4.3.x Linux kernel commit + // 9f389e35674f5b086edd70ed524ca0f287259725 which changes this behavior. We + // used to target 3.11 compatibility, so disable this test on newer kernels. + // + // NOTE(b/118902768): Bring this up to Linux 4.4 compatibility. 
+ auto version = ASSERT_NO_ERRNO_AND_VALUE(GetKernelVersion()); + SKIP_IF(version.major > 4 || (version.major == 4 && version.minor >= 3)); + } + + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + char sent_data[40]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + ASSERT_THAT(RetryEINTR(send)(sockets->first_fd(), sent_data, + sizeof(sent_data) / 2, 0), + SyscallSucceedsWithValue(sizeof(sent_data) / 2)); + ASSERT_THAT( + RetryEINTR(send)(sockets->first_fd(), sent_data + sizeof(sent_data) / 2, + sizeof(sent_data) / 2, 0), + SyscallSucceedsWithValue(sizeof(sent_data) / 2)); + char received_data[sizeof(sent_data)]; + for (int i = 0; i < 3; i++) { + memset(received_data, 0, sizeof(received_data)); + ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data, + sizeof(received_data), MSG_PEEK), + SyscallSucceedsWithValue(sizeof(sent_data) / 2)); + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data) / 2)); + } + memset(received_data, 0, sizeof(received_data)); + ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data, + sizeof(sent_data) / 2, 0), + SyscallSucceedsWithValue(sizeof(sent_data) / 2)); + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data) / 2)); + memset(received_data, 0, sizeof(received_data)); + ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data, + sizeof(sent_data) / 2, 0), + SyscallSucceedsWithValue(sizeof(sent_data) / 2)); + EXPECT_EQ(0, memcmp(sent_data + sizeof(sent_data) / 2, received_data, + sizeof(sent_data) / 2)); +} + +TEST_P(UnixStreamSocketPairTest, CredsNotCoalescedUp) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data1[20]; + RandomizeBuffer(sent_data1, sizeof(sent_data1)); + + ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data1, sizeof(sent_data1)), + SyscallSucceedsWithValue(sizeof(sent_data1))); + + SetSoPassCred(sockets->second_fd()); + + char sent_data2[20]; + RandomizeBuffer(sent_data2, sizeof(sent_data2)); + + 
ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data2, sizeof(sent_data2)), + SyscallSucceedsWithValue(sizeof(sent_data2))); + + char received_data[sizeof(sent_data1) + sizeof(sent_data2)]; + + struct ucred received_creds; + ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds, + received_data, sizeof(received_data), + sizeof(sent_data1))); + + EXPECT_EQ(0, memcmp(sent_data1, received_data, sizeof(sent_data1))); + + struct ucred want_creds { + 0, 65534, 65534 + }; + + EXPECT_EQ(want_creds.pid, received_creds.pid); + EXPECT_EQ(want_creds.uid, received_creds.uid); + EXPECT_EQ(want_creds.gid, received_creds.gid); + + ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds, + received_data, sizeof(received_data), + sizeof(sent_data2))); + + EXPECT_EQ(0, memcmp(sent_data2, received_data, sizeof(sent_data2))); + + ASSERT_THAT(want_creds.pid = getpid(), SyscallSucceeds()); + ASSERT_THAT(want_creds.uid = getuid(), SyscallSucceeds()); + ASSERT_THAT(want_creds.gid = getgid(), SyscallSucceeds()); + + EXPECT_EQ(want_creds.pid, received_creds.pid); + EXPECT_EQ(want_creds.uid, received_creds.uid); + EXPECT_EQ(want_creds.gid, received_creds.gid); +} + +TEST_P(UnixStreamSocketPairTest, CredsNotCoalescedDown) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + SetSoPassCred(sockets->second_fd()); + + char sent_data1[20]; + RandomizeBuffer(sent_data1, sizeof(sent_data1)); + + ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data1, sizeof(sent_data1)), + SyscallSucceedsWithValue(sizeof(sent_data1))); + + UnsetSoPassCred(sockets->second_fd()); + + char sent_data2[20]; + RandomizeBuffer(sent_data2, sizeof(sent_data2)); + + ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data2, sizeof(sent_data2)), + SyscallSucceedsWithValue(sizeof(sent_data2))); + + SetSoPassCred(sockets->second_fd()); + + char received_data[sizeof(sent_data1) + sizeof(sent_data2)]; + struct ucred received_creds; + + ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), 
&received_creds, + received_data, sizeof(received_data), + sizeof(sent_data1))); + + EXPECT_EQ(0, memcmp(sent_data1, received_data, sizeof(sent_data1))); + + struct ucred want_creds; + ASSERT_THAT(want_creds.pid = getpid(), SyscallSucceeds()); + ASSERT_THAT(want_creds.uid = getuid(), SyscallSucceeds()); + ASSERT_THAT(want_creds.gid = getgid(), SyscallSucceeds()); + + EXPECT_EQ(want_creds.pid, received_creds.pid); + EXPECT_EQ(want_creds.uid, received_creds.uid); + EXPECT_EQ(want_creds.gid, received_creds.gid); + + ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds, + received_data, sizeof(received_data), + sizeof(sent_data2))); + + EXPECT_EQ(0, memcmp(sent_data2, received_data, sizeof(sent_data2))); + + want_creds = {0, 65534, 65534}; + + EXPECT_EQ(want_creds.pid, received_creds.pid); + EXPECT_EQ(want_creds.uid, received_creds.uid); + EXPECT_EQ(want_creds.gid, received_creds.gid); +} + +TEST_P(UnixStreamSocketPairTest, CoalescedCredsNoPasscred) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + SetSoPassCred(sockets->second_fd()); + + char sent_data1[20]; + RandomizeBuffer(sent_data1, sizeof(sent_data1)); + + ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data1, sizeof(sent_data1)), + SyscallSucceedsWithValue(sizeof(sent_data1))); + + UnsetSoPassCred(sockets->second_fd()); + + char sent_data2[20]; + RandomizeBuffer(sent_data2, sizeof(sent_data2)); + + ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data2, sizeof(sent_data2)), + SyscallSucceedsWithValue(sizeof(sent_data2))); + + char received_data[sizeof(sent_data1) + sizeof(sent_data2)]; + + ASSERT_NO_FATAL_FAILURE( + RecvNoCmsg(sockets->second_fd(), received_data, sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data1, received_data, sizeof(sent_data1))); + EXPECT_EQ(0, memcmp(sent_data2, received_data + sizeof(sent_data1), + sizeof(sent_data2))); +} + +TEST_P(UnixStreamSocketPairTest, CoalescedCreds1) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char 
sent_data1[20]; + RandomizeBuffer(sent_data1, sizeof(sent_data1)); + + ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data1, sizeof(sent_data1)), + SyscallSucceedsWithValue(sizeof(sent_data1))); + + char sent_data2[20]; + RandomizeBuffer(sent_data2, sizeof(sent_data2)); + + ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data2, sizeof(sent_data2)), + SyscallSucceedsWithValue(sizeof(sent_data2))); + + SetSoPassCred(sockets->second_fd()); + + char received_data[sizeof(sent_data1) + sizeof(sent_data2)]; + struct ucred received_creds; + + ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds, + received_data, sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data1, received_data, sizeof(sent_data1))); + EXPECT_EQ(0, memcmp(sent_data2, received_data + sizeof(sent_data1), + sizeof(sent_data2))); + + struct ucred want_creds { + 0, 65534, 65534 + }; + + EXPECT_EQ(want_creds.pid, received_creds.pid); + EXPECT_EQ(want_creds.uid, received_creds.uid); + EXPECT_EQ(want_creds.gid, received_creds.gid); +} + +TEST_P(UnixStreamSocketPairTest, CoalescedCreds2) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + SetSoPassCred(sockets->second_fd()); + + char sent_data1[20]; + RandomizeBuffer(sent_data1, sizeof(sent_data1)); + + ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data1, sizeof(sent_data1)), + SyscallSucceedsWithValue(sizeof(sent_data1))); + + char sent_data2[20]; + RandomizeBuffer(sent_data2, sizeof(sent_data2)); + + ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data2, sizeof(sent_data2)), + SyscallSucceedsWithValue(sizeof(sent_data2))); + + char received_data[sizeof(sent_data1) + sizeof(sent_data2)]; + struct ucred received_creds; + + ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds, + received_data, sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data1, received_data, sizeof(sent_data1))); + EXPECT_EQ(0, memcmp(sent_data2, received_data + sizeof(sent_data1), + sizeof(sent_data2))); + + struct ucred want_creds; 
+ ASSERT_THAT(want_creds.pid = getpid(), SyscallSucceeds()); + ASSERT_THAT(want_creds.uid = getuid(), SyscallSucceeds()); + ASSERT_THAT(want_creds.gid = getgid(), SyscallSucceeds()); + + EXPECT_EQ(want_creds.pid, received_creds.pid); + EXPECT_EQ(want_creds.uid, received_creds.uid); + EXPECT_EQ(want_creds.gid, received_creds.gid); +} + +TEST_P(UnixStreamSocketPairTest, NonCoalescedDifferingCreds1) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data1[20]; + RandomizeBuffer(sent_data1, sizeof(sent_data1)); + + ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data1, sizeof(sent_data1)), + SyscallSucceedsWithValue(sizeof(sent_data1))); + + SetSoPassCred(sockets->second_fd()); + + char sent_data2[20]; + RandomizeBuffer(sent_data2, sizeof(sent_data2)); + + ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data2, sizeof(sent_data2)), + SyscallSucceedsWithValue(sizeof(sent_data2))); + + char received_data1[sizeof(sent_data1) + sizeof(sent_data2)]; + struct ucred received_creds1; + + ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds1, + received_data1, sizeof(sent_data1))); + + EXPECT_EQ(0, memcmp(sent_data1, received_data1, sizeof(sent_data1))); + + struct ucred want_creds1 { + 0, 65534, 65534 + }; + + EXPECT_EQ(want_creds1.pid, received_creds1.pid); + EXPECT_EQ(want_creds1.uid, received_creds1.uid); + EXPECT_EQ(want_creds1.gid, received_creds1.gid); + + char received_data2[sizeof(sent_data1) + sizeof(sent_data2)]; + struct ucred received_creds2; + + ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds2, + received_data2, sizeof(sent_data2))); + + EXPECT_EQ(0, memcmp(sent_data2, received_data2, sizeof(sent_data2))); + + struct ucred want_creds2; + ASSERT_THAT(want_creds2.pid = getpid(), SyscallSucceeds()); + ASSERT_THAT(want_creds2.uid = getuid(), SyscallSucceeds()); + ASSERT_THAT(want_creds2.gid = getgid(), SyscallSucceeds()); + + EXPECT_EQ(want_creds2.pid, received_creds2.pid); + EXPECT_EQ(want_creds2.uid, 
received_creds2.uid); + EXPECT_EQ(want_creds2.gid, received_creds2.gid); +} + +TEST_P(UnixStreamSocketPairTest, NonCoalescedDifferingCreds2) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + SetSoPassCred(sockets->second_fd()); + + char sent_data1[20]; + RandomizeBuffer(sent_data1, sizeof(sent_data1)); + + ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data1, sizeof(sent_data1)), + SyscallSucceedsWithValue(sizeof(sent_data1))); + + UnsetSoPassCred(sockets->second_fd()); + + char sent_data2[20]; + RandomizeBuffer(sent_data2, sizeof(sent_data2)); + + ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data2, sizeof(sent_data2)), + SyscallSucceedsWithValue(sizeof(sent_data2))); + + SetSoPassCred(sockets->second_fd()); + + char received_data1[sizeof(sent_data1) + sizeof(sent_data2)]; + struct ucred received_creds1; + + ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds1, + received_data1, sizeof(sent_data1))); + + EXPECT_EQ(0, memcmp(sent_data1, received_data1, sizeof(sent_data1))); + + struct ucred want_creds1; + ASSERT_THAT(want_creds1.pid = getpid(), SyscallSucceeds()); + ASSERT_THAT(want_creds1.uid = getuid(), SyscallSucceeds()); + ASSERT_THAT(want_creds1.gid = getgid(), SyscallSucceeds()); + + EXPECT_EQ(want_creds1.pid, received_creds1.pid); + EXPECT_EQ(want_creds1.uid, received_creds1.uid); + EXPECT_EQ(want_creds1.gid, received_creds1.gid); + + char received_data2[sizeof(sent_data1) + sizeof(sent_data2)]; + struct ucred received_creds2; + + ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds2, + received_data2, sizeof(sent_data2))); + + EXPECT_EQ(0, memcmp(sent_data2, received_data2, sizeof(sent_data2))); + + struct ucred want_creds2 { + 0, 65534, 65534 + }; + + EXPECT_EQ(want_creds2.pid, received_creds2.pid); + EXPECT_EQ(want_creds2.uid, received_creds2.uid); + EXPECT_EQ(want_creds2.gid, received_creds2.gid); +} + +TEST_P(UnixStreamSocketPairTest, CoalescedDifferingCreds) { + auto sockets = 
ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + SetSoPassCred(sockets->second_fd()); + + char sent_data1[20]; + RandomizeBuffer(sent_data1, sizeof(sent_data1)); + + ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data1, sizeof(sent_data1)), + SyscallSucceedsWithValue(sizeof(sent_data1))); + + char sent_data2[20]; + RandomizeBuffer(sent_data2, sizeof(sent_data2)); + + ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data2, sizeof(sent_data2)), + SyscallSucceedsWithValue(sizeof(sent_data2))); + + UnsetSoPassCred(sockets->second_fd()); + + char sent_data3[20]; + RandomizeBuffer(sent_data3, sizeof(sent_data3)); + + ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data3, sizeof(sent_data3)), + SyscallSucceedsWithValue(sizeof(sent_data3))); + + char received_data[sizeof(sent_data1) + sizeof(sent_data2) + + sizeof(sent_data3)]; + + ASSERT_NO_FATAL_FAILURE( + RecvNoCmsg(sockets->second_fd(), received_data, sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data1, received_data, sizeof(sent_data1))); + EXPECT_EQ(0, memcmp(sent_data2, received_data + sizeof(sent_data1), + sizeof(sent_data2))); + EXPECT_EQ(0, memcmp(sent_data3, + received_data + sizeof(sent_data1) + sizeof(sent_data2), + sizeof(sent_data3))); +} + +INSTANTIATE_TEST_SUITE_P( + AllUnixDomainSockets, UnixStreamSocketPairTest, + ::testing::ValuesIn(IncludeReversals(VecCat<SocketPairKind>( + ApplyVec<SocketPairKind>(UnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_STREAM}, + List<int>{ + 0, SOCK_NONBLOCK})), + ApplyVec<SocketPairKind>(FilesystemBoundUnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_STREAM}, + List<int>{ + 0, SOCK_NONBLOCK})), + ApplyVec<SocketPairKind>( + AbstractBoundUnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_STREAM}, + List<int>{0, SOCK_NONBLOCK})))))); + +// Test fixture for tests that apply to pairs of unbound unix stream sockets. 
+using UnboundUnixStreamSocketPairTest = SocketPairTest; + +TEST_P(UnboundUnixStreamSocketPairTest, SendtoWithoutConnect) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + char data = 'a'; + ASSERT_THAT(sendto(sockets->second_fd(), &data, sizeof(data), 0, + sockets->first_addr(), sockets->first_addr_size()), + SyscallFailsWithErrno(EOPNOTSUPP)); +} + +TEST_P(UnboundUnixStreamSocketPairTest, SendtoWithoutConnectIgnoresAddr) { + // FIXME(b/68223466): gVisor tries to find /foo/bar and thus returns ENOENT. + if (IsRunningOnGvisor()) { + return; + } + + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + // Even a bogus address is completely ignored. + constexpr char kPath[] = "/foo/bar"; + + // Sanity check that kPath doesn't exist. 
+ struct stat s; + ASSERT_THAT(stat(kPath, &s), SyscallFailsWithErrno(ENOENT)); + + struct sockaddr_un addr = {}; + addr.sun_family = AF_UNIX; + memcpy(addr.sun_path, kPath, sizeof(kPath)); + + char data = 'a'; + ASSERT_THAT( + sendto(sockets->second_fd(), &data, sizeof(data), 0, + reinterpret_cast<const struct sockaddr*>(&addr), sizeof(addr)), + SyscallFailsWithErrno(EOPNOTSUPP)); +} + +INSTANTIATE_TEST_SUITE_P( + AllUnixDomainSockets, UnboundUnixStreamSocketPairTest, + ::testing::ValuesIn(IncludeReversals(VecCat<SocketPairKind>( + ApplyVec<SocketPairKind>(FilesystemUnboundUnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_STREAM}, + List<int>{ + 0, SOCK_NONBLOCK})), + ApplyVec<SocketPairKind>( + AbstractUnboundUnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_STREAM}, + List<int>{0, SOCK_NONBLOCK})))))); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/splice.cc b/test/syscalls/linux/splice.cc new file mode 100644 index 000000000..08fc4b1b7 --- /dev/null +++ b/test/syscalls/linux/splice.cc @@ -0,0 +1,699 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <fcntl.h> +#include <linux/unistd.h> +#include <sys/eventfd.h> +#include <sys/resource.h> +#include <sys/sendfile.h> +#include <sys/time.h> +#include <unistd.h> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/strings/string_view.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/util/file_descriptor.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +TEST(SpliceTest, TwoRegularFiles) { + // Create temp files. + const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + + // Open the input file as read only. + const FileDescriptor in_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY)); + + // Open the output file as write only. + const FileDescriptor out_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_WRONLY)); + + // Verify that it is rejected as expected; regardless of offsets. + loff_t in_offset = 0; + loff_t out_offset = 0; + EXPECT_THAT(splice(in_fd.get(), &in_offset, out_fd.get(), &out_offset, 1, 0), + SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(splice(in_fd.get(), nullptr, out_fd.get(), &out_offset, 1, 0), + SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(splice(in_fd.get(), &in_offset, out_fd.get(), nullptr, 1, 0), + SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(splice(in_fd.get(), nullptr, out_fd.get(), nullptr, 1, 0), + SyscallFailsWithErrno(EINVAL)); +} + +int memfd_create(const std::string& name, unsigned int flags) { + return syscall(__NR_memfd_create, name.c_str(), flags); +} + +TEST(SpliceTest, NegativeOffset) { + // Create a new pipe. + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + const FileDescriptor rfd(fds[0]); + const FileDescriptor wfd(fds[1]); + + // Fill the pipe. 
+ std::vector<char> buf(kPageSize); + RandomizeBuffer(buf.data(), buf.size()); + ASSERT_THAT(write(wfd.get(), buf.data(), buf.size()), + SyscallSucceedsWithValue(kPageSize)); + + // Open the output file as write only. + int fd; + EXPECT_THAT(fd = memfd_create("negative", 0), SyscallSucceeds()); + const FileDescriptor out_fd(fd); + + loff_t out_offset = 0xffffffffffffffffull; + constexpr int kSize = 2; + EXPECT_THAT(splice(rfd.get(), nullptr, out_fd.get(), &out_offset, kSize, 0), + SyscallFailsWithErrno(EINVAL)); +} + +// Write offset + size overflows int64. +// +// This is a regression test for b/148041624. +TEST(SpliceTest, WriteOverflow) { + // Create a new pipe. + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + const FileDescriptor rfd(fds[0]); + const FileDescriptor wfd(fds[1]); + + // Fill the pipe. + std::vector<char> buf(kPageSize); + RandomizeBuffer(buf.data(), buf.size()); + ASSERT_THAT(write(wfd.get(), buf.data(), buf.size()), + SyscallSucceedsWithValue(kPageSize)); + + // Open the output file. + int fd; + EXPECT_THAT(fd = memfd_create("overflow", 0), SyscallSucceeds()); + const FileDescriptor out_fd(fd); + + // out_offset + kSize overflows INT64_MAX. + loff_t out_offset = 0x7ffffffffffffffeull; + constexpr int kSize = 3; + EXPECT_THAT(splice(rfd.get(), nullptr, out_fd.get(), &out_offset, kSize, 0), + SyscallFailsWithErrno(EINVAL)); +} + +TEST(SpliceTest, SamePipe) { + // Create a new pipe. + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + const FileDescriptor rfd(fds[0]); + const FileDescriptor wfd(fds[1]); + + // Fill the pipe. + std::vector<char> buf(kPageSize); + RandomizeBuffer(buf.data(), buf.size()); + ASSERT_THAT(write(wfd.get(), buf.data(), buf.size()), + SyscallSucceedsWithValue(kPageSize)); + + // Attempt to splice to itself. + EXPECT_THAT(splice(rfd.get(), nullptr, wfd.get(), nullptr, kPageSize, 0), + SyscallFailsWithErrno(EINVAL)); +} + +TEST(TeeTest, SamePipe) { + // Create a new pipe. 
+ int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + const FileDescriptor rfd(fds[0]); + const FileDescriptor wfd(fds[1]); + + // Fill the pipe. + std::vector<char> buf(kPageSize); + RandomizeBuffer(buf.data(), buf.size()); + ASSERT_THAT(write(wfd.get(), buf.data(), buf.size()), + SyscallSucceedsWithValue(kPageSize)); + + // Attempt to tee to itself. + EXPECT_THAT(tee(rfd.get(), wfd.get(), kPageSize, 0), + SyscallFailsWithErrno(EINVAL)); +} + +TEST(TeeTest, RegularFile) { + // Open some file. + const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const FileDescriptor in_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDWR)); + + // Create a new pipe. + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + const FileDescriptor rfd(fds[0]); + const FileDescriptor wfd(fds[1]); + + // Attempt to tee from the file. + EXPECT_THAT(tee(in_fd.get(), wfd.get(), kPageSize, 0), + SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(tee(rfd.get(), in_fd.get(), kPageSize, 0), + SyscallFailsWithErrno(EINVAL)); +} + +TEST(SpliceTest, PipeOffsets) { + // Create two new pipes. + int first[2], second[2]; + ASSERT_THAT(pipe(first), SyscallSucceeds()); + const FileDescriptor rfd1(first[0]); + const FileDescriptor wfd1(first[1]); + ASSERT_THAT(pipe(second), SyscallSucceeds()); + const FileDescriptor rfd2(second[0]); + const FileDescriptor wfd2(second[1]); + + // All pipe offsets should be rejected. + loff_t in_offset = 0; + loff_t out_offset = 0; + EXPECT_THAT(splice(rfd1.get(), &in_offset, wfd2.get(), &out_offset, 1, 0), + SyscallFailsWithErrno(ESPIPE)); + EXPECT_THAT(splice(rfd1.get(), nullptr, wfd2.get(), &out_offset, 1, 0), + SyscallFailsWithErrno(ESPIPE)); + EXPECT_THAT(splice(rfd1.get(), &in_offset, wfd2.get(), nullptr, 1, 0), + SyscallFailsWithErrno(ESPIPE)); +} + +// Event FDs may be used with splice without an offset. +TEST(SpliceTest, FromEventFD) { + // Open the input eventfd with an initial value so that it is readable. 
+ constexpr uint64_t kEventFDValue = 1; + int efd; + ASSERT_THAT(efd = eventfd(kEventFDValue, 0), SyscallSucceeds()); + const FileDescriptor in_fd(efd); + + // Create a new pipe. + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + const FileDescriptor rfd(fds[0]); + const FileDescriptor wfd(fds[1]); + + // Splice 8-byte eventfd value to pipe. + constexpr int kEventFDSize = 8; + EXPECT_THAT(splice(in_fd.get(), nullptr, wfd.get(), nullptr, kEventFDSize, 0), + SyscallSucceedsWithValue(kEventFDSize)); + + // Contents should be equal. + std::vector<char> rbuf(kEventFDSize); + ASSERT_THAT(read(rfd.get(), rbuf.data(), rbuf.size()), + SyscallSucceedsWithValue(kEventFDSize)); + EXPECT_EQ(memcmp(rbuf.data(), &kEventFDValue, rbuf.size()), 0); +} + +// Event FDs may not be used with splice with an offset. +TEST(SpliceTest, FromEventFDOffset) { + int efd; + ASSERT_THAT(efd = eventfd(0, 0), SyscallSucceeds()); + const FileDescriptor in_fd(efd); + + // Create a new pipe. + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + const FileDescriptor rfd(fds[0]); + const FileDescriptor wfd(fds[1]); + + // Attempt to splice 8-byte eventfd value to pipe with offset. + // + // This is not allowed because eventfd doesn't support pread. + constexpr int kEventFDSize = 8; + loff_t in_off = 0; + EXPECT_THAT(splice(in_fd.get(), &in_off, wfd.get(), nullptr, kEventFDSize, 0), + SyscallFailsWithErrno(EINVAL)); +} + +// Event FDs may not be used with splice with an offset. +TEST(SpliceTest, ToEventFDOffset) { + // Create a new pipe. + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + const FileDescriptor rfd(fds[0]); + const FileDescriptor wfd(fds[1]); + + // Fill with a value. 
+ constexpr int kEventFDSize = 8; + std::vector<char> buf(kEventFDSize); + buf[0] = 1; + ASSERT_THAT(write(wfd.get(), buf.data(), buf.size()), + SyscallSucceedsWithValue(kEventFDSize)); + + int efd; + ASSERT_THAT(efd = eventfd(0, 0), SyscallSucceeds()); + const FileDescriptor out_fd(efd); + + // Attempt to splice 8-byte eventfd value to pipe with offset. + // + // This is not allowed because eventfd doesn't support pwrite. + loff_t out_off = 0; + EXPECT_THAT( + splice(rfd.get(), nullptr, out_fd.get(), &out_off, kEventFDSize, 0), + SyscallFailsWithErrno(EINVAL)); +} + +TEST(SpliceTest, ToPipe) { + // Open the input file. + const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const FileDescriptor in_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDWR)); + + // Fill with some random data. + std::vector<char> buf(kPageSize); + RandomizeBuffer(buf.data(), buf.size()); + ASSERT_THAT(write(in_fd.get(), buf.data(), buf.size()), + SyscallSucceedsWithValue(kPageSize)); + ASSERT_THAT(lseek(in_fd.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0)); + + // Create a new pipe. + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + const FileDescriptor rfd(fds[0]); + const FileDescriptor wfd(fds[1]); + + // Splice to the pipe. + EXPECT_THAT(splice(in_fd.get(), nullptr, wfd.get(), nullptr, kPageSize, 0), + SyscallSucceedsWithValue(kPageSize)); + + // Contents should be equal. + std::vector<char> rbuf(kPageSize); + ASSERT_THAT(read(rfd.get(), rbuf.data(), rbuf.size()), + SyscallSucceedsWithValue(kPageSize)); + EXPECT_EQ(memcmp(rbuf.data(), buf.data(), buf.size()), 0); +} + +TEST(SpliceTest, ToPipeOffset) { + // Open the input file. + const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const FileDescriptor in_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDWR)); + + // Fill with some random data. 
+ std::vector<char> buf(kPageSize); + RandomizeBuffer(buf.data(), buf.size()); + ASSERT_THAT(write(in_fd.get(), buf.data(), buf.size()), + SyscallSucceedsWithValue(kPageSize)); + + // Create a new pipe. + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + const FileDescriptor rfd(fds[0]); + const FileDescriptor wfd(fds[1]); + + // Splice to the pipe. + loff_t in_offset = kPageSize / 2; + EXPECT_THAT( + splice(in_fd.get(), &in_offset, wfd.get(), nullptr, kPageSize / 2, 0), + SyscallSucceedsWithValue(kPageSize / 2)); + + // Contents should be equal to only the second part. + std::vector<char> rbuf(kPageSize / 2); + ASSERT_THAT(read(rfd.get(), rbuf.data(), rbuf.size()), + SyscallSucceedsWithValue(kPageSize / 2)); + EXPECT_EQ(memcmp(rbuf.data(), buf.data() + (kPageSize / 2), rbuf.size()), 0); +} + +TEST(SpliceTest, FromPipe) { + // Create a new pipe. + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + const FileDescriptor rfd(fds[0]); + const FileDescriptor wfd(fds[1]); + + // Fill with some random data. + std::vector<char> buf(kPageSize); + RandomizeBuffer(buf.data(), buf.size()); + ASSERT_THAT(write(wfd.get(), buf.data(), buf.size()), + SyscallSucceedsWithValue(kPageSize)); + + // Open the input file. + const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const FileDescriptor out_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_RDWR)); + + // Splice to the output file. + EXPECT_THAT(splice(rfd.get(), nullptr, out_fd.get(), nullptr, kPageSize, 0), + SyscallSucceedsWithValue(kPageSize)); + + // The offset of the output should be equal to kPageSize. We assert that and + // reset to zero so that we can read the contents and ensure they match. + EXPECT_THAT(lseek(out_fd.get(), 0, SEEK_CUR), + SyscallSucceedsWithValue(kPageSize)); + ASSERT_THAT(lseek(out_fd.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0)); + + // Contents should be equal. 
+ std::vector<char> rbuf(kPageSize); + ASSERT_THAT(read(out_fd.get(), rbuf.data(), rbuf.size()), + SyscallSucceedsWithValue(kPageSize)); + EXPECT_EQ(memcmp(rbuf.data(), buf.data(), buf.size()), 0); +} + +TEST(SpliceTest, FromPipeOffset) { + // Create a new pipe. + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + const FileDescriptor rfd(fds[0]); + const FileDescriptor wfd(fds[1]); + + // Fill with some random data. + std::vector<char> buf(kPageSize); + RandomizeBuffer(buf.data(), buf.size()); + ASSERT_THAT(write(wfd.get(), buf.data(), buf.size()), + SyscallSucceedsWithValue(kPageSize)); + + // Open the input file. + const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const FileDescriptor out_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_RDWR)); + + // Splice to the output file. + loff_t out_offset = kPageSize / 2; + EXPECT_THAT( + splice(rfd.get(), nullptr, out_fd.get(), &out_offset, kPageSize, 0), + SyscallSucceedsWithValue(kPageSize)); + + // Content should reflect the splice. We write to a specific offset in the + // file, so the internals should now be allocated sparsely. + std::vector<char> rbuf(kPageSize); + ASSERT_THAT(read(out_fd.get(), rbuf.data(), rbuf.size()), + SyscallSucceedsWithValue(kPageSize)); + std::vector<char> zbuf(kPageSize / 2); + memset(zbuf.data(), 0, zbuf.size()); + EXPECT_EQ(memcmp(rbuf.data(), zbuf.data(), zbuf.size()), 0); + EXPECT_EQ(memcmp(rbuf.data() + kPageSize / 2, buf.data(), kPageSize / 2), 0); +} + +TEST(SpliceTest, TwoPipes) { + // Create two new pipes. + int first[2], second[2]; + ASSERT_THAT(pipe(first), SyscallSucceeds()); + const FileDescriptor rfd1(first[0]); + const FileDescriptor wfd1(first[1]); + ASSERT_THAT(pipe(second), SyscallSucceeds()); + const FileDescriptor rfd2(second[0]); + const FileDescriptor wfd2(second[1]); + + // Fill with some random data. 
+ std::vector<char> buf(kPageSize); + RandomizeBuffer(buf.data(), buf.size()); + ASSERT_THAT(write(wfd1.get(), buf.data(), buf.size()), + SyscallSucceedsWithValue(kPageSize)); + + // Splice to the second pipe, using two operations. + EXPECT_THAT( + splice(rfd1.get(), nullptr, wfd2.get(), nullptr, kPageSize / 2, 0), + SyscallSucceedsWithValue(kPageSize / 2)); + EXPECT_THAT( + splice(rfd1.get(), nullptr, wfd2.get(), nullptr, kPageSize / 2, 0), + SyscallSucceedsWithValue(kPageSize / 2)); + + // Content should reflect the splice. + std::vector<char> rbuf(kPageSize); + ASSERT_THAT(read(rfd2.get(), rbuf.data(), rbuf.size()), + SyscallSucceedsWithValue(kPageSize)); + EXPECT_EQ(memcmp(rbuf.data(), buf.data(), kPageSize), 0); +} + +TEST(SpliceTest, TwoPipesCircular) { + // This test deadlocks the sentry on VFS1 because VFS1 splice ordering is + // based on fs.File.UniqueID, which does not prevent circular ordering between + // e.g. inode-level locks taken by fs.FileOperations. + SKIP_IF(IsRunningWithVFS1()); + + // Create two pipes. + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + const FileDescriptor first_rfd(fds[0]); + const FileDescriptor first_wfd(fds[1]); + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + const FileDescriptor second_rfd(fds[0]); + const FileDescriptor second_wfd(fds[1]); + + // On Linux, each pipe is normally limited to + // include/linux/pipe_fs_i.h:PIPE_DEF_BUFFERS buffers worth of data. + constexpr size_t PIPE_DEF_BUFFERS = 16; + + // Write some data to each pipe. Below we splice 1 byte at a time between + // pipes, which very quickly causes each byte to be stored in a separate + // buffer, so we must ensure that the total amount of data in the system is <= + // PIPE_DEF_BUFFERS bytes. 
+ std::vector<char> buf(PIPE_DEF_BUFFERS / 2); + RandomizeBuffer(buf.data(), buf.size()); + ASSERT_THAT(write(first_wfd.get(), buf.data(), buf.size()), + SyscallSucceedsWithValue(buf.size())); + ASSERT_THAT(write(second_wfd.get(), buf.data(), buf.size()), + SyscallSucceedsWithValue(buf.size())); + + // Have another thread splice from the second pipe to the first, while we + // splice from the first to the second. The test passes if this does not + // deadlock. + const int kIterations = 1000; + DisableSave ds; + ScopedThread t([&]() { + for (int i = 0; i < kIterations; i++) { + ASSERT_THAT( + splice(second_rfd.get(), nullptr, first_wfd.get(), nullptr, 1, 0), + SyscallSucceedsWithValue(1)); + } + }); + for (int i = 0; i < kIterations; i++) { + ASSERT_THAT( + splice(first_rfd.get(), nullptr, second_wfd.get(), nullptr, 1, 0), + SyscallSucceedsWithValue(1)); + } +} + +TEST(SpliceTest, Blocking) { + // Create two new pipes. + int first[2], second[2]; + ASSERT_THAT(pipe(first), SyscallSucceeds()); + const FileDescriptor rfd1(first[0]); + const FileDescriptor wfd1(first[1]); + ASSERT_THAT(pipe(second), SyscallSucceeds()); + const FileDescriptor rfd2(second[0]); + const FileDescriptor wfd2(second[1]); + + // This thread writes to the main pipe. + std::vector<char> buf(kPageSize); + RandomizeBuffer(buf.data(), buf.size()); + ScopedThread t([&]() { + ASSERT_THAT(write(wfd1.get(), buf.data(), buf.size()), + SyscallSucceedsWithValue(kPageSize)); + }); + + // Attempt a splice immediately; it should block. + EXPECT_THAT(splice(rfd1.get(), nullptr, wfd2.get(), nullptr, kPageSize, 0), + SyscallSucceedsWithValue(kPageSize)); + + // Thread should be joinable. + t.Join(); + + // Content should reflect the splice. + std::vector<char> rbuf(kPageSize); + ASSERT_THAT(read(rfd2.get(), rbuf.data(), rbuf.size()), + SyscallSucceedsWithValue(kPageSize)); + EXPECT_EQ(memcmp(rbuf.data(), buf.data(), kPageSize), 0); +} + +TEST(TeeTest, Blocking) { + // Create two new pipes. 
+ int first[2], second[2]; + ASSERT_THAT(pipe(first), SyscallSucceeds()); + const FileDescriptor rfd1(first[0]); + const FileDescriptor wfd1(first[1]); + ASSERT_THAT(pipe(second), SyscallSucceeds()); + const FileDescriptor rfd2(second[0]); + const FileDescriptor wfd2(second[1]); + + // This thread writes to the main pipe. + std::vector<char> buf(kPageSize); + RandomizeBuffer(buf.data(), buf.size()); + ScopedThread t([&]() { + ASSERT_THAT(write(wfd1.get(), buf.data(), buf.size()), + SyscallSucceedsWithValue(kPageSize)); + }); + + // Attempt a tee immediately; it should block. + EXPECT_THAT(tee(rfd1.get(), wfd2.get(), kPageSize, 0), + SyscallSucceedsWithValue(kPageSize)); + + // Thread should be joinable. + t.Join(); + + // Content should reflect the splice, in both pipes. + std::vector<char> rbuf(kPageSize); + ASSERT_THAT(read(rfd2.get(), rbuf.data(), rbuf.size()), + SyscallSucceedsWithValue(kPageSize)); + EXPECT_EQ(memcmp(rbuf.data(), buf.data(), kPageSize), 0); + ASSERT_THAT(read(rfd1.get(), rbuf.data(), rbuf.size()), + SyscallSucceedsWithValue(kPageSize)); + EXPECT_EQ(memcmp(rbuf.data(), buf.data(), kPageSize), 0); +} + +TEST(TeeTest, BlockingWrite) { + // Create two new pipes. + int first[2], second[2]; + ASSERT_THAT(pipe(first), SyscallSucceeds()); + const FileDescriptor rfd1(first[0]); + const FileDescriptor wfd1(first[1]); + ASSERT_THAT(pipe(second), SyscallSucceeds()); + const FileDescriptor rfd2(second[0]); + const FileDescriptor wfd2(second[1]); + + // Make some data available to be read. + std::vector<char> buf1(kPageSize); + RandomizeBuffer(buf1.data(), buf1.size()); + ASSERT_THAT(write(wfd1.get(), buf1.data(), buf1.size()), + SyscallSucceedsWithValue(kPageSize)); + + // Fill up the write pipe's buffer. 
+ int pipe_size = -1; + ASSERT_THAT(pipe_size = fcntl(wfd2.get(), F_GETPIPE_SZ), SyscallSucceeds()); + std::vector<char> buf2(pipe_size); + ASSERT_THAT(write(wfd2.get(), buf2.data(), buf2.size()), + SyscallSucceedsWithValue(pipe_size)); + + ScopedThread t([&]() { + absl::SleepFor(absl::Milliseconds(100)); + ASSERT_THAT(read(rfd2.get(), buf2.data(), buf2.size()), + SyscallSucceedsWithValue(pipe_size)); + }); + + // Attempt a tee immediately; it should block. + EXPECT_THAT(tee(rfd1.get(), wfd2.get(), kPageSize, 0), + SyscallSucceedsWithValue(kPageSize)); + + // Thread should be joinable. + t.Join(); + + // Content should reflect the tee. + std::vector<char> rbuf(kPageSize); + ASSERT_THAT(read(rfd2.get(), rbuf.data(), rbuf.size()), + SyscallSucceedsWithValue(kPageSize)); + EXPECT_EQ(memcmp(rbuf.data(), buf1.data(), kPageSize), 0); +} + +TEST(SpliceTest, NonBlocking) { + // Create two new pipes. + int first[2], second[2]; + ASSERT_THAT(pipe(first), SyscallSucceeds()); + const FileDescriptor rfd1(first[0]); + const FileDescriptor wfd1(first[1]); + ASSERT_THAT(pipe(second), SyscallSucceeds()); + const FileDescriptor rfd2(second[0]); + const FileDescriptor wfd2(second[1]); + + // Splice with no data to back it. + EXPECT_THAT(splice(rfd1.get(), nullptr, wfd2.get(), nullptr, kPageSize, + SPLICE_F_NONBLOCK), + SyscallFailsWithErrno(EAGAIN)); +} + +TEST(TeeTest, NonBlocking) { + // Create two new pipes. + int first[2], second[2]; + ASSERT_THAT(pipe(first), SyscallSucceeds()); + const FileDescriptor rfd1(first[0]); + const FileDescriptor wfd1(first[1]); + ASSERT_THAT(pipe(second), SyscallSucceeds()); + const FileDescriptor rfd2(second[0]); + const FileDescriptor wfd2(second[1]); + + // Splice with no data to back it. + EXPECT_THAT(tee(rfd1.get(), wfd2.get(), kPageSize, SPLICE_F_NONBLOCK), + SyscallFailsWithErrno(EAGAIN)); +} + +TEST(TeeTest, MultiPage) { + // Create two new pipes. 
+ int first[2], second[2]; + ASSERT_THAT(pipe(first), SyscallSucceeds()); + const FileDescriptor rfd1(first[0]); + const FileDescriptor wfd1(first[1]); + ASSERT_THAT(pipe(second), SyscallSucceeds()); + const FileDescriptor rfd2(second[0]); + const FileDescriptor wfd2(second[1]); + + // Make some data available to be read. + std::vector<char> wbuf(8 * kPageSize); + RandomizeBuffer(wbuf.data(), wbuf.size()); + ASSERT_THAT(write(wfd1.get(), wbuf.data(), wbuf.size()), + SyscallSucceedsWithValue(wbuf.size())); + + // Attempt a tee immediately; it should complete. + EXPECT_THAT(tee(rfd1.get(), wfd2.get(), wbuf.size(), 0), + SyscallSucceedsWithValue(wbuf.size())); + + // Content should reflect the tee. + std::vector<char> rbuf(wbuf.size()); + ASSERT_THAT(read(rfd2.get(), rbuf.data(), rbuf.size()), + SyscallSucceedsWithValue(rbuf.size())); + EXPECT_EQ(memcmp(rbuf.data(), wbuf.data(), rbuf.size()), 0); + ASSERT_THAT(read(rfd1.get(), rbuf.data(), rbuf.size()), + SyscallSucceedsWithValue(rbuf.size())); + EXPECT_EQ(memcmp(rbuf.data(), wbuf.data(), rbuf.size()), 0); +} + +TEST(SpliceTest, FromPipeMaxFileSize) { + // Create a new pipe. + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + const FileDescriptor rfd(fds[0]); + const FileDescriptor wfd(fds[1]); + + // Fill with some random data. + std::vector<char> buf(kPageSize); + RandomizeBuffer(buf.data(), buf.size()); + ASSERT_THAT(write(wfd.get(), buf.data(), buf.size()), + SyscallSucceedsWithValue(kPageSize)); + + // Open the input file. + const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const FileDescriptor out_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_RDWR)); + + EXPECT_THAT(ftruncate(out_fd.get(), 13 << 20), SyscallSucceeds()); + EXPECT_THAT(lseek(out_fd.get(), 0, SEEK_END), + SyscallSucceedsWithValue(13 << 20)); + + // Set our file size limit. 
+ sigset_t set; + sigemptyset(&set); + sigaddset(&set, SIGXFSZ); + TEST_PCHECK(sigprocmask(SIG_BLOCK, &set, nullptr) == 0); + rlimit rlim = {}; + rlim.rlim_cur = rlim.rlim_max = (13 << 20); + EXPECT_THAT(setrlimit(RLIMIT_FSIZE, &rlim), SyscallSucceeds()); + + // Splice to the output file. + EXPECT_THAT( + splice(rfd.get(), nullptr, out_fd.get(), nullptr, 3 * kPageSize, 0), + SyscallFailsWithErrno(EFBIG)); + + // Contents should be equal. + std::vector<char> rbuf(kPageSize); + ASSERT_THAT(read(rfd.get(), rbuf.data(), rbuf.size()), + SyscallSucceedsWithValue(kPageSize)); + EXPECT_EQ(memcmp(rbuf.data(), buf.data(), buf.size()), 0); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/stat.cc b/test/syscalls/linux/stat.cc new file mode 100644 index 000000000..2503960f3 --- /dev/null +++ b/test/syscalls/linux/stat.cc @@ -0,0 +1,720 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <errno.h> +#include <fcntl.h> +#include <sys/stat.h> +#include <sys/statfs.h> +#include <sys/types.h> +#include <unistd.h> + +#include <string> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/strings/match.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" +#include "test/syscalls/linux/file_base.h" +#include "test/util/cleanup.h" +#include "test/util/file_descriptor.h" +#include "test/util/fs_util.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +#ifndef AT_STATX_FORCE_SYNC +#define AT_STATX_FORCE_SYNC 0x2000 +#endif +#ifndef AT_STATX_DONT_SYNC +#define AT_STATX_DONT_SYNC 0x4000 +#endif + +namespace gvisor { +namespace testing { + +namespace { + +class StatTest : public FileTest {}; + +TEST_F(StatTest, FstatatAbs) { + struct stat st; + + // Check that the stat works. + EXPECT_THAT(fstatat(AT_FDCWD, test_file_name_.c_str(), &st, 0), + SyscallSucceeds()); + EXPECT_TRUE(S_ISREG(st.st_mode)); +} + +TEST_F(StatTest, FstatatEmptyPath) { + struct stat st; + const auto fd = ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDONLY)); + + // Check that the stat works. + EXPECT_THAT(fstatat(fd.get(), "", &st, AT_EMPTY_PATH), SyscallSucceeds()); + EXPECT_TRUE(S_ISREG(st.st_mode)); +} + +TEST_F(StatTest, FstatatRel) { + struct stat st; + int dirfd; + auto filename = std::string(Basename(test_file_name_)); + + // Open the temporary directory read-only. + ASSERT_THAT(dirfd = open(GetAbsoluteTestTmpdir().c_str(), O_RDONLY), + SyscallSucceeds()); + + // Check that the stat works. + EXPECT_THAT(fstatat(dirfd, filename.c_str(), &st, 0), SyscallSucceeds()); + EXPECT_TRUE(S_ISREG(st.st_mode)); + close(dirfd); +} + +TEST_F(StatTest, FstatatSymlink) { + struct stat st; + + // Check that the link is followed. 
+ EXPECT_THAT(fstatat(AT_FDCWD, "/proc/self", &st, 0), SyscallSucceeds()); + EXPECT_TRUE(S_ISDIR(st.st_mode)); + EXPECT_FALSE(S_ISLNK(st.st_mode)); + + // Check that the flag works. + EXPECT_THAT(fstatat(AT_FDCWD, "/proc/self", &st, AT_SYMLINK_NOFOLLOW), + SyscallSucceeds()); + EXPECT_TRUE(S_ISLNK(st.st_mode)); + EXPECT_FALSE(S_ISDIR(st.st_mode)); +} + +TEST_F(StatTest, Nlinks) { + TempPath basedir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + + // Directory is initially empty, it should contain 2 links (one from itself, + // one from "."). + EXPECT_THAT(Links(basedir.path()), IsPosixErrorOkAndHolds(2)); + + // Create a file in the test directory. Files shouldn't increase the link + // count on the base directory. + TempPath file1 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(basedir.path())); + EXPECT_THAT(Links(basedir.path()), IsPosixErrorOkAndHolds(2)); + + // Create subdirectories. This should increase the link count by 1 per + // subdirectory. + TempPath dir1 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(basedir.path())); + EXPECT_THAT(Links(basedir.path()), IsPosixErrorOkAndHolds(3)); + TempPath dir2 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(basedir.path())); + EXPECT_THAT(Links(basedir.path()), IsPosixErrorOkAndHolds(4)); + + // Removing directories should reduce the link count. + dir1.reset(); + EXPECT_THAT(Links(basedir.path()), IsPosixErrorOkAndHolds(3)); + dir2.reset(); + EXPECT_THAT(Links(basedir.path()), IsPosixErrorOkAndHolds(2)); + + // Removing files should have no effect on link count. + file1.reset(); + EXPECT_THAT(Links(basedir.path()), IsPosixErrorOkAndHolds(2)); +} + +TEST_F(StatTest, BlocksIncreaseOnWrite) { + struct stat st; + + // Stat the empty file. + ASSERT_THAT(fstat(test_file_fd_.get(), &st), SyscallSucceeds()); + + const int initial_blocks = st.st_blocks; + + // Write to the file, making sure to exceed the block size. 
+ std::vector<char> buf(2 * st.st_blksize, 'a'); + ASSERT_THAT(write(test_file_fd_.get(), buf.data(), buf.size()), + SyscallSucceedsWithValue(buf.size())); + + // Stat the file again, and verify that number of allocated blocks has + // increased. + ASSERT_THAT(fstat(test_file_fd_.get(), &st), SyscallSucceeds()); + EXPECT_GT(st.st_blocks, initial_blocks); +} + +TEST_F(StatTest, PathNotCleaned) { + TempPath basedir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + + // Create a file in the basedir. + TempPath file = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(basedir.path())); + + // Stating the file directly should succeed. + struct stat buf; + EXPECT_THAT(lstat(file.path().c_str(), &buf), SyscallSucceeds()); + + // Try to stat the file using a directory that does not exist followed by + // "..". If the path is cleaned prior to stating (which it should not be) + // then this will succeed. + const std::string bad_path = JoinPath("/does_not_exist/..", file.path()); + EXPECT_THAT(lstat(bad_path.c_str(), &buf), SyscallFailsWithErrno(ENOENT)); +} + +TEST_F(StatTest, PathCanContainDotDot) { + TempPath basedir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + TempPath subdir = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(basedir.path())); + const std::string subdir_name = std::string(Basename(subdir.path())); + + // Create a file in the subdir. + TempPath file = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(subdir.path())); + const std::string file_name = std::string(Basename(file.path())); + + // Stat the file through a path that includes '..' and '.' but still resolves + // to the file. + const std::string good_path = + JoinPath(basedir.path(), subdir_name, "..", subdir_name, ".", file_name); + struct stat buf; + EXPECT_THAT(lstat(good_path.c_str(), &buf), SyscallSucceeds()); +} + +TEST_F(StatTest, PathCanContainEmptyComponent) { + TempPath basedir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + + // Create a file in the basedir. 
+ TempPath file = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(basedir.path())); + const std::string file_name = std::string(Basename(file.path())); + + // Stat the file through a path that includes an empty component. We have to + // build this ourselves because JoinPath automatically removes empty + // components. + const std::string good_path = absl::StrCat(basedir.path(), "//", file_name); + struct stat buf; + EXPECT_THAT(lstat(good_path.c_str(), &buf), SyscallSucceeds()); +} + +TEST_F(StatTest, TrailingSlashNotCleanedReturnsENOTDIR) { + TempPath basedir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + + // Create a file in the basedir. + TempPath file = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(basedir.path())); + + // Stat the file with an extra "/" on the end of it. Since file is not a + // directory, this should return ENOTDIR. + const std::string bad_path = absl::StrCat(file.path(), "/"); + struct stat buf; + EXPECT_THAT(lstat(bad_path.c_str(), &buf), SyscallFailsWithErrno(ENOTDIR)); +} + +// Test fstatating a symlink directory. +TEST_F(StatTest, FstatatSymlinkDir) { + // Create a directory and symlink to it. + const auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + + const std::string symlink_to_dir = NewTempAbsPath(); + EXPECT_THAT(symlink(dir.path().c_str(), symlink_to_dir.c_str()), + SyscallSucceeds()); + auto cleanup = Cleanup([&symlink_to_dir]() { + EXPECT_THAT(unlink(symlink_to_dir.c_str()), SyscallSucceeds()); + }); + + // Fstatat the link with AT_SYMLINK_NOFOLLOW should return symlink data. + struct stat st = {}; + EXPECT_THAT( + fstatat(AT_FDCWD, symlink_to_dir.c_str(), &st, AT_SYMLINK_NOFOLLOW), + SyscallSucceeds()); + EXPECT_FALSE(S_ISDIR(st.st_mode)); + EXPECT_TRUE(S_ISLNK(st.st_mode)); + + // Fstatat the link should return dir data. 
+ EXPECT_THAT(fstatat(AT_FDCWD, symlink_to_dir.c_str(), &st, 0), + SyscallSucceeds()); + EXPECT_TRUE(S_ISDIR(st.st_mode)); + EXPECT_FALSE(S_ISLNK(st.st_mode)); +} + +// Test fstatating a symlink directory with trailing slash. +TEST_F(StatTest, FstatatSymlinkDirWithTrailingSlash) { + // Create a directory and symlink to it. + const auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const std::string symlink_to_dir = NewTempAbsPath(); + EXPECT_THAT(symlink(dir.path().c_str(), symlink_to_dir.c_str()), + SyscallSucceeds()); + auto cleanup = Cleanup([&symlink_to_dir]() { + EXPECT_THAT(unlink(symlink_to_dir.c_str()), SyscallSucceeds()); + }); + + // Fstatat on the symlink with a trailing slash should return the directory + // data. + struct stat st = {}; + EXPECT_THAT( + fstatat(AT_FDCWD, absl::StrCat(symlink_to_dir, "/").c_str(), &st, 0), + SyscallSucceeds()); + EXPECT_TRUE(S_ISDIR(st.st_mode)); + EXPECT_FALSE(S_ISLNK(st.st_mode)); + + // Fstatat on the symlink with a trailing slash with AT_SYMLINK_NOFOLLOW + // should return the directory data. + // Symlink to directory with trailing slash will ignore AT_SYMLINK_NOFOLLOW. + EXPECT_THAT(fstatat(AT_FDCWD, absl::StrCat(symlink_to_dir, "/").c_str(), &st, + AT_SYMLINK_NOFOLLOW), + SyscallSucceeds()); + EXPECT_TRUE(S_ISDIR(st.st_mode)); + EXPECT_FALSE(S_ISLNK(st.st_mode)); +} + +// Test fstatating a symlink directory with a trailing slash +// should return same stat data with fstatating directory. +TEST_F(StatTest, FstatatSymlinkDirWithTrailingSlashSameInode) { + // Create a directory and symlink to it. + const auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + + // We are going to assert that the symlink inode id is the same as the linked + // dir's inode id. In order for the inode id to be stable across + // save/restore, it must be kept open. The FileDescriptor type will do that + // for us automatically. 
+ auto fd = ASSERT_NO_ERRNO_AND_VALUE(Open(dir.path(), O_RDONLY | O_DIRECTORY)); + + const std::string symlink_to_dir = NewTempAbsPath(); + EXPECT_THAT(symlink(dir.path().c_str(), symlink_to_dir.c_str()), + SyscallSucceeds()); + auto cleanup = Cleanup([&symlink_to_dir]() { + EXPECT_THAT(unlink(symlink_to_dir.c_str()), SyscallSucceeds()); + }); + + // Fstatat on the symlink with a trailing slash should return the directory + // data. + struct stat st = {}; + EXPECT_THAT(fstatat(AT_FDCWD, absl::StrCat(symlink_to_dir, "/").c_str(), &st, + AT_SYMLINK_NOFOLLOW), + SyscallSucceeds()); + EXPECT_TRUE(S_ISDIR(st.st_mode)); + + // Dir and symlink should point to same inode. + struct stat st_dir = {}; + EXPECT_THAT( + fstatat(AT_FDCWD, dir.path().c_str(), &st_dir, AT_SYMLINK_NOFOLLOW), + SyscallSucceeds()); + EXPECT_EQ(st.st_ino, st_dir.st_ino); +} + +TEST_F(StatTest, LeadingDoubleSlash) { + // Create a file, and make sure we can stat it. + TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + struct stat st; + ASSERT_THAT(lstat(file.path().c_str(), &st), SyscallSucceeds()); + + // Now add an extra leading slash. + const std::string double_slash_path = absl::StrCat("/", file.path()); + ASSERT_TRUE(absl::StartsWith(double_slash_path, "//")); + + // We should be able to stat the new path, and it should resolve to the same + // file (same device and inode). + struct stat double_slash_st; + ASSERT_THAT(lstat(double_slash_path.c_str(), &double_slash_st), + SyscallSucceeds()); + EXPECT_EQ(st.st_dev, double_slash_st.st_dev); + EXPECT_EQ(st.st_ino, double_slash_st.st_ino); +} + +// Test that a rename doesn't change the underlying file. 
+TEST_F(StatTest, StatDoesntChangeAfterRename) { + const TempPath old_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const TempPath new_path(NewTempAbsPath()); + + struct stat st_old = {}; + struct stat st_new = {}; + + // Stat the directory, rename it, then stat it again through the new path. + ASSERT_THAT(stat(old_dir.path().c_str(), &st_old), SyscallSucceeds()); + ASSERT_THAT(rename(old_dir.path().c_str(), new_path.path().c_str()), + SyscallSucceeds()); + ASSERT_THAT(stat(new_path.path().c_str(), &st_new), SyscallSucceeds()); + + // Every attribute compared below must be identical before and after. + EXPECT_EQ(st_old.st_nlink, st_new.st_nlink); + EXPECT_EQ(st_old.st_dev, st_new.st_dev); + EXPECT_EQ(st_old.st_ino, st_new.st_ino); + EXPECT_EQ(st_old.st_mode, st_new.st_mode); + EXPECT_EQ(st_old.st_uid, st_new.st_uid); + EXPECT_EQ(st_old.st_gid, st_new.st_gid); + EXPECT_EQ(st_old.st_size, st_new.st_size); +} + +// Test link counts with a regular file as the child. +TEST_F(StatTest, LinkCountsWithRegularFileChild) { + const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + + struct stat st_parent_before = {}; + ASSERT_THAT(stat(dir.path().c_str(), &st_parent_before), SyscallSucceeds()); + EXPECT_EQ(st_parent_before.st_nlink, 2); + + // Adding a regular file doesn't adjust the parent's link count. + const TempPath child = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir.path())); + + struct stat st_parent_after = {}; + ASSERT_THAT(stat(dir.path().c_str(), &st_parent_after), SyscallSucceeds()); + EXPECT_EQ(st_parent_after.st_nlink, 2); + + // The child should have a single link from the parent. + struct stat st_child = {}; + ASSERT_THAT(stat(child.path().c_str(), &st_child), SyscallSucceeds()); + EXPECT_TRUE(S_ISREG(st_child.st_mode)); + EXPECT_EQ(st_child.st_nlink, 1); + + // Finally unlinking the child should not affect the parent's link count.
+ ASSERT_THAT(unlink(child.path().c_str()), SyscallSucceeds()); + ASSERT_THAT(stat(dir.path().c_str(), &st_parent_after), SyscallSucceeds()); + EXPECT_EQ(st_parent_after.st_nlink, 2); +} + +// This test verifies that inodes remain around when there is an open fd +// after link count hits 0. +TEST_F(StatTest, ZeroLinksOpenFdRegularFileChild_NoRandomSave) { + // Setting the environment variable GVISOR_GOFER_UNCACHED to any value + // will prevent this test from running, see the tmpfs lifecycle. + // + // We need to support this because when a file is unlinked and we forward + // the stat to the gofer it would return ENOENT. + const char* uncached_gofer = getenv("GVISOR_GOFER_UNCACHED"); + SKIP_IF(uncached_gofer != nullptr); + + // We don't support saving unlinked files. + const DisableSave ds; + + const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const TempPath child = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + dir.path(), "hello", TempPath::kDefaultFileMode)); + + // The child should have a single link from the parent. + struct stat st_child_before = {}; + ASSERT_THAT(stat(child.path().c_str(), &st_child_before), SyscallSucceeds()); + EXPECT_TRUE(S_ISREG(st_child_before.st_mode)); + EXPECT_EQ(st_child_before.st_nlink, 1); + EXPECT_EQ(st_child_before.st_size, 5); // "hello" is 5 bytes. + + // Open the file so we can fstat after unlinking. + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(child.path(), O_RDONLY)); + + // Now a stat should return ENOENT but we should still be able to stat + // via the open fd and fstat. + ASSERT_THAT(unlink(child.path().c_str()), SyscallSucceeds()); + + // Since the file has no more links stat should fail. + struct stat st_child_after = {}; + ASSERT_THAT(stat(child.path().c_str(), &st_child_after), + SyscallFailsWithErrno(ENOENT)); + + // Fstat should still allow us to access the same file via the fd.
+ struct stat st_child_fd = {}; + ASSERT_THAT(fstat(fd.get(), &st_child_fd), SyscallSucceeds()); + EXPECT_EQ(st_child_before.st_dev, st_child_fd.st_dev); + EXPECT_EQ(st_child_before.st_ino, st_child_fd.st_ino); + EXPECT_EQ(st_child_before.st_mode, st_child_fd.st_mode); + EXPECT_EQ(st_child_before.st_uid, st_child_fd.st_uid); + EXPECT_EQ(st_child_before.st_gid, st_child_fd.st_gid); + EXPECT_EQ(st_child_before.st_size, st_child_fd.st_size); + + // TODO(b/34861058): This isn't ideal but since fstatfs(2) will always return + // OVERLAYFS_SUPER_MAGIC we have no way to know if this fs is backed by a + // gofer which doesn't support links. + EXPECT_TRUE(st_child_fd.st_nlink == 0 || st_child_fd.st_nlink == 1); +} + +// Test link counts with a directory as the child. +TEST_F(StatTest, LinkCountsWithDirChild) { + const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + + // Before a child is added the two links are "." and the link from the parent. + struct stat st_parent_before = {}; + ASSERT_THAT(stat(dir.path().c_str(), &st_parent_before), SyscallSucceeds()); + EXPECT_EQ(st_parent_before.st_nlink, 2); + + // Create a subdirectory and stat for the parent link counts. + const TempPath sub_dir = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(dir.path())); + + // The three links are ".", the link from the parent, and the link from + // the child as "..". + struct stat st_parent_after = {}; + ASSERT_THAT(stat(dir.path().c_str(), &st_parent_after), SyscallSucceeds()); + EXPECT_EQ(st_parent_after.st_nlink, 3); + + // The child will have 1 link from the parent and 1 link which represents ".". + struct stat st_child = {}; + ASSERT_THAT(stat(sub_dir.path().c_str(), &st_child), SyscallSucceeds()); + EXPECT_TRUE(S_ISDIR(st_child.st_mode)); + EXPECT_EQ(st_child.st_nlink, 2); + + // Finally delete the child dir and the parent link count should return to 2.
+ ASSERT_THAT(rmdir(sub_dir.path().c_str()), SyscallSucceeds()); + ASSERT_THAT(stat(dir.path().c_str(), &st_parent_after), SyscallSucceeds()); + + // Now we should only have links from the parent and "." since the subdir + // has been removed. + EXPECT_EQ(st_parent_after.st_nlink, 2); +} + +// Test statting a child of a non-directory. +TEST_F(StatTest, ChildOfNonDir) { + // Create a path that has a child of a regular file. + // NOTE(review): test_file_name_ is a fixture member defined outside this + // excerpt; presumably it names a regular file -- confirm. + const std::string filename = JoinPath(test_file_name_, "child"); + + // Statting the path should return ENOTDIR. + struct stat st; + EXPECT_THAT(lstat(filename.c_str(), &st), SyscallFailsWithErrno(ENOTDIR)); +} + +// Test lstat on a symlink to a directory. +TEST_F(StatTest, LstatSymlinkDir) { + // Create a directory and symlink to it. + const auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const std::string symlink_to_dir = NewTempAbsPath(); + EXPECT_THAT(symlink(dir.path().c_str(), symlink_to_dir.c_str()), + SyscallSucceeds()); + auto cleanup = Cleanup([&symlink_to_dir]() { + EXPECT_THAT(unlink(symlink_to_dir.c_str()), SyscallSucceeds()); + }); + + // Lstat on the symlink should return symlink data. + struct stat st = {}; + ASSERT_THAT(lstat(symlink_to_dir.c_str(), &st), SyscallSucceeds()); + EXPECT_FALSE(S_ISDIR(st.st_mode)); + EXPECT_TRUE(S_ISLNK(st.st_mode)); + + // Lstat on the symlink with a trailing slash should return the directory + // data. + ASSERT_THAT(lstat(absl::StrCat(symlink_to_dir, "/").c_str(), &st), + SyscallSucceeds()); + EXPECT_TRUE(S_ISDIR(st.st_mode)); + EXPECT_FALSE(S_ISLNK(st.st_mode)); +} + +// Verify that we get an ELOOP from too many symbolic links even when there +// are directories in the middle.
+TEST_F(StatTest, LstatELOOPPath) { + const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + std::string subdir_base = "subdir"; + ASSERT_THAT(mkdir(JoinPath(dir.path(), subdir_base).c_str(), 0755), + SyscallSucceeds()); + + // <dir>/subdir/subdir is a symlink to "../subdir", i.e. back to <dir>/subdir. + std::string target = JoinPath(dir.path(), subdir_base, subdir_base); + std::string dst = JoinPath("..", subdir_base); + ASSERT_THAT(symlink(dst.c_str(), target.c_str()), SyscallSucceeds()); + auto cleanup = Cleanup( + [&target]() { EXPECT_THAT(unlink(target.c_str()), SyscallSucceeds()); }); + + // Now build a path which is /subdir/subdir/... repeated many times so that + // we can build a path that is shorter than PATH_MAX but can still cause + // too many symbolic links. Note: Every other subdir is actually a directory + // so we're not in a situation where it's a -> b -> a -> b, where a and b + // are symbolic links. + std::string path = dir.path(); + std::string subdir_append = absl::StrCat("/", subdir_base); + do { + absl::StrAppend(&path, subdir_append); + // Keep appending /subdir until we would overflow PATH_MAX. + } while ((path.size() + subdir_append.size()) < PATH_MAX); + + struct stat s = {}; + ASSERT_THAT(lstat(path.c_str(), &s), SyscallFailsWithErrno(ELOOP)); +} + +// Ensure that inode allocation for anonymous devices work correctly across +// save/restore. In particular, inode numbers should be unique across S/R. +TEST(SimpleStatTest, AnonDeviceAllocatesUniqueInodesAcrossSaveRestore) { + // Use sockets as a convenient way to create inodes on an anonymous device. + int fd; + ASSERT_THAT(fd = socket(AF_UNIX, SOCK_STREAM, 0), SyscallSucceeds()); + FileDescriptor fd1(fd); + MaybeSave(); + ASSERT_THAT(fd = socket(AF_UNIX, SOCK_STREAM, 0), SyscallSucceeds()); + FileDescriptor fd2(fd); + + struct stat st1; + struct stat st2; + ASSERT_THAT(fstat(fd1.get(), &st1), SyscallSucceeds()); + ASSERT_THAT(fstat(fd2.get(), &st2), SyscallSucceeds()); + + // The two fds should have different inode numbers.
+ EXPECT_NE(st2.st_ino, st1.st_ino); + + // Verify again after another S/R cycle. The inode numbers should remain the + // same. + MaybeSave(); + + struct stat st1_after; + struct stat st2_after; + ASSERT_THAT(fstat(fd1.get(), &st1_after), SyscallSucceeds()); + ASSERT_THAT(fstat(fd2.get(), &st2_after), SyscallSucceeds()); + + EXPECT_EQ(st1_after.st_ino, st1.st_ino); + EXPECT_EQ(st2_after.st_ino, st2.st_ino); +} + +#ifndef SYS_statx +#if defined(__x86_64__) +#define SYS_statx 332 +#elif defined(__aarch64__) +#define SYS_statx 291 +#else +#error "Unknown architecture" +#endif +#endif // SYS_statx + +#ifndef STATX_ALL +#define STATX_ALL 0x00000fffU +#endif // STATX_ALL + +// struct kernel_statx_timestamp is a Linux statx_timestamp struct. +struct kernel_statx_timestamp { + int64_t tv_sec; + uint32_t tv_nsec; + int32_t __reserved; +}; + +// struct kernel_statx is a Linux statx struct. Old versions of glibc do not +// expose it. See include/uapi/linux/stat.h +struct kernel_statx { + uint32_t stx_mask; + uint32_t stx_blksize; + uint64_t stx_attributes; + uint32_t stx_nlink; + uint32_t stx_uid; + uint32_t stx_gid; + uint16_t stx_mode; + uint16_t __spare0[1]; + uint64_t stx_ino; + uint64_t stx_size; + uint64_t stx_blocks; + uint64_t stx_attributes_mask; + struct kernel_statx_timestamp stx_atime; + struct kernel_statx_timestamp stx_btime; + struct kernel_statx_timestamp stx_ctime; + struct kernel_statx_timestamp stx_mtime; + uint32_t stx_rdev_major; + uint32_t stx_rdev_minor; + uint32_t stx_dev_major; + uint32_t stx_dev_minor; + uint64_t __spare2[14]; +}; + +// statx issues the statx system call directly through syscall(2), filling the +// locally declared kernel_statx layout. +int statx(int dirfd, const char* pathname, int flags, unsigned int mask, + struct kernel_statx* statxbuf) { + return syscall(SYS_statx, dirfd, pathname, flags, mask, statxbuf); +} + +TEST_F(StatTest, StatxAbsPath) { + // Skip when not on gVisor and the probe call reports ENOSYS (no statx). + SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, nullptr) < 0 && + errno == ENOSYS); + + struct kernel_statx stx; + EXPECT_THAT(statx(-1, test_file_name_.c_str(), 0, STATX_ALL, &stx), + 
SyscallSucceeds()); + EXPECT_TRUE(S_ISREG(stx.stx_mode)); +} + +TEST_F(StatTest, StatxRelPathDirFD) { + SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, nullptr) < 0 && + errno == ENOSYS); + + struct kernel_statx stx; + auto const dirfd = + ASSERT_NO_ERRNO_AND_VALUE(Open(GetAbsoluteTestTmpdir(), O_RDONLY)); + auto filename = std::string(Basename(test_file_name_)); + + EXPECT_THAT(statx(dirfd.get(), filename.c_str(), 0, STATX_ALL, &stx), + SyscallSucceeds()); + EXPECT_TRUE(S_ISREG(stx.stx_mode)); +} + +TEST_F(StatTest, StatxRelPathCwd) { + SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, nullptr) < 0 && + errno == ENOSYS); + + ASSERT_THAT(chdir(GetAbsoluteTestTmpdir().c_str()), SyscallSucceeds()); + auto filename = std::string(Basename(test_file_name_)); + struct kernel_statx stx; + EXPECT_THAT(statx(AT_FDCWD, filename.c_str(), 0, STATX_ALL, &stx), + SyscallSucceeds()); + EXPECT_TRUE(S_ISREG(stx.stx_mode)); +} + +TEST_F(StatTest, StatxEmptyPath) { + SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, nullptr) < 0 && + errno == ENOSYS); + + const auto fd = ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDONLY)); + struct kernel_statx stx; + EXPECT_THAT(statx(fd.get(), "", AT_EMPTY_PATH, STATX_ALL, &stx), + SyscallSucceeds()); + EXPECT_TRUE(S_ISREG(stx.stx_mode)); +} + +TEST_F(StatTest, StatxDoesNotRejectExtraneousMaskBits) { + SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, nullptr) < 0 && + errno == ENOSYS); + + struct kernel_statx stx; + // Set all mask bits except for STATX__RESERVED. The type matches the + // `unsigned int mask` parameter of the statx() wrapper above. + unsigned int mask = 0xffffffff & ~0x80000000; + EXPECT_THAT(statx(-1, test_file_name_.c_str(), 0, mask, &stx), + SyscallSucceeds()); + EXPECT_TRUE(S_ISREG(stx.stx_mode)); +} + +TEST_F(StatTest, StatxRejectsReservedMaskBit) { + SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, nullptr) < 0 && + errno == ENOSYS); + + struct kernel_statx stx; + // Set STATX__RESERVED in the mask.
+ EXPECT_THAT(statx(-1, test_file_name_.c_str(), 0, 0x80000000, &stx), + SyscallFailsWithErrno(EINVAL)); +} + +TEST_F(StatTest, StatxSymlink) { + SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, nullptr) < 0 && + errno == ENOSYS); + + // NOTE(review): "/tmp" is hard-coded here while the other statx tests use + // GetAbsoluteTestTmpdir() -- confirm this is intentional. + std::string parent_dir = "/tmp"; + TempPath link = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateSymlinkTo(parent_dir, test_file_name_)); + std::string p = link.path(); + + // With AT_SYMLINK_NOFOLLOW the link itself is described; without it the + // regular file it points to is described. + struct kernel_statx stx; + EXPECT_THAT(statx(AT_FDCWD, p.c_str(), AT_SYMLINK_NOFOLLOW, STATX_ALL, &stx), + SyscallSucceeds()); + EXPECT_TRUE(S_ISLNK(stx.stx_mode)); + EXPECT_THAT(statx(AT_FDCWD, p.c_str(), 0, STATX_ALL, &stx), + SyscallSucceeds()); + EXPECT_TRUE(S_ISREG(stx.stx_mode)); +} + +TEST_F(StatTest, StatxInvalidFlags) { + SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, nullptr) < 0 && + errno == ENOSYS); + + struct kernel_statx stx; + EXPECT_THAT(statx(AT_FDCWD, test_file_name_.c_str(), 12345, 0, &stx), + SyscallFailsWithErrno(EINVAL)); + + // Sync flags are mutually exclusive. + EXPECT_THAT(statx(AT_FDCWD, test_file_name_.c_str(), + AT_STATX_FORCE_SYNC | AT_STATX_DONT_SYNC, 0, &stx), + SyscallFailsWithErrno(EINVAL)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/stat_times.cc b/test/syscalls/linux/stat_times.cc new file mode 100644 index 000000000..68c0bef09 --- /dev/null +++ b/test/syscalls/linux/stat_times.cc @@ -0,0 +1,303 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include <fcntl.h> +#include <sys/stat.h> + +#include <functional> +#include <tuple> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/util/file_descriptor.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +using ::testing::IsEmpty; +using ::testing::Not; + +// GetTime stats file and returns its (atime, mtime, ctime) as absl::Time +// values. +std::tuple<absl::Time, absl::Time, absl::Time> GetTime(const TempPath& file) { + struct stat statbuf = {}; + EXPECT_THAT(stat(file.path().c_str(), &statbuf), SyscallSucceeds()); + + const auto atime = absl::TimeFromTimespec(statbuf.st_atim); + const auto mtime = absl::TimeFromTimespec(statbuf.st_mtim); + const auto ctime = absl::TimeFromTimespec(statbuf.st_ctim); + return std::make_tuple(atime, mtime, ctime); +} + +enum class AtimeEffect { + Unchanged, + Changed, +}; + +enum class MtimeEffect { + Unchanged, + Changed, +}; + +enum class CtimeEffect { + Unchanged, + Changed, +}; + +// Tests that fn modifies the atime/mtime/ctime of path as specified. +// +// The timestamps of path are read before and after fn runs. A timestamp +// expected to change must be newer than its old value and lie within +// [before, after]; one expected to stay unchanged must compare equal. +void CheckTimes(const TempPath& path, std::function<void()> fn, + AtimeEffect atime_effect, MtimeEffect mtime_effect, + CtimeEffect ctime_effect) { + absl::Time atime, mtime, ctime; + std::tie(atime, mtime, ctime) = GetTime(path); + + // FIXME(b/132819225): gVisor filesystem timestamps inconsistently use the + // internal or host clock, which may diverge slightly. Allow some slack on + // times to account for the difference. + // + // Here we sleep for 1s so that initial creation of path doesn't fall within + // the before slack window. + absl::SleepFor(absl::Seconds(1)); + + const absl::Time before = absl::Now() - absl::Seconds(1); + + // Perform the op.
+ fn(); + + const absl::Time after = absl::Now() + absl::Seconds(1); + + absl::Time atime2, mtime2, ctime2; + std::tie(atime2, mtime2, ctime2) = GetTime(path); + + if (atime_effect == AtimeEffect::Changed) { + EXPECT_LE(before, atime2); + EXPECT_GE(after, atime2); + EXPECT_GT(atime2, atime); + } else { + EXPECT_EQ(atime2, atime); + } + + if (mtime_effect == MtimeEffect::Changed) { + EXPECT_LE(before, mtime2); + EXPECT_GE(after, mtime2); + EXPECT_GT(mtime2, mtime); + } else { + EXPECT_EQ(mtime2, mtime); + } + + if (ctime_effect == CtimeEffect::Changed) { + EXPECT_LE(before, ctime2); + EXPECT_GE(after, ctime2); + EXPECT_GT(ctime2, ctime); + } else { + EXPECT_EQ(ctime2, ctime); + } +} + +// File creation time is reflected in atime, mtime, and ctime. +TEST(StatTimesTest, FileCreation) { + const DisableSave ds; // Timing-related test. + + // Get a time for when the file is created. + // + // FIXME(b/132819225): See above. + const absl::Time before = absl::Now() - absl::Seconds(1); + const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const absl::Time after = absl::Now() + absl::Seconds(1); + + absl::Time atime, mtime, ctime; + std::tie(atime, mtime, ctime) = GetTime(file); + + EXPECT_LE(before, atime); + EXPECT_LE(before, mtime); + EXPECT_LE(before, ctime); + EXPECT_GE(after, atime); + EXPECT_GE(after, mtime); + EXPECT_GE(after, ctime); +} + +// Calling chmod on a file changes ctime. +TEST(StatTimesTest, FileChmod) { + TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + + auto fn = [&] { + EXPECT_THAT(chmod(file.path().c_str(), 0666), SyscallSucceeds()); + }; + CheckTimes(file, fn, AtimeEffect::Unchanged, MtimeEffect::Unchanged, + CtimeEffect::Changed); +} + +// Renaming a file changes ctime.
+TEST(StatTimesTest, FileRename) { + TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + + const std::string newpath = NewTempAbsPath(); + + // release() supplies the old path to rename(); reset(newpath) then points + // the TempPath at the renamed file (presumably so it is still cleaned up -- + // confirm against TempPath). + auto fn = [&] { + ASSERT_THAT(rename(file.release().c_str(), newpath.c_str()), + SyscallSucceeds()); + file.reset(newpath); + }; + CheckTimes(file, fn, AtimeEffect::Unchanged, MtimeEffect::Unchanged, + CtimeEffect::Changed); +} + +// Renaming a file changes ctime, even with an open FD. +// +// NOTE(b/132732387): This is a regression test for fs/gofer failing to update +// cached ctime. +TEST(StatTimesTest, FileRenameOpenFD) { + TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + + // Holding an FD shouldn't affect behavior. + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY)); + + const std::string newpath = NewTempAbsPath(); + + // FIXME(b/132814682): Restore fails with an uncached gofer and an open FD + // across rename. + // + // N.B. The logic here looks backwards because it isn't possible to + // conditionally disable save, only conditionally re-enable it. + DisableSave ds; + if (!getenv("GVISOR_GOFER_UNCACHED")) { + ds.reset(); + } + + auto fn = [&] { + ASSERT_THAT(rename(file.release().c_str(), newpath.c_str()), + SyscallSucceeds()); + file.reset(newpath); + }; + CheckTimes(file, fn, AtimeEffect::Unchanged, MtimeEffect::Unchanged, + CtimeEffect::Changed); +} + +// Calling utimensat on a file changes ctime and the time that we ask to change +// (atime to now in this case). +TEST(StatTimesTest, FileUtimes) { + TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + + auto fn = [&] { + // ts[0] is the access time (set to now); ts[1] is the modification time + // (left untouched). + const struct timespec ts[2] = {{0, UTIME_NOW}, {0, UTIME_OMIT}}; + ASSERT_THAT(utimensat(AT_FDCWD, file.path().c_str(), ts, 0), + SyscallSucceeds()); + }; + CheckTimes(file, fn, AtimeEffect::Changed, MtimeEffect::Unchanged, + CtimeEffect::Changed); +} + +// Truncating a file changes mtime and ctime.
+TEST(StatTimesTest, FileTruncate) { + // Start from a non-empty file and truncate it to length 0. + const TempPath file = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateFileWith(GetAbsoluteTestTmpdir(), "yaaass", 0666)); + + auto fn = [&] { + EXPECT_THAT(truncate(file.path().c_str(), 0), SyscallSucceeds()); + }; + CheckTimes(file, fn, AtimeEffect::Unchanged, MtimeEffect::Changed, + CtimeEffect::Changed); +} + +// Writing a file changes mtime and ctime. +TEST(StatTimesTest, FileWrite) { + const TempPath file = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateFileWith(GetAbsoluteTestTmpdir(), "yaaass", 0666)); + + // The file is opened before CheckTimes runs, so only the write itself + // happens inside fn. + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0)); + + auto fn = [&] { + const std::string contents = "all the single dollars"; + EXPECT_THAT(WriteFd(fd.get(), contents.data(), contents.size()), + SyscallSucceeds()); + }; + CheckTimes(file, fn, AtimeEffect::Unchanged, MtimeEffect::Changed, + CtimeEffect::Changed); +} + +// Reading a file changes atime. +TEST(StatTimesTest, FileRead) { + const std::string contents = "bills bills bills"; + const TempPath file = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateFileWith(GetAbsoluteTestTmpdir(), contents, 0666)); + + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY, 0)); + + auto fn = [&] { + // buf (20 bytes) is larger than contents (17 bytes); the read is expected + // to return exactly contents.size(). + char buf[20]; + ASSERT_THAT(ReadFd(fd.get(), buf, sizeof(buf)), + SyscallSucceedsWithValue(contents.size())); + }; + CheckTimes(file, fn, AtimeEffect::Changed, MtimeEffect::Unchanged, + CtimeEffect::Unchanged); +} + +// Listing files in a directory changes atime.
+TEST(StatTimesTest, DirList) { + const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + // Put one file in the directory so the listing below is non-empty. + const TempPath file = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir.path())); + + auto fn = [&] { + const auto contents = ASSERT_NO_ERRNO_AND_VALUE(ListDir(dir.path(), false)); + EXPECT_THAT(contents, Not(IsEmpty())); + }; + CheckTimes(dir, fn, AtimeEffect::Changed, MtimeEffect::Unchanged, + CtimeEffect::Unchanged); +} + +// Creating a file in a directory changes mtime and ctime. +TEST(StatTimesTest, DirCreateFile) { + const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + + // file is declared outside fn so the TempPath outlives the lambda + // (presumably keeping the new file in place while CheckTimes re-stats the + // directory -- confirm against TempPath). + TempPath file; + auto fn = [&] { + file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir.path())); + }; + CheckTimes(dir, fn, AtimeEffect::Unchanged, MtimeEffect::Changed, + CtimeEffect::Changed); +} + +// Creating a directory in a directory changes mtime and ctime. +TEST(StatTimesTest, DirCreateDir) { + const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + + TempPath dir2; + auto fn = [&] { + dir2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(dir.path())); + }; + CheckTimes(dir, fn, AtimeEffect::Unchanged, MtimeEffect::Changed, + CtimeEffect::Changed); +} + +// Removing a file from a directory changes mtime and ctime. +TEST(StatTimesTest, DirRemoveFile) { + const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + + TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir.path())); + // NOTE(review): reset() with no argument presumably deletes the file that + // the TempPath tracks -- confirm against TempPath. + auto fn = [&] { file.reset(); }; + CheckTimes(dir, fn, AtimeEffect::Unchanged, MtimeEffect::Changed, + CtimeEffect::Changed); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/statfs.cc b/test/syscalls/linux/statfs.cc new file mode 100644 index 000000000..aca51d30f --- /dev/null +++ b/test/syscalls/linux/statfs.cc @@ -0,0 +1,82 @@ +// Copyright 2018 The gVisor Authors.
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <fcntl.h> +#include <limits.h> +#include <sys/statfs.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "test/util/file_descriptor.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// statfs on a path that does not exist fails with ENOENT. +TEST(StatfsTest, CannotStatBadPath) { + auto temp_file = NewTempAbsPathInDir("/tmp"); + + struct statfs st; + EXPECT_THAT(statfs(temp_file.c_str(), &st), SyscallFailsWithErrno(ENOENT)); +} + +// statfs succeeds on a freshly created temporary file. +TEST(StatfsTest, InternalTmpfs) { + auto temp_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + + struct statfs st; + EXPECT_THAT(statfs(temp_file.path().c_str(), &st), SyscallSucceeds()); +} + +// statfs succeeds on /dev/shm. +TEST(StatfsTest, InternalDevShm) { + struct statfs st; + EXPECT_THAT(statfs("/dev/shm", &st), SyscallSucceeds()); +} + +// The reported maximum filename length of /dev/shm equals NAME_MAX. +TEST(StatfsTest, NameLen) { + struct statfs st; + EXPECT_THAT(statfs("/dev/shm", &st), SyscallSucceeds()); + + // This assumes that /dev/shm is tmpfs.
+ EXPECT_EQ(st.f_namelen, NAME_MAX); +} + +// fstatfs on an invalid file descriptor fails with EBADF. +TEST(FstatfsTest, CannotStatBadFd) { + struct statfs st; + EXPECT_THAT(fstatfs(-1, &st), SyscallFailsWithErrno(EBADF)); +} + +// fstatfs succeeds on an fd for a freshly created temporary file. +TEST(FstatfsTest, InternalTmpfs) { + auto temp_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(temp_file.path(), O_RDONLY)); + + struct statfs st; + EXPECT_THAT(fstatfs(fd.get(), &st), SyscallSucceeds()); +} + +// fstatfs succeeds on an fd for /dev/shm. No temporary file is needed here: +// only /dev/shm itself is opened. +TEST(FstatfsTest, InternalDevShm) { + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/shm", O_RDONLY)); + + struct statfs st; + EXPECT_THAT(fstatfs(fd.get(), &st), SyscallSucceeds()); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/sticky.cc b/test/syscalls/linux/sticky.cc new file mode 100644 index 000000000..4afed6d08 --- /dev/null +++ b/test/syscalls/linux/sticky.cc @@ -0,0 +1,161 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+ +#include <fcntl.h> +#include <grp.h> +#include <sys/prctl.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include <vector> + +#include "gtest/gtest.h" +#include "absl/flags/flag.h" +#include "test/util/capability_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/fs_util.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +ABSL_FLAG(int32_t, scratch_uid, 65534, "first scratch UID"); +ABSL_FLAG(int32_t, scratch_gid, 65534, "first scratch GID"); + +namespace gvisor { +namespace testing { + +namespace { + +// With the sticky bit set on the parent and a different EUID, entries in the +// parent cannot be renamed or removed (EPERM). +TEST(StickyTest, StickyBitPermDenied) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SETUID))); + + const TempPath parent = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + EXPECT_THAT(chmod(parent.path().c_str(), 0777 | S_ISVTX), SyscallSucceeds()); + + // After changing credentials below, we need to use an open fd to make + // modifications in the parent dir, because there is no guarantee that we will + // still have the ability to open it. + const FileDescriptor parent_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(parent.path(), O_DIRECTORY)); + // open(2) requires an explicit mode argument whenever O_CREAT is given. + ASSERT_THAT(openat(parent_fd.get(), "file", O_CREAT, 0644), SyscallSucceeds()); + ASSERT_THAT(mkdirat(parent_fd.get(), "dir", 0777), SyscallSucceeds()); + ASSERT_THAT(symlinkat("xyz", parent_fd.get(), "link"), SyscallSucceeds()); + + // Drop privileges and change IDs only in child thread, or else this parent + // thread won't be able to open some log files after the test ends. + ScopedThread([&] { + // Drop privileges. + if (HaveCapability(CAP_FOWNER).ValueOrDie()) { + EXPECT_NO_ERRNO(SetCapability(CAP_FOWNER, false)); + } + + // Change EUID and EGID.
+ EXPECT_THAT( + syscall(SYS_setresgid, -1, absl::GetFlag(FLAGS_scratch_gid), -1), + SyscallSucceeds()); + EXPECT_THAT( + syscall(SYS_setresuid, -1, absl::GetFlag(FLAGS_scratch_uid), -1), + SyscallSucceeds()); + + EXPECT_THAT(renameat(parent_fd.get(), "file", parent_fd.get(), "file2"), + SyscallFailsWithErrno(EPERM)); + EXPECT_THAT(unlinkat(parent_fd.get(), "file", 0), + SyscallFailsWithErrno(EPERM)); + EXPECT_THAT(unlinkat(parent_fd.get(), "dir", AT_REMOVEDIR), + SyscallFailsWithErrno(EPERM)); + EXPECT_THAT(unlinkat(parent_fd.get(), "link", 0), + SyscallFailsWithErrno(EPERM)); + }); +} + +// With the sticky bit set on the parent but an unchanged EUID, entries in the +// parent can still be renamed and removed. +TEST(StickyTest, StickyBitSameUID) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SETUID))); + + const TempPath parent = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + EXPECT_THAT(chmod(parent.path().c_str(), 0777 | S_ISVTX), SyscallSucceeds()); + + // After changing credentials below, we need to use an open fd to make + // modifications in the parent dir, because there is no guarantee that we will + // still have the ability to open it. + const FileDescriptor parent_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(parent.path(), O_DIRECTORY)); + // open(2) requires an explicit mode argument whenever O_CREAT is given. + ASSERT_THAT(openat(parent_fd.get(), "file", O_CREAT, 0644), SyscallSucceeds()); + ASSERT_THAT(mkdirat(parent_fd.get(), "dir", 0777), SyscallSucceeds()); + ASSERT_THAT(symlinkat("xyz", parent_fd.get(), "link"), SyscallSucceeds()); + + // Drop privileges and change IDs only in child thread, or else this parent + // thread won't be able to open some log files after the test ends. + ScopedThread([&] { + // Drop privileges. + if (HaveCapability(CAP_FOWNER).ValueOrDie()) { + EXPECT_NO_ERRNO(SetCapability(CAP_FOWNER, false)); + } + + // Change EGID. + EXPECT_THAT( + syscall(SYS_setresgid, -1, absl::GetFlag(FLAGS_scratch_gid), -1), + SyscallSucceeds()); + + // We still have the same EUID.
+ EXPECT_THAT(renameat(parent_fd.get(), "file", parent_fd.get(), "file2"), + SyscallSucceeds()); + EXPECT_THAT(unlinkat(parent_fd.get(), "file2", 0), SyscallSucceeds()); + EXPECT_THAT(unlinkat(parent_fd.get(), "dir", AT_REMOVEDIR), + SyscallSucceeds()); + EXPECT_THAT(unlinkat(parent_fd.get(), "link", 0), SyscallSucceeds()); + }); +} + +// With the sticky bit set on the parent and a different EUID, CAP_FOWNER +// still allows entries in the parent to be renamed and removed. +TEST(StickyTest, StickyBitCapFOWNER) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SETUID))); + + const TempPath parent = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + EXPECT_THAT(chmod(parent.path().c_str(), 0777 | S_ISVTX), SyscallSucceeds()); + + // After changing credentials below, we need to use an open fd to make + // modifications in the parent dir, because there is no guarantee that we will + // still have the ability to open it. + const FileDescriptor parent_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(parent.path(), O_DIRECTORY)); + // open(2) requires an explicit mode argument whenever O_CREAT is given. + ASSERT_THAT(openat(parent_fd.get(), "file", O_CREAT, 0644), SyscallSucceeds()); + ASSERT_THAT(mkdirat(parent_fd.get(), "dir", 0777), SyscallSucceeds()); + ASSERT_THAT(symlinkat("xyz", parent_fd.get(), "link"), SyscallSucceeds()); + + // Drop privileges and change IDs only in child thread, or else this parent + // thread won't be able to open some log files after the test ends. + ScopedThread([&] { + // Set PR_SET_KEEPCAPS. + EXPECT_THAT(prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0), SyscallSucceeds()); + + // Change EUID and EGID.
+ EXPECT_THAT( + syscall(SYS_setresgid, -1, absl::GetFlag(FLAGS_scratch_gid), -1), + SyscallSucceeds()); + EXPECT_THAT( + syscall(SYS_setresuid, -1, absl::GetFlag(FLAGS_scratch_uid), -1), + SyscallSucceeds()); + + EXPECT_NO_ERRNO(SetCapability(CAP_FOWNER, true)); + EXPECT_THAT(renameat(parent_fd.get(), "file", parent_fd.get(), "file2"), + SyscallSucceeds()); + EXPECT_THAT(unlinkat(parent_fd.get(), "file2", 0), SyscallSucceeds()); + EXPECT_THAT(unlinkat(parent_fd.get(), "dir", AT_REMOVEDIR), + SyscallSucceeds()); + EXPECT_THAT(unlinkat(parent_fd.get(), "link", 0), SyscallSucceeds()); + }); +} +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/symlink.cc b/test/syscalls/linux/symlink.cc new file mode 100644 index 000000000..a17ff62e9 --- /dev/null +++ b/test/syscalls/linux/symlink.cc @@ -0,0 +1,402 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+ +#include <errno.h> +#include <fcntl.h> +#include <string.h> +#include <unistd.h> + +#include <string> + +#include "gtest/gtest.h" +#include "absl/time/clock.h" +#include "test/util/capability_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/fs_util.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +mode_t FilePermission(const std::string& path) { + struct stat buf = {0}; + TEST_CHECK(lstat(path.c_str(), &buf) == 0); + return buf.st_mode & 0777; +} + +// Test that name collisions are checked on the new link path, not the source +// path. Regression test for b/31782115. +TEST(SymlinkTest, CanCreateSymlinkWithCachedSourceDirent) { + const std::string srcname = NewTempAbsPath(); + const std::string newname = NewTempAbsPath(); + const std::string basedir = std::string(Dirname(srcname)); + ASSERT_EQ(basedir, Dirname(newname)); + + ASSERT_THAT(chdir(basedir.c_str()), SyscallSucceeds()); + + // Open the source node to cause the underlying dirent to be cached. It will + // remain cached while we have the file open. + int fd; + ASSERT_THAT(fd = open(srcname.c_str(), O_CREAT | O_RDWR, 0666), + SyscallSucceeds()); + FileDescriptor fd_closer(fd); + + // Attempt to create a symlink. If the bug exists, this will fail since the + // dirent link creation code will check for a name collision on the source + // link name. 
+ EXPECT_THAT(symlink(std::string(Basename(srcname)).c_str(), + std::string(Basename(newname)).c_str()), + SyscallSucceeds()); +} + +TEST(SymlinkTest, CanCreateSymlinkFile) { + const std::string oldname = NewTempAbsPath(); + const std::string newname = NewTempAbsPath(); + + int fd; + ASSERT_THAT(fd = open(oldname.c_str(), O_CREAT | O_RDWR, 0666), + SyscallSucceeds()); + EXPECT_THAT(close(fd), SyscallSucceeds()); + + EXPECT_THAT(symlink(oldname.c_str(), newname.c_str()), SyscallSucceeds()); + EXPECT_EQ(FilePermission(newname), 0777); + + auto link = ASSERT_NO_ERRNO_AND_VALUE(ReadLink(newname)); + EXPECT_EQ(oldname, link); + + EXPECT_THAT(unlink(newname.c_str()), SyscallSucceeds()); + EXPECT_THAT(unlink(oldname.c_str()), SyscallSucceeds()); +} + +TEST(SymlinkTest, CanCreateSymlinkDir) { + const std::string olddir = NewTempAbsPath(); + const std::string newdir = NewTempAbsPath(); + + EXPECT_THAT(mkdir(olddir.c_str(), 0777), SyscallSucceeds()); + EXPECT_THAT(symlink(olddir.c_str(), newdir.c_str()), SyscallSucceeds()); + EXPECT_EQ(FilePermission(newdir), 0777); + + auto link = ASSERT_NO_ERRNO_AND_VALUE(ReadLink(newdir)); + EXPECT_EQ(olddir, link); + + EXPECT_THAT(unlink(newdir.c_str()), SyscallSucceeds()); + + ASSERT_THAT(rmdir(olddir.c_str()), SyscallSucceeds()); +} + +TEST(SymlinkTest, CannotCreateSymlinkInReadOnlyDir) { + // Drop capabilities that allow us to override file and directory permissions. 
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false)); + + const std::string olddir = NewTempAbsPath(); + ASSERT_THAT(mkdir(olddir.c_str(), 0444), SyscallSucceeds()); + + const std::string newdir = NewTempAbsPathInDir(olddir); + EXPECT_THAT(symlink(olddir.c_str(), newdir.c_str()), + SyscallFailsWithErrno(EACCES)); + + ASSERT_THAT(rmdir(olddir.c_str()), SyscallSucceeds()); +} + +TEST(SymlinkTest, CannotSymlinkOverExistingFile) { + const auto oldfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const auto newfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + + EXPECT_THAT(symlink(oldfile.path().c_str(), newfile.path().c_str()), + SyscallFailsWithErrno(EEXIST)); +} + +TEST(SymlinkTest, CannotSymlinkOverExistingDir) { + const auto oldfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const auto newdir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + + EXPECT_THAT(symlink(oldfile.path().c_str(), newdir.path().c_str()), + SyscallFailsWithErrno(EEXIST)); +} + +TEST(SymlinkTest, OldnameIsEmpty) { + const std::string newname = NewTempAbsPath(); + EXPECT_THAT(symlink("", newname.c_str()), SyscallFailsWithErrno(ENOENT)); +} + +TEST(SymlinkTest, OldnameIsDangling) { + const std::string newname = NewTempAbsPath(); + EXPECT_THAT(symlink("/dangling", newname.c_str()), SyscallSucceeds()); + + // This is required for S/R random save tests, which pre-run this test + // in the same TEST_TMPDIR, which means that we need to clean it for any + // operations exclusively creating files, like symlink above. 
+ EXPECT_THAT(unlink(newname.c_str()), SyscallSucceeds()); +} + +TEST(SymlinkTest, NewnameCannotExist) { + const std::string newname = + JoinPath(GetAbsoluteTestTmpdir(), "thisdoesnotexist", "foo"); + EXPECT_THAT(symlink("/thisdoesnotmatter", newname.c_str()), + SyscallFailsWithErrno(ENOENT)); +} + +TEST(SymlinkTest, CanEvaluateLink) { + const auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + + // We are going to assert that the symlink inode id is the same as the linked + // file's inode id. In order for the inode id to be stable across + // save/restore, it must be kept open. The FileDescriptor type will do that + // for us automatically. + auto fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR)); + struct stat file_st; + EXPECT_THAT(fstat(fd.get(), &file_st), SyscallSucceeds()); + + const std::string link = NewTempAbsPath(); + EXPECT_THAT(symlink(file.path().c_str(), link.c_str()), SyscallSucceeds()); + EXPECT_EQ(FilePermission(link), 0777); + + auto linkfd = ASSERT_NO_ERRNO_AND_VALUE(Open(link.c_str(), O_RDWR)); + struct stat link_st; + EXPECT_THAT(fstat(linkfd.get(), &link_st), SyscallSucceeds()); + + // Check that in fact newname points to the file we expect. + EXPECT_EQ(file_st.st_dev, link_st.st_dev); + EXPECT_EQ(file_st.st_ino, link_st.st_ino); +} + +TEST(SymlinkTest, TargetIsNotMapped) { + const std::string oldname = NewTempAbsPath(); + const std::string newname = NewTempAbsPath(); + + int fd; + // Create the target so that when we read the link, it exists. + ASSERT_THAT(fd = open(oldname.c_str(), O_CREAT | O_RDWR, 0666), + SyscallSucceeds()); + EXPECT_THAT(close(fd), SyscallSucceeds()); + + // Create a symlink called newname that points to oldname. + EXPECT_THAT(symlink(oldname.c_str(), newname.c_str()), SyscallSucceeds()); + + std::vector<char> buf(1024); + int linksize; + // Read the link and assert that the oldname is still the same. 
+ EXPECT_THAT(linksize = readlink(newname.c_str(), buf.data(), 1024), + SyscallSucceeds()); + EXPECT_EQ(0, strncmp(oldname.c_str(), buf.data(), linksize)); + + EXPECT_THAT(unlink(newname.c_str()), SyscallSucceeds()); + EXPECT_THAT(unlink(oldname.c_str()), SyscallSucceeds()); +} + +TEST(SymlinkTest, PreadFromSymlink) { + std::string name = NewTempAbsPath(); + int fd; + ASSERT_THAT(fd = open(name.c_str(), O_CREAT, 0644), SyscallSucceeds()); + ASSERT_THAT(close(fd), SyscallSucceeds()); + + std::string linkname = NewTempAbsPath(); + ASSERT_THAT(symlink(name.c_str(), linkname.c_str()), SyscallSucceeds()); + + ASSERT_THAT(fd = open(linkname.c_str(), O_RDONLY), SyscallSucceeds()); + + char buf[1024]; + EXPECT_THAT(pread64(fd, buf, 1024, 0), SyscallSucceeds()); + EXPECT_THAT(close(fd), SyscallSucceeds()); + + EXPECT_THAT(unlink(name.c_str()), SyscallSucceeds()); + EXPECT_THAT(unlink(linkname.c_str()), SyscallSucceeds()); +} + +TEST(SymlinkTest, SymlinkAtDegradedPermissions_NoRandomSave) { + // Drop capabilities that allow us to override file and directory permissions. + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false)); + + auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir.path())); + + int dirfd; + ASSERT_THAT(dirfd = open(dir.path().c_str(), O_DIRECTORY, 0), + SyscallSucceeds()); + + const DisableSave ds; // Permissions are dropped. + EXPECT_THAT(fchmod(dirfd, 0), SyscallSucceeds()); + + std::string basename = std::string(Basename(file.path())); + EXPECT_THAT(symlinkat("/dangling", dirfd, basename.c_str()), + SyscallFailsWithErrno(EACCES)); + EXPECT_THAT(close(dirfd), SyscallSucceeds()); +} + +TEST(SymlinkTest, ReadlinkAtDegradedPermissions_NoRandomSave) { + // Drop capabilities that allow us to override file and directory permissions. 
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false)); + + auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const std::string oldpath = NewTempAbsPathInDir(dir.path()); + const std::string oldbase = std::string(Basename(oldpath)); + ASSERT_THAT(symlink("/dangling", oldpath.c_str()), SyscallSucceeds()); + + int dirfd; + EXPECT_THAT(dirfd = open(dir.path().c_str(), O_DIRECTORY, 0), + SyscallSucceeds()); + + const DisableSave ds; // Permissions are dropped. + EXPECT_THAT(fchmod(dirfd, 0), SyscallSucceeds()); + + char buf[1024]; + int linksize; + EXPECT_THAT(linksize = readlinkat(dirfd, oldbase.c_str(), buf, 1024), + SyscallFailsWithErrno(EACCES)); + EXPECT_THAT(close(dirfd), SyscallSucceeds()); +} + +TEST(SymlinkTest, ChmodSymlink) { + auto target = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const std::string newpath = NewTempAbsPath(); + ASSERT_THAT(symlink(target.path().c_str(), newpath.c_str()), + SyscallSucceeds()); + EXPECT_EQ(FilePermission(newpath), 0777); + EXPECT_THAT(chmod(newpath.c_str(), 0666), SyscallSucceeds()); + EXPECT_EQ(FilePermission(newpath), 0777); +} + +// Test that following a symlink updates the atime on the symlink. +TEST(SymlinkTest, FollowUpdatesATime) { + const auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const std::string link = NewTempAbsPath(); + EXPECT_THAT(symlink(file.path().c_str(), link.c_str()), SyscallSucceeds()); + + // Lstat the symlink. + struct stat st_before_follow; + ASSERT_THAT(lstat(link.c_str(), &st_before_follow), SyscallSucceeds()); + + // Let the clock advance. + absl::SleepFor(absl::Seconds(1)); + + // Open the file via the symlink. + int fd; + ASSERT_THAT(fd = open(link.c_str(), O_RDWR, 0666), SyscallSucceeds()); + FileDescriptor fd_closer(fd); + + // Lstat the symlink again, and check that atime is updated. 
+ struct stat st_after_follow; + ASSERT_THAT(lstat(link.c_str(), &st_after_follow), SyscallSucceeds()); + EXPECT_LT(st_before_follow.st_atime, st_after_follow.st_atime); +} + +class ParamSymlinkTest : public ::testing::TestWithParam<std::string> {}; + +// Test that creating an existing symlink with creat will create the target. +TEST_P(ParamSymlinkTest, CreatLinkCreatesTarget) { + const std::string target = GetParam(); + const std::string linkpath = NewTempAbsPath(); + + ASSERT_THAT(symlink(target.c_str(), linkpath.c_str()), SyscallSucceeds()); + + int fd; + EXPECT_THAT(fd = creat(linkpath.c_str(), 0666), SyscallSucceeds()); + ASSERT_THAT(close(fd), SyscallSucceeds()); + + ASSERT_THAT(chdir(GetAbsoluteTestTmpdir().c_str()), SyscallSucceeds()); + struct stat st; + EXPECT_THAT(stat(target.c_str(), &st), SyscallSucceeds()); + + ASSERT_THAT(unlink(linkpath.c_str()), SyscallSucceeds()); + ASSERT_THAT(unlink(target.c_str()), SyscallSucceeds()); +} + +// Test that opening an existing symlink with O_CREAT will create the target. +TEST_P(ParamSymlinkTest, OpenLinkCreatesTarget) { + const std::string target = GetParam(); + const std::string linkpath = NewTempAbsPath(); + + ASSERT_THAT(symlink(target.c_str(), linkpath.c_str()), SyscallSucceeds()); + + int fd; + EXPECT_THAT(fd = open(linkpath.c_str(), O_CREAT, 0666), SyscallSucceeds()); + ASSERT_THAT(close(fd), SyscallSucceeds()); + + ASSERT_THAT(chdir(GetAbsoluteTestTmpdir().c_str()), SyscallSucceeds()); + struct stat st; + EXPECT_THAT(stat(target.c_str(), &st), SyscallSucceeds()); + + ASSERT_THAT(unlink(linkpath.c_str()), SyscallSucceeds()); + ASSERT_THAT(unlink(target.c_str()), SyscallSucceeds()); +} + +// Test that opening a self-symlink with O_CREAT will fail with ELOOP. 
+TEST_P(ParamSymlinkTest, CreateExistingSelfLink) { + ASSERT_THAT(chdir(GetAbsoluteTestTmpdir().c_str()), SyscallSucceeds()); + + const std::string linkpath = GetParam(); + ASSERT_THAT(symlink(linkpath.c_str(), linkpath.c_str()), SyscallSucceeds()); + + EXPECT_THAT(open(linkpath.c_str(), O_CREAT, 0666), + SyscallFailsWithErrno(ELOOP)); + + ASSERT_THAT(unlink(linkpath.c_str()), SyscallSucceeds()); +} + +// Test that opening a file that is a symlink to its parent directory fails +// with ELOOP. +TEST_P(ParamSymlinkTest, CreateExistingParentLink) { + ASSERT_THAT(chdir(GetAbsoluteTestTmpdir().c_str()), SyscallSucceeds()); + + const std::string linkpath = GetParam(); + const std::string target = JoinPath(linkpath, "child"); + ASSERT_THAT(symlink(target.c_str(), linkpath.c_str()), SyscallSucceeds()); + + EXPECT_THAT(open(linkpath.c_str(), O_CREAT, 0666), + SyscallFailsWithErrno(ELOOP)); + + ASSERT_THAT(unlink(linkpath.c_str()), SyscallSucceeds()); +} + +// Test that opening an existing symlink with O_CREAT|O_EXCL will fail with +// EEXIST. +TEST_P(ParamSymlinkTest, OpenLinkExclFails) { + const std::string target = GetParam(); + const std::string linkpath = NewTempAbsPath(); + + ASSERT_THAT(symlink(target.c_str(), linkpath.c_str()), SyscallSucceeds()); + + EXPECT_THAT(open(linkpath.c_str(), O_CREAT | O_EXCL, 0666), + SyscallFailsWithErrno(EEXIST)); + + ASSERT_THAT(unlink(linkpath.c_str()), SyscallSucceeds()); +} + +// Test that opening an existing symlink with O_CREAT|O_NOFOLLOW will fail with +// ELOOP. 
+TEST_P(ParamSymlinkTest, OpenLinkNoFollowFails) { + const std::string target = GetParam(); + const std::string linkpath = NewTempAbsPath(); + + ASSERT_THAT(symlink(target.c_str(), linkpath.c_str()), SyscallSucceeds()); + + EXPECT_THAT(open(linkpath.c_str(), O_CREAT | O_NOFOLLOW, 0666), + SyscallFailsWithErrno(ELOOP)); + + ASSERT_THAT(unlink(linkpath.c_str()), SyscallSucceeds()); +} + +INSTANTIATE_TEST_SUITE_P(AbsAndRelTarget, ParamSymlinkTest, + ::testing::Values(NewTempAbsPath(), NewTempRelPath())); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/sync.cc b/test/syscalls/linux/sync.cc new file mode 100644 index 000000000..8aa2525a9 --- /dev/null +++ b/test/syscalls/linux/sync.cc @@ -0,0 +1,59 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <fcntl.h> +#include <stdio.h> +#include <sys/syscall.h> +#include <unistd.h> + +#include <string> + +#include "gtest/gtest.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +TEST(SyncTest, SyncEverything) { + ASSERT_THAT(syscall(SYS_sync), SyscallSucceeds()); +} + +TEST(SyncTest, SyncFileSytem) { + int fd; + auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + ASSERT_THAT(fd = open(f.path().c_str(), O_RDONLY), SyscallSucceeds()); + EXPECT_THAT(syncfs(fd), SyscallSucceeds()); + EXPECT_THAT(close(fd), SyscallSucceeds()); +} + +TEST(SyncTest, SyncFromPipe) { + int pipes[2]; + EXPECT_THAT(pipe(pipes), SyscallSucceeds()); + EXPECT_THAT(syncfs(pipes[0]), SyscallSucceeds()); + EXPECT_THAT(syncfs(pipes[1]), SyscallSucceeds()); + EXPECT_THAT(close(pipes[0]), SyscallSucceeds()); + EXPECT_THAT(close(pipes[1]), SyscallSucceeds()); +} + +TEST(SyncTest, CannotSyncFileSytemAtBadFd) { + EXPECT_THAT(syncfs(-1), SyscallFailsWithErrno(EBADF)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/sync_file_range.cc b/test/syscalls/linux/sync_file_range.cc new file mode 100644 index 000000000..36cc42043 --- /dev/null +++ b/test/syscalls/linux/sync_file_range.cc @@ -0,0 +1,112 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <fcntl.h> +#include <stdio.h> +#include <unistd.h> + +#include <string> + +#include "gtest/gtest.h" +#include "test/util/file_descriptor.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +TEST(SyncFileRangeTest, TempFileSucceeds) { + auto tmpfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + auto f = ASSERT_NO_ERRNO_AND_VALUE(Open(tmpfile.path(), O_RDWR)); + constexpr char data[] = "some data to sync"; + int fd = f.get(); + + EXPECT_THAT(write(fd, data, sizeof(data)), + SyscallSucceedsWithValue(sizeof(data))); + EXPECT_THAT(sync_file_range(fd, 0, 0, SYNC_FILE_RANGE_WRITE), + SyscallSucceeds()); + EXPECT_THAT(sync_file_range(fd, 0, 0, 0), SyscallSucceeds()); + EXPECT_THAT( + sync_file_range(fd, 0, 0, + SYNC_FILE_RANGE_WRITE | SYNC_FILE_RANGE_WAIT_AFTER | + SYNC_FILE_RANGE_WAIT_BEFORE), + SyscallSucceeds()); + EXPECT_THAT(sync_file_range( + fd, 0, 1, SYNC_FILE_RANGE_WRITE | SYNC_FILE_RANGE_WAIT_AFTER), + SyscallSucceeds()); + EXPECT_THAT(sync_file_range( + fd, 1, 0, SYNC_FILE_RANGE_WRITE | SYNC_FILE_RANGE_WAIT_AFTER), + SyscallSucceeds()); +} + +TEST(SyncFileRangeTest, CannotSyncFileRangeOnUnopenedFd) { + auto tmpfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + auto f = ASSERT_NO_ERRNO_AND_VALUE(Open(tmpfile.path(), O_RDWR)); + constexpr char data[] = "some data to sync"; + int fd = f.get(); + + EXPECT_THAT(write(fd, data, sizeof(data)), + SyscallSucceedsWithValue(sizeof(data))); + + pid_t pid = fork(); + if (pid == 0) { + f.reset(); + + // fd is now invalid. 
+ TEST_CHECK(sync_file_range(fd, 0, 0, SYNC_FILE_RANGE_WRITE) == -1); + TEST_PCHECK(errno == EBADF); + _exit(0); + } + ASSERT_THAT(pid, SyscallSucceeds()); + + int status = 0; + ASSERT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid)); + EXPECT_TRUE(WIFEXITED(status)); + EXPECT_EQ(WEXITSTATUS(status), 0); +} + +TEST(SyncFileRangeTest, BadArgs) { + auto tmpfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + auto f = ASSERT_NO_ERRNO_AND_VALUE(Open(tmpfile.path(), O_RDWR)); + int fd = f.get(); + + EXPECT_THAT(sync_file_range(fd, -1, 0, 0), SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(sync_file_range(fd, 0, -1, 0), SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(sync_file_range(fd, 8912, INT64_MAX - 4096, 0), + SyscallFailsWithErrno(EINVAL)); +} + +TEST(SyncFileRangeTest, CannotSyncFileRangeWithWaitBefore) { + auto tmpfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + auto f = ASSERT_NO_ERRNO_AND_VALUE(Open(tmpfile.path(), O_RDWR)); + constexpr char data[] = "some data to sync"; + int fd = f.get(); + + EXPECT_THAT(write(fd, data, sizeof(data)), + SyscallSucceedsWithValue(sizeof(data))); + if (IsRunningOnGvisor()) { + EXPECT_THAT(sync_file_range(fd, 0, 0, SYNC_FILE_RANGE_WAIT_BEFORE), + SyscallFailsWithErrno(ENOSYS)); + EXPECT_THAT( + sync_file_range(fd, 0, 0, + SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE), + SyscallFailsWithErrno(ENOSYS)); + } +} + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/sysinfo.cc b/test/syscalls/linux/sysinfo.cc new file mode 100644 index 000000000..1a71256da --- /dev/null +++ b/test/syscalls/linux/sysinfo.cc @@ -0,0 +1,86 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This is a very simple sanity test to validate that the sysinfo syscall is +// supported by gvisor and returns sane values. +#include <sys/syscall.h> +#include <sys/sysinfo.h> +#include <sys/types.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +TEST(SysinfoTest, SysinfoIsCallable) { + struct sysinfo ignored = {}; + EXPECT_THAT(syscall(SYS_sysinfo, &ignored), SyscallSucceedsWithValue(0)); +} + +TEST(SysinfoTest, EfaultProducedOnBadAddress) { + // Validate that we return EFAULT when a bad address is provided. 
+ // specified by man 2 sysinfo + EXPECT_THAT(syscall(SYS_sysinfo, nullptr), SyscallFailsWithErrno(EFAULT)); +} + +TEST(SysinfoTest, TotalRamSaneValue) { + struct sysinfo s = {}; + EXPECT_THAT(sysinfo(&s), SyscallSucceedsWithValue(0)); + EXPECT_GT(s.totalram, 0); +} + +TEST(SysinfoTest, MemunitSet) { + struct sysinfo s = {}; + EXPECT_THAT(sysinfo(&s), SyscallSucceedsWithValue(0)); + EXPECT_GE(s.mem_unit, 1); +} + +TEST(SysinfoTest, UptimeSaneValue) { + struct sysinfo s = {}; + EXPECT_THAT(sysinfo(&s), SyscallSucceedsWithValue(0)); + EXPECT_GE(s.uptime, 0); +} + +TEST(SysinfoTest, UptimeIncreasingValue) { + struct sysinfo s = {}; + EXPECT_THAT(sysinfo(&s), SyscallSucceedsWithValue(0)); + absl::SleepFor(absl::Seconds(2)); + struct sysinfo s2 = {}; + EXPECT_THAT(sysinfo(&s2), SyscallSucceedsWithValue(0)); + EXPECT_LT(s.uptime, s2.uptime); +} + +TEST(SysinfoTest, FreeRamSaneValue) { + struct sysinfo s = {}; + EXPECT_THAT(sysinfo(&s), SyscallSucceedsWithValue(0)); + EXPECT_GT(s.freeram, 0); + EXPECT_LT(s.freeram, s.totalram); +} + +TEST(SysinfoTest, NumProcsSaneValue) { + struct sysinfo s = {}; + EXPECT_THAT(sysinfo(&s), SyscallSucceedsWithValue(0)); + EXPECT_GT(s.procs, 0); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/syslog.cc b/test/syscalls/linux/syslog.cc new file mode 100644 index 000000000..9a7407d96 --- /dev/null +++ b/test/syscalls/linux/syslog.cc @@ -0,0 +1,51 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include <sys/klog.h> +#include <sys/syscall.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +constexpr int SYSLOG_ACTION_READ_ALL = 3; +constexpr int SYSLOG_ACTION_SIZE_BUFFER = 10; + +int Syslog(int type, char* buf, int len) { + return syscall(__NR_syslog, type, buf, len); +} + +// Only SYSLOG_ACTION_SIZE_BUFFER and SYSLOG_ACTION_READ_ALL are implemented in +// gVisor. + +TEST(Syslog, Size) { + EXPECT_THAT(Syslog(SYSLOG_ACTION_SIZE_BUFFER, nullptr, 0), SyscallSucceeds()); +} + +TEST(Syslog, ReadAll) { + // There might not be anything to read, so we can't check the write count. + char buf[100]; + EXPECT_THAT(Syslog(SYSLOG_ACTION_READ_ALL, buf, sizeof(buf)), + SyscallSucceeds()); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/sysret.cc b/test/syscalls/linux/sysret.cc new file mode 100644 index 000000000..19ffbd85b --- /dev/null +++ b/test/syscalls/linux/sysret.cc @@ -0,0 +1,142 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Tests to verify that the behavior of linux and gvisor matches when +// 'sysret' returns to bad (aka non-canonical) %rip or %rsp. 
+ +#include <linux/elf.h> +#include <sys/ptrace.h> +#include <sys/user.h> + +#include "gtest/gtest.h" +#include "test/util/logging.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +constexpr uint64_t kNonCanonicalRip = 0xCCCC000000000000; +constexpr uint64_t kNonCanonicalRsp = 0xFFFF000000000000; + +class SysretTest : public ::testing::Test { + protected: + struct user_regs_struct regs_; + struct iovec iov; + pid_t child_; + + void SetUp() override { + pid_t pid = fork(); + + // Child. + if (pid == 0) { + TEST_PCHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) == 0); + MaybeSave(); + TEST_PCHECK(raise(SIGSTOP) == 0); + MaybeSave(); + _exit(0); + } + + // Parent. + int status; + memset(&iov, 0, sizeof(iov)); + ASSERT_THAT(pid, SyscallSucceeds()); // Might still be < 0. + ASSERT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid)); + EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP); + + iov.iov_base = ®s_; + iov.iov_len = sizeof(regs_); + ASSERT_THAT(ptrace(PTRACE_GETREGSET, pid, NT_PRSTATUS, &iov), + SyscallSucceeds()); + + child_ = pid; + } + + void Detach() { + ASSERT_THAT(ptrace(PTRACE_DETACH, child_, 0, 0), SyscallSucceeds()); + } + + void SetRip(uint64_t newrip) { +#if defined(__x86_64__) + regs_.rip = newrip; +#elif defined(__aarch64__) + regs_.pc = newrip; +#else +#error "Unknown architecture" +#endif + ASSERT_THAT(ptrace(PTRACE_SETREGSET, child_, NT_PRSTATUS, &iov), + SyscallSucceeds()); + } + + void SetRsp(uint64_t newrsp) { +#if defined(__x86_64__) + regs_.rsp = newrsp; +#elif defined(__aarch64__) + regs_.sp = newrsp; +#else +#error "Unknown architecture" +#endif + ASSERT_THAT(ptrace(PTRACE_SETREGSET, child_, NT_PRSTATUS, &iov), + SyscallSucceeds()); + } + + // Wait waits for the child pid and returns the exit status. 
+ int Wait() { + int status; + while (true) { + int rval = wait4(child_, &status, 0, NULL); + if (rval < 0) { + return rval; + } + if (rval == child_) { + return status; + } + } + } +}; + +TEST_F(SysretTest, JustDetach) { + Detach(); + int status = Wait(); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << "status = " << status; +} + +TEST_F(SysretTest, BadRip) { + SetRip(kNonCanonicalRip); + Detach(); + int status = Wait(); + EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGSEGV) + << "status = " << status; +} + +TEST_F(SysretTest, BadRsp) { + SetRsp(kNonCanonicalRsp); + Detach(); + int status = Wait(); +#if defined(__x86_64__) + EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGBUS) + << "status = " << status; +#elif defined(__aarch64__) + EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGSEGV) + << "status = " << status; +#else +#error "Unknown architecture" +#endif +} +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/tcp_socket.cc b/test/syscalls/linux/tcp_socket.cc new file mode 100644 index 000000000..a4d2953e1 --- /dev/null +++ b/test/syscalls/linux/tcp_socket.cc @@ -0,0 +1,1568 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <fcntl.h> +#include <netinet/in.h> +#include <netinet/tcp.h> +#include <poll.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <unistd.h> + +#include <limits> +#include <vector> + +#include "gtest/gtest.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/posix_error.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +PosixErrorOr<sockaddr_storage> InetLoopbackAddr(int family) { + struct sockaddr_storage addr; + memset(&addr, 0, sizeof(addr)); + addr.ss_family = family; + switch (family) { + case AF_INET: + reinterpret_cast<struct sockaddr_in*>(&addr)->sin_addr.s_addr = + htonl(INADDR_LOOPBACK); + break; + case AF_INET6: + reinterpret_cast<struct sockaddr_in6*>(&addr)->sin6_addr = + in6addr_loopback; + break; + default: + return PosixError(EINVAL, + absl::StrCat("unknown socket family: ", family)); + } + return addr; +} + +// Fixture for tests parameterized by the address family to use (AF_INET and +// AF_INET6) when creating sockets. +class TcpSocketTest : public ::testing::TestWithParam<int> { + protected: + // Creates three sockets that will be used by test cases -- a listener, one + // that connects, and the accepted one. + void SetUp() override; + + // Closes the sockets created by SetUp(). + void TearDown() override; + + // Listening socket. + int listener_ = -1; + + // Socket connected via connect(). + int s_ = -1; + + // Socket connected via accept(). + int t_ = -1; + + // Initial size of the send buffer. + int sendbuf_size_ = -1; +}; + +void TcpSocketTest::SetUp() { + ASSERT_THAT(listener_ = socket(GetParam(), SOCK_STREAM, IPPROTO_TCP), + SyscallSucceeds()); + + ASSERT_THAT(s_ = socket(GetParam(), SOCK_STREAM, IPPROTO_TCP), + SyscallSucceeds()); + + // Initialize address to the loopback one. 
+ sockaddr_storage addr = + ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(GetParam())); + socklen_t addrlen = sizeof(addr); + + // Bind to some port then start listening. + ASSERT_THAT( + bind(listener_, reinterpret_cast<struct sockaddr*>(&addr), addrlen), + SyscallSucceeds()); + + ASSERT_THAT(listen(listener_, SOMAXCONN), SyscallSucceeds()); + + // Get the address we're listening on, then connect to it. We need to do this + // because we're allowing the stack to pick a port for us. + ASSERT_THAT(getsockname(listener_, reinterpret_cast<struct sockaddr*>(&addr), + &addrlen), + SyscallSucceeds()); + + ASSERT_THAT(RetryEINTR(connect)(s_, reinterpret_cast<struct sockaddr*>(&addr), + addrlen), + SyscallSucceeds()); + + // Get the initial send buffer size. + socklen_t optlen = sizeof(sendbuf_size_); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &sendbuf_size_, &optlen), + SyscallSucceeds()); + + // Accept the connection. + ASSERT_THAT(t_ = RetryEINTR(accept)(listener_, nullptr, nullptr), + SyscallSucceeds()); +} + +void TcpSocketTest::TearDown() { + EXPECT_THAT(close(listener_), SyscallSucceeds()); + if (s_ >= 0) { + EXPECT_THAT(close(s_), SyscallSucceeds()); + } + if (t_ >= 0) { + EXPECT_THAT(close(t_), SyscallSucceeds()); + } +} + +TEST_P(TcpSocketTest, ConnectOnEstablishedConnection) { + sockaddr_storage addr = + ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(GetParam())); + socklen_t addrlen = sizeof(addr); + + ASSERT_THAT( + connect(s_, reinterpret_cast<const struct sockaddr*>(&addr), addrlen), + SyscallFailsWithErrno(EISCONN)); + ASSERT_THAT( + connect(t_, reinterpret_cast<const struct sockaddr*>(&addr), addrlen), + SyscallFailsWithErrno(EISCONN)); +} + +TEST_P(TcpSocketTest, ShutdownWriteInTimeWait) { + EXPECT_THAT(shutdown(t_, SHUT_WR), SyscallSucceeds()); + EXPECT_THAT(shutdown(s_, SHUT_RDWR), SyscallSucceeds()); + absl::SleepFor(absl::Seconds(1)); // Wait to enter TIME_WAIT. 
+ EXPECT_THAT(shutdown(t_, SHUT_WR), SyscallFailsWithErrno(ENOTCONN)); +} + +TEST_P(TcpSocketTest, ShutdownWriteInFinWait1) { + EXPECT_THAT(shutdown(t_, SHUT_WR), SyscallSucceeds()); + EXPECT_THAT(shutdown(t_, SHUT_WR), SyscallSucceeds()); + absl::SleepFor(absl::Seconds(1)); // Wait to enter FIN-WAIT2. + EXPECT_THAT(shutdown(t_, SHUT_WR), SyscallSucceeds()); +} + +TEST_P(TcpSocketTest, DataCoalesced) { + char buf[10]; + + // Write in two steps. + ASSERT_THAT(RetryEINTR(write)(s_, buf, sizeof(buf) / 2), + SyscallSucceedsWithValue(sizeof(buf) / 2)); + ASSERT_THAT(RetryEINTR(write)(s_, buf, sizeof(buf) / 2), + SyscallSucceedsWithValue(sizeof(buf) / 2)); + + // Allow stack to process both packets. + absl::SleepFor(absl::Seconds(1)); + + // Read in one shot. + EXPECT_THAT(RetryEINTR(recv)(t_, buf, sizeof(buf), 0), + SyscallSucceedsWithValue(sizeof(buf))); +} + +TEST_P(TcpSocketTest, SenderAddressIgnored) { + char buf[3]; + ASSERT_THAT(RetryEINTR(write)(s_, buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + struct sockaddr_storage addr; + socklen_t addrlen = sizeof(addr); + memset(&addr, 0, sizeof(addr)); + + ASSERT_THAT( + RetryEINTR(recvfrom)(t_, buf, sizeof(buf), 0, + reinterpret_cast<struct sockaddr*>(&addr), &addrlen), + SyscallSucceedsWithValue(3)); + + // Check that addr remains zeroed-out. + const char* ptr = reinterpret_cast<char*>(&addr); + for (size_t i = 0; i < sizeof(addr); i++) { + EXPECT_EQ(ptr[i], 0); + } +} + +TEST_P(TcpSocketTest, SenderAddressIgnoredOnPeek) { + char buf[3]; + ASSERT_THAT(RetryEINTR(write)(s_, buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + struct sockaddr_storage addr; + socklen_t addrlen = sizeof(addr); + memset(&addr, 0, sizeof(addr)); + + ASSERT_THAT( + RetryEINTR(recvfrom)(t_, buf, sizeof(buf), MSG_PEEK, + reinterpret_cast<struct sockaddr*>(&addr), &addrlen), + SyscallSucceedsWithValue(3)); + + // Check that addr remains zeroed-out. 
+ const char* ptr = reinterpret_cast<char*>(&addr); + for (size_t i = 0; i < sizeof(addr); i++) { + EXPECT_EQ(ptr[i], 0); + } +} + +TEST_P(TcpSocketTest, SendtoAddressIgnored) { + struct sockaddr_storage addr; + memset(&addr, 0, sizeof(addr)); + addr.ss_family = GetParam(); // FIXME(b/63803955) + + char data = '\0'; + EXPECT_THAT( + RetryEINTR(sendto)(s_, &data, sizeof(data), 0, + reinterpret_cast<sockaddr*>(&addr), sizeof(addr)), + SyscallSucceedsWithValue(1)); +} + +TEST_P(TcpSocketTest, WritevZeroIovec) { + // 2 bytes just to be safe and have vecs[1] not point to something random + // (even though length is 0). + char buf[2]; + char recv_buf[1]; + + // Construct a vec where the final vector is of length 0. + iovec vecs[2] = {}; + vecs[0].iov_base = buf; + vecs[0].iov_len = 1; + vecs[1].iov_base = buf + 1; + vecs[1].iov_len = 0; + + EXPECT_THAT(RetryEINTR(writev)(s_, vecs, 2), SyscallSucceedsWithValue(1)); + + EXPECT_THAT(RetryEINTR(recv)(t_, recv_buf, 1, 0), + SyscallSucceedsWithValue(1)); + EXPECT_EQ(memcmp(recv_buf, buf, 1), 0); +} + +TEST_P(TcpSocketTest, ZeroWriteAllowed) { + char buf[3]; + // Send a zero length packet. + ASSERT_THAT(RetryEINTR(write)(s_, buf, 0), SyscallSucceedsWithValue(0)); + // Verify that there is no packet available. + EXPECT_THAT(RetryEINTR(recv)(t_, buf, sizeof(buf), MSG_DONTWAIT), + SyscallFailsWithErrno(EAGAIN)); +} + +// Test that a non-blocking write with a buffer that is larger than the send +// buffer size will not actually write the whole thing at once. Regression test +// for b/64438887. +TEST_P(TcpSocketTest, NonblockingLargeWrite) { + // Set the FD to O_NONBLOCK. + int opts; + ASSERT_THAT(opts = fcntl(s_, F_GETFL), SyscallSucceeds()); + opts |= O_NONBLOCK; + ASSERT_THAT(fcntl(s_, F_SETFL, opts), SyscallSucceeds()); + + // Allocate a buffer three times the size of the send buffer. We do this with + // a vector to avoid allocating on the stack. 
+ int size = 3 * sendbuf_size_; + std::vector<char> buf(size); + + // Try to write the whole thing. + int n; + ASSERT_THAT(n = RetryEINTR(write)(s_, buf.data(), size), SyscallSucceeds()); + + // We should have written something, but not the whole thing. + EXPECT_GT(n, 0); + EXPECT_LT(n, size); +} + +// Test that a blocking write with a buffer that is larger than the send buffer +// will block until the entire buffer is sent. +TEST_P(TcpSocketTest, BlockingLargeWrite_NoRandomSave) { + // Allocate a buffer three times the size of the send buffer on the heap. We + // do this as a vector to avoid allocating on the stack. + int size = 3 * sendbuf_size_; + std::vector<char> writebuf(size); + + // Start reading the response in a loop. + int read_bytes = 0; + ScopedThread t([this, &read_bytes]() { + // Avoid interrupting the blocking write in main thread. + const DisableSave ds; + + // Take ownership of the FD so that we close it on failure. This will + // unblock the blocking write below. + FileDescriptor fd(t_); + t_ = -1; + + char readbuf[2500] = {}; + int n = -1; + while (n != 0) { + ASSERT_THAT(n = RetryEINTR(read)(fd.get(), &readbuf, sizeof(readbuf)), + SyscallSucceeds()); + read_bytes += n; + } + }); + + // Try to write the whole thing. + int n; + ASSERT_THAT(n = WriteFd(s_, writebuf.data(), size), SyscallSucceeds()); + + // We should have written the whole thing. + EXPECT_EQ(n, size); + EXPECT_THAT(close(s_), SyscallSucceedsWithValue(0)); + s_ = -1; + t.Join(); + + // We should have read the whole thing. + EXPECT_EQ(read_bytes, size); +} + +// Test that a send with MSG_DONTWAIT flag and buffer that larger than the send +// buffer size will not write the whole thing. +TEST_P(TcpSocketTest, LargeSendDontWait) { + // Allocate a buffer three times the size of the send buffer. We do this on + // with a vector to avoid allocating on the stack. 
+ int size = 3 * sendbuf_size_; + std::vector<char> buf(size); + + // Try to write the whole thing with MSG_DONTWAIT flag, which can + // return a partial write. + int n; + ASSERT_THAT(n = RetryEINTR(send)(s_, buf.data(), size, MSG_DONTWAIT), + SyscallSucceeds()); + + // We should have written something, but not the whole thing. + EXPECT_GT(n, 0); + EXPECT_LT(n, size); +} + +// Test that a send on a non-blocking socket with a buffer that larger than the +// send buffer will not write the whole thing at once. +TEST_P(TcpSocketTest, NonblockingLargeSend) { + // Set the FD to O_NONBLOCK. + int opts; + ASSERT_THAT(opts = fcntl(s_, F_GETFL), SyscallSucceeds()); + opts |= O_NONBLOCK; + ASSERT_THAT(fcntl(s_, F_SETFL, opts), SyscallSucceeds()); + + // Allocate a buffer three times the size of the send buffer. We do this on + // with a vector to avoid allocating on the stack. + int size = 3 * sendbuf_size_; + std::vector<char> buf(size); + + // Try to write the whole thing. + int n; + ASSERT_THAT(n = RetryEINTR(send)(s_, buf.data(), size, 0), SyscallSucceeds()); + + // We should have written something, but not the whole thing. + EXPECT_GT(n, 0); + EXPECT_LT(n, size); +} + +// Same test as above, but calls send instead of write. +TEST_P(TcpSocketTest, BlockingLargeSend_NoRandomSave) { + // Allocate a buffer three times the size of the send buffer. We do this on + // with a vector to avoid allocating on the stack. + int size = 3 * sendbuf_size_; + std::vector<char> writebuf(size); + + // Start reading the response in a loop. + int read_bytes = 0; + ScopedThread t([this, &read_bytes]() { + // Avoid interrupting the blocking write in main thread. + const DisableSave ds; + + // Take ownership of the FD so that we close it on failure. This will + // unblock the blocking write below. 
+ FileDescriptor fd(t_); + t_ = -1; + + char readbuf[2500] = {}; + int n = -1; + while (n != 0) { + ASSERT_THAT(n = RetryEINTR(read)(fd.get(), &readbuf, sizeof(readbuf)), + SyscallSucceeds()); + read_bytes += n; + } + }); + + // Try to send the whole thing. + int n; + ASSERT_THAT(n = SendFd(s_, writebuf.data(), size, 0), SyscallSucceeds()); + + // We should have written the whole thing. + EXPECT_EQ(n, size); + EXPECT_THAT(close(s_), SyscallSucceedsWithValue(0)); + s_ = -1; + t.Join(); + + // We should have read the whole thing. + EXPECT_EQ(read_bytes, size); +} + +// Test that polling on a socket with a full send buffer will block. +TEST_P(TcpSocketTest, PollWithFullBufferBlocks) { + // Set the FD to O_NONBLOCK. + int opts; + ASSERT_THAT(opts = fcntl(s_, F_GETFL), SyscallSucceeds()); + opts |= O_NONBLOCK; + ASSERT_THAT(fcntl(s_, F_SETFL, opts), SyscallSucceeds()); + + // Set TCP_NODELAY, which will cause linux to fill the receive buffer from the + // send buffer as quickly as possibly. This way we can fill up both buffers + // faster. + constexpr int tcp_nodelay_flag = 1; + ASSERT_THAT(setsockopt(s_, IPPROTO_TCP, TCP_NODELAY, &tcp_nodelay_flag, + sizeof(tcp_nodelay_flag)), + SyscallSucceeds()); + + // Set a 256KB send/receive buffer. + int buf_sz = 1 << 18; + EXPECT_THAT(setsockopt(t_, SOL_SOCKET, SO_RCVBUF, &buf_sz, sizeof(buf_sz)), + SyscallSucceedsWithValue(0)); + EXPECT_THAT(setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &buf_sz, sizeof(buf_sz)), + SyscallSucceedsWithValue(0)); + + // Create a large buffer that will be used for sending. + std::vector<char> buf(1 << 16); + + // Write until we receive an error. + while (RetryEINTR(send)(s_, buf.data(), buf.size(), 0) != -1) { + // Sleep to give linux a chance to move data from the send buffer to the + // receive buffer. + usleep(10000); // 10ms. + } + // The last error should have been EWOULDBLOCK. 
+ ASSERT_EQ(errno, EWOULDBLOCK); + + // Now polling on the FD with a timeout should return 0 corresponding to no + // FDs ready. + struct pollfd poll_fd = {s_, POLLOUT, 0}; + EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, 10), SyscallSucceedsWithValue(0)); +} + +TEST_P(TcpSocketTest, MsgTrunc) { + char sent_data[512]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + ASSERT_THAT(RetryEINTR(send)(s_, sent_data, sizeof(sent_data), 0), + SyscallSucceedsWithValue(sizeof(sent_data))); + char received_data[sizeof(sent_data)] = {}; + ASSERT_THAT( + RetryEINTR(recv)(t_, received_data, sizeof(received_data) / 2, MSG_TRUNC), + SyscallSucceedsWithValue(sizeof(sent_data) / 2)); + + // Check that we didn't get anything. + char zeros[sizeof(received_data)] = {}; + EXPECT_EQ(0, memcmp(zeros, received_data, sizeof(received_data))); +} + +// MSG_CTRUNC is a return flag but linux allows it to be set on input flags +// without returning an error. +TEST_P(TcpSocketTest, MsgTruncWithCtrunc) { + char sent_data[512]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + ASSERT_THAT(RetryEINTR(send)(s_, sent_data, sizeof(sent_data), 0), + SyscallSucceedsWithValue(sizeof(sent_data))); + char received_data[sizeof(sent_data)] = {}; + ASSERT_THAT(RetryEINTR(recv)(t_, received_data, sizeof(received_data) / 2, + MSG_TRUNC | MSG_CTRUNC), + SyscallSucceedsWithValue(sizeof(sent_data) / 2)); + + // Check that we didn't get anything. + char zeros[sizeof(received_data)] = {}; + EXPECT_EQ(0, memcmp(zeros, received_data, sizeof(received_data))); +} + +// This test will verify that MSG_CTRUNC doesn't do anything when specified +// on input. 
+TEST_P(TcpSocketTest, MsgTruncWithCtruncOnly) { + char sent_data[512]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + ASSERT_THAT(RetryEINTR(send)(s_, sent_data, sizeof(sent_data), 0), + SyscallSucceedsWithValue(sizeof(sent_data))); + char received_data[sizeof(sent_data)] = {}; + ASSERT_THAT(RetryEINTR(recv)(t_, received_data, sizeof(received_data) / 2, + MSG_CTRUNC), + SyscallSucceedsWithValue(sizeof(sent_data) / 2)); + + // Since MSG_CTRUNC here had no affect, it should not behave like MSG_TRUNC. + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data) / 2)); +} + +TEST_P(TcpSocketTest, MsgTruncLargeSize) { + char sent_data[512]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + ASSERT_THAT(RetryEINTR(send)(s_, sent_data, sizeof(sent_data), 0), + SyscallSucceedsWithValue(sizeof(sent_data))); + char received_data[sizeof(sent_data) * 2] = {}; + ASSERT_THAT( + RetryEINTR(recv)(t_, received_data, sizeof(received_data), MSG_TRUNC), + SyscallSucceedsWithValue(sizeof(sent_data))); + + // Check that we didn't get anything. + char zeros[sizeof(received_data)] = {}; + EXPECT_EQ(0, memcmp(zeros, received_data, sizeof(received_data))); +} + +TEST_P(TcpSocketTest, MsgTruncPeek) { + char sent_data[512]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + ASSERT_THAT(RetryEINTR(send)(s_, sent_data, sizeof(sent_data), 0), + SyscallSucceedsWithValue(sizeof(sent_data))); + char received_data[sizeof(sent_data)] = {}; + ASSERT_THAT(RetryEINTR(recv)(t_, received_data, sizeof(received_data) / 2, + MSG_TRUNC | MSG_PEEK), + SyscallSucceedsWithValue(sizeof(sent_data) / 2)); + + // Check that we didn't get anything. + char zeros[sizeof(received_data)] = {}; + EXPECT_EQ(0, memcmp(zeros, received_data, sizeof(received_data))); + + // Check that we can still get all of the data. 
+ ASSERT_THAT(RetryEINTR(recv)(t_, received_data, sizeof(received_data), 0), + SyscallSucceedsWithValue(sizeof(sent_data))); + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); +} + +TEST_P(TcpSocketTest, NoDelayDefault) { + int get = -1; + socklen_t get_len = sizeof(get); + EXPECT_THAT(getsockopt(s_, IPPROTO_TCP, TCP_NODELAY, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kSockOptOff); +} + +TEST_P(TcpSocketTest, SetNoDelay) { + ASSERT_THAT( + setsockopt(s_, IPPROTO_TCP, TCP_NODELAY, &kSockOptOn, sizeof(kSockOptOn)), + SyscallSucceeds()); + + int get = -1; + socklen_t get_len = sizeof(get); + EXPECT_THAT(getsockopt(s_, IPPROTO_TCP, TCP_NODELAY, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kSockOptOn); + + ASSERT_THAT(setsockopt(s_, IPPROTO_TCP, TCP_NODELAY, &kSockOptOff, + sizeof(kSockOptOff)), + SyscallSucceeds()); + + EXPECT_THAT(getsockopt(s_, IPPROTO_TCP, TCP_NODELAY, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kSockOptOff); +} + +#ifndef TCP_INQ +#define TCP_INQ 36 +#endif + +TEST_P(TcpSocketTest, TcpInqSetSockOpt) { + char buf[1024]; + ASSERT_THAT(RetryEINTR(write)(s_, buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + // TCP_INQ is disabled by default. + int val = -1; + socklen_t slen = sizeof(val); + EXPECT_THAT(getsockopt(t_, SOL_TCP, TCP_INQ, &val, &slen), + SyscallSucceedsWithValue(0)); + ASSERT_EQ(val, 0); + + // Try to set TCP_INQ. + val = 1; + EXPECT_THAT(setsockopt(t_, SOL_TCP, TCP_INQ, &val, sizeof(val)), + SyscallSucceedsWithValue(0)); + val = -1; + slen = sizeof(val); + EXPECT_THAT(getsockopt(t_, SOL_TCP, TCP_INQ, &val, &slen), + SyscallSucceedsWithValue(0)); + ASSERT_EQ(val, 1); + + // Try to unset TCP_INQ. 
+ val = 0; + EXPECT_THAT(setsockopt(t_, SOL_TCP, TCP_INQ, &val, sizeof(val)), + SyscallSucceedsWithValue(0)); + val = -1; + slen = sizeof(val); + EXPECT_THAT(getsockopt(t_, SOL_TCP, TCP_INQ, &val, &slen), + SyscallSucceedsWithValue(0)); + ASSERT_EQ(val, 0); +} + +TEST_P(TcpSocketTest, TcpInq) { + char buf[1024]; + // Write more than one TCP segment. + int size = sizeof(buf); + int kChunk = sizeof(buf) / 4; + for (int i = 0; i < size; i += kChunk) { + ASSERT_THAT(RetryEINTR(write)(s_, buf, kChunk), + SyscallSucceedsWithValue(kChunk)); + } + + int val = 1; + kChunk = sizeof(buf) / 2; + EXPECT_THAT(setsockopt(t_, SOL_TCP, TCP_INQ, &val, sizeof(val)), + SyscallSucceedsWithValue(0)); + + // Wait when all data will be in the received queue. + while (true) { + ASSERT_THAT(ioctl(t_, TIOCINQ, &size), SyscallSucceeds()); + if (size == sizeof(buf)) { + break; + } + absl::SleepFor(absl::Milliseconds(10)); + } + + struct msghdr msg = {}; + std::vector<char> control(CMSG_SPACE(sizeof(int))); + size = sizeof(buf); + struct iovec iov; + for (int i = 0; size != 0; i += kChunk) { + msg.msg_control = &control[0]; + msg.msg_controllen = control.size(); + + iov.iov_base = buf; + iov.iov_len = kChunk; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + ASSERT_THAT(RetryEINTR(recvmsg)(t_, &msg, 0), + SyscallSucceedsWithValue(kChunk)); + size -= kChunk; + + struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg); + ASSERT_NE(cmsg, nullptr); + ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(int))); + ASSERT_EQ(cmsg->cmsg_level, SOL_TCP); + ASSERT_EQ(cmsg->cmsg_type, TCP_INQ); + + int inq = 0; + memcpy(&inq, CMSG_DATA(cmsg), sizeof(int)); + ASSERT_EQ(inq, size); + } +} + +TEST_P(TcpSocketTest, Tiocinq) { + char buf[1024]; + size_t size = sizeof(buf); + ASSERT_THAT(RetryEINTR(write)(s_, buf, size), SyscallSucceedsWithValue(size)); + + uint32_t seed = time(nullptr); + const size_t max_chunk = size / 10; + while (size > 0) { + size_t chunk = (rand_r(&seed) % max_chunk) + 1; + ssize_t read = RetryEINTR(recvfrom)(t_, 
buf, chunk, 0, nullptr, nullptr); + ASSERT_THAT(read, SyscallSucceeds()); + size -= read; + + int inq = 0; + ASSERT_THAT(ioctl(t_, TIOCINQ, &inq), SyscallSucceeds()); + ASSERT_EQ(inq, size); + } +} + +TEST_P(TcpSocketTest, TcpSCMPriority) { + char buf[1024]; + ASSERT_THAT(RetryEINTR(write)(s_, buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + int val = 1; + EXPECT_THAT(setsockopt(t_, SOL_TCP, TCP_INQ, &val, sizeof(val)), + SyscallSucceedsWithValue(0)); + EXPECT_THAT(setsockopt(t_, SOL_SOCKET, SO_TIMESTAMP, &val, sizeof(val)), + SyscallSucceedsWithValue(0)); + + struct msghdr msg = {}; + std::vector<char> control( + CMSG_SPACE(sizeof(struct timeval) + CMSG_SPACE(sizeof(int)))); + struct iovec iov; + msg.msg_control = &control[0]; + msg.msg_controllen = control.size(); + + iov.iov_base = buf; + iov.iov_len = sizeof(buf); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + ASSERT_THAT(RetryEINTR(recvmsg)(t_, &msg, 0), + SyscallSucceedsWithValue(sizeof(buf))); + + struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg); + ASSERT_NE(cmsg, nullptr); + // TODO(b/78348848): SO_TIMESTAMP isn't implemented for TCP sockets. + if (!IsRunningOnGvisor() || cmsg->cmsg_level == SOL_SOCKET) { + ASSERT_EQ(cmsg->cmsg_level, SOL_SOCKET); + ASSERT_EQ(cmsg->cmsg_type, SO_TIMESTAMP); + ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(struct timeval))); + + cmsg = CMSG_NXTHDR(&msg, cmsg); + ASSERT_NE(cmsg, nullptr); + } + ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(int))); + ASSERT_EQ(cmsg->cmsg_level, SOL_TCP); + ASSERT_EQ(cmsg->cmsg_type, TCP_INQ); + + int inq = 0; + memcpy(&inq, CMSG_DATA(cmsg), sizeof(int)); + ASSERT_EQ(inq, 0); + + cmsg = CMSG_NXTHDR(&msg, cmsg); + ASSERT_EQ(cmsg, nullptr); +} + +INSTANTIATE_TEST_SUITE_P(AllInetTests, TcpSocketTest, + ::testing::Values(AF_INET, AF_INET6)); + +// Fixture for tests parameterized by address family that don't want the fixture +// to do things. 
+using SimpleTcpSocketTest = ::testing::TestWithParam<int>; + +TEST_P(SimpleTcpSocketTest, SendUnconnected) { + int fd; + ASSERT_THAT(fd = socket(GetParam(), SOCK_STREAM, IPPROTO_TCP), + SyscallSucceeds()); + FileDescriptor sock_fd(fd); + + char data = '\0'; + EXPECT_THAT(RetryEINTR(send)(fd, &data, sizeof(data), 0), + SyscallFailsWithErrno(EPIPE)); +} + +TEST_P(SimpleTcpSocketTest, SendtoWithoutAddressUnconnected) { + int fd; + ASSERT_THAT(fd = socket(GetParam(), SOCK_STREAM, IPPROTO_TCP), + SyscallSucceeds()); + FileDescriptor sock_fd(fd); + + char data = '\0'; + EXPECT_THAT(RetryEINTR(sendto)(fd, &data, sizeof(data), 0, nullptr, 0), + SyscallFailsWithErrno(EPIPE)); +} + +TEST_P(SimpleTcpSocketTest, SendtoWithAddressUnconnected) { + int fd; + ASSERT_THAT(fd = socket(GetParam(), SOCK_STREAM, IPPROTO_TCP), + SyscallSucceeds()); + FileDescriptor sock_fd(fd); + + sockaddr_storage addr = + ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(GetParam())); + char data = '\0'; + EXPECT_THAT( + RetryEINTR(sendto)(fd, &data, sizeof(data), 0, + reinterpret_cast<sockaddr*>(&addr), sizeof(addr)), + SyscallFailsWithErrno(EPIPE)); +} + +TEST_P(SimpleTcpSocketTest, GetPeerNameUnconnected) { + int fd; + ASSERT_THAT(fd = socket(GetParam(), SOCK_STREAM, IPPROTO_TCP), + SyscallSucceeds()); + FileDescriptor sock_fd(fd); + + sockaddr_storage addr; + socklen_t addrlen = sizeof(addr); + EXPECT_THAT(getpeername(fd, reinterpret_cast<sockaddr*>(&addr), &addrlen), + SyscallFailsWithErrno(ENOTCONN)); +} + +TEST_P(TcpSocketTest, FullBuffer) { + // Set both FDs to be blocking. + int flags = 0; + ASSERT_THAT(flags = fcntl(s_, F_GETFL), SyscallSucceeds()); + EXPECT_THAT(fcntl(s_, F_SETFL, flags & ~O_NONBLOCK), SyscallSucceeds()); + flags = 0; + ASSERT_THAT(flags = fcntl(t_, F_GETFL), SyscallSucceeds()); + EXPECT_THAT(fcntl(t_, F_SETFL, flags & ~O_NONBLOCK), SyscallSucceeds()); + + // 2500 was chosen as a small value that can be set on Linux. 
+ int set_snd = 2500; + EXPECT_THAT(setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &set_snd, sizeof(set_snd)), + SyscallSucceedsWithValue(0)); + int get_snd = -1; + socklen_t get_snd_len = sizeof(get_snd); + EXPECT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &get_snd, &get_snd_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_snd_len, sizeof(get_snd)); + EXPECT_GT(get_snd, 0); + + // 2500 was chosen as a small value that can be set on Linux and gVisor. + int set_rcv = 2500; + EXPECT_THAT(setsockopt(t_, SOL_SOCKET, SO_RCVBUF, &set_rcv, sizeof(set_rcv)), + SyscallSucceedsWithValue(0)); + int get_rcv = -1; + socklen_t get_rcv_len = sizeof(get_rcv); + EXPECT_THAT(getsockopt(t_, SOL_SOCKET, SO_RCVBUF, &get_rcv, &get_rcv_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_rcv_len, sizeof(get_rcv)); + EXPECT_GE(get_rcv, 2500); + + // Quick sanity test. + EXPECT_LT(get_snd + get_rcv, 2500 * IOV_MAX); + + char data[2500] = {}; + std::vector<struct iovec> iovecs; + for (int i = 0; i < IOV_MAX; i++) { + struct iovec iov = {}; + iov.iov_base = data; + iov.iov_len = sizeof(data); + iovecs.push_back(iov); + } + ScopedThread t([this, &iovecs]() { + int result = -1; + EXPECT_THAT(result = RetryEINTR(writev)(s_, iovecs.data(), iovecs.size()), + SyscallSucceeds()); + EXPECT_GT(result, 1); + EXPECT_LT(result, sizeof(data) * iovecs.size()); + }); + + char recv = 0; + EXPECT_THAT(RetryEINTR(read)(t_, &recv, 1), SyscallSucceedsWithValue(1)); + EXPECT_THAT(close(t_), SyscallSucceedsWithValue(0)); + t_ = -1; +} + +TEST_P(TcpSocketTest, PollAfterShutdown) { + ScopedThread client_thread([this]() { + EXPECT_THAT(shutdown(s_, SHUT_WR), SyscallSucceedsWithValue(0)); + struct pollfd poll_fd = {s_, POLLIN | POLLERR | POLLHUP, 0}; + EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, 10000), + SyscallSucceedsWithValue(1)); + }); + + EXPECT_THAT(shutdown(t_, SHUT_WR), SyscallSucceedsWithValue(0)); + struct pollfd poll_fd = {t_, POLLIN | POLLERR | POLLHUP, 0}; + EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, 10000), 
+ SyscallSucceedsWithValue(1)); +} + +TEST_P(SimpleTcpSocketTest, NonBlockingConnectNoListener) { + // Initialize address to the loopback one. + sockaddr_storage addr = + ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(GetParam())); + socklen_t addrlen = sizeof(addr); + + const FileDescriptor s = + ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); + + // Set the FD to O_NONBLOCK. + int opts; + ASSERT_THAT(opts = fcntl(s.get(), F_GETFL), SyscallSucceeds()); + opts |= O_NONBLOCK; + ASSERT_THAT(fcntl(s.get(), F_SETFL, opts), SyscallSucceeds()); + + ASSERT_THAT(RetryEINTR(connect)( + s.get(), reinterpret_cast<struct sockaddr*>(&addr), addrlen), + SyscallFailsWithErrno(EINPROGRESS)); + + // Now polling on the FD with a timeout should return 0 corresponding to no + // FDs ready. + struct pollfd poll_fd = {s.get(), POLLOUT, 0}; + EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, 10000), + SyscallSucceedsWithValue(1)); + + int err; + socklen_t optlen = sizeof(err); + ASSERT_THAT(getsockopt(s.get(), SOL_SOCKET, SO_ERROR, &err, &optlen), + SyscallSucceeds()); + + EXPECT_EQ(err, ECONNREFUSED); +} + +TEST_P(SimpleTcpSocketTest, NonBlockingConnect) { + const FileDescriptor listener = + ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); + + // Initialize address to the loopback one. + sockaddr_storage addr = + ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(GetParam())); + socklen_t addrlen = sizeof(addr); + + // Bind to some port then start listening. + ASSERT_THAT( + bind(listener.get(), reinterpret_cast<struct sockaddr*>(&addr), addrlen), + SyscallSucceeds()); + + ASSERT_THAT(listen(listener.get(), SOMAXCONN), SyscallSucceeds()); + + FileDescriptor s = + ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); + + // Set the FD to O_NONBLOCK. 
+ int opts; + ASSERT_THAT(opts = fcntl(s.get(), F_GETFL), SyscallSucceeds()); + opts |= O_NONBLOCK; + ASSERT_THAT(fcntl(s.get(), F_SETFL, opts), SyscallSucceeds()); + + ASSERT_THAT(getsockname(listener.get(), + reinterpret_cast<struct sockaddr*>(&addr), &addrlen), + SyscallSucceeds()); + + ASSERT_THAT(RetryEINTR(connect)( + s.get(), reinterpret_cast<struct sockaddr*>(&addr), addrlen), + SyscallFailsWithErrno(EINPROGRESS)); + + int t; + ASSERT_THAT(t = RetryEINTR(accept)(listener.get(), nullptr, nullptr), + SyscallSucceeds()); + + // Now polling on the FD with a timeout should return 0 corresponding to no + // FDs ready. + struct pollfd poll_fd = {s.get(), POLLOUT, 0}; + EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, 10000), + SyscallSucceedsWithValue(1)); + + int err; + socklen_t optlen = sizeof(err); + ASSERT_THAT(getsockopt(s.get(), SOL_SOCKET, SO_ERROR, &err, &optlen), + SyscallSucceeds()); + + EXPECT_EQ(err, 0); + + EXPECT_THAT(close(t), SyscallSucceeds()); +} + +TEST_P(SimpleTcpSocketTest, NonBlockingConnectRemoteClose) { + const FileDescriptor listener = + ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); + + // Initialize address to the loopback one. + sockaddr_storage addr = + ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(GetParam())); + socklen_t addrlen = sizeof(addr); + + // Bind to some port then start listening. 
+ ASSERT_THAT( + bind(listener.get(), reinterpret_cast<struct sockaddr*>(&addr), addrlen), + SyscallSucceeds()); + + ASSERT_THAT(listen(listener.get(), SOMAXCONN), SyscallSucceeds()); + + FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( + Socket(GetParam(), SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP)); + + ASSERT_THAT(getsockname(listener.get(), + reinterpret_cast<struct sockaddr*>(&addr), &addrlen), + SyscallSucceeds()); + + ASSERT_THAT(RetryEINTR(connect)( + s.get(), reinterpret_cast<struct sockaddr*>(&addr), addrlen), + SyscallFailsWithErrno(EINPROGRESS)); + + int t; + ASSERT_THAT(t = RetryEINTR(accept)(listener.get(), nullptr, nullptr), + SyscallSucceeds()); + + EXPECT_THAT(close(t), SyscallSucceeds()); + + // Now polling on the FD with a timeout should return 0 corresponding to no + // FDs ready. + struct pollfd poll_fd = {s.get(), POLLOUT, 0}; + EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, 10000), + SyscallSucceedsWithValue(1)); + + ASSERT_THAT(RetryEINTR(connect)( + s.get(), reinterpret_cast<struct sockaddr*>(&addr), addrlen), + SyscallSucceeds()); + + ASSERT_THAT(RetryEINTR(connect)( + s.get(), reinterpret_cast<struct sockaddr*>(&addr), addrlen), + SyscallFailsWithErrno(EISCONN)); +} + +// Test that we get an ECONNREFUSED with a blocking socket when no one is +// listening on the other end. +TEST_P(SimpleTcpSocketTest, BlockingConnectRefused) { + FileDescriptor s = + ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); + + // Initialize address to the loopback one. + sockaddr_storage addr = + ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(GetParam())); + socklen_t addrlen = sizeof(addr); + + ASSERT_THAT(RetryEINTR(connect)( + s.get(), reinterpret_cast<struct sockaddr*>(&addr), addrlen), + SyscallFailsWithErrno(ECONNREFUSED)); + + // Avoiding triggering save in destructor of s. 
+  EXPECT_THAT(close(s.release()), SyscallSucceeds());
+}
+
+// Test that connecting to a non-listening port and thus receiving a RST is
+// handled appropriately by the socket - the port that the socket was bound to
+// is released and the expected error is returned.
+TEST_P(SimpleTcpSocketTest, CleanupOnConnectionRefused) {
+  // Create a socket that is known to not be listening. As it is bound but not
+  // listening, when another socket connects to the port, it will refuse.
+  FileDescriptor bound_s =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
+
+  sockaddr_storage bound_addr =
+      ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(GetParam()));
+  socklen_t bound_addrlen = sizeof(bound_addr);
+
+  ASSERT_THAT(
+      bind(bound_s.get(), reinterpret_cast<struct sockaddr*>(&bound_addr),
+           bound_addrlen),
+      SyscallSucceeds());
+
+  // Get the addresses the socket is bound to because the port is chosen by the
+  // stack.
+  ASSERT_THAT(getsockname(bound_s.get(),
+                          reinterpret_cast<struct sockaddr*>(&bound_addr),
+                          &bound_addrlen),
+              SyscallSucceeds());
+
+  // Create, initialize, and bind the socket that is used to test connecting to
+  // the non-listening port.
+  FileDescriptor client_s =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
+  // Initialize client address to the loopback one.
+  sockaddr_storage client_addr =
+      ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(GetParam()));
+  socklen_t client_addrlen = sizeof(client_addr);
+
+  ASSERT_THAT(
+      bind(client_s.get(), reinterpret_cast<struct sockaddr*>(&client_addr),
+           client_addrlen),
+      SyscallSucceeds());
+
+  ASSERT_THAT(getsockname(client_s.get(),
+                          reinterpret_cast<struct sockaddr*>(&client_addr),
+                          &client_addrlen),
+              SyscallSucceeds());
+
+  // Now the test: connect to the bound but not listening socket with the
+  // client socket. The bound socket should return a RST and cause the client
+  // socket to return an error and clean itself up immediately.
+  // The error being ECONNREFUSED diverges from RFC 793, page 37, but does what
+  // Linux does.
+  ASSERT_THAT(connect(client_s.get(),
+                      reinterpret_cast<const struct sockaddr*>(&bound_addr),
+                      bound_addrlen),
+              SyscallFailsWithErrno(ECONNREFUSED));
+
+  FileDescriptor new_s =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
+
+  // Test binding to the address from the client socket. This should be okay
+  // if it was dropped correctly.
+  ASSERT_THAT(
+      bind(new_s.get(), reinterpret_cast<struct sockaddr*>(&client_addr),
+           client_addrlen),
+      SyscallSucceeds());
+
+  // Attempt #2: now that the client address has been re-bound by new_s, a
+  // further connect should fail in the same way as before, not with an
+  // EADDRINUSE.
+  // NOTE(review): this retries the connect on client_s; new_s is only bound
+  // and never connected. Confirm which socket was meant to be used here.
+  ASSERT_THAT(connect(client_s.get(),
+                      reinterpret_cast<const struct sockaddr*>(&bound_addr),
+                      bound_addrlen),
+              SyscallFailsWithErrno(ECONNREFUSED));
+}
+
+// Test that we get an ECONNREFUSED with a nonblocking socket.
+TEST_P(SimpleTcpSocketTest, NonBlockingConnectRefused) {
+  FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(GetParam(), SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP));
+
+  // Initialize address to the loopback one.
+  sockaddr_storage addr =
+      ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(GetParam()));
+  socklen_t addrlen = sizeof(addr);
+
+  ASSERT_THAT(RetryEINTR(connect)(
+                  s.get(), reinterpret_cast<struct sockaddr*>(&addr), addrlen),
+              SyscallFailsWithErrno(EINPROGRESS));
+
+  // We don't need to specify any events to get POLLHUP or POLLERR as these
+  // are added before the poll.
+  struct pollfd poll_fd = {s.get(), /*events=*/0, 0};
+  EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, 1000), SyscallSucceedsWithValue(1));
+
+  // The ECONNREFUSED should cause us to be woken up with POLLHUP or POLLERR.
+  EXPECT_NE(poll_fd.revents & (POLLHUP | POLLERR), 0);
+
+  // Avoid triggering save in destructor of s.
+  EXPECT_THAT(close(s.release()), SyscallSucceeds());
+}
+
+// Test that setting a supported congestion control algorithm succeeds for an
+// unconnected TCP socket.
+TEST_P(SimpleTcpSocketTest, SetCongestionControlSucceedsForSupported) {
+  // This is Linux's net/tcp.h TCP_CA_NAME_MAX.
+  const int kTcpCaNameMax = 16;
+
+  FileDescriptor s =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
+  {
+    const char kSetCC[kTcpCaNameMax] = "reno";
+    ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_CONGESTION, &kSetCC,
+                           strlen(kSetCC)),
+                SyscallSucceedsWithValue(0));
+
+    // Pre-fill the buffer with a non-NUL byte so that an untouched buffer
+    // cannot compare equal to the expected name.
+    char got_cc[kTcpCaNameMax];
+    memset(got_cc, '1', sizeof(got_cc));
+    socklen_t optlen = sizeof(got_cc);
+    ASSERT_THAT(
+        getsockopt(s.get(), IPPROTO_TCP, TCP_CONGESTION, &got_cc, &optlen),
+        SyscallSucceedsWithValue(0));
+    // We ignore optlen here as the linux kernel sets optlen to the lower of the
+    // size of the buffer passed in or kTcpCaNameMax and not the length of the
+    // congestion control algorithm's actual name.
+    //
+    // Compare the complete name, bounded by the buffer size. The bound used
+    // to be sizeof(kTcpCaNameMax), i.e. sizeof(int), which only checked the
+    // first four bytes.
+    EXPECT_EQ(0, strncmp(got_cc, kSetCC, sizeof(got_cc)));
+  }
+  {
+    const char kSetCC[kTcpCaNameMax] = "cubic";
+    ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_CONGESTION, &kSetCC,
+                           strlen(kSetCC)),
+                SyscallSucceedsWithValue(0));
+
+    char got_cc[kTcpCaNameMax];
+    memset(got_cc, '1', sizeof(got_cc));
+    socklen_t optlen = sizeof(got_cc);
+    ASSERT_THAT(
+        getsockopt(s.get(), IPPROTO_TCP, TCP_CONGESTION, &got_cc, &optlen),
+        SyscallSucceedsWithValue(0));
+    // We ignore optlen here as the linux kernel sets optlen to the lower of the
+    // size of the buffer passed in or kTcpCaNameMax and not the length of the
+    // congestion control algorithm's actual name.
+    //
+    // As above: compare the complete name rather than sizeof(int) bytes, so
+    // that "cubic" is not accepted on the strength of "cubi" alone.
+    EXPECT_EQ(0, strncmp(got_cc, kSetCC, sizeof(got_cc)));
+  }
+}
+
+// This test verifies that a getsockopt(...TCP_CONGESTION) behaviour is
+// consistent between linux and gvisor when the passed in buffer is smaller than
+// kTcpCaNameMax.
+TEST_P(SimpleTcpSocketTest, SetGetTCPCongestionShortReadBuffer) {
+  FileDescriptor sock =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
+
+  // Select "cubic", then read the name back through a buffer that is exactly
+  // as large as the name plus its terminator, i.e. smaller than
+  // kTcpCaNameMax.
+  const char kAlgorithm[] = "cubic";
+  ASSERT_THAT(setsockopt(sock.get(), IPPROTO_TCP, TCP_CONGESTION, &kAlgorithm,
+                         strlen(kAlgorithm)),
+              SyscallSucceedsWithValue(0));
+
+  char reported[sizeof(kAlgorithm)];
+  socklen_t reported_len = sizeof(reported);
+  ASSERT_THAT(getsockopt(sock.get(), IPPROTO_TCP, TCP_CONGESTION, &reported,
+                         &reported_len),
+              SyscallSucceedsWithValue(0));
+
+  // The short buffer is expected to be filled completely with the name.
+  EXPECT_EQ(sizeof(reported), reported_len);
+  EXPECT_EQ(0, memcmp(reported, kAlgorithm, sizeof(reported)));
+}
+
+// This test verifies that a getsockopt(...TCP_CONGESTION) behaviour is
+// consistent between linux and gvisor when the passed in buffer is larger than
+// kTcpCaNameMax.
+TEST_P(SimpleTcpSocketTest, SetGetTCPCongestionLargeReadBuffer) {
+  // This is Linux's net/tcp.h TCP_CA_NAME_MAX.
+  const int kTcpCaNameMax = 16;
+
+  FileDescriptor sock =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
+
+  // Select "cubic", then read it back through a buffer that is five bytes
+  // larger than kTcpCaNameMax.
+  const char kAlgorithm[] = "cubic";
+  ASSERT_THAT(setsockopt(sock.get(), IPPROTO_TCP, TCP_CONGESTION, &kAlgorithm,
+                         strlen(kAlgorithm)),
+              SyscallSucceedsWithValue(0));
+
+  char reported[kTcpCaNameMax + 5];
+  socklen_t reported_len = sizeof(reported);
+  ASSERT_THAT(getsockopt(sock.get(), IPPROTO_TCP, TCP_CONGESTION, &reported,
+                         &reported_len),
+              SyscallSucceedsWithValue(0));
+
+  // Linux copies the minimum of kTcpCaNameMax or the length of the passed in
+  // buffer and sets optlen to the number of bytes actually copied
+  // irrespective of the actual length of the congestion control name.
+  EXPECT_EQ(kTcpCaNameMax, reported_len);
+  EXPECT_EQ(0, memcmp(reported, kAlgorithm, sizeof(kAlgorithm)));
+}
+
+// Test that setting an unsupported congestion control algorithm fails for an
+// unconnected TCP socket.
+TEST_P(SimpleTcpSocketTest, SetCongestionControlFailsForUnsupported) { + // This is Linux's net/tcp.h TCP_CA_NAME_MAX. + const int kTcpCaNameMax = 16; + + FileDescriptor s = + ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); + char old_cc[kTcpCaNameMax]; + socklen_t optlen = sizeof(old_cc); + ASSERT_THAT( + getsockopt(s.get(), IPPROTO_TCP, TCP_CONGESTION, &old_cc, &optlen), + SyscallSucceedsWithValue(0)); + + const char kSetCC[] = "invalid_ca_kSetCC"; + ASSERT_THAT( + setsockopt(s.get(), SOL_TCP, TCP_CONGESTION, &kSetCC, strlen(kSetCC)), + SyscallFailsWithErrno(ENOENT)); + + char got_cc[kTcpCaNameMax]; + ASSERT_THAT( + getsockopt(s.get(), IPPROTO_TCP, TCP_CONGESTION, &got_cc, &optlen), + SyscallSucceedsWithValue(0)); + // We ignore optlen here as the linux kernel sets optlen to the lower of the + // size of the buffer passed in or kTcpCaNameMax and not the length of the + // congestion control algorithm's actual name. + EXPECT_EQ(0, memcmp(got_cc, old_cc, sizeof(kTcpCaNameMax))); +} + +TEST_P(SimpleTcpSocketTest, MaxSegDefault) { + FileDescriptor s = + ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); + + constexpr int kDefaultMSS = 536; + int tcp_max_seg; + socklen_t optlen = sizeof(tcp_max_seg); + ASSERT_THAT( + getsockopt(s.get(), IPPROTO_TCP, TCP_MAXSEG, &tcp_max_seg, &optlen), + SyscallSucceedsWithValue(0)); + + EXPECT_EQ(kDefaultMSS, tcp_max_seg); + EXPECT_EQ(sizeof(tcp_max_seg), optlen); +} + +TEST_P(SimpleTcpSocketTest, SetMaxSeg) { + FileDescriptor s = + ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); + + constexpr int kDefaultMSS = 536; + constexpr int kTCPMaxSeg = 1024; + ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_MAXSEG, &kTCPMaxSeg, + sizeof(kTCPMaxSeg)), + SyscallSucceedsWithValue(0)); + + // Linux actually never returns the user_mss value. 
It will always return the + // default MSS value defined above for an unconnected socket and always return + // the actual current MSS for a connected one. + int optval; + socklen_t optlen = sizeof(optval); + ASSERT_THAT(getsockopt(s.get(), IPPROTO_TCP, TCP_MAXSEG, &optval, &optlen), + SyscallSucceedsWithValue(0)); + + EXPECT_EQ(kDefaultMSS, optval); + EXPECT_EQ(sizeof(optval), optlen); +} + +TEST_P(SimpleTcpSocketTest, SetMaxSegFailsForInvalidMSSValues) { + FileDescriptor s = + ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); + + { + constexpr int tcp_max_seg = 10; + ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_MAXSEG, &tcp_max_seg, + sizeof(tcp_max_seg)), + SyscallFailsWithErrno(EINVAL)); + } + { + constexpr int tcp_max_seg = 75000; + ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_MAXSEG, &tcp_max_seg, + sizeof(tcp_max_seg)), + SyscallFailsWithErrno(EINVAL)); + } +} + +TEST_P(SimpleTcpSocketTest, SetTCPUserTimeout) { + FileDescriptor s = + ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); + + { + constexpr int kTCPUserTimeout = -1; + EXPECT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_USER_TIMEOUT, + &kTCPUserTimeout, sizeof(kTCPUserTimeout)), + SyscallFailsWithErrno(EINVAL)); + } + + // kTCPUserTimeout is in milliseconds. + constexpr int kTCPUserTimeout = 100; + ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_USER_TIMEOUT, + &kTCPUserTimeout, sizeof(kTCPUserTimeout)), + SyscallSucceedsWithValue(0)); + int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT( + getsockopt(s.get(), IPPROTO_TCP, TCP_USER_TIMEOUT, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kTCPUserTimeout); +} + +TEST_P(SimpleTcpSocketTest, SetTCPDeferAcceptNeg) { + FileDescriptor s = + ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); + + // -ve TCP_DEFER_ACCEPT is same as setting it to zero. 
+ constexpr int kNeg = -1; + EXPECT_THAT( + setsockopt(s.get(), IPPROTO_TCP, TCP_DEFER_ACCEPT, &kNeg, sizeof(kNeg)), + SyscallSucceeds()); + int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT( + getsockopt(s.get(), IPPROTO_TCP, TCP_DEFER_ACCEPT, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, 0); +} + +TEST_P(SimpleTcpSocketTest, GetTCPDeferAcceptDefault) { + FileDescriptor s = + ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); + + int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT( + getsockopt(s.get(), IPPROTO_TCP, TCP_DEFER_ACCEPT, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, 0); +} + +TEST_P(SimpleTcpSocketTest, SetTCPDeferAcceptGreaterThanZero) { + FileDescriptor s = + ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); + // kTCPDeferAccept is in seconds. + // NOTE: linux translates seconds to # of retries and back from + // #of retries to seconds. Which means only certain values + // translate back exactly. That's why we use 3 here, a value of + // 5 will result in us getting back 7 instead of 5 in the + // getsockopt. 
+ constexpr int kTCPDeferAccept = 3; + ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_DEFER_ACCEPT, + &kTCPDeferAccept, sizeof(kTCPDeferAccept)), + SyscallSucceeds()); + int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT( + getsockopt(s.get(), IPPROTO_TCP, TCP_DEFER_ACCEPT, &get, &get_len), + SyscallSucceeds()); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kTCPDeferAccept); +} + +TEST_P(SimpleTcpSocketTest, RecvOnClosedSocket) { + auto s = + ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); + char buf[1]; + EXPECT_THAT(recv(s.get(), buf, 0, 0), SyscallFailsWithErrno(ENOTCONN)); + EXPECT_THAT(recv(s.get(), buf, sizeof(buf), 0), + SyscallFailsWithErrno(ENOTCONN)); +} + +TEST_P(SimpleTcpSocketTest, TCPConnectSoRcvBufRace) { + auto s = ASSERT_NO_ERRNO_AND_VALUE( + Socket(GetParam(), SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP)); + sockaddr_storage addr = + ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(GetParam())); + socklen_t addrlen = sizeof(addr); + + RetryEINTR(connect)(s.get(), reinterpret_cast<struct sockaddr*>(&addr), + addrlen); + int buf_sz = 1 << 18; + EXPECT_THAT( + setsockopt(s.get(), SOL_SOCKET, SO_RCVBUF, &buf_sz, sizeof(buf_sz)), + SyscallSucceedsWithValue(0)); +} + +TEST_P(SimpleTcpSocketTest, SetTCPSynCntLessThanOne) { + FileDescriptor s = + ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); + + int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT(getsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + int default_syn_cnt = get; + + { + // TCP_SYNCNT less than 1 should be rejected with an EINVAL. + constexpr int kZero = 0; + EXPECT_THAT( + setsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &kZero, sizeof(kZero)), + SyscallFailsWithErrno(EINVAL)); + + // TCP_SYNCNT less than 1 should be rejected with an EINVAL. 
+ constexpr int kNeg = -1; + EXPECT_THAT( + setsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &kNeg, sizeof(kNeg)), + SyscallFailsWithErrno(EINVAL)); + + int get = -1; + socklen_t get_len = sizeof(get); + + ASSERT_THAT(getsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(default_syn_cnt, get); + } +} + +TEST_P(SimpleTcpSocketTest, GetTCPSynCntDefault) { + FileDescriptor s = + ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); + + int get = -1; + socklen_t get_len = sizeof(get); + constexpr int kDefaultSynCnt = 6; + + ASSERT_THAT(getsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kDefaultSynCnt); +} + +TEST_P(SimpleTcpSocketTest, SetTCPSynCntGreaterThanOne) { + FileDescriptor s = + ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); + constexpr int kTCPSynCnt = 20; + ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &kTCPSynCnt, + sizeof(kTCPSynCnt)), + SyscallSucceeds()); + + int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT(getsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &get, &get_len), + SyscallSucceeds()); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kTCPSynCnt); +} + +TEST_P(SimpleTcpSocketTest, SetTCPSynCntAboveMax) { + FileDescriptor s = + ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); + int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT(getsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + int default_syn_cnt = get; + { + constexpr int kTCPSynCnt = 256; + ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &kTCPSynCnt, + sizeof(kTCPSynCnt)), + SyscallFailsWithErrno(EINVAL)); + + int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT(getsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &get, 
&get_len), + SyscallSucceeds()); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, default_syn_cnt); + } +} + +TEST_P(SimpleTcpSocketTest, SetTCPWindowClampBelowMinRcvBuf) { + FileDescriptor s = + ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); + + // Discover minimum receive buf by setting a really low value + // for the receive buffer. + constexpr int kZero = 0; + EXPECT_THAT(setsockopt(s.get(), SOL_SOCKET, SO_RCVBUF, &kZero, sizeof(kZero)), + SyscallSucceeds()); + + // Now retrieve the minimum value for SO_RCVBUF as the set above should + // have caused SO_RCVBUF for the socket to be set to the minimum. + int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT(getsockopt(s.get(), SOL_SOCKET, SO_RCVBUF, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + int min_so_rcvbuf = get; + + { + // TCP_WINDOW_CLAMP less than min_so_rcvbuf/2 should be set to + // min_so_rcvbuf/2. + int below_half_min_rcvbuf = min_so_rcvbuf / 2 - 1; + EXPECT_THAT( + setsockopt(s.get(), IPPROTO_TCP, TCP_WINDOW_CLAMP, + &below_half_min_rcvbuf, sizeof(below_half_min_rcvbuf)), + SyscallSucceeds()); + + int get = -1; + socklen_t get_len = sizeof(get); + + ASSERT_THAT( + getsockopt(s.get(), IPPROTO_TCP, TCP_WINDOW_CLAMP, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(min_so_rcvbuf / 2, get); + } +} + +TEST_P(SimpleTcpSocketTest, SetTCPWindowClampZeroClosedSocket) { + FileDescriptor s = + ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); + constexpr int kZero = 0; + ASSERT_THAT( + setsockopt(s.get(), IPPROTO_TCP, TCP_WINDOW_CLAMP, &kZero, sizeof(kZero)), + SyscallSucceeds()); + + int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT( + getsockopt(s.get(), IPPROTO_TCP, TCP_WINDOW_CLAMP, &get, &get_len), + SyscallSucceeds()); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kZero); +} + +TEST_P(SimpleTcpSocketTest, 
SetTCPWindowClampAboveHalfMinRcvBuf) { + FileDescriptor s = + ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); + + // Discover minimum receive buf by setting a really low value + // for the receive buffer. + constexpr int kZero = 0; + EXPECT_THAT(setsockopt(s.get(), SOL_SOCKET, SO_RCVBUF, &kZero, sizeof(kZero)), + SyscallSucceeds()); + + // Now retrieve the minimum value for SO_RCVBUF as the set above should + // have caused SO_RCVBUF for the socket to be set to the minimum. + int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT(getsockopt(s.get(), SOL_SOCKET, SO_RCVBUF, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + int min_so_rcvbuf = get; + + { + int above_half_min_rcv_buf = min_so_rcvbuf / 2 + 1; + EXPECT_THAT( + setsockopt(s.get(), IPPROTO_TCP, TCP_WINDOW_CLAMP, + &above_half_min_rcv_buf, sizeof(above_half_min_rcv_buf)), + SyscallSucceeds()); + + int get = -1; + socklen_t get_len = sizeof(get); + + ASSERT_THAT( + getsockopt(s.get(), IPPROTO_TCP, TCP_WINDOW_CLAMP, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(above_half_min_rcv_buf, get); + } +} + +INSTANTIATE_TEST_SUITE_P(AllInetTests, SimpleTcpSocketTest, + ::testing::Values(AF_INET, AF_INET6)); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/tgkill.cc b/test/syscalls/linux/tgkill.cc new file mode 100644 index 000000000..80acae5de --- /dev/null +++ b/test/syscalls/linux/tgkill.cc @@ -0,0 +1,48 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <errno.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <unistd.h> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "test/util/signal_util.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +TEST(TgkillTest, InvalidTID) { + EXPECT_THAT(tgkill(getpid(), -1, 0), SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(tgkill(getpid(), 0, 0), SyscallFailsWithErrno(EINVAL)); +} + +TEST(TgkillTest, InvalidTGID) { + EXPECT_THAT(tgkill(-1, gettid(), 0), SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(tgkill(0, gettid(), 0), SyscallFailsWithErrno(EINVAL)); +} + +TEST(TgkillTest, ValidInput) { + EXPECT_THAT(tgkill(getpid(), gettid(), 0), SyscallSucceeds()); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/time.cc b/test/syscalls/linux/time.cc new file mode 100644 index 000000000..e75bba669 --- /dev/null +++ b/test/syscalls/linux/time.cc @@ -0,0 +1,107 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include <errno.h> +#include <time.h> + +#include "gtest/gtest.h" +#include "test/util/proc_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +constexpr long kFudgeSeconds = 5; + +#if defined(__x86_64__) || defined(__i386__) +// Mimics the time(2) wrapper from glibc prior to 2.15. +time_t vsyscall_time(time_t* t) { + constexpr uint64_t kVsyscallTimeEntry = 0xffffffffff600400; + return reinterpret_cast<time_t (*)(time_t*)>(kVsyscallTimeEntry)(t); +} + +TEST(TimeTest, VsyscallTime_Succeeds) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsVsyscallEnabled())); + + time_t t1, t2; + + { + const DisableSave ds; // Timing assertions. + EXPECT_THAT(time(&t1), SyscallSucceeds()); + EXPECT_THAT(vsyscall_time(&t2), SyscallSucceeds()); + } + + // Time should be monotonic. + EXPECT_LE(static_cast<long>(t1), static_cast<long>(t2)); + + // Check that it's within kFudge seconds. + EXPECT_LE(static_cast<long>(t2), static_cast<long>(t1) + kFudgeSeconds); + + // Redo with save. + EXPECT_THAT(time(&t1), SyscallSucceeds()); + EXPECT_THAT(vsyscall_time(&t2), SyscallSucceeds()); + + // Time should be monotonic. + EXPECT_LE(static_cast<long>(t1), static_cast<long>(t2)); +} + +TEST(TimeTest, VsyscallTime_InvalidAddressSIGSEGV) { + EXPECT_EXIT(vsyscall_time(reinterpret_cast<time_t*>(0x1)), + ::testing::KilledBySignal(SIGSEGV), ""); +} + +// Mimics the gettimeofday(2) wrapper from the Go runtime <= 1.2. 
+int vsyscall_gettimeofday(struct timeval* tv, struct timezone* tz) { + constexpr uint64_t kVsyscallGettimeofdayEntry = 0xffffffffff600000; + return reinterpret_cast<int (*)(struct timeval*, struct timezone*)>( + kVsyscallGettimeofdayEntry)(tv, tz); +} + +TEST(TimeTest, VsyscallGettimeofday_Succeeds) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsVsyscallEnabled())); + + struct timeval tv1, tv2; + struct timezone tz1, tz2; + + { + const DisableSave ds; // Timing assertions. + EXPECT_THAT(gettimeofday(&tv1, &tz1), SyscallSucceeds()); + EXPECT_THAT(vsyscall_gettimeofday(&tv2, &tz2), SyscallSucceeds()); + } + + // See above. + EXPECT_LE(static_cast<long>(tv1.tv_sec), static_cast<long>(tv2.tv_sec)); + EXPECT_LE(static_cast<long>(tv2.tv_sec), + static_cast<long>(tv1.tv_sec) + kFudgeSeconds); + + // Redo with save. + EXPECT_THAT(gettimeofday(&tv1, &tz1), SyscallSucceeds()); + EXPECT_THAT(vsyscall_gettimeofday(&tv2, &tz2), SyscallSucceeds()); +} + +TEST(TimeTest, VsyscallGettimeofday_InvalidAddressSIGSEGV) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsVsyscallEnabled())); + + EXPECT_EXIT(vsyscall_gettimeofday(reinterpret_cast<struct timeval*>(0x1), + reinterpret_cast<struct timezone*>(0x1)), + ::testing::KilledBySignal(SIGSEGV), ""); +} +#endif + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/timerfd.cc b/test/syscalls/linux/timerfd.cc new file mode 100644 index 000000000..c4f8fdd7a --- /dev/null +++ b/test/syscalls/linux/timerfd.cc @@ -0,0 +1,273 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <errno.h> +#include <poll.h> +#include <sys/timerfd.h> +#include <time.h> + +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/util/file_descriptor.h" +#include "test/util/posix_error.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// Wrapper around timerfd_create(2) that returns a FileDescriptor. +PosixErrorOr<FileDescriptor> TimerfdCreate(int clockid, int flags) { + int fd = timerfd_create(clockid, flags); + MaybeSave(); + if (fd < 0) { + return PosixError(errno, "timerfd_create failed"); + } + return FileDescriptor(fd); +} + +// In tests that race a timerfd with a sleep, some slack is required because: +// +// - Timerfd expirations are asynchronous with respect to nanosleeps. +// +// - Because clock_gettime(CLOCK_MONOTONIC) is implemented through the VDSO, +// it technically uses a closely-related, but distinct, time domain from the +// CLOCK_MONOTONIC used to trigger timerfd expirations. The same applies to +// CLOCK_BOOTTIME which is an alias for CLOCK_MONOTONIC. 
+absl::Duration TimerSlack() { return absl::Milliseconds(500); } + +class TimerfdTest : public ::testing::TestWithParam<int> {}; + +TEST_P(TimerfdTest, IsInitiallyStopped) { + auto const tfd = ASSERT_NO_ERRNO_AND_VALUE(TimerfdCreate(GetParam(), 0)); + struct itimerspec its = {}; + ASSERT_THAT(timerfd_gettime(tfd.get(), &its), SyscallSucceeds()); + EXPECT_EQ(0, its.it_value.tv_sec); + EXPECT_EQ(0, its.it_value.tv_nsec); +} + +TEST_P(TimerfdTest, SingleShot) { + constexpr absl::Duration kDelay = absl::Seconds(1); + + auto const tfd = ASSERT_NO_ERRNO_AND_VALUE(TimerfdCreate(GetParam(), 0)); + struct itimerspec its = {}; + its.it_value = absl::ToTimespec(kDelay); + ASSERT_THAT(timerfd_settime(tfd.get(), /* flags = */ 0, &its, nullptr), + SyscallSucceeds()); + + // The timer should fire exactly once since the interval is zero. + absl::SleepFor(kDelay + TimerSlack()); + uint64_t val = 0; + ASSERT_THAT(ReadFd(tfd.get(), &val, sizeof(uint64_t)), + SyscallSucceedsWithValue(sizeof(uint64_t))); + EXPECT_EQ(1, val); +} + +TEST_P(TimerfdTest, Periodic) { + constexpr absl::Duration kDelay = absl::Seconds(1); + constexpr int kPeriods = 3; + + auto const tfd = ASSERT_NO_ERRNO_AND_VALUE(TimerfdCreate(GetParam(), 0)); + struct itimerspec its = {}; + its.it_value = absl::ToTimespec(kDelay); + its.it_interval = absl::ToTimespec(kDelay); + ASSERT_THAT(timerfd_settime(tfd.get(), /* flags = */ 0, &its, nullptr), + SyscallSucceeds()); + + // Expect to see at least kPeriods expirations. More may occur due to the + // timer slack, or due to delays from scheduling or save/restore. 
+ absl::SleepFor(kPeriods * kDelay + TimerSlack()); + uint64_t val = 0; + ASSERT_THAT(ReadFd(tfd.get(), &val, sizeof(uint64_t)), + SyscallSucceedsWithValue(sizeof(uint64_t))); + EXPECT_GE(val, kPeriods); +} + +TEST_P(TimerfdTest, BlockingRead) { + constexpr absl::Duration kDelay = absl::Seconds(3); + + auto const tfd = ASSERT_NO_ERRNO_AND_VALUE(TimerfdCreate(GetParam(), 0)); + struct itimerspec its = {}; + its.it_value.tv_sec = absl::ToInt64Seconds(kDelay); + auto const start_time = absl::Now(); + ASSERT_THAT(timerfd_settime(tfd.get(), /* flags = */ 0, &its, nullptr), + SyscallSucceeds()); + + // read should block until the timer fires. + uint64_t val = 0; + ASSERT_THAT(ReadFd(tfd.get(), &val, sizeof(uint64_t)), + SyscallSucceedsWithValue(sizeof(uint64_t))); + auto const end_time = absl::Now(); + EXPECT_EQ(1, val); + EXPECT_GE((end_time - start_time) + TimerSlack(), kDelay); +} + +TEST_P(TimerfdTest, NonblockingRead_NoRandomSave) { + constexpr absl::Duration kDelay = absl::Seconds(5); + + auto const tfd = + ASSERT_NO_ERRNO_AND_VALUE(TimerfdCreate(GetParam(), TFD_NONBLOCK)); + + // Since the timer is initially disabled and has never fired, read should + // return EAGAIN. + uint64_t val = 0; + ASSERT_THAT(ReadFd(tfd.get(), &val, sizeof(uint64_t)), + SyscallFailsWithErrno(EAGAIN)); + + DisableSave ds; // Timing-sensitive. + + // Arm the timer. + struct itimerspec its = {}; + its.it_value.tv_sec = absl::ToInt64Seconds(kDelay); + ASSERT_THAT(timerfd_settime(tfd.get(), /* flags = */ 0, &its, nullptr), + SyscallSucceeds()); + + // Since the timer has not yet fired, read should return EAGAIN. + ASSERT_THAT(ReadFd(tfd.get(), &val, sizeof(uint64_t)), + SyscallFailsWithErrno(EAGAIN)); + + ds.reset(); // No longer timing-sensitive. + + // After the timer fires, read should indicate 1 expiration. 
+ absl::SleepFor(kDelay + TimerSlack()); + ASSERT_THAT(ReadFd(tfd.get(), &val, sizeof(uint64_t)), + SyscallSucceedsWithValue(sizeof(uint64_t))); + EXPECT_EQ(1, val); + + // The successful read should have reset the number of expirations. + ASSERT_THAT(ReadFd(tfd.get(), &val, sizeof(uint64_t)), + SyscallFailsWithErrno(EAGAIN)); +} + +TEST_P(TimerfdTest, BlockingPoll_SetTimeResetsExpirations) { + constexpr absl::Duration kDelay = absl::Seconds(3); + + auto const tfd = + ASSERT_NO_ERRNO_AND_VALUE(TimerfdCreate(GetParam(), TFD_NONBLOCK)); + struct itimerspec its = {}; + its.it_value.tv_sec = absl::ToInt64Seconds(kDelay); + auto const start_time = absl::Now(); + ASSERT_THAT(timerfd_settime(tfd.get(), /* flags = */ 0, &its, nullptr), + SyscallSucceeds()); + + // poll should block until the timer fires. + struct pollfd pfd = {}; + pfd.fd = tfd.get(); + pfd.events = POLLIN; + ASSERT_THAT(poll(&pfd, /* nfds = */ 1, + /* timeout = */ 2 * absl::ToInt64Seconds(kDelay) * 1000), + SyscallSucceedsWithValue(1)); + auto const end_time = absl::Now(); + EXPECT_EQ(POLLIN, pfd.revents); + EXPECT_GE((end_time - start_time) + TimerSlack(), kDelay); + + // Call timerfd_settime again with a value of 0. This should reset the number + // of expirations to 0, causing read to return EAGAIN since the timerfd is + // non-blocking. + its.it_value.tv_sec = 0; + ASSERT_THAT(timerfd_settime(tfd.get(), /* flags = */ 0, &its, nullptr), + SyscallSucceeds()); + uint64_t val = 0; + ASSERT_THAT(ReadFd(tfd.get(), &val, sizeof(uint64_t)), + SyscallFailsWithErrno(EAGAIN)); +} + +TEST_P(TimerfdTest, SetAbsoluteTime) { + constexpr absl::Duration kDelay = absl::Seconds(3); + + // Use a non-blocking timerfd so that if TFD_TIMER_ABSTIME is incorrectly + // non-functional, we get EAGAIN rather than a test timeout. 
+ auto const tfd = + ASSERT_NO_ERRNO_AND_VALUE(TimerfdCreate(GetParam(), TFD_NONBLOCK)); + struct itimerspec its = {}; + ASSERT_THAT(clock_gettime(GetParam(), &its.it_value), SyscallSucceeds()); + its.it_value.tv_sec += absl::ToInt64Seconds(kDelay); + ASSERT_THAT(timerfd_settime(tfd.get(), TFD_TIMER_ABSTIME, &its, nullptr), + SyscallSucceeds()); + + absl::SleepFor(kDelay + TimerSlack()); + uint64_t val = 0; + ASSERT_THAT(ReadFd(tfd.get(), &val, sizeof(uint64_t)), + SyscallSucceedsWithValue(sizeof(uint64_t))); + EXPECT_EQ(1, val); +} + +TEST_P(TimerfdTest, IllegalSeek) { + auto const tfd = ASSERT_NO_ERRNO_AND_VALUE(TimerfdCreate(GetParam(), 0)); + if (!IsRunningWithVFS1()) { + EXPECT_THAT(lseek(tfd.get(), 0, SEEK_SET), SyscallFailsWithErrno(ESPIPE)); + } +} + +TEST_P(TimerfdTest, IllegalPread) { + auto const tfd = ASSERT_NO_ERRNO_AND_VALUE(TimerfdCreate(GetParam(), 0)); + int val; + EXPECT_THAT(pread(tfd.get(), &val, sizeof(val), 0), + SyscallFailsWithErrno(ESPIPE)); +} + +TEST_P(TimerfdTest, IllegalPwrite) { + auto const tfd = ASSERT_NO_ERRNO_AND_VALUE(TimerfdCreate(GetParam(), 0)); + EXPECT_THAT(pwrite(tfd.get(), "x", 1, 0), SyscallFailsWithErrno(ESPIPE)); + if (!IsRunningWithVFS1()) { + } +} + +TEST_P(TimerfdTest, IllegalWrite) { + auto const tfd = + ASSERT_NO_ERRNO_AND_VALUE(TimerfdCreate(GetParam(), TFD_NONBLOCK)); + uint64_t val = 0; + EXPECT_THAT(write(tfd.get(), &val, sizeof(val)), + SyscallFailsWithErrno(EINVAL)); +} + +std::string PrintClockId(::testing::TestParamInfo<int> info) { + switch (info.param) { + case CLOCK_MONOTONIC: + return "CLOCK_MONOTONIC"; + case CLOCK_BOOTTIME: + return "CLOCK_BOOTTIME"; + default: + return absl::StrCat(info.param); + } +} + +INSTANTIATE_TEST_SUITE_P(AllTimerTypes, TimerfdTest, + ::testing::Values(CLOCK_MONOTONIC, CLOCK_BOOTTIME), + PrintClockId); + +TEST(TimerfdClockRealtimeTest, ClockRealtime) { + // Since CLOCK_REALTIME can, by definition, change, we can't make any + // non-flaky assertions about the amount of time it 
takes for a + // CLOCK_REALTIME-based timer to expire. Just check that it expires at all, + // and hope it happens before the test times out. + constexpr int kDelaySecs = 1; + + auto const tfd = ASSERT_NO_ERRNO_AND_VALUE(TimerfdCreate(CLOCK_REALTIME, 0)); + struct itimerspec its = {}; + its.it_value.tv_sec = kDelaySecs; + ASSERT_THAT(timerfd_settime(tfd.get(), /* flags = */ 0, &its, nullptr), + SyscallSucceeds()); + + uint64_t val = 0; + ASSERT_THAT(ReadFd(tfd.get(), &val, sizeof(uint64_t)), + SyscallSucceedsWithValue(sizeof(uint64_t))); + EXPECT_EQ(1, val); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/timers.cc b/test/syscalls/linux/timers.cc new file mode 100644 index 000000000..4b3c44527 --- /dev/null +++ b/test/syscalls/linux/timers.cc @@ -0,0 +1,662 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#include <errno.h>
+#include <signal.h>
+#include <sys/resource.h>
+#include <sys/time.h>
+#include <syscall.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <atomic>
+
+#include "gtest/gtest.h"
+#include "absl/flags/flag.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/util/cleanup.h"
+#include "test/util/logging.h"
+#include "test/util/multiprocess_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/signal_util.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+ABSL_FLAG(bool, timers_test_sleep, false,
+          "If true, sleep forever instead of running tests.");
+
+using ::testing::_;
+using ::testing::AnyOf;
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+// Fallback definition for when no included header provides CPUCLOCK_PROF.
+#ifndef CPUCLOCK_PROF
+#define CPUCLOCK_PROF 0
+#endif  // CPUCLOCK_PROF
+
+// Returns the CPU time reported by the pid-specific CPUCLOCK_PROF clock of
+// `pid`, or a PosixError built from errno if clock_gettime fails.
+PosixErrorOr<absl::Duration> ProcessCPUTime(pid_t pid) {
+  // Use pid-specific CPUCLOCK_PROF, which is the clock used to enforce
+  // RLIMIT_CPU.
+  clockid_t clockid = (~static_cast<clockid_t>(pid) << 3) | CPUCLOCK_PROF;
+
+  struct timespec ts;
+  int ret = clock_gettime(clockid, &ts);
+  if (ret < 0) {
+    return PosixError(errno, "clock_gettime failed");
+  }
+
+  return absl::DurationFromTimespec(ts);
+}
+
+// Signal handler that only checks that the delivered signal is SIGXCPU.
+void NoopSignalHandler(int signo) {
+  TEST_CHECK_MSG(SIGXCPU == signo,
+                 "NoopSigHandler did not receive expected signal");
+}
+
+// Signal handler that checks that the delivered signal is SIGXCPU and then
+// restores the default disposition for SIGXCPU.
+void UninstallingSignalHandler(int signo) {
+  TEST_CHECK_MSG(SIGXCPU == signo,
+                 "UninstallingSignalHandler did not receive expected signal");
+  struct sigaction rev_action;
+  rev_action.sa_handler = SIG_DFL;
+  rev_action.sa_flags = 0;
+  sigemptyset(&rev_action.sa_mask);
+  sigaction(SIGXCPU, &rev_action, nullptr);
+}
+
+// Forks a child that sets an RLIMIT_CPU soft limit of kSoftLimit and then
+// spins, and checks that the child is terminated by SIGXCPU with a CPU time
+// between the soft and the hard limit.
+TEST(TimerTest, ProcessKilledOnCPUSoftLimit) {
+  constexpr absl::Duration kSoftLimit = absl::Seconds(1);
+  constexpr absl::Duration kHardLimit = absl::Seconds(3);
+
+  struct rlimit cpu_limits;
+  cpu_limits.rlim_cur = absl::ToInt64Seconds(kSoftLimit);
+  cpu_limits.rlim_max = absl::ToInt64Seconds(kHardLimit);
+
+  int pid = fork();
+  MaybeSave();
+  if (pid == 0) {
+    // Child: apply the limit, then burn CPU until a signal arrives.
+    TEST_PCHECK(setrlimit(RLIMIT_CPU, &cpu_limits) == 0);
+    MaybeSave();
+    for (;;) {
+    }
+  }
+  ASSERT_THAT(pid, SyscallSucceeds());
+  // Reap the child when the test body ends and verify how it terminated.
+  auto c = Cleanup([pid] {
+    int status;
+    EXPECT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid));
+    EXPECT_TRUE(WIFSIGNALED(status));
+    EXPECT_EQ(WTERMSIG(status), SIGXCPU);
+  });
+
+  // Wait for the child to exit, but do not reap it. This will allow us to check
+  // its CPU usage while it is zombied.
+  EXPECT_THAT(waitid(P_PID, pid, nullptr, WEXITED | WNOWAIT),
+              SyscallSucceeds());
+
+  // Assert that the child spent 1s of CPU before getting killed.
+  //
+  // We must be careful to use CPUCLOCK_PROF, the same clock used for RLIMIT_CPU
+  // enforcement, to get correct results. Note that this is slightly different
+  // from rusage-reported CPU usage:
+  //
+  // RLIMIT_CPU, CPUCLOCK_PROF use kernel/sched/cputime.c:thread_group_cputime.
+  // rusage uses kernel/sched/cputime.c:thread_group_cputime_adjusted.
+  absl::Duration cpu = ASSERT_NO_ERRNO_AND_VALUE(ProcessCPUTime(pid));
+  EXPECT_GE(cpu, kSoftLimit);
+
+  // Child did not make it to the hard limit.
+  //
+  // Linux sends SIGXCPU synchronously with CPU tick updates. See
+  // kernel/time/timer.c:update_process_times:
+  //   => account_process_tick  // update task CPU usage.
+  //   => run_posix_cpu_timers  // enforce RLIMIT_CPU, sending signal.
+  //
+  // Thus, only chance for this to flake is if the system time required to
+  // deliver the signal exceeds 2s.
+  EXPECT_LT(cpu, kHardLimit);
+}
+
+TEST(TimerTest, ProcessPingedRepeatedlyAfterCPUSoftLimit) {
+  struct sigaction new_action;
+  new_action.sa_handler = UninstallingSignalHandler;
+  new_action.sa_flags = 0;
+  sigemptyset(&new_action.sa_mask);
+
+  constexpr absl::Duration kSoftLimit = absl::Seconds(1);
+  constexpr absl::Duration kHardLimit = absl::Seconds(10);
+
+  struct rlimit cpu_limits;
+  cpu_limits.rlim_cur = absl::ToInt64Seconds(kSoftLimit);
+  cpu_limits.rlim_max = absl::ToInt64Seconds(kHardLimit);
+
+  int pid = fork();
+  MaybeSave();
+  if (pid == 0) {
+    TEST_PCHECK(sigaction(SIGXCPU, &new_action, nullptr) == 0);
+    MaybeSave();
+    TEST_PCHECK(setrlimit(RLIMIT_CPU, &cpu_limits) == 0);
+    MaybeSave();
+    for (;;) {
+    }
+  }
+  ASSERT_THAT(pid, SyscallSucceeds());
+  auto c = Cleanup([pid] {
+    int status;
+    EXPECT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid));
+    EXPECT_TRUE(WIFSIGNALED(status));
+    EXPECT_EQ(WTERMSIG(status), SIGXCPU);
+  });
+
+  // Wait for the child to exit, but do not reap it. This will allow us to check
+  // its CPU usage while it is zombied.
+  EXPECT_THAT(waitid(P_PID, pid, nullptr, WEXITED | WNOWAIT),
+              SyscallSucceeds());
+
+  absl::Duration cpu = ASSERT_NO_ERRNO_AND_VALUE(ProcessCPUTime(pid));
+  // Following signals come every CPU second.
+  EXPECT_GE(cpu, kSoftLimit + absl::Seconds(1));
+
+  // Child did not make it to the hard limit.
+  //
+  // As above, should not flake.
+ EXPECT_LT(cpu, kHardLimit); +} + +TEST(TimerTest, ProcessKilledOnCPUHardLimit) { + struct sigaction new_action; + new_action.sa_handler = NoopSignalHandler; + new_action.sa_flags = 0; + sigemptyset(&new_action.sa_mask); + + constexpr absl::Duration kSoftLimit = absl::Seconds(1); + constexpr absl::Duration kHardLimit = absl::Seconds(3); + + struct rlimit cpu_limits; + cpu_limits.rlim_cur = absl::ToInt64Seconds(kSoftLimit); + cpu_limits.rlim_max = absl::ToInt64Seconds(kHardLimit); + + int pid = fork(); + MaybeSave(); + if (pid == 0) { + TEST_PCHECK(sigaction(SIGXCPU, &new_action, nullptr) == 0); + MaybeSave(); + TEST_PCHECK(setrlimit(RLIMIT_CPU, &cpu_limits) == 0); + MaybeSave(); + for (;;) { + } + } + ASSERT_THAT(pid, SyscallSucceeds()); + auto c = Cleanup([pid] { + int status; + EXPECT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid)); + EXPECT_TRUE(WIFSIGNALED(status)); + EXPECT_EQ(WTERMSIG(status), SIGKILL); + }); + + // Wait for the child to exit, but do not reap it. This will allow us to check + // its CPU usage while it is zombied. + EXPECT_THAT(waitid(P_PID, pid, nullptr, WEXITED | WNOWAIT), + SyscallSucceeds()); + + absl::Duration cpu = ASSERT_NO_ERRNO_AND_VALUE(ProcessCPUTime(pid)); + EXPECT_GE(cpu, kHardLimit); +} + +// RAII type for a kernel "POSIX" interval timer. (The kernel provides system +// calls such as timer_create that behave very similarly, but not identically, +// to those described by timer_create(2); in particular, the kernel does not +// implement SIGEV_THREAD. glibc builds POSIX-compliant interval timers based on +// these kernel interval timers.) +// +// Compare implementation to FileDescriptor. 
+class IntervalTimer { + public: + IntervalTimer() = default; + + explicit IntervalTimer(int id) { set_id(id); } + + IntervalTimer(IntervalTimer&& orig) : id_(orig.release()) {} + + IntervalTimer& operator=(IntervalTimer&& orig) { + if (this == &orig) return *this; + reset(orig.release()); + return *this; + } + + IntervalTimer(const IntervalTimer& other) = delete; + IntervalTimer& operator=(const IntervalTimer& other) = delete; + + ~IntervalTimer() { reset(); } + + int get() const { return id_; } + + int release() { + int const id = id_; + id_ = -1; + return id; + } + + void reset() { reset(-1); } + + void reset(int id) { + if (id_ >= 0) { + TEST_PCHECK(syscall(SYS_timer_delete, id_) == 0); + MaybeSave(); + } + set_id(id); + } + + PosixErrorOr<struct itimerspec> Set( + int flags, const struct itimerspec& new_value) const { + struct itimerspec old_value = {}; + if (syscall(SYS_timer_settime, id_, flags, &new_value, &old_value) < 0) { + return PosixError(errno, "timer_settime"); + } + MaybeSave(); + return old_value; + } + + PosixErrorOr<struct itimerspec> Get() const { + struct itimerspec curr_value = {}; + if (syscall(SYS_timer_gettime, id_, &curr_value) < 0) { + return PosixError(errno, "timer_gettime"); + } + MaybeSave(); + return curr_value; + } + + PosixErrorOr<int> Overruns() const { + int rv = syscall(SYS_timer_getoverrun, id_); + if (rv < 0) { + return PosixError(errno, "timer_getoverrun"); + } + MaybeSave(); + return rv; + } + + private: + void set_id(int id) { id_ = std::max(id, -1); } + + // Kernel timer_t is int; glibc timer_t is void*. 
+ int id_ = -1; +}; + +PosixErrorOr<IntervalTimer> TimerCreate(clockid_t clockid, + const struct sigevent& sev) { + int timerid; + int ret = syscall(SYS_timer_create, clockid, &sev, &timerid); + if (ret < 0) { + return PosixError(errno, "timer_create"); + } + if (ret > 0) { + return PosixError(EINVAL, "timer_create should never return positive"); + } + MaybeSave(); + return IntervalTimer(timerid); +} + +// See timerfd.cc:TimerSlack() for rationale. +constexpr absl::Duration kTimerSlack = absl::Milliseconds(500); + +TEST(IntervalTimerTest, IsInitiallyStopped) { + struct sigevent sev = {}; + sev.sigev_notify = SIGEV_NONE; + const auto timer = + ASSERT_NO_ERRNO_AND_VALUE(TimerCreate(CLOCK_MONOTONIC, sev)); + const struct itimerspec its = ASSERT_NO_ERRNO_AND_VALUE(timer.Get()); + EXPECT_EQ(0, its.it_value.tv_sec); + EXPECT_EQ(0, its.it_value.tv_nsec); +} + +// Kernel can create multiple timers without issue. +// +// Regression test for gvisor.dev/issue/1738. +TEST(IntervalTimerTest, MultipleTimers) { + struct sigevent sev = {}; + sev.sigev_notify = SIGEV_NONE; + const auto timer1 = + ASSERT_NO_ERRNO_AND_VALUE(TimerCreate(CLOCK_MONOTONIC, sev)); + const auto timer2 = + ASSERT_NO_ERRNO_AND_VALUE(TimerCreate(CLOCK_MONOTONIC, sev)); +} + +TEST(IntervalTimerTest, SingleShotSilent) { + struct sigevent sev = {}; + sev.sigev_notify = SIGEV_NONE; + const auto timer = + ASSERT_NO_ERRNO_AND_VALUE(TimerCreate(CLOCK_MONOTONIC, sev)); + + constexpr absl::Duration kDelay = absl::Seconds(1); + struct itimerspec its = {}; + its.it_value = absl::ToTimespec(kDelay); + ASSERT_NO_ERRNO(timer.Set(0, its)); + + // The timer should count down to 0 and stop since the interval is zero. No + // overruns should be counted. 
+ absl::SleepFor(kDelay + kTimerSlack); + its = ASSERT_NO_ERRNO_AND_VALUE(timer.Get()); + EXPECT_EQ(0, its.it_value.tv_sec); + EXPECT_EQ(0, its.it_value.tv_nsec); + EXPECT_THAT(timer.Overruns(), IsPosixErrorOkAndHolds(0)); +} + +TEST(IntervalTimerTest, PeriodicSilent) { + struct sigevent sev = {}; + sev.sigev_notify = SIGEV_NONE; + const auto timer = + ASSERT_NO_ERRNO_AND_VALUE(TimerCreate(CLOCK_MONOTONIC, sev)); + + constexpr absl::Duration kPeriod = absl::Seconds(1); + struct itimerspec its = {}; + its.it_value = its.it_interval = absl::ToTimespec(kPeriod); + ASSERT_NO_ERRNO(timer.Set(0, its)); + + absl::SleepFor(kPeriod * 3 + kTimerSlack); + + // The timer should still be running. + its = ASSERT_NO_ERRNO_AND_VALUE(timer.Get()); + EXPECT_TRUE(its.it_value.tv_nsec != 0 || its.it_value.tv_sec != 0); + + // Timer expirations are not counted as overruns under SIGEV_NONE. + EXPECT_THAT(timer.Overruns(), IsPosixErrorOkAndHolds(0)); +} + +std::atomic<int> counted_signals; + +void IntervalTimerCountingSignalHandler(int sig, siginfo_t* info, + void* ucontext) { + counted_signals.fetch_add(1 + info->si_overrun); +} + +TEST(IntervalTimerTest, PeriodicGroupDirectedSignal) { + constexpr int kSigno = SIGUSR1; + constexpr int kSigvalue = 42; + + // Install our signal handler. + counted_signals.store(0); + struct sigaction sa = {}; + sa.sa_sigaction = IntervalTimerCountingSignalHandler; + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_SIGINFO; + const auto scoped_sigaction = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(kSigno, sa)); + + // Ensure that kSigno is unblocked on at least one thread. 
+ const auto scoped_sigmask = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, kSigno)); + + struct sigevent sev = {}; + sev.sigev_notify = SIGEV_SIGNAL; + sev.sigev_signo = kSigno; + sev.sigev_value.sival_int = kSigvalue; + auto timer = ASSERT_NO_ERRNO_AND_VALUE(TimerCreate(CLOCK_MONOTONIC, sev)); + + constexpr absl::Duration kPeriod = absl::Seconds(1); + constexpr int kCycles = 3; + struct itimerspec its = {}; + its.it_value = its.it_interval = absl::ToTimespec(kPeriod); + ASSERT_NO_ERRNO(timer.Set(0, its)); + + absl::SleepFor(kPeriod * kCycles + kTimerSlack); + EXPECT_GE(counted_signals.load(), kCycles); +} + +// From Linux's include/uapi/asm-generic/siginfo.h. +#ifndef sigev_notify_thread_id +#define sigev_notify_thread_id _sigev_un._tid +#endif + +TEST(IntervalTimerTest, PeriodicThreadDirectedSignal) { + constexpr int kSigno = SIGUSR1; + constexpr int kSigvalue = 42; + + // Block kSigno so that we can accumulate overruns. + sigset_t mask; + sigemptyset(&mask); + sigaddset(&mask, kSigno); + const auto scoped_sigmask = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_BLOCK, mask)); + + struct sigevent sev = {}; + sev.sigev_notify = SIGEV_THREAD_ID; + sev.sigev_signo = kSigno; + sev.sigev_value.sival_int = kSigvalue; + sev.sigev_notify_thread_id = gettid(); + auto timer = ASSERT_NO_ERRNO_AND_VALUE(TimerCreate(CLOCK_MONOTONIC, sev)); + + constexpr absl::Duration kPeriod = absl::Seconds(1); + constexpr int kCycles = 3; + struct itimerspec its = {}; + its.it_value = its.it_interval = absl::ToTimespec(kPeriod); + ASSERT_NO_ERRNO(timer.Set(0, its)); + absl::SleepFor(kPeriod * kCycles + kTimerSlack); + + // At least kCycles expirations should have occurred, resulting in kCycles-1 + // overruns (the first expiration sent the signal successfully). 
+ siginfo_t si; + struct timespec zero_ts = absl::ToTimespec(absl::ZeroDuration()); + ASSERT_THAT(sigtimedwait(&mask, &si, &zero_ts), + SyscallSucceedsWithValue(kSigno)); + EXPECT_EQ(si.si_signo, kSigno); + EXPECT_EQ(si.si_code, SI_TIMER); + EXPECT_EQ(si.si_timerid, timer.get()); + EXPECT_GE(si.si_overrun, kCycles - 1); + EXPECT_EQ(si.si_int, kSigvalue); + + // Kill the timer, then drain any additional signal it may have enqueued. We + // can't do this before the preceding sigtimedwait because stopping or + // deleting the timer resets si_overrun to 0. + timer.reset(); + sigtimedwait(&mask, &si, &zero_ts); +} + +TEST(IntervalTimerTest, OtherThreadGroup) { + constexpr int kSigno = SIGUSR1; + + // Create a subprocess that does nothing until killed. + pid_t child_pid; + const auto sp = ASSERT_NO_ERRNO_AND_VALUE(ForkAndExec( + "/proc/self/exe", ExecveArray({"timers", "--timers_test_sleep"}), + ExecveArray(), &child_pid, nullptr)); + + // Verify that we can't create a timer that would send signals to it. + struct sigevent sev = {}; + sev.sigev_notify = SIGEV_THREAD_ID; + sev.sigev_signo = kSigno; + sev.sigev_notify_thread_id = child_pid; + EXPECT_THAT(TimerCreate(CLOCK_MONOTONIC, sev), PosixErrorIs(EINVAL, _)); +} + +TEST(IntervalTimerTest, RealTimeSignalsAreNotDuplicated) { + const int kSigno = SIGRTMIN; + constexpr int kSigvalue = 42; + + // Block signo so that we can accumulate overruns. 
+ sigset_t mask; + sigemptyset(&mask); + sigaddset(&mask, kSigno); + const auto scoped_sigmask = ScopedSignalMask(SIG_BLOCK, mask); + + struct sigevent sev = {}; + sev.sigev_notify = SIGEV_THREAD_ID; + sev.sigev_signo = kSigno; + sev.sigev_value.sival_int = kSigvalue; + sev.sigev_notify_thread_id = gettid(); + const auto timer = + ASSERT_NO_ERRNO_AND_VALUE(TimerCreate(CLOCK_MONOTONIC, sev)); + + constexpr absl::Duration kPeriod = absl::Seconds(1); + constexpr int kCycles = 3; + struct itimerspec its = {}; + its.it_value = its.it_interval = absl::ToTimespec(kPeriod); + ASSERT_NO_ERRNO(timer.Set(0, its)); + absl::SleepFor(kPeriod * kCycles + kTimerSlack); + + // Stop the timer so that no further signals are enqueued after sigtimedwait. + struct timespec zero_ts = absl::ToTimespec(absl::ZeroDuration()); + its.it_value = its.it_interval = zero_ts; + ASSERT_NO_ERRNO(timer.Set(0, its)); + + // The timer should have sent only a single signal, even though the kernel + // supports enqueueing of multiple RT signals. + siginfo_t si; + ASSERT_THAT(sigtimedwait(&mask, &si, &zero_ts), + SyscallSucceedsWithValue(kSigno)); + EXPECT_EQ(si.si_signo, kSigno); + EXPECT_EQ(si.si_code, SI_TIMER); + EXPECT_EQ(si.si_timerid, timer.get()); + // si_overrun was reset by timer_settime. + EXPECT_EQ(si.si_overrun, 0); + EXPECT_EQ(si.si_int, kSigvalue); + EXPECT_THAT(sigtimedwait(&mask, &si, &zero_ts), + SyscallFailsWithErrno(EAGAIN)); +} + +TEST(IntervalTimerTest, AlreadyPendingSignal) { + constexpr int kSigno = SIGUSR1; + constexpr int kSigvalue = 42; + + // Block kSigno so that we can accumulate overruns. + sigset_t mask; + sigemptyset(&mask); + sigaddset(&mask, kSigno); + const auto scoped_sigmask = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_BLOCK, mask)); + + // Send ourselves a signal, preventing the timer from enqueuing. 
+ ASSERT_THAT(tgkill(getpid(), gettid(), kSigno), SyscallSucceeds()); + + struct sigevent sev = {}; + sev.sigev_notify = SIGEV_THREAD_ID; + sev.sigev_signo = kSigno; + sev.sigev_value.sival_int = kSigvalue; + sev.sigev_notify_thread_id = gettid(); + auto timer = ASSERT_NO_ERRNO_AND_VALUE(TimerCreate(CLOCK_MONOTONIC, sev)); + + constexpr absl::Duration kPeriod = absl::Seconds(1); + constexpr int kCycles = 3; + struct itimerspec its = {}; + its.it_value = its.it_interval = absl::ToTimespec(kPeriod); + ASSERT_NO_ERRNO(timer.Set(0, its)); + + // End the sleep one cycle short; we will sleep for one more cycle below. + absl::SleepFor(kPeriod * (kCycles - 1)); + + // Dequeue the first signal, which we sent to ourselves with tgkill. + siginfo_t si; + struct timespec zero_ts = absl::ToTimespec(absl::ZeroDuration()); + ASSERT_THAT(sigtimedwait(&mask, &si, &zero_ts), + SyscallSucceedsWithValue(kSigno)); + EXPECT_EQ(si.si_signo, kSigno); + // glibc sigtimedwait silently replaces SI_TKILL with SI_USER: + // sysdeps/unix/sysv/linux/sigtimedwait.c:__sigtimedwait(). This isn't + // documented, so we don't depend on it. + EXPECT_THAT(si.si_code, AnyOf(SI_USER, SI_TKILL)); + + // Sleep for 1 more cycle to give the timer time to send a signal. + absl::SleepFor(kPeriod + kTimerSlack); + + // At least kCycles expirations should have occurred, resulting in kCycles-1 + // overruns (the last expiration sent the signal successfully). + ASSERT_THAT(sigtimedwait(&mask, &si, &zero_ts), + SyscallSucceedsWithValue(kSigno)); + EXPECT_EQ(si.si_signo, kSigno); + EXPECT_EQ(si.si_code, SI_TIMER); + EXPECT_EQ(si.si_timerid, timer.get()); + EXPECT_GE(si.si_overrun, kCycles - 1); + EXPECT_EQ(si.si_int, kSigvalue); + + // Kill the timer, then drain any additional signal it may have enqueued. We + // can't do this before the preceding sigtimedwait because stopping or + // deleting the timer resets si_overrun to 0. 
+ timer.reset(); + sigtimedwait(&mask, &si, &zero_ts); +} + +TEST(IntervalTimerTest, IgnoredSignalCountsAsOverrun) { + constexpr int kSigno = SIGUSR1; + constexpr int kSigvalue = 42; + + // Ignore kSigno. + struct sigaction sa = {}; + sa.sa_handler = SIG_IGN; + const auto scoped_sigaction = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(kSigno, sa)); + + // Unblock kSigno so that ignored signals will be discarded. + sigset_t mask; + sigemptyset(&mask); + sigaddset(&mask, kSigno); + auto scoped_sigmask = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, mask)); + + struct sigevent sev = {}; + sev.sigev_notify = SIGEV_THREAD_ID; + sev.sigev_signo = kSigno; + sev.sigev_value.sival_int = kSigvalue; + sev.sigev_notify_thread_id = gettid(); + auto timer = ASSERT_NO_ERRNO_AND_VALUE(TimerCreate(CLOCK_MONOTONIC, sev)); + + constexpr absl::Duration kPeriod = absl::Seconds(1); + constexpr int kCycles = 3; + struct itimerspec its = {}; + its.it_value = its.it_interval = absl::ToTimespec(kPeriod); + ASSERT_NO_ERRNO(timer.Set(0, its)); + + // End the sleep one cycle short; we will sleep for one more cycle below. + absl::SleepFor(kPeriod * (kCycles - 1)); + + // Block kSigno so that ignored signals will be enqueued. + scoped_sigmask.Release()(); + scoped_sigmask = ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_BLOCK, mask)); + + // Sleep for 1 more cycle to give the timer time to send a signal. + absl::SleepFor(kPeriod + kTimerSlack); + + // At least kCycles expirations should have occurred, resulting in kCycles-1 + // overruns (the last expiration sent the signal successfully). 
+ siginfo_t si; + struct timespec zero_ts = absl::ToTimespec(absl::ZeroDuration()); + ASSERT_THAT(sigtimedwait(&mask, &si, &zero_ts), + SyscallSucceedsWithValue(kSigno)); + EXPECT_EQ(si.si_signo, kSigno); + EXPECT_EQ(si.si_code, SI_TIMER); + EXPECT_EQ(si.si_timerid, timer.get()); + EXPECT_GE(si.si_overrun, kCycles - 1); + EXPECT_EQ(si.si_int, kSigvalue); + + // Kill the timer, then drain any additional signal it may have enqueued. We + // can't do this before the preceding sigtimedwait because stopping or + // deleting the timer resets si_overrun to 0. + timer.reset(); + sigtimedwait(&mask, &si, &zero_ts); +} + +} // namespace +} // namespace testing +} // namespace gvisor + +int main(int argc, char** argv) { + gvisor::testing::TestInit(&argc, &argv); + + if (absl::GetFlag(FLAGS_timers_test_sleep)) { + while (true) { + absl::SleepFor(absl::Seconds(10)); + } + } + + return gvisor::testing::RunAllTests(); +} diff --git a/test/syscalls/linux/tkill.cc b/test/syscalls/linux/tkill.cc new file mode 100644 index 000000000..8d8ebbb24 --- /dev/null +++ b/test/syscalls/linux/tkill.cc @@ -0,0 +1,75 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <sys/syscall.h> +#include <sys/types.h> +#include <unistd.h> + +#include <cerrno> +#include <csignal> + +#include "gtest/gtest.h" +#include "test/util/logging.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +static int tkill(pid_t tid, int sig) { + int ret; + do { + // NOTE(b/25434735): tkill(2) could return EAGAIN for RT signals. + ret = syscall(SYS_tkill, tid, sig); + } while (ret == -1 && errno == EAGAIN); + return ret; +} + +TEST(TkillTest, InvalidTID) { + EXPECT_THAT(tkill(-1, 0), SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(tkill(0, 0), SyscallFailsWithErrno(EINVAL)); +} + +TEST(TkillTest, ValidTID) { + EXPECT_THAT(tkill(gettid(), 0), SyscallSucceeds()); +} + +void SigHandler(int sig, siginfo_t* info, void* context) { + TEST_CHECK(sig == SIGRTMAX); + TEST_CHECK(info->si_pid == getpid()); + TEST_CHECK(info->si_uid == getuid()); + TEST_CHECK(info->si_code == SI_TKILL); +} + +// Test with a real signal. Regression test for b/24790092. +TEST(TkillTest, ValidTIDAndRealSignal) { + struct sigaction sa; + sa.sa_sigaction = SigHandler; + sigfillset(&sa.sa_mask); + sa.sa_flags = SA_SIGINFO; + ASSERT_THAT(sigaction(SIGRTMAX, &sa, nullptr), SyscallSucceeds()); + // InitGoogle blocks all RT signals, so we need undo it. + sigset_t unblock; + sigemptyset(&unblock); + sigaddset(&unblock, SIGRTMAX); + ASSERT_THAT(sigprocmask(SIG_UNBLOCK, &unblock, nullptr), SyscallSucceeds()); + EXPECT_THAT(tkill(gettid(), SIGRTMAX), SyscallSucceeds()); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/truncate.cc b/test/syscalls/linux/truncate.cc new file mode 100644 index 000000000..c988c6380 --- /dev/null +++ b/test/syscalls/linux/truncate.cc @@ -0,0 +1,218 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <errno.h> +#include <signal.h> +#include <sys/resource.h> +#include <sys/stat.h> +#include <sys/vfs.h> +#include <time.h> +#include <unistd.h> + +#include <iostream> +#include <string> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/strings/string_view.h" +#include "test/syscalls/linux/file_base.h" +#include "test/util/capability_util.h" +#include "test/util/cleanup.h" +#include "test/util/file_descriptor.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +class FixtureTruncateTest : public FileTest { + void SetUp() override { FileTest::SetUp(); } +}; + +TEST_F(FixtureTruncateTest, Truncate) { + // Get the current rlimit and restore after test run. + struct rlimit initial_lim; + ASSERT_THAT(getrlimit(RLIMIT_FSIZE, &initial_lim), SyscallSucceeds()); + auto cleanup = Cleanup([&initial_lim] { + EXPECT_THAT(setrlimit(RLIMIT_FSIZE, &initial_lim), SyscallSucceeds()); + }); + + // Check that it starts at size zero. + struct stat buf; + ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds()); + EXPECT_EQ(buf.st_size, 0); + + // Stay at size zero. + EXPECT_THAT(truncate(test_file_name_.c_str(), 0), SyscallSucceeds()); + ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds()); + EXPECT_EQ(buf.st_size, 0); + + // Grow to ten bytes. 
+ EXPECT_THAT(truncate(test_file_name_.c_str(), 10), SyscallSucceeds()); + ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds()); + EXPECT_EQ(buf.st_size, 10); + + // Can't be truncated to a negative number. + EXPECT_THAT(truncate(test_file_name_.c_str(), -1), + SyscallFailsWithErrno(EINVAL)); + + // Try growing past the file size limit. + sigset_t new_mask; + sigemptyset(&new_mask); + sigaddset(&new_mask, SIGXFSZ); + sigprocmask(SIG_BLOCK, &new_mask, nullptr); + struct timespec timelimit; + timelimit.tv_sec = 10; + timelimit.tv_nsec = 0; + + struct rlimit setlim; + setlim.rlim_cur = 1024; + setlim.rlim_max = RLIM_INFINITY; + ASSERT_THAT(setrlimit(RLIMIT_FSIZE, &setlim), SyscallSucceeds()); + EXPECT_THAT(truncate(test_file_name_.c_str(), 1025), + SyscallFailsWithErrno(EFBIG)); + EXPECT_EQ(sigtimedwait(&new_mask, nullptr, &timelimit), SIGXFSZ); + ASSERT_THAT(sigprocmask(SIG_UNBLOCK, &new_mask, nullptr), SyscallSucceeds()); + + // Shrink back down to zero. + EXPECT_THAT(truncate(test_file_name_.c_str(), 0), SyscallSucceeds()); + ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds()); + EXPECT_EQ(buf.st_size, 0); +} + +TEST_F(FixtureTruncateTest, Ftruncate) { + // Get the current rlimit and restore after test run. + struct rlimit initial_lim; + ASSERT_THAT(getrlimit(RLIMIT_FSIZE, &initial_lim), SyscallSucceeds()); + auto cleanup = Cleanup([&initial_lim] { + EXPECT_THAT(setrlimit(RLIMIT_FSIZE, &initial_lim), SyscallSucceeds()); + }); + + // Check that it starts at size zero. + struct stat buf; + ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds()); + EXPECT_EQ(buf.st_size, 0); + + // Stay at size zero. + EXPECT_THAT(ftruncate(test_file_fd_.get(), 0), SyscallSucceeds()); + ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds()); + EXPECT_EQ(buf.st_size, 0); + + // Grow to ten bytes. 
+ EXPECT_THAT(ftruncate(test_file_fd_.get(), 10), SyscallSucceeds()); + ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds()); + EXPECT_EQ(buf.st_size, 10); + + // Can't be truncated to a negative number. + EXPECT_THAT(ftruncate(test_file_fd_.get(), -1), + SyscallFailsWithErrno(EINVAL)); + + // Try growing past the file size limit. + sigset_t new_mask; + sigemptyset(&new_mask); + sigaddset(&new_mask, SIGXFSZ); + sigprocmask(SIG_BLOCK, &new_mask, nullptr); + struct timespec timelimit; + timelimit.tv_sec = 10; + timelimit.tv_nsec = 0; + + struct rlimit setlim; + setlim.rlim_cur = 1024; + setlim.rlim_max = RLIM_INFINITY; + ASSERT_THAT(setrlimit(RLIMIT_FSIZE, &setlim), SyscallSucceeds()); + EXPECT_THAT(ftruncate(test_file_fd_.get(), 1025), + SyscallFailsWithErrno(EFBIG)); + EXPECT_EQ(sigtimedwait(&new_mask, nullptr, &timelimit), SIGXFSZ); + ASSERT_THAT(sigprocmask(SIG_UNBLOCK, &new_mask, nullptr), SyscallSucceeds()); + + // Shrink back down to zero. + EXPECT_THAT(ftruncate(test_file_fd_.get(), 0), SyscallSucceeds()); + ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds()); + EXPECT_EQ(buf.st_size, 0); +} + +// Truncating a file down clears that portion of the file. +TEST_F(FixtureTruncateTest, FtruncateShrinkGrow) { + std::vector<char> buf(10, 'a'); + EXPECT_THAT(WriteFd(test_file_fd_.get(), buf.data(), buf.size()), + SyscallSucceedsWithValue(buf.size())); + + // Shrink then regrow the file. This should clear the second half of the file. 
+ EXPECT_THAT(ftruncate(test_file_fd_.get(), 5), SyscallSucceeds()); + EXPECT_THAT(ftruncate(test_file_fd_.get(), 10), SyscallSucceeds()); + + EXPECT_THAT(lseek(test_file_fd_.get(), 0, SEEK_SET), SyscallSucceeds()); + + std::vector<char> buf2(10); + EXPECT_THAT(ReadFd(test_file_fd_.get(), buf2.data(), buf2.size()), + SyscallSucceedsWithValue(buf2.size())); + + std::vector<char> expect = {'a', 'a', 'a', 'a', 'a', + '\0', '\0', '\0', '\0', '\0'}; + EXPECT_EQ(expect, buf2); +} + +TEST(TruncateTest, TruncateDir) { + auto temp_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + EXPECT_THAT(truncate(temp_dir.path().c_str(), 0), + SyscallFailsWithErrno(EISDIR)); +} + +TEST(TruncateTest, FtruncateDir) { + auto temp_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(temp_dir.path(), O_DIRECTORY | O_RDONLY)); + EXPECT_THAT(ftruncate(fd.get(), 0), SyscallFailsWithErrno(EINVAL)); +} + +TEST(TruncateTest, TruncateNonWriteable) { + // Make sure we don't have CAP_DAC_OVERRIDE, since that allows the user to + // always override write permissions. 
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + auto temp_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), absl::string_view(), 0555 /* mode */)); + EXPECT_THAT(truncate(temp_file.path().c_str(), 0), + SyscallFailsWithErrno(EACCES)); +} + +TEST(TruncateTest, FtruncateNonWriteable) { + auto temp_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), absl::string_view(), 0555 /* mode */)); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(temp_file.path(), O_RDONLY)); + EXPECT_THAT(ftruncate(fd.get(), 0), SyscallFailsWithErrno(EINVAL)); +} + +TEST(TruncateTest, TruncateNonExist) { + EXPECT_THAT(truncate("/foo/bar", 0), SyscallFailsWithErrno(ENOENT)); +} + +TEST(TruncateTest, FtruncateVirtualTmp_NoRandomSave) { + auto temp_file = NewTempAbsPathInDir("/dev/shm"); + const DisableSave ds; // Incompatible permissions. + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(temp_file, O_RDWR | O_CREAT | O_EXCL, 0)); + EXPECT_THAT(ftruncate(fd.get(), 100), SyscallSucceeds()); +} + +// NOTE: There are additional truncate(2)/ftruncate(2) tests in mknod.cc +// which are there to avoid running the tests on a number of different +// filesystems which may not support mknod. + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/tuntap.cc b/test/syscalls/linux/tuntap.cc new file mode 100644 index 000000000..97d554e72 --- /dev/null +++ b/test/syscalls/linux/tuntap.cc @@ -0,0 +1,422 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <arpa/inet.h> +#include <linux/capability.h> +#include <linux/if_arp.h> +#include <linux/if_ether.h> +#include <linux/if_tun.h> +#include <netinet/ip.h> +#include <netinet/ip_icmp.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/types.h> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/strings/ascii.h" +#include "absl/strings/str_split.h" +#include "test/syscalls/linux/socket_netlink_route_util.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/capability_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/fs_util.h" +#include "test/util/posix_error.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { +namespace { + +constexpr int kIPLen = 4; + +constexpr const char kDevNetTun[] = "/dev/net/tun"; +constexpr const char kTapName[] = "tap0"; + +constexpr const uint8_t kMacA[ETH_ALEN] = {0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA}; +constexpr const uint8_t kMacB[ETH_ALEN] = {0xBB, 0xBB, 0xBB, 0xBB, 0xBB, 0xBB}; + +PosixErrorOr<std::set<std::string>> DumpLinkNames() { + ASSIGN_OR_RETURN_ERRNO(auto links, DumpLinks()); + std::set<std::string> names; + for (const auto& link : links) { + names.emplace(link.name); + } + return names; +} + +PosixErrorOr<Link> GetLinkByName(const std::string& name) { + ASSIGN_OR_RETURN_ERRNO(auto links, DumpLinks()); + for (const auto& link : links) { + if (link.name == name) { + return link; + } + } + return PosixError(ENOENT, "interface not found"); +} + +struct pihdr { + uint16_t pi_flags; + uint16_t pi_protocol; 
+} __attribute__((packed));
+
+// ICMP echo frame: packet-information header, Ethernet header, IPv4 header,
+// ICMP header and a fixed 64-byte payload, packed with no padding.
+struct ping_pkt {
+  pihdr pi;
+  struct ethhdr eth;
+  struct iphdr ip;
+  struct icmphdr icmp;
+  char payload[64];
+} __attribute__((packed));
+
+// Builds an ICMP echo request addressed from (srcmac, srcip) to
+// (dstmac, dstip). The IP and ICMP checksum fields are filled in from
+// IPChecksum() and ICMPChecksum(). srcip/dstip are dotted-quad strings parsed
+// with inet_pton(); its return value is not checked.
+ping_pkt CreatePingPacket(const uint8_t srcmac[ETH_ALEN], const char* srcip,
+                          const uint8_t dstmac[ETH_ALEN], const char* dstip) {
+  ping_pkt pkt = {};
+
+  pkt.pi.pi_protocol = htons(ETH_P_IP);
+
+  memcpy(pkt.eth.h_dest, dstmac, sizeof(pkt.eth.h_dest));
+  memcpy(pkt.eth.h_source, srcmac, sizeof(pkt.eth.h_source));
+  pkt.eth.h_proto = htons(ETH_P_IP);
+
+  pkt.ip.ihl = 5;
+  pkt.ip.version = 4;
+  pkt.ip.tos = 0;
+  pkt.ip.tot_len = htons(sizeof(struct iphdr) + sizeof(struct icmphdr) +
+                         sizeof(pkt.payload));
+  pkt.ip.id = 1;
+  // Do not fragment. NOTE(review): the literal is the DF bit (0x4000) as it
+  // lands in the network-order field on a little-endian host; it is not
+  // byte-swapped explicitly.
+  pkt.ip.frag_off = 1 << 6;
+  pkt.ip.ttl = 64;
+  pkt.ip.protocol = IPPROTO_ICMP;
+  inet_pton(AF_INET, dstip, &pkt.ip.daddr);
+  inet_pton(AF_INET, srcip, &pkt.ip.saddr);
+  pkt.ip.check = IPChecksum(pkt.ip);
+
+  pkt.icmp.type = ICMP_ECHO;
+  pkt.icmp.code = 0;
+  // The checksum field must be zero while the checksum is being computed.
+  pkt.icmp.checksum = 0;
+  pkt.icmp.un.echo.sequence = 1;
+  pkt.icmp.un.echo.id = 1;
+
+  strncpy(pkt.payload, "abcd", sizeof(pkt.payload));
+  pkt.icmp.checksum = ICMPChecksum(pkt.icmp, pkt.payload, sizeof(pkt.payload));
+
+  return pkt;
+}
+
+// ARP frame for Ethernet/IPv4: packet-information header, Ethernet header,
+// fixed ARP header, then sender/target hardware and protocol addresses.
+struct arp_pkt {
+  pihdr pi;
+  struct ethhdr eth;
+  struct arphdr arp;
+  uint8_t arp_sha[ETH_ALEN];
+  uint8_t arp_spa[kIPLen];
+  uint8_t arp_tha[ETH_ALEN];
+  uint8_t arp_tpa[kIPLen];
+} __attribute__((packed));
+
+// Builds an ARP reply announcing that srcip is at srcmac, addressed to
+// (dstmac, dstip), and returns it as a byte string of sizeof(arp_pkt) bytes.
+//
+// The Ethernet header is filled from the srcmac/dstmac arguments. It used to
+// be filled from the file constants kMacA/kMacB, which ignored the arguments
+// and only matched them for the (kMacB -> kMacA) call.
+std::string CreateArpPacket(const uint8_t srcmac[ETH_ALEN], const char* srcip,
+                            const uint8_t dstmac[ETH_ALEN], const char* dstip) {
+  std::string buffer;
+  buffer.resize(sizeof(arp_pkt));
+
+  arp_pkt* pkt = reinterpret_cast<arp_pkt*>(&buffer[0]);
+  {
+    pkt->pi.pi_protocol = htons(ETH_P_ARP);
+
+    memcpy(pkt->eth.h_dest, dstmac, sizeof(pkt->eth.h_dest));
+    memcpy(pkt->eth.h_source, srcmac, sizeof(pkt->eth.h_source));
+    pkt->eth.h_proto = htons(ETH_P_ARP);
+
+    pkt->arp.ar_hrd = htons(ARPHRD_ETHER);
+    pkt->arp.ar_pro = htons(ETH_P_IP);
+    pkt->arp.ar_hln = ETH_ALEN;
+    pkt->arp.ar_pln = kIPLen;
+    pkt->arp.ar_op = htons(ARPOP_REPLY);
+
+    memcpy(pkt->arp_sha, srcmac, sizeof(pkt->arp_sha));
+    inet_pton(AF_INET, srcip, pkt->arp_spa);
+    memcpy(pkt->arp_tha, dstmac, sizeof(pkt->arp_tha));
+    inet_pton(AF_INET, dstip, pkt->arp_tpa);
+  }
+  return buffer;
+}
+
+}  // namespace
+
+TEST(TuntapStaticTest, NetTunExists) {
+  struct stat statbuf;
+  ASSERT_THAT(stat(kDevNetTun, &statbuf), SyscallSucceeds());
+  // Check that it's a character device with rw-rw-rw- permissions.
+  EXPECT_EQ(statbuf.st_mode, S_IFCHR | 0666);
+}
+
+// Fixture for tests that may drop CAP_NET_ADMIN.
+class TuntapTest : public ::testing::Test {
+ protected:
+  void TearDown() override {
+    // NOTE(review): the capability is only re-enabled when HaveCapability()
+    // already reports it. Confirm that HaveCapability() does not consult the
+    // same set that SetCapability(CAP_NET_ADMIN, false) clears, otherwise
+    // this never restores anything.
+    if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN))) {
+      // Bring back capability if we had dropped it in test case.
+      ASSERT_NO_ERRNO(SetCapability(CAP_NET_ADMIN, true));
+    }
+  }
+};
+
+// TUNSETIFF must fail with EPERM once CAP_NET_ADMIN has been dropped.
+TEST_F(TuntapTest, CreateInterfaceNoCap) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN)));
+
+  ASSERT_NO_ERRNO(SetCapability(CAP_NET_ADMIN, false));
+
+  FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(kDevNetTun, O_RDWR));
+
+  struct ifreq ifr = {};
+  ifr.ifr_flags = IFF_TAP;
+  strncpy(ifr.ifr_name, kTapName, IFNAMSIZ);
+
+  EXPECT_THAT(ioctl(fd.get(), TUNSETIFF, &ifr), SyscallFailsWithErrno(EPERM));
+}
+
+TEST_F(TuntapTest, CreateFixedNameInterface) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN)));
+
+  FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(kDevNetTun, O_RDWR));
+
+  struct ifreq ifr_set = {};
+  ifr_set.ifr_flags = IFF_TAP;
+  strncpy(ifr_set.ifr_name, kTapName, IFNAMSIZ);
+  EXPECT_THAT(ioctl(fd.get(), TUNSETIFF, &ifr_set),
+              SyscallSucceedsWithValue(0));
+
+  struct ifreq ifr_get = {};
+  EXPECT_THAT(ioctl(fd.get(), TUNGETIFF, &ifr_get),
+              SyscallSucceedsWithValue(0));
+
+  struct ifreq ifr_expect = ifr_set;
+  // See __tun_chr_ioctl() in drivers/net/tun.c.
+ ifr_expect.ifr_flags |= IFF_NOFILTER; + + EXPECT_THAT(DumpLinkNames(), + IsPosixErrorOkAndHolds(::testing::Contains(kTapName))); + EXPECT_THAT(memcmp(&ifr_expect, &ifr_get, sizeof(ifr_get)), ::testing::Eq(0)); +} + +TEST_F(TuntapTest, CreateInterface) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN))); + + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(kDevNetTun, O_RDWR)); + + struct ifreq ifr = {}; + ifr.ifr_flags = IFF_TAP; + // Empty ifr.ifr_name. Let kernel assign. + + EXPECT_THAT(ioctl(fd.get(), TUNSETIFF, &ifr), SyscallSucceedsWithValue(0)); + + struct ifreq ifr_get = {}; + EXPECT_THAT(ioctl(fd.get(), TUNGETIFF, &ifr_get), + SyscallSucceedsWithValue(0)); + + std::string ifname = ifr_get.ifr_name; + EXPECT_THAT(ifname, ::testing::StartsWith("tap")); + EXPECT_THAT(DumpLinkNames(), + IsPosixErrorOkAndHolds(::testing::Contains(ifname))); +} + +TEST_F(TuntapTest, InvalidReadWrite) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN))); + + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(kDevNetTun, O_RDWR)); + + char buf[128] = {}; + EXPECT_THAT(read(fd.get(), buf, sizeof(buf)), SyscallFailsWithErrno(EBADFD)); + EXPECT_THAT(write(fd.get(), buf, sizeof(buf)), SyscallFailsWithErrno(EBADFD)); +} + +TEST_F(TuntapTest, WriteToDownDevice) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN))); + + // FIXME(b/110961832): gVisor always creates enabled/up'd interfaces. + SKIP_IF(IsRunningOnGvisor()); + + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(kDevNetTun, O_RDWR)); + + // Device created should be down by default. + struct ifreq ifr = {}; + ifr.ifr_flags = IFF_TAP; + EXPECT_THAT(ioctl(fd.get(), TUNSETIFF, &ifr), SyscallSucceedsWithValue(0)); + + char buf[128] = {}; + EXPECT_THAT(write(fd.get(), buf, sizeof(buf)), SyscallFailsWithErrno(EIO)); +} + +PosixErrorOr<FileDescriptor> OpenAndAttachTap( + const std::string& dev_name, const std::string& dev_ipv4_addr) { + // Interface creation. 
+ ASSIGN_OR_RETURN_ERRNO(FileDescriptor fd, Open(kDevNetTun, O_RDWR)); + + struct ifreq ifr_set = {}; + ifr_set.ifr_flags = IFF_TAP; + strncpy(ifr_set.ifr_name, dev_name.c_str(), IFNAMSIZ); + if (ioctl(fd.get(), TUNSETIFF, &ifr_set) < 0) { + return PosixError(errno); + } + + ASSIGN_OR_RETURN_ERRNO(auto link, GetLinkByName(dev_name)); + + // Interface setup. + struct in_addr addr; + inet_pton(AF_INET, dev_ipv4_addr.c_str(), &addr); + EXPECT_NO_ERRNO(LinkAddLocalAddr(link.index, AF_INET, /*prefixlen=*/24, &addr, + sizeof(addr))); + + if (!IsRunningOnGvisor()) { + // FIXME(b/110961832): gVisor doesn't support setting MAC address on + // interfaces yet. + RETURN_IF_ERRNO(LinkSetMacAddr(link.index, kMacA, sizeof(kMacA))); + + // FIXME(b/110961832): gVisor always creates enabled/up'd interfaces. + RETURN_IF_ERRNO(LinkChangeFlags(link.index, IFF_UP, IFF_UP)); + } + + return fd; +} + +// This test sets up a TAP device and pings kernel by sending ICMP echo request. +// +// It works as the following: +// * Open /dev/net/tun, and create kTapName interface. +// * Use rtnetlink to do initial setup of the interface: +// * Assign IP address 10.0.0.1/24 to kernel. +// * MAC address: kMacA +// * Bring up the interface. +// * Send an ICMP echo reqest (ping) packet from 10.0.0.2 (kMacB) to kernel. +// * Loop to receive packets from TAP device/fd: +// * If packet is an ICMP echo reply, it stops and passes the test. +// * If packet is an ARP request, it responds with canned reply and resends +// the +// ICMP request packet. +TEST_F(TuntapTest, PingKernel) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN))); + + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(OpenAndAttachTap(kTapName, "10.0.0.1")); + ping_pkt ping_req = CreatePingPacket(kMacB, "10.0.0.2", kMacA, "10.0.0.1"); + std::string arp_rep = CreateArpPacket(kMacB, "10.0.0.2", kMacA, "10.0.0.1"); + + // Send ping, this would trigger an ARP request on Linux. 
+ EXPECT_THAT(write(fd.get(), &ping_req, sizeof(ping_req)), + SyscallSucceedsWithValue(sizeof(ping_req))); + + // Receive loop to process inbound packets. + struct inpkt { + union { + pihdr pi; + ping_pkt ping; + arp_pkt arp; + }; + }; + while (1) { + inpkt r = {}; + int n = read(fd.get(), &r, sizeof(r)); + EXPECT_THAT(n, SyscallSucceeds()); + + if (n < sizeof(pihdr)) { + std::cerr << "Ignored packet, protocol: " << r.pi.pi_protocol + << " len: " << n << std::endl; + continue; + } + + // Process ARP packet. + if (n >= sizeof(arp_pkt) && r.pi.pi_protocol == htons(ETH_P_ARP)) { + // Respond with canned ARP reply. + EXPECT_THAT(write(fd.get(), arp_rep.data(), arp_rep.size()), + SyscallSucceedsWithValue(arp_rep.size())); + // First ping request might have been dropped due to mac address not in + // ARP cache. Send it again. + EXPECT_THAT(write(fd.get(), &ping_req, sizeof(ping_req)), + SyscallSucceedsWithValue(sizeof(ping_req))); + } + + // Process ping response packet. + if (n >= sizeof(ping_pkt) && r.pi.pi_protocol == ping_req.pi.pi_protocol && + r.ping.ip.protocol == ping_req.ip.protocol && + !memcmp(&r.ping.ip.saddr, &ping_req.ip.daddr, kIPLen) && + !memcmp(&r.ping.ip.daddr, &ping_req.ip.saddr, kIPLen) && + r.ping.icmp.type == 0 && r.ping.icmp.code == 0) { + // Ends and passes the test. + break; + } + } +} + +TEST_F(TuntapTest, SendUdpTriggersArpResolution) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN))); + + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(OpenAndAttachTap(kTapName, "10.0.0.1")); + + // Send a UDP packet to remote. 
+ int sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_IP); + ASSERT_THAT(sock, SyscallSucceeds()); + + struct sockaddr_in remote = {}; + remote.sin_family = AF_INET; + remote.sin_port = htons(42); + inet_pton(AF_INET, "10.0.0.2", &remote.sin_addr); + int ret = sendto(sock, "hello", 5, 0, reinterpret_cast<sockaddr*>(&remote), + sizeof(remote)); + ASSERT_THAT(ret, ::testing::AnyOf(SyscallSucceeds(), + SyscallFailsWithErrno(EHOSTDOWN))); + + struct inpkt { + union { + pihdr pi; + arp_pkt arp; + }; + }; + while (1) { + inpkt r = {}; + int n = read(fd.get(), &r, sizeof(r)); + EXPECT_THAT(n, SyscallSucceeds()); + + if (n < sizeof(pihdr)) { + std::cerr << "Ignored packet, protocol: " << r.pi.pi_protocol + << " len: " << n << std::endl; + continue; + } + + if (n >= sizeof(arp_pkt) && r.pi.pi_protocol == htons(ETH_P_ARP)) { + break; + } + } +} + +// Write hang bug found by syskaller: b/155928773 +// https://syzkaller.appspot.com/bug?id=065b893bd8d1d04a4e0a1d53c578537cde1efe99 +TEST_F(TuntapTest, WriteHangBug155928773) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN))); + + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(OpenAndAttachTap(kTapName, "10.0.0.1")); + + int sock = socket(AF_INET, SOCK_DGRAM, 0); + ASSERT_THAT(sock, SyscallSucceeds()); + + struct sockaddr_in remote = {}; + remote.sin_family = AF_INET; + remote.sin_port = htons(42); + inet_pton(AF_INET, "10.0.0.1", &remote.sin_addr); + // Return values do not matter in this test. + connect(sock, reinterpret_cast<struct sockaddr*>(&remote), sizeof(remote)); + write(sock, "hello", 5); +} + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/tuntap_hostinet.cc b/test/syscalls/linux/tuntap_hostinet.cc new file mode 100644 index 000000000..1513fb9d5 --- /dev/null +++ b/test/syscalls/linux/tuntap_hostinet.cc @@ -0,0 +1,38 @@ +// Copyright 2020 The gVisor Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +TEST(TuntapHostInetTest, NoNetTun) { + SKIP_IF(!IsRunningOnGvisor()); + SKIP_IF(!IsRunningWithHostinet()); + + struct stat statbuf; + ASSERT_THAT(stat("/dev/net/tun", &statbuf), SyscallFailsWithErrno(ENOENT)); +} + +} // namespace +} // namespace testing + +} // namespace gvisor diff --git a/test/syscalls/linux/udp_bind.cc b/test/syscalls/linux/udp_bind.cc new file mode 100644 index 000000000..6d92bdbeb --- /dev/null +++ b/test/syscalls/linux/udp_bind.cc @@ -0,0 +1,316 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <arpa/inet.h> +#include <sys/socket.h> +#include <sys/types.h> + +#include "gtest/gtest.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +struct sockaddr_in_common { + sa_family_t sin_family; + in_port_t sin_port; +}; + +struct SendtoTestParam { + // Human readable description of test parameter. + std::string description; + + // Test is broken in gVisor, skip. + bool skip_on_gvisor; + + // Domain for the socket that will do the sending. + int send_domain; + + // Address to bind for the socket that will do the sending. + struct sockaddr_storage send_addr; + socklen_t send_addr_len; // 0 for unbound. + + // Address to connect to for the socket that will do the sending. + struct sockaddr_storage connect_addr; + socklen_t connect_addr_len; // 0 for no connection. + + // Domain for the socket that will do the receiving. + int recv_domain; + + // Address to bind for the socket that will do the receiving. + struct sockaddr_storage recv_addr; + socklen_t recv_addr_len; + + // Address to send to. + struct sockaddr_storage sendto_addr; + socklen_t sendto_addr_len; + + // Expected errno for the sendto call. + std::vector<int> sendto_errnos; // empty on success. +}; + +class SendtoTest : public ::testing::TestWithParam<SendtoTestParam> { + protected: + SendtoTest() { + // gUnit uses printf, so so will we. 
+ printf("Testing with %s\n", GetParam().description.c_str()); + } +}; + +TEST_P(SendtoTest, Sendto) { + auto param = GetParam(); + + SKIP_IF(param.skip_on_gvisor && IsRunningOnGvisor()); + + const FileDescriptor s1 = + ASSERT_NO_ERRNO_AND_VALUE(Socket(param.send_domain, SOCK_DGRAM, 0)); + const FileDescriptor s2 = + ASSERT_NO_ERRNO_AND_VALUE(Socket(param.recv_domain, SOCK_DGRAM, 0)); + + if (param.send_addr_len > 0) { + ASSERT_THAT(bind(s1.get(), reinterpret_cast<sockaddr*>(¶m.send_addr), + param.send_addr_len), + SyscallSucceeds()); + } + + if (param.connect_addr_len > 0) { + ASSERT_THAT( + connect(s1.get(), reinterpret_cast<sockaddr*>(¶m.connect_addr), + param.connect_addr_len), + SyscallSucceeds()); + } + + ASSERT_THAT(bind(s2.get(), reinterpret_cast<sockaddr*>(¶m.recv_addr), + param.recv_addr_len), + SyscallSucceeds()); + + struct sockaddr_storage real_recv_addr = {}; + socklen_t real_recv_addr_len = param.recv_addr_len; + ASSERT_THAT( + getsockname(s2.get(), reinterpret_cast<sockaddr*>(&real_recv_addr), + &real_recv_addr_len), + SyscallSucceeds()); + + ASSERT_EQ(real_recv_addr_len, param.recv_addr_len); + + int recv_port = + reinterpret_cast<sockaddr_in_common*>(&real_recv_addr)->sin_port; + + struct sockaddr_storage sendto_addr = param.sendto_addr; + reinterpret_cast<sockaddr_in_common*>(&sendto_addr)->sin_port = recv_port; + + char buf[20] = {}; + if (!param.sendto_errnos.empty()) { + ASSERT_THAT(RetryEINTR(sendto)(s1.get(), buf, sizeof(buf), 0, + reinterpret_cast<sockaddr*>(&sendto_addr), + param.sendto_addr_len), + SyscallFailsWithErrno(ElementOf(param.sendto_errnos))); + return; + } + + ASSERT_THAT(RetryEINTR(sendto)(s1.get(), buf, sizeof(buf), 0, + reinterpret_cast<sockaddr*>(&sendto_addr), + param.sendto_addr_len), + SyscallSucceedsWithValue(sizeof(buf))); + + struct sockaddr_storage got_addr = {}; + socklen_t got_addr_len = sizeof(sockaddr_storage); + ASSERT_THAT(RetryEINTR(recvfrom)(s2.get(), buf, sizeof(buf), 0, + 
reinterpret_cast<sockaddr*>(&got_addr), + &got_addr_len), + SyscallSucceedsWithValue(sizeof(buf))); + + ASSERT_GT(got_addr_len, sizeof(sockaddr_in_common)); + int got_port = reinterpret_cast<sockaddr_in_common*>(&got_addr)->sin_port; + + struct sockaddr_storage sender_addr = {}; + socklen_t sender_addr_len = sizeof(sockaddr_storage); + ASSERT_THAT(getsockname(s1.get(), reinterpret_cast<sockaddr*>(&sender_addr), + &sender_addr_len), + SyscallSucceeds()); + + ASSERT_GT(sender_addr_len, sizeof(sockaddr_in_common)); + int sender_port = + reinterpret_cast<sockaddr_in_common*>(&sender_addr)->sin_port; + + EXPECT_EQ(got_port, sender_port); +} + +socklen_t Ipv4Addr(sockaddr_storage* addr, int port = 0) { + auto addr4 = reinterpret_cast<sockaddr_in*>(addr); + addr4->sin_family = AF_INET; + addr4->sin_port = port; + inet_pton(AF_INET, "127.0.0.1", &addr4->sin_addr.s_addr); + return sizeof(struct sockaddr_in); +} + +socklen_t Ipv6Addr(sockaddr_storage* addr, int port = 0) { + auto addr6 = reinterpret_cast<sockaddr_in6*>(addr); + addr6->sin6_family = AF_INET6; + addr6->sin6_port = port; + inet_pton(AF_INET6, "::1", &addr6->sin6_addr.s6_addr); + return sizeof(struct sockaddr_in6); +} + +socklen_t Ipv4MappedIpv6Addr(sockaddr_storage* addr, int port = 0) { + auto addr6 = reinterpret_cast<sockaddr_in6*>(addr); + addr6->sin6_family = AF_INET6; + addr6->sin6_port = port; + inet_pton(AF_INET6, "::ffff:127.0.0.1", &addr6->sin6_addr.s6_addr); + return sizeof(struct sockaddr_in6); +} + +INSTANTIATE_TEST_SUITE_P( + UdpBindTest, SendtoTest, + ::testing::Values( + []() { + SendtoTestParam param = {}; + param.description = "IPv4 mapped IPv6 sendto IPv4 mapped IPv6"; + param.send_domain = AF_INET6; + param.send_addr_len = Ipv4MappedIpv6Addr(¶m.send_addr); + param.recv_domain = AF_INET6; + param.recv_addr_len = Ipv4MappedIpv6Addr(¶m.recv_addr); + param.sendto_addr_len = Ipv4MappedIpv6Addr(¶m.sendto_addr); + return param; + }(), + []() { + SendtoTestParam param = {}; + param.description = 
"IPv6 sendto IPv6"; + param.send_domain = AF_INET6; + param.send_addr_len = Ipv6Addr(¶m.send_addr); + param.recv_domain = AF_INET6; + param.recv_addr_len = Ipv6Addr(¶m.recv_addr); + param.sendto_addr_len = Ipv6Addr(¶m.sendto_addr); + return param; + }(), + []() { + SendtoTestParam param = {}; + param.description = "IPv4 sendto IPv4"; + param.send_domain = AF_INET; + param.send_addr_len = Ipv4Addr(¶m.send_addr); + param.recv_domain = AF_INET; + param.recv_addr_len = Ipv4Addr(¶m.recv_addr); + param.sendto_addr_len = Ipv4Addr(¶m.sendto_addr); + return param; + }(), + []() { + SendtoTestParam param = {}; + param.description = "IPv4 mapped IPv6 sendto IPv4"; + param.send_domain = AF_INET6; + param.send_addr_len = Ipv4MappedIpv6Addr(¶m.send_addr); + param.recv_domain = AF_INET; + param.recv_addr_len = Ipv4Addr(¶m.recv_addr); + param.sendto_addr_len = Ipv4MappedIpv6Addr(¶m.sendto_addr); + return param; + }(), + []() { + SendtoTestParam param = {}; + param.description = "IPv4 sendto IPv4 mapped IPv6"; + param.send_domain = AF_INET; + param.send_addr_len = Ipv4Addr(¶m.send_addr); + param.recv_domain = AF_INET6; + param.recv_addr_len = Ipv4MappedIpv6Addr(¶m.recv_addr); + param.sendto_addr_len = Ipv4Addr(¶m.sendto_addr); + return param; + }(), + []() { + SendtoTestParam param = {}; + param.description = "unbound IPv6 sendto IPv4 mapped IPv6"; + param.send_domain = AF_INET6; + param.recv_domain = AF_INET6; + param.recv_addr_len = Ipv4MappedIpv6Addr(¶m.recv_addr); + param.sendto_addr_len = Ipv4MappedIpv6Addr(¶m.sendto_addr); + return param; + }(), + []() { + SendtoTestParam param = {}; + param.description = "unbound IPv6 sendto IPv4"; + param.send_domain = AF_INET6; + param.recv_domain = AF_INET; + param.recv_addr_len = Ipv4Addr(¶m.recv_addr); + param.sendto_addr_len = Ipv4MappedIpv6Addr(¶m.sendto_addr); + return param; + }(), + []() { + SendtoTestParam param = {}; + param.description = "IPv6 sendto IPv4"; + param.send_domain = AF_INET6; + param.send_addr_len = 
Ipv6Addr(¶m.send_addr); + param.recv_domain = AF_INET; + param.recv_addr_len = Ipv4Addr(¶m.recv_addr); + param.sendto_addr_len = Ipv4MappedIpv6Addr(¶m.sendto_addr); + param.sendto_errnos = {ENETUNREACH}; + return param; + }(), + []() { + SendtoTestParam param = {}; + param.description = "IPv4 mapped IPv6 sendto IPv6"; + param.send_domain = AF_INET6; + param.send_addr_len = Ipv4MappedIpv6Addr(¶m.send_addr); + param.recv_domain = AF_INET6; + param.recv_addr_len = Ipv6Addr(¶m.recv_addr); + param.sendto_addr_len = Ipv6Addr(¶m.sendto_addr); + param.sendto_errnos = {EAFNOSUPPORT}; + // The errno returned changed in Linux commit c8e6ad0829a723. + param.sendto_errnos = {EINVAL, EAFNOSUPPORT}; + return param; + }(), + []() { + SendtoTestParam param = {}; + param.description = "connected IPv4 mapped IPv6 sendto IPv6"; + param.send_domain = AF_INET6; + param.connect_addr_len = + Ipv4MappedIpv6Addr(¶m.connect_addr, 5000); + param.recv_domain = AF_INET6; + param.recv_addr_len = Ipv6Addr(¶m.recv_addr); + param.sendto_addr_len = Ipv6Addr(¶m.sendto_addr); + // The errno returned changed in Linux commit c8e6ad0829a723. + param.sendto_errnos = {EINVAL, EAFNOSUPPORT}; + return param; + }(), + []() { + SendtoTestParam param = {}; + param.description = "connected IPv6 sendto IPv4 mapped IPv6"; + // TODO(igudger): Determine if this inconsistent behavior is worth + // implementing. + param.skip_on_gvisor = true; + param.send_domain = AF_INET6; + param.connect_addr_len = Ipv6Addr(¶m.connect_addr, 5000); + param.recv_domain = AF_INET6; + param.recv_addr_len = Ipv4MappedIpv6Addr(¶m.recv_addr); + param.sendto_addr_len = Ipv4MappedIpv6Addr(¶m.sendto_addr); + return param; + }(), + []() { + SendtoTestParam param = {}; + param.description = "connected IPv6 sendto IPv4"; + // TODO(igudger): Determine if this inconsistent behavior is worth + // implementing. 
+ param.skip_on_gvisor = true; + param.send_domain = AF_INET6; + param.connect_addr_len = Ipv6Addr(¶m.connect_addr, 5000); + param.recv_domain = AF_INET; + param.recv_addr_len = Ipv4Addr(¶m.recv_addr); + param.sendto_addr_len = Ipv4MappedIpv6Addr(¶m.sendto_addr); + return param; + }())); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/udp_socket.cc b/test/syscalls/linux/udp_socket.cc new file mode 100644 index 000000000..7a8ac30a4 --- /dev/null +++ b/test/syscalls/linux/udp_socket.cc @@ -0,0 +1,30 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "test/syscalls/linux/udp_socket_test_cases.h" + +namespace gvisor { +namespace testing { + +namespace { + +INSTANTIATE_TEST_SUITE_P(AllInetTests, UdpSocketTest, + ::testing::Values(AddressFamily::kIpv4, + AddressFamily::kIpv6, + AddressFamily::kDualStack)); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/udp_socket_errqueue_test_case.cc b/test/syscalls/linux/udp_socket_errqueue_test_case.cc new file mode 100644 index 000000000..54a0594f7 --- /dev/null +++ b/test/syscalls/linux/udp_socket_errqueue_test_case.cc @@ -0,0 +1,57 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef __fuchsia__ + +#include <arpa/inet.h> +#include <fcntl.h> +#include <linux/errqueue.h> +#include <netinet/in.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/types.h> + +#include "gtest/gtest.h" +#include "absl/base/macros.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/udp_socket_test_cases.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +TEST_P(UdpSocketTest, ErrorQueue) { + char cmsgbuf[CMSG_SPACE(sizeof(sock_extended_err))]; + msghdr msg; + memset(&msg, 0, sizeof(msg)); + iovec iov; + memset(&iov, 0, sizeof(iov)); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = cmsgbuf; + msg.msg_controllen = sizeof(cmsgbuf); + + // recv*(MSG_ERRQUEUE) never blocks, even without MSG_DONTWAIT. + EXPECT_THAT(RetryEINTR(recvmsg)(bind_.get(), &msg, MSG_ERRQUEUE), + SyscallFailsWithErrno(EAGAIN)); +} + +} // namespace testing +} // namespace gvisor + +#endif // __fuchsia__ diff --git a/test/syscalls/linux/udp_socket_test_cases.cc b/test/syscalls/linux/udp_socket_test_cases.cc new file mode 100644 index 000000000..9cc6be4fb --- /dev/null +++ b/test/syscalls/linux/udp_socket_test_cases.cc @@ -0,0 +1,1727 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "test/syscalls/linux/udp_socket_test_cases.h" + +#include <arpa/inet.h> +#include <fcntl.h> +#include <netinet/in.h> +#include <poll.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/types.h> + +#include "absl/strings/str_format.h" +#ifndef SIOCGSTAMP +#include <linux/sockios.h> +#endif + +#include "gtest/gtest.h" +#include "absl/base/macros.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/syscalls/linux/ip_socket_test_util.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/posix_error.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +// Gets a pointer to the port component of the given address. +uint16_t* Port(struct sockaddr_storage* addr) { + switch (addr->ss_family) { + case AF_INET: { + auto sin = reinterpret_cast<struct sockaddr_in*>(addr); + return &sin->sin_port; + } + case AF_INET6: { + auto sin6 = reinterpret_cast<struct sockaddr_in6*>(addr); + return &sin6->sin6_port; + } + } + + return nullptr; +} + +// Sets addr port to "port". 
+void SetPort(struct sockaddr_storage* addr, uint16_t port) { + switch (addr->ss_family) { + case AF_INET: { + auto sin = reinterpret_cast<struct sockaddr_in*>(addr); + sin->sin_port = port; + break; + } + case AF_INET6: { + auto sin6 = reinterpret_cast<struct sockaddr_in6*>(addr); + sin6->sin6_port = port; + break; + } + } +} + +void UdpSocketTest::SetUp() { + addrlen_ = GetAddrLength(); + + bind_ = + ASSERT_NO_ERRNO_AND_VALUE(Socket(GetFamily(), SOCK_DGRAM, IPPROTO_UDP)); + memset(&bind_addr_storage_, 0, sizeof(bind_addr_storage_)); + bind_addr_ = reinterpret_cast<struct sockaddr*>(&bind_addr_storage_); + + sock_ = + ASSERT_NO_ERRNO_AND_VALUE(Socket(GetFamily(), SOCK_DGRAM, IPPROTO_UDP)); +} + +int UdpSocketTest::GetFamily() { + if (GetParam() == AddressFamily::kIpv4) { + return AF_INET; + } + return AF_INET6; +} + +PosixError UdpSocketTest::BindLoopback() { + bind_addr_storage_ = InetLoopbackAddr(); + struct sockaddr* bind_addr_ = + reinterpret_cast<struct sockaddr*>(&bind_addr_storage_); + return BindSocket(bind_.get(), bind_addr_); +} + +PosixError UdpSocketTest::BindAny() { + bind_addr_storage_ = InetAnyAddr(); + struct sockaddr* bind_addr_ = + reinterpret_cast<struct sockaddr*>(&bind_addr_storage_); + return BindSocket(bind_.get(), bind_addr_); +} + +PosixError UdpSocketTest::BindSocket(int socket, struct sockaddr* addr) { + socklen_t len = sizeof(bind_addr_storage_); + + // Bind, then check that we get the right address. 
+ RETURN_ERROR_IF_SYSCALL_FAIL(bind(socket, addr, addrlen_)); + + RETURN_ERROR_IF_SYSCALL_FAIL(getsockname(socket, addr, &len)); + + if (addrlen_ != len) { + return PosixError( + EINVAL, + absl::StrFormat("getsockname len: %u expected: %u", len, addrlen_)); + } + return PosixError(0); +} + +socklen_t UdpSocketTest::GetAddrLength() { + struct sockaddr_storage addr; + if (GetFamily() == AF_INET) { + auto sin = reinterpret_cast<struct sockaddr_in*>(&addr); + return sizeof(*sin); + } + + auto sin6 = reinterpret_cast<struct sockaddr_in6*>(&addr); + return sizeof(*sin6); +} + +sockaddr_storage UdpSocketTest::InetAnyAddr() { + struct sockaddr_storage addr; + memset(&addr, 0, sizeof(addr)); + reinterpret_cast<struct sockaddr*>(&addr)->sa_family = GetFamily(); + + if (GetFamily() == AF_INET) { + auto sin = reinterpret_cast<struct sockaddr_in*>(&addr); + sin->sin_addr.s_addr = htonl(INADDR_ANY); + sin->sin_port = htons(0); + return addr; + } + + auto sin6 = reinterpret_cast<struct sockaddr_in6*>(&addr); + sin6->sin6_addr = IN6ADDR_ANY_INIT; + sin6->sin6_port = htons(0); + return addr; +} + +sockaddr_storage UdpSocketTest::InetLoopbackAddr() { + struct sockaddr_storage addr; + memset(&addr, 0, sizeof(addr)); + reinterpret_cast<struct sockaddr*>(&addr)->sa_family = GetFamily(); + + if (GetFamily() == AF_INET) { + auto sin = reinterpret_cast<struct sockaddr_in*>(&addr); + sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK); + sin->sin_port = htons(0); + return addr; + } + auto sin6 = reinterpret_cast<struct sockaddr_in6*>(&addr); + sin6->sin6_addr = in6addr_loopback; + sin6->sin6_port = htons(0); + return addr; +} + +void UdpSocketTest::Disconnect(int sockfd) { + sockaddr_storage addr_storage = InetAnyAddr(); + sockaddr* addr = reinterpret_cast<struct sockaddr*>(&addr_storage); + socklen_t addrlen = sizeof(addr_storage); + + addr->sa_family = AF_UNSPEC; + ASSERT_THAT(connect(sockfd, addr, addrlen), SyscallSucceeds()); + + // Check that after disconnect the socket is bound to the ANY 
address. + EXPECT_THAT(getsockname(sockfd, addr, &addrlen), SyscallSucceeds()); + if (GetParam() == AddressFamily::kIpv4) { + auto addr_out = reinterpret_cast<struct sockaddr_in*>(addr); + EXPECT_EQ(addrlen, sizeof(*addr_out)); + EXPECT_EQ(addr_out->sin_addr.s_addr, htonl(INADDR_ANY)); + } else { + auto addr_out = reinterpret_cast<struct sockaddr_in6*>(addr); + EXPECT_EQ(addrlen, sizeof(*addr_out)); + struct in6_addr loopback = IN6ADDR_ANY_INIT; + + EXPECT_EQ(memcmp(&addr_out->sin6_addr, &loopback, sizeof(in6_addr)), 0); + } +} + +TEST_P(UdpSocketTest, Creation) { + FileDescriptor sock = + ASSERT_NO_ERRNO_AND_VALUE(Socket(GetFamily(), SOCK_DGRAM, IPPROTO_UDP)); + EXPECT_THAT(close(sock.release()), SyscallSucceeds()); + + sock = ASSERT_NO_ERRNO_AND_VALUE(Socket(GetFamily(), SOCK_DGRAM, 0)); + EXPECT_THAT(close(sock.release()), SyscallSucceeds()); + + ASSERT_THAT(socket(GetFamily(), SOCK_STREAM, IPPROTO_UDP), SyscallFails()); +} + +TEST_P(UdpSocketTest, Getsockname) { + // Check that we're not bound. + struct sockaddr_storage addr; + socklen_t addrlen = sizeof(addr); + EXPECT_THAT( + getsockname(bind_.get(), reinterpret_cast<sockaddr*>(&addr), &addrlen), + SyscallSucceeds()); + EXPECT_EQ(addrlen, addrlen_); + struct sockaddr_storage any = InetAnyAddr(); + EXPECT_EQ(memcmp(&addr, reinterpret_cast<struct sockaddr*>(&any), addrlen_), + 0); + + ASSERT_NO_ERRNO(BindLoopback()); + + EXPECT_THAT( + getsockname(bind_.get(), reinterpret_cast<sockaddr*>(&addr), &addrlen), + SyscallSucceeds()); + + EXPECT_EQ(addrlen, addrlen_); + EXPECT_EQ(memcmp(&addr, bind_addr_, addrlen_), 0); +} + +TEST_P(UdpSocketTest, Getpeername) { + ASSERT_NO_ERRNO(BindLoopback()); + + // Check that we're not connected. + struct sockaddr_storage addr; + socklen_t addrlen = sizeof(addr); + EXPECT_THAT( + getpeername(sock_.get(), reinterpret_cast<sockaddr*>(&addr), &addrlen), + SyscallFailsWithErrno(ENOTCONN)); + + // Connect, then check that we get the right address. 
+ ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds()); + + addrlen = sizeof(addr); + EXPECT_THAT( + getpeername(sock_.get(), reinterpret_cast<sockaddr*>(&addr), &addrlen), + SyscallSucceeds()); + EXPECT_EQ(addrlen, addrlen_); + EXPECT_EQ(memcmp(&addr, bind_addr_, addrlen_), 0); +} + +TEST_P(UdpSocketTest, SendNotConnected) { + ASSERT_NO_ERRNO(BindLoopback()); + + // Do send & write, they must fail. + char buf[512]; + EXPECT_THAT(send(sock_.get(), buf, sizeof(buf), 0), + SyscallFailsWithErrno(EDESTADDRREQ)); + + EXPECT_THAT(write(sock_.get(), buf, sizeof(buf)), + SyscallFailsWithErrno(EDESTADDRREQ)); + + // Use sendto. + ASSERT_THAT(sendto(sock_.get(), buf, sizeof(buf), 0, bind_addr_, addrlen_), + SyscallSucceedsWithValue(sizeof(buf))); + + // Check that we're bound now. + struct sockaddr_storage addr; + socklen_t addrlen = sizeof(addr); + EXPECT_THAT( + getsockname(sock_.get(), reinterpret_cast<sockaddr*>(&addr), &addrlen), + SyscallSucceeds()); + EXPECT_EQ(addrlen, addrlen_); + EXPECT_NE(*Port(&addr), 0); +} + +TEST_P(UdpSocketTest, ConnectBinds) { + ASSERT_NO_ERRNO(BindLoopback()); + + // Connect the socket. + ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds()); + + // Check that we're bound now. + struct sockaddr_storage addr; + socklen_t addrlen = sizeof(addr); + EXPECT_THAT( + getsockname(sock_.get(), reinterpret_cast<sockaddr*>(&addr), &addrlen), + SyscallSucceeds()); + EXPECT_EQ(addrlen, addrlen_); + EXPECT_NE(*Port(&addr), 0); +} + +TEST_P(UdpSocketTest, ReceiveNotBound) { + char buf[512]; + EXPECT_THAT(recv(sock_.get(), buf, sizeof(buf), MSG_DONTWAIT), + SyscallFailsWithErrno(EWOULDBLOCK)); +} + +TEST_P(UdpSocketTest, Bind) { + ASSERT_NO_ERRNO(BindLoopback()); + + // Try to bind again. + EXPECT_THAT(bind(bind_.get(), bind_addr_, addrlen_), + SyscallFailsWithErrno(EINVAL)); + + // Check that we're still bound to the original address. 
+ struct sockaddr_storage addr; + socklen_t addrlen = sizeof(addr); + EXPECT_THAT( + getsockname(bind_.get(), reinterpret_cast<sockaddr*>(&addr), &addrlen), + SyscallSucceeds()); + EXPECT_EQ(addrlen, addrlen_); + EXPECT_EQ(memcmp(&addr, bind_addr_, addrlen_), 0); +} + +TEST_P(UdpSocketTest, BindInUse) { + ASSERT_NO_ERRNO(BindLoopback()); + + // Try to bind again. + EXPECT_THAT(bind(sock_.get(), bind_addr_, addrlen_), + SyscallFailsWithErrno(EADDRINUSE)); +} + +TEST_P(UdpSocketTest, ReceiveAfterConnect) { + ASSERT_NO_ERRNO(BindLoopback()); + ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds()); + + // Send from sock_ to bind_ + char buf[512]; + RandomizeBuffer(buf, sizeof(buf)); + ASSERT_THAT(sendto(sock_.get(), buf, sizeof(buf), 0, bind_addr_, addrlen_), + SyscallSucceedsWithValue(sizeof(buf))); + + // Receive the data. + char received[sizeof(buf)]; + EXPECT_THAT(recv(bind_.get(), received, sizeof(received), 0), + SyscallSucceedsWithValue(sizeof(received))); + EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0); +} + +TEST_P(UdpSocketTest, ReceiveAfterDisconnect) { + ASSERT_NO_ERRNO(BindLoopback()); + + for (int i = 0; i < 2; i++) { + // Connet sock_ to bound address. + ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds()); + + struct sockaddr_storage addr; + socklen_t addrlen = sizeof(addr); + EXPECT_THAT( + getsockname(sock_.get(), reinterpret_cast<sockaddr*>(&addr), &addrlen), + SyscallSucceeds()); + EXPECT_EQ(addrlen, addrlen_); + + // Send from sock to bind_. + char buf[512]; + RandomizeBuffer(buf, sizeof(buf)); + + ASSERT_THAT(sendto(bind_.get(), buf, sizeof(buf), 0, + reinterpret_cast<sockaddr*>(&addr), addrlen), + SyscallSucceedsWithValue(sizeof(buf))); + + // Receive the data. + char received[sizeof(buf)]; + EXPECT_THAT(recv(sock_.get(), received, sizeof(received), 0), + SyscallSucceedsWithValue(sizeof(received))); + EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0); + + // Disconnect sock_. 
+ struct sockaddr unspec = {}; + unspec.sa_family = AF_UNSPEC; + ASSERT_THAT(connect(sock_.get(), &unspec, sizeof(unspec.sa_family)), + SyscallSucceeds()); + } +} + +TEST_P(UdpSocketTest, Connect) { + ASSERT_NO_ERRNO(BindLoopback()); + ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds()); + + // Check that we're connected to the right peer. + struct sockaddr_storage peer; + socklen_t peerlen = sizeof(peer); + EXPECT_THAT( + getpeername(sock_.get(), reinterpret_cast<sockaddr*>(&peer), &peerlen), + SyscallSucceeds()); + EXPECT_EQ(peerlen, addrlen_); + EXPECT_EQ(memcmp(&peer, bind_addr_, addrlen_), 0); + + // Try to bind after connect. + struct sockaddr_storage any = InetAnyAddr(); + EXPECT_THAT( + bind(sock_.get(), reinterpret_cast<struct sockaddr*>(&any), addrlen_), + SyscallFailsWithErrno(EINVAL)); + + struct sockaddr_storage bind2_storage = InetLoopbackAddr(); + struct sockaddr* bind2_addr = + reinterpret_cast<struct sockaddr*>(&bind2_storage); + FileDescriptor bind2 = + ASSERT_NO_ERRNO_AND_VALUE(Socket(GetFamily(), SOCK_DGRAM, IPPROTO_UDP)); + ASSERT_NO_ERRNO(BindSocket(bind2.get(), bind2_addr)); + + // Try to connect again. + EXPECT_THAT(connect(sock_.get(), bind2_addr, addrlen_), SyscallSucceeds()); + + // Check that peer name changed. + peerlen = sizeof(peer); + EXPECT_THAT( + getpeername(sock_.get(), reinterpret_cast<sockaddr*>(&peer), &peerlen), + SyscallSucceeds()); + EXPECT_EQ(peerlen, addrlen_); + EXPECT_EQ(memcmp(&peer, bind2_addr, addrlen_), 0); +} + +TEST_P(UdpSocketTest, ConnectAnyZero) { + // TODO(138658473): Enable when we can connect to port 0 with gVisor. 
+ SKIP_IF(IsRunningOnGvisor()); + + struct sockaddr_storage any = InetAnyAddr(); + EXPECT_THAT( + connect(sock_.get(), reinterpret_cast<struct sockaddr*>(&any), addrlen_), + SyscallSucceeds()); + + struct sockaddr_storage addr; + socklen_t addrlen = sizeof(addr); + EXPECT_THAT( + getpeername(sock_.get(), reinterpret_cast<sockaddr*>(&addr), &addrlen), + SyscallFailsWithErrno(ENOTCONN)); +} + +TEST_P(UdpSocketTest, ConnectAnyWithPort) { + ASSERT_NO_ERRNO(BindAny()); + ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds()); + + struct sockaddr_storage addr; + socklen_t addrlen = sizeof(addr); + EXPECT_THAT( + getpeername(sock_.get(), reinterpret_cast<sockaddr*>(&addr), &addrlen), + SyscallSucceeds()); +} + +TEST_P(UdpSocketTest, DisconnectAfterConnectAny) { + // TODO(138658473): Enable when we can connect to port 0 with gVisor. + SKIP_IF(IsRunningOnGvisor()); + struct sockaddr_storage any = InetAnyAddr(); + EXPECT_THAT( + connect(sock_.get(), reinterpret_cast<struct sockaddr*>(&any), addrlen_), + SyscallSucceeds()); + + struct sockaddr_storage addr; + socklen_t addrlen = sizeof(addr); + EXPECT_THAT( + getpeername(sock_.get(), reinterpret_cast<sockaddr*>(&addr), &addrlen), + SyscallFailsWithErrno(ENOTCONN)); + + Disconnect(sock_.get()); +} + +TEST_P(UdpSocketTest, DisconnectAfterConnectAnyWithPort) { + ASSERT_NO_ERRNO(BindAny()); + EXPECT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds()); + + struct sockaddr_storage addr; + socklen_t addrlen = sizeof(addr); + EXPECT_THAT( + getpeername(sock_.get(), reinterpret_cast<sockaddr*>(&addr), &addrlen), + SyscallSucceeds()); + + EXPECT_EQ(addrlen, addrlen_); + EXPECT_EQ(*Port(&bind_addr_storage_), *Port(&addr)); + + Disconnect(sock_.get()); +} + +TEST_P(UdpSocketTest, DisconnectAfterBind) { + ASSERT_NO_ERRNO(BindLoopback()); + + // Bind to the next port above bind_. 
+ struct sockaddr_storage addr_storage = InetLoopbackAddr(); + struct sockaddr* addr = reinterpret_cast<struct sockaddr*>(&addr_storage); + SetPort(&addr_storage, *Port(&bind_addr_storage_) + 1); + ASSERT_NO_ERRNO(BindSocket(sock_.get(), addr)); + + // Connect the socket. + ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds()); + + struct sockaddr_storage unspec = {}; + unspec.ss_family = AF_UNSPEC; + EXPECT_THAT(connect(sock_.get(), reinterpret_cast<sockaddr*>(&unspec), + sizeof(unspec.ss_family)), + SyscallSucceeds()); + + // Check that we're still bound. + socklen_t addrlen = sizeof(unspec); + EXPECT_THAT( + getsockname(sock_.get(), reinterpret_cast<sockaddr*>(&unspec), &addrlen), + SyscallSucceeds()); + + EXPECT_EQ(addrlen, addrlen_); + EXPECT_EQ(memcmp(addr, &unspec, addrlen_), 0); + + addrlen = sizeof(addr); + EXPECT_THAT(getpeername(sock_.get(), addr, &addrlen), + SyscallFailsWithErrno(ENOTCONN)); +} + +TEST_P(UdpSocketTest, BindToAnyConnnectToLocalhost) { + ASSERT_NO_ERRNO(BindAny()); + + struct sockaddr_storage addr_storage = InetLoopbackAddr(); + struct sockaddr* addr = reinterpret_cast<struct sockaddr*>(&addr_storage); + SetPort(&addr_storage, *Port(&bind_addr_storage_) + 1); + socklen_t addrlen = sizeof(addr); + + // Connect the socket. + ASSERT_THAT(connect(bind_.get(), addr, addrlen_), SyscallSucceeds()); + + EXPECT_THAT(getsockname(bind_.get(), addr, &addrlen), SyscallSucceeds()); + + // If the socket is bound to ANY and connected to a loopback address, + // getsockname() has to return the loopback address. 
+ if (GetParam() == AddressFamily::kIpv4) { + auto addr_out = reinterpret_cast<struct sockaddr_in*>(addr); + EXPECT_EQ(addrlen, sizeof(*addr_out)); + EXPECT_EQ(addr_out->sin_addr.s_addr, htonl(INADDR_LOOPBACK)); + } else { + auto addr_out = reinterpret_cast<struct sockaddr_in6*>(addr); + struct in6_addr loopback = IN6ADDR_LOOPBACK_INIT; + EXPECT_EQ(addrlen, sizeof(*addr_out)); + EXPECT_EQ(memcmp(&addr_out->sin6_addr, &loopback, sizeof(in6_addr)), 0); + } +} + +TEST_P(UdpSocketTest, DisconnectAfterBindToAny) { + ASSERT_NO_ERRNO(BindLoopback()); + + struct sockaddr_storage any_storage = InetAnyAddr(); + struct sockaddr* any = reinterpret_cast<struct sockaddr*>(&any_storage); + SetPort(&any_storage, *Port(&bind_addr_storage_) + 1); + + ASSERT_NO_ERRNO(BindSocket(sock_.get(), any)); + + // Connect the socket. + ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds()); + + Disconnect(sock_.get()); + + // Check that we're still bound. + struct sockaddr_storage addr; + socklen_t addrlen = sizeof(addr); + EXPECT_THAT( + getsockname(sock_.get(), reinterpret_cast<sockaddr*>(&addr), &addrlen), + SyscallSucceeds()); + + EXPECT_EQ(addrlen, addrlen_); + EXPECT_EQ(memcmp(&addr, any, addrlen), 0); + + addrlen = sizeof(addr); + EXPECT_THAT( + getpeername(sock_.get(), reinterpret_cast<sockaddr*>(&addr), &addrlen), + SyscallFailsWithErrno(ENOTCONN)); +} + +TEST_P(UdpSocketTest, Disconnect) { + ASSERT_NO_ERRNO(BindLoopback()); + + struct sockaddr_storage any_storage = InetAnyAddr(); + struct sockaddr* any = reinterpret_cast<struct sockaddr*>(&any_storage); + SetPort(&any_storage, *Port(&bind_addr_storage_) + 1); + ASSERT_NO_ERRNO(BindSocket(sock_.get(), any)); + + for (int i = 0; i < 2; i++) { + // Try to connect again. + EXPECT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds()); + + // Check that we're connected to the right peer. 
+ struct sockaddr_storage peer; + socklen_t peerlen = sizeof(peer); + EXPECT_THAT( + getpeername(sock_.get(), reinterpret_cast<sockaddr*>(&peer), &peerlen), + SyscallSucceeds()); + EXPECT_EQ(peerlen, addrlen_); + EXPECT_EQ(memcmp(&peer, bind_addr_, addrlen_), 0); + + // Try to disconnect. + struct sockaddr_storage addr = {}; + addr.ss_family = AF_UNSPEC; + EXPECT_THAT(connect(sock_.get(), reinterpret_cast<sockaddr*>(&addr), + sizeof(addr.ss_family)), + SyscallSucceeds()); + + peerlen = sizeof(peer); + EXPECT_THAT( + getpeername(sock_.get(), reinterpret_cast<sockaddr*>(&peer), &peerlen), + SyscallFailsWithErrno(ENOTCONN)); + + // Check that we're still bound. + socklen_t addrlen = sizeof(addr); + EXPECT_THAT( + getsockname(sock_.get(), reinterpret_cast<sockaddr*>(&addr), &addrlen), + SyscallSucceeds()); + EXPECT_EQ(addrlen, addrlen_); + EXPECT_EQ(*Port(&addr), *Port(&any_storage)); + } +} + +TEST_P(UdpSocketTest, ConnectBadAddress) { + struct sockaddr addr = {}; + addr.sa_family = GetFamily(); + ASSERT_THAT(connect(sock_.get(), &addr, sizeof(addr.sa_family)), + SyscallFailsWithErrno(EINVAL)); +} + +TEST_P(UdpSocketTest, SendToAddressOtherThanConnected) { + ASSERT_NO_ERRNO(BindLoopback()); + + struct sockaddr_storage addr_storage = InetAnyAddr(); + struct sockaddr* addr = reinterpret_cast<struct sockaddr*>(&addr_storage); + SetPort(&addr_storage, *Port(&bind_addr_storage_) + 1); + + ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds()); + + // Send to a different destination than we're connected to. + char buf[512]; + EXPECT_THAT(sendto(sock_.get(), buf, sizeof(buf), 0, addr, addrlen_), + SyscallSucceedsWithValue(sizeof(buf))); +} + +TEST_P(UdpSocketTest, ZerolengthWriteAllowed) { + // TODO(gvisor.dev/issue/1202): Hostinet does not support zero length writes. + SKIP_IF(IsRunningWithHostinet()); + + ASSERT_NO_ERRNO(BindLoopback()); + // Connect to loopback:bind_addr_+1. 
+ struct sockaddr_storage addr_storage = InetLoopbackAddr(); + struct sockaddr* addr = reinterpret_cast<struct sockaddr*>(&addr_storage); + SetPort(&addr_storage, *Port(&bind_addr_storage_) + 1); + ASSERT_THAT(connect(bind_.get(), addr, addrlen_), SyscallSucceeds()); + + // Bind sock to loopback:bind_addr_+1. + ASSERT_THAT(bind(sock_.get(), addr, addrlen_), SyscallSucceeds()); + + char buf[3]; + // Send zero length packet from bind_ to sock_. + ASSERT_THAT(write(bind_.get(), buf, 0), SyscallSucceedsWithValue(0)); + + struct pollfd pfd = {sock_.get(), POLLIN, 0}; + ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, /*timeout*/ 1000), + SyscallSucceedsWithValue(1)); + + // Receive the packet. + char received[3]; + EXPECT_THAT(read(sock_.get(), received, sizeof(received)), + SyscallSucceedsWithValue(0)); +} + +TEST_P(UdpSocketTest, ZerolengthWriteAllowedNonBlockRead) { + // TODO(gvisor.dev/issue/1202): Hostinet does not support zero length writes. + SKIP_IF(IsRunningWithHostinet()); + + ASSERT_NO_ERRNO(BindLoopback()); + + // Connect to loopback:bind_addr_port+1. + struct sockaddr_storage addr_storage = InetLoopbackAddr(); + struct sockaddr* addr = reinterpret_cast<struct sockaddr*>(&addr_storage); + SetPort(&addr_storage, *Port(&bind_addr_storage_) + 1); + ASSERT_THAT(connect(bind_.get(), addr, addrlen_), SyscallSucceeds()); + + // Bind sock to loopback:bind_addr_port+1. + ASSERT_THAT(bind(sock_.get(), addr, addrlen_), SyscallSucceeds()); + + // Set sock to non-blocking. + int opts = 0; + ASSERT_THAT(opts = fcntl(sock_.get(), F_GETFL), SyscallSucceeds()); + ASSERT_THAT(fcntl(sock_.get(), F_SETFL, opts | O_NONBLOCK), + SyscallSucceeds()); + + char buf[3]; + // Send zero length packet from bind_ to sock_. + ASSERT_THAT(write(bind_.get(), buf, 0), SyscallSucceedsWithValue(0)); + + struct pollfd pfd = {sock_.get(), POLLIN, 0}; + ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, /*timeout=*/1000), + SyscallSucceedsWithValue(1)); + + // Receive the packet. 
+ char received[3]; + EXPECT_THAT(read(sock_.get(), received, sizeof(received)), + SyscallSucceedsWithValue(0)); + EXPECT_THAT(read(sock_.get(), received, sizeof(received)), + SyscallFailsWithErrno(EAGAIN)); +} + +TEST_P(UdpSocketTest, SendAndReceiveNotConnected) { + ASSERT_NO_ERRNO(BindLoopback()); + + // Send some data to bind_. + char buf[512]; + RandomizeBuffer(buf, sizeof(buf)); + + ASSERT_THAT(sendto(sock_.get(), buf, sizeof(buf), 0, bind_addr_, addrlen_), + SyscallSucceedsWithValue(sizeof(buf))); + + // Receive the data. + char received[sizeof(buf)]; + EXPECT_THAT(recv(bind_.get(), received, sizeof(received), 0), + SyscallSucceedsWithValue(sizeof(received))); + EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0); +} + +TEST_P(UdpSocketTest, SendAndReceiveConnected) { + ASSERT_NO_ERRNO(BindLoopback()); + + // Connect to loopback:bind_addr_port+1. + struct sockaddr_storage addr_storage = InetLoopbackAddr(); + struct sockaddr* addr = reinterpret_cast<struct sockaddr*>(&addr_storage); + SetPort(&addr_storage, *Port(&bind_addr_storage_) + 1); + ASSERT_THAT(connect(bind_.get(), addr, addrlen_), SyscallSucceeds()); + + // Bind sock to loopback:TestPort+1. + ASSERT_THAT(bind(sock_.get(), addr, addrlen_), SyscallSucceeds()); + + // Send some data from sock to bind_. + char buf[512]; + RandomizeBuffer(buf, sizeof(buf)); + + ASSERT_THAT(sendto(sock_.get(), buf, sizeof(buf), 0, bind_addr_, addrlen_), + SyscallSucceedsWithValue(sizeof(buf))); + + // Receive the data. + char received[sizeof(buf)]; + EXPECT_THAT(recv(bind_.get(), received, sizeof(received), 0), + SyscallSucceedsWithValue(sizeof(received))); + EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0); +} + +TEST_P(UdpSocketTest, ReceiveFromNotConnected) { + ASSERT_NO_ERRNO(BindLoopback()); + + // Connect to loopback:bind_addr_port+1. 
+ struct sockaddr_storage addr_storage = InetLoopbackAddr(); + struct sockaddr* addr = reinterpret_cast<struct sockaddr*>(&addr_storage); + SetPort(&addr_storage, *Port(&bind_addr_storage_) + 1); + ASSERT_THAT(connect(bind_.get(), addr, addrlen_), SyscallSucceeds()); + + // Bind sock to loopback:bind_addr_port+2. + struct sockaddr_storage addr2_storage = InetLoopbackAddr(); + struct sockaddr* addr2 = reinterpret_cast<struct sockaddr*>(&addr2_storage); + SetPort(&addr2_storage, *Port(&bind_addr_storage_) + 2); + ASSERT_THAT(bind(sock_.get(), addr2, addrlen_), SyscallSucceeds()); + + // Send some data from sock to bind_. + char buf[512]; + ASSERT_THAT(sendto(sock_.get(), buf, sizeof(buf), 0, bind_addr_, addrlen_), + SyscallSucceedsWithValue(sizeof(buf))); + + // Check that the data isn't received because it was sent from a different + // address than we're connected. + EXPECT_THAT(recv(sock_.get(), buf, sizeof(buf), MSG_DONTWAIT), + SyscallFailsWithErrno(EWOULDBLOCK)); +} + +TEST_P(UdpSocketTest, ReceiveBeforeConnect) { + ASSERT_NO_ERRNO(BindLoopback()); + + // Bind sock to loopback:bind_addr_port+2. + struct sockaddr_storage addr2_storage = InetLoopbackAddr(); + struct sockaddr* addr2 = reinterpret_cast<struct sockaddr*>(&addr2_storage); + SetPort(&addr2_storage, *Port(&bind_addr_storage_) + 2); + ASSERT_THAT(bind(sock_.get(), addr2, addrlen_), SyscallSucceeds()); + + // Send some data from sock to bind_. + char buf[512]; + RandomizeBuffer(buf, sizeof(buf)); + + ASSERT_THAT(sendto(sock_.get(), buf, sizeof(buf), 0, bind_addr_, addrlen_), + SyscallSucceedsWithValue(sizeof(buf))); + + // Connect to loopback:TestPort+1. + struct sockaddr_storage addr_storage = InetLoopbackAddr(); + struct sockaddr* addr = reinterpret_cast<struct sockaddr*>(&addr_storage); + SetPort(&addr_storage, *Port(&bind_addr_storage_) + 1); + ASSERT_THAT(connect(bind_.get(), addr, addrlen_), SyscallSucceeds()); + + // Receive the data. It works because it was sent before the connect. 
+ char received[sizeof(buf)]; + EXPECT_THAT(recv(bind_.get(), received, sizeof(received), 0), + SyscallSucceedsWithValue(sizeof(received))); + EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0); + + // Send again. This time it should not be received. + ASSERT_THAT(sendto(sock_.get(), buf, sizeof(buf), 0, bind_addr_, addrlen_), + SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(recv(bind_.get(), buf, sizeof(buf), MSG_DONTWAIT), + SyscallFailsWithErrno(EWOULDBLOCK)); +} + +TEST_P(UdpSocketTest, ReceiveFrom) { + ASSERT_NO_ERRNO(BindLoopback()); + + // Connect to loopback:bind_addr_port+1. + struct sockaddr_storage addr_storage = InetLoopbackAddr(); + struct sockaddr* addr = reinterpret_cast<struct sockaddr*>(&addr_storage); + SetPort(&addr_storage, *Port(&bind_addr_storage_) + 1); + ASSERT_THAT(connect(bind_.get(), addr, addrlen_), SyscallSucceeds()); + + // Bind sock to loopback:TestPort+1. + ASSERT_THAT(bind(sock_.get(), addr, addrlen_), SyscallSucceeds()); + + // Send some data from sock to bind_. + char buf[512]; + RandomizeBuffer(buf, sizeof(buf)); + + ASSERT_THAT(sendto(sock_.get(), buf, sizeof(buf), 0, bind_addr_, addrlen_), + SyscallSucceedsWithValue(sizeof(buf))); + + // Receive the data and sender address. 
+ char received[sizeof(buf)]; + struct sockaddr_storage addr2; + socklen_t addr2len = sizeof(addr2); + EXPECT_THAT(recvfrom(bind_.get(), received, sizeof(received), 0, + reinterpret_cast<sockaddr*>(&addr2), &addr2len), + SyscallSucceedsWithValue(sizeof(received))); + EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0); + EXPECT_EQ(addr2len, addrlen_); + EXPECT_EQ(memcmp(addr, &addr2, addrlen_), 0); +} + +TEST_P(UdpSocketTest, Listen) { + ASSERT_THAT(listen(sock_.get(), SOMAXCONN), + SyscallFailsWithErrno(EOPNOTSUPP)); +} + +TEST_P(UdpSocketTest, Accept) { + ASSERT_THAT(accept(sock_.get(), nullptr, nullptr), + SyscallFailsWithErrno(EOPNOTSUPP)); +} + +// This test validates that a read shutdown with pending data allows the read +// to proceed with the data before returning EAGAIN. +TEST_P(UdpSocketTest, ReadShutdownNonblockPendingData) { + ASSERT_NO_ERRNO(BindLoopback()); + + // Connect to loopback:bind_addr_port+1. + struct sockaddr_storage addr_storage = InetLoopbackAddr(); + struct sockaddr* addr = reinterpret_cast<struct sockaddr*>(&addr_storage); + SetPort(&addr_storage, *Port(&bind_addr_storage_) + 1); + ASSERT_THAT(connect(bind_.get(), addr, addrlen_), SyscallSucceeds()); + + // Bind to loopback:bind_addr_port+1 and connect to bind_addr_. + ASSERT_THAT(bind(sock_.get(), addr, addrlen_), SyscallSucceeds()); + ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds()); + + // Verify that we get EWOULDBLOCK when there is nothing to read. 
+ char received[512]; + EXPECT_THAT(recv(bind_.get(), received, sizeof(received), MSG_DONTWAIT), + SyscallFailsWithErrno(EWOULDBLOCK)); + + const char* buf = "abc"; + EXPECT_THAT(write(sock_.get(), buf, 3), SyscallSucceedsWithValue(3)); + + int opts = 0; + ASSERT_THAT(opts = fcntl(bind_.get(), F_GETFL), SyscallSucceeds()); + ASSERT_THAT(fcntl(bind_.get(), F_SETFL, opts | O_NONBLOCK), + SyscallSucceeds()); + ASSERT_THAT(opts = fcntl(bind_.get(), F_GETFL), SyscallSucceeds()); + ASSERT_NE(opts & O_NONBLOCK, 0); + + EXPECT_THAT(shutdown(bind_.get(), SHUT_RD), SyscallSucceeds()); + + struct pollfd pfd = {bind_.get(), POLLIN, 0}; + ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, /*timeout=*/1000), + SyscallSucceedsWithValue(1)); + + // We should get the data even though read has been shutdown. + EXPECT_THAT(recv(bind_.get(), received, 2, 0), SyscallSucceedsWithValue(2)); + + // Because we read less than the entire packet length, since it's a packet + // based socket any subsequent reads should return EWOULDBLOCK. + EXPECT_THAT(recv(bind_.get(), received, 1, 0), + SyscallFailsWithErrno(EWOULDBLOCK)); +} + +// This test is validating that even after a socket is shutdown if it's +// reconnected it will reset the shutdown state. +TEST_P(UdpSocketTest, ReadShutdownSameSocketResetsShutdownState) { + char received[512]; + EXPECT_THAT(recv(bind_.get(), received, sizeof(received), MSG_DONTWAIT), + SyscallFailsWithErrno(EWOULDBLOCK)); + + EXPECT_THAT(shutdown(bind_.get(), SHUT_RD), SyscallFailsWithErrno(ENOTCONN)); + + EXPECT_THAT(recv(bind_.get(), received, sizeof(received), MSG_DONTWAIT), + SyscallFailsWithErrno(EWOULDBLOCK)); + + // Connect the socket, then try to shutdown again. + ASSERT_NO_ERRNO(BindLoopback()); + + // Connect to loopback:bind_addr_port+1. 
+ struct sockaddr_storage addr_storage = InetLoopbackAddr(); + struct sockaddr* addr = reinterpret_cast<struct sockaddr*>(&addr_storage); + SetPort(&addr_storage, *Port(&bind_addr_storage_) + 1); + ASSERT_THAT(connect(bind_.get(), addr, addrlen_), SyscallSucceeds()); + + EXPECT_THAT(recv(bind_.get(), received, sizeof(received), MSG_DONTWAIT), + SyscallFailsWithErrno(EWOULDBLOCK)); +} + +TEST_P(UdpSocketTest, ReadShutdown) { + // TODO(gvisor.dev/issue/1202): Calling recv() after shutdown without + // MSG_DONTWAIT blocks indefinitely. + SKIP_IF(IsRunningWithHostinet()); + + ASSERT_NO_ERRNO(BindLoopback()); + + char received[512]; + EXPECT_THAT(recv(sock_.get(), received, sizeof(received), MSG_DONTWAIT), + SyscallFailsWithErrno(EWOULDBLOCK)); + + EXPECT_THAT(shutdown(sock_.get(), SHUT_RD), SyscallFailsWithErrno(ENOTCONN)); + + EXPECT_THAT(recv(sock_.get(), received, sizeof(received), MSG_DONTWAIT), + SyscallFailsWithErrno(EWOULDBLOCK)); + + // Connect the socket, then try to shutdown again. + ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds()); + + EXPECT_THAT(recv(sock_.get(), received, sizeof(received), MSG_DONTWAIT), + SyscallFailsWithErrno(EWOULDBLOCK)); + + EXPECT_THAT(shutdown(sock_.get(), SHUT_RD), SyscallSucceeds()); + + EXPECT_THAT(recv(sock_.get(), received, sizeof(received), 0), + SyscallSucceedsWithValue(0)); +} + +TEST_P(UdpSocketTest, ReadShutdownDifferentThread) { + // TODO(gvisor.dev/issue/1202): Calling recv() after shutdown without + // MSG_DONTWAIT blocks indefinitely. + SKIP_IF(IsRunningWithHostinet()); + ASSERT_NO_ERRNO(BindLoopback()); + + char received[512]; + EXPECT_THAT(recv(sock_.get(), received, sizeof(received), MSG_DONTWAIT), + SyscallFailsWithErrno(EWOULDBLOCK)); + + // Connect the socket, then shutdown from another thread. 
+ ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds()); + + EXPECT_THAT(recv(sock_.get(), received, sizeof(received), MSG_DONTWAIT), + SyscallFailsWithErrno(EWOULDBLOCK)); + + ScopedThread t([&] { + absl::SleepFor(absl::Milliseconds(200)); + EXPECT_THAT(shutdown(sock_.get(), SHUT_RD), SyscallSucceeds()); + }); + EXPECT_THAT(RetryEINTR(recv)(sock_.get(), received, sizeof(received), 0), + SyscallSucceedsWithValue(0)); + t.Join(); + + EXPECT_THAT(RetryEINTR(recv)(sock_.get(), received, sizeof(received), 0), + SyscallSucceedsWithValue(0)); +} + +TEST_P(UdpSocketTest, WriteShutdown) { + ASSERT_NO_ERRNO(BindLoopback()); + EXPECT_THAT(shutdown(sock_.get(), SHUT_WR), SyscallFailsWithErrno(ENOTCONN)); + ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds()); + EXPECT_THAT(shutdown(sock_.get(), SHUT_WR), SyscallSucceeds()); +} + +TEST_P(UdpSocketTest, SynchronousReceive) { + ASSERT_NO_ERRNO(BindLoopback()); + + // Send some data to bind_ from another thread. + char buf[512]; + RandomizeBuffer(buf, sizeof(buf)); + + // Receive the data prior to actually starting the other thread. + char received[512]; + EXPECT_THAT( + RetryEINTR(recv)(bind_.get(), received, sizeof(received), MSG_DONTWAIT), + SyscallFailsWithErrno(EWOULDBLOCK)); + + // Start the thread. + ScopedThread t([&] { + absl::SleepFor(absl::Milliseconds(200)); + ASSERT_THAT(sendto(sock_.get(), buf, sizeof(buf), 0, this->bind_addr_, + this->addrlen_), + SyscallSucceedsWithValue(sizeof(buf))); + }); + + EXPECT_THAT(RetryEINTR(recv)(bind_.get(), received, sizeof(received), 0), + SyscallSucceedsWithValue(512)); + EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0); +} + +TEST_P(UdpSocketTest, BoundaryPreserved_SendRecv) { + ASSERT_NO_ERRNO(BindLoopback()); + + // Send 3 packets from sock to bind_. 
+ constexpr int psize = 100; + char buf[3 * psize]; + RandomizeBuffer(buf, sizeof(buf)); + + for (int i = 0; i < 3; ++i) { + ASSERT_THAT( + sendto(sock_.get(), buf + i * psize, psize, 0, bind_addr_, addrlen_), + SyscallSucceedsWithValue(psize)); + } + + // Receive the data as 3 separate packets. + char received[6 * psize]; + for (int i = 0; i < 3; ++i) { + EXPECT_THAT(recv(bind_.get(), received + i * psize, 3 * psize, 0), + SyscallSucceedsWithValue(psize)); + } + EXPECT_EQ(memcmp(buf, received, 3 * psize), 0); +} + +TEST_P(UdpSocketTest, BoundaryPreserved_WritevReadv) { + ASSERT_NO_ERRNO(BindLoopback()); + + // Direct writes from sock to bind_. + ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds()); + + // Send 2 packets from sock to bind_, where each packet's data consists of + // 2 discontiguous iovecs. + constexpr size_t kPieceSize = 100; + char buf[4 * kPieceSize]; + RandomizeBuffer(buf, sizeof(buf)); + + for (int i = 0; i < 2; i++) { + struct iovec iov[2]; + for (int j = 0; j < 2; j++) { + iov[j].iov_base = reinterpret_cast<void*>( + reinterpret_cast<uintptr_t>(buf) + (i + 2 * j) * kPieceSize); + iov[j].iov_len = kPieceSize; + } + ASSERT_THAT(writev(sock_.get(), iov, 2), + SyscallSucceedsWithValue(2 * kPieceSize)); + } + + // Receive the data as 2 separate packets. + char received[6 * kPieceSize]; + for (int i = 0; i < 2; i++) { + struct iovec iov[3]; + for (int j = 0; j < 3; j++) { + iov[j].iov_base = reinterpret_cast<void*>( + reinterpret_cast<uintptr_t>(received) + (i + 2 * j) * kPieceSize); + iov[j].iov_len = kPieceSize; + } + ASSERT_THAT(readv(bind_.get(), iov, 3), + SyscallSucceedsWithValue(2 * kPieceSize)); + } + EXPECT_EQ(memcmp(buf, received, 4 * kPieceSize), 0); +} + +TEST_P(UdpSocketTest, BoundaryPreserved_SendMsgRecvMsg) { + ASSERT_NO_ERRNO(BindLoopback()); + + // Send 2 packets from sock to bind_, where each packet's data consists of + // 2 discontiguous iovecs. 
+ constexpr size_t kPieceSize = 100; + char buf[4 * kPieceSize]; + RandomizeBuffer(buf, sizeof(buf)); + + for (int i = 0; i < 2; i++) { + struct iovec iov[2]; + for (int j = 0; j < 2; j++) { + iov[j].iov_base = reinterpret_cast<void*>( + reinterpret_cast<uintptr_t>(buf) + (i + 2 * j) * kPieceSize); + iov[j].iov_len = kPieceSize; + } + struct msghdr msg = {}; + msg.msg_name = bind_addr_; + msg.msg_namelen = addrlen_; + msg.msg_iov = iov; + msg.msg_iovlen = 2; + ASSERT_THAT(sendmsg(sock_.get(), &msg, 0), + SyscallSucceedsWithValue(2 * kPieceSize)); + } + + // Receive the data as 2 separate packets. + char received[6 * kPieceSize]; + for (int i = 0; i < 2; i++) { + struct iovec iov[3]; + for (int j = 0; j < 3; j++) { + iov[j].iov_base = reinterpret_cast<void*>( + reinterpret_cast<uintptr_t>(received) + (i + 2 * j) * kPieceSize); + iov[j].iov_len = kPieceSize; + } + struct msghdr msg = {}; + msg.msg_iov = iov; + msg.msg_iovlen = 3; + ASSERT_THAT(recvmsg(bind_.get(), &msg, 0), + SyscallSucceedsWithValue(2 * kPieceSize)); + } + EXPECT_EQ(memcmp(buf, received, 4 * kPieceSize), 0); +} + +TEST_P(UdpSocketTest, FIONREADShutdown) { + ASSERT_NO_ERRNO(BindLoopback()); + + int n = -1; + EXPECT_THAT(ioctl(sock_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_EQ(n, 0); + + // A UDP socket must be connected before it can be shutdown. + ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds()); + + n = -1; + EXPECT_THAT(ioctl(sock_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_EQ(n, 0); + + EXPECT_THAT(shutdown(sock_.get(), SHUT_RD), SyscallSucceeds()); + + n = -1; + EXPECT_THAT(ioctl(sock_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_EQ(n, 0); +} + +TEST_P(UdpSocketTest, FIONREADWriteShutdown) { + int n = -1; + EXPECT_THAT(ioctl(bind_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_EQ(n, 0); + + ASSERT_NO_ERRNO(BindLoopback()); + + // A UDP socket must be connected before it can be shutdown. 
+ ASSERT_THAT(connect(bind_.get(), bind_addr_, addrlen_), SyscallSucceeds()); + + n = -1; + EXPECT_THAT(ioctl(bind_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_EQ(n, 0); + + const char str[] = "abc"; + ASSERT_THAT(send(bind_.get(), str, sizeof(str), 0), + SyscallSucceedsWithValue(sizeof(str))); + + struct pollfd pfd = {bind_.get(), POLLIN, 0}; + ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, /*timeout=*/1000), + SyscallSucceedsWithValue(1)); + + n = -1; + EXPECT_THAT(ioctl(bind_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_EQ(n, sizeof(str)); + + EXPECT_THAT(shutdown(bind_.get(), SHUT_RD), SyscallSucceeds()); + + n = -1; + EXPECT_THAT(ioctl(bind_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_EQ(n, sizeof(str)); +} + +// NOTE: Do not use `FIONREAD` as test name because it will be replaced by the +// corresponding macro and become `0x541B`. +TEST_P(UdpSocketTest, Fionread) { + ASSERT_NO_ERRNO(BindLoopback()); + + // Check that the bound socket with an empty buffer reports an empty first + // packet. + int n = -1; + EXPECT_THAT(ioctl(bind_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_EQ(n, 0); + + // Send 3 packets from sock to bind_. + constexpr int psize = 100; + char buf[3 * psize]; + RandomizeBuffer(buf, sizeof(buf)); + + struct pollfd pfd = {bind_.get(), POLLIN, 0}; + for (int i = 0; i < 3; ++i) { + ASSERT_THAT( + sendto(sock_.get(), buf + i * psize, psize, 0, bind_addr_, addrlen_), + SyscallSucceedsWithValue(psize)); + + ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, /*timeout=*/1000), + SyscallSucceedsWithValue(1)); + + // Check that regardless of how many packets are in the queue, the size + // reported is that of a single packet. 
+ n = -1; + EXPECT_THAT(ioctl(bind_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_EQ(n, psize); + } +} + +TEST_P(UdpSocketTest, FIONREADZeroLengthPacket) { + ASSERT_NO_ERRNO(BindLoopback()); + + // Check that the bound socket with an empty buffer reports an empty first + // packet. + int n = -1; + EXPECT_THAT(ioctl(bind_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_EQ(n, 0); + + // Send 3 packets from sock to bind_. + constexpr int psize = 100; + char buf[3 * psize]; + RandomizeBuffer(buf, sizeof(buf)); + + struct pollfd pfd = {bind_.get(), POLLIN, 0}; + for (int i = 0; i < 3; ++i) { + ASSERT_THAT( + sendto(sock_.get(), buf + i * psize, 0, 0, bind_addr_, addrlen_), + SyscallSucceedsWithValue(0)); + + // TODO(gvisor.dev/issue/2726): sending a zero-length message to a hostinet + // socket does not cause a poll event to be triggered. + if (!IsRunningWithHostinet()) { + ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, /*timeout=*/1000), + SyscallSucceedsWithValue(1)); + } + + // Check that regardless of how many packets are in the queue, the size + // reported is that of a single packet. + n = -1; + EXPECT_THAT(ioctl(bind_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_EQ(n, 0); + } +} + +TEST_P(UdpSocketTest, FIONREADZeroLengthWriteShutdown) { + int n = -1; + EXPECT_THAT(ioctl(bind_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_EQ(n, 0); + + ASSERT_NO_ERRNO(BindLoopback()); + + // A UDP socket must be connected before it can be shutdown. 
+ ASSERT_THAT(connect(bind_.get(), bind_addr_, addrlen_), SyscallSucceeds()); + + n = -1; + EXPECT_THAT(ioctl(bind_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_EQ(n, 0); + + const char str[] = "abc"; + ASSERT_THAT(send(bind_.get(), str, 0, 0), SyscallSucceedsWithValue(0)); + + n = -1; + EXPECT_THAT(ioctl(bind_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_EQ(n, 0); + + EXPECT_THAT(shutdown(bind_.get(), SHUT_RD), SyscallSucceeds()); + + n = -1; + EXPECT_THAT(ioctl(bind_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_EQ(n, 0); +} + +TEST_P(UdpSocketTest, SoNoCheckOffByDefault) { + // TODO(gvisor.dev/issue/1202): SO_NO_CHECK socket option not supported by + // hostinet. + SKIP_IF(IsRunningWithHostinet()); + + int v = -1; + socklen_t optlen = sizeof(v); + ASSERT_THAT(getsockopt(bind_.get(), SOL_SOCKET, SO_NO_CHECK, &v, &optlen), + SyscallSucceeds()); + ASSERT_EQ(v, kSockOptOff); + ASSERT_EQ(optlen, sizeof(v)); +} + +TEST_P(UdpSocketTest, SoNoCheck) { + // TODO(gvisor.dev/issue/1202): SO_NO_CHECK socket option not supported by + // hostinet. + SKIP_IF(IsRunningWithHostinet()); + + int v = kSockOptOn; + socklen_t optlen = sizeof(v); + ASSERT_THAT(setsockopt(bind_.get(), SOL_SOCKET, SO_NO_CHECK, &v, optlen), + SyscallSucceeds()); + v = -1; + ASSERT_THAT(getsockopt(bind_.get(), SOL_SOCKET, SO_NO_CHECK, &v, &optlen), + SyscallSucceeds()); + ASSERT_EQ(v, kSockOptOn); + ASSERT_EQ(optlen, sizeof(v)); + + v = kSockOptOff; + ASSERT_THAT(setsockopt(bind_.get(), SOL_SOCKET, SO_NO_CHECK, &v, optlen), + SyscallSucceeds()); + v = -1; + ASSERT_THAT(getsockopt(bind_.get(), SOL_SOCKET, SO_NO_CHECK, &v, &optlen), + SyscallSucceeds()); + ASSERT_EQ(v, kSockOptOff); + ASSERT_EQ(optlen, sizeof(v)); +} + +TEST_P(UdpSocketTest, SoTimestampOffByDefault) { + // TODO(gvisor.dev/issue/1202): SO_TIMESTAMP socket option not supported by + // hostinet. 
+ SKIP_IF(IsRunningWithHostinet()); + + int v = -1; + socklen_t optlen = sizeof(v); + ASSERT_THAT(getsockopt(bind_.get(), SOL_SOCKET, SO_TIMESTAMP, &v, &optlen), + SyscallSucceeds()); + ASSERT_EQ(v, kSockOptOff); + ASSERT_EQ(optlen, sizeof(v)); +} + +TEST_P(UdpSocketTest, SoTimestamp) { + // TODO(gvisor.dev/issue/1202): ioctl() and SO_TIMESTAMP socket option are not + // supported by hostinet. + SKIP_IF(IsRunningWithHostinet()); + + ASSERT_NO_ERRNO(BindLoopback()); + ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds()); + + int v = 1; + ASSERT_THAT(setsockopt(bind_.get(), SOL_SOCKET, SO_TIMESTAMP, &v, sizeof(v)), + SyscallSucceeds()); + + char buf[3]; + // Send zero length packet from sock to bind_. + ASSERT_THAT(RetryEINTR(write)(sock_.get(), buf, 0), + SyscallSucceedsWithValue(0)); + + struct pollfd pfd = {bind_.get(), POLLIN, 0}; + ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, /*timeout=*/1000), + SyscallSucceedsWithValue(1)); + + char cmsgbuf[CMSG_SPACE(sizeof(struct timeval))]; + msghdr msg; + memset(&msg, 0, sizeof(msg)); + iovec iov; + memset(&iov, 0, sizeof(iov)); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = cmsgbuf; + msg.msg_controllen = sizeof(cmsgbuf); + + ASSERT_THAT(RetryEINTR(recvmsg)(bind_.get(), &msg, 0), + SyscallSucceedsWithValue(0)); + + struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg); + ASSERT_NE(cmsg, nullptr); + ASSERT_EQ(cmsg->cmsg_level, SOL_SOCKET); + ASSERT_EQ(cmsg->cmsg_type, SO_TIMESTAMP); + ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(struct timeval))); + + struct timeval tv = {}; + memcpy(&tv, CMSG_DATA(cmsg), sizeof(struct timeval)); + + ASSERT_TRUE(tv.tv_sec != 0 || tv.tv_usec != 0); + + // There should be nothing to get via ioctl. 
+ ASSERT_THAT(ioctl(bind_.get(), SIOCGSTAMP, &tv), + SyscallFailsWithErrno(ENOENT)); +} + +TEST_P(UdpSocketTest, WriteShutdownNotConnected) { + EXPECT_THAT(shutdown(bind_.get(), SHUT_WR), SyscallFailsWithErrno(ENOTCONN)); +} + +TEST_P(UdpSocketTest, TimestampIoctl) { + // TODO(gvisor.dev/issue/1202): ioctl() is not supported by hostinet. + SKIP_IF(IsRunningWithHostinet()); + + ASSERT_NO_ERRNO(BindLoopback()); + ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds()); + + char buf[3]; + // Send packet from sock to bind_. + ASSERT_THAT(RetryEINTR(write)(sock_.get(), buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + struct pollfd pfd = {bind_.get(), POLLIN, 0}; + ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, /*timeout=*/1000), + SyscallSucceedsWithValue(1)); + + // There should be no control messages. + char recv_buf[sizeof(buf)]; + ASSERT_NO_FATAL_FAILURE(RecvNoCmsg(bind_.get(), recv_buf, sizeof(recv_buf))); + + // A nonzero timeval should be available via ioctl. + struct timeval tv = {}; + ASSERT_THAT(ioctl(bind_.get(), SIOCGSTAMP, &tv), SyscallSucceeds()); + ASSERT_TRUE(tv.tv_sec != 0 || tv.tv_usec != 0); +} + +TEST_P(UdpSocketTest, TimestampIoctlNothingRead) { + // TODO(gvisor.dev/issue/1202): ioctl() is not supported by hostinet. + SKIP_IF(IsRunningWithHostinet()); + + ASSERT_NO_ERRNO(BindLoopback()); + ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds()); + + struct timeval tv = {}; + ASSERT_THAT(ioctl(sock_.get(), SIOCGSTAMP, &tv), + SyscallFailsWithErrno(ENOENT)); +} + +// Test that the timestamp accessed via SIOCGSTAMP is still accessible after +// SO_TIMESTAMP is enabled and used to retrieve a timestamp. +TEST_P(UdpSocketTest, TimestampIoctlPersistence) { + // TODO(gvisor.dev/issue/1202): ioctl() and SO_TIMESTAMP socket option are not + // supported by hostinet. 
+ SKIP_IF(IsRunningWithHostinet()); + + ASSERT_NO_ERRNO(BindLoopback()); + ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds()); + + char buf[3]; + // Send packet from sock to bind_. + ASSERT_THAT(RetryEINTR(write)(sock_.get(), buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + ASSERT_THAT(RetryEINTR(write)(sock_.get(), buf, 0), + SyscallSucceedsWithValue(0)); + + struct pollfd pfd = {bind_.get(), POLLIN, 0}; + ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, /*timeout=*/1000), + SyscallSucceedsWithValue(1)); + + // There should be no control messages. + char recv_buf[sizeof(buf)]; + ASSERT_NO_FATAL_FAILURE(RecvNoCmsg(bind_.get(), recv_buf, sizeof(recv_buf))); + + // A nonzero timeval should be available via ioctl. + struct timeval tv = {}; + ASSERT_THAT(ioctl(bind_.get(), SIOCGSTAMP, &tv), SyscallSucceeds()); + ASSERT_TRUE(tv.tv_sec != 0 || tv.tv_usec != 0); + + // Enable SO_TIMESTAMP and send a message. + int v = 1; + EXPECT_THAT(setsockopt(bind_.get(), SOL_SOCKET, SO_TIMESTAMP, &v, sizeof(v)), + SyscallSucceeds()); + ASSERT_THAT(RetryEINTR(write)(sock_.get(), buf, 0), + SyscallSucceedsWithValue(0)); + + ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, /*timeout=*/1000), + SyscallSucceedsWithValue(1)); + + // There should be a message for SO_TIMESTAMP. + char cmsgbuf[CMSG_SPACE(sizeof(struct timeval))]; + msghdr msg = {}; + iovec iov = {}; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = cmsgbuf; + msg.msg_controllen = sizeof(cmsgbuf); + ASSERT_THAT(RetryEINTR(recvmsg)(bind_.get(), &msg, 0), + SyscallSucceedsWithValue(0)); + struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg); + ASSERT_NE(cmsg, nullptr); + + // The ioctl should return the exact same values as before. 
+ struct timeval tv2 = {}; + ASSERT_THAT(ioctl(bind_.get(), SIOCGSTAMP, &tv2), SyscallSucceeds()); + ASSERT_EQ(tv.tv_sec, tv2.tv_sec); + ASSERT_EQ(tv.tv_usec, tv2.tv_usec); +} + +// Test that a socket with IP_TOS or IPV6_TCLASS set will set the TOS byte on +// outgoing packets, and that a receiving socket with IP_RECVTOS or +// IPV6_RECVTCLASS will create the corresponding control message. +TEST_P(UdpSocketTest, SetAndReceiveTOS) { + ASSERT_NO_ERRNO(BindLoopback()); + ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds()); + + // Allow socket to receive control message. + int recv_level = SOL_IP; + int recv_type = IP_RECVTOS; + if (GetParam() != AddressFamily::kIpv4) { + recv_level = SOL_IPV6; + recv_type = IPV6_RECVTCLASS; + } + ASSERT_THAT(setsockopt(bind_.get(), recv_level, recv_type, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + + // Set socket TOS. + int sent_level = recv_level; + int sent_type = IP_TOS; + if (sent_level == SOL_IPV6) { + sent_type = IPV6_TCLASS; + } + int sent_tos = IPTOS_LOWDELAY; // Choose some TOS value. + ASSERT_THAT(setsockopt(sock_.get(), sent_level, sent_type, &sent_tos, + sizeof(sent_tos)), + SyscallSucceeds()); + + // Prepare message to send. + constexpr size_t kDataLength = 1024; + struct msghdr sent_msg = {}; + struct iovec sent_iov = {}; + char sent_data[kDataLength]; + sent_iov.iov_base = &sent_data[0]; + sent_iov.iov_len = kDataLength; + sent_msg.msg_iov = &sent_iov; + sent_msg.msg_iovlen = 1; + + ASSERT_THAT(RetryEINTR(sendmsg)(sock_.get(), &sent_msg, 0), + SyscallSucceedsWithValue(kDataLength)); + + // Receive message. 
+ struct msghdr received_msg = {}; + struct iovec received_iov = {}; + char received_data[kDataLength]; + received_iov.iov_base = &received_data[0]; + received_iov.iov_len = kDataLength; + received_msg.msg_iov = &received_iov; + received_msg.msg_iovlen = 1; + size_t cmsg_data_len = sizeof(int8_t); + if (sent_type == IPV6_TCLASS) { + cmsg_data_len = sizeof(int); + } + std::vector<char> received_cmsgbuf(CMSG_SPACE(cmsg_data_len)); + received_msg.msg_control = &received_cmsgbuf[0]; + received_msg.msg_controllen = received_cmsgbuf.size(); + ASSERT_THAT(RetryEINTR(recvmsg)(bind_.get(), &received_msg, 0), + SyscallSucceedsWithValue(kDataLength)); + + struct cmsghdr* cmsg = CMSG_FIRSTHDR(&received_msg); + ASSERT_NE(cmsg, nullptr); + EXPECT_EQ(cmsg->cmsg_len, CMSG_LEN(cmsg_data_len)); + EXPECT_EQ(cmsg->cmsg_level, sent_level); + EXPECT_EQ(cmsg->cmsg_type, sent_type); + int8_t received_tos = 0; + memcpy(&received_tos, CMSG_DATA(cmsg), sizeof(received_tos)); + EXPECT_EQ(received_tos, sent_tos); +} + +// Test that sendmsg with IP_TOS and IPV6_TCLASS control messages will set the +// TOS byte on outgoing packets, and that a receiving socket with IP_RECVTOS or +// IPV6_RECVTCLASS will create the corresponding control message. +TEST_P(UdpSocketTest, SendAndReceiveTOS) { + // TODO(b/146661005): Setting TOS via cmsg not supported for netstack. + SKIP_IF(IsRunningOnGvisor() && !IsRunningWithHostinet()); + + ASSERT_NO_ERRNO(BindLoopback()); + ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds()); + + // Allow socket to receive control message. + int recv_level = SOL_IP; + int recv_type = IP_RECVTOS; + if (GetParam() != AddressFamily::kIpv4) { + recv_level = SOL_IPV6; + recv_type = IPV6_RECVTCLASS; + } + int recv_opt = kSockOptOn; + ASSERT_THAT(setsockopt(bind_.get(), recv_level, recv_type, &recv_opt, + sizeof(recv_opt)), + SyscallSucceeds()); + + // Prepare message to send. 
+ constexpr size_t kDataLength = 1024; + int sent_level = recv_level; + int sent_type = IP_TOS; + int sent_tos = IPTOS_LOWDELAY; // Choose some TOS value. + + struct msghdr sent_msg = {}; + struct iovec sent_iov = {}; + char sent_data[kDataLength]; + sent_iov.iov_base = &sent_data[0]; + sent_iov.iov_len = kDataLength; + sent_msg.msg_iov = &sent_iov; + sent_msg.msg_iovlen = 1; + size_t cmsg_data_len = sizeof(int8_t); + if (sent_level == SOL_IPV6) { + sent_type = IPV6_TCLASS; + cmsg_data_len = sizeof(int); + } + std::vector<char> sent_cmsgbuf(CMSG_SPACE(cmsg_data_len)); + sent_msg.msg_control = &sent_cmsgbuf[0]; + sent_msg.msg_controllen = CMSG_LEN(cmsg_data_len); + + // Manually add control message. + struct cmsghdr* sent_cmsg = CMSG_FIRSTHDR(&sent_msg); + sent_cmsg->cmsg_len = CMSG_LEN(cmsg_data_len); + sent_cmsg->cmsg_level = sent_level; + sent_cmsg->cmsg_type = sent_type; + *(int8_t*)CMSG_DATA(sent_cmsg) = sent_tos; + + ASSERT_THAT(RetryEINTR(sendmsg)(sock_.get(), &sent_msg, 0), + SyscallSucceedsWithValue(kDataLength)); + + // Receive message. 
+ struct msghdr received_msg = {}; + struct iovec received_iov = {}; + char received_data[kDataLength]; + received_iov.iov_base = &received_data[0]; + received_iov.iov_len = kDataLength; + received_msg.msg_iov = &received_iov; + received_msg.msg_iovlen = 1; + std::vector<char> received_cmsgbuf(CMSG_SPACE(cmsg_data_len)); + received_msg.msg_control = &received_cmsgbuf[0]; + received_msg.msg_controllen = CMSG_LEN(cmsg_data_len); + ASSERT_THAT(RetryEINTR(recvmsg)(bind_.get(), &received_msg, 0), + SyscallSucceedsWithValue(kDataLength)); + + struct cmsghdr* cmsg = CMSG_FIRSTHDR(&received_msg); + ASSERT_NE(cmsg, nullptr); + EXPECT_EQ(cmsg->cmsg_len, CMSG_LEN(cmsg_data_len)); + EXPECT_EQ(cmsg->cmsg_level, sent_level); + EXPECT_EQ(cmsg->cmsg_type, sent_type); + int8_t received_tos = 0; + memcpy(&received_tos, CMSG_DATA(cmsg), sizeof(received_tos)); + EXPECT_EQ(received_tos, sent_tos); +} + +TEST_P(UdpSocketTest, RecvBufLimitsEmptyRcvBuf) { + // Discover minimum buffer size by setting it to zero. + constexpr int kRcvBufSz = 0; + ASSERT_THAT(setsockopt(bind_.get(), SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, + sizeof(kRcvBufSz)), + SyscallSucceeds()); + + int min = 0; + socklen_t min_len = sizeof(min); + ASSERT_THAT(getsockopt(bind_.get(), SOL_SOCKET, SO_RCVBUF, &min, &min_len), + SyscallSucceeds()); + + // Bind bind_ to loopback. + ASSERT_NO_ERRNO(BindLoopback()); + + { + // Send data of size min and verify that it's received. + std::vector<char> buf(min); + RandomizeBuffer(buf.data(), buf.size()); + ASSERT_THAT( + sendto(sock_.get(), buf.data(), buf.size(), 0, bind_addr_, addrlen_), + SyscallSucceedsWithValue(buf.size())); + std::vector<char> received(buf.size()); + EXPECT_THAT( + recv(bind_.get(), received.data(), received.size(), MSG_DONTWAIT), + SyscallSucceedsWithValue(received.size())); + } + + { + // Send data of size min + 1 and verify that its received. Both linux and + // Netstack accept a dgram that exceeds rcvBuf limits if the receive buffer + // is currently empty. 
+ std::vector<char> buf(min + 1); + RandomizeBuffer(buf.data(), buf.size()); + ASSERT_THAT( + sendto(sock_.get(), buf.data(), buf.size(), 0, bind_addr_, addrlen_), + SyscallSucceedsWithValue(buf.size())); + + std::vector<char> received(buf.size()); + EXPECT_THAT( + recv(bind_.get(), received.data(), received.size(), MSG_DONTWAIT), + SyscallSucceedsWithValue(received.size())); + } +} + +// Test that receive buffer limits are enforced. +TEST_P(UdpSocketTest, RecvBufLimits) { + // Bind s_ to loopback. + ASSERT_NO_ERRNO(BindLoopback()); + + int min = 0; + { + // Discover minimum buffer size by trying to set it to zero. + constexpr int kRcvBufSz = 0; + ASSERT_THAT(setsockopt(bind_.get(), SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, + sizeof(kRcvBufSz)), + SyscallSucceeds()); + + socklen_t min_len = sizeof(min); + ASSERT_THAT(getsockopt(bind_.get(), SOL_SOCKET, SO_RCVBUF, &min, &min_len), + SyscallSucceeds()); + } + + // Now set the limit to min * 4. + int new_rcv_buf_sz = min * 4; + if (!IsRunningOnGvisor() || IsRunningWithHostinet()) { + // Linux doubles the value specified so just set to min * 2. 
+ new_rcv_buf_sz = min * 2; + } + + ASSERT_THAT(setsockopt(bind_.get(), SOL_SOCKET, SO_RCVBUF, &new_rcv_buf_sz, + sizeof(new_rcv_buf_sz)), + SyscallSucceeds()); + int rcv_buf_sz = 0; + { + socklen_t rcv_buf_len = sizeof(rcv_buf_sz); + ASSERT_THAT(getsockopt(bind_.get(), SOL_SOCKET, SO_RCVBUF, &rcv_buf_sz, + &rcv_buf_len), + SyscallSucceeds()); + } + + { + std::vector<char> buf(min); + RandomizeBuffer(buf.data(), buf.size()); + + ASSERT_THAT( + sendto(sock_.get(), buf.data(), buf.size(), 0, bind_addr_, addrlen_), + SyscallSucceedsWithValue(buf.size())); + ASSERT_THAT( + sendto(sock_.get(), buf.data(), buf.size(), 0, bind_addr_, addrlen_), + SyscallSucceedsWithValue(buf.size())); + ASSERT_THAT( + sendto(sock_.get(), buf.data(), buf.size(), 0, bind_addr_, addrlen_), + SyscallSucceedsWithValue(buf.size())); + ASSERT_THAT( + sendto(sock_.get(), buf.data(), buf.size(), 0, bind_addr_, addrlen_), + SyscallSucceedsWithValue(buf.size())); + int sent = 4; + if (IsRunningOnGvisor() && !IsRunningWithHostinet()) { + // Linux seems to drop the 4th packet even though technically it should + // fit in the receive buffer. + ASSERT_THAT( + sendto(sock_.get(), buf.data(), buf.size(), 0, bind_addr_, addrlen_), + SyscallSucceedsWithValue(buf.size())); + sent++; + } + + for (int i = 0; i < sent - 1; i++) { + // Receive the data. + std::vector<char> received(buf.size()); + EXPECT_THAT( + recv(bind_.get(), received.data(), received.size(), MSG_DONTWAIT), + SyscallSucceedsWithValue(received.size())); + EXPECT_EQ(memcmp(buf.data(), received.data(), buf.size()), 0); + } + + // The last receive should fail with EAGAIN as the last packet should have + // been dropped due to lack of space in the receive buffer. 
+ std::vector<char> received(buf.size()); + EXPECT_THAT( + recv(bind_.get(), received.data(), received.size(), MSG_DONTWAIT), + SyscallFailsWithErrno(EAGAIN)); + } +} + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/udp_socket_test_cases.h b/test/syscalls/linux/udp_socket_test_cases.h new file mode 100644 index 000000000..f7e25c805 --- /dev/null +++ b/test/syscalls/linux/udp_socket_test_cases.h @@ -0,0 +1,82 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef THIRD_PARTY_GOLANG_GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IPV4_UDP_UNBOUND_H_ +#define THIRD_PARTY_GOLANG_GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IPV4_UDP_UNBOUND_H_ + +#include <sys/socket.h> + +#include "gtest/gtest.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/posix_error.h" + +namespace gvisor { +namespace testing { + +// The initial port to be be used on gvisor. +constexpr int TestPort = 40000; + +// Fixture for tests parameterized by the address family to use (AF_INET and +// AF_INET6) when creating sockets. +class UdpSocketTest + : public ::testing::TestWithParam<gvisor::testing::AddressFamily> { + protected: + // Creates two sockets that will be used by test cases. + void SetUp() override; + + // Binds the socket bind_ to the loopback and updates bind_addr_. + PosixError BindLoopback(); + + // Binds the socket bind_ to Any and updates bind_addr_. 
+ PosixError BindAny(); + + // Binds given socket to address addr and updates. + PosixError BindSocket(int socket, struct sockaddr* addr); + + // Return initialized Any address to port 0. + struct sockaddr_storage InetAnyAddr(); + + // Return initialized Loopback address to port 0. + struct sockaddr_storage InetLoopbackAddr(); + + // Disconnects socket sockfd. + void Disconnect(int sockfd); + + // Get family for the test. + int GetFamily(); + + // Socket used by Bind methods + FileDescriptor bind_; + + // Second socket used for tests. + FileDescriptor sock_; + + // Address for bind_ socket. + struct sockaddr* bind_addr_; + + // Initialized to the length based on GetFamily(). + socklen_t addrlen_; + + // Storage for bind_addr_. + struct sockaddr_storage bind_addr_storage_; + + private: + // Helper to initialize addrlen_ for the test case. + socklen_t GetAddrLength(); +}; +} // namespace testing +} // namespace gvisor + +#endif // THIRD_PARTY_GOLANG_GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IPV4_UDP_UNBOUND_H_ diff --git a/test/syscalls/linux/uidgid.cc b/test/syscalls/linux/uidgid.cc new file mode 100644 index 000000000..64d6d0b8f --- /dev/null +++ b/test/syscalls/linux/uidgid.cc @@ -0,0 +1,276 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <errno.h> +#include <grp.h> +#include <sys/resource.h> +#include <sys/types.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "absl/flags/flag.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_join.h" +#include "test/util/capability_util.h" +#include "test/util/posix_error.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" +#include "test/util/uid_util.h" + +ABSL_FLAG(int32_t, scratch_uid1, 65534, "first scratch UID"); +ABSL_FLAG(int32_t, scratch_uid2, 65533, "second scratch UID"); +ABSL_FLAG(int32_t, scratch_gid1, 65534, "first scratch GID"); +ABSL_FLAG(int32_t, scratch_gid2, 65533, "second scratch GID"); + +using ::testing::UnorderedElementsAreArray; + +namespace gvisor { +namespace testing { + +namespace { + +TEST(UidGidTest, Getuid) { + uid_t ruid, euid, suid; + EXPECT_THAT(getresuid(&ruid, &euid, &suid), SyscallSucceeds()); + EXPECT_THAT(getuid(), SyscallSucceedsWithValue(ruid)); + EXPECT_THAT(geteuid(), SyscallSucceedsWithValue(euid)); +} + +TEST(UidGidTest, Getgid) { + gid_t rgid, egid, sgid; + EXPECT_THAT(getresgid(&rgid, &egid, &sgid), SyscallSucceeds()); + EXPECT_THAT(getgid(), SyscallSucceedsWithValue(rgid)); + EXPECT_THAT(getegid(), SyscallSucceedsWithValue(egid)); +} + +TEST(UidGidTest, Getgroups) { + // "If size is zero, list is not modified, but the total number of + // supplementary group IDs for the process is returned." - getgroups(2) + int nr_groups; + ASSERT_THAT(nr_groups = getgroups(0, nullptr), SyscallSucceeds()); + std::vector<gid_t> list(nr_groups); + EXPECT_THAT(getgroups(list.size(), list.data()), SyscallSucceeds()); + + // "EINVAL: size is less than the number of supplementary group IDs, but is + // not zero." + EXPECT_THAT(getgroups(-1, nullptr), SyscallFailsWithErrno(EINVAL)); + + // Testing for EFAULT requires actually having groups, which isn't guaranteed + // here; see the setgroups test below. 
+} + +// Checks that the calling process' real/effective/saved user IDs are +// ruid/euid/suid respectively. +PosixError CheckUIDs(uid_t ruid, uid_t euid, uid_t suid) { + uid_t actual_ruid, actual_euid, actual_suid; + int rc = getresuid(&actual_ruid, &actual_euid, &actual_suid); + MaybeSave(); + if (rc < 0) { + return PosixError(errno, "getresuid"); + } + if (ruid != actual_ruid || euid != actual_euid || suid != actual_suid) { + return PosixError( + EPERM, absl::StrCat( + "incorrect user IDs: got (", + absl::StrJoin({actual_ruid, actual_euid, actual_suid}, ", "), + ", wanted (", absl::StrJoin({ruid, euid, suid}, ", "), ")")); + } + return NoError(); +} + +PosixError CheckGIDs(gid_t rgid, gid_t egid, gid_t sgid) { + gid_t actual_rgid, actual_egid, actual_sgid; + int rc = getresgid(&actual_rgid, &actual_egid, &actual_sgid); + MaybeSave(); + if (rc < 0) { + return PosixError(errno, "getresgid"); + } + if (rgid != actual_rgid || egid != actual_egid || sgid != actual_sgid) { + return PosixError( + EPERM, absl::StrCat( + "incorrect group IDs: got (", + absl::StrJoin({actual_rgid, actual_egid, actual_sgid}, ", "), + ", wanted (", absl::StrJoin({rgid, egid, sgid}, ", "), ")")); + } + return NoError(); +} + +// N.B. These tests may break horribly unless run via a gVisor test runner, +// because changing UID in one test may forfeit permissions required by other +// tests. (The test runner runs each test in a separate process.) + +TEST(UidGidRootTest, Setuid) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsRoot())); + + // Do setuid in a separate thread so that after finishing this test, the + // process can still open files the test harness created before starting this + // test. Otherwise, the files are created by root (UID before the test), but + // cannot be opened by the `uid` set below after the test. After calling + // setuid(non-zero-UID), there is no way to get root privileges back. 
+ ScopedThread([&] { + // Use syscall instead of glibc setuid wrapper because we want this setuid + // call to only apply to this task. POSIX threads, however, require that all + // threads have the same UIDs, so using the setuid wrapper sets all threads' + // real UID. + EXPECT_THAT(syscall(SYS_setuid, -1), SyscallFailsWithErrno(EINVAL)); + + const uid_t uid = absl::GetFlag(FLAGS_scratch_uid1); + EXPECT_THAT(syscall(SYS_setuid, uid), SyscallSucceeds()); + // "If the effective UID of the caller is root (more precisely: if the + // caller has the CAP_SETUID capability), the real UID and saved set-user-ID + // are also set." - setuid(2) + EXPECT_NO_ERRNO(CheckUIDs(uid, uid, uid)); + }); +} + +TEST(UidGidRootTest, Setgid) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsRoot())); + + EXPECT_THAT(setgid(-1), SyscallFailsWithErrno(EINVAL)); + + const gid_t gid = absl::GetFlag(FLAGS_scratch_gid1); + ASSERT_THAT(setgid(gid), SyscallSucceeds()); + EXPECT_NO_ERRNO(CheckGIDs(gid, gid, gid)); +} + +TEST(UidGidRootTest, SetgidNotFromThreadGroupLeader) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsRoot())); + + const gid_t gid = absl::GetFlag(FLAGS_scratch_gid1); + // NOTE(b/64676707): Do setgid in a separate thread so that we can test if + // info.si_pid is set correctly. + ScopedThread([gid] { ASSERT_THAT(setgid(gid), SyscallSucceeds()); }); + EXPECT_NO_ERRNO(CheckGIDs(gid, gid, gid)); +} + +TEST(UidGidRootTest, Setreuid) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsRoot())); + + // "Supplying a value of -1 for either the real or effective user ID forces + // the system to leave that ID unchanged." - setreuid(2) + EXPECT_THAT(setreuid(-1, -1), SyscallSucceeds()); + EXPECT_NO_ERRNO(CheckUIDs(0, 0, 0)); + + // Do setuid in a separate thread so that after finishing this test, the + // process can still open files the test harness created before starting this + // test. Otherwise, the files are created by root (UID before the test), but + // cannot be opened by the `uid` set below after the test. 
After calling + // setuid(non-zero-UID), there is no way to get root privileges back. + ScopedThread([&] { + const uid_t ruid = absl::GetFlag(FLAGS_scratch_uid1); + const uid_t euid = absl::GetFlag(FLAGS_scratch_uid2); + + // Use syscall instead of glibc setuid wrapper because we want this setuid + // call to only apply to this task. posix threads, however, require that all + // threads have the same UIDs, so using the setuid wrapper sets all threads' + // real UID. + EXPECT_THAT(syscall(SYS_setreuid, ruid, euid), SyscallSucceeds()); + + // "If the real user ID is set or the effective user ID is set to a value + // not equal to the previous real user ID, the saved set-user-ID will be set + // to the new effective user ID." - setreuid(2) + EXPECT_NO_ERRNO(CheckUIDs(ruid, euid, euid)); + }); +} + +TEST(UidGidRootTest, Setregid) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsRoot())); + + EXPECT_THAT(setregid(-1, -1), SyscallSucceeds()); + EXPECT_NO_ERRNO(CheckGIDs(0, 0, 0)); + + const gid_t rgid = absl::GetFlag(FLAGS_scratch_gid1); + const gid_t egid = absl::GetFlag(FLAGS_scratch_gid2); + ASSERT_THAT(setregid(rgid, egid), SyscallSucceeds()); + EXPECT_NO_ERRNO(CheckGIDs(rgid, egid, egid)); +} + +TEST(UidGidRootTest, Setresuid) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsRoot())); + + // "If one of the arguments equals -1, the corresponding value is not + // changed." - setresuid(2) + EXPECT_THAT(setresuid(-1, -1, -1), SyscallSucceeds()); + EXPECT_NO_ERRNO(CheckUIDs(0, 0, 0)); + + // Do setuid in a separate thread so that after finishing this test, the + // process can still open files the test harness created before starting this + // test. Otherwise, the files are created by root (UID before the test), but + // cannot be opened by the `uid` set below after the test. After calling + // setuid(non-zero-UID), there is no way to get root privileges back. 
+ ScopedThread([&] { + const uid_t ruid = 12345; + const uid_t euid = 23456; + const uid_t suid = 34567; + + // Use syscall instead of glibc setuid wrapper because we want this setuid + // call to only apply to this task. posix threads, however, require that all + // threads have the same UIDs, so using the setuid wrapper sets all threads' + // real UID. + EXPECT_THAT(syscall(SYS_setresuid, ruid, euid, suid), SyscallSucceeds()); + EXPECT_NO_ERRNO(CheckUIDs(ruid, euid, suid)); + }); +} + +TEST(UidGidRootTest, Setresgid) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsRoot())); + + EXPECT_THAT(setresgid(-1, -1, -1), SyscallSucceeds()); + EXPECT_NO_ERRNO(CheckGIDs(0, 0, 0)); + + const gid_t rgid = 12345; + const gid_t egid = 23456; + const gid_t sgid = 34567; + ASSERT_THAT(setresgid(rgid, egid, sgid), SyscallSucceeds()); + EXPECT_NO_ERRNO(CheckGIDs(rgid, egid, sgid)); +} + +TEST(UidGidRootTest, Setgroups) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsRoot())); + + std::vector<gid_t> list = {123, 500}; + ASSERT_THAT(setgroups(list.size(), list.data()), SyscallSucceeds()); + std::vector<gid_t> list2(list.size()); + ASSERT_THAT(getgroups(list2.size(), list2.data()), SyscallSucceeds()); + EXPECT_THAT(list, UnorderedElementsAreArray(list2)); + + // "EFAULT: list has an invalid address." + EXPECT_THAT(getgroups(100, reinterpret_cast<gid_t*>(-1)), + SyscallFailsWithErrno(EFAULT)); +} + +TEST(UidGidRootTest, Setuid_prlimit) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsRoot())); + + // Do seteuid in a separate thread so that after finishing this test, the + // process can still open files the test harness created before starting this + // test. Otherwise, the files are created by root (UID before the test), but + // cannot be opened by the `uid` set below after the test. + ScopedThread([&] { + // Use syscall instead of glibc setuid wrapper because we want this seteuid + // call to only apply to this task. 
POSIX threads, however, require that all + // threads have the same UIDs, so using the seteuid wrapper sets all + // threads' UID. + EXPECT_THAT(syscall(SYS_setreuid, -1, 65534), SyscallSucceeds()); + + // Despite the UID change, we should be able to get our own limits. + struct rlimit rl = {}; + EXPECT_THAT(prlimit(0, RLIMIT_NOFILE, NULL, &rl), SyscallSucceeds()); + }); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/uname.cc b/test/syscalls/linux/uname.cc new file mode 100644 index 000000000..d8824b171 --- /dev/null +++ b/test/syscalls/linux/uname.cc @@ -0,0 +1,111 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <sched.h> +#include <sys/utsname.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "absl/strings/string_view.h" +#include "test/util/capability_util.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +TEST(UnameTest, Sanity) { + struct utsname buf; + ASSERT_THAT(uname(&buf), SyscallSucceeds()); + EXPECT_NE(strlen(buf.release), 0); + EXPECT_NE(strlen(buf.version), 0); + EXPECT_NE(strlen(buf.machine), 0); + EXPECT_NE(strlen(buf.sysname), 0); + EXPECT_NE(strlen(buf.nodename), 0); + EXPECT_NE(strlen(buf.domainname), 0); +} + +TEST(UnameTest, SetNames) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))); + + char hostname[65]; + ASSERT_THAT(sethostname("0123456789", 3), SyscallSucceeds()); + EXPECT_THAT(gethostname(hostname, sizeof(hostname)), SyscallSucceeds()); + EXPECT_EQ(absl::string_view(hostname), "012"); + + ASSERT_THAT(sethostname("0123456789\0xxx", 11), SyscallSucceeds()); + EXPECT_THAT(gethostname(hostname, sizeof(hostname)), SyscallSucceeds()); + EXPECT_EQ(absl::string_view(hostname), "0123456789"); + + ASSERT_THAT(sethostname("0123456789\0xxx", 12), SyscallSucceeds()); + EXPECT_THAT(gethostname(hostname, sizeof(hostname)), SyscallSucceeds()); + EXPECT_EQ(absl::string_view(hostname), "0123456789"); + + constexpr char kHostname[] = "wubbalubba"; + ASSERT_THAT(sethostname(kHostname, sizeof(kHostname)), SyscallSucceeds()); + + constexpr char kDomainname[] = "dubdub.com"; + ASSERT_THAT(setdomainname(kDomainname, sizeof(kDomainname)), + SyscallSucceeds()); + + struct utsname buf; + EXPECT_THAT(uname(&buf), SyscallSucceeds()); + EXPECT_EQ(absl::string_view(buf.nodename), kHostname); + EXPECT_EQ(absl::string_view(buf.domainname), kDomainname); + + // These should just be glibc wrappers that also call uname(2). 
+ EXPECT_THAT(gethostname(hostname, sizeof(hostname)), SyscallSucceeds()); + EXPECT_EQ(absl::string_view(hostname), kHostname); + + char domainname[65]; + EXPECT_THAT(getdomainname(domainname, sizeof(domainname)), SyscallSucceeds()); + EXPECT_EQ(absl::string_view(domainname), kDomainname); +} + +TEST(UnameTest, UnprivilegedSetNames) { + if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))) { + EXPECT_NO_ERRNO(SetCapability(CAP_SYS_ADMIN, false)); + } + + EXPECT_THAT(sethostname("", 0), SyscallFailsWithErrno(EPERM)); + EXPECT_THAT(setdomainname("", 0), SyscallFailsWithErrno(EPERM)); +} + +TEST(UnameTest, UnshareUTS) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))); + + struct utsname init; + ASSERT_THAT(uname(&init), SyscallSucceeds()); + + ScopedThread([&]() { + EXPECT_THAT(unshare(CLONE_NEWUTS), SyscallSucceeds()); + + constexpr char kHostname[] = "wubbalubba"; + EXPECT_THAT(sethostname(kHostname, sizeof(kHostname)), SyscallSucceeds()); + + char hostname[65]; + EXPECT_THAT(gethostname(hostname, sizeof(hostname)), SyscallSucceeds()); + }); + + struct utsname after; + EXPECT_THAT(uname(&after), SyscallSucceeds()); + EXPECT_EQ(absl::string_view(after.nodename), init.nodename); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/unix_domain_socket_test_util.cc b/test/syscalls/linux/unix_domain_socket_test_util.cc new file mode 100644 index 000000000..b05ab2900 --- /dev/null +++ b/test/syscalls/linux/unix_domain_socket_test_util.cc @@ -0,0 +1,351 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "test/syscalls/linux/unix_domain_socket_test_util.h" + +#include <sys/un.h> + +#include <vector> + +#include "gtest/gtest.h" +#include "absl/strings/str_cat.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +std::string DescribeUnixDomainSocketType(int type) { + const char* type_str = nullptr; + switch (type & ~(SOCK_NONBLOCK | SOCK_CLOEXEC)) { + case SOCK_STREAM: + type_str = "SOCK_STREAM"; + break; + case SOCK_DGRAM: + type_str = "SOCK_DGRAM"; + break; + case SOCK_SEQPACKET: + type_str = "SOCK_SEQPACKET"; + break; + } + if (!type_str) { + return absl::StrCat("Unix domain socket with unknown type ", type); + } else { + return absl::StrCat(((type & SOCK_NONBLOCK) != 0) ? "non-blocking " : "", + ((type & SOCK_CLOEXEC) != 0) ? 
"close-on-exec " : "", + type_str, " Unix domain socket"); + } +} + +SocketPairKind UnixDomainSocketPair(int type) { + return SocketPairKind{DescribeUnixDomainSocketType(type), AF_UNIX, type, 0, + SyscallSocketPairCreator(AF_UNIX, type, 0)}; +} + +SocketPairKind FilesystemBoundUnixDomainSocketPair(int type) { + std::string description = absl::StrCat(DescribeUnixDomainSocketType(type), + " created with filesystem binding"); + if ((type & SOCK_DGRAM) == SOCK_DGRAM) { + return SocketPairKind{ + description, AF_UNIX, type, 0, + FilesystemBidirectionalBindSocketPairCreator(AF_UNIX, type, 0)}; + } + return SocketPairKind{ + description, AF_UNIX, type, 0, + FilesystemAcceptBindSocketPairCreator(AF_UNIX, type, 0)}; +} + +SocketPairKind AbstractBoundUnixDomainSocketPair(int type) { + std::string description = + absl::StrCat(DescribeUnixDomainSocketType(type), + " created with abstract namespace binding"); + if ((type & SOCK_DGRAM) == SOCK_DGRAM) { + return SocketPairKind{ + description, AF_UNIX, type, 0, + AbstractBidirectionalBindSocketPairCreator(AF_UNIX, type, 0)}; + } + return SocketPairKind{description, AF_UNIX, type, 0, + AbstractAcceptBindSocketPairCreator(AF_UNIX, type, 0)}; +} + +SocketPairKind SocketpairGoferUnixDomainSocketPair(int type) { + std::string description = absl::StrCat(DescribeUnixDomainSocketType(type), + " created with the socketpair gofer"); + return SocketPairKind{description, AF_UNIX, type, 0, + SocketpairGoferSocketPairCreator(AF_UNIX, type, 0)}; +} + +SocketPairKind SocketpairGoferFileSocketPair(int type) { + std::string description = + absl::StrCat(((type & O_NONBLOCK) != 0) ? "non-blocking " : "", + ((type & O_CLOEXEC) != 0) ? "close-on-exec " : "", + "file socket created with the socketpair gofer"); + // The socketpair gofer always creates SOCK_STREAM sockets on open(2). 
+ return SocketPairKind{description, AF_UNIX, SOCK_STREAM, 0, + SocketpairGoferFileSocketPairCreator(type)}; +} + +SocketPairKind FilesystemUnboundUnixDomainSocketPair(int type) { + return SocketPairKind{absl::StrCat(DescribeUnixDomainSocketType(type), + " unbound with a filesystem address"), + AF_UNIX, type, 0, + FilesystemUnboundSocketPairCreator(AF_UNIX, type, 0)}; +} + +SocketPairKind AbstractUnboundUnixDomainSocketPair(int type) { + return SocketPairKind{ + absl::StrCat(DescribeUnixDomainSocketType(type), + " unbound with an abstract namespace address"), + AF_UNIX, type, 0, AbstractUnboundSocketPairCreator(AF_UNIX, type, 0)}; +} + +void SendSingleFD(int sock, int fd, char buf[], int buf_size) { + ASSERT_NO_FATAL_FAILURE(SendFDs(sock, &fd, 1, buf, buf_size)); +} + +void SendFDs(int sock, int fds[], int fds_size, char buf[], int buf_size) { + struct msghdr msg = {}; + std::vector<char> control(CMSG_SPACE(fds_size * sizeof(int))); + msg.msg_control = &control[0]; + msg.msg_controllen = control.size(); + + struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_len = CMSG_LEN(fds_size * sizeof(int)); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + for (int i = 0; i < fds_size; i++) { + memcpy(CMSG_DATA(cmsg) + i * sizeof(int), &fds[i], sizeof(int)); + } + + ASSERT_THAT(SendMsg(sock, &msg, buf, buf_size), + IsPosixErrorOkAndHolds(buf_size)); +} + +void RecvSingleFD(int sock, int* fd, char buf[], int buf_size) { + ASSERT_NO_FATAL_FAILURE(RecvFDs(sock, fd, 1, buf, buf_size, buf_size)); +} + +void RecvSingleFD(int sock, int* fd, char buf[], int buf_size, + int expected_size) { + ASSERT_NO_FATAL_FAILURE(RecvFDs(sock, fd, 1, buf, buf_size, expected_size)); +} + +void RecvFDs(int sock, int fds[], int fds_size, char buf[], int buf_size) { + ASSERT_NO_FATAL_FAILURE( + RecvFDs(sock, fds, fds_size, buf, buf_size, buf_size)); +} + +void RecvFDs(int sock, int fds[], int fds_size, char buf[], int buf_size, + int expected_size, bool peek) { + struct msghdr 
msg = {}; + std::vector<char> control(CMSG_SPACE(fds_size * sizeof(int))); + msg.msg_control = &control[0]; + msg.msg_controllen = control.size(); + + struct iovec iov; + iov.iov_base = buf; + iov.iov_len = buf_size; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + int flags = 0; + if (peek) { + flags |= MSG_PEEK; + } + + ASSERT_THAT(RetryEINTR(recvmsg)(sock, &msg, flags), + SyscallSucceedsWithValue(expected_size)); + struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg); + ASSERT_NE(cmsg, nullptr); + ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(fds_size * sizeof(int))); + ASSERT_EQ(cmsg->cmsg_level, SOL_SOCKET); + ASSERT_EQ(cmsg->cmsg_type, SCM_RIGHTS); + + for (int i = 0; i < fds_size; i++) { + memcpy(&fds[i], CMSG_DATA(cmsg) + i * sizeof(int), sizeof(int)); + } +} + +void RecvFDs(int sock, int fds[], int fds_size, char buf[], int buf_size, + int expected_size) { + ASSERT_NO_FATAL_FAILURE( + RecvFDs(sock, fds, fds_size, buf, buf_size, expected_size, false)); +} + +void PeekSingleFD(int sock, int* fd, char buf[], int buf_size) { + ASSERT_NO_FATAL_FAILURE(RecvFDs(sock, fd, 1, buf, buf_size, buf_size, true)); +} + +void RecvNoCmsg(int sock, char buf[], int buf_size, int expected_size) { + struct msghdr msg = {}; + char control[CMSG_SPACE(sizeof(int)) + CMSG_SPACE(sizeof(struct ucred))]; + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + struct iovec iov; + iov.iov_base = buf; + iov.iov_len = buf_size; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + ASSERT_THAT(RetryEINTR(recvmsg)(sock, &msg, 0), + SyscallSucceedsWithValue(expected_size)); + struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg); + EXPECT_EQ(cmsg, nullptr); +} + +void SendNullCmsg(int sock, char buf[], int buf_size) { + struct msghdr msg = {}; + msg.msg_control = nullptr; + msg.msg_controllen = 0; + + ASSERT_THAT(SendMsg(sock, &msg, buf, buf_size), + IsPosixErrorOkAndHolds(buf_size)); +} + +void SendCreds(int sock, ucred creds, char buf[], int buf_size) { + struct msghdr msg = {}; + + char 
control[CMSG_SPACE(sizeof(struct ucred))]; + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_CREDENTIALS; + cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred)); + memcpy(CMSG_DATA(cmsg), &creds, sizeof(struct ucred)); + + ASSERT_THAT(SendMsg(sock, &msg, buf, buf_size), + IsPosixErrorOkAndHolds(buf_size)); +} + +void SendCredsAndFD(int sock, ucred creds, int fd, char buf[], int buf_size) { + struct msghdr msg = {}; + + char control[CMSG_SPACE(sizeof(struct ucred)) + CMSG_SPACE(sizeof(int))] = {}; + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + struct cmsghdr* cmsg1 = CMSG_FIRSTHDR(&msg); + cmsg1->cmsg_level = SOL_SOCKET; + cmsg1->cmsg_type = SCM_CREDENTIALS; + cmsg1->cmsg_len = CMSG_LEN(sizeof(struct ucred)); + memcpy(CMSG_DATA(cmsg1), &creds, sizeof(struct ucred)); + + struct cmsghdr* cmsg2 = CMSG_NXTHDR(&msg, cmsg1); + cmsg2->cmsg_level = SOL_SOCKET; + cmsg2->cmsg_type = SCM_RIGHTS; + cmsg2->cmsg_len = CMSG_LEN(sizeof(int)); + memcpy(CMSG_DATA(cmsg2), &fd, sizeof(int)); + + ASSERT_THAT(SendMsg(sock, &msg, buf, buf_size), + IsPosixErrorOkAndHolds(buf_size)); +} + +void RecvCreds(int sock, ucred* creds, char buf[], int buf_size) { + ASSERT_NO_FATAL_FAILURE(RecvCreds(sock, creds, buf, buf_size, buf_size)); +} + +void RecvCreds(int sock, ucred* creds, char buf[], int buf_size, + int expected_size) { + struct msghdr msg = {}; + char control[CMSG_SPACE(sizeof(struct ucred))]; + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + struct iovec iov; + iov.iov_base = buf; + iov.iov_len = buf_size; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + ASSERT_THAT(RetryEINTR(recvmsg)(sock, &msg, 0), + SyscallSucceedsWithValue(expected_size)); + struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg); + ASSERT_NE(cmsg, nullptr); + ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(struct ucred))); + ASSERT_EQ(cmsg->cmsg_level, SOL_SOCKET); + 
ASSERT_EQ(cmsg->cmsg_type, SCM_CREDENTIALS); + + memcpy(creds, CMSG_DATA(cmsg), sizeof(struct ucred)); +} + +void RecvCredsAndFD(int sock, ucred* creds, int* fd, char buf[], int buf_size) { + struct msghdr msg = {}; + char control[CMSG_SPACE(sizeof(struct ucred)) + CMSG_SPACE(sizeof(int))]; + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + struct iovec iov; + iov.iov_base = buf; + iov.iov_len = buf_size; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + ASSERT_THAT(RetryEINTR(recvmsg)(sock, &msg, 0), + SyscallSucceedsWithValue(buf_size)); + + struct cmsghdr* cmsg1 = CMSG_FIRSTHDR(&msg); + ASSERT_NE(cmsg1, nullptr); + ASSERT_EQ(cmsg1->cmsg_len, CMSG_LEN(sizeof(struct ucred))); + ASSERT_EQ(cmsg1->cmsg_level, SOL_SOCKET); + ASSERT_EQ(cmsg1->cmsg_type, SCM_CREDENTIALS); + memcpy(creds, CMSG_DATA(cmsg1), sizeof(struct ucred)); + + struct cmsghdr* cmsg2 = CMSG_NXTHDR(&msg, cmsg1); + ASSERT_NE(cmsg2, nullptr); + ASSERT_EQ(cmsg2->cmsg_len, CMSG_LEN(sizeof(int))); + ASSERT_EQ(cmsg2->cmsg_level, SOL_SOCKET); + ASSERT_EQ(cmsg2->cmsg_type, SCM_RIGHTS); + memcpy(fd, CMSG_DATA(cmsg2), sizeof(int)); +} + +void RecvSingleFDUnaligned(int sock, int* fd, char buf[], int buf_size) { + struct msghdr msg = {}; + char control[CMSG_SPACE(sizeof(int)) - sizeof(int)]; + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + struct iovec iov; + iov.iov_base = buf; + iov.iov_len = buf_size; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + ASSERT_THAT(RetryEINTR(recvmsg)(sock, &msg, 0), + SyscallSucceedsWithValue(buf_size)); + + struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg); + ASSERT_NE(cmsg, nullptr); + ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(int))); + ASSERT_EQ(cmsg->cmsg_level, SOL_SOCKET); + ASSERT_EQ(cmsg->cmsg_type, SCM_RIGHTS); + + memcpy(fd, CMSG_DATA(cmsg), sizeof(int)); +} + +void SetSoPassCred(int sock) { + int one = 1; + EXPECT_THAT(setsockopt(sock, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one)), + SyscallSucceeds()); +} + +void UnsetSoPassCred(int 
sock) { + int zero = 0; + EXPECT_THAT(setsockopt(sock, SOL_SOCKET, SO_PASSCRED, &zero, sizeof(zero)), + SyscallSucceeds()); +} + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/unix_domain_socket_test_util.h b/test/syscalls/linux/unix_domain_socket_test_util.h new file mode 100644 index 000000000..b8073db17 --- /dev/null +++ b/test/syscalls/linux/unix_domain_socket_test_util.h @@ -0,0 +1,162 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GVISOR_TEST_SYSCALLS_UNIX_DOMAIN_SOCKET_TEST_UTIL_H_ +#define GVISOR_TEST_SYSCALLS_UNIX_DOMAIN_SOCKET_TEST_UTIL_H_ + +#include <string> + +#include "test/syscalls/linux/socket_test_util.h" + +namespace gvisor { +namespace testing { + +// DescribeUnixDomainSocketType returns a human-readable string explaining the +// given Unix domain socket type. +std::string DescribeUnixDomainSocketType(int type); + +// UnixDomainSocketPair returns a SocketPairKind that represents SocketPairs +// created by invoking the socketpair() syscall with AF_UNIX and the given type. +SocketPairKind UnixDomainSocketPair(int type); + +// FilesystemBoundUnixDomainSocketPair returns a SocketPairKind that represents +// SocketPairs created with bind() and accept() syscalls with a temp file path, +// AF_UNIX and the given type. 
+SocketPairKind FilesystemBoundUnixDomainSocketPair(int type); + +// AbstractBoundUnixDomainSocketPair returns a SocketPairKind that represents +// SocketPairs created with bind() and accept() syscalls with a temp abstract +// path, AF_UNIX and the given type. +SocketPairKind AbstractBoundUnixDomainSocketPair(int type); + +// SocketpairGoferUnixDomainSocketPair returns a SocketPairKind that was created +// with two sockets connected to the socketpair gofer. +SocketPairKind SocketpairGoferUnixDomainSocketPair(int type); + +// SocketpairGoferFileSocketPair returns a SocketPairKind that was created with +// two open() calls on paths backed by the socketpair gofer. +SocketPairKind SocketpairGoferFileSocketPair(int type); + +// FilesystemUnboundUnixDomainSocketPair returns a SocketPairKind that +// represents two unbound sockets and a filesystem path for binding. +SocketPairKind FilesystemUnboundUnixDomainSocketPair(int type); + +// AbstractUnboundUnixDomainSocketPair returns a SocketPairKind that represents +// two unbound sockets and an abstract namespace path for binding. +SocketPairKind AbstractUnboundUnixDomainSocketPair(int type); + +// SendSingleFD sends both a single FD and some data over a unix domain socket +// specified by an FD. Note that calls to this function must be wrapped in +// ASSERT_NO_FATAL_FAILURE for internal assertions to halt the test. +void SendSingleFD(int sock, int fd, char buf[], int buf_size); + +// SendFDs sends an arbitrary number of FDs and some data over a unix domain +// socket specified by an FD. Note that calls to this function must be wrapped +// in ASSERT_NO_FATAL_FAILURE for internal assertions to halt the test. +void SendFDs(int sock, int fds[], int fds_size, char buf[], int buf_size); + +// RecvSingleFD receives both a single FD and some data over a unix domain +// socket specified by an FD. Note that calls to this function must be wrapped +// in ASSERT_NO_FATAL_FAILURE for internal assertions to halt the test. 
+void RecvSingleFD(int sock, int* fd, char buf[], int buf_size); + +// RecvSingleFD receives both a single FD and some data over a unix domain +// socket specified by an FD. This version allows the expected amount of data +// received to be different than the buffer size. Note that calls to this +// function must be wrapped in ASSERT_NO_FATAL_FAILURE for internal assertions +// to halt the test. +void RecvSingleFD(int sock, int* fd, char buf[], int buf_size, + int expected_size); + +// PeekSingleFD peeks at both a single FD and some data over a unix domain +// socket specified by an FD. Note that calls to this function must be wrapped +// in ASSERT_NO_FATAL_FAILURE for internal assertions to halt the test. +void PeekSingleFD(int sock, int* fd, char buf[], int buf_size); + +// RecvFDs receives both an arbitrary number of FDs and some data over a unix +// domain socket specified by an FD. Note that calls to this function must be +// wrapped in ASSERT_NO_FATAL_FAILURE for internal assertions to halt the test. +void RecvFDs(int sock, int fds[], int fds_size, char buf[], int buf_size); + +// RecvFDs receives both an arbitrary number of FDs and some data over a unix +// domain socket specified by an FD. This version allows the expected amount of +// data received to be different than the buffer size. Note that calls to this +// function must be wrapped in ASSERT_NO_FATAL_FAILURE for internal assertions +// to halt the test. +void RecvFDs(int sock, int fds[], int fds_size, char buf[], int buf_size, + int expected_size); + +// RecvNoCmsg receives some data over a unix domain socket specified by an FD +// and asserts that no control messages are available for receiving. Note that +// calls to this function must be wrapped in ASSERT_NO_FATAL_FAILURE for +// internal assertions to halt the test. 
+void RecvNoCmsg(int sock, char buf[], int buf_size, int expected_size); + +inline void RecvNoCmsg(int sock, char buf[], int buf_size) { + RecvNoCmsg(sock, buf, buf_size, buf_size); +} + +// SendCreds sends the credentials of the current process and some data over a +// unix domain socket specified by an FD. Note that calls to this function must +// be wrapped in ASSERT_NO_FATAL_FAILURE for internal assertions to halt the +// test. +void SendCreds(int sock, ucred creds, char buf[], int buf_size); + +// SendCredsAndFD sends the credentials of the current process, a single FD, and +// some data over a unix domain socket specified by an FD. Note that calls to +// this function must be wrapped in ASSERT_NO_FATAL_FAILURE for internal +// assertions to halt the test. +void SendCredsAndFD(int sock, ucred creds, int fd, char buf[], int buf_size); + +// RecvCreds receives some credentials and some data over a unix domain socket +// specified by an FD. Note that calls to this function must be wrapped in +// ASSERT_NO_FATAL_FAILURE for internal assertions to halt the test. +void RecvCreds(int sock, ucred* creds, char buf[], int buf_size); + +// RecvCreds receives some credentials and some data over a unix domain socket +// specified by an FD. This version allows the expected amount of data received +// to be different than the buffer size. Note that calls to this function must +// be wrapped in ASSERT_NO_FATAL_FAILURE for internal assertions to halt the +// test. +void RecvCreds(int sock, ucred* creds, char buf[], int buf_size, + int expected_size); + +// RecvCredsAndFD receives some credentials, a single FD, and some data over a +// unix domain socket specified by an FD. Note that calls to this function must +// be wrapped in ASSERT_NO_FATAL_FAILURE for internal assertions to halt the +// test. 
+void RecvCredsAndFD(int sock, ucred* creds, int* fd, char buf[], int buf_size); + +// SendNullCmsg sends a null control message and some data over a unix domain +// socket specified by an FD. Note that calls to this function must be wrapped +// in ASSERT_NO_FATAL_FAILURE for internal assertions to halt the test. +void SendNullCmsg(int sock, char buf[], int buf_size); + +// RecvSingleFDUnaligned sends both a single FD and some data over a unix domain +// socket specified by an FD. This function does not obey the spec, but Linux +// allows it and the apphosting code depends on this quirk. Note that calls to +// this function must be wrapped in ASSERT_NO_FATAL_FAILURE for internal +// assertions to halt the test. +void RecvSingleFDUnaligned(int sock, int* fd, char buf[], int buf_size); + +// SetSoPassCred sets the SO_PASSCRED option on the specified socket. +void SetSoPassCred(int sock); + +// UnsetSoPassCred clears the SO_PASSCRED option on the specified socket. +void UnsetSoPassCred(int sock); + +} // namespace testing +} // namespace gvisor + +#endif // GVISOR_TEST_SYSCALLS_UNIX_DOMAIN_SOCKET_TEST_UTIL_H_ diff --git a/test/syscalls/linux/unlink.cc b/test/syscalls/linux/unlink.cc new file mode 100644 index 000000000..2040375c9 --- /dev/null +++ b/test/syscalls/linux/unlink.cc @@ -0,0 +1,214 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <errno.h> +#include <fcntl.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "absl/strings/str_cat.h" +#include "test/util/capability_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/fs_util.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +TEST(UnlinkTest, IsDir) { + auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + + EXPECT_THAT(unlink(dir.path().c_str()), SyscallFailsWithErrno(EISDIR)); +} + +TEST(UnlinkTest, DirNotEmpty) { + auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + + int fd; + std::string path = JoinPath(dir.path(), "ExistingFile"); + EXPECT_THAT(fd = open(path.c_str(), O_RDWR | O_CREAT, 0666), + SyscallSucceeds()); + EXPECT_THAT(close(fd), SyscallSucceeds()); + EXPECT_THAT(rmdir(dir.path().c_str()), SyscallFailsWithErrno(ENOTEMPTY)); +} + +TEST(UnlinkTest, Rmdir) { + auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + EXPECT_THAT(rmdir(dir.path().c_str()), SyscallSucceeds()); +} + +TEST(UnlinkTest, AtDir) { + int dirfd; + auto tmpdir = GetAbsoluteTestTmpdir(); + EXPECT_THAT(dirfd = open(tmpdir.c_str(), O_DIRECTORY, 0), SyscallSucceeds()); + + auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(tmpdir)); + auto dir_relpath = + ASSERT_NO_ERRNO_AND_VALUE(GetRelativePath(tmpdir, dir.path())); + EXPECT_THAT(unlinkat(dirfd, dir_relpath.c_str(), AT_REMOVEDIR), + SyscallSucceeds()); + ASSERT_THAT(close(dirfd), SyscallSucceeds()); +} + +TEST(UnlinkTest, AtDirDegradedPermissions_NoRandomSave) { + // Drop capabilities that allow us to override file and directory permissions. 
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false)); + + auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + + int dirfd; + ASSERT_THAT(dirfd = open(dir.path().c_str(), O_DIRECTORY, 0), + SyscallSucceeds()); + + std::string sub_dir = JoinPath(dir.path(), "NewDir"); + EXPECT_THAT(mkdir(sub_dir.c_str(), 0755), SyscallSucceeds()); + EXPECT_THAT(fchmod(dirfd, 0444), SyscallSucceeds()); + EXPECT_THAT(unlinkat(dirfd, "NewDir", AT_REMOVEDIR), + SyscallFailsWithErrno(EACCES)); + ASSERT_THAT(close(dirfd), SyscallSucceeds()); +} + +// Files cannot be unlinked if the parent is not writable and executable. +TEST(UnlinkTest, ParentDegradedPermissions) { + // Drop capabilities that allow us to override file and directory permissions. + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false)); + + auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir.path())); + + ASSERT_THAT(chmod(dir.path().c_str(), 0000), SyscallSucceeds()); + + struct stat st; + ASSERT_THAT(stat(file.path().c_str(), &st), SyscallFailsWithErrno(EACCES)); + ASSERT_THAT(unlinkat(AT_FDCWD, file.path().c_str(), 0), + SyscallFailsWithErrno(EACCES)); + + // Non-existent files also return EACCES. + const std::string nonexist = JoinPath(dir.path(), "doesnotexist"); + ASSERT_THAT(stat(nonexist.c_str(), &st), SyscallFailsWithErrno(EACCES)); + ASSERT_THAT(unlinkat(AT_FDCWD, nonexist.c_str(), 0), + SyscallFailsWithErrno(EACCES)); +} + +TEST(UnlinkTest, AtBad) { + int dirfd; + EXPECT_THAT(dirfd = open(GetAbsoluteTestTmpdir().c_str(), O_DIRECTORY, 0), + SyscallSucceeds()); + + // Try removing a directory as a file. 
+ std::string path = JoinPath(GetAbsoluteTestTmpdir(), "NewDir"); + EXPECT_THAT(mkdir(path.c_str(), 0755), SyscallSucceeds()); + EXPECT_THAT(unlinkat(dirfd, "NewDir", 0), SyscallFailsWithErrno(EISDIR)); + EXPECT_THAT(unlinkat(dirfd, "NewDir", AT_REMOVEDIR), SyscallSucceeds()); + + // Try removing a file as a directory. + int fd; + EXPECT_THAT(fd = openat(dirfd, "UnlinkAtFile", O_RDWR | O_CREAT, 0666), + SyscallSucceeds()); + EXPECT_THAT(unlinkat(dirfd, "UnlinkAtFile", AT_REMOVEDIR), + SyscallFailsWithErrno(ENOTDIR)); + EXPECT_THAT(unlinkat(dirfd, "UnlinkAtFile/", 0), + SyscallFailsWithErrno(ENOTDIR)); + ASSERT_THAT(close(fd), SyscallSucceeds()); + EXPECT_THAT(unlinkat(dirfd, "UnlinkAtFile", 0), SyscallSucceeds()); + + // Cleanup. + ASSERT_THAT(close(dirfd), SyscallSucceeds()); +} + +TEST(UnlinkTest, AbsTmpFile) { + int fd; + std::string path = JoinPath(GetAbsoluteTestTmpdir(), "ExistingFile"); + EXPECT_THAT(fd = open(path.c_str(), O_RDWR | O_CREAT, 0666), + SyscallSucceeds()); + EXPECT_THAT(close(fd), SyscallSucceeds()); + EXPECT_THAT(unlink(path.c_str()), SyscallSucceeds()); +} + +TEST(UnlinkTest, TooLongName) { + EXPECT_THAT(unlink(std::vector<char>(16384, '0').data()), + SyscallFailsWithErrno(ENAMETOOLONG)); +} + +TEST(UnlinkTest, BadNamePtr) { + EXPECT_THAT(unlink(reinterpret_cast<char*>(1)), + SyscallFailsWithErrno(EFAULT)); +} + +TEST(UnlinkTest, AtFile) { + int dirfd; + EXPECT_THAT(dirfd = open(GetAbsoluteTestTmpdir().c_str(), O_DIRECTORY, 0666), + SyscallSucceeds()); + int fd; + EXPECT_THAT(fd = openat(dirfd, "UnlinkAtFile", O_RDWR | O_CREAT, 0666), + SyscallSucceeds()); + EXPECT_THAT(close(fd), SyscallSucceeds()); + EXPECT_THAT(unlinkat(dirfd, "UnlinkAtFile", 0), SyscallSucceeds()); +} + +TEST(UnlinkTest, OpenFile_NoRandomSave) { + // We can't save unlinked file unless they are on tmpfs. 
+ const DisableSave ds; + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + int fd; + EXPECT_THAT(fd = open(file.path().c_str(), O_RDWR, 0666), SyscallSucceeds()); + EXPECT_THAT(unlink(file.path().c_str()), SyscallSucceeds()); + EXPECT_THAT(close(fd), SyscallSucceeds()); +} + +TEST(UnlinkTest, CannotRemoveDots) { + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const std::string self = JoinPath(file.path(), "."); + ASSERT_THAT(unlink(self.c_str()), SyscallFailsWithErrno(ENOTDIR)); + const std::string parent = JoinPath(file.path(), ".."); + ASSERT_THAT(unlink(parent.c_str()), SyscallFailsWithErrno(ENOTDIR)); +} + +TEST(UnlinkTest, CannotRemoveRoot) { + ASSERT_THAT(unlinkat(-1, "/", AT_REMOVEDIR), SyscallFailsWithErrno(EBUSY)); +} + +TEST(UnlinkTest, CannotRemoveRootWithAtDir) { + const FileDescriptor dirfd = ASSERT_NO_ERRNO_AND_VALUE( + Open(GetAbsoluteTestTmpdir(), O_DIRECTORY, 0666)); + ASSERT_THAT(unlinkat(dirfd.get(), "/", AT_REMOVEDIR), + SyscallFailsWithErrno(EBUSY)); +} + +TEST(RmdirTest, CannotRemoveDots) { + auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const std::string self = JoinPath(dir.path(), "."); + ASSERT_THAT(rmdir(self.c_str()), SyscallFailsWithErrno(EINVAL)); + const std::string parent = JoinPath(dir.path(), ".."); + ASSERT_THAT(rmdir(parent.c_str()), SyscallFailsWithErrno(ENOTEMPTY)); +} + +TEST(RmdirTest, CanRemoveWithTrailingSlashes) { + auto dir1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const std::string slash = absl::StrCat(dir1.path(), "/"); + ASSERT_THAT(rmdir(slash.c_str()), SyscallSucceeds()); + auto dir2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const std::string slashslash = absl::StrCat(dir2.path(), "//"); + ASSERT_THAT(rmdir(slashslash.c_str()), SyscallSucceeds()); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/unshare.cc b/test/syscalls/linux/unshare.cc new file mode 100644 index 000000000..e32619efe --- 
/dev/null +++ b/test/syscalls/linux/unshare.cc @@ -0,0 +1,50 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <errno.h> +#include <sched.h> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/synchronization/mutex.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +TEST(UnshareTest, AllowsZeroFlags) { + ASSERT_THAT(unshare(0), SyscallSucceeds()); +} + +TEST(UnshareTest, ThreadFlagFailsIfMultithreaded) { + absl::Mutex mu; + bool finished = false; + ScopedThread t([&] { + mu.Lock(); + mu.Await(absl::Condition(&finished)); + mu.Unlock(); + }); + ASSERT_THAT(unshare(CLONE_THREAD), SyscallFailsWithErrno(EINVAL)); + mu.Lock(); + finished = true; + mu.Unlock(); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/utimes.cc b/test/syscalls/linux/utimes.cc new file mode 100644 index 000000000..e647d2896 --- /dev/null +++ b/test/syscalls/linux/utimes.cc @@ -0,0 +1,319 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <fcntl.h> +#include <sys/stat.h> +#include <sys/syscall.h> +#include <sys/time.h> +#include <sys/types.h> +#include <time.h> +#include <unistd.h> +#include <utime.h> + +#include <string> + +#include "absl/time/time.h" +#include "test/util/file_descriptor.h" +#include "test/util/fs_util.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// TimeBoxed runs fn, setting before and after to (coarse realtime) times +// guaranteed* to come before and after fn started and completed, respectively. +// +// fn may be called more than once if the clock is adjusted. +void TimeBoxed(absl::Time* before, absl::Time* after, + std::function<void()> const& fn) { + do { + // N.B. utimes and friends use CLOCK_REALTIME_COARSE for setting time (i.e., + // current_kernel_time()). See fs/attr.c:notify_change. + // + // notify_change truncates the time to a multiple of s_time_gran, but most + // filesystems set it to 1, so we don't do any truncation. + struct timespec ts; + EXPECT_THAT(clock_gettime(CLOCK_REALTIME_COARSE, &ts), SyscallSucceeds()); + // FIXME(b/132819225): gVisor filesystem timestamps inconsistently use the + // internal or host clock, which may diverge slightly. Allow some slack on + // times to account for the difference. 
+ *before = absl::TimeFromTimespec(ts) - absl::Seconds(1); + + fn(); + + EXPECT_THAT(clock_gettime(CLOCK_REALTIME_COARSE, &ts), SyscallSucceeds()); + *after = absl::TimeFromTimespec(ts) + absl::Seconds(1); + + if (*after < *before) { + // Clock jumped backwards; retry. + // + // Technically this misses jumps small enough to keep after > before, + // which could lead to test failures, but that is very unlikely to happen. + continue; + } + } while (*after < *before); +} + +void TestUtimesOnPath(std::string const& path) { + struct stat statbuf; + + struct timeval times[2] = {{10, 0}, {20, 0}}; + EXPECT_THAT(utimes(path.c_str(), times), SyscallSucceeds()); + EXPECT_THAT(stat(path.c_str(), &statbuf), SyscallSucceeds()); + EXPECT_EQ(10, statbuf.st_atime); + EXPECT_EQ(20, statbuf.st_mtime); + + absl::Time before; + absl::Time after; + TimeBoxed(&before, &after, [&] { + EXPECT_THAT(utimes(path.c_str(), nullptr), SyscallSucceeds()); + }); + + EXPECT_THAT(stat(path.c_str(), &statbuf), SyscallSucceeds()); + + absl::Time atime = absl::TimeFromTimespec(statbuf.st_atim); + EXPECT_GE(atime, before); + EXPECT_LE(atime, after); + + absl::Time mtime = absl::TimeFromTimespec(statbuf.st_mtim); + EXPECT_GE(mtime, before); + EXPECT_LE(mtime, after); +} + +TEST(UtimesTest, OnFile) { + auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + TestUtimesOnPath(f.path()); +} + +TEST(UtimesTest, OnDir) { + auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + TestUtimesOnPath(dir.path()); +} + +TEST(UtimesTest, MissingPath) { + auto path = NewTempAbsPath(); + struct timeval times[2] = {{10, 0}, {20, 0}}; + EXPECT_THAT(utimes(path.c_str(), times), SyscallFailsWithErrno(ENOENT)); +} + +void TestFutimesat(int dirFd, std::string const& path) { + struct stat statbuf; + + struct timeval times[2] = {{10, 0}, {20, 0}}; + EXPECT_THAT(futimesat(dirFd, path.c_str(), times), SyscallSucceeds()); + EXPECT_THAT(fstatat(dirFd, path.c_str(), &statbuf, 0), SyscallSucceeds()); + EXPECT_EQ(10, 
statbuf.st_atime); + EXPECT_EQ(20, statbuf.st_mtime); + + absl::Time before; + absl::Time after; + TimeBoxed(&before, &after, [&] { + EXPECT_THAT(futimesat(dirFd, path.c_str(), nullptr), SyscallSucceeds()); + }); + + EXPECT_THAT(fstatat(dirFd, path.c_str(), &statbuf, 0), SyscallSucceeds()); + + absl::Time atime = absl::TimeFromTimespec(statbuf.st_atim); + EXPECT_GE(atime, before); + EXPECT_LE(atime, after); + + absl::Time mtime = absl::TimeFromTimespec(statbuf.st_mtim); + EXPECT_GE(mtime, before); + EXPECT_LE(mtime, after); +} + +TEST(FutimesatTest, OnAbsPath) { + auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + TestFutimesat(0, f.path()); +} + +TEST(FutimesatTest, OnRelPath) { + auto d = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(d.path())); + auto basename = std::string(Basename(f.path())); + const FileDescriptor dirFd = + ASSERT_NO_ERRNO_AND_VALUE(Open(d.path(), O_RDONLY | O_DIRECTORY)); + TestFutimesat(dirFd.get(), basename); +} + +TEST(FutimesatTest, InvalidNsec) { + auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + struct timeval times[4][2] = {{ + {0, 1}, // Valid + {1, static_cast<int64_t>(1e7)} // Invalid + }, + { + {1, static_cast<int64_t>(1e7)}, // Invalid + {0, 1} // Valid + }, + { + {0, 1}, // Valid + {1, -1} // Invalid + }, + { + {1, -1}, // Invalid + {0, 1} // Valid + }}; + + for (unsigned int i = 0; i < sizeof(times) / sizeof(times[0]); i++) { + std::cout << "test:" << i << "\n"; + EXPECT_THAT(futimesat(0, f.path().c_str(), times[i]), + SyscallFailsWithErrno(EINVAL)); + } +} + +void TestUtimensat(int dirFd, std::string const& path) { + struct stat statbuf; + const struct timespec times[2] = {{10, 0}, {20, 0}}; + EXPECT_THAT(utimensat(dirFd, path.c_str(), times, 0), SyscallSucceeds()); + EXPECT_THAT(fstatat(dirFd, path.c_str(), &statbuf, 0), SyscallSucceeds()); + EXPECT_EQ(10, statbuf.st_atime); + EXPECT_EQ(20, statbuf.st_mtime); + + // Test setting with 
UTIME_NOW and UTIME_OMIT. + struct stat statbuf2; + const struct timespec times2[2] = { + {0, UTIME_NOW}, // Should set atime to now. + {0, UTIME_OMIT} // Should not change mtime. + }; + + absl::Time before; + absl::Time after; + TimeBoxed(&before, &after, [&] { + EXPECT_THAT(utimensat(dirFd, path.c_str(), times2, 0), SyscallSucceeds()); + }); + + EXPECT_THAT(fstatat(dirFd, path.c_str(), &statbuf2, 0), SyscallSucceeds()); + + absl::Time atime2 = absl::TimeFromTimespec(statbuf2.st_atim); + EXPECT_GE(atime2, before); + EXPECT_LE(atime2, after); + + absl::Time mtime = absl::TimeFromTimespec(statbuf.st_mtim); + absl::Time mtime2 = absl::TimeFromTimespec(statbuf2.st_mtim); + // mtime should not be changed. + EXPECT_EQ(mtime, mtime2); + + // Test setting with times = NULL. Should set both atime and mtime to the + // current system time. + struct stat statbuf3; + TimeBoxed(&before, &after, [&] { + EXPECT_THAT(utimensat(dirFd, path.c_str(), nullptr, 0), SyscallSucceeds()); + }); + + EXPECT_THAT(fstatat(dirFd, path.c_str(), &statbuf3, 0), SyscallSucceeds()); + + absl::Time atime3 = absl::TimeFromTimespec(statbuf3.st_atim); + EXPECT_GE(atime3, before); + EXPECT_LE(atime3, after); + + absl::Time mtime3 = absl::TimeFromTimespec(statbuf3.st_mtim); + EXPECT_GE(mtime3, before); + EXPECT_LE(mtime3, after); + + EXPECT_EQ(atime3, mtime3); +} + +TEST(UtimensatTest, OnAbsPath) { + auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + TestUtimensat(0, f.path()); +} + +TEST(UtimensatTest, OnRelPath) { + auto d = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(d.path())); + auto basename = std::string(Basename(f.path())); + const FileDescriptor dirFd = + ASSERT_NO_ERRNO_AND_VALUE(Open(d.path(), O_RDONLY | O_DIRECTORY)); + TestUtimensat(dirFd.get(), basename); +} + +TEST(UtimensatTest, OmitNoop) { + // Setting both timespecs to UTIME_OMIT on a nonexistant path should succeed. 
+ auto path = NewTempAbsPath(); + const struct timespec times[2] = {{0, UTIME_OMIT}, {0, UTIME_OMIT}}; + EXPECT_THAT(utimensat(0, path.c_str(), times, 0), SyscallSucceeds()); +} + +// Verify that we can actually set atime and mtime to 0. +TEST(UtimeTest, ZeroAtimeandMtime) { + const auto tmp_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const auto tmp_file = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(tmp_dir.path())); + + // Stat the file before and after updating atime and mtime. + struct stat stat_before = {}; + EXPECT_THAT(stat(tmp_file.path().c_str(), &stat_before), SyscallSucceeds()); + + ASSERT_NE(stat_before.st_atime, 0); + ASSERT_NE(stat_before.st_mtime, 0); + + const struct utimbuf times = {}; // Zero for both atime and mtime. + EXPECT_THAT(utime(tmp_file.path().c_str(), &times), SyscallSucceeds()); + + struct stat stat_after = {}; + EXPECT_THAT(stat(tmp_file.path().c_str(), &stat_after), SyscallSucceeds()); + + // We should see the atime and mtime changed when we set them to 0. + ASSERT_EQ(stat_after.st_atime, 0); + ASSERT_EQ(stat_after.st_mtime, 0); +} + +TEST(UtimensatTest, InvalidNsec) { + auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + struct timespec times[2][2] = { + { + {0, UTIME_OMIT}, // Valid + {2, static_cast<int64_t>(1e10)} // Invalid + }, + { + {2, static_cast<int64_t>(1e10)}, // Invalid + {0, UTIME_OMIT} // Valid + }}; + + for (unsigned int i = 0; i < sizeof(times) / sizeof(times[0]); i++) { + std::cout << "test:" << i << "\n"; + EXPECT_THAT(utimensat(0, f.path().c_str(), times[i], 0), + SyscallFailsWithErrno(EINVAL)); + } +} + +TEST(Utimensat, NullPath) { + // From man utimensat(2): + // "the Linux utimensat() system call implements a nonstandard feature: if + // pathname is NULL, then the call modifies the timestamps of the file + // referred to by the file descriptor dirfd (which may refer to any type of + // file). 
+ // Note, however, that the glibc wrapper for utimensat() disallows + // passing NULL as the value for file: the wrapper function returns the error + // EINVAL in this case." + auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDWR)); + struct stat statbuf; + const struct timespec times[2] = {{10, 0}, {20, 0}}; + // Call syscall directly. + EXPECT_THAT(syscall(SYS_utimensat, fd.get(), NULL, times, 0), + SyscallSucceeds()); + EXPECT_THAT(fstatat(0, f.path().c_str(), &statbuf, 0), SyscallSucceeds()); + EXPECT_EQ(10, statbuf.st_atime); + EXPECT_EQ(20, statbuf.st_mtime); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/vdso.cc b/test/syscalls/linux/vdso.cc new file mode 100644 index 000000000..19c80add8 --- /dev/null +++ b/test/syscalls/linux/vdso.cc @@ -0,0 +1,48 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <string.h> +#include <sys/mman.h> + +#include <algorithm> + +#include "gtest/gtest.h" +#include "test/util/fs_util.h" +#include "test/util/posix_error.h" +#include "test/util/proc_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// Ensure that the vvar page cannot be made writable. 
+TEST(VvarTest, WriteVvar) { + auto contents = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps")); + auto maps = ASSERT_NO_ERRNO_AND_VALUE(ParseProcMaps(contents)); + auto it = std::find_if(maps.begin(), maps.end(), [](const ProcMapsEntry& e) { + return e.filename == "[vvar]"; + }); + + SKIP_IF(it == maps.end()); + EXPECT_THAT(mprotect(reinterpret_cast<void*>(it->start), kPageSize, + PROT_READ | PROT_WRITE), + SyscallFailsWithErrno(EACCES)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/vdso_clock_gettime.cc b/test/syscalls/linux/vdso_clock_gettime.cc new file mode 100644 index 000000000..ce1899f45 --- /dev/null +++ b/test/syscalls/linux/vdso_clock_gettime.cc @@ -0,0 +1,108 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <stdint.h> +#include <sys/time.h> +#include <syscall.h> +#include <time.h> +#include <unistd.h> + +#include <map> +#include <string> +#include <utility> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/strings/numbers.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +std::string PrintClockId(::testing::TestParamInfo<clockid_t> info) { + switch (info.param) { + case CLOCK_MONOTONIC: + return "CLOCK_MONOTONIC"; + case CLOCK_REALTIME: + return "CLOCK_REALTIME"; + case CLOCK_BOOTTIME: + return "CLOCK_BOOTTIME"; + default: + return absl::StrCat(info.param); + } +} + +class CorrectVDSOClockTest : public ::testing::TestWithParam<clockid_t> {}; + +TEST_P(CorrectVDSOClockTest, IsCorrect) { + struct timespec tvdso, tsys; + absl::Time vdso_time, sys_time; + uint64_t total_calls = 0; + + // It is expected that 82.5% of clock_gettime calls will be less than 100us + // skewed from the system time. + // Unfortunately this is not only influenced by the VDSO clock skew, but also + // by arbitrary scheduling delays and the like. The test is therefore + // regularly disabled. 
+ std::map<absl::Duration, std::tuple<double, uint64_t, uint64_t>> confidence = + { + {absl::Microseconds(100), std::make_tuple(0.825, 0, 0)}, + {absl::Microseconds(250), std::make_tuple(0.94, 0, 0)}, + {absl::Milliseconds(1), std::make_tuple(0.999, 0, 0)}, + }; + + absl::Time start = absl::Now(); + while (absl::Now() < start + absl::Seconds(30)) { + EXPECT_THAT(clock_gettime(GetParam(), &tvdso), SyscallSucceeds()); + EXPECT_THAT(syscall(__NR_clock_gettime, GetParam(), &tsys), + SyscallSucceeds()); + + vdso_time = absl::TimeFromTimespec(tvdso); + + for (auto const& conf : confidence) { + std::get<1>(confidence[conf.first]) += + (sys_time - vdso_time) < conf.first; + } + + sys_time = absl::TimeFromTimespec(tsys); + + for (auto const& conf : confidence) { + std::get<2>(confidence[conf.first]) += + (vdso_time - sys_time) < conf.first; + } + + ++total_calls; + } + + for (auto const& conf : confidence) { + EXPECT_GE(std::get<1>(conf.second) / static_cast<double>(total_calls), + std::get<0>(conf.second)); + EXPECT_GE(std::get<2>(conf.second) / static_cast<double>(total_calls), + std::get<0>(conf.second)); + } +} + +INSTANTIATE_TEST_SUITE_P(ClockGettime, CorrectVDSOClockTest, + ::testing::Values(CLOCK_MONOTONIC, CLOCK_REALTIME, + CLOCK_BOOTTIME), + PrintClockId); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/vfork.cc b/test/syscalls/linux/vfork.cc new file mode 100644 index 000000000..19d05998e --- /dev/null +++ b/test/syscalls/linux/vfork.cc @@ -0,0 +1,195 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <errno.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#include <string> +#include <utility> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/flags/flag.h" +#include "absl/time/time.h" +#include "test/util/logging.h" +#include "test/util/multiprocess_util.h" +#include "test/util/test_util.h" +#include "test/util/time_util.h" + +ABSL_FLAG(bool, vfork_test_child, false, + "If true, run the VforkTest child workload."); + +namespace gvisor { +namespace testing { + +namespace { + +// We don't test with raw CLONE_VFORK to avoid interacting with glibc's use of +// TLS. +// +// Even with vfork(2), we must be careful to do little more in the child than +// call execve(2). We use the simplest sleep function possible, though this is +// still precarious, as we're officially only allowed to call execve(2) and +// _exit(2). +constexpr absl::Duration kChildDelay = absl::Seconds(10); + +// Exit code for successful child subprocesses. We don't want to use 0 since +// it's too common, and an execve(2) failure causes the child to exit with the +// errno, so kChildExitCode is chosen to be an unlikely errno: +constexpr int kChildExitCode = 118; // ENOTNAM: Not a XENIX named type file + +int64_t MonotonicNow() { + struct timespec now; + TEST_PCHECK(clock_gettime(CLOCK_MONOTONIC, &now) == 0); + return now.tv_sec * 1000000000ll + now.tv_nsec; +} + +TEST(VforkTest, ParentStopsUntilChildExits) { + const auto test = [] { + // N.B. 
Run the test in a single-threaded subprocess because + // vfork is not safe in a multi-threaded process. + + const int64_t start = MonotonicNow(); + + pid_t pid = vfork(); + if (pid == 0) { + SleepSafe(kChildDelay); + _exit(kChildExitCode); + } + TEST_PCHECK_MSG(pid > 0, "vfork failed"); + MaybeSave(); + + const int64_t end = MonotonicNow(); + + absl::Duration dur = absl::Nanoseconds(end - start); + + TEST_CHECK(dur >= kChildDelay); + + int status = 0; + TEST_PCHECK(RetryEINTR(waitpid)(pid, &status, 0)); + TEST_CHECK(WIFEXITED(status)); + TEST_CHECK(WEXITSTATUS(status) == kChildExitCode); + }; + + EXPECT_THAT(InForkedProcess(test), IsPosixErrorOkAndHolds(0)); +} + +TEST(VforkTest, ParentStopsUntilChildExecves_NoRandomSave) { + ExecveArray const owned_child_argv = {"/proc/self/exe", "--vfork_test_child"}; + char* const* const child_argv = owned_child_argv.get(); + + const auto test = [&] { + const int64_t start = MonotonicNow(); + + pid_t pid = vfork(); + if (pid == 0) { + SleepSafe(kChildDelay); + execve(child_argv[0], child_argv, /* envp = */ nullptr); + _exit(errno); + } + // Don't attempt save/restore until after recording end_time, + // since the test expects an upper bound on the time spent + // stopped. + int saved_errno = errno; + const int64_t end = MonotonicNow(); + errno = saved_errno; + TEST_PCHECK_MSG(pid > 0, "vfork failed"); + MaybeSave(); + + absl::Duration dur = absl::Nanoseconds(end - start); + + // The parent should resume execution after execve, but before + // the post-execve test child exits. + TEST_CHECK(dur >= kChildDelay); + TEST_CHECK(dur <= 2 * kChildDelay); + + int status = 0; + TEST_PCHECK(RetryEINTR(waitpid)(pid, &status, 0)); + TEST_CHECK(WIFEXITED(status)); + TEST_CHECK(WEXITSTATUS(status) == kChildExitCode); + }; + + EXPECT_THAT(InForkedProcess(test), IsPosixErrorOkAndHolds(0)); +} + +// A vfork child does not unstop the parent a second time when it exits after +// exec. 
+TEST(VforkTest, ExecedChildExitDoesntUnstopParent_NoRandomSave) { + ExecveArray const owned_child_argv = {"/proc/self/exe", "--vfork_test_child"}; + char* const* const child_argv = owned_child_argv.get(); + + const auto test = [&] { + pid_t pid1 = vfork(); + if (pid1 == 0) { + execve(child_argv[0], child_argv, /* envp = */ nullptr); + _exit(errno); + } + TEST_PCHECK_MSG(pid1 > 0, "vfork failed"); + MaybeSave(); + + // pid1 exec'd and is now sleeping. + SleepSafe(kChildDelay / 2); + + const int64_t start = MonotonicNow(); + + pid_t pid2 = vfork(); + if (pid2 == 0) { + SleepSafe(kChildDelay); + _exit(kChildExitCode); + } + TEST_PCHECK_MSG(pid2 > 0, "vfork failed"); + MaybeSave(); + + const int64_t end = MonotonicNow(); + + absl::Duration dur = absl::Nanoseconds(end - start); + + // The parent should resume execution only after pid2 exits, not + // when pid1 exits. + TEST_CHECK(dur >= kChildDelay); + + int status = 0; + TEST_PCHECK(RetryEINTR(waitpid)(pid1, &status, 0)); + TEST_CHECK(WIFEXITED(status)); + TEST_CHECK(WEXITSTATUS(status) == kChildExitCode); + + TEST_PCHECK(RetryEINTR(waitpid)(pid2, &status, 0)); + TEST_CHECK(WIFEXITED(status)); + TEST_CHECK(WEXITSTATUS(status) == kChildExitCode); + }; + + EXPECT_THAT(InForkedProcess(test), IsPosixErrorOkAndHolds(0)); +} + +int RunChild() { + SleepSafe(kChildDelay); + return kChildExitCode; +} + +} // namespace + +} // namespace testing +} // namespace gvisor + +int main(int argc, char** argv) { + gvisor::testing::TestInit(&argc, &argv); + + if (absl::GetFlag(FLAGS_vfork_test_child)) { + return gvisor::testing::RunChild(); + } + + return gvisor::testing::RunAllTests(); +} diff --git a/test/syscalls/linux/vsyscall.cc b/test/syscalls/linux/vsyscall.cc new file mode 100644 index 000000000..ae4377108 --- /dev/null +++ b/test/syscalls/linux/vsyscall.cc @@ -0,0 +1,46 @@ +// Copyright 2018 The gVisor Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <errno.h> +#include <time.h> + +#include "gtest/gtest.h" +#include "test/util/proc_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +#if defined(__x86_64__) || defined(__i386__) +time_t vsyscall_time(time_t* t) { + constexpr uint64_t kVsyscallTimeEntry = 0xffffffffff600400; + return reinterpret_cast<time_t (*)(time_t*)>(kVsyscallTimeEntry)(t); +} + +TEST(VsyscallTest, VsyscallAlwaysAvailableOnGvisor) { + SKIP_IF(!IsRunningOnGvisor()); + // Vsyscall is always advertised by gvisor. + EXPECT_TRUE(ASSERT_NO_ERRNO_AND_VALUE(IsVsyscallEnabled())); + // Vsyscall should always works on gvisor. + time_t t; + EXPECT_THAT(vsyscall_time(&t), SyscallSucceeds()); +} +#endif + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/wait.cc b/test/syscalls/linux/wait.cc new file mode 100644 index 000000000..944149d5e --- /dev/null +++ b/test/syscalls/linux/wait.cc @@ -0,0 +1,913 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <signal.h> +#include <sys/mman.h> +#include <sys/ptrace.h> +#include <sys/resource.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#include <functional> +#include <tuple> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/strings/str_cat.h" +#include "absl/synchronization/mutex.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/util/cleanup.h" +#include "test/util/file_descriptor.h" +#include "test/util/logging.h" +#include "test/util/multiprocess_util.h" +#include "test/util/posix_error.h" +#include "test/util/signal_util.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" +#include "test/util/time_util.h" + +using ::testing::UnorderedElementsAre; + +// These unit tests focus on the wait4(2) system call, but include a basic +// checks for the i386 waitpid(2) syscall, which is a subset of wait4(2). +// +// NOTE(b/22640830,b/27680907,b/29049891): Some functionality is not tested as +// it is not currently supported by gVisor: +// * Process groups. +// * Core dump status (WCOREDUMP). +// +// Tests for waiting on stopped/continued children are in sigstop.cc. + +namespace gvisor { +namespace testing { + +namespace { + +// The CloneChild function seems to need more than one page of stack space. +static const size_t kStackSize = 2 * kPageSize; + +// The child thread created in CloneAndExit runs this function. +// This child does not have the TLS setup, so it must not use glibc functions. 
// `priv` is not dereferenced: the pointer value itself is reinterpreted as
// the number of seconds to sleep (CloneAndExit packs it the same way).
int CloneChild(void* priv) {
  int64_t sleep = reinterpret_cast<int64_t>(priv);
  SleepSafe(absl::Seconds(sleep));

  // glibc's _exit(2) function wrapper will helpfully call exit_group(2),
  // exiting the entire process.
  syscall(__NR_exit, 0);
  return 1;
}

// ForkAndExit forks a child process which exits with exit_code, after
// sleeping for the specified duration (seconds).
//
// Returns fork()'s result unchanged, so the caller sees the child's PID or
// fork's failure value.
pid_t ForkAndExit(int exit_code, int64_t sleep) {
  pid_t child = fork();
  if (child == 0) {
    SleepSafe(absl::Seconds(sleep));
    _exit(exit_code);
  }
  return child;
}

// Returns the current reading of clock `id` in nanoseconds. Fails the
// TEST_PCHECK if clock_gettime does not return 0.
int64_t clock_gettime_nsecs(clockid_t id) {
  struct timespec ts;
  TEST_PCHECK(clock_gettime(id, &ts) == 0);
  return (ts.tv_sec * 1000000000 + ts.tv_nsec);
}

// Busy-loops until CLOCK_THREAD_CPUTIME_ID has advanced by `sec` seconds
// from its reading at entry.
void spin(int64_t sec) {
  int64_t ns = sec * 1000000000;
  int64_t start = clock_gettime_nsecs(CLOCK_THREAD_CPUTIME_ID);
  int64_t end = start + ns;

  do {
    constexpr int kLoopCount = 1000000;  // large and arbitrary
    // volatile to prevent the compiler from skipping this loop.
    for (volatile int i = 0; i < kLoopCount; i++) {
    }
  } while (clock_gettime_nsecs(CLOCK_THREAD_CPUTIME_ID) < end);
}

// ForkSpinAndExit forks a child process which exits with exit_code, after
// spinning for the specified duration (seconds).
//
// Returns fork()'s result unchanged.
pid_t ForkSpinAndExit(int exit_code, int64_t spintime) {
  pid_t child = fork();
  if (child == 0) {
    spin(spintime);
    _exit(exit_code);
  }
  return child;
}

// Returns the sum of the ru_utime and ru_stime fields of `ru`.
absl::Duration RusageCpuTime(const struct rusage& ru) {
  return absl::DurationFromTimeval(ru.ru_utime) +
         absl::DurationFromTimeval(ru.ru_stime);
}

// Returns the address of the top of the stack.
// Free with FreeStack.
//
// The mapping is kStackSize bytes, anonymous and private. On mmap failure the
// MAP_FAILED value is returned, cast to uintptr_t.
uintptr_t AllocStack() {
  void* addr = mmap(nullptr, kStackSize, PROT_READ | PROT_WRITE,
                    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

  if (addr == MAP_FAILED) {
    return reinterpret_cast<uintptr_t>(MAP_FAILED);
  }

  // Callers receive the high end of the mapping.
  return reinterpret_cast<uintptr_t>(addr) + kStackSize;
}

// Frees a stack page allocated with AllocStack.
+int FreeStack(uintptr_t addr) { + addr -= kStackSize; + return munmap(reinterpret_cast<void*>(addr), kPageSize); +} + +// CloneAndExit clones a child thread, which exits with 0 after sleeping for +// the specified duration (must be in seconds). extra_flags are ORed against +// the standard clone(2) flags. +int CloneAndExit(int64_t sleep, uintptr_t stack, int extra_flags) { + return clone(CloneChild, reinterpret_cast<void*>(stack), + CLONE_FILES | CLONE_FS | CLONE_SIGHAND | CLONE_VM | extra_flags, + reinterpret_cast<void*>(sleep)); +} + +// Simple wrappers around wait4(2) and waitid(2) that ignore interrupts. +constexpr auto Wait4 = RetryEINTR(wait4); +constexpr auto Waitid = RetryEINTR(waitid); + +// Fixture for tests parameterized by a function that waits for any child to +// exit with the given options, checks that it exited with the given code, and +// then returns its PID. +// +// N.B. These tests run in a multi-threaded environment. We assume that +// background threads do not create child processes and are not themselves +// created with clone(... | SIGCHLD). Either may cause these tests to +// erroneously wait on child processes/threads. +class WaitAnyChildTest : public ::testing::TestWithParam< + std::function<PosixErrorOr<pid_t>(int, int)>> { + protected: + PosixErrorOr<pid_t> WaitAny(int code) { return WaitAnyWithOptions(code, 0); } + + PosixErrorOr<pid_t> WaitAnyWithOptions(int code, int options) { + return GetParam()(code, options); + } +}; + +// Wait for any child to exit. +TEST_P(WaitAnyChildTest, Fork) { + pid_t child; + ASSERT_THAT(child = ForkAndExit(0, 0), SyscallSucceeds()); + + EXPECT_THAT(WaitAny(0), IsPosixErrorOkAndHolds(child)); +} + +// Call wait4 for any process after the child has already exited. 
TEST_P(WaitAnyChildTest, AfterExit) {
  pid_t child;
  ASSERT_THAT(child = ForkAndExit(0, 0), SyscallSucceeds());

  // Give the child (which exits without sleeping) time to finish before we
  // start waiting.
  absl::SleepFor(absl::Seconds(5));

  EXPECT_THAT(WaitAny(0), IsPosixErrorOkAndHolds(child));
}

// Wait for multiple children to exit, waiting for either at a time.
TEST_P(WaitAnyChildTest, MultipleFork) {
  pid_t child1, child2;
  ASSERT_THAT(child1 = ForkAndExit(0, 0), SyscallSucceeds());
  ASSERT_THAT(child2 = ForkAndExit(0, 0), SyscallSucceeds());

  // The order in which the two children are reaped is not specified, so
  // collect both PIDs and compare as an unordered set.
  std::vector<pid_t> pids;
  pids.push_back(ASSERT_NO_ERRNO_AND_VALUE(WaitAny(0)));
  pids.push_back(ASSERT_NO_ERRNO_AND_VALUE(WaitAny(0)));
  EXPECT_THAT(pids, UnorderedElementsAre(child1, child2));
}

// Wait for any child to exit.
// A non-CLONE_THREAD child which sends SIGCHLD upon exit behaves much like
// a forked process.
TEST_P(WaitAnyChildTest, CloneSIGCHLD) {
  uintptr_t stack;
  ASSERT_THAT(stack = AllocStack(), SyscallSucceeds());
  // Release the child's stack when the test scope ends.
  auto free =
      Cleanup([stack] { ASSERT_THAT(FreeStack(stack), SyscallSucceeds()); });

  int child;
  ASSERT_THAT(child = CloneAndExit(0, stack, SIGCHLD), SyscallSucceeds());

  EXPECT_THAT(WaitAny(0), IsPosixErrorOkAndHolds(child));
}

// Wait for a child thread and process.
TEST_P(WaitAnyChildTest, ForkAndClone) {
  pid_t process;
  ASSERT_THAT(process = ForkAndExit(0, 0), SyscallSucceeds());

  uintptr_t stack;
  ASSERT_THAT(stack = AllocStack(), SyscallSucceeds());
  // Release the cloned child's stack when the test scope ends.
  auto free =
      Cleanup([stack] { ASSERT_THAT(FreeStack(stack), SyscallSucceeds()); });

  int thread;
  // Send SIGCHLD for normal wait semantics.
  ASSERT_THAT(thread = CloneAndExit(0, stack, SIGCHLD), SyscallSucceeds());

  // Either child may be reaped first.
  std::vector<pid_t> pids;
  pids.push_back(ASSERT_NO_ERRNO_AND_VALUE(WaitAny(0)));
  pids.push_back(ASSERT_NO_ERRNO_AND_VALUE(WaitAny(0)));
  EXPECT_THAT(pids, UnorderedElementsAre(process, thread));
}

// Return immediately if no child has exited.
+TEST_P(WaitAnyChildTest, WaitWNOHANG) { + EXPECT_THAT(WaitAnyWithOptions(0, WNOHANG), + PosixErrorIs(ECHILD, ::testing::_)); +} + +// Bad options passed +TEST_P(WaitAnyChildTest, BadOption) { + EXPECT_THAT(WaitAnyWithOptions(0, 123456), + PosixErrorIs(EINVAL, ::testing::_)); +} + +TEST_P(WaitAnyChildTest, WaitedChildRusage) { + struct rusage before; + ASSERT_THAT(getrusage(RUSAGE_CHILDREN, &before), SyscallSucceeds()); + + pid_t child; + constexpr absl::Duration kSpin = absl::Seconds(3); + ASSERT_THAT(child = ForkSpinAndExit(0, absl::ToInt64Seconds(kSpin)), + SyscallSucceeds()); + ASSERT_THAT(WaitAny(0), IsPosixErrorOkAndHolds(child)); + + struct rusage after; + ASSERT_THAT(getrusage(RUSAGE_CHILDREN, &after), SyscallSucceeds()); + + EXPECT_GE(RusageCpuTime(after) - RusageCpuTime(before), kSpin); +} + +TEST_P(WaitAnyChildTest, IgnoredChildRusage) { + // "POSIX.1-2001 specifies that if the disposition of SIGCHLD is + // set to SIG_IGN or the SA_NOCLDWAIT flag is set for SIGCHLD (see + // sigaction(2)), then children that terminate do not become zombies and a + // call to wait() or waitpid() will block until all children have terminated, + // and then fail with errno set to ECHILD." - waitpid(2) + // + // "RUSAGE_CHILDREN: Return resource usage statistics for all children of the + // calling process that have terminated *and been waited for*." - + // getrusage(2), emphasis added + + struct sigaction sa; + sa.sa_handler = SIG_IGN; + const auto cleanup_sigact = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGCHLD, sa)); + + struct rusage before; + ASSERT_THAT(getrusage(RUSAGE_CHILDREN, &before), SyscallSucceeds()); + + const absl::Duration start = + absl::Nanoseconds(clock_gettime_nsecs(CLOCK_MONOTONIC)); + + constexpr absl::Duration kSpin = absl::Seconds(3); + + // ForkAndSpin uses CLOCK_THREAD_CPUTIME_ID, which is lower resolution than, + // and may diverge from, CLOCK_MONOTONIC, so we allow a small grace period but + // still check that we blocked for a while. 
+ constexpr absl::Duration kSpinGrace = absl::Milliseconds(100); + + pid_t child; + ASSERT_THAT(child = ForkSpinAndExit(0, absl::ToInt64Seconds(kSpin)), + SyscallSucceeds()); + ASSERT_THAT(WaitAny(0), PosixErrorIs(ECHILD, ::testing::_)); + const absl::Duration end = + absl::Nanoseconds(clock_gettime_nsecs(CLOCK_MONOTONIC)); + EXPECT_GE(end - start, kSpin - kSpinGrace); + + struct rusage after; + ASSERT_THAT(getrusage(RUSAGE_CHILDREN, &after), SyscallSucceeds()); + EXPECT_EQ(before.ru_utime.tv_sec, after.ru_utime.tv_sec); + EXPECT_EQ(before.ru_utime.tv_usec, after.ru_utime.tv_usec); + EXPECT_EQ(before.ru_stime.tv_sec, after.ru_stime.tv_sec); + EXPECT_EQ(before.ru_stime.tv_usec, after.ru_stime.tv_usec); +} + +INSTANTIATE_TEST_SUITE_P( + Waiters, WaitAnyChildTest, + ::testing::Values( + [](int code, int options) -> PosixErrorOr<pid_t> { + int status; + auto const pid = Wait4(-1, &status, options, nullptr); + MaybeSave(); + if (pid < 0) { + return PosixError(errno, "wait4"); + } + if (!WIFEXITED(status) || WEXITSTATUS(status) != code) { + return PosixError( + EINVAL, absl::StrCat("unexpected wait status: got ", status, + ", wanted ", code)); + } + return static_cast<pid_t>(pid); + }, + [](int code, int options) -> PosixErrorOr<pid_t> { + siginfo_t si; + auto const rv = Waitid(P_ALL, 0, &si, WEXITED | options); + MaybeSave(); + if (rv < 0) { + return PosixError(errno, "waitid"); + } + if (si.si_signo != SIGCHLD) { + return PosixError( + EINVAL, absl::StrCat("unexpected signo: got ", si.si_signo, + ", wanted ", SIGCHLD)); + } + if (si.si_status != code) { + return PosixError( + EINVAL, absl::StrCat("unexpected status: got ", si.si_status, + ", wanted ", code)); + } + if (si.si_code != CLD_EXITED) { + return PosixError(EINVAL, + absl::StrCat("unexpected code: got ", si.si_code, + ", wanted ", CLD_EXITED)); + } + auto const uid = getuid(); + if (si.si_uid != uid) { + return PosixError(EINVAL, + absl::StrCat("unexpected uid: got ", si.si_uid, + ", wanted ", uid)); + } + 
return static_cast<pid_t>(si.si_pid); + })); + +// Fixture for tests parameterized by a (sysno, function) tuple. The function +// takes the PID of a specific child to wait for, waits for it to exit, and +// checks that it exits with the given code. +class WaitSpecificChildTest + : public ::testing::TestWithParam< + std::tuple<int, std::function<PosixError(pid_t, int, int)>>> { + protected: + int Sysno() { return std::get<0>(GetParam()); } + + PosixError WaitForWithOptions(pid_t pid, int options, int code) { + return std::get<1>(GetParam())(pid, options, code); + } + + PosixError WaitFor(pid_t pid, int code) { + return std::get<1>(GetParam())(pid, 0, code); + } +}; + +// Wait for specific child to exit. +TEST_P(WaitSpecificChildTest, Fork) { + pid_t child; + ASSERT_THAT(child = ForkAndExit(0, 0), SyscallSucceeds()); + + EXPECT_NO_ERRNO(WaitFor(child, 0)); +} + +// Non-zero exit codes are correctly propagated. +TEST_P(WaitSpecificChildTest, NormalExit) { + pid_t child; + ASSERT_THAT(child = ForkAndExit(42, 0), SyscallSucceeds()); + + EXPECT_NO_ERRNO(WaitFor(child, 42)); +} + +// Wait for multiple children to exit. +TEST_P(WaitSpecificChildTest, MultipleFork) { + pid_t child1, child2; + ASSERT_THAT(child1 = ForkAndExit(0, 0), SyscallSucceeds()); + ASSERT_THAT(child2 = ForkAndExit(0, 0), SyscallSucceeds()); + + EXPECT_NO_ERRNO(WaitFor(child1, 0)); + EXPECT_NO_ERRNO(WaitFor(child2, 0)); +} + +// Wait for multiple children to exit, out of the order they were created. +TEST_P(WaitSpecificChildTest, MultipleForkOutOfOrder) { + pid_t child1, child2; + ASSERT_THAT(child1 = ForkAndExit(0, 0), SyscallSucceeds()); + ASSERT_THAT(child2 = ForkAndExit(0, 0), SyscallSucceeds()); + + EXPECT_NO_ERRNO(WaitFor(child2, 0)); + EXPECT_NO_ERRNO(WaitFor(child1, 0)); +} + +// Wait for specific child to exit, entering wait4 before the exit occurs. 
+TEST_P(WaitSpecificChildTest, ForkSleep) { + pid_t child; + ASSERT_THAT(child = ForkAndExit(0, 5), SyscallSucceeds()); + + EXPECT_NO_ERRNO(WaitFor(child, 0)); +} + +// Wait should block until the child exits. +TEST_P(WaitSpecificChildTest, ForkBlock) { + pid_t child; + + auto start = absl::Now(); + ASSERT_THAT(child = ForkAndExit(0, 5), SyscallSucceeds()); + + EXPECT_NO_ERRNO(WaitFor(child, 0)); + + EXPECT_GE(absl::Now() - start, absl::Seconds(5)); +} + +// Waiting after the child has already exited returns immediately. +TEST_P(WaitSpecificChildTest, AfterExit) { + pid_t child; + ASSERT_THAT(child = ForkAndExit(0, 0), SyscallSucceeds()); + + absl::SleepFor(absl::Seconds(5)); + + EXPECT_NO_ERRNO(WaitFor(child, 0)); +} + +// Wait for child of sibling thread. +TEST_P(WaitSpecificChildTest, SiblingChildren) { + absl::Mutex mu; + pid_t child; + bool ready = false; + bool stop = false; + + ScopedThread t([&] { + absl::MutexLock ml(&mu); + EXPECT_THAT(child = ForkAndExit(0, 0), SyscallSucceeds()); + ready = true; + mu.Await(absl::Condition(&stop)); + }); + + // N.B. This must be declared after ScopedThread, so it is destructed first, + // thus waking the thread. + absl::MutexLock ml(&mu); + mu.Await(absl::Condition(&ready)); + + EXPECT_NO_ERRNO(WaitFor(child, 0)); + + // Keep the sibling alive until after we've waited so the child isn't + // reparented. + stop = true; +} + +// Waiting for child of sibling thread not allowed with WNOTHREAD. +TEST_P(WaitSpecificChildTest, SiblingChildrenWNOTHREAD) { + // Linux added WNOTHREAD support to waitid(2) in + // 91c4e8ea8f05916df0c8a6f383508ac7c9e10dba ("wait: allow sys_waitid() to + // accept __WNOTHREAD/__WCLONE/__WALL"). i.e., Linux 4.7. + // + // Skip the test if it isn't supported yet. 
+ if (Sysno() == SYS_waitid) { + int ret = waitid(P_ALL, 0, nullptr, WEXITED | WNOHANG | __WNOTHREAD); + SKIP_IF(ret < 0 && errno == EINVAL); + } + + absl::Mutex mu; + pid_t child; + bool ready = false; + bool stop = false; + + ScopedThread t([&] { + absl::MutexLock ml(&mu); + EXPECT_THAT(child = ForkAndExit(0, 0), SyscallSucceeds()); + ready = true; + mu.Await(absl::Condition(&stop)); + + // This thread can wait on child. + EXPECT_NO_ERRNO(WaitForWithOptions(child, __WNOTHREAD, 0)); + }); + + // N.B. This must be declared after ScopedThread, so it is destructed first, + // thus waking the thread. + absl::MutexLock ml(&mu); + mu.Await(absl::Condition(&ready)); + + // This thread can't wait on child. + EXPECT_THAT(WaitForWithOptions(child, __WNOTHREAD, 0), + PosixErrorIs(ECHILD, ::testing::_)); + + // Keep the sibling alive until after we've waited so the child isn't + // reparented. + stop = true; +} + +// Wait for specific child to exit. +// A non-CLONE_THREAD child which sends SIGCHLD upon exit behaves much like +// a forked process. +TEST_P(WaitSpecificChildTest, CloneSIGCHLD) { + uintptr_t stack; + ASSERT_THAT(stack = AllocStack(), SyscallSucceeds()); + auto free = + Cleanup([stack] { ASSERT_THAT(FreeStack(stack), SyscallSucceeds()); }); + + int child; + ASSERT_THAT(child = CloneAndExit(0, stack, SIGCHLD), SyscallSucceeds()); + + EXPECT_NO_ERRNO(WaitFor(child, 0)); +} + +// Wait for specific child to exit. +// A non-CLONE_THREAD child which does not send SIGCHLD upon exit can be waited +// on, but returns ECHILD. +TEST_P(WaitSpecificChildTest, CloneNoSIGCHLD) { + uintptr_t stack; + ASSERT_THAT(stack = AllocStack(), SyscallSucceeds()); + auto free = + Cleanup([stack] { ASSERT_THAT(FreeStack(stack), SyscallSucceeds()); }); + + int child; + ASSERT_THAT(child = CloneAndExit(0, stack, 0), SyscallSucceeds()); + + EXPECT_THAT(WaitFor(child, 0), PosixErrorIs(ECHILD, ::testing::_)); +} + +// Waiting after the child has already exited returns immediately. 
+TEST_P(WaitSpecificChildTest, CloneAfterExit) { + uintptr_t stack; + ASSERT_THAT(stack = AllocStack(), SyscallSucceeds()); + auto free = + Cleanup([stack] { ASSERT_THAT(FreeStack(stack), SyscallSucceeds()); }); + + int child; + // Send SIGCHLD for normal wait semantics. + ASSERT_THAT(child = CloneAndExit(0, stack, SIGCHLD), SyscallSucceeds()); + + absl::SleepFor(absl::Seconds(5)); + + EXPECT_NO_ERRNO(WaitFor(child, 0)); +} + +// A CLONE_THREAD child cannot be waited on. +TEST_P(WaitSpecificChildTest, CloneThread) { + uintptr_t stack; + ASSERT_THAT(stack = AllocStack(), SyscallSucceeds()); + auto free = + Cleanup([stack] { ASSERT_THAT(FreeStack(stack), SyscallSucceeds()); }); + + int child; + ASSERT_THAT(child = CloneAndExit(15, stack, CLONE_THREAD), SyscallSucceeds()); + auto start = absl::Now(); + + EXPECT_THAT(WaitFor(child, 0), PosixErrorIs(ECHILD, ::testing::_)); + + // Ensure wait4 didn't block. + EXPECT_LE(absl::Now() - start, absl::Seconds(10)); + + // Since we can't wait on the child, we sleep to try to avoid freeing its + // stack before it exits. + absl::SleepFor(absl::Seconds(5)); +} + +// A child that does not send a SIGCHLD on exit may be waited on with +// the __WCLONE flag. +TEST_P(WaitSpecificChildTest, CloneWCLONE) { + // Linux added WCLONE support to waitid(2) in + // 91c4e8ea8f05916df0c8a6f383508ac7c9e10dba ("wait: allow sys_waitid() to + // accept __WNOTHREAD/__WCLONE/__WALL"). i.e., Linux 4.7. + // + // Skip the test if it isn't supported yet. + if (Sysno() == SYS_waitid) { + int ret = waitid(P_ALL, 0, nullptr, WEXITED | WNOHANG | __WCLONE); + SKIP_IF(ret < 0 && errno == EINVAL); + } + + uintptr_t stack; + ASSERT_THAT(stack = AllocStack(), SyscallSucceeds()); + auto free = + Cleanup([stack] { ASSERT_THAT(FreeStack(stack), SyscallSucceeds()); }); + + int child; + ASSERT_THAT(child = CloneAndExit(0, stack, 0), SyscallSucceeds()); + + EXPECT_NO_ERRNO(WaitForWithOptions(child, __WCLONE, 0)); +} + +// A forked child cannot be waited on with WCLONE. 
+TEST_P(WaitSpecificChildTest, ForkWCLONE) { + // Linux added WCLONE support to waitid(2) in + // 91c4e8ea8f05916df0c8a6f383508ac7c9e10dba ("wait: allow sys_waitid() to + // accept __WNOTHREAD/__WCLONE/__WALL"). i.e., Linux 4.7. + // + // Skip the test if it isn't supported yet. + if (Sysno() == SYS_waitid) { + int ret = waitid(P_ALL, 0, nullptr, WEXITED | WNOHANG | __WCLONE); + SKIP_IF(ret < 0 && errno == EINVAL); + } + + pid_t child; + ASSERT_THAT(child = ForkAndExit(0, 0), SyscallSucceeds()); + + EXPECT_THAT(WaitForWithOptions(child, WNOHANG | __WCLONE, 0), + PosixErrorIs(ECHILD, ::testing::_)); + + EXPECT_NO_ERRNO(WaitFor(child, 0)); +} + +// Any type of child can be waited on with WALL. +TEST_P(WaitSpecificChildTest, WALL) { + // Linux added WALL support to waitid(2) in + // 91c4e8ea8f05916df0c8a6f383508ac7c9e10dba ("wait: allow sys_waitid() to + // accept __WNOTHREAD/__WCLONE/__WALL"). i.e., Linux 4.7. + // + // Skip the test if it isn't supported yet. + if (Sysno() == SYS_waitid) { + int ret = waitid(P_ALL, 0, nullptr, WEXITED | WNOHANG | __WALL); + SKIP_IF(ret < 0 && errno == EINVAL); + } + + pid_t child; + ASSERT_THAT(child = ForkAndExit(0, 0), SyscallSucceeds()); + + EXPECT_NO_ERRNO(WaitForWithOptions(child, __WALL, 0)); + + uintptr_t stack; + ASSERT_THAT(stack = AllocStack(), SyscallSucceeds()); + auto free = + Cleanup([stack] { ASSERT_THAT(FreeStack(stack), SyscallSucceeds()); }); + + ASSERT_THAT(child = CloneAndExit(0, stack, 0), SyscallSucceeds()); + + EXPECT_NO_ERRNO(WaitForWithOptions(child, __WALL, 0)); +} + +// Return ECHILD for bad child. +TEST_P(WaitSpecificChildTest, BadChild) { + EXPECT_THAT(WaitFor(42, 0), PosixErrorIs(ECHILD, ::testing::_)); +} + +// Wait for a child process that only exits after calling execve(2) from a +// non-leader thread. 
+TEST_P(WaitSpecificChildTest, AfterChildExecve) {
+  ExecveArray const owned_child_argv = {"/bin/true"};
+  char* const* const child_argv = owned_child_argv.get();
+
+  uintptr_t stack;
+  ASSERT_THAT(stack = AllocStack(), SyscallSucceeds());
+  auto free =
+      Cleanup([stack] { ASSERT_THAT(FreeStack(stack), SyscallSucceeds()); });
+
+  pid_t const child = fork();
+  if (child == 0) {
+    // Give the parent some time to start waiting.
+    SleepSafe(absl::Seconds(5));
+    // Pass CLONE_VFORK to block the original thread in the child process until
+    // the clone thread calls execve, annihilating them both. (This means that
+    // if clone returns at all, something went wrong.)
+    //
+    // N.B. clone(2) is not officially async-signal-safe, but at minimum glibc's
+    // x86_64 implementation is safe. See glibc
+    // sysdeps/unix/sysv/linux/x86_64/clone.S.
+    clone(
+        +[](void* arg) {
+          auto child_argv = static_cast<char* const*>(arg);
+          execve(child_argv[0], child_argv, /* envp = */ nullptr);
+          return errno;
+        },
+        reinterpret_cast<void*>(stack),
+        CLONE_FILES | CLONE_FS | CLONE_SIGHAND | CLONE_THREAD | CLONE_VM |
+            CLONE_VFORK,
+        const_cast<char**>(child_argv));
+    _exit(errno);
+  }
+  ASSERT_THAT(child, SyscallSucceeds());
+  EXPECT_NO_ERRNO(WaitFor(child, 0));
+}
+
+// Waits for `pid` with Wait4 using `options` and checks that the call returned
+// `pid` and that the child exited normally with exit status `code`. Returns an
+// errno-based error if the syscall fails, EINVAL on any mismatch.
+PosixError CheckWait4(pid_t pid, int options, int code) {
+  int status;
+  auto const rv = Wait4(pid, &status, options, nullptr);
+  MaybeSave();
+  if (rv < 0) {
+    return PosixError(errno, "wait4");
+  } else if (rv != pid) {
+    // Also taken when rv == 0, so `status` is never read unless a child was
+    // actually reaped.
+    return PosixError(
+        EINVAL, absl::StrCat("unexpected pid: got ", rv, ", wanted ", pid));
+  }
+  if (!WIFEXITED(status) || WEXITSTATUS(status) != code) {
+    return PosixError(EINVAL, absl::StrCat("unexpected wait status: got ",
+                                           status, ", wanted ", code));
+  }
+  return NoError();
+}
+
+// Waits for `pid` with Waitid(P_PID, ...) using `options | WEXITED` and checks
+// that the returned siginfo describes a normal exit (SIGCHLD / CLD_EXITED) of
+// `pid` with exit status `code`. Returns an errno-based error if the syscall
+// fails, EINVAL on any mismatch.
+PosixError CheckWaitid(pid_t pid, int options, int code) {
+  // Zero-initialize: waitid can return 0 without filling in `si` (e.g. WNOHANG
+  // with no waitable child), and the checks below must not read indeterminate
+  // values in that case.
+  siginfo_t si = {};
+  auto const rv = Waitid(P_PID, pid, &si, options | WEXITED);
+  MaybeSave();
+  if (rv < 0) {
+    return PosixError(errno, "waitid");
+  }
+  if (si.si_pid != pid) {
+    return PosixError(EINVAL, absl::StrCat("unexpected pid: got ", si.si_pid,
+                                           ", wanted ", pid));
+  }
+  if (si.si_signo != SIGCHLD) {
+    return PosixError(EINVAL, absl::StrCat("unexpected signo: got ",
+                                           si.si_signo, ", wanted ", SIGCHLD));
+  }
+  if (si.si_status != code) {
+    return PosixError(EINVAL, absl::StrCat("unexpected status: got ",
+                                           si.si_status, ", wanted ", code));
+  }
+  if (si.si_code != CLD_EXITED) {
+    return PosixError(EINVAL, absl::StrCat("unexpected code: got ", si.si_code,
+                                           ", wanted ", CLD_EXITED));
+  }
+  return NoError();
+}
+
+// Run every WaitSpecificChildTest once per waiter: wait4 and waitid.
+INSTANTIATE_TEST_SUITE_P(
+    Waiters, WaitSpecificChildTest,
+    ::testing::Values(std::make_tuple(SYS_wait4, CheckWait4),
+                      std::make_tuple(SYS_waitid, CheckWaitid)));
+
+// WIFEXITED, WIFSIGNALED, WTERMSIG indicate signal exit.
+TEST(WaitTest, SignalExit) {
+  pid_t child;
+  ASSERT_THAT(child = ForkAndExit(0, 10), SyscallSucceeds());
+
+  EXPECT_THAT(kill(child, SIGKILL), SyscallSucceeds());
+
+  int status;
+  EXPECT_THAT(Wait4(child, &status, 0, nullptr),
+              SyscallSucceedsWithValue(child));
+
+  EXPECT_FALSE(WIFEXITED(status));
+  EXPECT_TRUE(WIFSIGNALED(status));
+  EXPECT_EQ(SIGKILL, WTERMSIG(status));
+}
+
+// waitid requires at least one option.
+TEST(WaitTest, WaitidOptions) {
+  EXPECT_THAT(Waitid(P_ALL, 0, nullptr, 0), SyscallFailsWithErrno(EINVAL));
+}
+
+// waitid does not wait for a child to exit if not passed WEXITED.
+TEST(WaitTest, WaitidNoWEXITED) {
+  pid_t child;
+  ASSERT_THAT(child = ForkAndExit(0, 0), SyscallSucceeds());
+  EXPECT_THAT(Waitid(P_ALL, 0, nullptr, WSTOPPED),
+              SyscallFailsWithErrno(ECHILD));
+  EXPECT_THAT(Waitid(P_ALL, 0, nullptr, WEXITED), SyscallSucceeds());
+}
+
+// WNOWAIT allows the same wait result to be returned again.
+TEST(WaitTest, WaitidWNOWAIT) { + pid_t child; + ASSERT_THAT(child = ForkAndExit(42, 0), SyscallSucceeds()); + + siginfo_t info; + ASSERT_THAT(Waitid(P_PID, child, &info, WEXITED | WNOWAIT), + SyscallSucceeds()); + EXPECT_EQ(child, info.si_pid); + EXPECT_EQ(SIGCHLD, info.si_signo); + EXPECT_EQ(CLD_EXITED, info.si_code); + EXPECT_EQ(42, info.si_status); + + ASSERT_THAT(Waitid(P_PID, child, &info, WEXITED), SyscallSucceeds()); + EXPECT_EQ(child, info.si_pid); + EXPECT_EQ(SIGCHLD, info.si_signo); + EXPECT_EQ(CLD_EXITED, info.si_code); + EXPECT_EQ(42, info.si_status); + + EXPECT_THAT(Waitid(P_PID, child, &info, WEXITED), + SyscallFailsWithErrno(ECHILD)); +} + +// waitpid(pid, status, options) is equivalent to +// wait4(pid, status, options, nullptr). +// This is a dedicated syscall on i386, glibc maps it to wait4 on amd64. +TEST(WaitTest, WaitPid) { + pid_t child; + ASSERT_THAT(child = ForkAndExit(42, 0), SyscallSucceeds()); + + int status; + EXPECT_THAT(RetryEINTR(waitpid)(child, &status, 0), + SyscallSucceedsWithValue(child)); + + EXPECT_TRUE(WIFEXITED(status)); + EXPECT_EQ(42, WEXITSTATUS(status)); +} + +// Test that signaling a zombie succeeds. This is a signals test that is in this +// file for some reason. +TEST(WaitTest, KillZombie) { + pid_t child; + ASSERT_THAT(child = ForkAndExit(42, 0), SyscallSucceeds()); + + // Sleep for three seconds to ensure the child has exited. + absl::SleepFor(absl::Seconds(3)); + + // The child is now a zombie. Check that killing it returns 0. 
+ EXPECT_THAT(kill(child, SIGTERM), SyscallSucceeds()); + EXPECT_THAT(kill(child, 0), SyscallSucceeds()); + + EXPECT_THAT(Wait4(child, nullptr, 0, nullptr), + SyscallSucceedsWithValue(child)); +} + +TEST(WaitTest, Wait4Rusage) { + pid_t child; + constexpr absl::Duration kSpin = absl::Seconds(3); + ASSERT_THAT(child = ForkSpinAndExit(21, absl::ToInt64Seconds(kSpin)), + SyscallSucceeds()); + + int status; + struct rusage rusage = {}; + ASSERT_THAT(Wait4(child, &status, 0, &rusage), + SyscallSucceedsWithValue(child)); + + EXPECT_TRUE(WIFEXITED(status)); + EXPECT_EQ(21, WEXITSTATUS(status)); + + EXPECT_GE(RusageCpuTime(rusage), kSpin); +} + +TEST(WaitTest, WaitidRusage) { + pid_t child; + constexpr absl::Duration kSpin = absl::Seconds(3); + ASSERT_THAT(child = ForkSpinAndExit(27, absl::ToInt64Seconds(kSpin)), + SyscallSucceeds()); + + siginfo_t si = {}; + struct rusage rusage = {}; + + // From waitid(2): + // The raw waitid() system call takes a fifth argument, of type + // struct rusage *. If this argument is non-NULL, then it is used + // to return resource usage information about the child, in the + // same manner as wait4(2). + EXPECT_THAT( + RetryEINTR(syscall)(SYS_waitid, P_PID, child, &si, WEXITED, &rusage), + SyscallSucceeds()); + EXPECT_EQ(si.si_signo, SIGCHLD); + EXPECT_EQ(si.si_code, CLD_EXITED); + EXPECT_EQ(si.si_status, 27); + EXPECT_EQ(si.si_pid, child); + + EXPECT_GE(RusageCpuTime(rusage), kSpin); +} + +// After bf959931ddb88c4e4366e96dd22e68fa0db9527c ("wait/ptrace: assume __WALL +// if the child is traced") (Linux 4.7), tracees are always eligible for +// waiting, regardless of type. +TEST(WaitTest, TraceeWALL) { + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + FileDescriptor rfd(fds[0]); + FileDescriptor wfd(fds[1]); + + pid_t child = fork(); + if (child == 0) { + // Child. + rfd.reset(); + + TEST_PCHECK(ptrace(PTRACE_TRACEME, 0, nullptr, nullptr) == 0); + + // Notify parent that we're now a tracee. 
+ wfd.reset(); + + _exit(0); + } + ASSERT_THAT(child, SyscallSucceeds()); + + wfd.reset(); + + // Wait for child to become tracee. + char c; + EXPECT_THAT(ReadFd(rfd.get(), &c, sizeof(c)), SyscallSucceedsWithValue(0)); + + // We can wait on the fork child with WCLONE, as it is a tracee. + int status; + if (IsRunningOnGvisor()) { + ASSERT_THAT(Wait4(child, &status, __WCLONE, nullptr), + SyscallSucceedsWithValue(child)); + + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) << status; + } else { + // On older versions of Linux, we may get ECHILD. + ASSERT_THAT(Wait4(child, &status, __WCLONE, nullptr), + ::testing::AnyOf(SyscallSucceedsWithValue(child), + SyscallFailsWithErrno(ECHILD))); + } +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/write.cc b/test/syscalls/linux/write.cc new file mode 100644 index 000000000..39b5b2f56 --- /dev/null +++ b/test/syscalls/linux/write.cc @@ -0,0 +1,139 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <errno.h> +#include <fcntl.h> +#include <signal.h> +#include <sys/resource.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <time.h> +#include <unistd.h> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "test/util/cleanup.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// TODO(gvisor.dev/issue/2370): This test is currently very rudimentary. +class WriteTest : public ::testing::Test { + public: + ssize_t WriteBytes(int fd, int bytes) { + std::vector<char> buf(bytes); + std::fill(buf.begin(), buf.end(), 'a'); + return WriteFd(fd, buf.data(), buf.size()); + } +}; + +TEST_F(WriteTest, WriteNoExceedsRLimit) { + // Get the current rlimit and restore after test run. + struct rlimit initial_lim; + ASSERT_THAT(getrlimit(RLIMIT_FSIZE, &initial_lim), SyscallSucceeds()); + auto cleanup = Cleanup([&initial_lim] { + EXPECT_THAT(setrlimit(RLIMIT_FSIZE, &initial_lim), SyscallSucceeds()); + }); + + int fd; + struct rlimit setlim; + const int target_lim = 1024; + setlim.rlim_cur = target_lim; + setlim.rlim_max = RLIM_INFINITY; + const std::string pathname = NewTempAbsPath(); + ASSERT_THAT(fd = open(pathname.c_str(), O_WRONLY | O_CREAT, S_IRWXU), + SyscallSucceeds()); + ASSERT_THAT(setrlimit(RLIMIT_FSIZE, &setlim), SyscallSucceeds()); + + EXPECT_THAT(WriteBytes(fd, target_lim), SyscallSucceedsWithValue(target_lim)); + + std::vector<char> buf(target_lim + 1); + std::fill(buf.begin(), buf.end(), 'a'); + EXPECT_THAT(pwrite(fd, buf.data(), target_lim, 1), SyscallSucceeds()); + EXPECT_THAT(pwrite64(fd, buf.data(), target_lim, 1), SyscallSucceeds()); + + EXPECT_THAT(close(fd), SyscallSucceeds()); +} + +TEST_F(WriteTest, WriteExceedsRLimit) { + // Get the current rlimit and restore after test run. 
+ struct rlimit initial_lim; + ASSERT_THAT(getrlimit(RLIMIT_FSIZE, &initial_lim), SyscallSucceeds()); + auto cleanup = Cleanup([&initial_lim] { + EXPECT_THAT(setrlimit(RLIMIT_FSIZE, &initial_lim), SyscallSucceeds()); + }); + + int fd; + sigset_t filesize_mask; + sigemptyset(&filesize_mask); + sigaddset(&filesize_mask, SIGXFSZ); + + struct rlimit setlim; + const int target_lim = 1024; + setlim.rlim_cur = target_lim; + setlim.rlim_max = RLIM_INFINITY; + + const std::string pathname = NewTempAbsPath(); + ASSERT_THAT(fd = open(pathname.c_str(), O_WRONLY | O_CREAT, S_IRWXU), + SyscallSucceeds()); + ASSERT_THAT(setrlimit(RLIMIT_FSIZE, &setlim), SyscallSucceeds()); + ASSERT_THAT(sigprocmask(SIG_BLOCK, &filesize_mask, nullptr), + SyscallSucceeds()); + std::vector<char> buf(target_lim + 2); + std::fill(buf.begin(), buf.end(), 'a'); + + EXPECT_THAT(write(fd, buf.data(), target_lim + 1), + SyscallSucceedsWithValue(target_lim)); + EXPECT_THAT(write(fd, buf.data(), 1), SyscallFailsWithErrno(EFBIG)); + siginfo_t info; + struct timespec timelimit = {0, 0}; + ASSERT_THAT(RetryEINTR(sigtimedwait)(&filesize_mask, &info, &timelimit), + SyscallSucceedsWithValue(SIGXFSZ)); + EXPECT_EQ(info.si_code, SI_USER); + EXPECT_EQ(info.si_pid, getpid()); + EXPECT_EQ(info.si_uid, getuid()); + + EXPECT_THAT(pwrite(fd, buf.data(), target_lim + 1, 1), + SyscallSucceedsWithValue(target_lim - 1)); + EXPECT_THAT(pwrite(fd, buf.data(), 1, target_lim), + SyscallFailsWithErrno(EFBIG)); + ASSERT_THAT(RetryEINTR(sigtimedwait)(&filesize_mask, &info, &timelimit), + SyscallSucceedsWithValue(SIGXFSZ)); + EXPECT_EQ(info.si_code, SI_USER); + EXPECT_EQ(info.si_pid, getpid()); + EXPECT_EQ(info.si_uid, getuid()); + + EXPECT_THAT(pwrite64(fd, buf.data(), target_lim + 1, 1), + SyscallSucceedsWithValue(target_lim - 1)); + EXPECT_THAT(pwrite64(fd, buf.data(), 1, target_lim), + SyscallFailsWithErrno(EFBIG)); + ASSERT_THAT(RetryEINTR(sigtimedwait)(&filesize_mask, &info, &timelimit), + SyscallSucceedsWithValue(SIGXFSZ)); + 
EXPECT_EQ(info.si_code, SI_USER); + EXPECT_EQ(info.si_pid, getpid()); + EXPECT_EQ(info.si_uid, getuid()); + + ASSERT_THAT(sigprocmask(SIG_UNBLOCK, &filesize_mask, nullptr), + SyscallSucceeds()); + EXPECT_THAT(close(fd), SyscallSucceeds()); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/xattr.cc b/test/syscalls/linux/xattr.cc new file mode 100644 index 000000000..cbcf08451 --- /dev/null +++ b/test/syscalls/linux/xattr.cc @@ -0,0 +1,610 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <sys/types.h> +#include <sys/xattr.h> +#include <unistd.h> + +#include <string> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/container/flat_hash_set.h" +#include "test/syscalls/linux/file_base.h" +#include "test/util/capability_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/posix_error.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +class XattrTest : public FileTest {}; + +TEST_F(XattrTest, XattrNonexistentFile) { + const char* path = "/does/not/exist"; + const char* name = "user.test"; + EXPECT_THAT(setxattr(path, name, nullptr, 0, /*flags=*/0), + SyscallFailsWithErrno(ENOENT)); + EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallFailsWithErrno(ENOENT)); + EXPECT_THAT(listxattr(path, nullptr, 0), SyscallFailsWithErrno(ENOENT)); + EXPECT_THAT(removexattr(path, name), SyscallFailsWithErrno(ENOENT)); +} + +TEST_F(XattrTest, XattrNullName) { + const char* path = test_file_name_.c_str(); + + EXPECT_THAT(setxattr(path, nullptr, nullptr, 0, /*flags=*/0), + SyscallFailsWithErrno(EFAULT)); + EXPECT_THAT(getxattr(path, nullptr, nullptr, 0), + SyscallFailsWithErrno(EFAULT)); + EXPECT_THAT(removexattr(path, nullptr), SyscallFailsWithErrno(EFAULT)); +} + +TEST_F(XattrTest, XattrEmptyName) { + const char* path = test_file_name_.c_str(); + + EXPECT_THAT(setxattr(path, "", nullptr, 0, /*flags=*/0), + SyscallFailsWithErrno(ERANGE)); + EXPECT_THAT(getxattr(path, "", nullptr, 0), SyscallFailsWithErrno(ERANGE)); + EXPECT_THAT(removexattr(path, ""), SyscallFailsWithErrno(ERANGE)); +} + +TEST_F(XattrTest, XattrLargeName) { + const char* path = test_file_name_.c_str(); + std::string name = "user."; + name += std::string(XATTR_NAME_MAX - name.length(), 'a'); + + if (!IsRunningOnGvisor()) { + // In gVisor, access to xattrs is controlled with an explicit list of + // 
allowed names. This name isn't going to be configured to allow access, so + // don't test it. + EXPECT_THAT(setxattr(path, name.c_str(), nullptr, 0, /*flags=*/0), + SyscallSucceeds()); + EXPECT_THAT(getxattr(path, name.c_str(), nullptr, 0), + SyscallSucceedsWithValue(0)); + } + + name += "a"; + EXPECT_THAT(setxattr(path, name.c_str(), nullptr, 0, /*flags=*/0), + SyscallFailsWithErrno(ERANGE)); + EXPECT_THAT(getxattr(path, name.c_str(), nullptr, 0), + SyscallFailsWithErrno(ERANGE)); + EXPECT_THAT(removexattr(path, name.c_str()), SyscallFailsWithErrno(ERANGE)); +} + +TEST_F(XattrTest, XattrInvalidPrefix) { + const char* path = test_file_name_.c_str(); + std::string name(XATTR_NAME_MAX, 'a'); + EXPECT_THAT(setxattr(path, name.c_str(), nullptr, 0, /*flags=*/0), + SyscallFailsWithErrno(EOPNOTSUPP)); + EXPECT_THAT(getxattr(path, name.c_str(), nullptr, 0), + SyscallFailsWithErrno(EOPNOTSUPP)); + EXPECT_THAT(removexattr(path, name.c_str()), + SyscallFailsWithErrno(EOPNOTSUPP)); +} + +// Do not allow save/restore cycles after making the test file read-only, as +// the restore will fail to open it with r/w permissions. +TEST_F(XattrTest, XattrReadOnly_NoRandomSave) { + // Drop capabilities that allow us to override file and directory permissions. 
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false)); + + const char* path = test_file_name_.c_str(); + const char name[] = "user.test"; + char val = 'a'; + size_t size = sizeof(val); + + EXPECT_THAT(setxattr(path, name, &val, size, /*flags=*/0), SyscallSucceeds()); + + DisableSave ds; + ASSERT_NO_ERRNO(testing::Chmod(test_file_name_, S_IRUSR)); + + EXPECT_THAT(setxattr(path, name, &val, size, /*flags=*/0), + SyscallFailsWithErrno(EACCES)); + EXPECT_THAT(removexattr(path, name), SyscallFailsWithErrno(EACCES)); + + char buf = '-'; + EXPECT_THAT(getxattr(path, name, &buf, size), SyscallSucceedsWithValue(size)); + EXPECT_EQ(buf, val); + + char list[sizeof(name)]; + EXPECT_THAT(listxattr(path, list, sizeof(list)), + SyscallSucceedsWithValue(sizeof(name))); + EXPECT_STREQ(list, name); +} + +// Do not allow save/restore cycles after making the test file write-only, as +// the restore will fail to open it with r/w permissions. +TEST_F(XattrTest, XattrWriteOnly_NoRandomSave) { + // Drop capabilities that allow us to override file and directory permissions. + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false)); + + DisableSave ds; + ASSERT_NO_ERRNO(testing::Chmod(test_file_name_, S_IWUSR)); + + const char* path = test_file_name_.c_str(); + const char name[] = "user.test"; + char val = 'a'; + size_t size = sizeof(val); + + EXPECT_THAT(setxattr(path, name, &val, size, /*flags=*/0), SyscallSucceeds()); + + EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallFailsWithErrno(EACCES)); + + // listxattr will succeed even without read permissions. 
+ char list[sizeof(name)]; + EXPECT_THAT(listxattr(path, list, sizeof(list)), + SyscallSucceedsWithValue(sizeof(name))); + EXPECT_STREQ(list, name); + + EXPECT_THAT(removexattr(path, name), SyscallSucceeds()); +} + +TEST_F(XattrTest, XattrTrustedWithNonadmin) { + // TODO(b/148380782): Support setxattr and getxattr with "trusted" prefix. + SKIP_IF(IsRunningOnGvisor()); + SKIP_IF(ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))); + + const char* path = test_file_name_.c_str(); + const char name[] = "trusted.abc"; + EXPECT_THAT(setxattr(path, name, nullptr, 0, /*flags=*/0), + SyscallFailsWithErrno(EPERM)); + EXPECT_THAT(removexattr(path, name), SyscallFailsWithErrno(EPERM)); + EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallFailsWithErrno(ENODATA)); +} + +TEST_F(XattrTest, XattrOnDirectory) { + TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const char name[] = "user.test"; + EXPECT_THAT(setxattr(dir.path().c_str(), name, nullptr, 0, /*flags=*/0), + SyscallSucceeds()); + EXPECT_THAT(getxattr(dir.path().c_str(), name, nullptr, 0), + SyscallSucceedsWithValue(0)); + + char list[sizeof(name)]; + EXPECT_THAT(listxattr(dir.path().c_str(), list, sizeof(list)), + SyscallSucceedsWithValue(sizeof(name))); + EXPECT_STREQ(list, name); + + EXPECT_THAT(removexattr(dir.path().c_str(), name), SyscallSucceeds()); +} + +TEST_F(XattrTest, XattrOnSymlink) { + TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + TempPath link = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateSymlinkTo(dir.path(), test_file_name_)); + const char name[] = "user.test"; + EXPECT_THAT(setxattr(link.path().c_str(), name, nullptr, 0, /*flags=*/0), + SyscallSucceeds()); + EXPECT_THAT(getxattr(link.path().c_str(), name, nullptr, 0), + SyscallSucceedsWithValue(0)); + + char list[sizeof(name)]; + EXPECT_THAT(listxattr(link.path().c_str(), list, sizeof(list)), + SyscallSucceedsWithValue(sizeof(name))); + EXPECT_STREQ(list, name); + + 
EXPECT_THAT(removexattr(link.path().c_str(), name), SyscallSucceeds()); +} + +TEST_F(XattrTest, XattrOnInvalidFileTypes) { + const char name[] = "user.test"; + + char char_device[] = "/dev/zero"; + EXPECT_THAT(setxattr(char_device, name, nullptr, 0, /*flags=*/0), + SyscallFailsWithErrno(EPERM)); + EXPECT_THAT(getxattr(char_device, name, nullptr, 0), + SyscallFailsWithErrno(ENODATA)); + EXPECT_THAT(listxattr(char_device, nullptr, 0), SyscallSucceedsWithValue(0)); + + // Use tmpfs, where creation of named pipes is supported. + const std::string fifo = NewTempAbsPathInDir("/dev/shm"); + const char* path = fifo.c_str(); + EXPECT_THAT(mknod(path, S_IFIFO | S_IRUSR | S_IWUSR, 0), SyscallSucceeds()); + EXPECT_THAT(setxattr(path, name, nullptr, 0, /*flags=*/0), + SyscallFailsWithErrno(EPERM)); + EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallFailsWithErrno(ENODATA)); + EXPECT_THAT(listxattr(path, nullptr, 0), SyscallSucceedsWithValue(0)); + EXPECT_THAT(removexattr(path, name), SyscallFailsWithErrno(EPERM)); +} + +TEST_F(XattrTest, SetxattrSizeSmallerThanValue) { + const char* path = test_file_name_.c_str(); + const char name[] = "user.test"; + std::vector<char> val = {'a', 'a'}; + size_t size = 1; + EXPECT_THAT(setxattr(path, name, val.data(), size, /*flags=*/0), + SyscallSucceeds()); + + std::vector<char> buf = {'-', '-'}; + std::vector<char> expected_buf = {'a', '-'}; + EXPECT_THAT(getxattr(path, name, buf.data(), buf.size()), + SyscallSucceedsWithValue(size)); + EXPECT_EQ(buf, expected_buf); +} + +TEST_F(XattrTest, SetxattrZeroSize) { + const char* path = test_file_name_.c_str(); + const char name[] = "user.test"; + char val = 'a'; + EXPECT_THAT(setxattr(path, name, &val, 0, /*flags=*/0), SyscallSucceeds()); + + char buf = '-'; + EXPECT_THAT(getxattr(path, name, &buf, XATTR_SIZE_MAX), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(buf, '-'); +} + +TEST_F(XattrTest, SetxattrSizeTooLarge) { + const char* path = test_file_name_.c_str(); + const char name[] = 
"user.test"; + + // Note that each particular fs implementation may stipulate a lower size + // limit, in which case we actually may fail (e.g. error with ENOSPC) for + // some sizes under XATTR_SIZE_MAX. + size_t size = XATTR_SIZE_MAX + 1; + std::vector<char> val(size); + EXPECT_THAT(setxattr(path, name, val.data(), size, /*flags=*/0), + SyscallFailsWithErrno(E2BIG)); + + EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallFailsWithErrno(ENODATA)); +} + +TEST_F(XattrTest, SetxattrNullValueAndNonzeroSize) { + const char* path = test_file_name_.c_str(); + const char name[] = "user.test"; + EXPECT_THAT(setxattr(path, name, nullptr, 1, /*flags=*/0), + SyscallFailsWithErrno(EFAULT)); + + EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallFailsWithErrno(ENODATA)); +} + +TEST_F(XattrTest, SetxattrNullValueAndZeroSize) { + const char* path = test_file_name_.c_str(); + const char name[] = "user.test"; + EXPECT_THAT(setxattr(path, name, nullptr, 0, /*flags=*/0), SyscallSucceeds()); + + EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallSucceedsWithValue(0)); +} + +TEST_F(XattrTest, SetxattrValueTooLargeButOKSize) { + const char* path = test_file_name_.c_str(); + const char name[] = "user.test"; + std::vector<char> val(XATTR_SIZE_MAX + 1); + std::fill(val.begin(), val.end(), 'a'); + size_t size = 1; + EXPECT_THAT(setxattr(path, name, val.data(), size, /*flags=*/0), + SyscallSucceeds()); + + std::vector<char> buf = {'-', '-'}; + std::vector<char> expected_buf = {'a', '-'}; + EXPECT_THAT(getxattr(path, name, buf.data(), size), + SyscallSucceedsWithValue(size)); + EXPECT_EQ(buf, expected_buf); +} + +TEST_F(XattrTest, SetxattrReplaceWithSmaller) { + const char* path = test_file_name_.c_str(); + const char name[] = "user.test"; + std::vector<char> val = {'a', 'a'}; + EXPECT_THAT(setxattr(path, name, val.data(), 2, /*flags=*/0), + SyscallSucceeds()); + EXPECT_THAT(setxattr(path, name, val.data(), 1, /*flags=*/0), + SyscallSucceeds()); + + std::vector<char> buf = {'-', '-'}; + 
std::vector<char> expected_buf = {'a', '-'}; + EXPECT_THAT(getxattr(path, name, buf.data(), 2), SyscallSucceedsWithValue(1)); + EXPECT_EQ(buf, expected_buf); +} + +TEST_F(XattrTest, SetxattrReplaceWithLarger) { + const char* path = test_file_name_.c_str(); + const char name[] = "user.test"; + std::vector<char> val = {'a', 'a'}; + EXPECT_THAT(setxattr(path, name, val.data(), 1, /*flags=*/0), + SyscallSucceeds()); + EXPECT_THAT(setxattr(path, name, val.data(), 2, /*flags=*/0), + SyscallSucceeds()); + + std::vector<char> buf = {'-', '-'}; + EXPECT_THAT(getxattr(path, name, buf.data(), 2), SyscallSucceedsWithValue(2)); + EXPECT_EQ(buf, val); +} + +TEST_F(XattrTest, SetxattrCreateFlag) { + const char* path = test_file_name_.c_str(); + const char name[] = "user.test"; + EXPECT_THAT(setxattr(path, name, nullptr, 0, XATTR_CREATE), + SyscallSucceeds()); + EXPECT_THAT(setxattr(path, name, nullptr, 0, XATTR_CREATE), + SyscallFailsWithErrno(EEXIST)); + + EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallSucceedsWithValue(0)); +} + +TEST_F(XattrTest, SetxattrReplaceFlag) { + const char* path = test_file_name_.c_str(); + const char name[] = "user.test"; + EXPECT_THAT(setxattr(path, name, nullptr, 0, XATTR_REPLACE), + SyscallFailsWithErrno(ENODATA)); + EXPECT_THAT(setxattr(path, name, nullptr, 0, /*flags=*/0), SyscallSucceeds()); + EXPECT_THAT(setxattr(path, name, nullptr, 0, XATTR_REPLACE), + SyscallSucceeds()); + + EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallSucceedsWithValue(0)); +} + +TEST_F(XattrTest, SetxattrInvalidFlags) { + const char* path = test_file_name_.c_str(); + int invalid_flags = 0xff; + EXPECT_THAT(setxattr(path, nullptr, nullptr, 0, invalid_flags), + SyscallFailsWithErrno(EINVAL)); +} + +TEST_F(XattrTest, Getxattr) { + const char* path = test_file_name_.c_str(); + const char name[] = "user.test"; + int val = 1234; + size_t size = sizeof(val); + EXPECT_THAT(setxattr(path, name, &val, size, /*flags=*/0), SyscallSucceeds()); + + int buf = 0; + 
EXPECT_THAT(getxattr(path, name, &buf, size), SyscallSucceedsWithValue(size)); + EXPECT_EQ(buf, val); +} + +TEST_F(XattrTest, GetxattrSizeSmallerThanValue) { + const char* path = test_file_name_.c_str(); + const char name[] = "user.test"; + std::vector<char> val = {'a', 'a'}; + size_t size = val.size(); + EXPECT_THAT(setxattr(path, name, &val, size, /*flags=*/0), SyscallSucceeds()); + + char buf = '-'; + EXPECT_THAT(getxattr(path, name, &buf, 1), SyscallFailsWithErrno(ERANGE)); + EXPECT_EQ(buf, '-'); +} + +TEST_F(XattrTest, GetxattrSizeLargerThanValue) { + const char* path = test_file_name_.c_str(); + const char name[] = "user.test"; + char val = 'a'; + EXPECT_THAT(setxattr(path, name, &val, 1, /*flags=*/0), SyscallSucceeds()); + + std::vector<char> buf(XATTR_SIZE_MAX); + std::fill(buf.begin(), buf.end(), '-'); + std::vector<char> expected_buf = buf; + expected_buf[0] = 'a'; + EXPECT_THAT(getxattr(path, name, buf.data(), buf.size()), + SyscallSucceedsWithValue(1)); + EXPECT_EQ(buf, expected_buf); +} + +TEST_F(XattrTest, GetxattrZeroSize) { + const char* path = test_file_name_.c_str(); + const char name[] = "user.test"; + char val = 'a'; + EXPECT_THAT(setxattr(path, name, &val, sizeof(val), /*flags=*/0), + SyscallSucceeds()); + + char buf = '-'; + EXPECT_THAT(getxattr(path, name, &buf, 0), + SyscallSucceedsWithValue(sizeof(val))); + EXPECT_EQ(buf, '-'); +} + +TEST_F(XattrTest, GetxattrSizeTooLarge) { + const char* path = test_file_name_.c_str(); + const char name[] = "user.test"; + char val = 'a'; + EXPECT_THAT(setxattr(path, name, &val, sizeof(val), /*flags=*/0), + SyscallSucceeds()); + + std::vector<char> buf(XATTR_SIZE_MAX + 1); + std::fill(buf.begin(), buf.end(), '-'); + std::vector<char> expected_buf = buf; + expected_buf[0] = 'a'; + EXPECT_THAT(getxattr(path, name, buf.data(), buf.size()), + SyscallSucceedsWithValue(sizeof(val))); + EXPECT_EQ(buf, expected_buf); +} + +TEST_F(XattrTest, GetxattrNullValue) { + const char* path = test_file_name_.c_str(); + const 
char name[] = "user.test"; + char val = 'a'; + size_t size = sizeof(val); + EXPECT_THAT(setxattr(path, name, &val, size, /*flags=*/0), SyscallSucceeds()); + + EXPECT_THAT(getxattr(path, name, nullptr, size), + SyscallFailsWithErrno(EFAULT)); +} + +TEST_F(XattrTest, GetxattrNullValueAndZeroSize) { + const char* path = test_file_name_.c_str(); + const char name[] = "user.test"; + char val = 'a'; + size_t size = sizeof(val); + // Set value with zero size. + EXPECT_THAT(setxattr(path, name, &val, 0, /*flags=*/0), SyscallSucceeds()); + // Get value with nonzero size. + EXPECT_THAT(getxattr(path, name, nullptr, size), SyscallSucceedsWithValue(0)); + + // Set value with nonzero size. + EXPECT_THAT(setxattr(path, name, &val, size, /*flags=*/0), SyscallSucceeds()); + // Get value with zero size. + EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallSucceedsWithValue(size)); +} + +TEST_F(XattrTest, GetxattrNonexistentName) { + const char* path = test_file_name_.c_str(); + const char name[] = "user.test"; + EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallFailsWithErrno(ENODATA)); +} + +TEST_F(XattrTest, Listxattr) { + const char* path = test_file_name_.c_str(); + const std::string name = "user.test"; + const std::string name2 = "user.test2"; + const std::string name3 = "user.test3"; + EXPECT_THAT(setxattr(path, name.c_str(), nullptr, 0, /*flags=*/0), + SyscallSucceeds()); + EXPECT_THAT(setxattr(path, name2.c_str(), nullptr, 0, /*flags=*/0), + SyscallSucceeds()); + EXPECT_THAT(setxattr(path, name3.c_str(), nullptr, 0, /*flags=*/0), + SyscallSucceeds()); + + std::vector<char> list(name.size() + 1 + name2.size() + 1 + name3.size() + 1); + char* buf = list.data(); + EXPECT_THAT(listxattr(path, buf, XATTR_SIZE_MAX), + SyscallSucceedsWithValue(list.size())); + + absl::flat_hash_set<std::string> got = {}; + for (char* p = buf; p < buf + list.size(); p += strlen(p) + 1) { + got.insert(std::string{p}); + } + + absl::flat_hash_set<std::string> expected = {name, name2, name3}; + 
EXPECT_EQ(got, expected); +} + +TEST_F(XattrTest, ListxattrNoXattrs) { + const char* path = test_file_name_.c_str(); + + std::vector<char> list, expected; + EXPECT_THAT(listxattr(path, list.data(), sizeof(list)), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(list, expected); + + // Listxattr should succeed if there are no attributes, even if the buffer + // passed in is a nullptr. + EXPECT_THAT(listxattr(path, nullptr, sizeof(list)), + SyscallSucceedsWithValue(0)); +} + +TEST_F(XattrTest, ListxattrNullBuffer) { + const char* path = test_file_name_.c_str(); + const char name[] = "user.test"; + EXPECT_THAT(setxattr(path, name, nullptr, 0, /*flags=*/0), SyscallSucceeds()); + + EXPECT_THAT(listxattr(path, nullptr, sizeof(name)), + SyscallFailsWithErrno(EFAULT)); +} + +TEST_F(XattrTest, ListxattrSizeTooSmall) { + const char* path = test_file_name_.c_str(); + const char name[] = "user.test"; + EXPECT_THAT(setxattr(path, name, nullptr, 0, /*flags=*/0), SyscallSucceeds()); + + char list[sizeof(name) - 1]; + EXPECT_THAT(listxattr(path, list, sizeof(list)), + SyscallFailsWithErrno(ERANGE)); +} + +TEST_F(XattrTest, ListxattrZeroSize) { + const char* path = test_file_name_.c_str(); + const char name[] = "user.test"; + EXPECT_THAT(setxattr(path, name, nullptr, 0, /*flags=*/0), SyscallSucceeds()); + EXPECT_THAT(listxattr(path, nullptr, 0), + SyscallSucceedsWithValue(sizeof(name))); +} + +TEST_F(XattrTest, RemoveXattr) { + const char* path = test_file_name_.c_str(); + const char name[] = "user.test"; + EXPECT_THAT(setxattr(path, name, nullptr, 0, /*flags=*/0), SyscallSucceeds()); + EXPECT_THAT(removexattr(path, name), SyscallSucceeds()); + EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallFailsWithErrno(ENODATA)); +} + +TEST_F(XattrTest, RemoveXattrNonexistentName) { + const char* path = test_file_name_.c_str(); + const char name[] = "user.test"; + EXPECT_THAT(removexattr(path, name), SyscallFailsWithErrno(ENODATA)); +} + +TEST_F(XattrTest, LXattrOnSymlink) { + const char name[] 
= "user.test"; + TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + TempPath link = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateSymlinkTo(dir.path(), test_file_name_)); + + EXPECT_THAT(lsetxattr(link.path().c_str(), name, nullptr, 0, 0), + SyscallFailsWithErrno(EPERM)); + EXPECT_THAT(lgetxattr(link.path().c_str(), name, nullptr, 0), + SyscallFailsWithErrno(ENODATA)); + EXPECT_THAT(llistxattr(link.path().c_str(), nullptr, 0), + SyscallSucceedsWithValue(0)); + EXPECT_THAT(lremovexattr(link.path().c_str(), name), + SyscallFailsWithErrno(EPERM)); +} + +TEST_F(XattrTest, LXattrOnNonsymlink) { + const char* path = test_file_name_.c_str(); + const char name[] = "user.test"; + int val = 1234; + size_t size = sizeof(val); + EXPECT_THAT(lsetxattr(path, name, &val, size, /*flags=*/0), + SyscallSucceeds()); + + int buf = 0; + EXPECT_THAT(lgetxattr(path, name, &buf, size), + SyscallSucceedsWithValue(size)); + EXPECT_EQ(buf, val); + + char list[sizeof(name)]; + EXPECT_THAT(llistxattr(path, list, sizeof(list)), + SyscallSucceedsWithValue(sizeof(name))); + EXPECT_STREQ(list, name); + + EXPECT_THAT(lremovexattr(path, name), SyscallSucceeds()); +} + +TEST_F(XattrTest, XattrWithFD) { + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_.c_str(), 0)); + const char name[] = "user.test"; + int val = 1234; + size_t size = sizeof(val); + EXPECT_THAT(fsetxattr(fd.get(), name, &val, size, /*flags=*/0), + SyscallSucceeds()); + + int buf = 0; + EXPECT_THAT(fgetxattr(fd.get(), name, &buf, size), + SyscallSucceedsWithValue(size)); + EXPECT_EQ(buf, val); + + char list[sizeof(name)]; + EXPECT_THAT(flistxattr(fd.get(), list, sizeof(list)), + SyscallSucceedsWithValue(sizeof(name))); + EXPECT_STREQ(list, name); + + EXPECT_THAT(fremovexattr(fd.get(), name), SyscallSucceeds()); +} + +} // namespace + +} // namespace testing +} // namespace gvisor |