diff options
author | Brian Geffon <bgeffon@google.com> | 2018-12-10 14:41:40 -0800 |
---|---|---|
committer | Shentubot <shentubot@google.com> | 2018-12-10 14:42:34 -0800 |
commit | d3bc79bc8438206ac6a14fde4eaa288fc07eee82 (patch) | |
tree | e820398591bfd1503456e877fa0c2bdd0f994959 /test/syscalls/linux | |
parent | 833edbd10b49db1f934dcb2495dcb41c1310eea4 (diff) |
Open source system call tests.
PiperOrigin-RevId: 224886231
Change-Id: I0fccb4d994601739d8b16b1d4e6b31f40297fb22
Diffstat (limited to 'test/syscalls/linux')
209 files changed, 48969 insertions, 0 deletions
diff --git a/test/syscalls/linux/32bit.cc b/test/syscalls/linux/32bit.cc new file mode 100644 index 000000000..b8d5f0355 --- /dev/null +++ b/test/syscalls/linux/32bit.cc @@ -0,0 +1,226 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <string.h> +#include <sys/mman.h> + +#include "test/util/memory_util.h" +#include "test/util/posix_error.h" +#include "test/util/test_util.h" +#include "gtest/gtest.h" + +#ifndef __x86_64__ +#error "This test is x86-64 specific." +#endif + +namespace gvisor { +namespace testing { + +namespace { + +constexpr char kInt3 = '\xcc'; + +constexpr char kInt80[2] = {'\xcd', '\x80'}; +constexpr char kSyscall[2] = {'\x0f', '\x05'}; +constexpr char kSysenter[2] = {'\x0f', '\x34'}; + +void ExitGroup32(const char instruction[2], int code) { + const Mapping m = ASSERT_NO_ERRNO_AND_VALUE( + Mmap(nullptr, kPageSize, PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0)); + + // Fill with INT 3 in case we execute too far. + memset(m.ptr(), kInt3, m.len()); + + memcpy(m.ptr(), instruction, 2); + + // We're playing *extremely* fast-and-loose with the various syscall ABIs + // here, which we can more-or-less get away with since exit_group doesn't + // return. + // + // SYSENTER expects the user stack in (%ebp) and arg6 in 0(%ebp). The kernel + // will unconditionally dereference %ebp for arg6, so we must pass a valid + // address or it will return EFAULT. 
+ // + // SYSENTER also unconditionally returns to thread_info->sysenter_return which + // is ostensibly a stub in the 32-bit VDSO. But a 64-bit binary doesn't have + // the 32-bit VDSO mapped, so sysenter_return will simply be the value + // inherited from the most recent 32-bit ancestor, or NULL if there is none. + // As a result, return would not return from SYSENTER. + asm volatile( + "movl $252, %%eax\n" // exit_group + "movl %[code], %%ebx\n" // code + "movl %%edx, %%ebp\n" // SYSENTER: user stack (use IP as a valid addr) + "leaq -20(%%rsp), %%rsp\n" + "movl $0x2b, 16(%%rsp)\n" // SS = CPL3 data segment + "movl $0,12(%%rsp)\n" // ESP = nullptr (unused) + "movl $0, 8(%%rsp)\n" // EFLAGS + "movl $0x23, 4(%%rsp)\n" // CS = CPL3 32-bit code segment + "movl %%edx, 0(%%rsp)\n" // EIP + "iretl\n" + "int $3\n" + : + : [code] "m"(code), [ip] "d"(m.ptr()) + : "rax", "rbx", "rsp"); +} + +constexpr int kExitCode = 42; + +TEST(Syscall32Bit, Int80) { + switch (GvisorPlatform()) { + case Platform::kKVM: + // TODO: 32-bit segments are broken (but not explicitly + // disabled). + return; + case Platform::kPtrace: + // TODO: The ptrace platform does not have a consistent story + // here. + return; + case Platform::kNative: + break; + } + + // Upstream Linux. 32-bit syscalls allowed. + EXPECT_EXIT(ExitGroup32(kInt80, kExitCode), ::testing::ExitedWithCode(42), + ""); +} + +TEST(Syscall32Bit, Sysenter) { + switch (GvisorPlatform()) { + case Platform::kKVM: + // TODO: See above. + return; + case Platform::kPtrace: + // TODO: See above. + return; + case Platform::kNative: + break; + } + + if (GetCPUVendor() == CPUVendor::kAMD) { + // SYSENTER is an illegal instruction in compatibility mode on AMD. + EXPECT_EXIT(ExitGroup32(kSysenter, kExitCode), + ::testing::KilledBySignal(SIGILL), ""); + return; + } + + // Upstream Linux on !AMD, 32-bit syscalls allowed. 
+ EXPECT_EXIT(ExitGroup32(kSysenter, kExitCode), ::testing::ExitedWithCode(42), + ""); +} + +TEST(Syscall32Bit, Syscall) { + switch (GvisorPlatform()) { + case Platform::kKVM: + // TODO: See above. + return; + case Platform::kPtrace: + // TODO: See above. + return; + case Platform::kNative: + break; + } + + if (GetCPUVendor() == CPUVendor::kIntel) { + // SYSCALL is an illegal instruction in compatibility mode on Intel. + EXPECT_EXIT(ExitGroup32(kSyscall, kExitCode), + ::testing::KilledBySignal(SIGILL), ""); + return; + } + + // Upstream Linux on !Intel, 32-bit syscalls allowed. + EXPECT_EXIT(ExitGroup32(kSyscall, kExitCode), ::testing::ExitedWithCode(42), + ""); +} + +// Far call code called below. +// +// Input stack layout: +// +// %esp+12 lcall segment +// %esp+8 lcall address offset +// %esp+0 return address +// +// The lcall will enter compatibility mode and jump to the call address (the +// address of the lret). The lret will return to 64-bit mode at the retq, which +// will return to the external caller of this function. +// +// Since this enters compatibility mode, it must be mapped in a 32-bit region of +// address space and have a 32-bit stack pointer. +constexpr char kFarCall[] = { + '\x67', '\xff', '\x5c', '\x24', '\x08', // lcall *8(%esp) + '\xc3', // retq + '\xcb', // lret +}; + +void FarCall32() { + const Mapping m = ASSERT_NO_ERRNO_AND_VALUE( + Mmap(nullptr, kPageSize, PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0)); + + // Fill with INT 3 in case we execute too far. + memset(m.ptr(), kInt3, m.len()); + + // 32-bit code. + memcpy(m.ptr(), kFarCall, sizeof(kFarCall)); + + // Use the end of the code page as its stack. + uintptr_t stack = m.endaddr(); + + uintptr_t lcall = m.addr(); + uintptr_t lret = m.addr() + sizeof(kFarCall) - 1; + + // N.B. We must save and restore RSP manually. GCC can do so automatically + // with an "rsp" clobber, but clang cannot. 
+ asm volatile( + // Place the address of lret (%edx) and the 32-bit code segment (0x23) on + // the 32-bit stack for lcall. + "subl $0x8, %%ecx\n" + "movl $0x23, 4(%%ecx)\n" + "movl %%edx, 0(%%ecx)\n" + + // Save the current stack and switch to 32-bit stack. + "pushq %%rbp\n" + "movq %%rsp, %%rbp\n" + "movq %%rcx, %%rsp\n" + + // Run the lcall code. + "callq *%%rbx\n" + + // Restore the old stack. + "leaveq\n" + : "+c"(stack) + : "b"(lcall), "d"(lret)); +} + +TEST(Call32Bit, Disallowed) { + switch (GvisorPlatform()) { + case Platform::kKVM: + // TODO: See above. + return; + case Platform::kPtrace: + // The ptrace platform cannot prevent switching to compatibility mode. + ABSL_FALLTHROUGH_INTENDED; + case Platform::kNative: + break; + } + + // Shouldn't crash. + FarCall32(); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD new file mode 100644 index 000000000..1c48a2a43 --- /dev/null +++ b/test/syscalls/linux/BUILD @@ -0,0 +1,2951 @@ +package( + default_visibility = ["//:sandbox"], + licenses = ["notice"], # Apache 2.0 +) + +cc_binary( + name = "sigaltstack_check", + testonly = 1, + srcs = ["sigaltstack_check.cc"], + deps = ["//test/util:logging"], +) + +cc_binary( + name = "exec_assert_closed_workload", + testonly = 1, + srcs = ["exec_assert_closed_workload.cc"], + deps = [ + "@com_google_absl//absl/strings", + ], +) + +cc_binary( + name = "exec_basic_workload", + testonly = 1, + srcs = [ + "exec.h", + "exec_basic_workload.cc", + ], +) + +cc_binary( + name = "exec_proc_exe_workload", + testonly = 1, + srcs = ["exec_proc_exe_workload.cc"], + deps = [ + "//test/util:fs_util", + "//test/util:posix_error", + ], +) + +cc_binary( + name = "exec_state_workload", + testonly = 1, + srcs = ["exec_state_workload.cc"], +) + +sh_binary( + name = "exit_script", + testonly = 1, + srcs = [ + "exit_script.sh", + ], +) + +cc_binary( + name = "priority_execve", + testonly = 1, + srcs = [ + 
"priority_execve.cc", + ], +) + +cc_library( + name = "base_poll_test", + testonly = 1, + srcs = ["base_poll_test.cc"], + hdrs = ["base_poll_test.h"], + deps = [ + "//test/util:logging", + "//test/util:signal_util", + "//test/util:test_util", + "//test/util:thread_util", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest", + ], +) + +cc_library( + name = "file_base", + testonly = 1, + hdrs = ["file_base.h"], + deps = [ + "//test/util:file_descriptor", + "//test/util:posix_error", + "//test/util:temp_path", + "//test/util:test_util", + "@com_google_absl//absl/strings", + "@com_google_googletest//:gtest", + ], +) + +cc_library( + name = "socket_netlink_util", + testonly = 1, + srcs = ["socket_netlink_util.cc"], + hdrs = ["socket_netlink_util.h"], + deps = [ + ":socket_test_util", + "//test/util:file_descriptor", + "//test/util:posix_error", + "@com_google_absl//absl/strings", + ], +) + +cc_library( + name = "socket_test_util", + testonly = 1, + srcs = ["socket_test_util.cc"], + hdrs = ["socket_test_util.h"], + deps = [ + "//test/util:file_descriptor", + "//test/util:posix_error", + "//test/util:temp_path", + "//test/util:test_util", + "//test/util:thread_util", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:str_format", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest", + ], +) + +cc_library( + name = "temp_umask", + hdrs = ["temp_umask.h"], +) + +cc_library( + name = "unix_domain_socket_test_util", + testonly = 1, + srcs = ["unix_domain_socket_test_util.cc"], + hdrs = ["unix_domain_socket_test_util.h"], + deps = [ + ":socket_test_util", + "//test/util:test_util", + "@com_google_absl//absl/strings", + "@com_google_googletest//:gtest", + ], +) + +cc_library( + name = "ip_socket_test_util", + testonly = 1, + srcs = ["ip_socket_test_util.cc"], + hdrs = ["ip_socket_test_util.h"], + deps = [ + 
":socket_test_util", + ], +) + +cc_binary( + name = "clock_nanosleep_test", + testonly = 1, + srcs = ["clock_nanosleep.cc"], + linkstatic = 1, + deps = [ + "//test/util:cleanup", + "//test/util:posix_error", + "//test/util:signal_util", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + "//test/util:timer_util", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "32bit_test", + testonly = 1, + srcs = ["32bit.cc"], + linkstatic = 1, + deps = [ + "//test/util:memory_util", + "//test/util:posix_error", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "accept_bind_test", + testonly = 1, + srcs = ["accept_bind.cc"], + linkstatic = 1, + deps = [ + ":socket_test_util", + ":unix_domain_socket_test_util", + "//test/util:file_descriptor", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "accept_bind_stream_test", + testonly = 1, + srcs = ["accept_bind_stream.cc"], + linkstatic = 1, + deps = [ + ":socket_test_util", + ":unix_domain_socket_test_util", + "//test/util:file_descriptor", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "access_test", + testonly = 1, + srcs = ["access.cc"], + linkstatic = 1, + deps = [ + "//test/util:capability_util", + "//test/util:fs_util", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "affinity_test", + testonly = 1, + srcs = ["affinity.cc"], + linkstatic = 1, + deps = [ + "//test/util:cleanup", + "//test/util:fs_util", + "//test/util:posix_error", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + "@com_google_absl//absl/strings", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "aio_test", + 
testonly = 1, + srcs = [ + "aio.cc", + "file_base.h", + ], + linkstatic = 1, + deps = [ + "//test/util:cleanup", + "//test/util:file_descriptor", + "//test/util:posix_error", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_absl//absl/strings", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "alarm_test", + testonly = 1, + srcs = ["alarm.cc"], + linkstatic = 1, + deps = [ + "//test/util:file_descriptor", + "//test/util:logging", + "//test/util:signal_util", + "//test/util:test_util", + "//test/util:thread_util", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "bad_test", + testonly = 1, + srcs = ["bad.cc"], + linkstatic = 1, + visibility = [ + "//:sandbox", + ], + deps = [ + "//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "bind_test", + testonly = 1, + srcs = ["bind.cc"], + linkstatic = 1, + deps = [ + ":socket_test_util", + ":unix_domain_socket_test_util", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "brk_test", + testonly = 1, + srcs = ["brk.cc"], + linkstatic = 1, + deps = [ + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "chdir_test", + testonly = 1, + srcs = ["chdir.cc"], + linkstatic = 1, + deps = [ + "//test/util:capability_util", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "chmod_test", + testonly = 1, + srcs = ["chmod.cc"], + linkstatic = 1, + deps = [ + "//test/util:capability_util", + "//test/util:file_descriptor", + "//test/util:fs_util", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "chown_test", + testonly = 1, + srcs = ["chown.cc"], + linkstatic = 1, + 
deps = [ + "//test/util:capability_util", + "//test/util:file_descriptor", + "//test/util:fs_util", + "//test/util:posix_error", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + "@com_google_absl//absl/synchronization", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "sticky_test", + testonly = 1, + srcs = ["sticky.cc"], + linkstatic = 1, + deps = [ + "//test/util:capability_util", + "//test/util:file_descriptor", + "//test/util:fs_util", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "chroot_test", + testonly = 1, + srcs = ["chroot.cc"], + linkstatic = 1, + deps = [ + "//test/util:capability_util", + "//test/util:cleanup", + "//test/util:file_descriptor", + "//test/util:fs_util", + "//test/util:mount_util", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_absl//absl/strings", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "clock_getres_test", + testonly = 1, + srcs = ["clock_getres.cc"], + linkstatic = 1, + deps = [ + "//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "clock_gettime_test", + testonly = 1, + srcs = ["clock_gettime.cc"], + linkstatic = 1, + deps = [ + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "concurrency_test", + testonly = 1, + srcs = ["concurrency.cc"], + linkstatic = 1, + deps = [ + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "creat_test", + testonly = 1, + srcs = ["creat.cc"], + linkstatic = 1, + 
deps = [ + "//test/util:fs_util", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "dev_test", + testonly = 1, + srcs = ["dev.cc"], + linkstatic = 1, + deps = [ + "//test/util:file_descriptor", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "dup_test", + testonly = 1, + srcs = ["dup.cc"], + linkstatic = 1, + deps = [ + "//test/util:file_descriptor", + "//test/util:posix_error", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "epoll_test", + testonly = 1, + srcs = ["epoll.cc"], + linkstatic = 1, + deps = [ + "//test/util:file_descriptor", + "//test/util:posix_error", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "eventfd_test", + testonly = 1, + srcs = ["eventfd.cc"], + linkstatic = 1, + deps = [ + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "exceptions_test", + testonly = 1, + srcs = ["exceptions.cc"], + linkstatic = 1, + deps = [ + "//test/util:logging", + "//test/util:signal_util", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "getcpu_test", + testonly = 1, + srcs = ["getcpu.cc"], + linkstatic = 1, + deps = [ + "//test/util:test_main", + "//test/util:test_util", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "getcpu_host_test", + testonly = 1, + srcs = ["getcpu.cc"], + linkstatic = 1, + deps = [ + "//test/util:test_main", + "//test/util:test_util", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "getrusage_test", + testonly = 1, + srcs = 
["getrusage.cc"], + linkstatic = 1, + deps = [ + "//test/util:logging", + "//test/util:memory_util", + "//test/util:signal_util", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "exec_binary_test", + testonly = 1, + srcs = ["exec_binary.cc"], + linkstatic = 1, + deps = [ + "//test/util:cleanup", + "//test/util:file_descriptor", + "//test/util:fs_util", + "//test/util:multiprocess_util", + "//test/util:posix_error", + "//test/util:proc_util", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_absl//absl/strings", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "exec_test", + testonly = 1, + srcs = [ + "exec.cc", + "exec.h", + ], + data = [ + ":exec_assert_closed_workload", + ":exec_basic_workload", + ":exec_proc_exe_workload", + ":exec_state_workload", + ":exit_script", + ":priority_execve", + ], + linkstatic = 1, + deps = [ + "//test/util:file_descriptor", + "//test/util:fs_util", + "//test/util:multiprocess_util", + "//test/util:posix_error", + "//test/util:temp_path", + "//test/util:test_util", + "//test/util:thread_util", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/synchronization", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "exit_test", + testonly = 1, + srcs = ["exit.cc"], + linkstatic = 1, + deps = [ + "//test/util:file_descriptor", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "fallocate_test", + testonly = 1, + srcs = ["fallocate.cc"], + linkstatic = 1, + deps = [ + "//test/util:file_descriptor", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "fault_test", + testonly = 1, + srcs = ["fault.cc"], + linkstatic = 1, + deps = [ + 
"//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "fchdir_test", + testonly = 1, + srcs = ["fchdir.cc"], + linkstatic = 1, + deps = [ + "//test/util:capability_util", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "fcntl_test", + testonly = 1, + srcs = ["fcntl.cc"], + linkstatic = 1, + deps = [ + ":socket_test_util", + "//test/util:cleanup", + "//test/util:multiprocess_util", + "//test/util:posix_error", + "//test/util:temp_path", + "//test/util:test_util", + "//test/util:timer_util", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "flock_test", + testonly = 1, + srcs = [ + "file_base.h", + "flock.cc", + ], + linkstatic = 1, + deps = [ + "//test/util:file_descriptor", + "//test/util:posix_error", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + "//test/util:timer_util", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "fork_test", + testonly = 1, + srcs = ["fork.cc"], + linkstatic = 1, + deps = [ + "//test/util:logging", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "fpsig_fork_test", + testonly = 1, + srcs = ["fpsig_fork.cc"], + linkstatic = 1, + deps = [ + "//test/util:logging", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "fpsig_nested_test", + testonly = 1, + srcs = ["fpsig_nested.cc"], + linkstatic = 1, + deps = [ + "//test/util:test_main", + 
"//test/util:test_util", + "//test/util:thread_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "sync_file_range_test", + testonly = 1, + srcs = ["sync_file_range.cc"], + linkstatic = 1, + deps = [ + "//test/util:file_descriptor", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "fsync_test", + testonly = 1, + srcs = ["fsync.cc"], + linkstatic = 1, + deps = [ + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "futex_test", + testonly = 1, + srcs = ["futex.cc"], + linkstatic = 1, + deps = [ + "//test/util:cleanup", + "//test/util:file_descriptor", + "//test/util:memory_util", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + "//test/util:timer_util", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "getdents_test", + testonly = 1, + srcs = ["getdents.cc"], + linkstatic = 1, + deps = [ + "//test/util:file_descriptor", + "//test/util:fs_util", + "//test/util:posix_error", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_absl//absl/strings", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "getrandom_test", + testonly = 1, + srcs = ["getrandom.cc"], + linkstatic = 1, + deps = [ + "//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "inotify_test", + testonly = 1, + srcs = ["inotify.cc"], + linkstatic = 1, + deps = [ + "//test/util:file_descriptor", + "//test/util:fs_util", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:str_format", + ], +) + 
+cc_binary( + name = "ioctl_test", + testonly = 1, + srcs = ["ioctl.cc"], + linkstatic = 1, + deps = [ + ":ip_socket_test_util", + ":socket_test_util", + ":unix_domain_socket_test_util", + "//test/util:file_descriptor", + "//test/util:signal_util", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "itimer_test", + testonly = 1, + srcs = ["itimer.cc"], + linkstatic = 1, + deps = [ + "//test/util:file_descriptor", + "//test/util:logging", + "//test/util:multiprocess_util", + "//test/util:posix_error", + "//test/util:signal_util", + "//test/util:test_util", + "//test/util:thread_util", + "//test/util:timer_util", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "kill_test", + testonly = 1, + srcs = ["kill.cc"], + linkstatic = 1, + deps = [ + "//test/util:capability_util", + "//test/util:file_descriptor", + "//test/util:logging", + "//test/util:signal_util", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "link_test", + testonly = 1, + srcs = ["link.cc"], + linkstatic = 1, + deps = [ + "//test/util:capability_util", + "//test/util:file_descriptor", + "//test/util:fs_util", + "//test/util:posix_error", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + "@com_google_absl//absl/strings", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "lseek_test", + testonly = 1, + srcs = ["lseek.cc"], + linkstatic = 1, + deps = [ + "//test/util:file_descriptor", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "madvise_test", + testonly = 1, + srcs = ["madvise.cc"], + linkstatic = 
1, + deps = [ + "//test/util:file_descriptor", + "//test/util:logging", + "//test/util:memory_util", + "//test/util:posix_error", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "mempolicy_test", + testonly = 1, + srcs = ["mempolicy.cc"], + linkstatic = 1, + deps = [ + "//test/util:cleanup", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + "@com_google_absl//absl/memory", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "mincore_test", + testonly = 1, + srcs = ["mincore.cc"], + linkstatic = 1, + deps = [ + "//test/util:memory_util", + "//test/util:posix_error", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "mkdir_test", + testonly = 1, + srcs = ["mkdir.cc"], + linkstatic = 1, + deps = [ + ":temp_umask", + "//test/util:capability_util", + "//test/util:fs_util", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "mknod_test", + testonly = 1, + srcs = ["mknod.cc"], + linkstatic = 1, + deps = [ + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "mmap_test", + testonly = 1, + srcs = ["mmap.cc"], + linkstatic = 1, + deps = [ + "//test/util:cleanup", + "//test/util:file_descriptor", + "//test/util:fs_util", + "//test/util:memory_util", + "//test/util:multiprocess_util", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_absl//absl/strings", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "mount_test", + testonly = 1, + srcs = ["mount.cc"], + linkstatic = 1, + deps = [ + "//test/util:capability_util", + "//test/util:file_descriptor", + "//test/util:fs_util", + 
"//test/util:mount_util", + "//test/util:posix_error", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "mremap_test", + testonly = 1, + srcs = ["mremap.cc"], + linkstatic = 1, + deps = [ + "//test/util:file_descriptor", + "//test/util:logging", + "//test/util:memory_util", + "//test/util:multiprocess_util", + "//test/util:posix_error", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_absl//absl/strings", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "msync_test", + testonly = 1, + srcs = ["msync.cc"], + linkstatic = 1, + deps = [ + "//test/util:file_descriptor", + "//test/util:memory_util", + "//test/util:posix_error", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "munmap_test", + testonly = 1, + srcs = ["munmap.cc"], + linkstatic = 1, + deps = [ + "//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "open_test", + testonly = 1, + srcs = [ + "file_base.h", + "open.cc", + ], + linkstatic = 1, + deps = [ + "//test/util:capability_util", + "//test/util:cleanup", + "//test/util:file_descriptor", + "//test/util:fs_util", + "//test/util:posix_error", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_absl//absl/strings", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "open_create_test", + testonly = 1, + srcs = ["open_create.cc"], + linkstatic = 1, + deps = [ + ":temp_umask", + "//test/util:capability_util", + "//test/util:file_descriptor", + "//test/util:fs_util", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = 
"pty_test", + testonly = 1, + srcs = ["pty.cc"], + linkstatic = 1, + deps = [ + "//test/util:file_descriptor", + "//test/util:posix_error", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "partial_bad_buffer_test", + testonly = 1, + srcs = ["partial_bad_buffer.cc"], + linkstatic = 1, + deps = [ + "//test/util:fs_util", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "pause_test", + testonly = 1, + srcs = ["pause.cc"], + linkstatic = 1, + deps = [ + "//test/util:signal_util", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "pipe_test", + testonly = 1, + srcs = ["pipe.cc"], + linkstatic = 1, + deps = [ + "//test/util:file_descriptor", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "poll_test", + testonly = 1, + srcs = ["poll.cc"], + linkstatic = 1, + deps = [ + ":base_poll_test", + "//test/util:file_descriptor", + "//test/util:logging", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "ppoll_test", + testonly = 1, + srcs = ["ppoll.cc"], + linkstatic = 1, + deps = [ + ":base_poll_test", + "//test/util:signal_util", + "//test/util:test_main", + 
"//test/util:test_util", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "arch_prctl_test", + testonly = 1, + srcs = ["arch_prctl.cc"], + linkstatic = 1, + deps = [ + "//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "prctl_test", + testonly = 1, + srcs = ["prctl.cc"], + linkstatic = 1, + deps = [ + "//test/util:capability_util", + "//test/util:multiprocess_util", + "//test/util:posix_error", + "//test/util:test_util", + "//test/util:thread_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "prctl_setuid_test", + testonly = 1, + srcs = ["prctl_setuid.cc"], + linkstatic = 1, + deps = [ + "//test/util:capability_util", + "//test/util:logging", + "//test/util:multiprocess_util", + "//test/util:posix_error", + "//test/util:test_util", + "//test/util:thread_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "pread64_test", + testonly = 1, + srcs = ["pread64.cc"], + linkstatic = 1, + deps = [ + "//test/util:file_descriptor", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "preadv_test", + testonly = 1, + srcs = ["preadv.cc"], + linkstatic = 1, + deps = [ + "//test/util:file_descriptor", + "//test/util:logging", + "//test/util:memory_util", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + "//test/util:timer_util", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "preadv2_test", + testonly = 1, + srcs = [ + "preadv2.cc", + "readv_common.cc", + "readv_common.h", + ], + linkstatic = 1, + deps = [ + ":file_base", + "//test/util:file_descriptor", + "//test/util:memory_util", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) + 
+cc_binary( + name = "priority_test", + testonly = 1, + srcs = ["priority.cc"], + linkstatic = 1, + deps = [ + "//test/util:capability_util", + "//test/util:fs_util", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + "@com_google_absl//absl/strings", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "proc_test", + testonly = 1, + srcs = ["proc.cc"], + linkstatic = 1, + deps = [ + "//test/util:capability_util", + "//test/util:cleanup", + "//test/util:file_descriptor", + "//test/util:fs_util", + "//test/util:memory_util", + "//test/util:posix_error", + "//test/util:temp_path", + "//test/util:test_util", + "//test/util:thread_util", + "//test/util:timer_util", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "proc_net_test", + testonly = 1, + srcs = ["proc_net.cc"], + linkstatic = 1, + deps = [ + "//test/util:file_descriptor", + "//test/util:fs_util", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "pselect_test", + testonly = 1, + srcs = ["pselect.cc"], + linkstatic = 1, + deps = [ + ":base_poll_test", + "//test/util:signal_util", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "ptrace_test", + testonly = 1, + srcs = ["ptrace.cc"], + linkstatic = 1, + deps = [ + "//test/util:logging", + "//test/util:multiprocess_util", + "//test/util:signal_util", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "pwrite64_test", + testonly = 1, + srcs = ["pwrite64.cc"], + linkstatic = 1, + deps = [ + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + 
"@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "read_test", + testonly = 1, + srcs = ["read.cc"], + linkstatic = 1, + deps = [ + "//test/util:file_descriptor", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "readv_test", + testonly = 1, + srcs = [ + "file_base.h", + "readv.cc", + "readv_common.cc", + "readv_common.h", + ], + linkstatic = 1, + deps = [ + "//test/util:file_descriptor", + "//test/util:posix_error", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:timer_util", + "@com_google_absl//absl/strings", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "readv_socket_test", + testonly = 1, + srcs = [ + "file_base.h", + "readv_common.cc", + "readv_common.h", + "readv_socket.cc", + ], + linkstatic = 1, + deps = [ + "//test/util:file_descriptor", + "//test/util:posix_error", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_absl//absl/strings", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "rename_test", + testonly = 1, + srcs = ["rename.cc"], + linkstatic = 1, + deps = [ + "//test/util:capability_util", + "//test/util:cleanup", + "//test/util:file_descriptor", + "//test/util:fs_util", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_absl//absl/strings", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "rlimits_test", + testonly = 1, + srcs = ["rlimits.cc"], + linkstatic = 1, + deps = [ + "//test/util:capability_util", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "rtsignal_test", + testonly = 1, + srcs = ["rtsignal.cc"], + linkstatic = 1, + deps = [ + "//test/util:cleanup", + "//test/util:logging", + "//test/util:posix_error", + "//test/util:signal_util", + "//test/util:test_util", + 
"@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "sched_test", + testonly = 1, + srcs = ["sched.cc"], + linkstatic = 1, + deps = [ + "//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "sched_yield_test", + testonly = 1, + srcs = ["sched_yield.cc"], + linkstatic = 1, + deps = [ + "//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "seccomp_test", + testonly = 1, + srcs = ["seccomp.cc"], + linkstatic = 1, + deps = [ + "//test/util:logging", + "//test/util:memory_util", + "//test/util:multiprocess_util", + "//test/util:posix_error", + "//test/util:proc_util", + "//test/util:test_util", + "//test/util:thread_util", + "@com_google_absl//absl/base:core_headers", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "select_test", + testonly = 1, + srcs = ["select.cc"], + linkstatic = 1, + deps = [ + ":base_poll_test", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "sendfile_test", + testonly = 1, + srcs = ["sendfile.cc"], + linkstatic = 1, + deps = [ + "//test/util:file_descriptor", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_absl//absl/strings", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "sendfile_socket_test", + testonly = 1, + srcs = ["sendfile_socket.cc"], + linkstatic = 1, + deps = [ + ":socket_test_util", + "//test/util:file_descriptor", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + "@com_google_absl//absl/strings", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "sigaction_test", + testonly = 1, + srcs = ["sigaction.cc"], + linkstatic = 1, + deps = [ + "//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + 
], +) + +cc_binary( + name = "sigaltstack_test", + testonly = 1, + srcs = ["sigaltstack.cc"], + data = [ + ":sigaltstack_check", + ], + linkstatic = 1, + deps = [ + "//test/util:cleanup", + "//test/util:fs_util", + "//test/util:multiprocess_util", + "//test/util:posix_error", + "//test/util:signal_util", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "sigiret_test", + testonly = 1, + srcs = ["sigiret.cc"], + linkstatic = 1, + deps = [ + "//test/util:logging", + "//test/util:signal_util", + "//test/util:test_util", + "//test/util:timer_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "sigprocmask_test", + testonly = 1, + srcs = ["sigprocmask.cc"], + linkstatic = 1, + deps = [ + "//test/util:signal_util", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "sigstop_test", + testonly = 1, + srcs = ["sigstop.cc"], + linkstatic = 1, + deps = [ + "//test/util:multiprocess_util", + "//test/util:posix_error", + "//test/util:test_util", + "//test/util:thread_util", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "sigtimedwait_test", + testonly = 1, + srcs = ["sigtimedwait.cc"], + linkstatic = 1, + deps = [ + "//test/util:logging", + "//test/util:signal_util", + "//test/util:test_util", + "//test/util:thread_util", + "//test/util:timer_util", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest", + ], +) + +cc_library( + name = "socket_generic_test_cases", + testonly = 1, + srcs = [ + "socket_generic.cc", + ], + hdrs = [ + "socket_generic.h", + ], + deps = [ + ":socket_test_util", + ":unix_domain_socket_test_util", + "//test/util:test_util", + "@com_google_absl//absl/strings", + "@com_google_googletest//:gtest", + ], + alwayslink = 1, +) + +cc_library( + name = "socket_unix_dgram_test_cases", + testonly = 1, + srcs = 
["socket_unix_dgram.cc"], + hdrs = ["socket_unix_dgram.h"], + deps = [ + ":socket_test_util", + ":unix_domain_socket_test_util", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], + alwayslink = 1, +) + +cc_library( + name = "socket_unix_seqpacket_test_cases", + testonly = 1, + srcs = ["socket_unix_seqpacket.cc"], + hdrs = ["socket_unix_seqpacket.h"], + deps = [ + ":socket_test_util", + ":unix_domain_socket_test_util", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], + alwayslink = 1, +) + +cc_library( + name = "socket_ip_tcp_generic_test_cases", + testonly = 1, + srcs = [ + "socket_ip_tcp_generic.cc", + ], + hdrs = [ + "socket_ip_tcp_generic.h", + ], + deps = [ + ":socket_test_util", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], + alwayslink = 1, +) + +cc_library( + name = "socket_non_blocking_test_cases", + testonly = 1, + srcs = [ + "socket_non_blocking.cc", + ], + hdrs = [ + "socket_non_blocking.h", + ], + deps = [ + ":socket_test_util", + ":unix_domain_socket_test_util", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], + alwayslink = 1, +) + +cc_library( + name = "socket_unix_non_stream_test_cases", + testonly = 1, + srcs = [ + "socket_unix_non_stream.cc", + ], + hdrs = [ + "socket_unix_non_stream.h", + ], + deps = [ + ":socket_test_util", + ":unix_domain_socket_test_util", + "//test/util:memory_util", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], + alwayslink = 1, +) + +cc_library( + name = "socket_non_stream_test_cases", + testonly = 1, + srcs = [ + "socket_non_stream.cc", + ], + hdrs = [ + "socket_non_stream.h", + ], + deps = [ + ":ip_socket_test_util", + ":socket_test_util", + ":unix_domain_socket_test_util", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], + alwayslink = 1, +) + +cc_binary( + name = "socket_abstract_test", + testonly = 1, + srcs = [ + "socket_abstract.cc", + ], + linkstatic = 1, + deps = [ + ":socket_generic_test_cases", + 
":socket_test_util", + ":socket_unix_test_cases", + ":unix_domain_socket_test_util", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "socket_abstract_non_blocking_test", + testonly = 1, + srcs = [ + "socket_unix_abstract_nonblock.cc", + ], + linkstatic = 1, + deps = [ + ":socket_non_blocking_test_cases", + ":socket_test_util", + ":unix_domain_socket_test_util", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "socket_unix_dgram_local_test", + testonly = 1, + srcs = ["socket_unix_dgram_local.cc"], + linkstatic = 1, + deps = [ + ":socket_non_stream_test_cases", + ":socket_test_util", + ":socket_unix_dgram_test_cases", + ":socket_unix_non_stream_test_cases", + ":unix_domain_socket_test_util", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "socket_unix_dgram_non_blocking_test", + testonly = 1, + srcs = ["socket_unix_dgram_non_blocking.cc"], + linkstatic = 1, + deps = [ + ":socket_test_util", + ":unix_domain_socket_test_util", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "socket_unix_seqpacket_local_test", + testonly = 1, + srcs = [ + "socket_unix_seqpacket_local.cc", + ], + linkstatic = 1, + deps = [ + ":socket_non_stream_test_cases", + ":socket_test_util", + ":socket_unix_non_stream_test_cases", + ":socket_unix_seqpacket_test_cases", + ":unix_domain_socket_test_util", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "socket_unix_stream_test", + testonly = 1, + srcs = ["socket_unix_stream.cc"], + linkstatic = 1, + deps = [ + ":socket_test_util", + ":unix_domain_socket_test_util", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "socket_ip_tcp_generic_loopback_test", + testonly = 1, + srcs = [ + "socket_ip_tcp_generic_loopback.cc", + ], + linkstatic = 1, + deps = [ + 
":ip_socket_test_util", + ":socket_ip_tcp_generic_test_cases", + ":socket_test_util", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "socket_ip_tcp_udp_generic_loopback_test", + testonly = 1, + srcs = [ + "socket_ip_tcp_udp_generic.cc", + ], + linkstatic = 1, + deps = [ + ":ip_socket_test_util", + ":socket_test_util", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "socket_ip_tcp_loopback_test", + testonly = 1, + srcs = [ + "socket_ip_tcp_loopback.cc", + ], + linkstatic = 1, + deps = [ + ":ip_socket_test_util", + ":socket_generic_test_cases", + ":socket_test_util", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "socket_ip_tcp_loopback_non_blocking_test", + testonly = 1, + srcs = [ + "socket_ip_tcp_loopback_nonblock.cc", + ], + linkstatic = 1, + deps = [ + ":ip_socket_test_util", + ":socket_non_blocking_test_cases", + ":socket_test_util", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "socket_ip_udp_loopback_test", + testonly = 1, + srcs = [ + "socket_ip_udp_loopback.cc", + ], + linkstatic = 1, + deps = [ + ":ip_socket_test_util", + ":socket_generic_test_cases", + ":socket_non_stream_test_cases", + ":socket_test_util", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "socket_ip_udp_loopback_non_blocking_test", + testonly = 1, + srcs = [ + "socket_ip_udp_loopback_nonblock.cc", + ], + linkstatic = 1, + deps = [ + ":ip_socket_test_util", + ":socket_non_blocking_test_cases", + ":socket_test_util", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "socket_domain_test", + testonly = 1, + srcs = [ + "socket_unix_domain.cc", + ], + linkstatic = 1, + deps = [ + ":socket_generic_test_cases", + ":socket_test_util", + ":unix_domain_socket_test_util", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( 
+ name = "socket_domain_non_blocking_test", + testonly = 1, + srcs = [ + "socket_unix_pair_nonblock.cc", + ], + linkstatic = 1, + deps = [ + ":socket_non_blocking_test_cases", + ":socket_test_util", + ":unix_domain_socket_test_util", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "socket_filesystem_test", + testonly = 1, + srcs = [ + "socket_filesystem.cc", + ], + linkstatic = 1, + deps = [ + ":socket_generic_test_cases", + ":socket_test_util", + ":socket_unix_test_cases", + ":unix_domain_socket_test_util", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "socket_filesystem_non_blocking_test", + testonly = 1, + srcs = [ + "socket_unix_filesystem_nonblock.cc", + ], + linkstatic = 1, + deps = [ + ":socket_non_blocking_test_cases", + ":socket_test_util", + ":unix_domain_socket_test_util", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "socket_inet_loopback_test", + testonly = 1, + srcs = ["socket_inet_loopback.cc"], + linkstatic = 1, + deps = [ + ":socket_test_util", + "//test/util:file_descriptor", + "//test/util:posix_error", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_absl//absl/strings", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "socket_netlink_route_test", + testonly = 1, + srcs = ["socket_netlink_route.cc"], + linkstatic = 1, + deps = [ + ":socket_netlink_util", + ":socket_test_util", + "//test/util:cleanup", + "//test/util:file_descriptor", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) + +# These socket tests are in a library because the test cases are shared +# across several test build targets. 
+cc_library( + name = "socket_stream_test_cases", + testonly = 1, + srcs = [ + "socket_stream.cc", + ], + hdrs = [ + "socket_stream.h", + ], + deps = [ + ":socket_test_util", + ":unix_domain_socket_test_util", + "//test/util:test_util", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest", + ], + alwayslink = 1, +) + +cc_library( + name = "socket_unix_test_cases", + testonly = 1, + srcs = [ + "socket_unix.cc", + ], + hdrs = [ + "socket_unix.h", + ], + deps = [ + ":socket_test_util", + ":unix_domain_socket_test_util", + "//test/util:test_util", + "//test/util:thread_util", + "@com_google_absl//absl/strings", + "@com_google_googletest//:gtest", + ], + alwayslink = 1, +) + +cc_library( + name = "socket_stream_blocking_test_cases", + testonly = 1, + srcs = [ + "socket_stream_blocking.cc", + ], + hdrs = [ + "socket_stream_blocking.h", + ], + deps = [ + ":socket_test_util", + ":unix_domain_socket_test_util", + "//test/util:test_util", + "//test/util:thread_util", + "//test/util:timer_util", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest", + ], + alwayslink = 1, +) + +cc_library( + name = "socket_stream_nonblocking_test_cases", + testonly = 1, + srcs = [ + "socket_stream_nonblock.cc", + ], + hdrs = [ + "socket_stream_nonblock.h", + ], + deps = [ + ":socket_test_util", + ":unix_domain_socket_test_util", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], + alwayslink = 1, +) + +cc_binary( + name = "socket_stream_local_test", + testonly = 1, + srcs = [ + "socket_unix_stream_local.cc", + ], + linkstatic = 1, + deps = [ + ":socket_stream_test_cases", + ":socket_test_util", + ":unix_domain_socket_test_util", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "socket_stream_blocking_local_test", + testonly = 1, + srcs = [ + "socket_unix_stream_blocking_local.cc", + ], + linkstatic = 1, + deps = [ + ":socket_stream_blocking_test_cases", + ":socket_test_util", + ":unix_domain_socket_test_util", + 
"//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "socket_stream_blocking_tcp_test", + testonly = 1, + srcs = [ + "socket_ip_tcp_loopback_blocking.cc", + ], + linkstatic = 1, + deps = [ + ":ip_socket_test_util", + ":socket_stream_blocking_test_cases", + ":socket_test_util", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "socket_stream_nonblock_local_test", + testonly = 1, + srcs = [ + "socket_unix_stream_nonblock_local.cc", + ], + linkstatic = 1, + deps = [ + ":socket_stream_nonblocking_test_cases", + ":socket_test_util", + ":unix_domain_socket_test_util", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "socket_unix_abstract_test", + testonly = 1, + srcs = [ + "socket_unix_abstract.cc", + ], + linkstatic = 1, + deps = [ + ":socket_test_util", + ":socket_unix_test_cases", + ":unix_domain_socket_test_util", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "socket_unix_unbound_dgram_test", + testonly = 1, + srcs = ["socket_unix_unbound_dgram.cc"], + linkstatic = 1, + deps = [ + ":socket_test_util", + ":unix_domain_socket_test_util", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "socket_unix_unbound_abstract_test", + testonly = 1, + srcs = ["socket_unix_unbound_abstract.cc"], + linkstatic = 1, + deps = [ + ":socket_test_util", + ":unix_domain_socket_test_util", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "socket_unix_unbound_filesystem_test", + testonly = 1, + srcs = ["socket_unix_unbound_filesystem.cc"], + linkstatic = 1, + deps = [ + ":socket_test_util", + ":unix_domain_socket_test_util", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "socket_unix_filesystem_test", + testonly = 1, + srcs = [ + 
"socket_unix_filesystem.cc", + ], + linkstatic = 1, + deps = [ + ":socket_test_util", + ":socket_unix_test_cases", + ":unix_domain_socket_test_util", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) + +cc_library( + name = "socket_non_stream_blocking_test_cases", + testonly = 1, + srcs = [ + "socket_non_stream_blocking.cc", + ], + hdrs = [ + "socket_non_stream_blocking.h", + ], + deps = [ + ":socket_test_util", + ":unix_domain_socket_test_util", + "//test/util:test_util", + "//test/util:thread_util", + "//test/util:timer_util", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest", + ], + alwayslink = 1, +) + +cc_binary( + name = "socket_non_stream_blocking_local_test", + testonly = 1, + srcs = [ + "socket_unix_non_stream_blocking_local.cc", + ], + linkstatic = 1, + deps = [ + ":socket_non_stream_blocking_test_cases", + ":socket_test_util", + ":unix_domain_socket_test_util", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "socket_non_stream_blocking_udp_test", + testonly = 1, + srcs = [ + "socket_ip_udp_loopback_blocking.cc", + ], + linkstatic = 1, + deps = [ + ":ip_socket_test_util", + ":socket_non_stream_blocking_test_cases", + ":socket_test_util", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "socket_unix_pair_test", + testonly = 1, + srcs = [ + "socket_unix_pair.cc", + ], + linkstatic = 1, + deps = [ + ":socket_test_util", + ":socket_unix_test_cases", + ":unix_domain_socket_test_util", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "socket_unix_unbound_seqpacket_test", + testonly = 1, + srcs = ["socket_unix_unbound_seqpacket.cc"], + linkstatic = 1, + deps = [ + ":socket_test_util", + ":unix_domain_socket_test_util", + "//test/util:test_main", + "//test/util:test_util", + 
"@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "socket_unix_unbound_stream_test", + testonly = 1, + srcs = ["socket_unix_unbound_stream.cc"], + linkstatic = 1, + deps = [ + ":socket_test_util", + ":unix_domain_socket_test_util", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "socket_netdevice_test", + testonly = 1, + srcs = ["socket_netdevice.cc"], + linkstatic = 1, + deps = [ + ":socket_netlink_util", + ":socket_test_util", + "//test/util:file_descriptor", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_absl//absl/base:endian", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "stat_test", + testonly = 1, + srcs = [ + "file_base.h", + "stat.cc", + ], + linkstatic = 1, + deps = [ + "//test/util:cleanup", + "//test/util:file_descriptor", + "//test/util:fs_util", + "//test/util:posix_error", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_absl//absl/strings", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "stat_times_test", + testonly = 1, + srcs = ["stat_times.cc"], + linkstatic = 1, + deps = [ + "//test/util:file_descriptor", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "statfs_test", + testonly = 1, + srcs = [ + "file_base.h", + "statfs.cc", + ], + linkstatic = 1, + deps = [ + "//test/util:file_descriptor", + "//test/util:posix_error", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_absl//absl/strings", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "symlink_test", + testonly = 1, + srcs = ["symlink.cc"], + linkstatic = 1, + deps = [ + "//test/util:capability_util", + "//test/util:file_descriptor", + "//test/util:fs_util", + "//test/util:temp_path", + 
"//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "sync_test", + testonly = 1, + srcs = ["sync.cc"], + linkstatic = 1, + deps = [ + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "sysinfo_test", + testonly = 1, + srcs = ["sysinfo.cc"], + linkstatic = 1, + deps = [ + "//test/util:test_main", + "//test/util:test_util", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "syslog_test", + testonly = 1, + srcs = ["syslog.cc"], + linkstatic = 1, + deps = [ + "//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "sysret_test", + testonly = 1, + srcs = ["sysret.cc"], + linkstatic = 1, + deps = [ + "//test/util:logging", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "tcp_socket_test", + testonly = 1, + srcs = ["tcp_socket.cc"], + linkstatic = 1, + deps = [ + ":socket_test_util", + "//test/util:file_descriptor", + "//test/util:posix_error", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "tgkill_test", + testonly = 1, + srcs = ["tgkill.cc"], + linkstatic = 1, + deps = [ + "//test/util:signal_util", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "time_test", + testonly = 1, + srcs = ["time.cc"], + linkstatic = 1, + deps = [ + "//test/util:proc_util", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "timerfd_test", + testonly = 1, + srcs = ["timerfd.cc"], + linkstatic = 1, + deps = [ + "//test/util:file_descriptor", + 
"//test/util:posix_error", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_absl//absl/time", + ], +) + +cc_binary( + name = "timers_test", + testonly = 1, + srcs = ["timers.cc"], + linkstatic = 1, + deps = [ + "//test/util:cleanup", + "//test/util:logging", + "//test/util:multiprocess_util", + "//test/util:posix_error", + "//test/util:signal_util", + "//test/util:test_util", + "//test/util:thread_util", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "tkill_test", + testonly = 1, + srcs = ["tkill.cc"], + linkstatic = 1, + deps = [ + "//test/util:logging", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "truncate_test", + testonly = 1, + srcs = ["truncate.cc"], + linkstatic = 1, + deps = [ + ":file_base", + "//test/util:capability_util", + "//test/util:cleanup", + "//test/util:file_descriptor", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_absl//absl/strings", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "udp_socket_test", + testonly = 1, + srcs = ["udp_socket.cc"], + linkstatic = 1, + deps = [ + ":socket_test_util", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "udp_bind_test", + testonly = 1, + srcs = ["udp_bind.cc"], + linkstatic = 1, + deps = [ + ":socket_test_util", + "//test/util:file_descriptor", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "uidgid_test", + testonly = 1, + srcs = ["uidgid.cc"], + linkstatic = 1, + deps = [ + "//test/util:capability_util", + "//test/util:posix_error", + "//test/util:test_main", + "//test/util:test_util", + 
"//test/util:thread_util", + "@com_google_absl//absl/strings", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "uname_test", + testonly = 1, + srcs = ["uname.cc"], + linkstatic = 1, + deps = [ + "//test/util:capability_util", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + "@com_google_absl//absl/strings", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "unlink_test", + testonly = 1, + srcs = ["unlink.cc"], + linkstatic = 1, + deps = [ + "//test/util:capability_util", + "//test/util:file_descriptor", + "//test/util:fs_util", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_absl//absl/strings", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "unshare_test", + testonly = 1, + srcs = ["unshare.cc"], + linkstatic = 1, + deps = [ + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + "@com_google_absl//absl/synchronization", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "utimes_test", + testonly = 1, + srcs = ["utimes.cc"], + linkstatic = 1, + deps = [ + "//test/util:file_descriptor", + "//test/util:fs_util", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_absl//absl/time", + ], +) + +cc_binary( + name = "vdso_test", + testonly = 1, + srcs = ["vdso.cc"], + linkstatic = 1, + deps = [ + "//test/util:fs_util", + "//test/util:posix_error", + "//test/util:proc_util", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "vfork_test", + testonly = 1, + srcs = ["vfork.cc"], + linkstatic = 1, + deps = [ + "//test/util:logging", + "//test/util:multiprocess_util", + "//test/util:test_util", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "wait_test", + testonly = 1, + srcs = ["wait.cc"], + linkstatic = 1, + deps = [ + 
"//test/util:cleanup", + "//test/util:logging", + "//test/util:multiprocess_util", + "//test/util:posix_error", + "//test/util:signal_util", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "write_test", + testonly = 1, + srcs = ["write.cc"], + linkstatic = 1, + deps = [ + "//test/util:cleanup", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "memory_accounting_test", + testonly = 1, + srcs = ["memory_accounting.cc"], + linkstatic = 1, + deps = [ + "//test/util:fs_util", + "//test/util:posix_error", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:str_format", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "semaphore_test", + testonly = 1, + srcs = ["semaphore.cc"], + linkstatic = 1, + deps = [ + "//test/util:capability_util", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "shm_test", + testonly = 1, + srcs = ["shm.cc"], + linkstatic = 1, + deps = [ + "//test/util:multiprocess_util", + "//test/util:posix_error", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_absl//absl/time", + ], +) + +cc_binary( + name = "fadvise64_test", + testonly = 1, + srcs = ["fadvise64.cc"], + linkstatic = 1, + deps = [ + "//test/util:file_descriptor", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "vdso_clock_gettime_test", + testonly = 1, + srcs = ["vdso_clock_gettime.cc"], + linkstatic = 
1, + deps = [ + "//test/util:test_main", + "//test/util:test_util", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( + name = "vsyscall_test", + testonly = 1, + srcs = ["vsyscall.cc"], + linkstatic = 1, + deps = [ + "//test/util:proc_util", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) diff --git a/test/syscalls/linux/accept_bind.cc b/test/syscalls/linux/accept_bind.cc new file mode 100644 index 000000000..7c6e92317 --- /dev/null +++ b/test/syscalls/linux/accept_bind.cc @@ -0,0 +1,600 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <stdio.h> +#include <sys/un.h> +#include <algorithm> +#include <vector> +#include "gtest/gtest.h" +#include "gtest/gtest.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +TEST_P(AllSocketPairTest, Listen) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(listen(sockets->first_fd(), /* backlog = */ 5), + SyscallSucceeds()); +} + +TEST_P(AllSocketPairTest, ListenIncreaseBacklog) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(listen(sockets->first_fd(), /* backlog = */ 5), + SyscallSucceeds()); + ASSERT_THAT(listen(sockets->first_fd(), /* backlog = */ 10), + SyscallSucceeds()); +} + +TEST_P(AllSocketPairTest, ListenDecreaseBacklog) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(listen(sockets->first_fd(), /* backlog = */ 5), + SyscallSucceeds()); + ASSERT_THAT(listen(sockets->first_fd(), /* backlog = */ 1), + SyscallSucceeds()); +} + +TEST_P(AllSocketPairTest, ListenWithoutBind) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + ASSERT_THAT(listen(sockets->first_fd(), 0), SyscallFailsWithErrno(EINVAL)); +} + +TEST_P(AllSocketPairTest, DoubleBind) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->second_addr(), + 
sockets->second_addr_size()), + SyscallFailsWithErrno(EINVAL)); +} + +TEST_P(AllSocketPairTest, BindListenBind) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->second_addr(), + sockets->second_addr_size()), + SyscallFailsWithErrno(EINVAL)); +} + +TEST_P(AllSocketPairTest, DoubleListen) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds()); + + ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds()); +} + +TEST_P(AllSocketPairTest, DoubleConnect) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds()); + + ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallFailsWithErrno(EISCONN)); +} + +TEST_P(AllSocketPairTest, Connect) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds()); + + ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); +} + +TEST_P(AllSocketPairTest, ConnectToFilePath) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + struct sockaddr_un addr = {}; + addr.sun_family = AF_UNIX; + 
constexpr char kPath[] = "/tmp"; + memcpy(addr.sun_path, kPath, sizeof(kPath)); + + ASSERT_THAT( + connect(sockets->second_fd(), + reinterpret_cast<const struct sockaddr*>(&addr), sizeof(addr)), + SyscallFailsWithErrno(ECONNREFUSED)); +} + +TEST_P(AllSocketPairTest, ConnectToInvalidAbstractPath) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + struct sockaddr_un addr = {}; + addr.sun_family = AF_UNIX; + constexpr char kPath[] = "\0nonexistent"; + memcpy(addr.sun_path, kPath, sizeof(kPath)); + + ASSERT_THAT( + connect(sockets->second_fd(), + reinterpret_cast<const struct sockaddr*>(&addr), sizeof(addr)), + SyscallFailsWithErrno(ECONNREFUSED)); +} + +TEST_P(AllSocketPairTest, SelfConnect) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds()); + + ASSERT_THAT(connect(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallFailsWithErrno(EINVAL)); +} + +TEST_P(AllSocketPairTest, ConnectWithoutListen) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallFailsWithErrno(ECONNREFUSED)); +} + +TEST_P(AllSocketPairTest, Accept) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds()); + + ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + int accepted = -1; + ASSERT_THAT(accepted = accept(sockets->first_fd(), nullptr, nullptr), + SyscallSucceeds()); 
+ ASSERT_THAT(close(accepted), SyscallSucceeds()); +} + +TEST_P(AllSocketPairTest, AcceptValidAddrLen) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds()); + + ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + int accepted = -1; + struct sockaddr_un addr = {}; + socklen_t addr_len = sizeof(addr); + ASSERT_THAT( + accepted = accept(sockets->first_fd(), + reinterpret_cast<struct sockaddr*>(&addr), &addr_len), + SyscallSucceeds()); + ASSERT_THAT(close(accepted), SyscallSucceeds()); +} + +TEST_P(AllSocketPairTest, AcceptNegativeAddrLen) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds()); + + ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + // With a negative addr_len, accept returns EINVAL, + struct sockaddr_un addr = {}; + socklen_t addr_len = -1; + ASSERT_THAT(accept(sockets->first_fd(), + reinterpret_cast<struct sockaddr*>(&addr), &addr_len), + SyscallFailsWithErrno(EINVAL)); +} + +TEST_P(AllSocketPairTest, AcceptLargePositiveAddrLen) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds()); + + ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + // With a large (positive) addr_len, accept does not return EINVAL. 
+ int accepted = -1; + char addr_buf[200]; + socklen_t addr_len = sizeof(addr_buf); + ASSERT_THAT(accepted = accept(sockets->first_fd(), + reinterpret_cast<struct sockaddr*>(addr_buf), + &addr_len), + SyscallSucceeds()); + // addr_len should have been updated by accept(). + EXPECT_LT(addr_len, sizeof(addr_buf)); + ASSERT_THAT(close(accepted), SyscallSucceeds()); +} + +TEST_P(AllSocketPairTest, AcceptVeryLargePositiveAddrLen) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds()); + + ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + // With a large (positive) addr_len, accept does not return EINVAL. + int accepted = -1; + char addr_buf[2000]; + socklen_t addr_len = sizeof(addr_buf); + ASSERT_THAT(accepted = accept(sockets->first_fd(), + reinterpret_cast<struct sockaddr*>(addr_buf), + &addr_len), + SyscallSucceeds()); + // addr_len should have been updated by accept(). 
+ EXPECT_LT(addr_len, sizeof(addr_buf)); + ASSERT_THAT(close(accepted), SyscallSucceeds()); +} + +TEST_P(AllSocketPairTest, AcceptWithoutBind) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(accept(sockets->first_fd(), nullptr, nullptr), + SyscallFailsWithErrno(EINVAL)); +} + +TEST_P(AllSocketPairTest, AcceptWithoutListen) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + ASSERT_THAT(accept(sockets->first_fd(), nullptr, nullptr), + SyscallFailsWithErrno(EINVAL)); +} + +TEST_P(AllSocketPairTest, GetRemoteAddress) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds()); + + ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + socklen_t addr_len = sockets->first_addr_size(); + struct sockaddr_storage addr = {}; + ASSERT_THAT( + getpeername(sockets->second_fd(), (struct sockaddr*)(&addr), &addr_len), + SyscallSucceeds()); + EXPECT_EQ(addr_len, sockets->first_addr_len()); + EXPECT_EQ(0, memcmp(&addr, sockets->first_addr(), sockets->first_addr_len())); +} + +TEST_P(AllSocketPairTest, UnboundGetLocalAddress) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds()); + + ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + socklen_t addr_len = sockets->first_addr_size(); + struct sockaddr_storage addr = {}; + ASSERT_THAT( + getsockname(sockets->second_fd(), (struct sockaddr*)(&addr), &addr_len), + 
SyscallSucceeds()); + EXPECT_EQ(addr_len, 2); + EXPECT_EQ( + memcmp(&addr, sockets->second_addr(), + std::min((size_t)addr_len, (size_t)sockets->second_addr_len())), + 0); +} + +TEST_P(AllSocketPairTest, BoundGetLocalAddress) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds()); + + ASSERT_THAT(bind(sockets->second_fd(), sockets->second_addr(), + sockets->second_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + socklen_t addr_len = sockets->first_addr_size(); + struct sockaddr_storage addr = {}; + ASSERT_THAT( + getsockname(sockets->second_fd(), (struct sockaddr*)(&addr), &addr_len), + SyscallSucceeds()); + EXPECT_EQ(addr_len, sockets->second_addr_len()); + EXPECT_EQ( + memcmp(&addr, sockets->second_addr(), + std::min((size_t)addr_len, (size_t)sockets->second_addr_len())), + 0); +} + +TEST_P(AllSocketPairTest, BoundConnector) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds()); + + ASSERT_THAT(bind(sockets->second_fd(), sockets->second_addr(), + sockets->second_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); +} + +TEST_P(AllSocketPairTest, UnboundSenderAddr) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds()); + + ASSERT_THAT(connect(sockets->second_fd(), 
sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + int accepted = -1; + ASSERT_THAT(accepted = accept(sockets->first_fd(), nullptr, nullptr), + SyscallSucceeds()); + FileDescriptor accepted_fd(accepted); + + int i = 0; + ASSERT_THAT(RetryEINTR(send)(sockets->second_fd(), &i, sizeof(i), 0), + SyscallSucceedsWithValue(sizeof(i))); + + struct sockaddr_storage addr; + socklen_t addr_len = sizeof(addr); + ASSERT_THAT( + RetryEINTR(recvfrom)(accepted_fd.get(), &i, sizeof(i), 0, + reinterpret_cast<sockaddr*>(&addr), &addr_len), + SyscallSucceedsWithValue(sizeof(i))); + if (!IsRunningOnGvisor()) { + // Linux returns a zero length for addresses from recvfrom(2) and + // recvmsg(2). This differs from the behavior of getpeername(2) and + // getsockname(2). For simplicity, we use the getpeername(2) and + // getsockname(2) behavior for recvfrom(2) and recvmsg(2). + EXPECT_EQ(addr_len, 0); + return; + } + EXPECT_EQ(addr_len, 2); + EXPECT_EQ( + memcmp(&addr, sockets->second_addr(), + std::min((size_t)addr_len, (size_t)sockets->second_addr_len())), + 0); +} + +TEST_P(AllSocketPairTest, BoundSenderAddr) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds()); + + ASSERT_THAT(bind(sockets->second_fd(), sockets->second_addr(), + sockets->second_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + int accepted = -1; + ASSERT_THAT(accepted = accept(sockets->first_fd(), nullptr, nullptr), + SyscallSucceeds()); + FileDescriptor accepted_fd(accepted); + + int i = 0; + ASSERT_THAT(RetryEINTR(send)(sockets->second_fd(), &i, sizeof(i), 0), + SyscallSucceedsWithValue(sizeof(i))); + + struct sockaddr_storage addr; + socklen_t addr_len = sizeof(addr); + ASSERT_THAT( + 
RetryEINTR(recvfrom)(accepted_fd.get(), &i, sizeof(i), 0, + reinterpret_cast<sockaddr*>(&addr), &addr_len), + SyscallSucceedsWithValue(sizeof(i))); + EXPECT_EQ(addr_len, sockets->second_addr_len()); + EXPECT_EQ( + memcmp(&addr, sockets->second_addr(), + std::min((size_t)addr_len, (size_t)sockets->second_addr_len())), + 0); +} + +TEST_P(AllSocketPairTest, BindAfterConnectSenderAddr) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds()); + + ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(bind(sockets->second_fd(), sockets->second_addr(), + sockets->second_addr_size()), + SyscallSucceeds()); + + int accepted = -1; + ASSERT_THAT(accepted = accept(sockets->first_fd(), nullptr, nullptr), + SyscallSucceeds()); + FileDescriptor accepted_fd(accepted); + + int i = 0; + ASSERT_THAT(RetryEINTR(send)(sockets->second_fd(), &i, sizeof(i), 0), + SyscallSucceedsWithValue(sizeof(i))); + + struct sockaddr_storage addr; + socklen_t addr_len = sizeof(addr); + ASSERT_THAT( + RetryEINTR(recvfrom)(accepted_fd.get(), &i, sizeof(i), 0, + reinterpret_cast<sockaddr*>(&addr), &addr_len), + SyscallSucceedsWithValue(sizeof(i))); + EXPECT_EQ(addr_len, sockets->second_addr_len()); + EXPECT_EQ( + memcmp(&addr, sockets->second_addr(), + std::min((size_t)addr_len, (size_t)sockets->second_addr_len())), + 0); +} + +TEST_P(AllSocketPairTest, BindAfterAcceptSenderAddr) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds()); + + ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(), + sockets->first_addr_size()), + 
SyscallSucceeds()); + + int accepted = -1; + ASSERT_THAT(accepted = accept(sockets->first_fd(), nullptr, nullptr), + SyscallSucceeds()); + FileDescriptor accepted_fd(accepted); + + ASSERT_THAT(bind(sockets->second_fd(), sockets->second_addr(), + sockets->second_addr_size()), + SyscallSucceeds()); + + int i = 0; + ASSERT_THAT(RetryEINTR(send)(sockets->second_fd(), &i, sizeof(i), 0), + SyscallSucceedsWithValue(sizeof(i))); + + struct sockaddr_storage addr; + socklen_t addr_len = sizeof(addr); + ASSERT_THAT( + RetryEINTR(recvfrom)(accepted_fd.get(), &i, sizeof(i), 0, + reinterpret_cast<sockaddr*>(&addr), &addr_len), + SyscallSucceedsWithValue(sizeof(i))); + EXPECT_EQ(addr_len, sockets->second_addr_len()); + EXPECT_EQ( + memcmp(&addr, sockets->second_addr(), + std::min((size_t)addr_len, (size_t)sockets->second_addr_len())), + 0); +} + +INSTANTIATE_TEST_CASE_P( + AllUnixDomainSockets, AllSocketPairTest, + ::testing::ValuesIn(VecCat<SocketPairKind>( + ApplyVec<SocketPairKind>( + FilesystemUnboundUnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_STREAM, SOCK_SEQPACKET}, + List<int>{0, SOCK_NONBLOCK}, + List<int>{0, SOCK_CLOEXEC})), + ApplyVec<SocketPairKind>( + AbstractUnboundUnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_STREAM, SOCK_SEQPACKET}, + List<int>{0, SOCK_NONBLOCK}, + List<int>{0, SOCK_CLOEXEC}))))); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/accept_bind_stream.cc b/test/syscalls/linux/accept_bind_stream.cc new file mode 100644 index 000000000..f7113a6fc --- /dev/null +++ b/test/syscalls/linux/accept_bind_stream.cc @@ -0,0 +1,93 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <stdio.h> +#include <sys/un.h> +#include <algorithm> +#include <vector> +#include "gtest/gtest.h" +#include "gtest/gtest.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +TEST_P(AllSocketPairTest, BoundSenderAddrCoalesced) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds()); + + ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + int accepted = -1; + ASSERT_THAT(accepted = accept(sockets->first_fd(), nullptr, nullptr), + SyscallSucceeds()); + FileDescriptor closer(accepted); + + int i = 0; + ASSERT_THAT(RetryEINTR(send)(sockets->second_fd(), &i, sizeof(i), 0), + SyscallSucceedsWithValue(sizeof(i))); + + ASSERT_THAT(bind(sockets->second_fd(), sockets->second_addr(), + sockets->second_addr_size()), + SyscallSucceeds()); + + i = 0; + ASSERT_THAT(RetryEINTR(send)(sockets->second_fd(), &i, sizeof(i), 0), + SyscallSucceedsWithValue(sizeof(i))); + + int ri[2] = {0, 0}; + struct sockaddr_storage addr; + socklen_t addr_len = sizeof(addr); + ASSERT_THAT( + RetryEINTR(recvfrom)(accepted, ri, sizeof(ri), 0, + reinterpret_cast<sockaddr*>(&addr), &addr_len), + 
SyscallSucceedsWithValue(sizeof(ri))); + EXPECT_EQ(addr_len, sockets->second_addr_len()); + + EXPECT_EQ( + memcmp(&addr, sockets->second_addr(), + std::min((size_t)addr_len, (size_t)sockets->second_addr_len())), + 0); +} + +INSTANTIATE_TEST_CASE_P( + AllUnixDomainSockets, AllSocketPairTest, + ::testing::ValuesIn(VecCat<SocketPairKind>( + ApplyVec<SocketPairKind>( + FilesystemUnboundUnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_STREAM}, + List<int>{0, SOCK_NONBLOCK}, + List<int>{0, SOCK_CLOEXEC})), + ApplyVec<SocketPairKind>( + AbstractUnboundUnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_STREAM}, + List<int>{0, SOCK_NONBLOCK}, + List<int>{0, SOCK_CLOEXEC}))))); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/access.cc b/test/syscalls/linux/access.cc new file mode 100644 index 000000000..6ea070a5d --- /dev/null +++ b/test/syscalls/linux/access.cc @@ -0,0 +1,170 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <errno.h> +#include <fcntl.h> +#include <stdlib.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "test/util/capability_util.h" +#include "test/util/fs_util.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +using ::testing::Ge; + +namespace gvisor { +namespace testing { + +namespace { + +class AccessTest : public ::testing::Test { + public: + std::string CreateTempFile(int perm) { + const std::string path = NewTempAbsPath(); + const int fd = open(path.c_str(), O_CREAT | O_RDONLY, perm); + TEST_PCHECK(fd > 0); + TEST_PCHECK(close(fd) == 0); + return path; + } + + protected: + // SetUp creates various configurations of files. + void SetUp() override { + // Move to the temporary directory. This allows us to reason more easily + // about absolute and relative paths. + ASSERT_THAT(chdir(GetAbsoluteTestTmpdir().c_str()), SyscallSucceeds()); + + // Create an empty file, standard permissions. + relfile_ = NewTempRelPath(); + int fd; + ASSERT_THAT(fd = open(relfile_.c_str(), O_CREAT | O_TRUNC, 0644), + SyscallSucceedsWithValue(Ge(0))); + ASSERT_THAT(close(fd), SyscallSucceeds()); + absfile_ = GetAbsoluteTestTmpdir() + "/" + relfile_; + + // Create an empty directory, no writable permissions. + absdir_ = NewTempAbsPath(); + reldir_ = JoinPath(Basename(absdir_), ""); + ASSERT_THAT(mkdir(reldir_.c_str(), 0555), SyscallSucceeds()); + + // This file doesn't exist. + relnone_ = NewTempRelPath(); + absnone_ = GetAbsoluteTestTmpdir() + "/" + relnone_; + } + + // TearDown unlinks created files. 
+ void TearDown() override { + ASSERT_THAT(unlink(absfile_.c_str()), SyscallSucceeds()); + ASSERT_THAT(rmdir(absdir_.c_str()), SyscallSucceeds()); + } + + std::string relfile_; + std::string reldir_; + + std::string absfile_; + std::string absdir_; + + std::string relnone_; + std::string absnone_; +}; + +TEST_F(AccessTest, RelativeFile) { + EXPECT_THAT(access(relfile_.c_str(), R_OK), SyscallSucceeds()); +} + +TEST_F(AccessTest, RelativeDir) { + EXPECT_THAT(access(reldir_.c_str(), R_OK | X_OK), SyscallSucceeds()); +} + +TEST_F(AccessTest, AbsFile) { + EXPECT_THAT(access(absfile_.c_str(), R_OK), SyscallSucceeds()); +} + +TEST_F(AccessTest, AbsDir) { + EXPECT_THAT(access(absdir_.c_str(), R_OK | X_OK), SyscallSucceeds()); +} + +TEST_F(AccessTest, RelDoesNotExist) { + EXPECT_THAT(access(relnone_.c_str(), R_OK), SyscallFailsWithErrno(ENOENT)); +} + +TEST_F(AccessTest, AbsDoesNotExist) { + EXPECT_THAT(access(absnone_.c_str(), R_OK), SyscallFailsWithErrno(ENOENT)); +} + +TEST_F(AccessTest, InvalidMode) { + EXPECT_THAT(access(relfile_.c_str(), 0xffffffff), + SyscallFailsWithErrno(EINVAL)); +} + +TEST_F(AccessTest, NoPerms) { + // Drop capabilities that allow us to override permissions. We must drop + // PERMITTED because access() checks those instead of EFFECTIVE. + ASSERT_NO_ERRNO(DropPermittedCapability(CAP_DAC_OVERRIDE)); + ASSERT_NO_ERRNO(DropPermittedCapability(CAP_DAC_READ_SEARCH)); + + EXPECT_THAT(access(absdir_.c_str(), W_OK), SyscallFailsWithErrno(EACCES)); +} + +TEST_F(AccessTest, InvalidName) { + EXPECT_THAT(access(reinterpret_cast<char*>(0x1234), W_OK), + SyscallFailsWithErrno(EFAULT)); +} + +TEST_F(AccessTest, UsrReadOnly) { + // Drop capabilities that allow us to override permissions. We must drop + // PERMITTED because access() checks those instead of EFFECTIVE. 
+ ASSERT_NO_ERRNO(DropPermittedCapability(CAP_DAC_OVERRIDE)); + ASSERT_NO_ERRNO(DropPermittedCapability(CAP_DAC_READ_SEARCH)); + + const std::string filename = CreateTempFile(0400); + EXPECT_THAT(access(filename.c_str(), R_OK), SyscallSucceeds()); + EXPECT_THAT(access(filename.c_str(), W_OK), SyscallFailsWithErrno(EACCES)); + EXPECT_THAT(access(filename.c_str(), X_OK), SyscallFailsWithErrno(EACCES)); + EXPECT_THAT(unlink(filename.c_str()), SyscallSucceeds()); +} + +TEST_F(AccessTest, UsrReadExec) { + // Drop capabilities that allow us to override permissions. We must drop + // PERMITTED because access() checks those instead of EFFECTIVE. + ASSERT_NO_ERRNO(DropPermittedCapability(CAP_DAC_OVERRIDE)); + ASSERT_NO_ERRNO(DropPermittedCapability(CAP_DAC_READ_SEARCH)); + + const std::string filename = CreateTempFile(0500); + EXPECT_THAT(access(filename.c_str(), R_OK | X_OK), SyscallSucceeds()); + EXPECT_THAT(access(filename.c_str(), W_OK), SyscallFailsWithErrno(EACCES)); + EXPECT_THAT(unlink(filename.c_str()), SyscallSucceeds()); +} + +TEST_F(AccessTest, UsrReadWrite) { + const std::string filename = CreateTempFile(0600); + EXPECT_THAT(access(filename.c_str(), R_OK | W_OK), SyscallSucceeds()); + EXPECT_THAT(access(filename.c_str(), X_OK), SyscallFailsWithErrno(EACCES)); + EXPECT_THAT(unlink(filename.c_str()), SyscallSucceeds()); +} + +TEST_F(AccessTest, UsrReadWriteExec) { + const std::string filename = CreateTempFile(0700); + EXPECT_THAT(access(filename.c_str(), R_OK | W_OK | X_OK), SyscallSucceeds()); + EXPECT_THAT(unlink(filename.c_str()), SyscallSucceeds()); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/affinity.cc b/test/syscalls/linux/affinity.cc new file mode 100644 index 000000000..8a16343d5 --- /dev/null +++ b/test/syscalls/linux/affinity.cc @@ -0,0 +1,241 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in 
compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <sched.h> +#include <sys/types.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "absl/strings/str_split.h" +#include "test/util/cleanup.h" +#include "test/util/fs_util.h" +#include "test/util/posix_error.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { +namespace { + +// These tests are for both the sched_getaffinity(2) and sched_setaffinity(2) +// syscalls. +class AffinityTest : public ::testing::Test { + protected: + void SetUp() override { + EXPECT_THAT( + // Needs use the raw syscall to get the actual size. + cpuset_size_ = syscall(SYS_sched_getaffinity, /*pid=*/0, + sizeof(cpu_set_t), &mask_), + SyscallSucceeds()); + // Lots of tests rely on having more than 1 logical processor available. + EXPECT_GT(CPU_COUNT(&mask_), 1); + } + + static PosixError ClearLowestBit(cpu_set_t* mask, size_t cpus) { + const size_t mask_size = CPU_ALLOC_SIZE(cpus); + for (size_t n = 0; n < cpus; ++n) { + if (CPU_ISSET_S(n, mask_size, mask)) { + CPU_CLR_S(n, mask_size, mask); + return NoError(); + } + } + return PosixError(EINVAL, "No bit to clear, mask is empty"); + } + + PosixError ClearLowestBit() { return ClearLowestBit(&mask_, CPU_SETSIZE); } + + // Stores the initial cpu mask for this process. + cpu_set_t mask_ = {}; + int cpuset_size_ = 0; +}; + +// sched_getaffinity(2) is implemented. 
+TEST_F(AffinityTest, SchedGetAffinityImplemented) { + EXPECT_THAT(sched_getaffinity(/*pid=*/0, sizeof(cpu_set_t), &mask_), + SyscallSucceeds()); +} + +// PID is not found. +TEST_F(AffinityTest, SchedGetAffinityInvalidPID) { + // Flaky, but it's tough to avoid a race condition when finding an unused pid + EXPECT_THAT(sched_getaffinity(/*pid=*/INT_MAX - 1, sizeof(cpu_set_t), &mask_), + SyscallFailsWithErrno(ESRCH)); +} + +// PID is not found. +TEST_F(AffinityTest, SchedSetAffinityInvalidPID) { + // Flaky, but it's tough to avoid a race condition when finding an unused pid + EXPECT_THAT(sched_setaffinity(/*pid=*/INT_MAX - 1, sizeof(cpu_set_t), &mask_), + SyscallFailsWithErrno(ESRCH)); +} + +TEST_F(AffinityTest, SchedSetAffinityZeroMask) { + CPU_ZERO(&mask_); + EXPECT_THAT(sched_setaffinity(/*pid=*/0, sizeof(cpu_set_t), &mask_), + SyscallFailsWithErrno(EINVAL)); +} + +// N.B. This test case relies on cpuset_size_ larger than the actual number of +// of all existing CPUs. Check your machine if the test fails. +TEST_F(AffinityTest, SchedSetAffinityNonexistentCPUDropped) { + cpu_set_t mask = mask_; + // Add a nonexistent CPU. + // + // The number needs to be larger than the possible number of CPU available, + // but smaller than the number of the CPU that the kernel claims to support -- + // it's implicitly returned by raw sched_getaffinity syscall. + CPU_SET(cpuset_size_ * 8 - 1, &mask); + EXPECT_THAT( + // Use raw syscall because it will be rejected by the libc wrapper + // otherwise. 
+ syscall(SYS_sched_setaffinity, /*pid=*/0, sizeof(cpu_set_t), &mask), + SyscallSucceeds()) + << "failed with cpumask : " << CPUSetToString(mask) + << ", cpuset_size_ : " << cpuset_size_; + cpu_set_t newmask; + EXPECT_THAT(sched_getaffinity(/*pid=*/0, sizeof(cpu_set_t), &newmask), + SyscallSucceeds()); + EXPECT_TRUE(CPU_EQUAL(&mask_, &newmask)) + << "got: " << CPUSetToString(newmask) + << " != expected: " << CPUSetToString(mask_); +} + +TEST_F(AffinityTest, SchedSetAffinityOnlyNonexistentCPUFails) { + // Make an empty cpu set. + CPU_ZERO(&mask_); + // Add a nonexistent CPU. + // + // The number needs to be larger than the possible number of CPU available, + // but smaller than the number of the CPU that the kernel claims to support -- + // it's implicitly returned by raw sched_getaffinity syscall. + int cpu = cpuset_size_ * 8 - 1; + if (cpu <= NumCPUs()) { + LOG(INFO) << "Skipping test: cpu " << cpu << " exists"; + return; + } + CPU_SET(cpu, &mask_); + EXPECT_THAT( + // Use raw syscall because it will be rejected by the libc wrapper + // otherwise. + syscall(SYS_sched_setaffinity, /*pid=*/0, sizeof(cpu_set_t), &mask_), + SyscallFailsWithErrno(EINVAL)); +} + +TEST_F(AffinityTest, SchedSetAffinityInvalidSize) { + EXPECT_GT(cpuset_size_, 0); + // Not big enough. + EXPECT_THAT(sched_getaffinity(/*pid=*/0, cpuset_size_ - 1, &mask_), + SyscallFailsWithErrno(EINVAL)); + // Not a multiple of word size. 
+ EXPECT_THAT(sched_getaffinity(/*pid=*/0, cpuset_size_ + 1, &mask_), + SyscallFailsWithErrno(EINVAL)); +} + +TEST_F(AffinityTest, Sanity) { + ASSERT_NO_ERRNO(ClearLowestBit()); + EXPECT_THAT(sched_setaffinity(/*pid=*/0, sizeof(cpu_set_t), &mask_), + SyscallSucceeds()); + cpu_set_t newmask; + EXPECT_THAT(sched_getaffinity(/*pid=*/0, sizeof(cpu_set_t), &newmask), + SyscallSucceeds()); + EXPECT_TRUE(CPU_EQUAL(&mask_, &newmask)) + << "got: " << CPUSetToString(newmask) + << " != expected: " << CPUSetToString(mask_); +} + +TEST_F(AffinityTest, NewThread) { + ASSERT_NO_ERRNO(ClearLowestBit()); + ASSERT_NO_ERRNO(ClearLowestBit()); + EXPECT_THAT(sched_setaffinity(/*pid=*/0, sizeof(cpu_set_t), &mask_), + SyscallSucceeds()); + ScopedThread([this]() { + cpu_set_t child_mask; + ASSERT_THAT(sched_getaffinity(/*pid=*/0, sizeof(cpu_set_t), &child_mask), + SyscallSucceeds()); + ASSERT_TRUE(CPU_EQUAL(&child_mask, &mask_)) + << "child cpu mask: " << CPUSetToString(child_mask) + << " != parent cpu mask: " << CPUSetToString(mask_); + }); +} + +TEST_F(AffinityTest, ConsistentWithProcCpuInfo) { + // Count how many cpus are shown in /proc/cpuinfo. + std::string cpuinfo = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/cpuinfo")); + int count = 0; + for (auto const& line : absl::StrSplit(cpuinfo, '\n')) { + if (absl::StartsWith(line, "processor")) { + count++; + } + } + EXPECT_GE(count, CPU_COUNT(&mask_)); +} + +TEST_F(AffinityTest, ConsistentWithProcStat) { + // Count how many cpus are shown in /proc/stat. 
+ std::string stat = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/stat")); + int count = 0; + for (auto const& line : absl::StrSplit(stat, '\n')) { + if (absl::StartsWith(line, "cpu") && !absl::StartsWith(line, "cpu ")) { + count++; + } + } + EXPECT_GE(count, CPU_COUNT(&mask_)); +} + +TEST_F(AffinityTest, SmallCpuMask) { + const int num_cpus = NumCPUs(); + const size_t mask_size = CPU_ALLOC_SIZE(num_cpus); + cpu_set_t* mask = CPU_ALLOC(num_cpus); + ASSERT_NE(mask, nullptr); + const auto free_mask = Cleanup([&] { CPU_FREE(mask); }); + + CPU_ZERO_S(mask_size, mask); + ASSERT_THAT(sched_getaffinity(0, mask_size, mask), SyscallSucceeds()); +} + +TEST_F(AffinityTest, LargeCpuMask) { + // Allocate mask bigger than cpu_set_t normally allocates. + const size_t cpus = CPU_SETSIZE * 8; + const size_t mask_size = CPU_ALLOC_SIZE(cpus); + + cpu_set_t* large_mask = CPU_ALLOC(cpus); + auto free_mask = Cleanup([large_mask] { CPU_FREE(large_mask); }); + CPU_ZERO_S(mask_size, large_mask); + + // Check that get affinity with large mask works as expected. + ASSERT_THAT(sched_getaffinity(/*pid=*/0, mask_size, large_mask), + SyscallSucceeds()); + EXPECT_TRUE(CPU_EQUAL(&mask_, large_mask)) + << "got: " << CPUSetToString(*large_mask, cpus) + << " != expected: " << CPUSetToString(mask_); + + // Check that set affinity with large mask works as expected. 
+ ASSERT_NO_ERRNO(ClearLowestBit(large_mask, cpus)); + EXPECT_THAT(sched_setaffinity(/*pid=*/0, mask_size, large_mask), + SyscallSucceeds()); + + cpu_set_t* new_mask = CPU_ALLOC(cpus); + auto free_new_mask = Cleanup([new_mask] { CPU_FREE(new_mask); }); + CPU_ZERO_S(mask_size, new_mask); + EXPECT_THAT(sched_getaffinity(/*pid=*/0, mask_size, new_mask), + SyscallSucceeds()); + + EXPECT_TRUE(CPU_EQUAL_S(mask_size, large_mask, new_mask)) + << "got: " << CPUSetToString(*new_mask, cpus) + << " != expected: " << CPUSetToString(*large_mask, cpus); +} + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/aio.cc b/test/syscalls/linux/aio.cc new file mode 100644 index 000000000..cc5392223 --- /dev/null +++ b/test/syscalls/linux/aio.cc @@ -0,0 +1,433 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <fcntl.h> +#include <linux/aio_abi.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "test/syscalls/linux/file_base.h" +#include "test/util/cleanup.h" +#include "test/util/file_descriptor.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { +namespace { + +constexpr char kData[] = "hello world!"; + +int SubmitCtx(aio_context_t ctx, long nr, struct iocb** iocbpp) { + return syscall(__NR_io_submit, ctx, nr, iocbpp); +} + +} // namespace + +class AIOTest : public FileTest { + public: + AIOTest() : ctx_(0) {} + + int SetupContext(unsigned int nr) { + return syscall(__NR_io_setup, nr, &ctx_); + } + + int Submit(long nr, struct iocb** iocbpp) { + return SubmitCtx(ctx_, nr, iocbpp); + } + + int GetEvents(long min, long max, struct io_event* events, + struct timespec* timeout) { + return RetryEINTR(syscall)(__NR_io_getevents, ctx_, min, max, events, + timeout); + } + + int DestroyContext() { return syscall(__NR_io_destroy, ctx_); } + + void TearDown() override { + FileTest::TearDown(); + if (ctx_ != 0) { + ASSERT_THAT(DestroyContext(), SyscallSucceeds()); + } + } + + struct iocb CreateCallback() { + struct iocb cb = {}; + cb.aio_data = 0x123; + cb.aio_fildes = test_file_fd_.get(); + cb.aio_lio_opcode = IOCB_CMD_PWRITE; + cb.aio_buf = reinterpret_cast<uint64_t>(kData); + cb.aio_offset = 0; + cb.aio_nbytes = strlen(kData); + return cb; + } + + protected: + aio_context_t ctx_; +}; + +TEST_F(AIOTest, BasicWrite) { + // Copied from fs/aio.c. + constexpr unsigned AIO_RING_MAGIC = 0xa10a10a1; + struct aio_ring { + unsigned id; + unsigned nr; + unsigned head; + unsigned tail; + unsigned magic; + unsigned compat_features; + unsigned incompat_features; + unsigned header_length; + struct io_event io_events[0]; + }; + + // Setup a context that is 128 entries deep. 
+ ASSERT_THAT(SetupContext(128), SyscallSucceeds()); + + // Check that 'ctx_' points to a valid address. libaio uses it to check if + // aio implementation uses aio_ring. gVisor doesn't and returns all zeroes. + // Linux implements aio_ring, so skip the zeroes check. + // + // TODO: Remove when gVisor implements aio_ring. + auto ring = reinterpret_cast<struct aio_ring*>(ctx_); + auto magic = IsRunningOnGvisor() ? 0 : AIO_RING_MAGIC; + EXPECT_EQ(ring->magic, magic); + + struct iocb cb = CreateCallback(); + struct iocb* cbs[1] = {&cb}; + + // Submit the request. + ASSERT_THAT(Submit(1, cbs), SyscallSucceedsWithValue(1)); + + // Get the reply. + struct io_event events[1]; + ASSERT_THAT(GetEvents(1, 1, events, nullptr), SyscallSucceedsWithValue(1)); + + // Verify that it is as expected. + EXPECT_EQ(events[0].data, 0x123); + EXPECT_EQ(events[0].obj, reinterpret_cast<long>(&cb)); + EXPECT_EQ(events[0].res, strlen(kData)); + + // Verify that the file contains the contents. + char verify_buf[32] = {}; + ASSERT_THAT(read(test_file_fd_.get(), &verify_buf[0], strlen(kData)), + SyscallSucceeds()); + EXPECT_EQ(strcmp(kData, &verify_buf[0]), 0); +} + +TEST_F(AIOTest, BadWrite) { + // Create a pipe and immediately close the read end. + int pipefd[2]; + ASSERT_THAT(pipe(pipefd), SyscallSucceeds()); + + FileDescriptor rfd(pipefd[0]); + FileDescriptor wfd(pipefd[1]); + + rfd.reset(); // Close the read end. + + // Setup a context that is 128 entries deep. + ASSERT_THAT(SetupContext(128), SyscallSucceeds()); + + struct iocb cb = CreateCallback(); + // Try to write to the read end. + cb.aio_fildes = wfd.get(); + struct iocb* cbs[1] = {&cb}; + + // Submit the request. + ASSERT_THAT(Submit(1, cbs), SyscallSucceedsWithValue(1)); + + // Get the reply. + struct io_event events[1]; + ASSERT_THAT(GetEvents(1, 1, events, nullptr), SyscallSucceedsWithValue(1)); + + // Verify that it fails with the right error code. 
+ EXPECT_EQ(events[0].data, 0x123); + EXPECT_EQ(events[0].obj, reinterpret_cast<uint64_t>(&cb)); + EXPECT_LT(events[0].res, 0); +} + +TEST_F(AIOTest, ExitWithPendingIo) { + // Setup a context that is 5 entries deep. + ASSERT_THAT(SetupContext(5), SyscallSucceeds()); + + struct iocb cb = CreateCallback(); + struct iocb* cbs[] = {&cb}; + + // Submit a request but don't complete it to make it pending. + EXPECT_THAT(Submit(1, cbs), SyscallSucceeds()); +} + +int Submitter(void* arg) { + auto test = reinterpret_cast<AIOTest*>(arg); + + struct iocb cb = test->CreateCallback(); + struct iocb* cbs[1] = {&cb}; + + // Submit the request. + TEST_CHECK(test->Submit(1, cbs) == 1); + return 0; +} + +TEST_F(AIOTest, CloneVm) { + // Setup a context that is 128 entries deep. + ASSERT_THAT(SetupContext(128), SyscallSucceeds()); + + const size_t kStackSize = 5 * kPageSize; + std::unique_ptr<char[]> stack(new char[kStackSize]); + char* bp = stack.get() + kStackSize; + pid_t child; + ASSERT_THAT(child = clone(Submitter, bp, CLONE_VM | SIGCHLD, + reinterpret_cast<void*>(this)), + SyscallSucceeds()); + + // Get the reply. + struct io_event events[1]; + ASSERT_THAT(GetEvents(1, 1, events, nullptr), SyscallSucceedsWithValue(1)); + + // Verify that it is as expected. + EXPECT_EQ(events[0].data, 0x123); + EXPECT_EQ(events[0].res, strlen(kData)); + + // Verify that the file contains the contents. + char verify_buf[32] = {}; + ASSERT_THAT(read(test_file_fd_.get(), &verify_buf[0], strlen(kData)), + SyscallSucceeds()); + EXPECT_EQ(strcmp(kData, &verify_buf[0]), 0); + + int status; + ASSERT_THAT(RetryEINTR(waitpid)(child, &status, 0), + SyscallSucceedsWithValue(child)); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << " status " << status; +} + +// Tests that AIO context can be remapped to a different address. +TEST_F(AIOTest, Mremap) { + // Setup a context that is 128 entries deep. 
+ ASSERT_THAT(SetupContext(128), SyscallSucceeds()); + + struct iocb cb = CreateCallback(); + struct iocb* cbs[1] = {&cb}; + + // Reserve address space for the mremap target so we have something safe to + // map over. + // + // N.B. We reserve 2 pages because we'll attempt to remap to 2 pages below. + // That should fail with EFAULT, but will fail with EINVAL if this mmap + // returns the page immediately below ctx_, as + // [new_address, new_address+2*kPageSize) overlaps [ctx_, ctx_+kPageSize). + void* new_address = mmap(nullptr, 2 * kPageSize, PROT_READ, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + ASSERT_THAT(reinterpret_cast<intptr_t>(new_address), SyscallSucceeds()); + auto mmap_cleanup = Cleanup([new_address] { + EXPECT_THAT(munmap(new_address, 2 * kPageSize), SyscallSucceeds()); + }); + + // Test that remapping to a larger address fails. + void* res = mremap(reinterpret_cast<void*>(ctx_), kPageSize, 2 * kPageSize, + MREMAP_FIXED | MREMAP_MAYMOVE, new_address); + ASSERT_THAT(reinterpret_cast<intptr_t>(res), SyscallFailsWithErrno(EFAULT)); + + // Remap context 'handle' to a different address. + res = mremap(reinterpret_cast<void*>(ctx_), kPageSize, kPageSize, + MREMAP_FIXED | MREMAP_MAYMOVE, new_address); + ASSERT_THAT( + reinterpret_cast<intptr_t>(res), + SyscallSucceedsWithValue(reinterpret_cast<intptr_t>(new_address))); + mmap_cleanup.Release(); + aio_context_t old_ctx = ctx_; + ctx_ = reinterpret_cast<aio_context_t>(new_address); + + // Check that submitting the request with the old 'ctx_' fails. + ASSERT_THAT(SubmitCtx(old_ctx, 1, cbs), SyscallFailsWithErrno(EINVAL)); + + // Submit the request with the new 'ctx_'. + ASSERT_THAT(Submit(1, cbs), SyscallSucceedsWithValue(1)); + + // Remap again. 
+ new_address = + mmap(nullptr, kPageSize, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + ASSERT_THAT(reinterpret_cast<int64_t>(new_address), SyscallSucceeds()); + auto mmap_cleanup2 = Cleanup([new_address] { + EXPECT_THAT(munmap(new_address, kPageSize), SyscallSucceeds()); + }); + res = mremap(reinterpret_cast<void*>(ctx_), kPageSize, kPageSize, + MREMAP_FIXED | MREMAP_MAYMOVE, new_address); + ASSERT_THAT(reinterpret_cast<int64_t>(res), + SyscallSucceedsWithValue(reinterpret_cast<int64_t>(new_address))); + mmap_cleanup2.Release(); + ctx_ = reinterpret_cast<aio_context_t>(new_address); + + // Get the reply with yet another 'ctx_' and verify it. + struct io_event events[1]; + ASSERT_THAT(GetEvents(1, 1, events, nullptr), SyscallSucceedsWithValue(1)); + EXPECT_EQ(events[0].data, 0x123); + EXPECT_EQ(events[0].obj, reinterpret_cast<long>(&cb)); + EXPECT_EQ(events[0].res, strlen(kData)); + + // Verify that the file contains the contents. + char verify_buf[32] = {}; + ASSERT_THAT(read(test_file_fd_.get(), &verify_buf[0], strlen(kData)), + SyscallSucceeds()); + EXPECT_EQ(strcmp(kData, &verify_buf[0]), 0); +} + +// Tests that AIO context can be replaced with a different mapping at the same +// address and continue working. Don't ask why, but Linux allows it. +TEST_F(AIOTest, MremapOver) { + // Setup a context that is 128 entries deep. + ASSERT_THAT(SetupContext(128), SyscallSucceeds()); + + struct iocb cb = CreateCallback(); + struct iocb* cbs[1] = {&cb}; + + ASSERT_THAT(Submit(1, cbs), SyscallSucceedsWithValue(1)); + + // Allocate a new VMA, copy 'ctx_' content over, and remap it on top + // of 'ctx_'. 
+ void* new_address = mmap(nullptr, kPageSize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + ASSERT_THAT(reinterpret_cast<int64_t>(new_address), SyscallSucceeds()); + auto mmap_cleanup = Cleanup([new_address] { + EXPECT_THAT(munmap(new_address, kPageSize), SyscallSucceeds()); + }); + + memcpy(new_address, reinterpret_cast<void*>(ctx_), kPageSize); + void* res = + mremap(new_address, kPageSize, kPageSize, MREMAP_FIXED | MREMAP_MAYMOVE, + reinterpret_cast<void*>(ctx_)); + ASSERT_THAT(reinterpret_cast<int64_t>(res), SyscallSucceedsWithValue(ctx_)); + mmap_cleanup.Release(); + + // Everything continues to work just fine. + struct io_event events[1]; + ASSERT_THAT(GetEvents(1, 1, events, nullptr), SyscallSucceedsWithValue(1)); + EXPECT_EQ(events[0].data, 0x123); + EXPECT_EQ(events[0].obj, reinterpret_cast<long>(&cb)); + EXPECT_EQ(events[0].res, strlen(kData)); + + // Verify that the file contains the contents. + char verify_buf[32] = {}; + ASSERT_THAT(read(test_file_fd_.get(), &verify_buf[0], strlen(kData)), + SyscallSucceeds()); + EXPECT_EQ(strcmp(kData, &verify_buf[0]), 0); +} + +// Tests that AIO calls fail if context's address is inaccessible. +TEST_F(AIOTest, Mprotect) { + // Setup a context that is 128 entries deep. + ASSERT_THAT(SetupContext(128), SyscallSucceeds()); + + struct iocb cb = CreateCallback(); + struct iocb* cbs[1] = {&cb}; + + ASSERT_THAT(Submit(1, cbs), SyscallSucceedsWithValue(1)); + + // Makes the context 'handle' inaccessible and check that all subsequent + // calls fail. + ASSERT_THAT(mprotect(reinterpret_cast<void*>(ctx_), kPageSize, PROT_NONE), + SyscallSucceeds()); + struct io_event events[1]; + EXPECT_THAT(GetEvents(1, 1, events, nullptr), SyscallFailsWithErrno(EINVAL)); + ASSERT_THAT(Submit(1, cbs), SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(DestroyContext(), SyscallFailsWithErrno(EINVAL)); + + // Prevent TearDown from attempting to destroy the context and fail. 
+ ctx_ = 0; +} + +TEST_F(AIOTest, Timeout) { + // Setup a context that is 128 entries deep. + ASSERT_THAT(SetupContext(128), SyscallSucceeds()); + + struct timespec timeout; + timeout.tv_sec = 0; + timeout.tv_nsec = 10; + struct io_event events[1]; + ASSERT_THAT(GetEvents(1, 1, events, &timeout), SyscallSucceedsWithValue(0)); +} + +class AIOReadWriteParamTest : public AIOTest, + public ::testing::WithParamInterface<int> {}; + +TEST_P(AIOReadWriteParamTest, BadOffset) { + // Setup a context that is 128 entries deep. + ASSERT_THAT(SetupContext(128), SyscallSucceeds()); + + struct iocb cb = CreateCallback(); + struct iocb* cbs[1] = {&cb}; + + // Create a buffer that we can write to. + char buf[] = "hello world!"; + cb.aio_buf = reinterpret_cast<uint64_t>(buf); + + // Set the operation on the callback and give a negative offset. + const int opcode = GetParam(); + cb.aio_lio_opcode = opcode; + + iovec iov = {}; + if (opcode == IOCB_CMD_PREADV || opcode == IOCB_CMD_PWRITEV) { + // Create a valid iovec and set it in the callback. + iov.iov_base = reinterpret_cast<void*>(buf); + iov.iov_len = 1; + cb.aio_buf = reinterpret_cast<uint64_t>(&iov); + // aio_nbytes is the number of iovecs. + cb.aio_nbytes = 1; + } + + // Pass a negative offset. + cb.aio_offset = -1; + + // Should get error on submission. + ASSERT_THAT(Submit(1, cbs), SyscallFailsWithErrno(EINVAL)); +} + +INSTANTIATE_TEST_CASE_P(BadOffset, AIOReadWriteParamTest, + ::testing::Values(IOCB_CMD_PREAD, IOCB_CMD_PWRITE, + IOCB_CMD_PREADV, IOCB_CMD_PWRITEV)); + +class AIOVectorizedParamTest : public AIOTest, + public ::testing::WithParamInterface<int> {}; + +TEST_P(AIOVectorizedParamTest, BadIOVecs) { + // Setup a context that is 128 entries deep. + ASSERT_THAT(SetupContext(128), SyscallSucceeds()); + + struct iocb cb = CreateCallback(); + struct iocb* cbs[1] = {&cb}; + + // Modify the callback to use the operation from the param. 
+ cb.aio_lio_opcode = GetParam(); + + // Create an iovec with address in kernel range, and pass that as the buffer. + iovec iov = {}; + iov.iov_base = reinterpret_cast<void*>(0xFFFFFFFF00000000); + iov.iov_len = 1; + cb.aio_buf = reinterpret_cast<uint64_t>(&iov); + // aio_nbytes is the number of iovecs. + cb.aio_nbytes = 1; + + // Should get error on submission. + ASSERT_THAT(Submit(1, cbs), SyscallFailsWithErrno(EFAULT)); +} + +INSTANTIATE_TEST_CASE_P(BadIOVecs, AIOVectorizedParamTest, + ::testing::Values(IOCB_CMD_PREADV, IOCB_CMD_PWRITEV)); + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/alarm.cc b/test/syscalls/linux/alarm.cc new file mode 100644 index 000000000..e0ddbb415 --- /dev/null +++ b/test/syscalls/linux/alarm.cc @@ -0,0 +1,193 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <signal.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/util/file_descriptor.h" +#include "test/util/logging.h" +#include "test/util/signal_util.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// N.B. Below, main blocks SIGALRM. Test cases must unblock it if they want +// delivery. + +void do_nothing_handler(int sig, siginfo_t* siginfo, void* arg) {} + +// No random save as the test relies on alarm timing. 
Cooperative save tests +// already cover the save between alarm and read. +TEST(AlarmTest, Interrupt_NoRandomSave) { + int pipe_fds[2]; + ASSERT_THAT(pipe(pipe_fds), SyscallSucceeds()); + + FileDescriptor read_fd(pipe_fds[0]); + FileDescriptor write_fd(pipe_fds[1]); + + // Use a signal handler that interrupts but does nothing rather than using the + // default terminate action. + struct sigaction sa; + sa.sa_sigaction = do_nothing_handler; + sigfillset(&sa.sa_mask); + sa.sa_flags = 0; + auto sa_cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGALRM, sa)); + + // Actually allow SIGALRM delivery. + auto mask_cleanup = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, SIGALRM)); + + // Alarm in 20 second, which should be well after read blocks below. + ASSERT_THAT(alarm(20), SyscallSucceeds()); + + char buf; + ASSERT_THAT(read(read_fd.get(), &buf, 1), SyscallFailsWithErrno(EINTR)); +} + +/* Count of the number of SIGALARMS handled. */ +static volatile int alarms_received = 0; + +void inc_alarms_handler(int sig, siginfo_t* siginfo, void* arg) { + alarms_received++; +} + +// No random save as the test relies on alarm timing. Cooperative save tests +// already cover the save between alarm and read. +TEST(AlarmTest, Restart_NoRandomSave) { + alarms_received = 0; + + int pipe_fds[2]; + ASSERT_THAT(pipe(pipe_fds), SyscallSucceeds()); + + FileDescriptor read_fd(pipe_fds[0]); + // Write end closed by thread below. + + struct sigaction sa; + sa.sa_sigaction = inc_alarms_handler; + sigfillset(&sa.sa_mask); + sa.sa_flags = SA_RESTART; + auto sa_cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGALRM, sa)); + + // Spawn a thread to eventually unblock the read below. + ScopedThread t([pipe_fds] { + absl::SleepFor(absl::Seconds(30)); + EXPECT_THAT(close(pipe_fds[1]), SyscallSucceeds()); + }); + + // Actually allow SIGALRM delivery. 
+ auto mask_cleanup = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, SIGALRM)); + + // Alarm in 20 second, which should be well after read blocks below, but + // before it returns. + ASSERT_THAT(alarm(20), SyscallSucceeds()); + + // Read and eventually get an EOF from the writer closing. If SA_RESTART + // didn't work, then the alarm would not have fired and we wouldn't increment + // our alarms_received count in our signal handler, or we would have not + // restarted the syscall gracefully, which we expect below in order to be + // able to get the final EOF on the pipe. + char buf; + ASSERT_THAT(read(read_fd.get(), &buf, 1), SyscallSucceeds()); + EXPECT_EQ(alarms_received, 1); + + t.Join(); +} + +// No random save as the test relies on alarm timing. Cooperative save tests +// already cover the save between alarm and pause. +TEST(AlarmTest, SaSiginfo_NoRandomSave) { + // Use a signal handler that interrupts but does nothing rather than using the + // default terminate action. + struct sigaction sa; + sa.sa_sigaction = do_nothing_handler; + sigfillset(&sa.sa_mask); + sa.sa_flags = SA_SIGINFO; + auto sa_cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGALRM, sa)); + + // Actually allow SIGALRM delivery. + auto mask_cleanup = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, SIGALRM)); + + // Alarm in 20 second, which should be well after pause blocks below. + ASSERT_THAT(alarm(20), SyscallSucceeds()); + ASSERT_THAT(pause(), SyscallFailsWithErrno(EINTR)); +} + +// No random save as the test relies on alarm timing. Cooperative save tests +// already cover the save between alarm and pause. +TEST(AlarmTest, SaInterrupt_NoRandomSave) { + // Use a signal handler that interrupts but does nothing rather than using the + // default terminate action. 
+ struct sigaction sa; + sa.sa_sigaction = do_nothing_handler; + sigfillset(&sa.sa_mask); + sa.sa_flags = SA_INTERRUPT; + auto sa_cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGALRM, sa)); + + // Actually allow SIGALRM delivery. + auto mask_cleanup = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, SIGALRM)); + + // Alarm in 20 second, which should be well after pause blocks below. + ASSERT_THAT(alarm(20), SyscallSucceeds()); + ASSERT_THAT(pause(), SyscallFailsWithErrno(EINTR)); +} + +TEST(AlarmTest, UserModeSpinning) { + alarms_received = 0; + + struct sigaction sa = {}; + sa.sa_sigaction = inc_alarms_handler; + sigfillset(&sa.sa_mask); + sa.sa_flags = SA_SIGINFO; + auto sa_cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGALRM, sa)); + + // Actually allow SIGALRM delivery. + auto mask_cleanup = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, SIGALRM)); + + // Alarm in 20 second, which should be well into the loop below. + ASSERT_THAT(alarm(20), SyscallSucceeds()); + // Make sure that the signal gets delivered even if we are spinning in user + // mode when it arrives. + while (!alarms_received) { + } +} + +} // namespace + +} // namespace testing +} // namespace gvisor + +int main(int argc, char** argv) { + // These tests depend on delivering SIGALRM to the main thread. Block SIGALRM + // so that any other threads created by TestInit will also have SIGALRM + // blocked. + sigset_t set; + sigemptyset(&set); + sigaddset(&set, SIGALRM); + TEST_PCHECK(sigprocmask(SIG_BLOCK, &set, nullptr) == 0); + + gvisor::testing::TestInit(&argc, &argv); + + return RUN_ALL_TESTS(); +} diff --git a/test/syscalls/linux/arch_prctl.cc b/test/syscalls/linux/arch_prctl.cc new file mode 100644 index 000000000..5687ceb86 --- /dev/null +++ b/test/syscalls/linux/arch_prctl.cc @@ -0,0 +1,48 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <asm/prctl.h> +#include <sys/prctl.h> + +#include "gtest/gtest.h" +#include "test/util/test_util.h" + +// glibc does not provide a prototype for arch_prctl() so declare it here. +extern "C" int arch_prctl(int code, uintptr_t addr); + +namespace gvisor { +namespace testing { + +namespace { + +TEST(ArchPrctlTest, GetSetFS) { + uintptr_t orig; + const uintptr_t kNonCanonicalFsbase = 0x4141414142424242; + + // Get the original FS.base and then set it to the same value (this is + // intentional because FS.base is the TLS pointer so we cannot change it + // arbitrarily). + ASSERT_THAT(arch_prctl(ARCH_GET_FS, reinterpret_cast<uintptr_t>(&orig)), + SyscallSucceeds()); + ASSERT_THAT(arch_prctl(ARCH_SET_FS, orig), SyscallSucceeds()); + + // Trying to set FS.base to a non-canonical value should return an error. + ASSERT_THAT(arch_prctl(ARCH_SET_FS, kNonCanonicalFsbase), + SyscallFailsWithErrno(EPERM)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/bad.cc b/test/syscalls/linux/bad.cc new file mode 100644 index 000000000..a2634a8bf --- /dev/null +++ b/test/syscalls/linux/bad.cc @@ -0,0 +1,39 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <sys/syscall.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +TEST(BadSyscallTest, NotImplemented) { + // get_kernel_syms is not supported in Linux > 2.6, and not implemented in + // gVisor. + EXPECT_THAT(syscall(SYS_get_kernel_syms), SyscallFailsWithErrno(ENOSYS)); +} + +TEST(BadSyscallTest, NegativeOne) { + EXPECT_THAT(syscall(-1), SyscallFailsWithErrno(ENOSYS)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/base_poll_test.cc b/test/syscalls/linux/base_poll_test.cc new file mode 100644 index 000000000..bba0108ea --- /dev/null +++ b/test/syscalls/linux/base_poll_test.cc @@ -0,0 +1,65 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "test/syscalls/linux/base_poll_test.h" + +#include <sys/syscall.h> +#include <sys/types.h> +#include <syscall.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "absl/memory/memory.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +static volatile int timer_fired = 0; +static void SigAlarmHandler(int, siginfo_t*, void*) { timer_fired = 1; } + +BasePollTest::BasePollTest() { + // Register our SIGALRM handler, but save the original so we can restore in + // the destructor. + struct sigaction sa = {}; + sa.sa_sigaction = SigAlarmHandler; + sigfillset(&sa.sa_mask); + TEST_PCHECK(sigaction(SIGALRM, &sa, &original_alarm_sa_) == 0); +} + +BasePollTest::~BasePollTest() { + ClearTimer(); + TEST_PCHECK(sigaction(SIGALRM, &original_alarm_sa_, nullptr) == 0); +} + +void BasePollTest::SetTimer(absl::Duration duration) { + pid_t tgid = getpid(); + pid_t tid = gettid(); + ClearTimer(); + + // Create a new timer thread. + timer_ = absl::make_unique<TimerThread>(absl::Now() + duration, tgid, tid); +} + +bool BasePollTest::TimerFired() const { return timer_fired; } + +void BasePollTest::ClearTimer() { + timer_.reset(); + timer_fired = 0; +} + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/base_poll_test.h b/test/syscalls/linux/base_poll_test.h new file mode 100644 index 000000000..9b9b81933 --- /dev/null +++ b/test/syscalls/linux/base_poll_test.h @@ -0,0 +1,101 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GVISOR_TEST_SYSCALLS_BASE_POLL_TEST_H_ +#define GVISOR_TEST_SYSCALLS_BASE_POLL_TEST_H_ + +#include <signal.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <syscall.h> +#include <time.h> +#include <unistd.h> + +#include <memory> + +#include "gtest/gtest.h" +#include "absl/synchronization/mutex.h" +#include "absl/time/time.h" +#include "test/util/logging.h" +#include "test/util/signal_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +// TimerThread is a cancelable timer. Its thread waits until `deadline` passes +// or Cancel() is called; if it was not cancelled, it sends SIGALRM to thread +// `tid` in thread group `tgid`. Destroying the object cancels the timer. +class TimerThread { + public: + TimerThread(absl::Time deadline, pid_t tgid, pid_t tid) + : thread_([=] { + mu_.Lock(); + mu_.AwaitWithDeadline(absl::Condition(&cancel_), deadline); + if (!cancel_) { + TEST_PCHECK(tgkill(tgid, tid, SIGALRM) == 0); + } + mu_.Unlock(); + }) {} + + ~TimerThread() { Cancel(); } + + void Cancel() { + absl::MutexLock ml(&mu_); + cancel_ = true; + } + + private: + mutable absl::Mutex mu_; + bool cancel_ GUARDED_BY(mu_) = false; + + // Must be last to ensure that the destructor for the thread is run before + // any other member of the object is destroyed. + ScopedThread thread_; +}; + +// Base test fixture for poll, select, ppoll, and pselect tests. +// +// This fixture makes use of SIGALRM. The original handler is saved in the +// constructor and restored in the destructor. +class BasePollTest : public ::testing::Test { + protected: + BasePollTest(); + ~BasePollTest() override; + + // Sets a timer that will send a signal to the calling thread after + // `duration`. 
+ void SetTimer(absl::Duration duration); + + // Returns true if the timer has fired. + bool TimerFired() const; + + // Stops the pending timer (if any) and clears the "fired" state. + void ClearTimer(); + + private: + // Thread that implements the timer. If the timer is stopped, timer_ is null. + // + // We have to use a thread for this purpose because tests using this fixture + // expect to be interrupted by the timer signal, but itimers/alarm(2) send + // thread-group-directed signals, which may be handled by any thread in the + // test process. + std::unique_ptr<TimerThread> timer_; + + // The original SIGALRM handler, to restore in destructor. + struct sigaction original_alarm_sa_; +}; + +} // namespace testing +} // namespace gvisor + +#endif // GVISOR_TEST_SYSCALLS_BASE_POLL_TEST_H_ diff --git a/test/syscalls/linux/bind.cc b/test/syscalls/linux/bind.cc new file mode 100644 index 000000000..354e8e53c --- /dev/null +++ b/test/syscalls/linux/bind.cc @@ -0,0 +1,146 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <stdio.h> +#include <sys/socket.h> +#include <sys/un.h> + +#include "gtest/gtest.h" +#include "gtest/gtest.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +TEST_P(AllSocketPairTest, Bind) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); +} + +TEST_P(AllSocketPairTest, BindTooLong) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + // first_addr is a sockaddr_storage being used as a sockaddr_un. Use the full + // length which is longer than expected for a Unix socket. + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sizeof(sockaddr_storage)), + SyscallFailsWithErrno(EINVAL)); +} + +// Binding a socket that is already bound must fail. +TEST_P(AllSocketPairTest, DoubleBindSocket) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + EXPECT_THAT( + bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + // Linux 4.09 returns EINVAL here, but some time before 4.19 it switched + // to EADDRINUSE. 
+ AnyOf(SyscallFailsWithErrno(EADDRINUSE), SyscallFailsWithErrno(EINVAL))); +} + +TEST_P(AllSocketPairTest, GetLocalAddr) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + socklen_t addressLength = sockets->first_addr_size(); + struct sockaddr_storage address = {}; + ASSERT_THAT(getsockname(sockets->first_fd(), (struct sockaddr*)(&address), + &addressLength), + SyscallSucceeds()); + EXPECT_EQ( + 0, memcmp(&address, sockets->first_addr(), sockets->first_addr_size())); +} + +TEST_P(AllSocketPairTest, GetLocalAddrWithoutBind) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + socklen_t addressLength = sockets->first_addr_size(); + struct sockaddr_storage received_address = {}; + ASSERT_THAT( + getsockname(sockets->first_fd(), (struct sockaddr*)(&received_address), + &addressLength), + SyscallSucceeds()); + struct sockaddr_storage want_address = {}; + want_address.ss_family = sockets->first_addr()->sa_family; + EXPECT_EQ(0, memcmp(&received_address, &want_address, addressLength)); +} + +TEST_P(AllSocketPairTest, GetRemoteAddressWithoutConnect) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + socklen_t addressLength = sockets->first_addr_size(); + struct sockaddr_storage address = {}; + ASSERT_THAT(getpeername(sockets->second_fd(), (struct sockaddr*)(&address), + &addressLength), + SyscallFailsWithErrno(ENOTCONN)); +} + +TEST_P(AllSocketPairTest, DoubleBindAddress) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + EXPECT_THAT(bind(sockets->second_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallFailsWithErrno(EADDRINUSE)); +} + +TEST_P(AllSocketPairTest, Unbind) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + 
 ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + ASSERT_THAT(close(sockets->release_first_fd()), SyscallSucceeds()); + + // Filesystem Unix sockets do not release their address when closed. + // NOTE(review): a non-zero first sa_data byte is presumably how a + // filesystem (non-abstract) address is recognised here -- confirm. + if (sockets->first_addr()->sa_data[0] != 0) { + ASSERT_THAT(bind(sockets->second_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallFailsWithErrno(EADDRINUSE)); + return; + } + + ASSERT_THAT(bind(sockets->second_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + ASSERT_THAT(close(sockets->release_second_fd()), SyscallSucceeds()); +} + +// Runs the tests above for filesystem and abstract unbound Unix domain socket +// pairs, across every listed socket type and flag combination. +INSTANTIATE_TEST_CASE_P( + AllUnixDomainSockets, AllSocketPairTest, + ::testing::ValuesIn(VecCat<SocketPairKind>( + ApplyVec<SocketPairKind>( + FilesystemUnboundUnixDomainSocketPair, + AllBitwiseCombinations( + List<int>{SOCK_STREAM, SOCK_DGRAM, SOCK_SEQPACKET}, + List<int>{0, SOCK_NONBLOCK}, List<int>{0, SOCK_CLOEXEC})), + ApplyVec<SocketPairKind>( + AbstractUnboundUnixDomainSocketPair, + AllBitwiseCombinations( + List<int>{SOCK_STREAM, SOCK_DGRAM, SOCK_SEQPACKET}, + List<int>{0, SOCK_NONBLOCK}, List<int>{0, SOCK_CLOEXEC}))))); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/brk.cc b/test/syscalls/linux/brk.cc new file mode 100644 index 000000000..33d353959 --- /dev/null +++ b/test/syscalls/linux/brk.cc @@ -0,0 +1,31 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include <stdint.h> +#include <sys/syscall.h> +#include <unistd.h> + +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +// The raw brk syscall, asked for the address (void*)-1, is expected to return +// the current break (as reported by sbrk(0)) rather than an error value. +TEST(BrkTest, BrkSyscallReturnsOldBrkOnFailure) { + auto old_brk = sbrk(0); + EXPECT_THAT(syscall(SYS_brk, reinterpret_cast<void*>(-1)), + SyscallSucceedsWithValue(reinterpret_cast<uintptr_t>(old_brk))); +} + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/chdir.cc b/test/syscalls/linux/chdir.cc new file mode 100644 index 000000000..4905ffb23 --- /dev/null +++ b/test/syscalls/linux/chdir.cc @@ -0,0 +1,69 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <fcntl.h> +#include <linux/limits.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/un.h> + +#include "gtest/gtest.h" +#include "test/util/capability_util.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +TEST(ChdirTest, Success) { + auto old_dir = GetAbsoluteTestTmpdir(); + auto temp_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + EXPECT_THAT(chdir(temp_dir.path().c_str()), SyscallSucceeds()); + // Temp path destructor deletes the newly created tmp dir and Sentry rejects + // saving when its current dir is still pointing to the path. 
Switch to a + // permanent path here. + EXPECT_THAT(chdir(old_dir.c_str()), SyscallSucceeds()); +} + +TEST(ChdirTest, PermissionDenied) { + // Drop capabilities that allow us to override directory permissions. + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false)); + + // Mode 0666 has no execute (search) bit set, so chdir into the directory is + // expected to be refused with EACCES. + auto temp_dir = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateDirWith(GetAbsoluteTestTmpdir(), 0666 /* mode */)); + EXPECT_THAT(chdir(temp_dir.path().c_str()), SyscallFailsWithErrno(EACCES)); +} + +TEST(ChdirTest, NotDir) { + auto temp_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + EXPECT_THAT(chdir(temp_file.path().c_str()), SyscallFailsWithErrno(ENOTDIR)); +} + +// A single path component one byte longer than NAME_MAX must be rejected. +TEST(ChdirTest, NameTooLong) { + std::string name(NAME_MAX + 1, 'a'); + ASSERT_THAT(chdir(name.c_str()), SyscallFailsWithErrno(ENAMETOOLONG)); +} + +TEST(ChdirTest, NotExist) { + EXPECT_THAT(chdir("/foo/bar"), SyscallFailsWithErrno(ENOENT)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/chmod.cc b/test/syscalls/linux/chmod.cc new file mode 100644 index 000000000..b7fc17946 --- /dev/null +++ b/test/syscalls/linux/chmod.cc @@ -0,0 +1,262 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <fcntl.h> +#include <sys/eventfd.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> +#include <string> + +#include "gtest/gtest.h" +#include "test/util/capability_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/fs_util.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +TEST(ChmodTest, ChmodFileSucceeds) { + // Drop capabilities that allow us to override file permissions. + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + + ASSERT_THAT(chmod(file.path().c_str(), 0466), SyscallSucceeds()); + EXPECT_THAT(open(file.path().c_str(), O_RDWR), SyscallFailsWithErrno(EACCES)); +} + +TEST(ChmodTest, ChmodDirSucceeds) { + // Drop capabilities that allow us to override file and directory permissions. + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false)); + + auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const std::string fileInDir = NewTempAbsPathInDir(dir.path()); + + ASSERT_THAT(chmod(dir.path().c_str(), 0466), SyscallSucceeds()); + EXPECT_THAT(open(fileInDir.c_str(), O_RDONLY), SyscallFailsWithErrno(EACCES)); +} + +TEST(ChmodTest, FchmodFileSucceeds_NoRandomSave) { + // Drop capabilities that allow us to override file permissions. + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileMode(0666)); + int fd; + ASSERT_THAT(fd = open(file.path().c_str(), O_RDWR), SyscallSucceeds()); + + { + const DisableSave ds; // File permissions are reduced. 
+ ASSERT_THAT(fchmod(fd, 0444), SyscallSucceeds()); + EXPECT_THAT(close(fd), SyscallSucceeds()); + } + + EXPECT_THAT(open(file.path().c_str(), O_RDWR), SyscallFailsWithErrno(EACCES)); +} + +TEST(ChmodTest, FchmodDirSucceeds_NoRandomSave) { + // Drop capabilities that allow us to override file and directory permissions. + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false)); + + auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + int fd; + ASSERT_THAT(fd = open(dir.path().c_str(), O_RDONLY | O_DIRECTORY), + SyscallSucceeds()); + + { + const DisableSave ds; // File permissions are reduced. + ASSERT_THAT(fchmod(fd, 0), SyscallSucceeds()); + EXPECT_THAT(close(fd), SyscallSucceeds()); + } + + EXPECT_THAT(open(dir.path().c_str(), O_RDONLY), + SyscallFailsWithErrno(EACCES)); +} + +TEST(ChmodTest, FchmodBadF) { + ASSERT_THAT(fchmod(-1, 0444), SyscallFailsWithErrno(EBADF)); +} + +TEST(ChmodTest, FchmodatBadF) { + ASSERT_THAT(fchmodat(-1, "foo", 0444, 0), SyscallFailsWithErrno(EBADF)); +} + +// NOTE(review): despite the name, this checks that an empty path combined with +// an invalid dirfd fails with ENOENT; no ENOTDIR case is exercised. +TEST(ChmodTest, FchmodatNotDir) { + ASSERT_THAT(fchmodat(-1, "", 0444, 0), SyscallFailsWithErrno(ENOENT)); +} + +TEST(ChmodTest, FchmodatFileAbsolutePath) { + // Drop capabilities that allow us to override file permissions. + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + + ASSERT_THAT(fchmodat(-1, file.path().c_str(), 0444, 0), SyscallSucceeds()); + EXPECT_THAT(open(file.path().c_str(), O_RDWR), SyscallFailsWithErrno(EACCES)); +} + +TEST(ChmodTest, FchmodatDirAbsolutePath) { + // Drop capabilities that allow us to override file and directory permissions. 
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false)); + + auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + + int fd; + ASSERT_THAT(fd = open(dir.path().c_str(), O_RDONLY | O_DIRECTORY), + SyscallSucceeds()); + EXPECT_THAT(close(fd), SyscallSucceeds()); + + ASSERT_THAT(fchmodat(-1, dir.path().c_str(), 0, 0), SyscallSucceeds()); + EXPECT_THAT(open(dir.path().c_str(), O_RDONLY), + SyscallFailsWithErrno(EACCES)); +} + +TEST(ChmodTest, FchmodatFile) { + // Drop capabilities that allow us to override file permissions. + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + + auto temp_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + + int parent_fd; + ASSERT_THAT( + parent_fd = open(GetAbsoluteTestTmpdir().c_str(), O_RDONLY | O_DIRECTORY), + SyscallSucceeds()); + + ASSERT_THAT( + fchmodat(parent_fd, std::string(Basename(temp_file.path())).c_str(), 0444, 0), + SyscallSucceeds()); + EXPECT_THAT(close(parent_fd), SyscallSucceeds()); + + EXPECT_THAT(open(temp_file.path().c_str(), O_RDWR), + SyscallFailsWithErrno(EACCES)); +} + +TEST(ChmodTest, FchmodatDir) { + // Drop capabilities that allow us to override file and directory permissions. 
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false)); + + auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + + int parent_fd; + ASSERT_THAT( + parent_fd = open(GetAbsoluteTestTmpdir().c_str(), O_RDONLY | O_DIRECTORY), + SyscallSucceeds()); + + int fd; + ASSERT_THAT(fd = open(dir.path().c_str(), O_RDONLY | O_DIRECTORY), + SyscallSucceeds()); + EXPECT_THAT(close(fd), SyscallSucceeds()); + + ASSERT_THAT(fchmodat(parent_fd, std::string(Basename(dir.path())).c_str(), 0, 0), + SyscallSucceeds()); + EXPECT_THAT(close(parent_fd), SyscallSucceeds()); + + EXPECT_THAT(open(dir.path().c_str(), O_RDONLY | O_DIRECTORY), + SyscallFailsWithErrno(EACCES)); +} + +TEST(ChmodTest, ChmodDowngradeWritability_NoRandomSave) { + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileMode(0666)); + + int fd; + ASSERT_THAT(fd = open(file.path().c_str(), O_RDWR), SyscallSucceeds()); + + const DisableSave ds; // Permissions are dropped. + ASSERT_THAT(chmod(file.path().c_str(), 0444), SyscallSucceeds()); + // The fd was opened O_RDWR before the mode change, so writing through it is + // still expected to succeed. + EXPECT_THAT(write(fd, "hello", 5), SyscallSucceedsWithValue(5)); + + EXPECT_THAT(close(fd), SyscallSucceeds()); +} + +TEST(ChmodTest, ChmodFileToNoPermissionsSucceeds) { + // Drop capabilities that allow us to override file permissions. + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false)); + + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileMode(0666)); + + ASSERT_THAT(chmod(file.path().c_str(), 0), SyscallSucceeds()); + + EXPECT_THAT(open(file.path().c_str(), O_RDONLY), + SyscallFailsWithErrno(EACCES)); +} + +TEST(ChmodTest, FchmodDowngradeWritability_NoRandomSave) { + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + + int fd; + ASSERT_THAT(fd = open(file.path().c_str(), O_RDWR | O_CREAT, 0666), + SyscallSucceeds()); + + const DisableSave ds; // Permissions are dropped. 
+ ASSERT_THAT(fchmod(fd, 0444), SyscallSucceeds()); + EXPECT_THAT(write(fd, "hello", 5), SyscallSucceedsWithValue(5)); + + EXPECT_THAT(close(fd), SyscallSucceeds()); +} + +TEST(ChmodTest, FchmodFileToNoPermissionsSucceeds_NoRandomSave) { + // Drop capabilities that allow us to override file permissions. + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false)); + + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileMode(0666)); + + int fd; + ASSERT_THAT(fd = open(file.path().c_str(), O_RDWR), SyscallSucceeds()); + + { + const DisableSave ds; // Permissions are dropped. + ASSERT_THAT(fchmod(fd, 0), SyscallSucceeds()); + EXPECT_THAT(close(fd), SyscallSucceeds()); + } + + EXPECT_THAT(open(file.path().c_str(), O_RDONLY), + SyscallFailsWithErrno(EACCES)); +} + +// Verify that we can get a RW FD after chmod, even if a RO fd is left open. +TEST(ChmodTest, ChmodWritableWithOpenFD) { + // FIXME: broken on hostfs. + if (IsRunningOnGvisor()) { + return; + } + + TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileMode(0444)); + + FileDescriptor fd1 = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY)); + + ASSERT_THAT(fchmod(fd1.get(), 0644), SyscallSucceeds()); + + // This FD is writable, even though fd1 has a read-only reference to the file. + FileDescriptor fd2 = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR)); + + // fd1 is not writable, but fd2 is. 
+ char c = 'a'; + EXPECT_THAT(WriteFd(fd1.get(), &c, 1), SyscallFailsWithErrno(EBADF)); + EXPECT_THAT(WriteFd(fd2.get(), &c, 1), SyscallSucceedsWithValue(1)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/chown.cc b/test/syscalls/linux/chown.cc new file mode 100644 index 000000000..aa1df05b1 --- /dev/null +++ b/test/syscalls/linux/chown.cc @@ -0,0 +1,200 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <fcntl.h> +#include <grp.h> +#include <sys/types.h> +#include <unistd.h> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/synchronization/notification.h" +#include "test/util/capability_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/fs_util.h" +#include "test/util/posix_error.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +DEFINE_int32(scratch_uid1, 65534, "first scratch UID"); +DEFINE_int32(scratch_uid2, 65533, "second scratch UID"); +DEFINE_int32(scratch_gid, 65534, "first scratch GID"); + +namespace gvisor { +namespace testing { + +namespace { + +TEST(ChownTest, FchownBadF) { + ASSERT_THAT(fchown(-1, 0, 0), SyscallFailsWithErrno(EBADF)); +} + +TEST(ChownTest, FchownatBadF) { + ASSERT_THAT(fchownat(-1, "fff", 0, 0, 0), SyscallFailsWithErrno(EBADF)); +} + +TEST(ChownTest, FchownatEmptyPath) { + const auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const auto fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(dir.path(), O_DIRECTORY | O_RDONLY)); + ASSERT_THAT(fchownat(fd.get(), "", 0, 0, 0), SyscallFailsWithErrno(ENOENT)); +} + +using Chown = + std::function<PosixError(const std::string&, uid_t owner, gid_t group)>; + +class ChownParamTest : public ::testing::TestWithParam<Chown> {}; + +TEST_P(ChownParamTest, ChownFileSucceeds) { + if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_CHOWN))) { + ASSERT_NO_ERRNO(SetCapability(CAP_CHOWN, false)); + } + + const auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + + // At least *try* setting to a group other than the EGID: the first entry + // returned by getgroups() is used when there is one, otherwise the EGID. 
+ gid_t gid; + EXPECT_THAT(gid = getegid(), SyscallSucceeds()); + int num_groups; + EXPECT_THAT(num_groups = getgroups(0, nullptr), SyscallSucceeds()); + if (num_groups > 0) { + std::vector<gid_t> list(num_groups); + EXPECT_THAT(getgroups(list.size(), list.data()), SyscallSucceeds()); + gid = list[0]; + } + + EXPECT_NO_ERRNO(GetParam()(file.path(), geteuid(), gid)); + + struct stat s = {}; + ASSERT_THAT(stat(file.path().c_str(), &s), SyscallSucceeds()); + EXPECT_EQ(s.st_uid, geteuid()); + EXPECT_EQ(s.st_gid, gid); +} + +TEST_P(ChownParamTest, ChownFilePermissionDenied) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SETUID))); + + const auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileMode(0777)); + + // Drop privileges and change IDs only in child thread, or else this parent + // thread won't be able to open some log files after the test ends. + ScopedThread([&] { + // Drop privileges. + if (HaveCapability(CAP_CHOWN).ValueOrDie()) { + EXPECT_NO_ERRNO(SetCapability(CAP_CHOWN, false)); + } + + // Change EUID and EGID. + // + // See note about POSIX below. + EXPECT_THAT(syscall(SYS_setresgid, -1, FLAGS_scratch_gid, -1), + SyscallSucceeds()); + EXPECT_THAT(syscall(SYS_setresuid, -1, FLAGS_scratch_uid1, -1), + SyscallSucceeds()); + + EXPECT_THAT(GetParam()(file.path(), geteuid(), getegid()), + PosixErrorIs(EPERM, ::testing::ContainsRegex("chown"))); + }); +} + +TEST_P(ChownParamTest, ChownFileSucceedsAsRoot) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability((CAP_CHOWN)))); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability((CAP_SETUID)))); + + const std::string filename = NewTempAbsPath(); + + absl::Notification fileCreated, fileChowned; + // Change UID only in child thread, or else this parent thread won't be able + // to open some log files after the test ends. 
+ ScopedThread t([&] { + // POSIX requires that all threads in a process share the same UIDs, so + // the NPTL setresuid wrappers use signals to make all threads execute the + // setresuid syscall. However, we want this thread to have its own set of + // credentials different from the parent process, so we use the raw + // syscall. + EXPECT_THAT(syscall(SYS_setresuid, -1, FLAGS_scratch_uid2, -1), + SyscallSucceeds()); + + // Create file and immediately close it. + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(filename, O_CREAT | O_RDWR, 0644)); + fd.reset(); // Close the fd. + + fileCreated.Notify(); + fileChowned.WaitForNotification(); + + EXPECT_THAT(open(filename.c_str(), O_RDWR), SyscallFailsWithErrno(EACCES)); + FileDescriptor fd2 = ASSERT_NO_ERRNO_AND_VALUE(Open(filename, O_RDONLY)); + }); + + fileCreated.WaitForNotification(); + + // Set file's owners to someone different. + EXPECT_NO_ERRNO(GetParam()(filename, FLAGS_scratch_uid1, FLAGS_scratch_gid)); + + struct stat s; + EXPECT_THAT(stat(filename.c_str(), &s), SyscallSucceeds()); + EXPECT_EQ(s.st_uid, FLAGS_scratch_uid1); + EXPECT_EQ(s.st_gid, FLAGS_scratch_gid); + + fileChowned.Notify(); +} + +// Converts a libc-style return value into a PosixError: -1 becomes the current +// errno with the message "<name> failed"; any other value becomes NoError(). +PosixError errorFromReturn(const std::string& name, int ret) { + if (ret == -1) { + return PosixError(errno, absl::StrCat(name, " failed")); + } + return NoError(); +} + +// Each parameter changes ownership through a different entry point: chown, +// lchown, fchown, fchownat with AT_EMPTY_PATH on the file's own fd, and +// fchownat relative to an fd for the parent directory. +INSTANTIATE_TEST_CASE_P( + ChownKinds, ChownParamTest, + ::testing::Values( + [](const std::string& path, uid_t owner, gid_t group) -> PosixError { + int rc = chown(path.c_str(), owner, group); + MaybeSave(); + return errorFromReturn("chown", rc); + }, + [](const std::string& path, uid_t owner, gid_t group) -> PosixError { + int rc = lchown(path.c_str(), owner, group); + MaybeSave(); + return errorFromReturn("lchown", rc); + }, + [](const std::string& path, uid_t owner, gid_t group) -> PosixError { + ASSIGN_OR_RETURN_ERRNO(auto fd, Open(path, O_RDWR)); + int rc = fchown(fd.get(), owner, group); + MaybeSave(); + return 
 errorFromReturn("fchown", rc); + }, + [](const std::string& path, uid_t owner, gid_t group) -> PosixError { + ASSIGN_OR_RETURN_ERRNO(auto fd, Open(path, O_RDWR)); + int rc = fchownat(fd.get(), "", owner, group, AT_EMPTY_PATH); + MaybeSave(); + return errorFromReturn("fchownat-fd", rc); + }, + [](const std::string& path, uid_t owner, gid_t group) -> PosixError { + ASSIGN_OR_RETURN_ERRNO( + auto dirfd, Open(std::string(Dirname(path)), O_DIRECTORY | O_RDONLY)); + int rc = fchownat(dirfd.get(), std::string(Basename(path)).c_str(), owner, + group, 0); + MaybeSave(); + return errorFromReturn("fchownat-dirfd", rc); + })); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/chroot.cc b/test/syscalls/linux/chroot.cc new file mode 100644 index 000000000..f921f9025 --- /dev/null +++ b/test/syscalls/linux/chroot.cc @@ -0,0 +1,364 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <errno.h> +#include <fcntl.h> +#include <stddef.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <syscall.h> +#include <unistd.h> +#include <string> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "gtest/gtest.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_split.h" +#include "absl/strings/string_view.h" +#include "test/util/capability_util.h" +#include "test/util/cleanup.h" +#include "test/util/file_descriptor.h" +#include "test/util/fs_util.h" +#include "test/util/mount_util.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +using ::testing::HasSubstr; +using ::testing::Not; + +namespace gvisor { +namespace testing { + +namespace { + +TEST(ChrootTest, Success) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_CHROOT))); + + auto temp_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + EXPECT_THAT(chroot(temp_dir.path().c_str()), SyscallSucceeds()); +} + +TEST(ChrootTest, PermissionDenied) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_CHROOT))); + + // CAP_DAC_READ_SEARCH and CAP_DAC_OVERRIDE may override Execute permission on + // directories. 
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false)); + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + + auto temp_dir = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateDirWith(GetAbsoluteTestTmpdir(), 0666 /* mode */)); + EXPECT_THAT(chroot(temp_dir.path().c_str()), SyscallFailsWithErrno(EACCES)); +} + +TEST(ChrootTest, NotDir) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_CHROOT))); + + auto temp_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + EXPECT_THAT(chroot(temp_file.path().c_str()), SyscallFailsWithErrno(ENOTDIR)); +} + +TEST(ChrootTest, NotExist) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_CHROOT))); + + EXPECT_THAT(chroot("/foo/bar"), SyscallFailsWithErrno(ENOENT)); +} + +TEST(ChrootTest, WithoutCapability) { + // Unset CAP_SYS_CHROOT. + ASSERT_NO_ERRNO(SetCapability(CAP_SYS_CHROOT, false)); + + auto temp_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + EXPECT_THAT(chroot(temp_dir.path().c_str()), SyscallFailsWithErrno(EPERM)); +} + +TEST(ChrootTest, CreatesNewRoot) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_CHROOT))); + + // Grab the initial cwd. + char initial_cwd[1024]; + ASSERT_THAT(syscall(__NR_getcwd, initial_cwd, sizeof(initial_cwd)), + SyscallSucceeds()); + + auto new_root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + auto file_in_new_root = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(new_root.path())); + + // chroot into new_root. + ASSERT_THAT(chroot(new_root.path().c_str()), SyscallSucceeds()); + + // getcwd should return "(unreachable)" followed by the initial_cwd. + char cwd[1024]; + ASSERT_THAT(syscall(__NR_getcwd, cwd, sizeof(cwd)), SyscallSucceeds()); + std::string expected_cwd = "(unreachable)"; + expected_cwd += initial_cwd; + EXPECT_STREQ(cwd, expected_cwd.c_str()); + + // Should not be able to stat file by its full path. 
+ struct stat statbuf; + EXPECT_THAT(stat(file_in_new_root.path().c_str(), &statbuf), + SyscallFailsWithErrno(ENOENT)); + + // Should be able to stat file at new rooted path. + auto basename = std::string(Basename(file_in_new_root.path())); + auto rootedFile = "/" + basename; + ASSERT_THAT(stat(rootedFile.c_str(), &statbuf), SyscallSucceeds()); + + // Should be able to stat cwd at '.' even though it's outside root. + ASSERT_THAT(stat(".", &statbuf), SyscallSucceeds()); + + // chdir into new root. + ASSERT_THAT(chdir("/"), SyscallSucceeds()); + + // getcwd should return "/". + EXPECT_THAT(syscall(__NR_getcwd, cwd, sizeof(cwd)), SyscallSucceeds()); + EXPECT_STREQ(cwd, "/"); + + // Statting '.', '..', '/', and '/..' all return the same dev and inode. + struct stat statbuf_dot; + ASSERT_THAT(stat(".", &statbuf_dot), SyscallSucceeds()); + struct stat statbuf_dotdot; + ASSERT_THAT(stat("..", &statbuf_dotdot), SyscallSucceeds()); + EXPECT_EQ(statbuf_dot.st_dev, statbuf_dotdot.st_dev); + EXPECT_EQ(statbuf_dot.st_ino, statbuf_dotdot.st_ino); + struct stat statbuf_slash; + ASSERT_THAT(stat("/", &statbuf_slash), SyscallSucceeds()); + EXPECT_EQ(statbuf_dot.st_dev, statbuf_slash.st_dev); + EXPECT_EQ(statbuf_dot.st_ino, statbuf_slash.st_ino); + struct stat statbuf_slashdotdot; + ASSERT_THAT(stat("/..", &statbuf_slashdotdot), SyscallSucceeds()); + EXPECT_EQ(statbuf_dot.st_dev, statbuf_slashdotdot.st_dev); + EXPECT_EQ(statbuf_dot.st_ino, statbuf_slashdotdot.st_ino); +} + +TEST(ChrootTest, DotDotFromOpenFD) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_CHROOT))); + + auto dir_outside_root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + auto fd = ASSERT_NO_ERRNO_AND_VALUE( + Open(dir_outside_root.path(), O_RDONLY | O_DIRECTORY)); + auto new_root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + + // chroot into new_root. + ASSERT_THAT(chroot(new_root.path().c_str()), SyscallSucceeds()); + + // openat on fd with path .. will succeed. 
+ int other_fd; + ASSERT_THAT(other_fd = openat(fd.get(), "..", O_RDONLY), SyscallSucceeds()); + EXPECT_THAT(close(other_fd), SyscallSucceeds()); + + // getdents on fd should not error. + char buf[1024]; + ASSERT_THAT(syscall(SYS_getdents, fd.get(), buf, sizeof(buf)), + SyscallSucceeds()); +} + +// Test that link resolution in a chroot can escape the root by following an +// open proc fd. +TEST(ChrootTest, ProcFdLinkResolutionInChroot) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_CHROOT))); + + const TempPath file_outside_chroot = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file_outside_chroot.path(), O_RDONLY)); + + const FileDescriptor proc_fd = ASSERT_NO_ERRNO_AND_VALUE( + Open("/proc", O_DIRECTORY | O_RDONLY | O_CLOEXEC)); + + auto temp_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + ASSERT_THAT(chroot(temp_dir.path().c_str()), SyscallSucceeds()); + + // Opening relative to an already open fd to a node outside the chroot works. + const FileDescriptor proc_self_fd = ASSERT_NO_ERRNO_AND_VALUE( + OpenAt(proc_fd.get(), "self/fd", O_DIRECTORY | O_RDONLY | O_CLOEXEC)); + + // Proc fd symlinks can escape the chroot if the fd the symlink refers to + // refers to an object outside the chroot. + struct stat s = {}; + EXPECT_THAT( + fstatat(proc_self_fd.get(), absl::StrCat(fd.get()).c_str(), &s, 0), + SyscallSucceeds()); + + // Try to stat the stdin fd. Internally, this is handled differently from a + // proc fd entry pointing to a file, since stdin is backed by a host fd, and + // isn't a walkable path on the filesystem inside the sandbox. + EXPECT_THAT(fstatat(proc_self_fd.get(), "0", &s, 0), SyscallSucceeds()); +} + +// This test will verify that when you hold a fd to proc before entering +// a chroot that any files inside the chroot will appear rooted to the +// base chroot when examining /proc/self/fd/{num}. 
+TEST(ChrootTest, ProcMemSelfFdsNoEscapeProcOpen) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_CHROOT))); + + // Get a FD to /proc before we enter the chroot. + const FileDescriptor proc = + ASSERT_NO_ERRNO_AND_VALUE(Open("/proc", O_RDONLY)); + + // Create and enter a chroot directory. + const auto temp_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + ASSERT_THAT(chroot(temp_dir.path().c_str()), SyscallSucceeds()); + + // Open a file inside the chroot at /foo. + const FileDescriptor foo = + ASSERT_NO_ERRNO_AND_VALUE(Open("/foo", O_CREAT | O_RDONLY, 0644)); + + // Examine /proc/self/fd/{foo_fd} to see if it exposes the fact that we're + // inside a chroot, the path should be /foo and NOT {chroot_dir}/foo. + const std::string fd_path = absl::StrCat("self/fd/", foo.get()); + char buf[1024] = {}; + size_t bytes_read = 0; + ASSERT_THAT(bytes_read = + readlinkat(proc.get(), fd_path.c_str(), buf, sizeof(buf) - 1), + SyscallSucceeds()); + + // The link should resolve to something. + ASSERT_GT(bytes_read, 0); + + // Assert that the link doesn't contain the chroot path and is only /foo. + EXPECT_STREQ(buf, "/foo"); +} + +// This test will verify that a file inside a chroot when mmapped will not +// expose the full file path via /proc/self/maps and instead honor the chroot. +TEST(ChrootTest, ProcMemSelfMapsNoEscapeProcOpen) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_CHROOT))); + + // Get a FD to /proc before we enter the chroot. + const FileDescriptor proc = + ASSERT_NO_ERRNO_AND_VALUE(Open("/proc", O_RDONLY)); + + // Create and enter a chroot directory. + const auto temp_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + ASSERT_THAT(chroot(temp_dir.path().c_str()), SyscallSucceeds()); + + // Open a file inside the chroot at /foo. + const FileDescriptor foo = + ASSERT_NO_ERRNO_AND_VALUE(Open("/foo", O_CREAT | O_RDONLY, 0644)); + + // Mmap the newly created file. 
+ void* foo_map = mmap(nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE, + foo.get(), 0); + ASSERT_THAT(reinterpret_cast<int64_t>(foo_map), SyscallSucceeds()); + + // Always unmap. + auto cleanup_map = Cleanup( + [&] { EXPECT_THAT(munmap(foo_map, kPageSize), SyscallSucceeds()); }); + + // Examine /proc/self/maps to be sure that /foo doesn't appear to be + // mapped with the full chroot path. + const FileDescriptor maps = + ASSERT_NO_ERRNO_AND_VALUE(OpenAt(proc.get(), "self/maps", O_RDONLY)); + + size_t bytes_read = 0; + char buf[8 * 1024] = {}; + ASSERT_THAT(bytes_read = ReadFd(maps.get(), buf, sizeof(buf)), + SyscallSucceeds()); + + // The maps file should have something. + ASSERT_GT(bytes_read, 0); + + // Finally we want to make sure the maps don't contain the chroot path + ASSERT_EQ(std::string(buf, bytes_read).find(temp_dir.path()), std::string::npos); +} + +// Test that mounts outside the chroot will not appear in /proc/self/mounts or +// /proc/self/mountinfo. +TEST(ChrootTest, ProcMountsMountinfoNoEscape) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))); + + // We are going to create some mounts and then chroot. In order to be able to + // unmount the mounts after the test run, we must chdir to the root and use + // relative paths for all mounts. That way, as long as we never chdir into + // the new root, we can access the mounts via relative paths and unmount them. + ASSERT_THAT(chdir("/"), SyscallSucceeds()); + + // Create nested tmpfs mounts. Note the use of relative paths in Mount calls. 
+ auto const outer_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + auto const outer_mount = ASSERT_NO_ERRNO_AND_VALUE(Mount( + "none", JoinPath(".", outer_dir.path()), "tmpfs", 0, "mode=0700", 0)); + + auto const inner_dir = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(outer_dir.path())); + auto const inner_mount = ASSERT_NO_ERRNO_AND_VALUE(Mount( + "none", JoinPath(".", inner_dir.path()), "tmpfs", 0, "mode=0700", 0)); + + // Filenames that will be checked for mounts, all relative to /proc dir. + std::string paths[3] = {"mounts", "self/mounts", "self/mountinfo"}; + + for (const std::string& path : paths) { + // We should have both inner and outer mounts. + const std::string contents = + ASSERT_NO_ERRNO_AND_VALUE(GetContents(JoinPath("/proc", path))); + EXPECT_THAT(contents, AllOf(HasSubstr(outer_dir.path()), + HasSubstr(inner_dir.path()))); + // We expect more than two mounts: the two mounts we created plus the root. + std::vector<absl::string_view> submounts = + absl::StrSplit(contents, '\n', absl::SkipWhitespace()); + EXPECT_GT(submounts.size(), 2); + } + + // Get a FD to /proc before we enter the chroot. + const FileDescriptor proc = + ASSERT_NO_ERRNO_AND_VALUE(Open("/proc", O_RDONLY)); + + // Chroot to outer mount. + ASSERT_THAT(chroot(outer_dir.path().c_str()), SyscallSucceeds()); + + for (const std::string& path : paths) { + const FileDescriptor proc_file = + ASSERT_NO_ERRNO_AND_VALUE(OpenAt(proc.get(), path, O_RDONLY)); + + // Only two mounts visible from this chroot: the inner and outer. Both + // paths should be relative to the new chroot. 
+ const std::string contents = + ASSERT_NO_ERRNO_AND_VALUE(GetContentsFD(proc_file.get())); + EXPECT_THAT(contents, + AllOf(HasSubstr(absl::StrCat(Basename(inner_dir.path()))), + Not(HasSubstr(outer_dir.path())), + Not(HasSubstr(inner_dir.path())))); + std::vector<absl::string_view> submounts = + absl::StrSplit(contents, '\n', absl::SkipWhitespace()); + EXPECT_EQ(submounts.size(), 2); + } + + // Chroot to inner mount. We must use an absolute path accessible to our + // chroot. + const std::string inner_dir_basename = + absl::StrCat("/", Basename(inner_dir.path())); + ASSERT_THAT(chroot(inner_dir_basename.c_str()), SyscallSucceeds()); + + for (const std::string& path : paths) { + const FileDescriptor proc_file = + ASSERT_NO_ERRNO_AND_VALUE(OpenAt(proc.get(), path, O_RDONLY)); + const std::string contents = + ASSERT_NO_ERRNO_AND_VALUE(GetContentsFD(proc_file.get())); + + // Only the inner mount visible from this chroot. + std::vector<absl::string_view> submounts = + absl::StrSplit(contents, '\n', absl::SkipWhitespace()); + EXPECT_EQ(submounts.size(), 1); + } + + // Chroot back to ".". + ASSERT_THAT(chroot("."), SyscallSucceeds()); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/clock_getres.cc b/test/syscalls/linux/clock_getres.cc new file mode 100644 index 000000000..8f8842299 --- /dev/null +++ b/test/syscalls/linux/clock_getres.cc @@ -0,0 +1,37 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include <sys/time.h> +#include <time.h> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// clock_getres works regardless of whether or not a timespec is passed. +TEST(ClockGetres, Timespec) { + struct timespec ts; + EXPECT_THAT(clock_getres(CLOCK_MONOTONIC, &ts), SyscallSucceeds()); + EXPECT_THAT(clock_getres(CLOCK_MONOTONIC, nullptr), SyscallSucceeds()); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/clock_gettime.cc b/test/syscalls/linux/clock_gettime.cc new file mode 100644 index 000000000..5003928be --- /dev/null +++ b/test/syscalls/linux/clock_gettime.cc @@ -0,0 +1,156 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <pthread.h> +#include <sys/time.h> +#include <cerrno> +#include <cstdint> +#include <ctime> +#include <list> +#include <memory> +#include <string> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// Returns the current reading of clock `id` converted to nanoseconds +// (tv_sec * 1e9 + tv_nsec). The clock_gettime call is checked with +// TEST_PCHECK. +int64_t clock_gettime_nsecs(clockid_t id) { + struct timespec ts; + TEST_PCHECK(clock_gettime(id, &ts) == 0); + return (ts.tv_sec * 1000000000 + ts.tv_nsec); +} + +// Spin on the CPU for at least ns nanoseconds, based on +// CLOCK_THREAD_CPUTIME_ID. +void spin_ns(int64_t ns) { + int64_t start = clock_gettime_nsecs(CLOCK_THREAD_CPUTIME_ID); + int64_t end = start + ns; + + do { + constexpr int kLoopCount = 1000000; // large and arbitrary + // volatile to prevent the compiler from skipping this loop. + for (volatile int i = 0; i < kLoopCount; i++) { + } + } while (clock_gettime_nsecs(CLOCK_THREAD_CPUTIME_ID) < end); +} + +// Test that CLOCK_PROCESS_CPUTIME_ID is a superset of CLOCK_THREAD_CPUTIME_ID. +TEST(ClockGettime, CputimeId) { + constexpr int kNumThreads = 13; // arbitrary + + absl::Duration spin_time = absl::Seconds(1); + + // Start off the worker threads and compute the aggregate time spent by + // the workers. Note that we test CLOCK_PROCESS_CPUTIME_ID by having the + // workers execute in parallel and verifying that CLOCK_PROCESS_CPUTIME_ID + // accumulates the runtime of all threads. + int64_t start = clock_gettime_nsecs(CLOCK_PROCESS_CPUTIME_ID); + + // Create kNumThreads threads. 
+ std::list<ScopedThread> threads; + for (int i = 0; i < kNumThreads; i++) { + threads.emplace_back( + [spin_time] { spin_ns(absl::ToInt64Nanoseconds(spin_time)); }); + } + for (auto& t : threads) { + t.Join(); + } + + int64_t end = clock_gettime_nsecs(CLOCK_PROCESS_CPUTIME_ID); + + // The aggregate time spent in the worker threads must be at least + // 'kNumThreads' times the time each thread spun. + ASSERT_GE(end - start, kNumThreads * absl::ToInt64Nanoseconds(spin_time)); +} + +TEST(ClockGettime, JavaThreadTime) { + clockid_t clockid; + ASSERT_EQ(0, pthread_getcpuclockid(pthread_self(), &clockid)); + struct timespec tp; + ASSERT_THAT(clock_getres(clockid, &tp), SyscallSucceeds()); + ASSERT_THAT(clock_gettime(clockid, &tp), SyscallSucceeds()); + EXPECT_TRUE(tp.tv_sec > 0 || tp.tv_nsec > 0); +} + +// There is not much to test here, since CLOCK_REALTIME may be discontiguous. +TEST(ClockGettime, RealtimeWorks) { + struct timespec tp; + EXPECT_THAT(clock_gettime(CLOCK_REALTIME, &tp), SyscallSucceeds()); +} + +class MonotonicClockTest : public ::testing::TestWithParam<clockid_t> {}; + +TEST_P(MonotonicClockTest, IsMonotonic) { + auto end = absl::Now() + absl::Seconds(5); + + struct timespec tp; + EXPECT_THAT(clock_gettime(GetParam(), &tp), SyscallSucceeds()); + + auto prev = absl::TimeFromTimespec(tp); + while (absl::Now() < end) { + EXPECT_THAT(clock_gettime(GetParam(), &tp), SyscallSucceeds()); + auto now = absl::TimeFromTimespec(tp); + EXPECT_GE(now, prev); + prev = now; + } +} + +std::string PrintClockId(::testing::TestParamInfo<clockid_t> info) { + switch (info.param) { + case CLOCK_MONOTONIC: + return "CLOCK_MONOTONIC"; + case CLOCK_MONOTONIC_COARSE: + return "CLOCK_MONOTONIC_COARSE"; + case CLOCK_MONOTONIC_RAW: + return "CLOCK_MONOTONIC_RAW"; + default: + return absl::StrCat(info.param); + } +} + +INSTANTIATE_TEST_CASE_P(ClockGettime, MonotonicClockTest, + ::testing::Values(CLOCK_MONOTONIC, + CLOCK_MONOTONIC_COARSE, + CLOCK_MONOTONIC_RAW), + PrintClockId); + 
+TEST(ClockGettime, UnimplementedReturnsEINVAL) { + SKIP_IF(!IsRunningOnGvisor()); + + struct timespec tp; + EXPECT_THAT(clock_gettime(CLOCK_BOOTTIME, &tp), + SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(clock_gettime(CLOCK_REALTIME_ALARM, &tp), + SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(clock_gettime(CLOCK_BOOTTIME_ALARM, &tp), + SyscallFailsWithErrno(EINVAL)); +} + +TEST(ClockGettime, InvalidClockIDReturnsEINVAL) { + struct timespec tp; + EXPECT_THAT(clock_gettime(-1, &tp), SyscallFailsWithErrno(EINVAL)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/clock_nanosleep.cc b/test/syscalls/linux/clock_nanosleep.cc new file mode 100644 index 000000000..96bb961b4 --- /dev/null +++ b/test/syscalls/linux/clock_nanosleep.cc @@ -0,0 +1,153 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <time.h> + +#include <atomic> +#include <utility> + +#include "gtest/gtest.h" +#include "absl/time/time.h" +#include "test/util/cleanup.h" +#include "test/util/posix_error.h" +#include "test/util/signal_util.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" +#include "test/util/timer_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// sys_clock_nanosleep is defined because the glibc clock_nanosleep returns +// error numbers directly and does not set errno. 
This makes our Syscall +// matchers look a little weird when expecting failure: +// "SyscallSucceedsWithValue(ERRNO)". +int sys_clock_nanosleep(clockid_t clkid, int flags, + const struct timespec* request, + struct timespec* remain) { + return syscall(SYS_clock_nanosleep, clkid, flags, request, remain); +} + +PosixErrorOr<absl::Time> GetTime(clockid_t clk) { + struct timespec ts = {}; + int rc = clock_gettime(clk, &ts); + MaybeSave(); + if (rc < 0) { + return PosixError(errno, "clock_gettime"); + } + return absl::TimeFromTimespec(ts); +} + +class WallClockNanosleepTest : public ::testing::TestWithParam<clockid_t> {}; + +TEST_P(WallClockNanosleepTest, InvalidValues) { + const struct timespec invalid[] = { + {.tv_sec = -1, .tv_nsec = -1}, {.tv_sec = 0, .tv_nsec = INT32_MIN}, + {.tv_sec = 0, .tv_nsec = INT32_MAX}, {.tv_sec = 0, .tv_nsec = -1}, + {.tv_sec = -1, .tv_nsec = 0}, + }; + + for (auto const ts : invalid) { + EXPECT_THAT(sys_clock_nanosleep(GetParam(), 0, &ts, nullptr), + SyscallFailsWithErrno(EINVAL)); + } +} + +TEST_P(WallClockNanosleepTest, SleepOneSecond) { + absl::Duration const duration = absl::Seconds(1); + struct timespec dur = absl::ToTimespec(duration); + + absl::Time const before = ASSERT_NO_ERRNO_AND_VALUE(GetTime(GetParam())); + EXPECT_THAT(RetryEINTR(sys_clock_nanosleep)(GetParam(), 0, &dur, &dur), + SyscallSucceeds()); + absl::Time const after = ASSERT_NO_ERRNO_AND_VALUE(GetTime(GetParam())); + + EXPECT_GE(after - before, duration); +} + +TEST_P(WallClockNanosleepTest, InterruptedNanosleep) { + absl::Duration const duration = absl::Seconds(60); + struct timespec dur = absl::ToTimespec(duration); + + // Install no-op signal handler for SIGALRM. + struct sigaction sa = {}; + sigfillset(&sa.sa_mask); + sa.sa_handler = +[](int signo) {}; + auto const cleanup_sa = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGALRM, sa)); + + // Measure time since setting the alarm, since the alarm will interrupt the + // sleep and hence determine how long we sleep. 
+ absl::Time const before = ASSERT_NO_ERRNO_AND_VALUE(GetTime(GetParam())); + + // Set an alarm to go off while sleeping. + struct itimerval timer = {}; + timer.it_value.tv_sec = 1; + timer.it_value.tv_usec = 0; + timer.it_interval.tv_sec = 1; + timer.it_interval.tv_usec = 0; + auto const cleanup = + ASSERT_NO_ERRNO_AND_VALUE(ScopedItimer(ITIMER_REAL, timer)); + + EXPECT_THAT(sys_clock_nanosleep(GetParam(), 0, &dur, &dur), + SyscallFailsWithErrno(EINTR)); + absl::Time const after = ASSERT_NO_ERRNO_AND_VALUE(GetTime(GetParam())); + + absl::Duration const remaining = absl::DurationFromTimespec(dur); + EXPECT_GE(after - before + remaining, duration); +} + +TEST_P(WallClockNanosleepTest, SleepUntil) { + absl::Time const now = ASSERT_NO_ERRNO_AND_VALUE(GetTime(GetParam())); + absl::Time const until = now + absl::Seconds(2); + struct timespec ts = absl::ToTimespec(until); + + EXPECT_THAT( + RetryEINTR(sys_clock_nanosleep)(GetParam(), TIMER_ABSTIME, &ts, nullptr), + SyscallSucceeds()); + absl::Time const after = ASSERT_NO_ERRNO_AND_VALUE(GetTime(GetParam())); + + EXPECT_GE(after, until); +} + +INSTANTIATE_TEST_CASE_P(Sleepers, WallClockNanosleepTest, + ::testing::Values(CLOCK_REALTIME, CLOCK_MONOTONIC)); + +TEST(ClockNanosleepProcessTest, SleepFiveSeconds) { + absl::Duration const kDuration = absl::Seconds(5); + struct timespec dur = absl::ToTimespec(kDuration); + + // Ensure that CLOCK_PROCESS_CPUTIME_ID advances. 
+ std::atomic<bool> done(false); + ScopedThread t([&] { + while (!done.load()) { + } + }); + auto const cleanup_done = Cleanup([&] { done.store(true); }); + + absl::Time const before = + ASSERT_NO_ERRNO_AND_VALUE(GetTime(CLOCK_PROCESS_CPUTIME_ID)); + EXPECT_THAT( + RetryEINTR(sys_clock_nanosleep)(CLOCK_PROCESS_CPUTIME_ID, 0, &dur, &dur), + SyscallSucceeds()); + absl::Time const after = + ASSERT_NO_ERRNO_AND_VALUE(GetTime(CLOCK_PROCESS_CPUTIME_ID)); + EXPECT_GE(after - before, kDuration); +} +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/concurrency.cc b/test/syscalls/linux/concurrency.cc new file mode 100644 index 000000000..2c13b315c --- /dev/null +++ b/test/syscalls/linux/concurrency.cc @@ -0,0 +1,124 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <signal.h> +#include <atomic> + +#include "gtest/gtest.h" +#include "absl/strings/string_view.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { +namespace { + +// Test that a thread that never yields to the OS does not prevent other threads +// from running. +TEST(ConcurrencyTest, SingleProcessMultithreaded) { + std::atomic<int> a(0); + + ScopedThread t([&a]() { + while (!a.load()) { + } + }); + + absl::SleepFor(absl::Seconds(1)); + + // We are still able to execute code in this thread. 
The other hasn't + // permanently hung execution in both threads. + a.store(1); +} + +// Test that multiple threads in this process continue to execute in parallel, +// even if an unrelated second process is spawned. +TEST(ConcurrencyTest, MultiProcessMultithreaded) { + // In PID 1, start TIDs 1 and 2, and put both to sleep. + // + // Start PID 3, which spins for 5 seconds, then exits. + // + // TIDs 1 and 2 wake and attempt to Activate, which cannot occur until PID 3 + // exits. + // + // Both TIDs 1 and 2 should be woken. If they are not both woken, the test + // hangs. + // + // This is all fundamentally racy. If we are failing to wake all threads, the + // expectation is that this test becomes flaky, rather than consistently + // failing. + // + // If additional background threads fail to block, we may never schedule the + // child, at which point this test effectively becomes + // MultiProcessConcurrency. That's not expected to occur. + + std::atomic<int> a(0); + ScopedThread t([&a]() { + // Block so that PID 3 can execute and we can wait on its exit. + absl::SleepFor(absl::Seconds(1)); + while (!a.load()) { + } + }); + + pid_t child_pid; + ASSERT_THAT(child_pid = fork(), SyscallSucceeds()); + if (child_pid == 0) { + // Busy wait without making any blocking syscalls. + auto end = absl::Now() + absl::Seconds(5); + while (absl::Now() < end) { + } + _exit(0); + } + + absl::SleepFor(absl::Seconds(1)); + + // If only TID 1 is woken, thread.Join will hang. + // If only TID 2 is woken, both will hang. + a.store(1); + t.Join(); + + int status = 0; + EXPECT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds()); + EXPECT_TRUE(WIFEXITED(status)); + EXPECT_EQ(WEXITSTATUS(status), 0); +} + +// Test that multiple processes can execute concurrently, even if one process +// never yields. 
+TEST(ConcurrencyTest, MultiProcessConcurrency) { + + pid_t child_pid; + ASSERT_THAT(child_pid = fork(), SyscallSucceeds()); + if (child_pid == 0) { + while (true) { + } + __builtin_unreachable(); + } + + absl::SleepFor(absl::Seconds(5)); + + // We are still able to execute code in this process. The other hasn't + // permanently hung execution in both processes. + ASSERT_THAT(kill(child_pid, SIGKILL), SyscallSucceeds()); + int status = 0; + + ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds()); + ASSERT_TRUE(WIFSIGNALED(status)); + ASSERT_EQ(WTERMSIG(status), SIGKILL); +} + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/creat.cc b/test/syscalls/linux/creat.cc new file mode 100644 index 000000000..72a016b4c --- /dev/null +++ b/test/syscalls/linux/creat.cc @@ -0,0 +1,57 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <fcntl.h> +#include <sys/stat.h> +#include <sys/types.h> + +#include <string> + +#include "gtest/gtest.h" +#include "test/util/fs_util.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +constexpr int kMode = 0666; + +TEST(CreatTest, CreatCreatesNewFile) { + std::string const path = NewTempAbsPath(); + struct stat buf; + int fd; + ASSERT_THAT(stat(path.c_str(), &buf), SyscallFailsWithErrno(ENOENT)); + ASSERT_THAT(fd = creat(path.c_str(), kMode), SyscallSucceeds()); + EXPECT_THAT(close(fd), SyscallSucceeds()); + EXPECT_THAT(stat(path.c_str(), &buf), SyscallSucceeds()); +} + +TEST(CreatTest, CreatTruncatesExistingFile) { + auto temp_path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + int fd; + ASSERT_NO_ERRNO(SetContents(temp_path.path(), "non-empty")); + ASSERT_THAT(fd = creat(temp_path.path().c_str(), kMode), SyscallSucceeds()); + EXPECT_THAT(close(fd), SyscallSucceeds()); + std::string new_contents; + ASSERT_NO_ERRNO(GetContents(temp_path.path(), &new_contents)); + EXPECT_EQ("", new_contents); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/dev.cc b/test/syscalls/linux/dev.cc new file mode 100644 index 000000000..a140d3b30 --- /dev/null +++ b/test/syscalls/linux/dev.cc @@ -0,0 +1,149 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <fcntl.h> +#include <unistd.h> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "test/util/file_descriptor.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +TEST(DevTest, LseekDevUrandom) { + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/urandom", O_RDONLY)); + EXPECT_THAT(lseek(fd.get(), -10, SEEK_CUR), SyscallSucceeds()); + EXPECT_THAT(lseek(fd.get(), -10, SEEK_SET), SyscallSucceeds()); + EXPECT_THAT(lseek(fd.get(), 0, SEEK_CUR), SyscallSucceeds()); +} + +TEST(DevTest, LseekDevNull) { + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/null", O_RDONLY)); + EXPECT_THAT(lseek(fd.get(), -10, SEEK_CUR), SyscallSucceeds()); + EXPECT_THAT(lseek(fd.get(), -10, SEEK_SET), SyscallSucceeds()); + EXPECT_THAT(lseek(fd.get(), 0, SEEK_CUR), SyscallSucceeds()); + EXPECT_THAT(lseek(fd.get(), 0, SEEK_END), SyscallSucceeds()); +} + +TEST(DevTest, LseekDevZero) { + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDONLY)); + EXPECT_THAT(lseek(fd.get(), 0, SEEK_CUR), SyscallSucceeds()); + EXPECT_THAT(lseek(fd.get(), 0, SEEK_END), SyscallSucceeds()); +} + +TEST(DevTest, LseekDevFull) { + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/full", O_RDONLY)); + EXPECT_THAT(lseek(fd.get(), 123, SEEK_SET), SyscallSucceedsWithValue(0)); + EXPECT_THAT(lseek(fd.get(), 123, SEEK_CUR), SyscallSucceedsWithValue(0)); + EXPECT_THAT(lseek(fd.get(), 123, SEEK_END), SyscallSucceedsWithValue(0)); +} + +TEST(DevTest, LseekDevNullFreshFile) { + // Seeks to /dev/null always return 0. 
+ const FileDescriptor fd1 = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/null", O_RDONLY)); + const FileDescriptor fd2 = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/null", O_RDONLY)); + + EXPECT_THAT(lseek(fd1.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(0)); + EXPECT_THAT(lseek(fd1.get(), 1000, SEEK_CUR), SyscallSucceedsWithValue(0)); + EXPECT_THAT(lseek(fd2.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(0)); + + const FileDescriptor fd3 = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/null", O_RDONLY)); + EXPECT_THAT(lseek(fd3.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(0)); +} + +TEST(DevTest, OpenTruncate) { + // Truncation is ignored on linux and gvisor for device files. + ASSERT_NO_ERRNO_AND_VALUE( + Open("/dev/null", O_CREAT | O_TRUNC | O_WRONLY, 0644)); + ASSERT_NO_ERRNO_AND_VALUE( + Open("/dev/zero", O_CREAT | O_TRUNC | O_WRONLY, 0644)); + ASSERT_NO_ERRNO_AND_VALUE( + Open("/dev/full", O_CREAT | O_TRUNC | O_WRONLY, 0644)); +} + +TEST(DevTest, Pread64DevNull) { + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/null", O_RDONLY)); + char buf[1]; + EXPECT_THAT(pread64(fd.get(), buf, 1, 0), SyscallSucceedsWithValue(0)); +} + +TEST(DevTest, Pread64DevZero) { + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDONLY)); + char buf[1]; + EXPECT_THAT(pread64(fd.get(), buf, 1, 0), SyscallSucceedsWithValue(1)); +} + +TEST(DevTest, Pread64DevFull) { + // /dev/full behaves like /dev/zero with respect to reads. + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/full", O_RDONLY)); + char buf[1]; + EXPECT_THAT(pread64(fd.get(), buf, 1, 0), SyscallSucceedsWithValue(1)); +} + +TEST(DevTest, ReadDevNull) { + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/null", O_RDONLY)); + std::vector<char> buf(1); + EXPECT_THAT(ReadFd(fd.get(), buf.data(), 1), SyscallSucceeds()); +} + +// Do not allow random save as it could lead to partial reads. 
+TEST(DevTest, ReadDevZero_NoRandomSave) { + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDONLY)); + + constexpr int kReadSize = 128 * 1024; + std::vector<char> buf(kReadSize, 1); + EXPECT_THAT(ReadFd(fd.get(), buf.data(), kReadSize), + SyscallSucceedsWithValue(kReadSize)); + EXPECT_EQ(std::vector<char>(kReadSize, 0), buf); +} + +TEST(DevTest, WriteDevNull) { + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/null", O_WRONLY)); + EXPECT_THAT(WriteFd(fd.get(), "a", 1), SyscallSucceedsWithValue(1)); +} + +TEST(DevTest, WriteDevZero) { + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_WRONLY)); + EXPECT_THAT(WriteFd(fd.get(), "a", 1), SyscallSucceedsWithValue(1)); +} + +TEST(DevTest, WriteDevFull) { + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/full", O_WRONLY)); + EXPECT_THAT(WriteFd(fd.get(), "a", 1), SyscallFailsWithErrno(ENOSPC)); +} + +} // namespace +} // namespace testing + +} // namespace gvisor diff --git a/test/syscalls/linux/dup.cc b/test/syscalls/linux/dup.cc new file mode 100644 index 000000000..fc11844fb --- /dev/null +++ b/test/syscalls/linux/dup.cc @@ -0,0 +1,139 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <fcntl.h> +#include <sys/eventfd.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "test/util/file_descriptor.h" +#include "test/util/posix_error.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// Calls dup2(2) to duplicate `fd` onto `target_fd`. Returns the resulting +// descriptor wrapped in a FileDescriptor, or a PosixError built from errno +// and labelled "Dup2" when dup2 returns a negative value. +PosixErrorOr<FileDescriptor> Dup2(const FileDescriptor& fd, int target_fd) { + int new_fd = dup2(fd.get(), target_fd); + if (new_fd < 0) { + return PosixError(errno, "Dup2"); + } + return FileDescriptor(new_fd); +} + +// Calls dup3(2) to duplicate `fd` onto `target_fd`, forwarding `flags`. +// Returns the resulting descriptor wrapped in a FileDescriptor, or a +// PosixError built from errno and labelled "Dup3" when dup3 returns a +// negative value. +PosixErrorOr<FileDescriptor> Dup3(const FileDescriptor& fd, int target_fd, + int flags) { + int new_fd = dup3(fd.get(), target_fd, flags); + if (new_fd < 0) { + return PosixError(errno, "Dup3"); + } + return FileDescriptor(new_fd); +} + +// Asserts that both descriptors can be fstat'ed and expects them to report +// the same st_dev and st_ino. +void CheckSameFile(const FileDescriptor& fd1, const FileDescriptor& fd2) { + struct stat stat_result1, stat_result2; + ASSERT_THAT(fstat(fd1.get(), &stat_result1), SyscallSucceeds()); + ASSERT_THAT(fstat(fd2.get(), &stat_result2), SyscallSucceeds()); + EXPECT_EQ(stat_result1.st_dev, stat_result2.st_dev); + EXPECT_EQ(stat_result1.st_ino, stat_result2.st_ino); +} + +TEST(DupTest, Dup) { + auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDONLY)); + + // Dup the descriptor and make sure it's the same file. + FileDescriptor nfd = ASSERT_NO_ERRNO_AND_VALUE(fd.Dup()); + ASSERT_NE(fd.get(), nfd.get()); + CheckSameFile(fd, nfd); +} + +TEST(DupTest, DupClearsCloExec) { + FileDescriptor nfd; + + // Open an eventfd file descriptor with FD_CLOEXEC descriptor flag set. + int event_fd = 0; + ASSERT_THAT(event_fd = eventfd(0, EFD_CLOEXEC), SyscallSucceeds()); + FileDescriptor event_fd_closer(event_fd); + + EXPECT_THAT(fcntl(event_fd_closer.get(), F_GETFD), + SyscallSucceedsWithValue(FD_CLOEXEC)); + + // Duplicate the descriptor. Ensure that it doesn't have FD_CLOEXEC set. 
+ nfd = ASSERT_NO_ERRNO_AND_VALUE(event_fd_closer.Dup()); + ASSERT_NE(event_fd_closer.get(), nfd.get()); + CheckSameFile(event_fd_closer, nfd); + EXPECT_THAT(fcntl(nfd.get(), F_GETFD), SyscallSucceedsWithValue(0)); +} + +TEST(DupTest, Dup2) { + auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDONLY)); + + // Regular dup once. + FileDescriptor nfd = ASSERT_NO_ERRNO_AND_VALUE(fd.Dup()); + + ASSERT_NE(fd.get(), nfd.get()); + CheckSameFile(fd, nfd); + + // Dup over the file above. + int target_fd = nfd.release(); + FileDescriptor nfd2 = ASSERT_NO_ERRNO_AND_VALUE(Dup2(fd, target_fd)); + EXPECT_EQ(target_fd, nfd2.get()); + CheckSameFile(fd, nfd2); +} + +TEST(DupTest, Dup2SameFD) { + auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDONLY)); + + // Should succeed. + ASSERT_THAT(dup2(fd.get(), fd.get()), SyscallSucceedsWithValue(fd.get())); +} + +TEST(DupTest, Dup3) { + auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDONLY)); + + // Regular dup once. + FileDescriptor nfd = ASSERT_NO_ERRNO_AND_VALUE(fd.Dup()); + ASSERT_NE(fd.get(), nfd.get()); + CheckSameFile(fd, nfd); + + // Dup over the file above, check that it has no CLOEXEC. + nfd = ASSERT_NO_ERRNO_AND_VALUE(Dup3(fd, nfd.release(), 0)); + CheckSameFile(fd, nfd); + EXPECT_THAT(fcntl(nfd.get(), F_GETFD), SyscallSucceedsWithValue(0)); + + // Dup over the file again with O_CLOEXEC, check that FD_CLOEXEC is now set. 
+ nfd = ASSERT_NO_ERRNO_AND_VALUE(Dup3(fd, nfd.release(), O_CLOEXEC)); + CheckSameFile(fd, nfd); + EXPECT_THAT(fcntl(nfd.get(), F_GETFD), SyscallSucceedsWithValue(FD_CLOEXEC)); +} + +TEST(DupTest, Dup3FailsSameFD) { + auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDONLY)); + + // Only dup3 fails if the new and old fd are the same. + ASSERT_THAT(dup3(fd.get(), fd.get(), 0), SyscallFailsWithErrno(EINVAL)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/epoll.cc b/test/syscalls/linux/epoll.cc new file mode 100644 index 000000000..9ae87c00b --- /dev/null +++ b/test/syscalls/linux/epoll.cc @@ -0,0 +1,468 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <errno.h> +#include <limits.h> +#include <pthread.h> +#include <signal.h> +#include <stdint.h> +#include <stdio.h> +#include <string.h> +#include <sys/epoll.h> +#include <sys/eventfd.h> +#include <time.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "test/util/file_descriptor.h" +#include "test/util/posix_error.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +constexpr int kFDsPerEpoll = 3; +constexpr uint64_t kMagicConstant = 0x0102030405060708; + +// Returns a new epoll file descriptor. 
+PosixErrorOr<FileDescriptor> NewEpollFD() { + // "Since Linux 2.6.8, the size argument is ignored, but must be greater than + // zero." - epoll_create(2) + int fd = epoll_create(/* size = */ 1); + MaybeSave(); + if (fd < 0) { + return PosixError(errno, "epoll_create"); + } + return FileDescriptor(fd); +} + +// Returns a new eventfd. +PosixErrorOr<FileDescriptor> NewEventFD() { + int fd = eventfd(/* initval = */ 0, /* flags = */ 0); + MaybeSave(); + if (fd < 0) { + return PosixError(errno, "eventfd"); + } + return FileDescriptor(fd); +} + +// Registers `target_fd` with the epoll instance represented by `epoll_fd` for +// the epoll events `events`. Events on `target_fd` will be indicated by setting +// data.u64 to `data` in the returned epoll_event. +PosixError RegisterEpollFD(int epoll_fd, int target_fd, int events, + uint64_t data) { + struct epoll_event event; + event.events = events; + event.data.u64 = data; + int rc = epoll_ctl(epoll_fd, EPOLL_CTL_ADD, target_fd, &event); + MaybeSave(); + if (rc < 0) { + return PosixError(errno, "epoll_ctl"); + } + return NoError(); +} + +uint64_t ms_elapsed(const struct timespec* begin, const struct timespec* end) { + return (end->tv_sec - begin->tv_sec) * 1000 + + (end->tv_nsec - begin->tv_nsec) / 1000000; +} + +TEST(EpollTest, AllWritable) { + auto epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD()); + std::vector<FileDescriptor> eventfds; + for (int i = 0; i < kFDsPerEpoll; i++) { + eventfds.push_back(ASSERT_NO_ERRNO_AND_VALUE(NewEventFD())); + ASSERT_NO_ERRNO(RegisterEpollFD(epollfd.get(), eventfds[i].get(), + EPOLLIN | EPOLLOUT, kMagicConstant + i)); + } + + struct epoll_event result[kFDsPerEpoll]; + ASSERT_THAT(RetryEINTR(epoll_wait)(epollfd.get(), result, kFDsPerEpoll, -1), + SyscallSucceedsWithValue(kFDsPerEpoll)); + // TODO: Why do some tests check epoll_event::data, and others + // don't? 
Does Linux actually guarantee that, in any of these test cases, + // epoll_wait will necessarily write out the epoll_events in the order that + // they were registered? + for (int i = 0; i < kFDsPerEpoll; i++) { + ASSERT_EQ(result[i].events, EPOLLOUT); + } +} + +TEST(EpollTest, LastReadable) { + auto epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD()); + std::vector<FileDescriptor> eventfds; + for (int i = 0; i < kFDsPerEpoll; i++) { + eventfds.push_back(ASSERT_NO_ERRNO_AND_VALUE(NewEventFD())); + ASSERT_NO_ERRNO(RegisterEpollFD(epollfd.get(), eventfds[i].get(), + EPOLLIN | EPOLLOUT, kMagicConstant + i)); + } + + uint64_t tmp = 1; + ASSERT_THAT(WriteFd(eventfds[kFDsPerEpoll - 1].get(), &tmp, sizeof(tmp)), + SyscallSucceedsWithValue(sizeof(tmp))); + + struct epoll_event result[kFDsPerEpoll]; + ASSERT_THAT(RetryEINTR(epoll_wait)(epollfd.get(), result, kFDsPerEpoll, -1), + SyscallSucceedsWithValue(kFDsPerEpoll)); + + int i; + for (i = 0; i < kFDsPerEpoll - 1; i++) { + EXPECT_EQ(result[i].events, EPOLLOUT); + } + EXPECT_EQ(result[i].events, EPOLLOUT | EPOLLIN); + EXPECT_EQ(result[i].data.u64, kMagicConstant + i); +} + +TEST(EpollTest, LastNonWritable) { + auto epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD()); + std::vector<FileDescriptor> eventfds; + for (int i = 0; i < kFDsPerEpoll; i++) { + eventfds.push_back(ASSERT_NO_ERRNO_AND_VALUE(NewEventFD())); + ASSERT_NO_ERRNO(RegisterEpollFD(epollfd.get(), eventfds[i].get(), + EPOLLIN | EPOLLOUT, kMagicConstant + i)); + } + + // Write the maximum value to the event fd so that writing to it again would + // block. 
+ uint64_t tmp = ULLONG_MAX - 1; + ASSERT_THAT(WriteFd(eventfds[kFDsPerEpoll - 1].get(), &tmp, sizeof(tmp)), + SyscallSucceedsWithValue(sizeof(tmp))); + + struct epoll_event result[kFDsPerEpoll]; + ASSERT_THAT(RetryEINTR(epoll_wait)(epollfd.get(), result, kFDsPerEpoll, -1), + SyscallSucceedsWithValue(kFDsPerEpoll)); + + int i; + for (i = 0; i < kFDsPerEpoll - 1; i++) { + EXPECT_EQ(result[i].events, EPOLLOUT); + } + EXPECT_EQ(result[i].events, EPOLLIN); + // Read the counter back so the last eventfd is writable again. Use the + // syscall matcher, like every other check here, so a failure reports errno. + EXPECT_THAT(ReadFd(eventfds[kFDsPerEpoll - 1].get(), &tmp, sizeof(tmp)), + SyscallSucceedsWithValue(sizeof(tmp))); + EXPECT_THAT(RetryEINTR(epoll_wait)(epollfd.get(), result, kFDsPerEpoll, -1), + SyscallSucceedsWithValue(kFDsPerEpoll)); + + for (i = 0; i < kFDsPerEpoll; i++) { + EXPECT_EQ(result[i].events, EPOLLOUT); + } +} + +TEST(EpollTest, Timeout_NoRandomSave) { + auto epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD()); + std::vector<FileDescriptor> eventfds; + for (int i = 0; i < kFDsPerEpoll; i++) { + eventfds.push_back(ASSERT_NO_ERRNO_AND_VALUE(NewEventFD())); + ASSERT_NO_ERRNO(RegisterEpollFD(epollfd.get(), eventfds[i].get(), EPOLLIN, + kMagicConstant + i)); + } + + constexpr int kTimeoutMs = 200; + struct timespec begin; + struct timespec end; + struct epoll_event result[kFDsPerEpoll]; + + { + const DisableSave ds; // Timing-related. + EXPECT_THAT(clock_gettime(CLOCK_MONOTONIC, &begin), SyscallSucceeds()); + + ASSERT_THAT( + RetryEINTR(epoll_wait)(epollfd.get(), result, kFDsPerEpoll, kTimeoutMs), + SyscallSucceedsWithValue(0)); + EXPECT_THAT(clock_gettime(CLOCK_MONOTONIC, &end), SyscallSucceeds()); + } + + // Check the lower bound on the timeout. Checking for an upper bound is + // fragile because Linux can overrun the timeout due to scheduling delays. 
+ EXPECT_GT(ms_elapsed(&begin, &end), kTimeoutMs - 1); +} + +void* writer(void* arg) { + int fd = *reinterpret_cast<int*>(arg); + uint64_t tmp = 1; + + usleep(200000); + if (WriteFd(fd, &tmp, sizeof(tmp)) != sizeof(tmp)) { + fprintf(stderr, "writer failed: errno %s\n", strerror(errno)); + } + + return nullptr; +} + +TEST(EpollTest, WaitThenUnblock) { + auto epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD()); + std::vector<FileDescriptor> eventfds; + for (int i = 0; i < kFDsPerEpoll; i++) { + eventfds.push_back(ASSERT_NO_ERRNO_AND_VALUE(NewEventFD())); + ASSERT_NO_ERRNO(RegisterEpollFD(epollfd.get(), eventfds[i].get(), EPOLLIN, + kMagicConstant + i)); + } + + // Fire off a thread that will make at least one of the event fds readable. + pthread_t thread; + int make_readable = eventfds[0].get(); + ASSERT_THAT(pthread_create(&thread, nullptr, writer, &make_readable), + SyscallSucceedsWithValue(0)); + + struct epoll_event result[kFDsPerEpoll]; + EXPECT_THAT(RetryEINTR(epoll_wait)(epollfd.get(), result, kFDsPerEpoll, -1), + SyscallSucceedsWithValue(1)); + EXPECT_THAT(pthread_detach(thread), SyscallSucceeds()); +} + +void sighandler(int s) {} + +void* signaler(void* arg) { + pthread_t* t = reinterpret_cast<pthread_t*>(arg); + // Repeatedly send the real-time signal until we are detached, because it's + // difficult to know exactly when epoll_wait on another thread (which this + // is intending to interrupt) has started blocking. 
+ while (1) { + usleep(200000); + pthread_kill(*t, SIGRTMIN); + } + return nullptr; +} + +TEST(EpollTest, UnblockWithSignal) { + auto epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD()); + std::vector<FileDescriptor> eventfds; + for (int i = 0; i < kFDsPerEpoll; i++) { + eventfds.push_back(ASSERT_NO_ERRNO_AND_VALUE(NewEventFD())); + ASSERT_NO_ERRNO(RegisterEpollFD(epollfd.get(), eventfds[i].get(), EPOLLIN, + kMagicConstant + i)); + } + + signal(SIGRTMIN, sighandler); + // Unblock the real time signals that InitGoogle blocks :( + sigset_t unblock; + sigemptyset(&unblock); + sigaddset(&unblock, SIGRTMIN); + ASSERT_THAT(sigprocmask(SIG_UNBLOCK, &unblock, nullptr), SyscallSucceeds()); + + pthread_t thread; + pthread_t cur = pthread_self(); + ASSERT_THAT(pthread_create(&thread, nullptr, signaler, &cur), + SyscallSucceedsWithValue(0)); + + struct epoll_event result[kFDsPerEpoll]; + EXPECT_THAT(epoll_wait(epollfd.get(), result, kFDsPerEpoll, -1), + SyscallFailsWithErrno(EINTR)); + EXPECT_THAT(pthread_cancel(thread), SyscallSucceeds()); + EXPECT_THAT(pthread_detach(thread), SyscallSucceeds()); +} + +TEST(EpollTest, TimeoutNoFds) { + auto epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD()); + struct epoll_event result[kFDsPerEpoll]; + EXPECT_THAT(RetryEINTR(epoll_wait)(epollfd.get(), result, kFDsPerEpoll, 100), + SyscallSucceedsWithValue(0)); +} + +struct addr_ctx { + int epollfd; + int eventfd; +}; + +void* fd_adder(void* arg) { + struct addr_ctx* actx = reinterpret_cast<struct addr_ctx*>(arg); + struct epoll_event event; + event.events = EPOLLIN | EPOLLOUT; + event.data.u64 = 0xdeadbeeffacefeed; + + usleep(200000); + if (epoll_ctl(actx->epollfd, EPOLL_CTL_ADD, actx->eventfd, &event) == -1) { + fprintf(stderr, "epoll_ctl failed: %s\n", strerror(errno)); + } + + return nullptr; +} + +TEST(EpollTest, UnblockWithNewFD) { + auto epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD()); + auto eventfd = ASSERT_NO_ERRNO_AND_VALUE(NewEventFD()); + + pthread_t thread; + struct addr_ctx actx 
= {epollfd.get(), eventfd.get()}; + ASSERT_THAT(pthread_create(&thread, nullptr, fd_adder, &actx), + SyscallSucceedsWithValue(0)); + + struct epoll_event result[kFDsPerEpoll]; + // Wait while no FDs are ready, but after 200ms fd_adder will add a ready FD + // to epoll which will wake us up. + EXPECT_THAT(RetryEINTR(epoll_wait)(epollfd.get(), result, kFDsPerEpoll, -1), + SyscallSucceedsWithValue(1)); + EXPECT_THAT(pthread_detach(thread), SyscallSucceeds()); + EXPECT_EQ(result[0].data.u64, 0xdeadbeeffacefeed); +} + +TEST(EpollTest, Oneshot) { + auto epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD()); + std::vector<FileDescriptor> eventfds; + for (int i = 0; i < kFDsPerEpoll; i++) { + eventfds.push_back(ASSERT_NO_ERRNO_AND_VALUE(NewEventFD())); + ASSERT_NO_ERRNO(RegisterEpollFD(epollfd.get(), eventfds[i].get(), EPOLLIN, + kMagicConstant + i)); + } + + struct epoll_event event; + event.events = EPOLLOUT | EPOLLONESHOT; + event.data.u64 = kMagicConstant; + ASSERT_THAT( + epoll_ctl(epollfd.get(), EPOLL_CTL_MOD, eventfds[0].get(), &event), + SyscallSucceeds()); + + struct epoll_event result[kFDsPerEpoll]; + // One-shot entry means that the first epoll_wait should succeed. + ASSERT_THAT(RetryEINTR(epoll_wait)(epollfd.get(), result, kFDsPerEpoll, -1), + SyscallSucceedsWithValue(1)); + EXPECT_EQ(result[0].data.u64, kMagicConstant); + + // One-shot entry means that the second epoll_wait should timeout. + EXPECT_THAT(RetryEINTR(epoll_wait)(epollfd.get(), result, kFDsPerEpoll, 100), + SyscallSucceedsWithValue(0)); +} + +TEST(EpollTest, EdgeTriggered_NoRandomSave) { + // Test edge-triggered entry: make it edge-triggered, first wait should + // return it, second one should time out, make it writable again, third wait + // should return it, fourth wait should timeout. 
+ auto epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD()); + auto eventfd = ASSERT_NO_ERRNO_AND_VALUE(NewEventFD()); + ASSERT_NO_ERRNO(RegisterEpollFD(epollfd.get(), eventfd.get(), + EPOLLOUT | EPOLLET, kMagicConstant)); + + struct epoll_event result[kFDsPerEpoll]; + + { + const DisableSave ds; // May trigger spurious event. + + // Edge-triggered entry means that the first epoll_wait should return the + // event. + ASSERT_THAT(epoll_wait(epollfd.get(), result, kFDsPerEpoll, -1), + SyscallSucceedsWithValue(1)); + EXPECT_EQ(result[0].data.u64, kMagicConstant); + + // Edge-triggered entry means that the second epoll_wait should time out. + ASSERT_THAT(epoll_wait(epollfd.get(), result, kFDsPerEpoll, 100), + SyscallSucceedsWithValue(0)); + } + + uint64_t tmp = ULLONG_MAX - 1; + + // Make an fd non-writable. + ASSERT_THAT(WriteFd(eventfd.get(), &tmp, sizeof(tmp)), + SyscallSucceedsWithValue(sizeof(tmp))); + + // Make the same fd writable again to trigger a change, which will trigger an + // edge-triggered event. + ASSERT_THAT(ReadFd(eventfd.get(), &tmp, sizeof(tmp)), + SyscallSucceedsWithValue(sizeof(tmp))); + + { + const DisableSave ds; // May trigger spurious event. + + // An edge-triggered event should now be returned. + ASSERT_THAT(epoll_wait(epollfd.get(), result, kFDsPerEpoll, -1), + SyscallSucceedsWithValue(1)); + EXPECT_EQ(result[0].data.u64, kMagicConstant); + + // The edge-triggered event had been consumed above, we don't expect to + // get it again. 
+ ASSERT_THAT(epoll_wait(epollfd.get(), result, kFDsPerEpoll, 100), + SyscallSucceedsWithValue(0)); + } +} + +TEST(EpollTest, OneshotAndEdgeTriggered) { + auto epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD()); + auto eventfd = ASSERT_NO_ERRNO_AND_VALUE(NewEventFD()); + ASSERT_NO_ERRNO(RegisterEpollFD(epollfd.get(), eventfd.get(), + EPOLLOUT | EPOLLET | EPOLLONESHOT, + kMagicConstant)); + + struct epoll_event result[kFDsPerEpoll]; + // First time one shot edge-triggered entry means that epoll_wait should + // return the event. + ASSERT_THAT(RetryEINTR(epoll_wait)(epollfd.get(), result, kFDsPerEpoll, -1), + SyscallSucceedsWithValue(1)); + EXPECT_EQ(result[0].data.u64, kMagicConstant); + + // Edge-triggered entry means that the second epoll_wait should time out. + ASSERT_THAT(RetryEINTR(epoll_wait)(epollfd.get(), result, kFDsPerEpoll, 100), + SyscallSucceedsWithValue(0)); + + uint64_t tmp = ULLONG_MAX - 1; + // Make an fd non-writable. + ASSERT_THAT(WriteFd(eventfd.get(), &tmp, sizeof(tmp)), + SyscallSucceedsWithValue(sizeof(tmp))); + // Make the same fd writable again to trigger a change, which will not trigger + // an edge-triggered event because we've also included EPOLLONESHOT. 
+ ASSERT_THAT(ReadFd(eventfd.get(), &tmp, sizeof(tmp)), + SyscallSucceedsWithValue(sizeof(tmp))); + ASSERT_THAT(RetryEINTR(epoll_wait)(epollfd.get(), result, kFDsPerEpoll, 100), + SyscallSucceedsWithValue(0)); +} + +TEST(EpollTest, CycleOfOneDisallowed) { + auto epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD()); + + struct epoll_event event; + event.events = EPOLLOUT; + event.data.u64 = kMagicConstant; + + ASSERT_THAT(epoll_ctl(epollfd.get(), EPOLL_CTL_ADD, epollfd.get(), &event), + SyscallFailsWithErrno(EINVAL)); +} + +TEST(EpollTest, CycleOfThreeDisallowed) { + auto epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD()); + auto epollfd1 = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD()); + auto epollfd2 = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD()); + + ASSERT_NO_ERRNO( + RegisterEpollFD(epollfd.get(), epollfd1.get(), EPOLLIN, kMagicConstant)); + ASSERT_NO_ERRNO( + RegisterEpollFD(epollfd1.get(), epollfd2.get(), EPOLLIN, kMagicConstant)); + + struct epoll_event event; + event.events = EPOLLIN; + event.data.u64 = kMagicConstant; + EXPECT_THAT(epoll_ctl(epollfd2.get(), EPOLL_CTL_ADD, epollfd.get(), &event), + SyscallFailsWithErrno(ELOOP)); +} + +TEST(EpollTest, CloseFile) { + auto epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD()); + auto eventfd = ASSERT_NO_ERRNO_AND_VALUE(NewEventFD()); + ASSERT_NO_ERRNO( + RegisterEpollFD(epollfd.get(), eventfd.get(), EPOLLOUT, kMagicConstant)); + + struct epoll_event result[kFDsPerEpoll]; + ASSERT_THAT(RetryEINTR(epoll_wait)(epollfd.get(), result, kFDsPerEpoll, -1), + SyscallSucceedsWithValue(1)); + EXPECT_EQ(result[0].data.u64, kMagicConstant); + + // Close the event fd early. 
+ eventfd.reset(); + + EXPECT_THAT(RetryEINTR(epoll_wait)(epollfd.get(), result, kFDsPerEpoll, 100), + SyscallSucceedsWithValue(0)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/eventfd.cc b/test/syscalls/linux/eventfd.cc new file mode 100644 index 000000000..ffcd20622 --- /dev/null +++ b/test/syscalls/linux/eventfd.cc @@ -0,0 +1,189 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <errno.h> +#include <pthread.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/epoll.h> +#include <sys/eventfd.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +TEST(EventfdTest, Nonblock) { + int efd; + ASSERT_THAT(efd = eventfd(0, EFD_NONBLOCK | EFD_SEMAPHORE), + SyscallSucceeds()); + + uint64_t l; + ASSERT_THAT(read(efd, &l, sizeof(l)), SyscallFailsWithErrno(EAGAIN)); + + l = 1; + ASSERT_THAT(write(efd, &l, sizeof(l)), SyscallSucceeds()); + + l = 0; + ASSERT_THAT(read(efd, &l, sizeof(l)), SyscallSucceeds()); + EXPECT_EQ(l, 1); + + ASSERT_THAT(read(efd, &l, sizeof(l)), SyscallFailsWithErrno(EAGAIN)); +} + +void* read_three_times(void* arg) { + int efd = *reinterpret_cast<int*>(arg); + uint64_t l; + read(efd, &l, sizeof(l)); + read(efd, &l, sizeof(l)); + read(efd, &l, 
sizeof(l)); + return nullptr; +} + +TEST(EventfdTest, BlockingWrite) { + int efd; + ASSERT_THAT(efd = eventfd(0, EFD_SEMAPHORE), SyscallSucceeds()); + + pthread_t p; + ASSERT_THAT(pthread_create(&p, nullptr, read_three_times, + reinterpret_cast<void*>(&efd)), + SyscallSucceeds()); + + uint64_t l = 1; + ASSERT_THAT(write(efd, &l, sizeof(l)), SyscallSucceeds()); + EXPECT_EQ(l, 1); + + ASSERT_THAT(write(efd, &l, sizeof(l)), SyscallSucceeds()); + EXPECT_EQ(l, 1); + + ASSERT_THAT(write(efd, &l, sizeof(l)), SyscallSucceeds()); + EXPECT_EQ(l, 1); + + ASSERT_THAT(pthread_join(p, nullptr), SyscallSucceeds()); +} + +TEST(EventfdTest, SmallWrite) { + int efd; + ASSERT_THAT(efd = eventfd(0, EFD_NONBLOCK | EFD_SEMAPHORE), + SyscallSucceeds()); + + uint64_t l = 16; + ASSERT_THAT(write(efd, &l, 4), SyscallFailsWithErrno(EINVAL)); +} + +TEST(EventfdTest, SmallRead) { + int efd; + ASSERT_THAT(efd = eventfd(0, EFD_NONBLOCK | EFD_SEMAPHORE), + SyscallSucceeds()); + + uint64_t l = 1; + ASSERT_THAT(write(efd, &l, sizeof(l)), SyscallSucceeds()); + + l = 0; + ASSERT_THAT(read(efd, &l, 4), SyscallFailsWithErrno(EINVAL)); +} + +TEST(EventfdTest, BigWrite) { + int efd; + ASSERT_THAT(efd = eventfd(0, EFD_NONBLOCK | EFD_SEMAPHORE), + SyscallSucceeds()); + + uint64_t big[16]; + big[0] = 16; + ASSERT_THAT(write(efd, big, sizeof(big)), SyscallSucceeds()); +} + +TEST(EventfdTest, BigRead) { + int efd; + ASSERT_THAT(efd = eventfd(0, EFD_NONBLOCK | EFD_SEMAPHORE), + SyscallSucceeds()); + + uint64_t l = 1; + ASSERT_THAT(write(efd, &l, sizeof(l)), SyscallSucceeds()); + + uint64_t big[16]; + ASSERT_THAT(read(efd, big, sizeof(big)), SyscallSucceeds()); + EXPECT_EQ(big[0], 1); +} + +TEST(EventfdTest, BigWriteBigRead) { + int efd; + ASSERT_THAT(efd = eventfd(0, EFD_NONBLOCK | EFD_SEMAPHORE), + SyscallSucceeds()); + + uint64_t l[16]; + l[0] = 16; + ASSERT_THAT(write(efd, l, sizeof(l)), SyscallSucceeds()); + ASSERT_THAT(read(efd, l, sizeof(l)), SyscallSucceeds()); + EXPECT_EQ(l[0], 1); +} + +// 
NotifyNonZero is inherently racy, so random save is disabled. +TEST(EventfdTest, NotifyNonZero_NoRandomSave) { + // Waits will time out at 10 seconds. + constexpr int kEpollTimeoutMs = 10000; + // Create an eventfd descriptor. + int efd; + ASSERT_THAT(efd = eventfd(7, EFD_SEMAPHORE | EFD_NONBLOCK), + SyscallSucceeds()); + // Create an epoll fd to listen to efd. + int epollfd; + ASSERT_THAT(epollfd = epoll_create1(0), SyscallSucceeds()); + // Add efd to epoll. + struct epoll_event add_ev; + add_ev.events = EPOLLIN | EPOLLET; + add_ev.data.fd = efd; + ASSERT_THAT(epoll_ctl(epollfd, EPOLL_CTL_ADD, efd, &add_ev), + SyscallSucceeds()); + + // Use epoll to get a value from efd. + struct epoll_event out_ev; + int wait_out = epoll_wait(epollfd, &out_ev, 1, kEpollTimeoutMs); + EXPECT_EQ(wait_out, 1); + EXPECT_EQ(efd, out_ev.data.fd); + uint64_t val = 0; + ASSERT_THAT(read(efd, &val, sizeof(val)), SyscallSucceeds()); + EXPECT_EQ(val, 1); + + // Start a thread that, after this thread blocks on epoll_wait, will write to + // efd. This is racy -- it's possible that this write will happen after + // epoll_wait times out. + ScopedThread t([efd] { + sleep(5); + uint64_t val = 1; + write(efd, &val, sizeof(val)); + }); + + // epoll_wait should return once the thread writes. + wait_out = epoll_wait(epollfd, &out_ev, 1, kEpollTimeoutMs); + EXPECT_EQ(wait_out, 1); + EXPECT_EQ(efd, out_ev.data.fd); + + val = 0; + ASSERT_THAT(read(efd, &val, sizeof(val)), SyscallSucceeds()); + EXPECT_EQ(val, 1); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/exceptions.cc b/test/syscalls/linux/exceptions.cc new file mode 100644 index 000000000..72ab354e3 --- /dev/null +++ b/test/syscalls/linux/exceptions.cc @@ -0,0 +1,146 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <signal.h> + +#include "gtest/gtest.h" +#include "test/util/logging.h" +#include "test/util/signal_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +void inline Halt() { asm("hlt\r\n"); } + +void inline SetAlignmentCheck() { + asm("pushf\r\n" + "pop %%rax\r\n" + "or $0x40000, %%rax\r\n" + "push %%rax\r\n" + "popf\r\n" + : + : + : "ax"); +} + +void inline ClearAlignmentCheck() { + asm("pushf\r\n" + "pop %%rax\r\n" + "mov $0x40000, %%rbx\r\n" + "not %%rbx\r\n" + "and %%rbx, %%rax\r\n" + "push %%rax\r\n" + "popf\r\n" + : + : + : "ax", "bx"); +} + +void inline Int3Normal() { asm(".byte 0xcd, 0x03\r\n"); } + +void inline Int3Compact() { asm(".byte 0xcc\r\n"); } + +TEST(ExceptionTest, Halt) { + // In order to prevent the regular handler from messing with things (and + // perhaps refaulting until some other signal occurs), we reset the handler to + // the default action here and ensure that it dies correctly. + struct sigaction sa = {}; + sa.sa_handler = SIG_DFL; + auto const cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGSEGV, sa)); + + EXPECT_EXIT(Halt(), ::testing::KilledBySignal(SIGSEGV), ""); +} + +TEST(ExceptionTest, DivideByZero) { + // See above. 
+ struct sigaction sa = {}; + sa.sa_handler = SIG_DFL; + auto const cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGFPE, sa)); + + EXPECT_EXIT( + { + uint32_t remainder; + uint32_t quotient; + uint32_t divisor = 0; + uint64_t value = 1; + asm("divl 0(%2)\r\n" + : "=d"(remainder), "=a"(quotient) + : "r"(&divisor), "d"(value >> 32), "a"(value)); + TEST_CHECK(quotient > 0); // Force dependency. + }, + ::testing::KilledBySignal(SIGFPE), ""); +} + +TEST(ExceptionTest, Alignment) { + SetAlignmentCheck(); + ClearAlignmentCheck(); +} + +TEST(ExceptionTest, AlignmentHalt) { + // See above. + struct sigaction sa = {}; + sa.sa_handler = SIG_DFL; + auto const cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGSEGV, sa)); + + // Reported upstream. We need to ensure that bad flags are cleared even in + // fault paths. Set the alignment flag and then generate an exception. + EXPECT_EXIT( + { + SetAlignmentCheck(); + Halt(); + }, + ::testing::KilledBySignal(SIGSEGV), ""); +} + +TEST(ExceptionTest, AlignmentCheck) { + + // See above. + struct sigaction sa = {}; + sa.sa_handler = SIG_DFL; + auto const cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGBUS, sa)); + + EXPECT_EXIT( + { + char array[16]; + SetAlignmentCheck(); + for (int i = 0; i < 8; i++) { + // At least 7/8 offsets will be unaligned here. + uint64_t* ptr = reinterpret_cast<uint64_t*>(&array[i]); + asm("mov %0, 0(%0)\r\n" : : "r"(ptr) : "ax"); + } + }, + ::testing::KilledBySignal(SIGBUS), ""); +} + +TEST(ExceptionTest, Int3Normal) { + // See above. + struct sigaction sa = {}; + sa.sa_handler = SIG_DFL; + auto const cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGTRAP, sa)); + + EXPECT_EXIT(Int3Normal(), ::testing::KilledBySignal(SIGTRAP), ""); +} + +TEST(ExceptionTest, Int3Compact) { + // See above. 
+ struct sigaction sa = {}; + sa.sa_handler = SIG_DFL; + auto const cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGTRAP, sa)); + + EXPECT_EXIT(Int3Compact(), ::testing::KilledBySignal(SIGTRAP), ""); +} + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/exec.cc b/test/syscalls/linux/exec.cc new file mode 100644 index 000000000..1ef40b502 --- /dev/null +++ b/test/syscalls/linux/exec.cc @@ -0,0 +1,625 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "test/syscalls/linux/exec.h" + +#include <errno.h> +#include <fcntl.h> +#include <sys/eventfd.h> +#include <sys/resource.h> +#include <sys/time.h> +#include <unistd.h> + +#include <memory> +#include <string> +#include <vector> + +#include "gtest/gtest.h" +#include "absl/strings/match.h" +#include "absl/strings/numbers.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_split.h" +#include "absl/strings/string_view.h" +#include "absl/synchronization/mutex.h" +#include "test/util/file_descriptor.h" +#include "test/util/fs_util.h" +#include "test/util/multiprocess_util.h" +#include "test/util/posix_error.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +constexpr char kBasicWorkload[] = "exec_basic_workload"; +constexpr char kExitScript[] = "exit_script"; +constexpr char kStateWorkload[] = "exec_state_workload"; +constexpr char kProcExeWorkload[] = "exec_proc_exe_workload"; +constexpr char kAssertClosedWorkload[] = "exec_assert_closed_workload"; +constexpr char kPriorityWorkload[] = "priority_execve"; + +std::string WorkloadPath(absl::string_view binary) { + std::string full_path; + char* test_src = getenv("TEST_SRCDIR"); + if (test_src) { + full_path = JoinPath(test_src, "__main__/test/syscalls/linux", binary); + } + TEST_CHECK(full_path.empty() == false); + return full_path; +} + +constexpr char kExit42[] = "--exec_exit_42"; +constexpr char kExecWithThread[] = "--exec_exec_with_thread"; +constexpr char kExecFromThread[] = "--exec_exec_from_thread"; + +// Runs filename with argv and checks that the exit status is expect_status and +// that stderr contains expect_stderr. 
+void CheckOutput(const std::string& filename, const ExecveArray& argv, + const ExecveArray& envv, int expect_status, + const std::string& expect_stderr) { + int pipe_fds[2]; + ASSERT_THAT(pipe2(pipe_fds, O_CLOEXEC), SyscallSucceeds()); + + FileDescriptor read_fd(pipe_fds[0]); + FileDescriptor write_fd(pipe_fds[1]); + + pid_t child; + int execve_errno; + + const auto remap_stderr = [pipe_fds] { + // Remap stdin and stdout to /dev/null. + int fd = open("/dev/null", O_RDWR | O_CLOEXEC); + if (fd < 0) { + _exit(errno); + } + + int ret = dup2(fd, 0); + if (ret < 0) { + _exit(errno); + } + + ret = dup2(fd, 1); + if (ret < 0) { + _exit(errno); + } + + // And stderr to the pipe. + ret = dup2(pipe_fds[1], 2); + if (ret < 0) { + _exit(errno); + } + + // Here, we'd ideally close all other FDs inherited from the parent. + // However, that's not worth the effort and CloexecNormalFile and + // CloexecEventfd depend on that not happening. + }; + + auto kill = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec(filename, argv, envv, remap_stderr, &child, &execve_errno)); + + ASSERT_EQ(0, execve_errno); + + // Not needed anymore. + write_fd.reset(); + + // Read stderr until the child exits. + std::string output; + constexpr int kSize = 128; + char buf[kSize]; + int n; + do { + ASSERT_THAT(n = ReadFd(read_fd.get(), buf, kSize), SyscallSucceeds()); + if (n > 0) { + output.append(buf, n); + } + } while (n > 0); + + int status; + ASSERT_THAT(RetryEINTR(waitpid)(child, &status, 0), SyscallSucceeds()); + EXPECT_EQ(status, expect_status); + + // Process cleanup no longer needed. 
+ kill.Release(); + + EXPECT_TRUE(absl::StrContains(output, expect_stderr)) << output; +} + +TEST(ExecDeathTest, EmptyPath) { + int execve_errno; + ASSERT_NO_ERRNO_AND_VALUE(ForkAndExec("", {}, {}, nullptr, &execve_errno)); + EXPECT_EQ(execve_errno, ENOENT); +} + +TEST(ExecDeathTest, Basic) { + CheckOutput(WorkloadPath(kBasicWorkload), {WorkloadPath(kBasicWorkload)}, {}, + ArgEnvExitStatus(0, 0), + absl::StrCat(WorkloadPath(kBasicWorkload), "\n")); +} + +TEST(ExecDeathTest, OneArg) { + CheckOutput(WorkloadPath(kBasicWorkload), {WorkloadPath(kBasicWorkload), "1"}, + {}, ArgEnvExitStatus(1, 0), + absl::StrCat(WorkloadPath(kBasicWorkload), "\n1\n")); +} + +TEST(ExecDeathTest, FiveArg) { + CheckOutput(WorkloadPath(kBasicWorkload), + {WorkloadPath(kBasicWorkload), "1", "2", "3", "4", "5"}, {}, + ArgEnvExitStatus(5, 0), + absl::StrCat(WorkloadPath(kBasicWorkload), "\n1\n2\n3\n4\n5\n")); +} + +TEST(ExecDeathTest, OneEnv) { + CheckOutput(WorkloadPath(kBasicWorkload), {WorkloadPath(kBasicWorkload)}, + {"1"}, ArgEnvExitStatus(0, 1), + absl::StrCat(WorkloadPath(kBasicWorkload), "\n1\n")); +} + +TEST(ExecDeathTest, FiveEnv) { + CheckOutput(WorkloadPath(kBasicWorkload), {WorkloadPath(kBasicWorkload)}, + {"1", "2", "3", "4", "5"}, ArgEnvExitStatus(0, 5), + absl::StrCat(WorkloadPath(kBasicWorkload), "\n1\n2\n3\n4\n5\n")); +} + +TEST(ExecDeathTest, OneArgOneEnv) { + CheckOutput(WorkloadPath(kBasicWorkload), + {WorkloadPath(kBasicWorkload), "arg"}, {"env"}, + ArgEnvExitStatus(1, 1), + absl::StrCat(WorkloadPath(kBasicWorkload), "\narg\nenv\n")); +} + +TEST(ExecDeathTest, InterpreterScript) { + CheckOutput(WorkloadPath(kExitScript), {WorkloadPath(kExitScript), "25"}, {}, + ArgEnvExitStatus(25, 0), ""); +} + +// Everything after the path in the interpreter script is a single argument. +TEST(ExecDeathTest, InterpreterScriptArgSplit) { + // Symlink through /tmp to ensure the path is short enough. 
+ TempPath link = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateSymlinkTo("/tmp", WorkloadPath(kBasicWorkload))); + + TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), absl::StrCat("#!", link.path(), " foo bar"), + 0755)); + + CheckOutput(script.path(), {script.path()}, {}, ArgEnvExitStatus(2, 0), + absl::StrCat(link.path(), "\nfoo bar\n", script.path(), "\n")); +} + +// Original argv[0] is replaced with the script path. +TEST(ExecDeathTest, InterpreterScriptArgvZero) { + // Symlink through /tmp to ensure the path is short enough. + TempPath link = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateSymlinkTo("/tmp", WorkloadPath(kBasicWorkload))); + + TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), absl::StrCat("#!", link.path()), 0755)); + + CheckOutput(script.path(), {"REPLACED"}, {}, ArgEnvExitStatus(1, 0), + absl::StrCat(link.path(), "\n", script.path(), "\n")); +} + +// Original argv[0] is replaced with the script path, exactly as passed to +// execve. +TEST(ExecDeathTest, InterpreterScriptArgvZeroRelative) { + // Symlink through /tmp to ensure the path is short enough. + TempPath link = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateSymlinkTo("/tmp", WorkloadPath(kBasicWorkload))); + + TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), absl::StrCat("#!", link.path()), 0755)); + + auto cwd = ASSERT_NO_ERRNO_AND_VALUE(GetCWD()); + auto script_relative = + ASSERT_NO_ERRNO_AND_VALUE(GetRelativePath(cwd, script.path())); + + CheckOutput(script_relative, {"REPLACED"}, {}, ArgEnvExitStatus(1, 0), + absl::StrCat(link.path(), "\n", script_relative, "\n")); +} + +// argv[0] is added as the script path, even if there was none. +TEST(ExecDeathTest, InterpreterScriptArgvZeroAdded) { + // Symlink through /tmp to ensure the path is short enough. 
+ TempPath link = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateSymlinkTo("/tmp", WorkloadPath(kBasicWorkload))); + + TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), absl::StrCat("#!", link.path()), 0755)); + + CheckOutput(script.path(), {}, {}, ArgEnvExitStatus(1, 0), + absl::StrCat(link.path(), "\n", script.path(), "\n")); +} + +// A NUL byte in the script line ends parsing. +TEST(ExecDeathTest, InterpreterScriptArgNUL) { + // Symlink through /tmp to ensure the path is short enough. + TempPath link = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateSymlinkTo("/tmp", WorkloadPath(kBasicWorkload))); + + TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), + absl::StrCat("#!", link.path(), " foo", std::string(1, '\0'), "bar"), 0755)); + + CheckOutput(script.path(), {script.path()}, {}, ArgEnvExitStatus(2, 0), + absl::StrCat(link.path(), "\nfoo\n", script.path(), "\n")); +} + +// Trailing whitespace following interpreter path is ignored. +TEST(ExecDeathTest, InterpreterScriptTrailingWhitespace) { + // Symlink through /tmp to ensure the path is short enough. + TempPath link = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateSymlinkTo("/tmp", WorkloadPath(kBasicWorkload))); + + TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), absl::StrCat("#!", link.path(), " "), 0755)); + + CheckOutput(script.path(), {script.path()}, {}, ArgEnvExitStatus(1, 0), + absl::StrCat(link.path(), "\n", script.path(), "\n")); +} + +// Multiple whitespace characters between interpreter and arg allowed. +TEST(ExecDeathTest, InterpreterScriptArgWhitespace) { + // Symlink through /tmp to ensure the path is short enough. 
+ TempPath link = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateSymlinkTo("/tmp", WorkloadPath(kBasicWorkload))); + + TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), absl::StrCat("#!", link.path(), " foo"), 0755)); + + CheckOutput(script.path(), {script.path()}, {}, ArgEnvExitStatus(2, 0), + absl::StrCat(link.path(), "\nfoo\n", script.path(), "\n")); +} + +TEST(ExecDeathTest, InterpreterScriptNoPath) { + TempPath script = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateFileWith(GetAbsoluteTestTmpdir(), "#!", 0755)); + + int execve_errno; + ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec(script.path(), {script.path()}, {}, nullptr, &execve_errno)); + EXPECT_EQ(execve_errno, ENOEXEC); +} + +// AT_EXECFN is the path passed to execve. +TEST(ExecDeathTest, ExecFn) { + // Symlink through /tmp to ensure the path is short enough. + TempPath link = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateSymlinkTo("/tmp", WorkloadPath(kStateWorkload))); + + TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), absl::StrCat("#!", link.path(), " PrintExecFn"), + 0755)); + + // Pass the script as a relative path and assert that is what appears in + // AT_EXECFN. + auto cwd = ASSERT_NO_ERRNO_AND_VALUE(GetCWD()); + auto script_relative = + ASSERT_NO_ERRNO_AND_VALUE(GetRelativePath(cwd, script.path())); + + CheckOutput(script_relative, {script_relative}, {}, ArgEnvExitStatus(0, 0), + absl::StrCat(script_relative, "\n")); +} + +TEST(ExecDeathTest, ExecName) { + std::string path = WorkloadPath(kStateWorkload); + + CheckOutput(path, {path, "PrintExecName"}, {}, ArgEnvExitStatus(0, 0), + absl::StrCat(Basename(path).substr(0, 15), "\n")); +} + +TEST(ExecDeathTest, ExecNameScript) { + // Symlink through /tmp to ensure the path is short enough. 
+ TempPath link = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateSymlinkTo("/tmp", WorkloadPath(kStateWorkload))); + + TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), + absl::StrCat("#!", link.path(), " PrintExecName"), 0755)); + + std::string script_path = script.path(); + + CheckOutput(script_path, {script_path}, {}, ArgEnvExitStatus(0, 0), + absl::StrCat(Basename(script_path).substr(0, 15), "\n")); +} + +// execve may be called by a multithreaded process. +TEST(ExecDeathTest, WithSiblingThread) { + CheckOutput("/proc/self/exe", {"/proc/self/exe", kExecWithThread}, {}, + W_EXITCODE(42, 0), ""); +} + +// execve may be called from a thread other than the leader of a multithreaded +// process. +TEST(ExecDeathTest, FromSiblingThread) { + CheckOutput("/proc/self/exe", {"/proc/self/exe", kExecFromThread}, {}, + W_EXITCODE(42, 0), ""); +} + +TEST(ExecTest, NotFound) { + char* const argv[] = {nullptr}; + char* const envp[] = {nullptr}; + EXPECT_THAT(execve("/file/does/not/exist", argv, envp), + SyscallFailsWithErrno(ENOENT)); +} + +TEST(ExecTest, NoExecPerm) { + char* const argv[] = {nullptr}; + char* const envp[] = {nullptr}; + auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + EXPECT_THAT(execve(f.path().c_str(), argv, envp), + SyscallFailsWithErrno(EACCES)); +} + +// A signal handler we never expect to be called. +void SignalHandler(int signo) { + std::cerr << "Signal " << signo << " raised." << std::endl; + exit(1); +} + +// Signal handlers are reset on execve(2), unless they have default or ignored +// disposition. 
+TEST(ExecStateDeathTest, HandlerReset) { + struct sigaction sa; + sa.sa_handler = SignalHandler; + ASSERT_THAT(sigaction(SIGUSR1, &sa, nullptr), SyscallSucceeds()); + + ExecveArray args = { + WorkloadPath(kStateWorkload), + "CheckSigHandler", + absl::StrCat(SIGUSR1), + absl::StrCat(absl::Hex(reinterpret_cast<uintptr_t>(SIG_DFL))), + }; + + CheckOutput(WorkloadPath(kStateWorkload), args, {}, W_EXITCODE(0, 0), ""); +} + +// Ignored signal dispositions are not reset. +TEST(ExecStateDeathTest, IgnorePreserved) { + struct sigaction sa; + sa.sa_handler = SIG_IGN; + ASSERT_THAT(sigaction(SIGUSR1, &sa, nullptr), SyscallSucceeds()); + + ExecveArray args = { + WorkloadPath(kStateWorkload), + "CheckSigHandler", + absl::StrCat(SIGUSR1), + absl::StrCat(absl::Hex(reinterpret_cast<uintptr_t>(SIG_IGN))), + }; + + CheckOutput(WorkloadPath(kStateWorkload), args, {}, W_EXITCODE(0, 0), ""); +} + +// Signal masks are not reset on exec +TEST(ExecStateDeathTest, SignalMask) { + sigset_t s; + sigemptyset(&s); + sigaddset(&s, SIGUSR1); + ASSERT_THAT(sigprocmask(SIG_BLOCK, &s, nullptr), SyscallSucceeds()); + + ExecveArray args = { + WorkloadPath(kStateWorkload), + "CheckSigBlocked", + absl::StrCat(SIGUSR1), + }; + + CheckOutput(WorkloadPath(kStateWorkload), args, {}, W_EXITCODE(0, 0), ""); +} + +// itimers persist across execve. +// N.B. Timers created with timer_create(2) should not be preserved! +TEST(ExecStateDeathTest, ItimerPreserved) { + // The fork in ForkAndExec clears itimers, so only set them up after fork. + auto setup_itimer = [] { + // Ignore SIGALRM, as we don't actually care about timer + // expirations. 
+ struct sigaction sa; + sa.sa_handler = SIG_IGN; + int ret = sigaction(SIGALRM, &sa, nullptr); + if (ret < 0) { + _exit(errno); + } + + struct itimerval itv; + itv.it_interval.tv_sec = 1; + itv.it_interval.tv_usec = 0; + itv.it_value.tv_sec = 1; + itv.it_value.tv_usec = 0; + ret = setitimer(ITIMER_REAL, &itv, nullptr); + if (ret < 0) { + _exit(errno); + } + }; + + std::string filename = WorkloadPath(kStateWorkload); + ExecveArray argv = { + filename, + "CheckItimerEnabled", + absl::StrCat(ITIMER_REAL), + }; + + pid_t child; + int execve_errno; + auto kill = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec(filename, argv, {}, setup_itimer, &child, &execve_errno)); + ASSERT_EQ(0, execve_errno); + + int status; + ASSERT_THAT(RetryEINTR(waitpid)(child, &status, 0), SyscallSucceeds()); + EXPECT_EQ(0, status); + + // Process cleanup no longer needed. + kill.Release(); +} + +TEST(ProcSelfExe, ChangesAcrossExecve) { + // See exec_proc_exe_workload for more details. We simply + // assert that the /proc/self/exe link changes across execve. + CheckOutput(WorkloadPath(kProcExeWorkload), + {WorkloadPath(kProcExeWorkload), + ASSERT_NO_ERRNO_AND_VALUE(ProcessExePath(getpid()))}, + {}, W_EXITCODE(0, 0), ""); +} + +TEST(ExecTest, CloexecNormalFile) { + const FileDescriptor fd_closed_on_exec = ASSERT_NO_ERRNO_AND_VALUE( + Open("/usr/share/zoneinfo", O_RDONLY | O_CLOEXEC)); + + CheckOutput(WorkloadPath(kAssertClosedWorkload), + {WorkloadPath(kAssertClosedWorkload), + absl::StrCat(fd_closed_on_exec.get())}, + {}, W_EXITCODE(0, 0), ""); + + // The assert closed workload exits with code 2 if the file still exists. We + // can use this to do a negative test. 
+ const FileDescriptor fd_open_on_exec = + ASSERT_NO_ERRNO_AND_VALUE(Open("/usr/share/zoneinfo", O_RDONLY)); + + CheckOutput(WorkloadPath(kAssertClosedWorkload), + {WorkloadPath(kAssertClosedWorkload), + absl::StrCat(fd_open_on_exec.get())}, + {}, W_EXITCODE(2, 0), ""); +} + +TEST(ExecTest, CloexecEventfd) { + int efd; + ASSERT_THAT(efd = eventfd(0, EFD_CLOEXEC), SyscallSucceeds()); + FileDescriptor fd(efd); + + CheckOutput(WorkloadPath(kAssertClosedWorkload), + {WorkloadPath(kAssertClosedWorkload), absl::StrCat(fd.get())}, {}, + W_EXITCODE(0, 0), ""); +} + +// Priority consistent across calls to execve() +TEST(GetpriorityTest, ExecveMaintainsPriority) { + int prio = 16; + ASSERT_THAT(setpriority(PRIO_PROCESS, getpid(), prio), SyscallSucceeds()); + + // To avoid trying to use negative exit values, check for + // 20 - prio. Since prio should always be in the range [-20, 19], + // this leaves expected_exit_code in the range [1, 40]. + int expected_exit_code = 20 - prio; + + // Program run (priority_execve) will exit(X) where + // X=getpriority(PRIO_PROCESS,0). Check that this exit value is prio. + CheckOutput(WorkloadPath(kPriorityWorkload), + {WorkloadPath(kPriorityWorkload)}, {}, + W_EXITCODE(expected_exit_code, 0), ""); +} + +void ExecWithThread() { + // Used to ensure that the thread has actually started.
+ absl::Mutex mu; + bool started = false; + + ScopedThread t([&] { + mu.Lock(); + started = true; + mu.Unlock(); + + while (true) { + pause(); + } + }); + + mu.LockWhen(absl::Condition(&started)); + mu.Unlock(); + + const ExecveArray argv = {"/proc/self/exe", kExit42}; + const ExecveArray envv; + + execve("/proc/self/exe", argv.get(), envv.get()); + exit(errno); +} + +void ExecFromThread() { + ScopedThread t([] { + const ExecveArray argv = {"/proc/self/exe", kExit42}; + const ExecveArray envv; + + execve("/proc/self/exe", argv.get(), envv.get()); + exit(errno); + }); + + while (true) { + pause(); + } +} + +bool ValidateProcCmdlineVsArgv(const int argc, const char* const* argv) { + auto contents_or = GetContents("/proc/self/cmdline"); + if (!contents_or.ok()) { + LOG(ERROR) << "Unable to get /proc/self/cmdline: " << contents_or.error(); + return false; + } + auto contents = contents_or.ValueOrDie(); + if (contents.back() != '\0') { + LOG(ERROR) << "Non-null terminated /proc/self/cmdline!"; + return false; + } + contents.pop_back(); + std::vector<std::string> procfs_cmdline = absl::StrSplit(contents, '\0'); + + if (static_cast<int>(procfs_cmdline.size()) != argc) { + LOG(ERROR) << "argc = " << argc << " != " << procfs_cmdline.size(); + return false; + } + + for (int i = 0; i < argc; ++i) { + if (procfs_cmdline[i] != argv[i]) { + LOG(ERROR) << "Procfs command line argument " << i << " mismatch " + << procfs_cmdline[i] << " != " << argv[i]; + return false; + } + } + return true; +} + +} // namespace + +} // namespace testing +} // namespace gvisor + +int main(int argc, char** argv) { + // Start by validating that the stack argv is consistent with procfs. + if (!gvisor::testing::ValidateProcCmdlineVsArgv(argc, argv)) { + return 1; + } + + // Some of these tests require no background threads, so check for them before + // TestInit. 
+ for (int i = 0; i < argc; i++) { + absl::string_view arg(argv[i]); + + if (arg == gvisor::testing::kExit42) { + return 42; + } + if (arg == gvisor::testing::kExecWithThread) { + gvisor::testing::ExecWithThread(); + return 1; + } + if (arg == gvisor::testing::kExecFromThread) { + gvisor::testing::ExecFromThread(); + return 1; + } + } + + gvisor::testing::TestInit(&argc, &argv); + + return RUN_ALL_TESTS(); +} diff --git a/test/syscalls/linux/exec.h b/test/syscalls/linux/exec.h new file mode 100644 index 000000000..b82bfffd1 --- /dev/null +++ b/test/syscalls/linux/exec.h @@ -0,0 +1,34 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GVISOR_TEST_SYSCALLS_EXEC_H_ +#define GVISOR_TEST_SYSCALLS_EXEC_H_ + +#include <sys/wait.h> + +namespace gvisor { +namespace testing { + +// Returns the exit code used by exec_basic_workload. +inline int ArgEnvExitCode(int args, int envs) { return args + envs * 10; } + +// Returns the exit status used by exec_basic_workload. 
+inline int ArgEnvExitStatus(int args, int envs) { + return W_EXITCODE(ArgEnvExitCode(args, envs), 0); +} + +} // namespace testing +} // namespace gvisor + +#endif // GVISOR_TEST_SYSCALLS_EXEC_H_ diff --git a/test/syscalls/linux/exec_assert_closed_workload.cc b/test/syscalls/linux/exec_assert_closed_workload.cc new file mode 100644 index 000000000..4448431e1 --- /dev/null +++ b/test/syscalls/linux/exec_assert_closed_workload.cc @@ -0,0 +1,45 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <errno.h> +#include <stdlib.h> +#include <sys/stat.h> +#include <unistd.h> + +#include <iostream> + +#include "absl/strings/numbers.h" + +int main(int argc, char** argv) { + if (argc != 2) { + std::cerr << "need two arguments, got " << argc; + exit(1); + } + int fd; + if (!absl::SimpleAtoi(argv[1], &fd)) { + std::cerr << "fd: " << argv[1] << " could not be parsed" << std::endl; + exit(1); + } + struct stat s; + if (fstat(fd, &s) == 0) { + std::cerr << "fd: " << argv[1] << " should not be valid" << std::endl; + exit(2); + } + if (errno != EBADF) { + std::cerr << "fstat fd: " << argv[1] << " got errno: " << errno + << " wanted: " << EBADF << std::endl; + exit(1); + } + return 0; +} diff --git a/test/syscalls/linux/exec_basic_workload.cc b/test/syscalls/linux/exec_basic_workload.cc new file mode 100644 index 000000000..d4bdf511f --- /dev/null +++ b/test/syscalls/linux/exec_basic_workload.cc @@ -0,0 +1,31 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <stdlib.h> + +#include <iostream> + +#include "test/syscalls/linux/exec.h" + +int main(int argc, char** argv, char** envp) { + int i; + for (i = 0; i < argc; i++) { + std::cerr << argv[i] << std::endl; + } + for (i = 0; envp[i] != nullptr; i++) { + std::cerr << envp[i] << std::endl; + } + exit(gvisor::testing::ArgEnvExitCode(argc - 1, i)); + return 0; +} diff --git a/test/syscalls/linux/exec_binary.cc b/test/syscalls/linux/exec_binary.cc new file mode 100644 index 000000000..cfc898699 --- /dev/null +++ b/test/syscalls/linux/exec_binary.cc @@ -0,0 +1,1367 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <elf.h> +#include <errno.h> +#include <signal.h> +#include <sys/ptrace.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <sys/user.h> +#include <unistd.h> +#include <algorithm> +#include <functional> +#include <iterator> +#include <tuple> +#include <utility> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" +#include "test/util/cleanup.h" +#include "test/util/file_descriptor.h" +#include "test/util/fs_util.h" +#include "test/util/multiprocess_util.h" +#include "test/util/posix_error.h" +#include "test/util/proc_util.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { +namespace { + +using ::testing::AnyOf; +using ::testing::Eq; + +#ifndef __x86_64__ +// The assembly stub and ELF internal details must be ported to other arches. +#error "Test only supported on x86-64" +#endif // __x86_64__ + +// amd64 stub that calls PTRACE_TRACEME and sends itself SIGSTOP. 
+const char kPtraceCode[] = { + // movq $101, %rax /* ptrace */ + '\x48', + '\xc7', + '\xc0', + '\x65', + '\x00', + '\x00', + '\x00', + // movq $0, %rsi /* PTRACE_TRACEME */ + '\x48', + '\xc7', + '\xc6', + '\x00', + '\x00', + '\x00', + '\x00', + // movq $0, %rdi + '\x48', + '\xc7', + '\xc7', + '\x00', + '\x00', + '\x00', + '\x00', + // movq $0, %rdx + '\x48', + '\xc7', + '\xc2', + '\x00', + '\x00', + '\x00', + '\x00', + // movq $0, %r10 + '\x49', + '\xc7', + '\xc2', + '\x00', + '\x00', + '\x00', + '\x00', + // syscall + '\x0f', + '\x05', + + // movq $39, %rax /* getpid */ + '\x48', + '\xc7', + '\xc0', + '\x27', + '\x00', + '\x00', + '\x00', + // syscall + '\x0f', + '\x05', + + // movq %rax, %rdi /* pid */ + '\x48', + '\x89', + '\xc7', + // movq $62, %rax /* kill */ + '\x48', + '\xc7', + '\xc0', + '\x3e', + '\x00', + '\x00', + '\x00', + // movq $19, %rsi /* SIGSTOP */ + '\x48', + '\xc7', + '\xc6', + '\x13', + '\x00', + '\x00', + '\x00', + // syscall + '\x0f', + '\x05', +}; + +// Size of a syscall instruction. +constexpr int kSyscallSize = 2; + +// This test suite tests executable loading in the kernel (ELF and interpreter +// scripts). + +// Parameterized ELF types for 64 and 32 bit. +template <int Size> +struct ElfTypes; + +template <> +struct ElfTypes<64> { + typedef Elf64_Ehdr ElfEhdr; + typedef Elf64_Phdr ElfPhdr; +}; + +template <> +struct ElfTypes<32> { + typedef Elf32_Ehdr ElfEhdr; + typedef Elf32_Phdr ElfPhdr; +}; + +template <int Size> +struct ElfBinary { + using ElfEhdr = typename ElfTypes<Size>::ElfEhdr; + using ElfPhdr = typename ElfTypes<Size>::ElfPhdr; + + ElfEhdr header = {}; + std::vector<ElfPhdr> phdrs; + std::vector<char> data; + + // UpdateOffsets updates p_offset, p_vaddr in all phdrs to account for the + // space taken by the header and phdrs. + // + // It also updates header.e_phnum and adds the offset to header.e_entry to + // account for the headers residing in the first PT_LOAD segment. 
+ // + // Before calling UpdateOffsets each of those fields should be the appropriate + // offset into data. + void UpdateOffsets() { + size_t offset = sizeof(header) + phdrs.size() * sizeof(ElfPhdr); + header.e_entry += offset; + header.e_phnum = phdrs.size(); + for (auto& p : phdrs) { + p.p_offset += offset; + p.p_vaddr += offset; + } + } + + // AddInterpreter adds a PT_INTERP segment with the passed contents. + // + // A later call to UpdateOffsets is required to make the new phdr valid. + void AddInterpreter(std::vector<char> contents) { + const int start = data.size(); + data.insert(data.end(), contents.begin(), contents.end()); + const int size = data.size() - start; + + ElfPhdr phdr = {}; + phdr.p_type = PT_INTERP; + phdr.p_offset = start; + phdr.p_filesz = size; + phdr.p_memsz = size; + // "If [PT_INTERP] is present, it must precede any loadable segment entry." + phdrs.insert(phdrs.begin(), phdr); + } + + // Writes the header, phdrs, and data to fd. + PosixError Write(int fd) const { + int ret = WriteFd(fd, &header, sizeof(header)); + if (ret < 0) { + return PosixError(errno, "failed to write header"); + } else if (ret != sizeof(header)) { + return PosixError(EIO, absl::StrCat("short write of header: ", ret)); + } + + for (auto const& p : phdrs) { + ret = WriteFd(fd, &p, sizeof(p)); + if (ret < 0) { + return PosixError(errno, "failed to write phdr"); + } else if (ret != sizeof(p)) { + return PosixError(EIO, absl::StrCat("short write of phdr: ", ret)); + } + } + + ret = WriteFd(fd, data.data(), data.size()); + if (ret < 0) { + return PosixError(errno, "failed to write data"); + } else if (ret != static_cast<int>(data.size())) { + return PosixError(EIO, absl::StrCat("short write of data: ", ret)); + } + + return NoError(); + } +}; + +// Creates a new temporary executable ELF file in parent with elf as the +// contents. 
+template <int Size> +PosixErrorOr<TempPath> CreateElfWith(absl::string_view parent, + ElfBinary<Size> const& elf) { + ASSIGN_OR_RETURN_ERRNO( + auto file, TempPath::CreateFileWith(parent, absl::string_view(), 0755)); + ASSIGN_OR_RETURN_ERRNO(auto fd, Open(file.path(), O_RDWR)); + RETURN_IF_ERRNO(elf.Write(fd.get())); + return std::move(file); +} + +// Creates a new temporary executable ELF file with elf as the contents. +template <int Size> +PosixErrorOr<TempPath> CreateElfWith(ElfBinary<Size> const& elf) { + return CreateElfWith(GetAbsoluteTestTmpdir(), elf); +} + +// Wait for pid to stop, and assert that it stopped via SIGSTOP. +PosixError WaitStopped(pid_t pid) { + int status; + int ret = RetryEINTR(waitpid)(pid, &status, 0); + MaybeSave(); + if (ret < 0) { + return PosixError(errno, "wait failed"); + } else if (ret != pid) { + return PosixError(ESRCH, absl::StrCat("wait got ", ret, " want ", pid)); + } + + if (!WIFSTOPPED(status) || WSTOPSIG(status) != SIGSTOP) { + return PosixError(EINVAL, + absl::StrCat("pid did not SIGSTOP; status = ", status)); + } + + return NoError(); +} + +// Returns a valid ELF that PTRACE_TRACEME and SIGSTOPs itself. +// +// UpdateOffsets must be called before writing this ELF. +ElfBinary<64> StandardElf() { + ElfBinary<64> elf; + elf.header.e_ident[EI_MAG0] = ELFMAG0; + elf.header.e_ident[EI_MAG1] = ELFMAG1; + elf.header.e_ident[EI_MAG2] = ELFMAG2; + elf.header.e_ident[EI_MAG3] = ELFMAG3; + elf.header.e_ident[EI_CLASS] = ELFCLASS64; + elf.header.e_ident[EI_DATA] = ELFDATA2LSB; + elf.header.e_ident[EI_VERSION] = EV_CURRENT; + elf.header.e_type = ET_EXEC; + elf.header.e_machine = EM_X86_64; + elf.header.e_version = EV_CURRENT; + elf.header.e_phoff = sizeof(elf.header); + elf.header.e_phentsize = sizeof(decltype(elf)::ElfPhdr); + + // TODO: Always include a PT_GNU_STACK segment to disable + // executable stacks. With this omitted the stack (and all PROT_READ) mappings + // should be executable, but gVisor doesn't support that. 
+ decltype(elf)::ElfPhdr phdr = {}; + phdr.p_type = PT_GNU_STACK; + phdr.p_flags = PF_R | PF_W; + elf.phdrs.push_back(phdr); + + phdr = {}; + phdr.p_type = PT_LOAD; + phdr.p_flags = PF_R | PF_X; + phdr.p_offset = 0; + phdr.p_vaddr = 0x40000; + phdr.p_filesz = sizeof(kPtraceCode); + phdr.p_memsz = phdr.p_filesz; + elf.phdrs.push_back(phdr); + + elf.header.e_entry = phdr.p_vaddr; + + elf.data.assign(kPtraceCode, kPtraceCode + sizeof(kPtraceCode)); + + return elf; +} + +// Test that a trivial binary executes. +TEST(ElfTest, Execute) { + ElfBinary<64> elf = StandardElf(); + elf.UpdateOffsets(); + + TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf)); + + pid_t child; + int execve_errno; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno)); + ASSERT_EQ(execve_errno, 0); + + // Ensure it made it to SIGSTOP. + ASSERT_NO_ERRNO(WaitStopped(child)); + + struct user_regs_struct regs; + ASSERT_THAT(ptrace(PTRACE_GETREGS, child, 0, &regs), SyscallSucceeds()); + // RIP is just beyond the final syscall instruction. + EXPECT_EQ(regs.rip, elf.header.e_entry + sizeof(kPtraceCode)); + + EXPECT_THAT(child, ContainsMappings(std::vector<ProcMapsEntry>({ + {0x40000, 0x41000, true, false, true, true, 0, 0, 0, 0, + file.path().c_str()}, + }))); +} + +// StandardElf without data completes execve, but faults once running. +TEST(ElfTest, MissingText) { + ElfBinary<64> elf = StandardElf(); + elf.data.clear(); + elf.UpdateOffsets(); + + TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf)); + + pid_t child; + int execve_errno; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno)); + ASSERT_EQ(execve_errno, 0); + + int status; + ASSERT_THAT(RetryEINTR(waitpid)(child, &status, 0), + SyscallSucceedsWithValue(child)); + // It runs off the end of the zeroes filling the end of the page.
+ EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGSEGV) << status; +} + +// Typical ELF with a data + bss segment +TEST(ElfTest, DataSegment) { + ElfBinary<64> elf = StandardElf(); + + // Create a standard ELF, but extend to 1.5 pages. The second page will be the + // beginning of a multi-page data + bss segment. + elf.data.resize(kPageSize + kPageSize / 2); + + decltype(elf)::ElfPhdr phdr = {}; + phdr.p_type = PT_LOAD; + phdr.p_flags = PF_R | PF_W; + phdr.p_offset = kPageSize; + phdr.p_vaddr = 0x41000; + phdr.p_filesz = kPageSize / 2; + // The header is going to push vaddr up by a few hundred bytes. Keep p_memsz a + // bit less than 2 pages so this mapping doesn't extend beyond 0x43000. + phdr.p_memsz = 2 * kPageSize - kPageSize / 2; + elf.phdrs.push_back(phdr); + + elf.UpdateOffsets(); + + TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf)); + + pid_t child; + int execve_errno; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno)); + ASSERT_EQ(execve_errno, 0); + + ASSERT_NO_ERRNO(WaitStopped(child)); + + EXPECT_THAT( + child, ContainsMappings(std::vector<ProcMapsEntry>({ + // text page. + {0x40000, 0x41000, true, false, true, true, 0, 0, 0, 0, + file.path().c_str()}, + // data + bss page from file. + {0x41000, 0x42000, true, true, false, true, kPageSize, 0, 0, 0, + file.path().c_str()}, + // bss page from anon. + {0x42000, 0x43000, true, true, false, true, 0, 0, 0, 0, ""}, + }))); +} + +// Linux will allow PT_LOAD segments to overlap. +TEST(ElfTest, DirectlyOverlappingSegments) { + // NOTE: see PIEOutOfOrderSegments. + SKIP_IF(IsRunningOnGvisor()); + + ElfBinary<64> elf = StandardElf(); + + // Same as the StandardElf mapping. + decltype(elf)::ElfPhdr phdr = {}; + phdr.p_type = PT_LOAD; + // Add PF_W so we can differentiate this mapping from the first. 
+ phdr.p_flags = PF_R | PF_W | PF_X; + phdr.p_offset = 0; + phdr.p_vaddr = 0x40000; + phdr.p_filesz = sizeof(kPtraceCode); + phdr.p_memsz = phdr.p_filesz; + elf.phdrs.push_back(phdr); + + elf.UpdateOffsets(); + + TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf)); + + pid_t child; + int execve_errno; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno)); + ASSERT_EQ(execve_errno, 0); + + ASSERT_NO_ERRNO(WaitStopped(child)); + + EXPECT_THAT(child, ContainsMappings(std::vector<ProcMapsEntry>({ + {0x40000, 0x41000, true, true, true, true, 0, 0, 0, 0, + file.path().c_str()}, + }))); +} + +// Linux allows out-of-order PT_LOAD segments. +TEST(ElfTest, OutOfOrderSegments) { + // NOTE: see PIEOutOfOrderSegments. + SKIP_IF(IsRunningOnGvisor()); + + ElfBinary<64> elf = StandardElf(); + + decltype(elf)::ElfPhdr phdr = {}; + phdr.p_type = PT_LOAD; + phdr.p_flags = PF_R | PF_X; + phdr.p_offset = 0; + phdr.p_vaddr = 0x20000; + phdr.p_filesz = sizeof(kPtraceCode); + phdr.p_memsz = phdr.p_filesz; + elf.phdrs.push_back(phdr); + + elf.UpdateOffsets(); + + TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf)); + + pid_t child; + int execve_errno; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno)); + ASSERT_EQ(execve_errno, 0); + + ASSERT_NO_ERRNO(WaitStopped(child)); + + EXPECT_THAT(child, ContainsMappings(std::vector<ProcMapsEntry>({ + {0x20000, 0x21000, true, false, true, true, 0, 0, 0, 0, + file.path().c_str()}, + {0x40000, 0x41000, true, false, true, true, 0, 0, 0, 0, + file.path().c_str()}, + }))); +} + +// header.e_phoff is beyond the end of the file.
+TEST(ElfTest, OutOfBoundsPhdrs) { + ElfBinary<64> elf = StandardElf(); + elf.header.e_phoff = 0x100000; + elf.UpdateOffsets(); + + TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf)); + + pid_t child; + int execve_errno; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno)); + // On Linux 3.11, this caused EIO. On newer Linux, it causes ENOEXEC. + EXPECT_THAT(execve_errno, AnyOf(Eq(ENOEXEC), Eq(EIO))); +} + +// Claim there is a phdr beyond the end of the file, but don't include it. +TEST(ElfTest, MissingPhdr) { + ElfBinary<64> elf = StandardElf(); + + // Clear data so the file ends immediately after the phdrs. + // N.B. Per ElfTest.MissingText, StandardElf without data completes execve + // without error. + elf.data.clear(); + elf.UpdateOffsets(); + + // Claim that there is another phdr just beyond the end of the file. Of + // course, it isn't accessible. + elf.header.e_phnum++; + + TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf)); + + pid_t child; + int execve_errno; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno)); + // On Linux 3.11, this caused EIO. On newer Linux, it causes ENOEXEC. + EXPECT_THAT(execve_errno, AnyOf(Eq(ENOEXEC), Eq(EIO))); +} + +// No headers at all, just the ELF magic. +TEST(ElfTest, MissingHeader) { + TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileMode(0755)); + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR)); + + const char kElfMagic[] = {0x7f, 'E', 'L', 'F'}; + + ASSERT_THAT(WriteFd(fd.get(), &kElfMagic, sizeof(kElfMagic)), + SyscallSucceeds()); + fd.reset(); + + pid_t child; + int execve_errno; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno)); + EXPECT_EQ(execve_errno, ENOEXEC); +} + +// Load a PIE ELF with a data + bss segment.
+TEST(ElfTest, PIE) { + ElfBinary<64> elf = StandardElf(); + + elf.header.e_type = ET_DYN; + + // Create a standard ELF, but extend to 1.5 pages. The second page will be the + // beginning of a multi-page data + bss segment. + elf.data.resize(kPageSize + kPageSize / 2); + + elf.header.e_entry = 0x0; + + decltype(elf)::ElfPhdr phdr = {}; + phdr.p_type = PT_LOAD; + phdr.p_flags = PF_R | PF_W; + phdr.p_offset = kPageSize; + // Put the data segment at a bit of an offset. + phdr.p_vaddr = 0x20000; + phdr.p_filesz = kPageSize / 2; + // The header is going to push vaddr up by a few hundred bytes. Keep p_memsz a + // bit less than 2 pages so this mapping doesn't extend beyond 0x43000. + phdr.p_memsz = 2 * kPageSize - kPageSize / 2; + elf.phdrs.push_back(phdr); + + elf.UpdateOffsets(); + + // The first segment really needs to start at 0 for a normal PIE binary, and + // thus includes the headers. + const uint64_t offset = elf.phdrs[1].p_offset; + elf.phdrs[1].p_offset = 0x0; + elf.phdrs[1].p_vaddr = 0x0; + elf.phdrs[1].p_filesz += offset; + elf.phdrs[1].p_memsz += offset; + + TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf)); + + pid_t child; + int execve_errno; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno)); + ASSERT_EQ(execve_errno, 0); + + ASSERT_NO_ERRNO(WaitStopped(child)); + + // RIP tells us which page the first segment was loaded into. + struct user_regs_struct regs; + ASSERT_THAT(ptrace(PTRACE_GETREGS, child, 0, ®s), SyscallSucceeds()); + + const uint64_t load_addr = regs.rip & ~(kPageSize - 1); + + EXPECT_THAT(child, ContainsMappings(std::vector<ProcMapsEntry>({ + // text page. + {load_addr, load_addr + 0x1000, true, false, true, + true, 0, 0, 0, 0, file.path().c_str()}, + // data + bss page from file. + {load_addr + 0x20000, load_addr + 0x21000, true, true, + false, true, kPageSize, 0, 0, 0, file.path().c_str()}, + // bss page from anon. 
+ {load_addr + 0x21000, load_addr + 0x22000, true, true, + false, true, 0, 0, 0, 0, ""}, + }))); +} + +// PIE binary with a non-zero start address. +// +// This is non-standard for a PIE binary, but valid. The binary is still loaded +// at an arbitrary address, not the first PT_LOAD vaddr. +// +// N.B. Linux changed this behavior in d1fd836dcf00d2028c700c7e44d2c23404062c90. +// Previously, with "randomization" enabled, PIE binaries with a non-zero start +// address would be be loaded at the address they specified because mmap was +// passed the load address, which wasn't 0 as expected. +// +// This change is present in kernel v4.1+. +TEST(ElfTest, PIENonZeroStart) { + // gVisor has the newer behavior. + if (!IsRunningOnGvisor()) { + auto version = ASSERT_NO_ERRNO_AND_VALUE(GetKernelVersion()); + SKIP_IF(version.major < 4 || (version.major == 4 && version.minor < 1)); + } + + ElfBinary<64> elf = StandardElf(); + + elf.header.e_type = ET_DYN; + + // Create a standard ELF, but extend to 1.5 pages. The second page will be the + // beginning of a multi-page data + bss segment. + elf.data.resize(kPageSize + kPageSize / 2); + + decltype(elf)::ElfPhdr phdr = {}; + phdr.p_type = PT_LOAD; + phdr.p_flags = PF_R | PF_W; + phdr.p_offset = kPageSize; + // Put the data segment at a bit of an offset. + phdr.p_vaddr = 0x60000; + phdr.p_filesz = kPageSize / 2; + // The header is going to push vaddr up by a few hundred bytes. Keep p_memsz a + // bit less than 2 pages so this mapping doesn't extend beyond 0x43000. + phdr.p_memsz = 2 * kPageSize - kPageSize / 2; + elf.phdrs.push_back(phdr); + + elf.UpdateOffsets(); + + TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf)); + + pid_t child; + int execve_errno; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno)); + ASSERT_EQ(execve_errno, 0); + + ASSERT_NO_ERRNO(WaitStopped(child)); + + // RIP tells us which page the first segment was loaded into. 
+ struct user_regs_struct regs; + ASSERT_THAT(ptrace(PTRACE_GETREGS, child, 0, ®s), SyscallSucceeds()); + + const uint64_t load_addr = regs.rip & ~(kPageSize - 1); + + // The ELF is loaded at an arbitrary address, not the first PT_LOAD vaddr. + // + // N.B. this is technically flaky, but Linux is *extremely* unlikely to pick + // this as the start address, as it searches from the top down. + EXPECT_NE(load_addr, 0x40000); + + EXPECT_THAT(child, ContainsMappings(std::vector<ProcMapsEntry>({ + // text page. + {load_addr, load_addr + 0x1000, true, false, true, + true, 0, 0, 0, 0, file.path().c_str()}, + // data + bss page from file. + {load_addr + 0x20000, load_addr + 0x21000, true, true, + false, true, kPageSize, 0, 0, 0, file.path().c_str()}, + // bss page from anon. + {load_addr + 0x21000, load_addr + 0x22000, true, true, + false, true, 0, 0, 0, 0, ""}, + }))); +} + +TEST(ElfTest, PIEOutOfOrderSegments) { + // TODO: This triggers a bug in Linux where it computes the size + // of the binary as 0x20000 - 0x40000 = 0xfffffffffffe0000, which obviously + // fails to map. + // + // We test gVisor's behavior (of rejecting the binary) because I assert that + // Linux is wrong and needs to be fixed. + SKIP_IF(!IsRunningOnGvisor()); + + ElfBinary<64> elf = StandardElf(); + + elf.header.e_type = ET_DYN; + + // Create a standard ELF, but extend to 1.5 pages. The second page will be the + // beginning of a multi-page data + bss segment. + elf.data.resize(kPageSize + kPageSize / 2); + + decltype(elf)::ElfPhdr phdr = {}; + phdr.p_type = PT_LOAD; + phdr.p_flags = PF_R | PF_W; + phdr.p_offset = kPageSize; + // Put the data segment *before* the first segment. + phdr.p_vaddr = 0x20000; + phdr.p_filesz = kPageSize / 2; + // The header is going to push vaddr up by a few hundred bytes. Keep p_memsz a + // bit less than 2 pages so this mapping doesn't extend beyond 0x43000. 
+ phdr.p_memsz = 2 * kPageSize - kPageSize / 2; + elf.phdrs.push_back(phdr); + + elf.UpdateOffsets(); + + TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf)); + + pid_t child; + int execve_errno; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno)); + EXPECT_EQ(execve_errno, ENOEXEC); +} + +// Standard dynamically linked binary with an ELF interpreter. +TEST(ElfTest, ELFInterpreter) { + ElfBinary<64> interpreter = StandardElf(); + interpreter.header.e_type = ET_DYN; + interpreter.header.e_entry = 0x0; + interpreter.UpdateOffsets(); + + // The first segment really needs to start at 0 for a normal PIE binary, and + // thus includes the headers. + uint64_t const offset = interpreter.phdrs[1].p_offset; + interpreter.phdrs[1].p_offset = 0x0; + interpreter.phdrs[1].p_vaddr = 0x0; + interpreter.phdrs[1].p_filesz += offset; + interpreter.phdrs[1].p_memsz += offset; + + TempPath interpreter_file = + ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(interpreter)); + + ElfBinary<64> binary = StandardElf(); + + // Append the interpreter path. + int const interp_data_start = binary.data.size(); + for (char const c : interpreter_file.path()) { + binary.data.push_back(c); + } + // NUL-terminate. + binary.data.push_back(0); + int const interp_data_size = binary.data.size() - interp_data_start; + + decltype(binary)::ElfPhdr phdr = {}; + phdr.p_type = PT_INTERP; + phdr.p_offset = interp_data_start; + phdr.p_filesz = interp_data_size; + phdr.p_memsz = interp_data_size; + // "If [PT_INTERP] is present, it must precede any loadable segment entry." + // + // However, Linux allows it anywhere, so we just stick it at the end to make + // sure out-of-order PT_INTERP is OK. 
+ binary.phdrs.push_back(phdr); + + binary.UpdateOffsets(); + + TempPath binary_file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(binary)); + + pid_t child; + int execve_errno; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(ForkAndExec( + binary_file.path(), {binary_file.path()}, {}, &child, &execve_errno)); + ASSERT_EQ(execve_errno, 0); + + ASSERT_NO_ERRNO(WaitStopped(child)); + + // RIP tells us which page the first segment of the interpreter was loaded + // into. + struct user_regs_struct regs; + ASSERT_THAT(ptrace(PTRACE_GETREGS, child, 0, ®s), SyscallSucceeds()); + + const uint64_t interp_load_addr = regs.rip & ~(kPageSize - 1); + + EXPECT_THAT(child, + ContainsMappings(std::vector<ProcMapsEntry>({ + // Main binary + {0x40000, 0x41000, true, false, true, true, 0, 0, 0, 0, + binary_file.path().c_str()}, + // Interpreter + {interp_load_addr, interp_load_addr + 0x1000, true, false, + true, true, 0, 0, 0, 0, interpreter_file.path().c_str()}, + }))); +} + +// Test parameter to ElfInterpterStaticTest cases. The first item is a suffix to +// add to the end of the interpreter path in the PT_INTERP segment and the +// second is the expected execve(2) errno. +using ElfInterpreterStaticParam = std::tuple<std::vector<char>, int>; + +class ElfInterpreterStaticTest + : public ::testing::TestWithParam<ElfInterpreterStaticParam> {}; + +// Statically linked ELF with a statically linked ELF interpreter. +TEST_P(ElfInterpreterStaticTest, Test) { + const std::vector<char> segment_suffix = std::get<0>(GetParam()); + const int expected_errno = std::get<1>(GetParam()); + + ElfBinary<64> interpreter = StandardElf(); + interpreter.UpdateOffsets(); + TempPath interpreter_file = + ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(interpreter)); + + ElfBinary<64> binary = StandardElf(); + // The PT_LOAD segment conflicts with the interpreter's PT_LOAD segment. The + // interpreter's will be mapped directly over the binary's. + + // Interpreter path plus the parameterized suffix in the PT_INTERP segment. 
+ const std::string path = interpreter_file.path(); + std::vector<char> segment(path.begin(), path.end()); + segment.insert(segment.end(), segment_suffix.begin(), segment_suffix.end()); + binary.AddInterpreter(segment); + + binary.UpdateOffsets(); + + TempPath binary_file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(binary)); + + pid_t child; + int execve_errno; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(ForkAndExec( + binary_file.path(), {binary_file.path()}, {}, &child, &execve_errno)); + ASSERT_EQ(execve_errno, expected_errno); + + if (expected_errno == 0) { + ASSERT_NO_ERRNO(WaitStopped(child)); + + EXPECT_THAT(child, ContainsMappings(std::vector<ProcMapsEntry>({ + // Interpreter. + {0x40000, 0x41000, true, false, true, true, 0, 0, 0, + 0, interpreter_file.path().c_str()}, + }))); + } +} + +INSTANTIATE_TEST_CASE_P( + Cases, ElfInterpreterStaticTest, + ::testing::ValuesIn({ + // Simple NUL-terminator to run the interpreter as normal. + std::make_tuple(std::vector<char>({'\0'}), 0), + // Add some garbage to the segment followed by a NUL-terminator. This is + // ignored. + std::make_tuple(std::vector<char>({'\0', 'b', '\0'}), 0), + // Add some garbage to the segment without a NUL-terminator. Linux will + // reject + // this. + std::make_tuple(std::vector<char>({'\0', 'b'}), ENOEXEC), + })); + +// Test parameter to ElfInterpterBadPathTest cases. The first item is the +// contents of the PT_INTERP segment and the second is the expected execve(2) +// errno. 
+using ElfInterpreterBadPathParam = std::tuple<std::vector<char>, int>; + +class ElfInterpreterBadPathTest + : public ::testing::TestWithParam<ElfInterpreterBadPathParam> {}; + +TEST_P(ElfInterpreterBadPathTest, Test) { + const std::vector<char> segment = std::get<0>(GetParam()); + const int expected_errno = std::get<1>(GetParam()); + + ElfBinary<64> binary = StandardElf(); + binary.AddInterpreter(segment); + binary.UpdateOffsets(); + + TempPath binary_file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(binary)); + + int execve_errno; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(ForkAndExec( + binary_file.path(), {binary_file.path()}, {}, nullptr, &execve_errno)); + EXPECT_EQ(execve_errno, expected_errno); +} + +INSTANTIATE_TEST_CASE_P( + Cases, ElfInterpreterBadPathTest, + ::testing::ValuesIn({ + // NUL-terminated fake path in the PT_INTERP segment. + std::make_tuple(std::vector<char>({'/', 'f', '/', 'b', '\0'}), ENOENT), + // ELF interpreter not NUL-terminated. + std::make_tuple(std::vector<char>({'/', 'f', '/', 'b'}), ENOEXEC), + // ELF interpreter path omitted entirely. + // + // fs/binfmt_elf.c:load_elf_binary returns ENOEXEC if p_filesz is < 2 + // bytes. + std::make_tuple(std::vector<char>({'\0'}), ENOEXEC), + // ELF interpreter path = "\0". + // + // fs/binfmt_elf.c:load_elf_binary returns ENOEXEC if p_filesz is < 2 + // bytes, so add an extra byte to pass that check. + // + // load_elf_binary -> open_exec -> do_open_execat fails to check that + // name != '\0' before calling do_filp_open, which thus opens the + // working directory. do_open_execat returns EACCES because the + // directory is not a regular file. + std::make_tuple(std::vector<char>({'\0', '\0'}), EACCES), + })); + +// Relative path to ELF interpreter. 
+TEST(ElfTest, ELFInterpreterRelative) { + ElfBinary<64> interpreter = StandardElf(); + interpreter.header.e_type = ET_DYN; + interpreter.header.e_entry = 0x0; + interpreter.UpdateOffsets(); + + // The first segment really needs to start at 0 for a normal PIE binary, and + // thus includes the headers. + uint64_t const offset = interpreter.phdrs[1].p_offset; + interpreter.phdrs[1].p_offset = 0x0; + interpreter.phdrs[1].p_vaddr = 0x0; + interpreter.phdrs[1].p_filesz += offset; + interpreter.phdrs[1].p_memsz += offset; + + TempPath interpreter_file = + ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(interpreter)); + auto cwd = ASSERT_NO_ERRNO_AND_VALUE(GetCWD()); + auto interpreter_relative = + ASSERT_NO_ERRNO_AND_VALUE(GetRelativePath(cwd, interpreter_file.path())); + + ElfBinary<64> binary = StandardElf(); + + // NUL-terminated path in the PT_INTERP segment. + std::vector<char> segment(interpreter_relative.begin(), + interpreter_relative.end()); + segment.push_back(0); + binary.AddInterpreter(segment); + + binary.UpdateOffsets(); + + TempPath binary_file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(binary)); + + pid_t child; + int execve_errno; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(ForkAndExec( + binary_file.path(), {binary_file.path()}, {}, &child, &execve_errno)); + ASSERT_EQ(execve_errno, 0); + + ASSERT_NO_ERRNO(WaitStopped(child)); + + // RIP tells us which page the first segment of the interpreter was loaded + // into. + struct user_regs_struct regs; + ASSERT_THAT(ptrace(PTRACE_GETREGS, child, 0, ®s), SyscallSucceeds()); + + const uint64_t interp_load_addr = regs.rip & ~(kPageSize - 1); + + EXPECT_THAT(child, + ContainsMappings(std::vector<ProcMapsEntry>({ + // Main binary + {0x40000, 0x41000, true, false, true, true, 0, 0, 0, 0, + binary_file.path().c_str()}, + // Interpreter + {interp_load_addr, interp_load_addr + 0x1000, true, false, + true, true, 0, 0, 0, 0, interpreter_file.path().c_str()}, + }))); +} + +// ELF interpreter architecture doesn't match the binary. 
+TEST(ElfTest, ELFInterpreterWrongArch) { + ElfBinary<64> interpreter = StandardElf(); + interpreter.header.e_machine = EM_PPC64; + interpreter.header.e_type = ET_DYN; + interpreter.header.e_entry = 0x0; + interpreter.UpdateOffsets(); + + // The first segment really needs to start at 0 for a normal PIE binary, and + // thus includes the headers. + uint64_t const offset = interpreter.phdrs[1].p_offset; + interpreter.phdrs[1].p_offset = 0x0; + interpreter.phdrs[1].p_vaddr = 0x0; + interpreter.phdrs[1].p_filesz += offset; + interpreter.phdrs[1].p_memsz += offset; + + TempPath interpreter_file = + ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(interpreter)); + + ElfBinary<64> binary = StandardElf(); + + // NUL-terminated path in the PT_INTERP segment. + const std::string path = interpreter_file.path(); + std::vector<char> segment(path.begin(), path.end()); + segment.push_back(0); + binary.AddInterpreter(segment); + + binary.UpdateOffsets(); + + TempPath binary_file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(binary)); + + pid_t child; + int execve_errno; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(ForkAndExec( + binary_file.path(), {binary_file.path()}, {}, &child, &execve_errno)); + ASSERT_EQ(execve_errno, ELIBBAD); +} + +// No execute permissions on the binary. +TEST(ElfTest, NoExecute) { + ElfBinary<64> elf = StandardElf(); + elf.UpdateOffsets(); + + TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf)); + + ASSERT_THAT(chmod(file.path().c_str(), 0644), SyscallSucceeds()); + + pid_t child; + int execve_errno; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno)); + EXPECT_EQ(execve_errno, EACCES); +} + +// Execute, but no read permissions on the binary works just fine. +TEST(ElfTest, NoRead) { + // TODO: gVisor's backing filesystem may prevent the sentry from + // reading the executable. 
+ SKIP_IF(IsRunningOnGvisor()); + + ElfBinary<64> elf = StandardElf(); + elf.UpdateOffsets(); + + TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf)); + + ASSERT_THAT(chmod(file.path().c_str(), 0111), SyscallSucceeds()); + + pid_t child; + int execve_errno; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno)); + ASSERT_EQ(execve_errno, 0); + + ASSERT_NO_ERRNO(WaitStopped(child)); + + // TODO: A task with a non-readable executable is marked + // non-dumpable, preventing access to proc files. gVisor does not implement + // this behavior. +} + +// No execute permissions on the ELF interpreter. +TEST(ElfTest, ElfInterpreterNoExecute) { + ElfBinary<64> interpreter = StandardElf(); + interpreter.header.e_type = ET_DYN; + interpreter.header.e_entry = 0x0; + interpreter.UpdateOffsets(); + + // The first segment really needs to start at 0 for a normal PIE binary, and + // thus includes the headers. + uint64_t const offset = interpreter.phdrs[1].p_offset; + interpreter.phdrs[1].p_offset = 0x0; + interpreter.phdrs[1].p_vaddr = 0x0; + interpreter.phdrs[1].p_filesz += offset; + interpreter.phdrs[1].p_memsz += offset; + + TempPath interpreter_file = + ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(interpreter)); + + ElfBinary<64> binary = StandardElf(); + + // NUL-terminated path in the PT_INTERP segment. + const std::string path = interpreter_file.path(); + std::vector<char> segment(path.begin(), path.end()); + segment.push_back(0); + binary.AddInterpreter(segment); + + binary.UpdateOffsets(); + + TempPath binary_file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(binary)); + + ASSERT_THAT(chmod(interpreter_file.path().c_str(), 0644), SyscallSucceeds()); + + pid_t child; + int execve_errno; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec(interpreter_file.path(), {interpreter_file.path()}, {}, + &child, &execve_errno)); + EXPECT_EQ(execve_errno, EACCES); +} + +// Execute a basic interpreter script. 
+TEST(InterpreterScriptTest, Execute) { + ElfBinary<64> elf = StandardElf(); + elf.UpdateOffsets(); + // Use /tmp explicitly to ensure the path is short enough. + TempPath binary = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith("/tmp", elf)); + + TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), absl::StrCat("#!", binary.path()), 0755)); + + pid_t child; + int execve_errno; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec(script.path(), {script.path()}, {}, &child, &execve_errno)); + ASSERT_EQ(execve_errno, 0); + + EXPECT_NO_ERRNO(WaitStopped(child)); +} + +// Whitespace after #!. +TEST(InterpreterScriptTest, Whitespace) { + ElfBinary<64> elf = StandardElf(); + elf.UpdateOffsets(); + // Use /tmp explicitly to ensure the path is short enough. + TempPath binary = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith("/tmp", elf)); + + TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), absl::StrCat("#! \t \t", binary.path()), 0755)); + + pid_t child; + int execve_errno; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec(script.path(), {script.path()}, {}, &child, &execve_errno)); + ASSERT_EQ(execve_errno, 0); + + EXPECT_NO_ERRNO(WaitStopped(child)); +} + +// Interpreter script is missing execute permission. +TEST(InterpreterScriptTest, InterpreterScriptNoExecute) { + ElfBinary<64> elf = StandardElf(); + elf.UpdateOffsets(); + // Use /tmp explicitly to ensure the path is short enough. + TempPath binary = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith("/tmp", elf)); + + TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), absl::StrCat("#!", binary.path()), 0644)); + + pid_t child; + int execve_errno; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec(script.path(), {script.path()}, {}, &child, &execve_errno)); + ASSERT_EQ(execve_errno, EACCES); +} + +// Binary interpreter script refers to is missing execute permission. 
+TEST(InterpreterScriptTest, BinaryNoExecute) { + ElfBinary<64> elf = StandardElf(); + elf.UpdateOffsets(); + // Use /tmp explicitly to ensure the path is short enough. + TempPath binary = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith("/tmp", elf)); + + ASSERT_THAT(chmod(binary.path().c_str(), 0644), SyscallSucceeds()); + + TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), absl::StrCat("#!", binary.path()), 0755)); + + pid_t child; + int execve_errno; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec(script.path(), {script.path()}, {}, &child, &execve_errno)); + ASSERT_EQ(execve_errno, EACCES); +} + +// Linux will load interpreter scripts five levels deep, but no more. +TEST(InterpreterScriptTest, MaxRecursion) { + ElfBinary<64> elf = StandardElf(); + elf.UpdateOffsets(); + // Use /tmp explicitly to ensure the path is short enough. + TempPath binary = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith("/tmp", elf)); + + TempPath script1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + "/tmp", absl::StrCat("#!", binary.path()), 0755)); + TempPath script2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + "/tmp", absl::StrCat("#!", script1.path()), 0755)); + TempPath script3 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + "/tmp", absl::StrCat("#!", script2.path()), 0755)); + TempPath script4 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + "/tmp", absl::StrCat("#!", script3.path()), 0755)); + TempPath script5 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + "/tmp", absl::StrCat("#!", script4.path()), 0755)); + TempPath script6 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + "/tmp", absl::StrCat("#!", script5.path()), 0755)); + + pid_t child; + int execve_errno; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec(script6.path(), {script6.path()}, {}, &child, &execve_errno)); + // Too many levels of recursion. + EXPECT_EQ(execve_errno, ELOOP); + + // The next level up is OK. 
+ auto cleanup2 = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec(script5.path(), {script5.path()}, {}, &child, &execve_errno)); + ASSERT_EQ(execve_errno, 0); + + EXPECT_NO_ERRNO(WaitStopped(child)); +} + +// Interpreter script with a relative path. +TEST(InterpreterScriptTest, RelativePath) { + ElfBinary<64> elf = StandardElf(); + elf.UpdateOffsets(); + TempPath binary = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith("/tmp", elf)); + + auto cwd = ASSERT_NO_ERRNO_AND_VALUE(GetCWD()); + auto binary_relative = + ASSERT_NO_ERRNO_AND_VALUE(GetRelativePath(cwd, binary.path())); + + TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), absl::StrCat("#!", binary_relative), 0755)); + + pid_t child; + int execve_errno; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec(script.path(), {script.path()}, {}, &child, &execve_errno)); + ASSERT_EQ(execve_errno, 0); + + EXPECT_NO_ERRNO(WaitStopped(child)); +} + +// Interpreter script with .. in a path component. +TEST(InterpreterScriptTest, UncleanPath) { + ElfBinary<64> elf = StandardElf(); + elf.UpdateOffsets(); + // Use /tmp explicitly to ensure the path is short enough. + TempPath binary = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith("/tmp", elf)); + + TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), absl::StrCat("#!/tmp/../", binary.path()), + 0755)); + + pid_t child; + int execve_errno; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec(script.path(), {script.path()}, {}, &child, &execve_errno)); + ASSERT_EQ(execve_errno, 0); + + EXPECT_NO_ERRNO(WaitStopped(child)); +} + +// Passed interpreter script is a symlink. +TEST(InterpreterScriptTest, Symlink) { + ElfBinary<64> elf = StandardElf(); + elf.UpdateOffsets(); + // Use /tmp explicitly to ensure the path is short enough. 
+ TempPath binary = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith("/tmp", elf)); + + TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), absl::StrCat("#!", binary.path()), 0755)); + + TempPath link = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateSymlinkTo(GetAbsoluteTestTmpdir(), script.path())); + + pid_t child; + int execve_errno; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec(link.path(), {link.path()}, {}, &child, &execve_errno)); + ASSERT_EQ(execve_errno, 0); + + EXPECT_NO_ERRNO(WaitStopped(child)); +} + +// Interpreter script points to a symlink loop. +TEST(InterpreterScriptTest, SymlinkLoop) { + std::string const link1 = NewTempAbsPathInDir("/tmp"); + std::string const link2 = NewTempAbsPathInDir("/tmp"); + + ASSERT_THAT(symlink(link2.c_str(), link1.c_str()), SyscallSucceeds()); + auto remove_link1 = Cleanup( + [&link1] { EXPECT_THAT(unlink(link1.c_str()), SyscallSucceeds()); }); + + ASSERT_THAT(symlink(link1.c_str(), link2.c_str()), SyscallSucceeds()); + auto remove_link2 = Cleanup( + [&link2] { EXPECT_THAT(unlink(link2.c_str()), SyscallSucceeds()); }); + + TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), absl::StrCat("#!", link1), 0755)); + + pid_t child; + int execve_errno; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec(script.path(), {script.path()}, {}, &child, &execve_errno)); + EXPECT_EQ(execve_errno, ELOOP); +} + +// Binary is a symlink loop. 
+TEST(ExecveTest, SymlinkLoop) { + std::string const link1 = NewTempAbsPathInDir("/tmp"); + std::string const link2 = NewTempAbsPathInDir("/tmp"); + + ASSERT_THAT(symlink(link2.c_str(), link1.c_str()), SyscallSucceeds()); + auto remove_link = Cleanup( + [&link1] { EXPECT_THAT(unlink(link1.c_str()), SyscallSucceeds()); }); + + ASSERT_THAT(symlink(link1.c_str(), link2.c_str()), SyscallSucceeds()); + auto remove_link2 = Cleanup( + [&link2] { EXPECT_THAT(unlink(link2.c_str()), SyscallSucceeds()); }); + + pid_t child; + int execve_errno; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec(link1, {link1}, {}, &child, &execve_errno)); + EXPECT_EQ(execve_errno, ELOOP); +} + +// Binary is a directory. +TEST(ExecveTest, Directory) { + pid_t child; + int execve_errno; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec("/tmp", {"/tmp"}, {}, &child, &execve_errno)); + EXPECT_EQ(execve_errno, EACCES); +} + +// Pass a valid binary as a directory (extra / on the end). +TEST(ExecveTest, BinaryAsDirectory) { + ElfBinary<64> elf = StandardElf(); + elf.UpdateOffsets(); + TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf)); + + std::string const path = absl::StrCat(file.path(), "/"); + + pid_t child; + int execve_errno; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec(path, {path}, {}, &child, &execve_errno)); + EXPECT_EQ(execve_errno, ENOTDIR); +} + +// The initial brk value is after the page at the end of the binary. +TEST(ExecveTest, BrkAfterBinary) { + ElfBinary<64> elf = StandardElf(); + elf.UpdateOffsets(); + + TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf)); + + pid_t child; + int execve_errno; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno)); + ASSERT_EQ(execve_errno, 0); + + // Ensure it made it to SIGSTOP. 
+ ASSERT_NO_ERRNO(WaitStopped(child)); + + struct user_regs_struct regs; + ASSERT_THAT(ptrace(PTRACE_GETREGS, child, 0, ®s), SyscallSucceeds()); + + // RIP is just beyond the final syscall instruction. Rewind to execute a brk + // syscall. + regs.rip -= kSyscallSize; + regs.rax = __NR_brk; + regs.rdi = 0; + ASSERT_THAT(ptrace(PTRACE_SETREGS, child, 0, ®s), SyscallSucceeds()); + + // Resume the child, waiting for syscall entry. + ASSERT_THAT(ptrace(PTRACE_SYSCALL, child, 0, 0), SyscallSucceeds()); + int status; + ASSERT_THAT(RetryEINTR(waitpid)(child, &status, 0), + SyscallSucceedsWithValue(child)); + ASSERT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) + << "status = " << status; + + // Execute the syscall. + ASSERT_THAT(ptrace(PTRACE_SYSCALL, child, 0, 0), SyscallSucceeds()); + ASSERT_THAT(RetryEINTR(waitpid)(child, &status, 0), + SyscallSucceedsWithValue(child)); + ASSERT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) + << "status = " << status; + + ASSERT_THAT(ptrace(PTRACE_GETREGS, child, 0, ®s), SyscallSucceeds()); + + // brk is after the text page. + // + // The kernel does brk randomization, so we can't be sure what the exact + // address will be, but it is always beyond the final page in the binary. + // i.e., it does not start immediately after memsz in the middle of a page. + // Userspace may expect to use that space. + EXPECT_GE(regs.rax, 0x41000); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/exec_proc_exe_workload.cc b/test/syscalls/linux/exec_proc_exe_workload.cc new file mode 100644 index 000000000..b9a4ac749 --- /dev/null +++ b/test/syscalls/linux/exec_proc_exe_workload.cc @@ -0,0 +1,35 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <stdlib.h> +#include <unistd.h> + +#include <iostream> + +#include "test/util/fs_util.h" +#include "test/util/posix_error.h" + +int main(int argc, char** argv, char** envp) { + std::string exe = gvisor::testing::ProcessExePath(getpid()).ValueOrDie(); + if (exe[0] != '/') { + std::cerr << "relative path: " << exe << std::endl; + exit(1); + } + if (exe.find(argv[1]) != std::string::npos) { + std::cerr << "matching path: " << exe << std::endl; + exit(1); + } + + return 0; +} diff --git a/test/syscalls/linux/exec_state_workload.cc b/test/syscalls/linux/exec_state_workload.cc new file mode 100644 index 000000000..b66e22565 --- /dev/null +++ b/test/syscalls/linux/exec_state_workload.cc @@ -0,0 +1,202 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <signal.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/auxv.h> +#include <sys/prctl.h> +#include <sys/time.h> +#include <iostream> +#include <ostream> +#include <string> + +// Pretty-print a sigset_t. 
// Writes the members of |s| as "{ 1 2 ... }" (signal numbers in decimal).
std::ostream& operator<<(std::ostream& out, const sigset_t& s) {
  out << "{ ";

  // Valid signal numbers start at 1. sigismember() reports an invalid number
  // (such as 0) with -1, which is truthy, so compare against 1 explicitly.
  for (int signo = 1; signo < NSIG; signo++) {
    if (sigismember(&s, signo) == 1) {
      out << signo << " ";
    }
  }

  out << "}";
  return out;
}

// Verify that the signo handler is handler.
//
// Returns 0 when the disposition installed for |signo| equals the address
// |handler|; otherwise prints a diagnostic to stderr and returns 1.
int CheckSigHandler(uint32_t signo, uintptr_t handler) {
  struct sigaction sa;
  int ret = sigaction(signo, nullptr, &sa);
  if (ret < 0) {
    perror("sigaction");
    return 1;
  }

  if (reinterpret_cast<void (*)(int)>(handler) != sa.sa_handler) {
    // A function pointer streamed directly is converted to bool, so print
    // the address as an integer to get a useful value.
    std::cerr << "signo " << signo << " handler got: " << std::hex
              << reinterpret_cast<uintptr_t>(sa.sa_handler)
              << " expected: " << handler << std::dec << std::endl;
    return 1;
  }
  return 0;
}

// Verify that the signo is blocked.
//
// Returns 0 when |signo| is a member of the current signal mask; otherwise
// prints a diagnostic to stderr and returns 1.
int CheckSigBlocked(uint32_t signo) {
  sigset_t s;
  // With a null new set the mask is only queried, not modified.
  int ret = sigprocmask(SIG_SETMASK, nullptr, &s);
  if (ret < 0) {
    perror("sigprocmask");
    return 1;
  }

  // sigismember() returns -1 for an invalid signal number; that must not be
  // mistaken for "blocked".
  const int member = sigismember(&s, signo);
  if (member < 0) {
    perror("sigismember");
    return 1;
  }
  if (member == 0) {
    std::cerr << "signal " << signo << " not blocked in signal mask: " << s
              << std::endl;
    return 1;
  }
  return 0;
}

// Verify that the itimer is enabled.
//
// A timer counts as enabled when any field of its current value or interval
// is non-zero. Returns 0 if enabled, 1 (with a diagnostic) otherwise.
int CheckItimerEnabled(uint32_t timer) {
  struct itimerval itv;
  int ret = getitimer(timer, &itv);
  if (ret < 0) {
    perror("getitimer");
    return 1;
  }

  if (!itv.it_value.tv_sec && !itv.it_value.tv_usec &&
      !itv.it_interval.tv_sec && !itv.it_interval.tv_usec) {
    std::cerr << "timer " << timer
              << " not enabled. value sec: " << itv.it_value.tv_sec
              << " usec: " << itv.it_value.tv_usec
              << " interval sec: " << itv.it_interval.tv_sec
              << " usec: " << itv.it_interval.tv_usec << std::endl;
    return 1;
  }
  return 0;
}

// Prints the string referenced by the AT_EXECFN auxiliary-vector entry to
// stderr. Returns 1 if the entry is absent, 0 otherwise.
int PrintExecFn() {
  unsigned long execfn = getauxval(AT_EXECFN);
  if (!execfn) {
    std::cerr << "AT_EXECFN missing" << std::endl;
    return 1;
  }

  std::cerr << reinterpret_cast<const char*>(execfn) << std::endl;
  return 0;
}

// Prints the name reported by prctl(PR_GET_NAME) to stderr. Returns 1 if the
// prctl call fails, 0 otherwise.
int PrintExecName() {
  const size_t name_length = 20;
  // Zero-filled so the buffer is terminated whatever prctl writes.
  char name[name_length] = {0};
  if (prctl(PR_GET_NAME, name) < 0) {
    std::cerr << "prctl(PR_GET_NAME) failed" << std::endl;
    return 1;
  }

  std::cerr << name << std::endl;
  return 0;
}

// Prints the supported sub-commands to stderr. The names listed here must
// match the strings main() dispatches on.
void usage(const std::string& prog) {
  std::cerr << "usage:\n"
            << "\t" << prog << " CheckSigHandler <signo> <handler addr (hex)>\n"
            << "\t" << prog << " CheckSigBlocked <signo>\n"
            << "\t" << prog << " CheckItimerEnabled <timer>\n"
            << "\t" << prog << " PrintExecFn\n"
            << "\t" << prog << " PrintExecName" << std::endl;
}

// Parses the whole of |text| as an unsigned integer in |base|. Returns false
// when |text| contains no digits or is followed by trailing characters.
static bool ParseUnsigned(const char* text, int base,
                          unsigned long long* value) {
  char* end = nullptr;
  *value = strtoull(text, &end, base);
  return end != text && *end == '\0';
}

// Dispatches on argv[1] to one of the checks above and returns its result
// (0 on success, 1 on failure or bad usage).
int main(int argc, char** argv) {
  // argv[0] may be absent when the program is exec'd with an empty argv.
  const std::string prog =
      (argc > 0 && argv[0] != nullptr) ? argv[0] : "exec_state_workload";

  if (argc < 2) {
    usage(prog);
    return 1;
  }

  std::string func(argv[1]);

  if (func == "CheckSigHandler") {
    if (argc != 4) {
      usage(prog);
      return 1;
    }

    unsigned long long signo = 0;
    if (!ParseUnsigned(argv[2], 10, &signo)) {
      std::cerr << "invalid signo: " << argv[2] << std::endl;
      return 1;
    }

    unsigned long long handler = 0;
    if (!ParseUnsigned(argv[3], 16, &handler)) {
      std::cerr << "invalid handler: " << argv[3] << std::endl;
      return 1;
    }

    return CheckSigHandler(static_cast<uint32_t>(signo),
                           static_cast<uintptr_t>(handler));
  }

  if (func == "CheckSigBlocked") {
    if (argc != 3) {
      usage(prog);
      return 1;
    }

    unsigned long long signo = 0;
    if (!ParseUnsigned(argv[2], 10, &signo)) {
      std::cerr << "invalid signo: " << argv[2] << std::endl;
      return 1;
    }

    return CheckSigBlocked(static_cast<uint32_t>(signo));
  }

  if (func == "CheckItimerEnabled") {
    if (argc != 3) {
      usage(prog);
      return 1;
    }

    unsigned long long timer = 0;
    if (!ParseUnsigned(argv[2], 10, &timer)) {
      std::cerr << "invalid timer: " << argv[2] << std::endl;
      return 1;
    }

    return CheckItimerEnabled(static_cast<uint32_t>(timer));
  }

  if (func == "PrintExecFn") {
    // N.B. This will be called as an interpreter script, with the script passed
    // as the third argument. We don't care about that script.
    return PrintExecFn();
  }

  if (func == "PrintExecName") {
    // N.B. This may be called as an interpreter script like PrintExecFn.
    return PrintExecName();
  }

  std::cerr << "Invalid function: " << func << std::endl;
  return 1;
}

// ---------------------------------------------------------------------------
// Diff header of the next file in this dump, preserved verbatim:
// diff --git a/test/syscalls/linux/exit.cc b/test/syscalls/linux/exit.cc
// new file mode 100644
// index 000000000..7246a7b3b
// --- /dev/null
// +++ b/test/syscalls/linux/exit.cc
// @@ -0,0 +1,77 @@
// ---------------------------------------------------------------------------
// Copyright 2018 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
+ +#include <sys/wait.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "absl/time/time.h" +#include "test/util/file_descriptor.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +void TestExit(int code) { + pid_t pid = fork(); + if (pid == 0) { + _exit(code); + } + + ASSERT_THAT(pid, SyscallSucceeds()); + + int status; + EXPECT_THAT(RetryEINTR(waitpid)(pid, &status, 0), SyscallSucceeds()); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == code) << status; +} + +TEST(ExitTest, Success) { TestExit(0); } + +TEST(ExitTest, Failure) { TestExit(1); } + +// This test ensures that a process's file descriptors are closed when it calls +// exit(). In order to test this, the parent tries to read from a pipe whose +// write end is held by the child. While the read is blocking, the child exits, +// which should cause the parent to read 0 bytes due to EOF. +TEST(ExitTest, CloseFds) { + int pipe_fds[2]; + ASSERT_THAT(pipe(pipe_fds), SyscallSucceeds()); + + FileDescriptor read_fd(pipe_fds[0]); + FileDescriptor write_fd(pipe_fds[1]); + + pid_t pid = fork(); + if (pid == 0) { + read_fd.reset(); + + SleepSafe(absl::Seconds(10)); + + _exit(0); + } + + EXPECT_THAT(pid, SyscallSucceeds()); + + write_fd.reset(); + + char buf[10]; + EXPECT_THAT(ReadFd(read_fd.get(), buf, sizeof(buf)), + SyscallSucceedsWithValue(0)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/exit_script.sh b/test/syscalls/linux/exit_script.sh new file mode 100755 index 000000000..f014fcf99 --- /dev/null +++ b/test/syscalls/linux/exit_script.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +# Copyright 2018 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +if [ $# -ne 1 ]; then + echo "Usage: $0 exit_code" + exit 255 +fi + +exit $1 diff --git a/test/syscalls/linux/fadvise64.cc b/test/syscalls/linux/fadvise64.cc new file mode 100644 index 000000000..041e8b7b6 --- /dev/null +++ b/test/syscalls/linux/fadvise64.cc @@ -0,0 +1,72 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <errno.h> +#include <syscall.h> +#include <unistd.h> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "test/util/file_descriptor.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { +namespace { + +TEST(FAdvise64Test, Basic) { + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const auto fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY)); + + // fadvise64 is noop in gVisor, so just test that it succeeds. 
+ ASSERT_THAT(syscall(__NR_fadvise64, fd.get(), 0, 10, POSIX_FADV_NORMAL), + SyscallSucceeds()); + ASSERT_THAT(syscall(__NR_fadvise64, fd.get(), 0, 10, POSIX_FADV_RANDOM), + SyscallSucceeds()); + ASSERT_THAT(syscall(__NR_fadvise64, fd.get(), 0, 10, POSIX_FADV_SEQUENTIAL), + SyscallSucceeds()); + ASSERT_THAT(syscall(__NR_fadvise64, fd.get(), 0, 10, POSIX_FADV_WILLNEED), + SyscallSucceeds()); + ASSERT_THAT(syscall(__NR_fadvise64, fd.get(), 0, 10, POSIX_FADV_DONTNEED), + SyscallSucceeds()); + ASSERT_THAT(syscall(__NR_fadvise64, fd.get(), 0, 10, POSIX_FADV_NOREUSE), + SyscallSucceeds()); +} + +TEST(FAdvise64Test, InvalidArgs) { + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const auto fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY)); + + // Note: offset is allowed to be negative. + ASSERT_THAT(syscall(__NR_fadvise64, fd.get(), 0, static_cast<off_t>(-1), + POSIX_FADV_NORMAL), + SyscallFailsWithErrno(EINVAL)); + ASSERT_THAT(syscall(__NR_fadvise64, fd.get(), 0, 10, 12345), + SyscallFailsWithErrno(EINVAL)); +} + +TEST(FAdvise64Test, NoPipes) { + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + const FileDescriptor read(fds[0]); + const FileDescriptor write(fds[1]); + + ASSERT_THAT(syscall(__NR_fadvise64, read.get(), 0, 10, POSIX_FADV_NORMAL), + SyscallFailsWithErrno(ESPIPE)); +} + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/fallocate.cc b/test/syscalls/linux/fallocate.cc new file mode 100644 index 000000000..53aedd4e4 --- /dev/null +++ b/test/syscalls/linux/fallocate.cc @@ -0,0 +1,57 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <fcntl.h> +#include <sys/eventfd.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "test/util/file_descriptor.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// These tests are very rudimentary because fallocate is not +// implemented. We just want to make sure the expected error codes are +// returned. + +TEST(FallocateTest, NotImplemented) { + auto temp_path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(temp_path.path(), O_RDWR)); + + // Test that a completely unassigned fallocate mode returns EOPNOTSUPP. 
+ ASSERT_THAT(fallocate(fd.get(), 0x80, 0, 32768), + SyscallFailsWithErrno(EOPNOTSUPP)); +} + +TEST(FallocateTest, BadOffset) { + auto temp_path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(temp_path.path(), O_RDWR)); + ASSERT_THAT(fallocate(fd.get(), 0, -1, 32768), SyscallFailsWithErrno(EINVAL)); +} + +TEST(FallocateTest, BadLength) { + auto temp_path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(temp_path.path(), O_RDWR)); + ASSERT_THAT(fallocate(fd.get(), 0, 0, -1), SyscallFailsWithErrno(EINVAL)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/fault.cc b/test/syscalls/linux/fault.cc new file mode 100644 index 000000000..cfa7d0d1f --- /dev/null +++ b/test/syscalls/linux/fault.cc @@ -0,0 +1,71 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#define _GNU_SOURCE 1 +#include <signal.h> +#include <ucontext.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +__attribute__((noinline)) void Fault(void) { + volatile int* foo = nullptr; + *foo = 0; +} + +int GetPcFromUcontext(ucontext_t* uc, uintptr_t* pc) { +#if defined(__x86_64__) + *pc = uc->uc_mcontext.gregs[REG_RIP]; + return 1; +#elif defined(__i386__) + *pc = uc->uc_mcontext.gregs[REG_EIP]; + return 1; +#else + return 0; +#endif +} + +void sigact_handler(int sig, siginfo_t* siginfo, void* context) { + uintptr_t pc; + if (GetPcFromUcontext(reinterpret_cast<ucontext_t*>(context), &pc)) { + /* Expect Fault() to be at most 64 bytes in size. */ + uintptr_t fault_addr = reinterpret_cast<uintptr_t>(&Fault); + EXPECT_GE(pc, fault_addr); + EXPECT_LT(pc, fault_addr + 64); + exit(0); + } +} + +TEST(FaultTest, InRange) { + // Reset the signal handler to do nothing so that it doesn't freak out + // the test runner when we fire an alarm. + struct sigaction sa = {}; + sa.sa_sigaction = sigact_handler; + sigfillset(&sa.sa_mask); + sa.sa_flags = SA_SIGINFO; + ASSERT_THAT(sigaction(SIGSEGV, &sa, nullptr), SyscallSucceeds()); + + Fault(); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/fchdir.cc b/test/syscalls/linux/fchdir.cc new file mode 100644 index 000000000..2b13e36c3 --- /dev/null +++ b/test/syscalls/linux/fchdir.cc @@ -0,0 +1,77 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include <fcntl.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/un.h> + +#include "gtest/gtest.h" +#include "test/util/capability_util.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +TEST(FchdirTest, Success) { + auto temp_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + int fd; + ASSERT_THAT(fd = open(temp_dir.path().c_str(), O_DIRECTORY | O_RDONLY), + SyscallSucceeds()); + + EXPECT_THAT(fchdir(fd), SyscallSucceeds()); + EXPECT_THAT(close(fd), SyscallSucceeds()); + // Change CWD to a permanent location as temp dirs will be cleaned up. + EXPECT_THAT(chdir("/"), SyscallSucceeds()); +} + +TEST(FchdirTest, InvalidFD) { + EXPECT_THAT(fchdir(-1), SyscallFailsWithErrno(EBADF)); +} + +TEST(FchdirTest, PermissionDenied) { + // Drop capabilities that allow us to override directory permissions. 
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false)); + + auto temp_dir = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateDirWith(GetAbsoluteTestTmpdir(), 0666 /* mode */)); + + int fd; + ASSERT_THAT(fd = open(temp_dir.path().c_str(), O_DIRECTORY | O_RDONLY), + SyscallSucceeds()); + + EXPECT_THAT(fchdir(fd), SyscallFailsWithErrno(EACCES)); + EXPECT_THAT(close(fd), SyscallSucceeds()); +} + +TEST(FchdirTest, NotDir) { + auto temp_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + + int fd; + ASSERT_THAT(fd = open(temp_file.path().c_str(), O_CREAT | O_RDONLY, 0777), + SyscallSucceeds()); + + EXPECT_THAT(fchdir(fd), SyscallFailsWithErrno(ENOTDIR)); + EXPECT_THAT(close(fd), SyscallSucceeds()); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/fcntl.cc b/test/syscalls/linux/fcntl.cc new file mode 100644 index 000000000..355334bfa --- /dev/null +++ b/test/syscalls/linux/fcntl.cc @@ -0,0 +1,978 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <fcntl.h> +#include <signal.h> +#include <sys/eventfd.h> +#include <syscall.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "absl/base/macros.h" +#include "absl/base/port.h" +#include "absl/memory/memory.h" +#include "absl/strings/str_cat.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/cleanup.h" +#include "test/util/multiprocess_util.h" +#include "test/util/posix_error.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" +#include "test/util/timer_util.h" + +DEFINE_string(child_setlock_on, "", + "Contains the path to try to set a file lock on."); +DEFINE_bool(child_setlock_write, false, + "Whether to set a writable lock (otherwise readable)"); +DEFINE_bool(blocking, false, + "Whether to set a blocking lock (otherwise non-blocking)."); +DEFINE_bool(retry_eintr, false, "Whether to retry in the subprocess on EINTR."); +DEFINE_uint64(child_setlock_start, 0, "The value of struct flock start"); +DEFINE_uint64(child_setlock_len, 0, "The value of struct flock len"); +DEFINE_int32(socket_fd, -1, + "A socket to use for communicating more state back " + "to the parent."); + +namespace gvisor { +namespace testing { + +// O_LARGEFILE as defined by Linux. glibc tries to be clever by setting it to 0 +// because "it isn't needed", even though Linux can return it via F_GETFL. +constexpr int kOLargeFile = 00100000; + +class FcntlLockTest : public ::testing::Test { + public: + void SetUp() override { + // Let's make a socket pair. 
+ ASSERT_THAT(socketpair(AF_UNIX, SOCK_STREAM, 0, fds_), SyscallSucceeds()); + } + + void TearDown() override { + EXPECT_THAT(close(fds_[0]), SyscallSucceeds()); + EXPECT_THAT(close(fds_[1]), SyscallSucceeds()); + } + + int64_t GetSubprocessFcntlTimeInUsec() { + int64_t ret = 0; + EXPECT_THAT(ReadFd(fds_[0], reinterpret_cast<void*>(&ret), sizeof(ret)), + SyscallSucceedsWithValue(sizeof(ret))); + return ret; + } + + // The first fd will remain with the process creating the subprocess + // and the second will go to the subprocess. + int fds_[2] = {}; +}; + +namespace { + +PosixErrorOr<Cleanup> SubprocessLock(std::string const& path, bool for_write, + bool blocking, bool retry_eintr, int fd, + off_t start, off_t length, pid_t* child) { + std::vector<std::string> args = { + "/proc/self/exe", "--child_setlock_on", path, + "--child_setlock_start", absl::StrCat(start), "--child_setlock_len", + absl::StrCat(length), "--socket_fd", absl::StrCat(fd)}; + + if (for_write) { + args.push_back("--child_setlock_write"); + } + + if (blocking) { + args.push_back("--blocking"); + } + + if (retry_eintr) { + args.push_back("--retry_eintr"); + } + + int execve_errno = 0; + ASSIGN_OR_RETURN_ERRNO( + auto cleanup, + ForkAndExec("/proc/self/exe", ExecveArray(args.begin(), args.end()), {}, + nullptr, child, &execve_errno)); + + if (execve_errno != 0) { + return PosixError(execve_errno, "execve"); + } + + return std::move(cleanup); +} + +PosixErrorOr<FileDescriptor> Eventfd(int count, int flags) { + int efd = eventfd(count, flags); + if (efd < 0) { + return PosixError(errno, "Eventfd"); + } + return FileDescriptor(efd); +} + +TEST(FcntlTest, SetCloExec) { + // Open an eventfd file descriptor with FD_CLOEXEC descriptor flag not set. + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Eventfd(0, 0)); + ASSERT_THAT(fcntl(fd.get(), F_GETFD), SyscallSucceedsWithValue(0)); + + // Set the FD_CLOEXEC flag. 
+ ASSERT_THAT(fcntl(fd.get(), F_SETFD, FD_CLOEXEC), SyscallSucceeds()); + ASSERT_THAT(fcntl(fd.get(), F_GETFD), SyscallSucceedsWithValue(FD_CLOEXEC)); +} + +TEST(FcntlTest, ClearCloExec) { + // Open an eventfd file descriptor with FD_CLOEXEC descriptor flag set. + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Eventfd(0, EFD_CLOEXEC)); + ASSERT_THAT(fcntl(fd.get(), F_GETFD), SyscallSucceedsWithValue(FD_CLOEXEC)); + + // Clear the FD_CLOEXEC flag. + ASSERT_THAT(fcntl(fd.get(), F_SETFD, 0), SyscallSucceeds()); + ASSERT_THAT(fcntl(fd.get(), F_GETFD), SyscallSucceedsWithValue(0)); +} + +TEST(FcntlTest, IndependentDescriptorFlags) { + // Open an eventfd file descriptor with FD_CLOEXEC descriptor flag not set. + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Eventfd(0, 0)); + ASSERT_THAT(fcntl(fd.get(), F_GETFD), SyscallSucceedsWithValue(0)); + + // Duplicate the descriptor. Ensure that it also doesn't have FD_CLOEXEC. + FileDescriptor newfd = ASSERT_NO_ERRNO_AND_VALUE(fd.Dup()); + ASSERT_THAT(fcntl(newfd.get(), F_GETFD), SyscallSucceedsWithValue(0)); + + // Set FD_CLOEXEC on the first FD. + ASSERT_THAT(fcntl(fd.get(), F_SETFD, FD_CLOEXEC), SyscallSucceeds()); + ASSERT_THAT(fcntl(fd.get(), F_GETFD), SyscallSucceedsWithValue(FD_CLOEXEC)); + + // Ensure that the second FD is unaffected by the change on the first. + ASSERT_THAT(fcntl(newfd.get(), F_GETFD), SyscallSucceedsWithValue(0)); +} + +// All file description flags passed to open appear in F_GETFL. +TEST(FcntlTest, GetAllFlags) { + TempPath path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + int flags = O_RDWR | O_DIRECT | O_SYNC | O_NONBLOCK | O_APPEND; + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(path.path(), flags)); + + // Linux forces O_LARGEFILE on all 64-bit kernels and gVisor's is 64-bit. 
+ int expected = flags | kOLargeFile; + + int rflags; + EXPECT_THAT(rflags = fcntl(fd.get(), F_GETFL), SyscallSucceeds()); + EXPECT_EQ(rflags, expected); +} + +TEST(FcntlTest, SetFlags) { + TempPath path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(path.path(), 0)); + + int const flags = O_RDWR | O_DIRECT | O_SYNC | O_NONBLOCK | O_APPEND; + EXPECT_THAT(fcntl(fd.get(), F_SETFL, flags), SyscallSucceeds()); + + // Can't set O_RDWR or O_SYNC. + // Linux forces O_LARGEFILE on all 64-bit kernels and gVisor's is 64-bit. + int expected = O_DIRECT | O_NONBLOCK | O_APPEND | kOLargeFile; + + int rflags; + EXPECT_THAT(rflags = fcntl(fd.get(), F_GETFL), SyscallSucceeds()); + EXPECT_EQ(rflags, expected); +} + +TEST_F(FcntlLockTest, SetLockBadFd) { + struct flock fl; + fl.l_type = F_WRLCK; + fl.l_whence = SEEK_SET; + fl.l_start = 0; + // len 0 has a special meaning: lock all bytes despite how + // large the file grows. + fl.l_len = 0; + EXPECT_THAT(fcntl(-1, F_SETLK, &fl), SyscallFailsWithErrno(EBADF)); +} + +TEST_F(FcntlLockTest, SetLockPipe) { + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + + struct flock fl; + fl.l_type = F_WRLCK; + fl.l_whence = SEEK_SET; + fl.l_start = 0; + // Same as SetLockBadFd, but doesn't matter, we expect this to fail. + fl.l_len = 0; + EXPECT_THAT(fcntl(fds[0], F_SETLK, &fl), SyscallFailsWithErrno(EBADF)); + EXPECT_THAT(close(fds[0]), SyscallSucceeds()); + EXPECT_THAT(close(fds[1]), SyscallSucceeds()); +} + +TEST_F(FcntlLockTest, SetLockDir) { + auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(dir.path(), O_RDONLY, 0666)); + + struct flock fl; + fl.l_type = F_RDLCK; + fl.l_whence = SEEK_SET; + fl.l_start = 0; + // Same as SetLockBadFd. 
+ fl.l_len = 0; + + EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl), SyscallSucceeds()); +} + +TEST_F(FcntlLockTest, SetLockBadOpenFlagsWrite) { + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY, 0666)); + + struct flock fl0; + fl0.l_type = F_WRLCK; + fl0.l_whence = SEEK_SET; + fl0.l_start = 0; + // Same as SetLockBadFd. + fl0.l_len = 0; + + // Expect that setting a write lock using a read only file descriptor + // won't work. + EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl0), SyscallFailsWithErrno(EBADF)); +} + +TEST_F(FcntlLockTest, SetLockBadOpenFlagsRead) { + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_WRONLY, 0666)); + + struct flock fl1; + fl1.l_type = F_RDLCK; + fl1.l_whence = SEEK_SET; + fl1.l_start = 0; + // Same as SetLockBadFd. + fl1.l_len = 0; + + EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl1), SyscallFailsWithErrno(EBADF)); +} + +TEST_F(FcntlLockTest, SetLockUnlockOnNothing) { + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666)); + + struct flock fl; + fl.l_type = F_UNLCK; + fl.l_whence = SEEK_SET; + fl.l_start = 0; + // Same as SetLockBadFd. + fl.l_len = 0; + + EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl), SyscallSucceeds()); +} + +TEST_F(FcntlLockTest, SetWriteLockSingleProc) { + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + FileDescriptor fd0 = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666)); + + struct flock fl; + fl.l_type = F_WRLCK; + fl.l_whence = SEEK_SET; + fl.l_start = 0; + // Same as SetLockBadFd. + fl.l_len = 0; + + EXPECT_THAT(fcntl(fd0.get(), F_SETLK, &fl), SyscallSucceeds()); + // Expect to be able to take the same lock on the same fd no problem. 
+ EXPECT_THAT(fcntl(fd0.get(), F_SETLK, &fl), SyscallSucceeds()); + + FileDescriptor fd1 = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666)); + + // Expect to be able to take the same lock from a different fd but for + // the same process. + EXPECT_THAT(fcntl(fd1.get(), F_SETLK, &fl), SyscallSucceeds()); +} + +TEST_F(FcntlLockTest, SetReadLockMultiProc) { + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666)); + + struct flock fl; + fl.l_type = F_RDLCK; + fl.l_whence = SEEK_SET; + fl.l_start = 0; + // Same as SetLockBadFd. + fl.l_len = 0; + EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl), SyscallSucceeds()); + + // spawn a child process to take a read lock on the same file. + pid_t child_pid = 0; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + SubprocessLock(file.path(), false /* write lock */, + false /* nonblocking */, false /* no eintr retry */, + -1 /* no socket fd */, fl.l_start, fl.l_len, &child_pid)); + + int status = 0; + ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds()); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << "Exited with code: " << status; +} + +TEST_F(FcntlLockTest, SetReadThenWriteLockMultiProc) { + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666)); + + struct flock fl; + fl.l_type = F_RDLCK; + fl.l_whence = SEEK_SET; + fl.l_start = 0; + // Same as SetLockBadFd. + fl.l_len = 0; + EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl), SyscallSucceeds()); + + // Assert that another process trying to lock on the same file will fail + // with EAGAIN. It's important that we keep the fd above open so that + // that the other process will contend with the lock. 
+ pid_t child_pid = 0; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + SubprocessLock(file.path(), true /* write lock */, + false /* nonblocking */, false /* no eintr retry */, + -1 /* no socket fd */, fl.l_start, fl.l_len, &child_pid)); + + int status = 0; + ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds()); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == EAGAIN) + << "Exited with code: " << status; + + // Close the fd: we want to test that another process can acquire the + // lock after this point. + fd.reset(); + // Assert that another process can now acquire the lock. + + child_pid = 0; + auto cleanup2 = ASSERT_NO_ERRNO_AND_VALUE( + SubprocessLock(file.path(), true /* write lock */, + false /* nonblocking */, false /* no eintr retry */, + -1 /* no socket fd */, fl.l_start, fl.l_len, &child_pid)); + ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds()); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << "Exited with code: " << status; +} + +TEST_F(FcntlLockTest, SetWriteThenReadLockMultiProc) { + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666)); + // Same as SetReadThenWriteLockMultiProc. + + struct flock fl; + fl.l_type = F_WRLCK; + fl.l_whence = SEEK_SET; + fl.l_start = 0; + // Same as SetLockBadFd. + fl.l_len = 0; + + // Same as SetReadThenWriteLockMultiProc. + EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl), SyscallSucceeds()); + + // Same as SetReadThenWriteLockMultiProc. 
+ pid_t child_pid = 0; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + SubprocessLock(file.path(), false /* write lock */, + false /* nonblocking */, false /* no eintr retry */, + -1 /* no socket fd */, fl.l_start, fl.l_len, &child_pid)); + + int status = 0; + ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds()); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == EAGAIN) + << "Exited with code: " << status; + + // Same as SetReadThenWriteLockMultiProc. + fd.reset(); // Close the fd. + + // Same as SetReadThenWriteLockMultiProc. + child_pid = 0; + auto cleanup2 = ASSERT_NO_ERRNO_AND_VALUE( + SubprocessLock(file.path(), false /* write lock */, + false /* nonblocking */, false /* no eintr retry */, + -1 /* no socket fd */, fl.l_start, fl.l_len, &child_pid)); + ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds()); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << "Exited with code: " << status; +} + +TEST_F(FcntlLockTest, SetWriteLockMultiProc) { + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666)); + // Same as SetReadThenWriteLockMultiProc. + + struct flock fl; + fl.l_type = F_WRLCK; + fl.l_whence = SEEK_SET; + fl.l_start = 0; + // Same as SetLockBadFd. + fl.l_len = 0; + + // Same as SetReadWriteLockMultiProc. + EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl), SyscallSucceeds()); + + // Same as SetReadWriteLockMultiProc. + pid_t child_pid = 0; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + SubprocessLock(file.path(), true /* write lock */, + false /* nonblocking */, false /* no eintr retry */, + -1 /* no socket fd */, fl.l_start, fl.l_len, &child_pid)); + int status = 0; + ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds()); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == EAGAIN) + << "Exited with code: " << status; + + fd.reset(); // Close the FD. 
+ // Same as SetReadWriteLockMultiProc. + child_pid = 0; + auto cleanup2 = ASSERT_NO_ERRNO_AND_VALUE( + SubprocessLock(file.path(), true /* write lock */, + false /* nonblocking */, false /* no eintr retry */, + -1 /* no socket fd */, fl.l_start, fl.l_len, &child_pid)); + ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds()); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << "Exited with code: " << status; +} + +TEST_F(FcntlLockTest, SetLockIsRegional) { + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666)); + + struct flock fl; + fl.l_type = F_WRLCK; + fl.l_whence = SEEK_SET; + fl.l_start = 0; + fl.l_len = 4096; + + // Same as SetReadWriteLockMultiProc. + EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl), SyscallSucceeds()); + + // Same as SetReadWriteLockMultiProc. + pid_t child_pid = 0; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + SubprocessLock(file.path(), true /* write lock */, + false /* nonblocking */, false /* no eintr retry */, + -1 /* no socket fd */, fl.l_len, 0, &child_pid)); + int status = 0; + ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds()); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << "Exited with code: " << status; +} + +TEST_F(FcntlLockTest, SetLockUpgradeDowngrade) { + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666)); + + struct flock fl; + fl.l_type = F_RDLCK; + fl.l_whence = SEEK_SET; + fl.l_start = 0; + // Same as SetLockBadFd. + fl.l_len = 0; + + // Same as SetReadWriteLockMultiProc. + EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl), SyscallSucceeds()); + + // Upgrade to a write lock. This will prevent anyone else from taking + // the lock. 
+ fl.l_type = F_WRLCK; + EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl), SyscallSucceeds()); + + // Same as SetReadWriteLockMultiProc., + pid_t child_pid = 0; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + SubprocessLock(file.path(), false /* write lock */, + false /* nonblocking */, false /* no eintr retry */, + -1 /* no socket fd */, fl.l_start, fl.l_len, &child_pid)); + + int status = 0; + ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds()); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == EAGAIN) + << "Exited with code: " << status; + + // Downgrade back to a read lock. + fl.l_type = F_RDLCK; + EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl), SyscallSucceeds()); + + // Do the same stint as before, but this time it should succeed. + child_pid = 0; + auto cleanup2 = ASSERT_NO_ERRNO_AND_VALUE( + SubprocessLock(file.path(), false /* write lock */, + false /* nonblocking */, false /* no eintr retry */, + -1 /* no socket fd */, fl.l_start, fl.l_len, &child_pid)); + + ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds()); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << "Exited with code: " << status; +} + +TEST_F(FcntlLockTest, SetLockDroppedOnClose) { + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666)); + + // While somewhat surprising, obtaining another fd to the same file and + // then closing it in this process drops *all* locks. + FileDescriptor other_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666)); + // Same as SetReadThenWriteLockMultiProc. + + struct flock fl; + fl.l_type = F_WRLCK; + fl.l_whence = SEEK_SET; + fl.l_start = 0; + // Same as SetLockBadFd. + fl.l_len = 0; + + // Same as SetReadWriteLockMultiProc. + EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl), SyscallSucceeds()); + + other_fd.reset(); // Close. + + // Expect to be able to get the lock, given that the close above dropped it. 
+ pid_t child_pid = 0; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + SubprocessLock(file.path(), true /* write lock */, + false /* nonblocking */, false /* no eintr retry */, + -1 /* no socket fd */, fl.l_start, fl.l_len, &child_pid)); + + int status = 0; + ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds()); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << "Exited with code: " << status; +} + +TEST_F(FcntlLockTest, SetLockUnlock) { + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666)); + + // Setup two regional locks with different permissions. + struct flock fl0; + fl0.l_type = F_WRLCK; + fl0.l_whence = SEEK_SET; + fl0.l_start = 0; + fl0.l_len = 4096; + + struct flock fl1; + fl1.l_type = F_RDLCK; + fl1.l_whence = SEEK_SET; + fl1.l_start = 4096; + // Same as SetLockBadFd. + fl1.l_len = 0; + + // Set both region locks. + EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl0), SyscallSucceeds()); + EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl1), SyscallSucceeds()); + + // Another process should fail to take a read lock on the entire file + // due to the regional write lock. + pid_t child_pid = 0; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(SubprocessLock( + file.path(), false /* write lock */, false /* nonblocking */, + false /* no eintr retry */, -1 /* no socket fd */, 0, 0, &child_pid)); + + int status = 0; + ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds()); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == EAGAIN) + << "Exited with code: " << status; + + // Then only unlock the writable one. This should ensure that other + // processes can take any read lock that it wants. + fl0.l_type = F_UNLCK; + EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl0), SyscallSucceeds()); + + // Another process should now succeed to get a read lock on the entire file. 
+ child_pid = 0; + auto cleanup2 = ASSERT_NO_ERRNO_AND_VALUE(SubprocessLock( + file.path(), false /* write lock */, false /* nonblocking */, + false /* no eintr retry */, -1 /* no socket fd */, 0, 0, &child_pid)); + ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds()); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << "Exited with code: " << status; +} + +TEST_F(FcntlLockTest, SetLockAcrossRename) { + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666)); + + // Set up a single write lock on the file. + struct flock fl; + fl.l_type = F_WRLCK; + fl.l_whence = SEEK_SET; + fl.l_start = 0; + // Same as SetLockBadFd. + fl.l_len = 0; + + // Set the region lock. + EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl), SyscallSucceeds()); + + // Rename the file to someplace nearby. + std::string const newpath = NewTempAbsPath(); + EXPECT_THAT(rename(file.path().c_str(), newpath.c_str()), SyscallSucceeds()); + + // Another process should fail to take a read lock on the renamed file + // since we still have an open handle to the inode. + pid_t child_pid = 0; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + SubprocessLock(newpath, false /* write lock */, false /* nonblocking */, + false /* no eintr retry */, -1 /* no socket fd */, + fl.l_start, fl.l_len, &child_pid)); + + int status = 0; + ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds()); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == EAGAIN) + << "Exited with code: " << status; +} + +// NOTE: The blocking tests below aren't perfect. It's hard to assert exactly +// what the kernel did while handling a syscall. These tests are timing based +// because there really isn't any other reasonable way to assert that correct +// blocking behavior happened. 
+ +// This test will verify that blocking works as expected when another process +// holds a write lock when obtaining a write lock. This test will hold the lock +// for some amount of time and then wait for the second process to send over the +// socket_fd the amount of time it was blocked for before the lock succeeded. +TEST_F(FcntlLockTest, SetWriteLockThenBlockingWriteLock) { + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666)); + + struct flock fl; + fl.l_type = F_WRLCK; + fl.l_whence = SEEK_SET; + fl.l_start = 0; + fl.l_len = 0; + + // Take the write lock. + ASSERT_THAT(fcntl(fd.get(), F_SETLKW, &fl), SyscallSucceeds()); + + // Attempt to take the write lock in a sub process. This will immediately block + // so we will release our lock after some amount of time and then assert the + // amount of time the other process was blocked for. + pid_t child_pid = 0; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(SubprocessLock( + file.path(), true /* write lock */, true /* Blocking Lock */, + true /* Retry on EINTR */, fds_[1] /* Socket fd for timing information */, + fl.l_start, fl.l_len, &child_pid)); + + // We will wait kHoldLockFor before we release our lock allowing the + // subprocess to obtain it. + constexpr absl::Duration kHoldLockFor = absl::Seconds(5); + const int64_t kMinBlockTimeUsec = absl::ToInt64Microseconds(absl::Seconds(1)); + + absl::SleepFor(kHoldLockFor); + + // Unlock our write lock. + fl.l_type = F_UNLCK; + ASSERT_THAT(fcntl(fd.get(), F_SETLKW, &fl), SyscallSucceeds()); + + // Read the blocked time from the subprocess socket. + int64_t subprocess_blocked_time_usec = GetSubprocessFcntlTimeInUsec(); + + // We must have been waiting at least kMinBlockTime. + EXPECT_GT(subprocess_blocked_time_usec, kMinBlockTimeUsec); + + // The FCNTL write lock must always succeed as it will simply block until it + // can obtain the lock. 
+ int status = 0; + ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds()); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << "Exited with code: " << status; +} + +// This test will verify that blocking works as expected when another process +// holds a read lock when obtaining a write lock. This test will hold the lock +// for some amount of time and then wait for the second process to send over the +// socket_fd the amount of time it was blocked for before the lock succeeded. +TEST_F(FcntlLockTest, SetReadLockThenBlockingWriteLock) { + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666)); + + struct flock fl; + fl.l_type = F_RDLCK; + fl.l_whence = SEEK_SET; + fl.l_start = 0; + fl.l_len = 0; + + // Take the read lock. + ASSERT_THAT(fcntl(fd.get(), F_SETLKW, &fl), SyscallSucceeds()); + + // Attempt to take the write lock in a sub process. This will immediately block + // so we will release our lock after some amount of time and then assert the + // amount of time the other process was blocked for. + pid_t child_pid = 0; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(SubprocessLock( + file.path(), true /* write lock */, true /* Blocking Lock */, + true /* Retry on EINTR */, fds_[1] /* Socket fd for timing information */, + fl.l_start, fl.l_len, &child_pid)); + + // We will wait kHoldLockFor before we release our lock allowing the + // subprocess to obtain it. + constexpr absl::Duration kHoldLockFor = absl::Seconds(5); + + const int64_t kMinBlockTimeUsec = absl::ToInt64Microseconds(absl::Seconds(1)); + + absl::SleepFor(kHoldLockFor); + + // Unlock our READ lock. + fl.l_type = F_UNLCK; + ASSERT_THAT(fcntl(fd.get(), F_SETLKW, &fl), SyscallSucceeds()); + + // Read the blocked time from the subprocess socket. + int64_t subprocess_blocked_time_usec = GetSubprocessFcntlTimeInUsec(); + + // We must have been waiting at least kMinBlockTime. 
+ EXPECT_GT(subprocess_blocked_time_usec, kMinBlockTimeUsec); + + // The FCNTL write lock must always succeed as it will simply block until it + // can obtain the lock. + int status = 0; + ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds()); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << "Exited with code: " << status; +} + +// This test will verify that blocking works as expected when another process +// holds a write lock when obtaining a read lock. This test will hold the lock +// for some amount of time and then wait for the second process to send over the +// socket_fd the amount of time it was blocked for before the lock succeeded. +TEST_F(FcntlLockTest, SetWriteLockThenBlockingReadLock) { + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666)); + + struct flock fl; + fl.l_type = F_WRLCK; + fl.l_whence = SEEK_SET; + fl.l_start = 0; + fl.l_len = 0; + + // Take the write lock. + ASSERT_THAT(fcntl(fd.get(), F_SETLKW, &fl), SyscallSucceeds()); + + // Attempt to take the read lock in a sub process. This will immediately block + // so we will release our lock after some amount of time and then assert the + // amount of time the other process was blocked for. + pid_t child_pid = 0; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(SubprocessLock( + file.path(), false /* read lock */, true /* Blocking Lock */, + true /* Retry on EINTR */, fds_[1] /* Socket fd for timing information */, + fl.l_start, fl.l_len, &child_pid)); + + // We will wait kHoldLockFor before we release our lock allowing the + // subprocess to obtain it. + constexpr absl::Duration kHoldLockFor = absl::Seconds(5); + + const int64_t kMinBlockTimeUsec = absl::ToInt64Microseconds(absl::Seconds(1)); + + absl::SleepFor(kHoldLockFor); + + // Unlock our write lock. 
+ fl.l_type = F_UNLCK; + ASSERT_THAT(fcntl(fd.get(), F_SETLKW, &fl), SyscallSucceeds()); + + // Read the blocked time from the subprocess socket. + int64_t subprocess_blocked_time_usec = GetSubprocessFcntlTimeInUsec(); + + // We must have been waiting at least kMinBlockTime. + EXPECT_GT(subprocess_blocked_time_usec, kMinBlockTimeUsec); + + // The FCNTL read lock must always succeed as it will simply block until it + // can obtain the lock. + int status = 0; + ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds()); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << "Exited with code: " << status; +} + +// This test will verify that when one process only holds a read lock that +// another will not block while obtaining a read lock when F_SETLKW is used. +TEST_F(FcntlLockTest, SetReadLockThenBlockingReadLock) { + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666)); + + struct flock fl; + fl.l_type = F_RDLCK; + fl.l_whence = SEEK_SET; + fl.l_start = 0; + fl.l_len = 0; + + // Take the READ lock. + ASSERT_THAT(fcntl(fd.get(), F_SETLKW, &fl), SyscallSucceeds()); + + // Attempt to take the read lock in a sub process. Since multiple processes + // can hold a read lock this should immediately return without blocking + // even though we used F_SETLKW in the subprocess. + pid_t child_pid = 0; + auto sp = ASSERT_NO_ERRNO_AND_VALUE(SubprocessLock( + file.path(), false /* read lock */, true /* Blocking Lock */, + true /* Retry on EINTR */, -1 /* No fd, should not block */, fl.l_start, + fl.l_len, &child_pid)); + + // We never release the lock and the subprocess should still obtain it without + // blocking for any period of time. 
+ int status = 0; + ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds()); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << "Exited with code: " << status; +} + +TEST(FcntlTest, GetO_ASYNC) { + FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( + Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); + + int flag_fl = -1; + ASSERT_THAT(flag_fl = fcntl(s.get(), F_GETFL), SyscallSucceeds()); + EXPECT_EQ(flag_fl & O_ASYNC, 0); + + int flag_fd = -1; + ASSERT_THAT(flag_fd = fcntl(s.get(), F_GETFD), SyscallSucceeds()); + EXPECT_EQ(flag_fd & O_ASYNC, 0); +} + +TEST(FcntlTest, SetFlO_ASYNC) { + FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( + Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); + + int before_fl = -1; + ASSERT_THAT(before_fl = fcntl(s.get(), F_GETFL), SyscallSucceeds()); + + int before_fd = -1; + ASSERT_THAT(before_fd = fcntl(s.get(), F_GETFD), SyscallSucceeds()); + + ASSERT_THAT(fcntl(s.get(), F_SETFL, before_fl | O_ASYNC), SyscallSucceeds()); + + int after_fl = -1; + ASSERT_THAT(after_fl = fcntl(s.get(), F_GETFL), SyscallSucceeds()); + EXPECT_EQ(after_fl, before_fl | O_ASYNC); + + int after_fd = -1; + ASSERT_THAT(after_fd = fcntl(s.get(), F_GETFD), SyscallSucceeds()); + EXPECT_EQ(after_fd, before_fd); +} + +TEST(FcntlTest, SetFdO_ASYNC) { + FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( + Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); + + int before_fl = -1; + ASSERT_THAT(before_fl = fcntl(s.get(), F_GETFL), SyscallSucceeds()); + + int before_fd = -1; + ASSERT_THAT(before_fd = fcntl(s.get(), F_GETFD), SyscallSucceeds()); + + ASSERT_THAT(fcntl(s.get(), F_SETFD, before_fd | O_ASYNC), SyscallSucceeds()); + + int after_fl = -1; + ASSERT_THAT(after_fl = fcntl(s.get(), F_GETFL), SyscallSucceeds()); + EXPECT_EQ(after_fl, before_fl); + + int after_fd = -1; + ASSERT_THAT(after_fd = fcntl(s.get(), F_GETFD), SyscallSucceeds()); + EXPECT_EQ(after_fd, before_fd); +} + +TEST(FcntlTest, 
DupAfterO_ASYNC) { + FileDescriptor s1 = ASSERT_NO_ERRNO_AND_VALUE( + Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); + + int before = -1; + ASSERT_THAT(before = fcntl(s1.get(), F_GETFL), SyscallSucceeds()); + + ASSERT_THAT(fcntl(s1.get(), F_SETFL, before | O_ASYNC), SyscallSucceeds()); + + FileDescriptor fd2 = ASSERT_NO_ERRNO_AND_VALUE(s1.Dup()); + + int after = -1; + ASSERT_THAT(after = fcntl(fd2.get(), F_GETFL), SyscallSucceeds()); + EXPECT_EQ(after & O_ASYNC, O_ASYNC); +} + +TEST(FcntlTest, GetOwn) { + FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( + Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); + + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN), + SyscallSucceedsWithValue(0)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor + +int main(int argc, char** argv) { + gvisor::testing::TestInit(&argc, &argv); + + if (!FLAGS_child_setlock_on.empty()) { + int socket_fd = FLAGS_socket_fd; + int fd = open(FLAGS_child_setlock_on.c_str(), O_RDWR, 0666); + if (fd == -1 && errno != 0) { + int err = errno; + std::cerr << "CHILD open " << FLAGS_child_setlock_on << " failed " << err + << std::endl; + exit(err); + } + + struct flock fl; + if (FLAGS_child_setlock_write) { + fl.l_type = F_WRLCK; + } else { + fl.l_type = F_RDLCK; + } + fl.l_whence = SEEK_SET; + fl.l_start = FLAGS_child_setlock_start; + fl.l_len = FLAGS_child_setlock_len; + + // Test the fcntl, no need to log, the error is unambiguously + // from fcntl at this point. + int err = 0; + int ret = 0; + + gvisor::testing::MonotonicTimer timer; + timer.Start(); + do { + ret = fcntl(fd, FLAGS_blocking ? F_SETLKW : F_SETLK, &fl); + } while (FLAGS_retry_eintr && ret == -1 && errno == EINTR); + auto usec = absl::ToInt64Microseconds(timer.Duration()); + + if (ret == -1 && errno != 0) { + err = errno; + } + + // If there is a socket fd let's send back the time in microseconds it took + // to execute this syscall. 
+ if (socket_fd != -1) { + gvisor::testing::WriteFd(socket_fd, reinterpret_cast<void*>(&usec), + sizeof(usec)); + close(socket_fd); + } + + close(fd); + exit(err); + } + + return RUN_ALL_TESTS(); +} diff --git a/test/syscalls/linux/file_base.h b/test/syscalls/linux/file_base.h new file mode 100644 index 000000000..19c9a5053 --- /dev/null +++ b/test/syscalls/linux/file_base.h @@ -0,0 +1,206 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GVISOR_TEST_SYSCALLS_FILE_BASE_H_ +#define GVISOR_TEST_SYSCALLS_FILE_BASE_H_ + +#include <arpa/inet.h> +#include <errno.h> +#include <fcntl.h> +#include <netinet/in.h> +#include <stddef.h> +#include <stdio.h> +#include <string.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/uio.h> +#include <unistd.h> +#include <cstring> +#include <string> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "gtest/gtest.h" +#include "absl/strings/string_view.h" +#include "test/util/file_descriptor.h" +#include "test/util/posix_error.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +class FileTest : public ::testing::Test { + public: + void SetUp() override { + test_pipe_[0] = -1; + test_pipe_[1] = -1; + + test_file_name_ = NewTempAbsPath(); + test_file_fd_ = ASSERT_NO_ERRNO_AND_VALUE( + Open(test_file_name_, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR)); + + // FIXME: enable when mknod 
syscall is supported. + // test_fifo_name_ = NewTempAbsPath(); + // ASSERT_THAT(mknod(test_fifo_name_.c_str(), S_IFIFO|0644, 0), + // SyscallSucceeds()); + // ASSERT_THAT(test_fifo_[1] = open(test_fifo_name_.c_str(), + // O_WRONLY), + // SyscallSucceeds()); + // ASSERT_THAT(test_fifo_[0] = open(test_fifo_name_.c_str(), + // O_RDONLY), + // SyscallSucceeds()); + + ASSERT_THAT(pipe(test_pipe_), SyscallSucceeds()); + ASSERT_THAT(fcntl(test_pipe_[0], F_SETFL, O_NONBLOCK), SyscallSucceeds()); + } + + // CloseFile will allow the test to manually close the file descriptor. + void CloseFile() { test_file_fd_.reset(); } + + // UnlinkFile will allow the test to manually unlink the file. + void UnlinkFile() { + if (!test_file_name_.empty()) { + EXPECT_THAT(unlink(test_file_name_.c_str()), SyscallSucceeds()); + test_file_name_.clear(); + } + } + + // ClosePipes will allow the test to manually close the pipes (-1 == closed). + void ClosePipes() { + if (test_pipe_[0] >= 0) { + EXPECT_THAT(close(test_pipe_[0]), SyscallSucceeds()); + } + + if (test_pipe_[1] >= 0) { + EXPECT_THAT(close(test_pipe_[1]), SyscallSucceeds()); + } + + test_pipe_[0] = -1; + test_pipe_[1] = -1; + } + + void TearDown() override { + CloseFile(); + UnlinkFile(); + ClosePipes(); + + // FIXME: enable when mknod syscall is supported. 
+ // close(test_fifo_[0]); + // close(test_fifo_[1]); + // unlink(test_fifo_name_.c_str()); + } + + std::string test_file_name_; + std::string test_fifo_name_; + FileDescriptor test_file_fd_; + + int test_fifo_[2]; + int test_pipe_[2]; +}; + +class SocketTest : public ::testing::Test { + public: + void SetUp() override { + test_unix_stream_socket_[0] = -1; + test_unix_stream_socket_[1] = -1; + test_unix_dgram_socket_[0] = -1; + test_unix_dgram_socket_[1] = -1; + test_unix_seqpacket_socket_[0] = -1; + test_unix_seqpacket_socket_[1] = -1; + test_tcp_socket_[0] = -1; + test_tcp_socket_[1] = -1; + + ASSERT_THAT(socketpair(AF_UNIX, SOCK_STREAM, 0, test_unix_stream_socket_), + SyscallSucceeds()); + ASSERT_THAT(fcntl(test_unix_stream_socket_[0], F_SETFL, O_NONBLOCK), + SyscallSucceeds()); + ASSERT_THAT(socketpair(AF_UNIX, SOCK_DGRAM, 0, test_unix_dgram_socket_), + SyscallSucceeds()); + ASSERT_THAT(fcntl(test_unix_dgram_socket_[0], F_SETFL, O_NONBLOCK), + SyscallSucceeds()); + ASSERT_THAT( + socketpair(AF_UNIX, SOCK_SEQPACKET, 0, test_unix_seqpacket_socket_), + SyscallSucceeds()); + ASSERT_THAT(fcntl(test_unix_seqpacket_socket_[0], F_SETFL, O_NONBLOCK), + SyscallSucceeds()); + } + + void TearDown() override { + close(test_unix_stream_socket_[0]); + close(test_unix_stream_socket_[1]); + + close(test_unix_dgram_socket_[0]); + close(test_unix_dgram_socket_[1]); + + close(test_unix_seqpacket_socket_[0]); + close(test_unix_seqpacket_socket_[1]); + + close(test_tcp_socket_[0]); + close(test_tcp_socket_[1]); + } + + int test_unix_stream_socket_[2]; + int test_unix_dgram_socket_[2]; + int test_unix_seqpacket_socket_[2]; + int test_tcp_socket_[2]; +}; + +// MatchesStringLength checks that a tuple argument of (struct iovec *, int) +// corresponding to an iovec array and its length, contains data that matches +// the std::string length strlen. 
+MATCHER_P(MatchesStringLength, strlen, "") { + struct iovec* iovs = arg.first; + int niov = arg.second; + int offset = 0; + for (int i = 0; i < niov; i++) { + offset += iovs[i].iov_len; + } + if (offset != static_cast<int>(strlen)) { + *result_listener << offset; + return false; + } + return true; +} + +// MatchesStringValue checks that a tuple argument of (struct iovec *, int) +// corresponding to an iovec array and its length, contains data that matches +// the std::string value str. +MATCHER_P(MatchesStringValue, str, "") { + struct iovec* iovs = arg.first; + int len = strlen(str); + int niov = arg.second; + int offset = 0; + for (int i = 0; i < niov; i++) { + struct iovec iov = iovs[i]; + if (len < offset) { + *result_listener << "strlen " << len << " < offset " << offset; + return false; + } + if (strncmp(static_cast<char*>(iov.iov_base), &str[offset], iov.iov_len)) { + absl::string_view iovec_string(static_cast<char*>(iov.iov_base), + iov.iov_len); + *result_listener << iovec_string << " @offset " << offset; + return false; + } + offset += iov.iov_len; + } + return true; +} + +} // namespace testing +} // namespace gvisor + +#endif // GVISOR_TEST_SYSCALLS_FILE_BASE_H_ diff --git a/test/syscalls/linux/flock.cc b/test/syscalls/linux/flock.cc new file mode 100644 index 000000000..fb93c8034 --- /dev/null +++ b/test/syscalls/linux/flock.cc @@ -0,0 +1,588 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <errno.h> +#include <sys/file.h> +#include <string> + +#include "gtest/gtest.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/syscalls/linux/file_base.h" +#include "test/util/file_descriptor.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" +#include "test/util/timer_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +class FlockTest : public FileTest {}; + +TEST_F(FlockTest, BadFD) { + // EBADF: fd is not an open file descriptor. + ASSERT_THAT(flock(-1, 0), SyscallFailsWithErrno(EBADF)); +} + +TEST_F(FlockTest, InvalidOpCombinations) { + // The operation cannot be both exclusive and shared. + EXPECT_THAT(flock(test_file_fd_.get(), LOCK_EX | LOCK_SH | LOCK_NB), + SyscallFailsWithErrno(EINVAL)); + + // Locking and Unlocking doesn't make sense. + EXPECT_THAT(flock(test_file_fd_.get(), LOCK_EX | LOCK_UN | LOCK_NB), + SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(flock(test_file_fd_.get(), LOCK_SH | LOCK_UN | LOCK_NB), + SyscallFailsWithErrno(EINVAL)); +} + +TEST_F(FlockTest, NoOperationSpecified) { + // Not specifying an operation is invalid. + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_NB), + SyscallFailsWithErrno(EINVAL)); +} + +TEST(FlockTestNoFixture, FlockSupportsPipes) { + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + + EXPECT_THAT(flock(fds[0], LOCK_EX | LOCK_NB), SyscallSucceeds()); + EXPECT_THAT(close(fds[0]), SyscallSucceeds()); + EXPECT_THAT(close(fds[1]), SyscallSucceeds()); +} + +TEST_F(FlockTest, TestSimpleExLock) { + // Test that we can obtain an exclusive lock (no other holders) + // and that we can unlock it. + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_EX | LOCK_NB), + SyscallSucceedsWithValue(0)); + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceedsWithValue(0)); +} + +TEST_F(FlockTest, TestSimpleShLock) { + // Test that we can obtain a shared lock (no other holders) + // and that we can unlock it. 
+ ASSERT_THAT(flock(test_file_fd_.get(), LOCK_SH | LOCK_NB), + SyscallSucceedsWithValue(0)); + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceedsWithValue(0)); +} + +TEST_F(FlockTest, TestLockableAnyMode) { + // flock(2): A shared or exclusive lock can be placed on a file + // regardless of the mode in which the file was opened. + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE( + Open(test_file_name_, O_RDONLY)); // open read only to test + + // Mode shouldn't prevent us from taking an exclusive lock. + ASSERT_THAT(flock(fd.get(), LOCK_EX | LOCK_NB), SyscallSucceedsWithValue(0)); + + // Unlock + ASSERT_THAT(flock(fd.get(), LOCK_UN), SyscallSucceedsWithValue(0)); +} + +TEST_F(FlockTest, TestUnlockWithNoHolders) { + // Test that unlocking when no one holds a lock succeeds. + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceedsWithValue(0)); +} + +TEST_F(FlockTest, TestRepeatedExLockingBySameHolder) { + // Test that repeated locking by the same holder for the + // same type of lock works correctly. + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_NB | LOCK_EX), + SyscallSucceedsWithValue(0)); + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_NB | LOCK_EX), + SyscallSucceedsWithValue(0)); + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceedsWithValue(0)); +} + +TEST_F(FlockTest, TestRepeatedExLockingSingleUnlock) { + // Test that repeated locking by the same holder for the + // same type of lock works correctly and that a single unlock is required. 
+ ASSERT_THAT(flock(test_file_fd_.get(), LOCK_NB | LOCK_EX), + SyscallSucceedsWithValue(0)); + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_NB | LOCK_EX), + SyscallSucceedsWithValue(0)); + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceedsWithValue(0)); + + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDONLY)); + + // Should be unlocked at this point + ASSERT_THAT(flock(fd.get(), LOCK_NB | LOCK_EX), SyscallSucceedsWithValue(0)); + + ASSERT_THAT(flock(fd.get(), LOCK_UN), SyscallSucceedsWithValue(0)); +} + +TEST_F(FlockTest, TestRepeatedShLockingBySameHolder) { + // Test that repeated locking by the same holder for the + // same type of lock works correctly. + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_NB | LOCK_SH), + SyscallSucceedsWithValue(0)); + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_NB | LOCK_SH), + SyscallSucceedsWithValue(0)); + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceedsWithValue(0)); +} + +TEST_F(FlockTest, TestSingleHolderUpgrade) { + // Test that a shared lock is upgradable when no one else holds a lock. + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_NB | LOCK_SH), + SyscallSucceedsWithValue(0)); + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_NB | LOCK_EX), + SyscallSucceedsWithValue(0)); + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceedsWithValue(0)); +} + +TEST_F(FlockTest, TestSingleHolderDowngrade) { + // Test single holder lock downgrade case. + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_EX | LOCK_NB), + SyscallSucceedsWithValue(0)); + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_SH | LOCK_NB), + SyscallSucceedsWithValue(0)); + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceedsWithValue(0)); +} + +TEST_F(FlockTest, TestMultipleShared) { + // This is a simple test to verify that multiple independent shared + // locks will be granted. 
+ ASSERT_THAT(flock(test_file_fd_.get(), LOCK_SH | LOCK_NB), + SyscallSucceedsWithValue(0)); + + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR)); + + // A shared lock should be granted as there only exists other shared locks. + ASSERT_THAT(flock(fd.get(), LOCK_SH | LOCK_NB), SyscallSucceedsWithValue(0)); + + // Unlock both. + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceedsWithValue(0)); + ASSERT_THAT(flock(fd.get(), LOCK_UN), SyscallSucceedsWithValue(0)); +} + +/* + * flock(2): If a process uses open(2) (or similar) to obtain more than one + * descriptor for the same file, these descriptors are treated + * independently by flock(). An attempt to lock the file using one of + * these file descriptors may be denied by a lock that the calling process + * has already placed via another descriptor. + */ +TEST_F(FlockTest, TestMultipleHolderSharedExclusive) { + // This test will verify that an exclusive lock will not be granted + // while a shared is held. + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_SH | LOCK_NB), + SyscallSucceedsWithValue(0)); + + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR)); + + // Verify we're unable to get an exclusive lock via the second FD + // because someone is holding a shared lock. + ASSERT_THAT(flock(fd.get(), LOCK_EX | LOCK_NB), + SyscallFailsWithErrno(EWOULDBLOCK)); + + // Unlock + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceedsWithValue(0)); +} + +TEST_F(FlockTest, TestSharedLockFailExclusiveHolder) { + // This test will verify that a shared lock is denied while + // someone holds an exclusive lock. + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_EX | LOCK_NB), + SyscallSucceedsWithValue(0)); + + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR)); + + // Verify we're unable to get a shared lock via the second FD + // because someone is holding an exclusive lock. 
+ ASSERT_THAT(flock(fd.get(), LOCK_SH | LOCK_NB), + SyscallFailsWithErrno(EWOULDBLOCK)); + + // Unlock + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceedsWithValue(0)); +} + +TEST_F(FlockTest, TestExclusiveLockFailExclusiveHolder) { + // This test will verify that an exclusive lock is denied while + // someone already holds an exclusive lock. + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_EX | LOCK_NB), + SyscallSucceedsWithValue(0)); + + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR)); + + // Verify we're unable to get an exclusive lock via the second FD + // because someone is already holding an exclusive lock. + ASSERT_THAT(flock(fd.get(), LOCK_EX | LOCK_NB), + SyscallFailsWithErrno(EWOULDBLOCK)); + + // Unlock + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceedsWithValue(0)); +} + +TEST_F(FlockTest, TestMultipleHolderSharedExclusiveUpgrade) { + // This test will verify that we cannot obtain an exclusive lock while + // a shared lock is held by another descriptor, then verify that an upgrade + // is possible on a shared lock once all other shared locks have closed. + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_SH | LOCK_NB), + SyscallSucceedsWithValue(0)); + + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR)); + + // Verify we're unable to get an exclusive lock via the second FD because + // a shared lock is held. + ASSERT_THAT(flock(fd.get(), LOCK_EX | LOCK_NB), + SyscallFailsWithErrno(EWOULDBLOCK)); + + // Verify that we can get a shared lock via the second descriptor instead + ASSERT_THAT(flock(fd.get(), LOCK_SH | LOCK_NB), SyscallSucceedsWithValue(0)); + + // Unlock the first and there will only be one shared lock remaining. + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceedsWithValue(0)); + + // Upgrade 2nd fd. 
+ ASSERT_THAT(flock(fd.get(), LOCK_EX | LOCK_NB), SyscallSucceedsWithValue(0)); + + // Finally unlock the second + ASSERT_THAT(flock(fd.get(), LOCK_UN), SyscallSucceedsWithValue(0)); +} + +TEST_F(FlockTest, TestMultipleHolderSharedExclusiveDowngrade) { + // This test will verify that a shared lock is not obtainable while an + // exclusive lock is held but that once the first is downgraded that + // the second independent file descriptor can also get a shared lock. + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_EX | LOCK_NB), + SyscallSucceedsWithValue(0)); + + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR)); + + // Verify We're unable to get a shared lock via the second FD because + // an exclusive lock is held. + ASSERT_THAT(flock(fd.get(), LOCK_SH | LOCK_NB), + SyscallFailsWithErrno(EWOULDBLOCK)); + + // Verify that we can downgrade the first. + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_SH | LOCK_NB), + SyscallSucceedsWithValue(0)); + + // Now verify that we can obtain a shared lock since the first was downgraded. + ASSERT_THAT(flock(fd.get(), LOCK_SH | LOCK_NB), SyscallSucceedsWithValue(0)); + + // Finally unlock both. + ASSERT_THAT(flock(fd.get(), LOCK_UN), SyscallSucceedsWithValue(0)); + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceedsWithValue(0)); +} + +/* + * flock(2): Locks created by flock() are associated with an open file table + * entry. This means that duplicate file descriptors (created by, for example, + * fork(2) or dup(2)) refer to the same lock, and this lock may be modified or + * released using any of these descriptors. Furthermore, the lock is released + * either by an explicit LOCK_UN operation on any of these duplicate descriptors + * or when all such descriptors have been closed. + */ +TEST_F(FlockTest, TestDupFdUpgrade) { + // This test will verify that a shared lock is upgradeable via a dupped + // file descriptor, if the FD wasn't dupped this would fail. 
+ ASSERT_THAT(flock(test_file_fd_.get(), LOCK_SH | LOCK_NB), + SyscallSucceedsWithValue(0)); + + const FileDescriptor dup_fd = ASSERT_NO_ERRNO_AND_VALUE(test_file_fd_.Dup()); + + // Now we should be able to upgrade via the dupped fd. + ASSERT_THAT(flock(dup_fd.get(), LOCK_EX | LOCK_NB), + SyscallSucceedsWithValue(0)); + + // Validate unlock via dupped fd. + ASSERT_THAT(flock(dup_fd.get(), LOCK_UN), SyscallSucceedsWithValue(0)); +} + +TEST_F(FlockTest, TestDupFdDowngrade) { + // This test will verify that a exclusive lock is downgradable via a dupped + // file descriptor, if the FD wasn't dupped this would fail. + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_EX | LOCK_NB), + SyscallSucceedsWithValue(0)); + + const FileDescriptor dup_fd = ASSERT_NO_ERRNO_AND_VALUE(test_file_fd_.Dup()); + + // Now we should be able to downgrade via the dupped fd. + ASSERT_THAT(flock(dup_fd.get(), LOCK_SH | LOCK_NB), + SyscallSucceedsWithValue(0)); + + // Validate unlock via dupped fd + ASSERT_THAT(flock(dup_fd.get(), LOCK_UN), SyscallSucceedsWithValue(0)); +} + +TEST_F(FlockTest, TestDupFdCloseRelease) { + // flock(2): Furthermore, the lock is released either by an explicit LOCK_UN + // operation on any of these duplicate descriptors, or when all such + // descriptors have been closed. + // + // This test will verify that a dupped fd closing will not release the + // underlying lock until all such dupped fds have closed. + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_EX | LOCK_NB), + SyscallSucceedsWithValue(0)); + + FileDescriptor dup_fd = ASSERT_NO_ERRNO_AND_VALUE(test_file_fd_.Dup()); + + // At this point we have ONE exclusive locked referenced by two different fds. + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR)); + + // Validate that we cannot get a lock on a new unrelated FD. + ASSERT_THAT(flock(fd.get(), LOCK_EX | LOCK_NB), + SyscallFailsWithErrno(EWOULDBLOCK)); + + // Closing the dupped fd shouldn't affect the lock until all are closed. 
+ dup_fd.reset(); // Closed the duped fd. + + // Validate that we still cannot get a lock on a new unrelated FD. + ASSERT_THAT(flock(fd.get(), LOCK_EX | LOCK_NB), + SyscallFailsWithErrno(EWOULDBLOCK)); + + // Closing the first fd + CloseFile(); // Will validate the syscall succeeds. + + // Now we should actually be able to get a lock since all fds related to + // the first lock are closed. + ASSERT_THAT(flock(fd.get(), LOCK_EX | LOCK_NB), SyscallSucceedsWithValue(0)); + + // Unlock. + ASSERT_THAT(flock(fd.get(), LOCK_UN), SyscallSucceedsWithValue(0)); +} + +TEST_F(FlockTest, TestDupFdUnlockRelease) { + /* flock(2): Furthermore, the lock is released either by an explicit LOCK_UN + * operation on any of these duplicate descriptors, or when all such + * descriptors have been closed. + */ + // This test will verify that an explict unlock on a dupped FD will release + // the underlying lock unlike the previous case where close on a dup was + // not enough to release the lock. + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_EX | LOCK_NB), + SyscallSucceedsWithValue(0)); + + const FileDescriptor dup_fd = ASSERT_NO_ERRNO_AND_VALUE(test_file_fd_.Dup()); + + // At this point we have ONE exclusive locked referenced by two different fds. + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR)); + + // Validate that we cannot get a lock on a new unrelated FD. + ASSERT_THAT(flock(fd.get(), LOCK_EX | LOCK_NB), + SyscallFailsWithErrno(EWOULDBLOCK)); + + // Explicitly unlock via the dupped descriptor. + ASSERT_THAT(flock(dup_fd.get(), LOCK_UN), SyscallSucceedsWithValue(0)); + + // Validate that we can now get the lock since we explicitly unlocked. 
+ ASSERT_THAT(flock(fd.get(), LOCK_EX | LOCK_NB), SyscallSucceedsWithValue(0)); + + // Unlock + ASSERT_THAT(flock(fd.get(), LOCK_UN), SyscallSucceedsWithValue(0)); +} + +TEST_F(FlockTest, TestDupFdFollowedByLock) { + // This test will verify that taking a lock on a file descriptor that has + // already been dupped means that the lock is shared between both. This is + // slightly different than than duping on an already locked FD. + FileDescriptor dup_fd = ASSERT_NO_ERRNO_AND_VALUE(test_file_fd_.Dup()); + + // Take a lock. + ASSERT_THAT(flock(dup_fd.get(), LOCK_EX | LOCK_NB), SyscallSucceeds()); + + // Now dup_fd and test_file_ should both reference the same lock. + // We shouldn't be able to obtain a lock until both are closed. + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR)); + + // Closing the first fd + dup_fd.reset(); // Close the duped fd. + + // Validate that we cannot get a lock yet because the dupped descriptor. + ASSERT_THAT(flock(fd.get(), LOCK_EX | LOCK_NB), + SyscallFailsWithErrno(EWOULDBLOCK)); + + // Closing the second fd. + CloseFile(); // CloseFile() will validate the syscall succeeds. + + // Now we should be able to get the lock. + ASSERT_THAT(flock(fd.get(), LOCK_EX | LOCK_NB), SyscallSucceeds()); + + // Unlock. + ASSERT_THAT(flock(fd.get(), LOCK_UN), SyscallSucceedsWithValue(0)); +} + +// NOTE: These blocking tests are not perfect. Unfortunantely it's very hard to +// determine if a thread was actually blocked in the kernel so we're forced +// to use timing. +TEST_F(FlockTest, BlockingLockNoBlockingForSharedLocks) { + // This test will verify that although LOCK_NB isn't specified + // two different fds can obtain shared locks without blocking. + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_SH), SyscallSucceeds()); + + // kHoldLockTime is the amount of time we will hold the lock before releasing. + constexpr absl::Duration kHoldLockTime = absl::Seconds(30); + + const DisableSave ds; // Timing-related. 
+ + // We do this in another thread so we can determine if it was actually + // blocked by timing the amount of time it took for the syscall to complete. + ScopedThread t([&] { + MonotonicTimer timer; + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR)); + + // Only a single shared lock is held, the lock will be granted immediately. + // This should be granted without any blocking. Don't save here to avoid + // wild discrepencies on timing. + timer.Start(); + ASSERT_THAT(flock(fd.get(), LOCK_SH), SyscallSucceeds()); + + // We held the lock for 30 seconds but this thread should not have + // blocked at all so we expect a very small duration on syscall completion. + ASSERT_LT(timer.Duration(), + absl::Seconds(1)); // 1000ms is much less than 30s. + + // We can release our second shared lock + ASSERT_THAT(flock(fd.get(), LOCK_UN), SyscallSucceeds()); + }); + + // Sleep before unlocking. + absl::SleepFor(kHoldLockTime); + + // Release the first shared lock. Don't save in this situation to avoid + // discrepencies in timing. + EXPECT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceeds()); +} + +TEST_F(FlockTest, BlockingLockFirstSharedSecondExclusive) { + // This test will verify that if someone holds a shared lock any attempt to + // obtain an exclusive lock will result in blocking. + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_SH), SyscallSucceeds()); + + // kHoldLockTime is the amount of time we will hold the lock before releasing. + constexpr absl::Duration kHoldLockTime = absl::Seconds(2); + + const DisableSave ds; // Timing-related. + + // We do this in another thread so we can determine if it was actually + // blocked by timing the amount of time it took for the syscall to complete. + ScopedThread t([&] { + MonotonicTimer timer; + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR)); + + // This exclusive lock should block because someone is already holding a + // shared lock. 
We don't save here to avoid wild discrepencies on timing. + timer.Start(); + ASSERT_THAT(RetryEINTR(flock)(fd.get(), LOCK_EX), SyscallSucceeds()); + + // We should be blocked, we will expect to be blocked for more than 1.0s. + ASSERT_GT(timer.Duration(), absl::Seconds(1)); + + // We can release our exclusive lock. + ASSERT_THAT(flock(fd.get(), LOCK_UN), SyscallSucceeds()); + }); + + // Sleep before unlocking. + absl::SleepFor(kHoldLockTime); + + // Release the shared lock allowing the thread to proceed. + // We don't save here to avoid wild discrepencies in timing. + EXPECT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceeds()); +} + +TEST_F(FlockTest, BlockingLockFirstExclusiveSecondShared) { + // This test will verify that if someone holds an exclusive lock any attempt + // to obtain a shared lock will result in blocking. + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_EX), SyscallSucceeds()); + + // kHoldLockTime is the amount of time we will hold the lock before releasing. + constexpr absl::Duration kHoldLockTime = absl::Seconds(2); + + const DisableSave ds; // Timing-related. + + // We do this in another thread so we can determine if it was actually + // blocked by timing the amount of time it took for the syscall to complete. + ScopedThread t([&] { + MonotonicTimer timer; + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR)); + + // This shared lock should block because someone is already holding an + // exclusive lock. We don't save here to avoid wild discrepencies on timing. + timer.Start(); + ASSERT_THAT(RetryEINTR(flock)(fd.get(), LOCK_SH), SyscallSucceeds()); + + // We should be blocked, we will expect to be blocked for more than 1.0s. + ASSERT_GT(timer.Duration(), absl::Seconds(1)); + + // We can release our shared lock. + ASSERT_THAT(flock(fd.get(), LOCK_UN), SyscallSucceeds()); + }); + + // Sleep before unlocking. 
+ absl::SleepFor(kHoldLockTime); + + // Release the exclusive lock allowing the blocked thread to proceed. + // We don't save here to avoid wild discrepencies in timing. + EXPECT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceeds()); +} + +TEST_F(FlockTest, BlockingLockFirstExclusiveSecondExclusive) { + // This test will verify that if someone holds an exclusive lock any attempt + // to obtain another exclusive lock will result in blocking. + ASSERT_THAT(flock(test_file_fd_.get(), LOCK_EX), SyscallSucceeds()); + + // kHoldLockTime is the amount of time we will hold the lock before releasing. + constexpr absl::Duration kHoldLockTime = absl::Seconds(2); + + const DisableSave ds; // Timing-related. + + // We do this in another thread so we can determine if it was actually + // blocked by timing the amount of time it took for the syscall to complete. + ScopedThread t([&] { + MonotonicTimer timer; + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR)); + + // This exclusive lock should block because someone is already holding an + // exclusive lock. + timer.Start(); + ASSERT_THAT(RetryEINTR(flock)(fd.get(), LOCK_EX), SyscallSucceeds()); + + // We should be blocked, we will expect to be blocked for more than 1.0s. + ASSERT_GT(timer.Duration(), absl::Seconds(1)); + + // We can release our exclusive lock. + ASSERT_THAT(flock(fd.get(), LOCK_UN), SyscallSucceeds()); + }); + + // Sleep before unlocking. + absl::SleepFor(kHoldLockTime); + + // Release the exclusive lock allowing the blocked thread to proceed. + // We don't save to avoid wild discrepencies in timing. 
+ EXPECT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceeds()); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/fork.cc b/test/syscalls/linux/fork.cc new file mode 100644 index 000000000..1bff5e50f --- /dev/null +++ b/test/syscalls/linux/fork.cc @@ -0,0 +1,413 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <errno.h> +#include <fcntl.h> +#include <sched.h> +#include <stdlib.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> +#include <atomic> + +#include "gtest/gtest.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/util/logging.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +using ::testing::Ge; + +class ForkTest : public ::testing::Test { + protected: + // SetUp creates a populated, open file. + void SetUp() override { + // Make a shared mapping. + shared_ = reinterpret_cast<char*>(mmap(0, kPageSize, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0)); + ASSERT_NE(reinterpret_cast<void*>(shared_), MAP_FAILED); + + // Make a private mapping. + private_ = + reinterpret_cast<char*>(mmap(0, kPageSize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)); + ASSERT_NE(reinterpret_cast<void*>(private_), MAP_FAILED); + + // Make a pipe. 
+ ASSERT_THAT(pipe(pipes_), SyscallSucceeds()); + } + + // TearDown frees associated resources. + void TearDown() override { + EXPECT_THAT(munmap(shared_, kPageSize), SyscallSucceeds()); + EXPECT_THAT(munmap(private_, kPageSize), SyscallSucceeds()); + EXPECT_THAT(close(pipes_[0]), SyscallSucceeds()); + EXPECT_THAT(close(pipes_[1]), SyscallSucceeds()); + } + + // Fork executes a clone system call. + pid_t Fork() { + pid_t pid = fork(); + MaybeSave(); + TEST_PCHECK_MSG(pid >= 0, "fork failed"); + return pid; + } + + // Wait waits for the given pid and returns the exit status. If the child was + // killed by a signal or an error occurs, then 256+signal is returned. + int Wait(pid_t pid) { + int status; + while (true) { + int rval = wait4(pid, &status, 0, NULL); + if (rval < 0) { + return rval; + } + if (rval != pid) { + continue; + } + if (WIFEXITED(status)) { + return WEXITSTATUS(status); + } + if (WIFSIGNALED(status)) { + return 256 + WTERMSIG(status); + } + } + } + + // Exit exits the proccess. + void Exit(int code) { + _exit(code); + + // Should never reach here. Since the exit above failed, we really don't + // have much in the way of options to indicate failure. So we just try to + // log an assertion failure to the logs. The parent process will likely + // fail anyways if exit is not working. + TEST_CHECK_MSG(false, "_exit returned"); + } + + // ReadByte reads a byte from the shared pipe. + char ReadByte() { + char val = -1; + TEST_PCHECK(ReadFd(pipes_[0], &val, 1) == 1); + MaybeSave(); + return val; + } + + // WriteByte writes a byte from the shared pipe. + void WriteByte(char val) { + TEST_PCHECK(WriteFd(pipes_[1], &val, 1) == 1); + MaybeSave(); + } + + // Shared pipe. + int pipes_[2]; + + // Shared mapping (one page). + char* shared_; + + // Private mapping (one page). 
+ char* private_; +}; + +TEST_F(ForkTest, Simple) { + pid_t child = Fork(); + if (child == 0) { + Exit(0); + } + EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0)); +} + +TEST_F(ForkTest, ExitCode) { + pid_t child = Fork(); + if (child == 0) { + Exit(123); + } + EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(123)); + child = Fork(); + if (child == 0) { + Exit(1); + } + EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(1)); +} + +TEST_F(ForkTest, Multi) { + pid_t child1 = Fork(); + if (child1 == 0) { + Exit(0); + } + pid_t child2 = Fork(); + if (child2 == 0) { + Exit(1); + } + EXPECT_THAT(Wait(child1), SyscallSucceedsWithValue(0)); + EXPECT_THAT(Wait(child2), SyscallSucceedsWithValue(1)); +} + +TEST_F(ForkTest, Pipe) { + pid_t child = Fork(); + if (child == 0) { + WriteByte(1); + Exit(0); + } + EXPECT_EQ(ReadByte(), 1); + EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0)); +} + +TEST_F(ForkTest, SharedMapping) { + pid_t child = Fork(); + if (child == 0) { + // Wait for the parent. + ReadByte(); + if (shared_[0] == 1) { + Exit(0); + } + // Failed. + Exit(1); + } + // Change the mapping. + ASSERT_EQ(shared_[0], 0); + shared_[0] = 1; + // Unblock the child. + WriteByte(0); + // Did it work? + EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0)); +} + +TEST_F(ForkTest, PrivateMapping) { + pid_t child = Fork(); + if (child == 0) { + // Wait for the parent. + ReadByte(); + if (private_[0] == 0) { + Exit(0); + } + // Failed. + Exit(1); + } + // Change the mapping. + ASSERT_EQ(private_[0], 0); + private_[0] = 1; + // Unblock the child. + WriteByte(0); + // Did it work? + EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0)); +} + +// Test that cpuid works after a fork. +TEST_F(ForkTest, Cpuid) { + pid_t child = Fork(); + + // We should be able to determine the CPU vendor. 
+ ASSERT_NE(GetCPUVendor(), CPUVendor::kUnknownVendor); + + if (child == 0) { + Exit(0); + } + EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0)); +} + +TEST_F(ForkTest, Mmap) { + pid_t child = Fork(); + + if (child == 0) { + void* addr = + mmap(0, kPageSize, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + MaybeSave(); + Exit(addr == MAP_FAILED); + } + + EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0)); +} + +static volatile int alarmed = 0; + +void AlarmHandler(int sig, siginfo_t* info, void* context) { alarmed = 1; } + +TEST_F(ForkTest, Alarm) { + // Setup an alarm handler. + struct sigaction sa; + sa.sa_sigaction = AlarmHandler; + sigfillset(&sa.sa_mask); + sa.sa_flags = SA_SIGINFO; + EXPECT_THAT(sigaction(SIGALRM, &sa, nullptr), SyscallSucceeds()); + + pid_t child = Fork(); + + if (child == 0) { + alarm(1); + sleep(3); + if (!alarmed) { + Exit(1); + } + Exit(0); + } + + EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0)); + EXPECT_EQ(0, alarmed); +} + +// Child cannot affect parent private memory. +TEST_F(ForkTest, PrivateMemory) { + std::atomic<uint32_t> local(0); + + pid_t child1 = Fork(); + if (child1 == 0) { + local++; + + pid_t child2 = Fork(); + if (child2 == 0) { + local++; + + TEST_CHECK(local.load() == 2); + + Exit(0); + } + + TEST_PCHECK(Wait(child2) == 0); + TEST_CHECK(local.load() == 1); + Exit(0); + } + + EXPECT_THAT(Wait(child1), SyscallSucceedsWithValue(0)); + EXPECT_EQ(0, local.load()); +} + +// Kernel-accessed buffers should remain coherent across COW. +TEST_F(ForkTest, COWSegment) { + constexpr int kBufSize = 1024; + char* read_buf = private_; + char* touch = private_ + kPageSize / 2; + + std::string contents(kBufSize, 'a'); + + ScopedThread t([&] { + // Wait to be sure the parent is blocked in read. + absl::SleepFor(absl::Seconds(3)); + + // Fork to mark private pages for COW. 
+ // + // Use fork directly rather than the Fork wrapper to skip the multi-threaded + // check, and limit the child to async-signal-safe functions: + // + // "After a fork() in a multithreaded program, the child can safely call + // only async-signal-safe functions (see signal(7)) until such time as it + // calls execve(2)." + // + // Skip ASSERT in the child, as it isn't async-signal-safe. + pid_t child = fork(); + if (child == 0) { + // Wait to be sure parent touched memory. + sleep(3); + Exit(0); + } + + // Check success only in the parent. + ASSERT_THAT(child, SyscallSucceedsWithValue(Ge(0))); + + // Trigger COW on private page. + *touch = 42; + + // Write to pipe. Parent should still be able to read this. + EXPECT_THAT(WriteFd(pipes_[1], contents.c_str(), kBufSize), + SyscallSucceedsWithValue(kBufSize)); + + EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0)); + }); + + EXPECT_THAT(ReadFd(pipes_[0], read_buf, kBufSize), + SyscallSucceedsWithValue(kBufSize)); + EXPECT_STREQ(contents.c_str(), read_buf); +} + +TEST_F(ForkTest, SigAltStack) { + std::vector<char> stack_mem(SIGSTKSZ); + stack_t stack = {}; + stack.ss_size = SIGSTKSZ; + stack.ss_sp = stack_mem.data(); + ASSERT_THAT(sigaltstack(&stack, nullptr), SyscallSucceeds()); + + pid_t child = Fork(); + + if (child == 0) { + stack_t oss = {}; + TEST_PCHECK(sigaltstack(nullptr, &oss) == 0); + MaybeSave(); + + TEST_CHECK((oss.ss_flags & SS_DISABLE) == 0); + TEST_CHECK(oss.ss_size == SIGSTKSZ); + TEST_CHECK(oss.ss_sp == stack.ss_sp); + + Exit(0); + } + EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0)); +} + +TEST_F(ForkTest, Affinity) { + // Make a non-default cpumask. + cpu_set_t parent_mask; + EXPECT_THAT(sched_getaffinity(/*pid=*/0, sizeof(cpu_set_t), &parent_mask), + SyscallSucceeds()); + // Knock out the lowest bit. 
+ for (unsigned int n = 0; n < CPU_SETSIZE; n++) { + if (CPU_ISSET(n, &parent_mask)) { + CPU_CLR(n, &parent_mask); + break; + } + } + EXPECT_THAT(sched_setaffinity(/*pid=*/0, sizeof(cpu_set_t), &parent_mask), + SyscallSucceeds()); + + pid_t child = Fork(); + if (child == 0) { + cpu_set_t child_mask; + + int ret = sched_getaffinity(/*pid=*/0, sizeof(cpu_set_t), &child_mask); + MaybeSave(); + if (ret < 0) { + Exit(-ret); + } + + TEST_CHECK(CPU_EQUAL(&child_mask, &parent_mask)); + + Exit(0); + } + + EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0)); +} + +#ifdef __x86_64__ +// Clone with CLONE_SETTLS and a non-canonical TLS address is rejected. +TEST(CloneTest, NonCanonicalTLS) { + constexpr uintptr_t kNonCanonical = 1ull << 48; + + // We need a valid address for the stack pointer. We'll never actually execute + // on this. + char stack; + + EXPECT_THAT(syscall(__NR_clone, SIGCHLD | CLONE_SETTLS, &stack, nullptr, + nullptr, kNonCanonical), + SyscallFailsWithErrno(EPERM)); +} +#endif + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/fpsig_fork.cc b/test/syscalls/linux/fpsig_fork.cc new file mode 100644 index 000000000..e8f1dfa8a --- /dev/null +++ b/test/syscalls/linux/fpsig_fork.cc @@ -0,0 +1,105 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// This test verifies that fork(2) in a signal handler will correctly +// restore floating point state after the signal handler returns in both +// the child and parent. +#include <sys/time.h> + +#include "gtest/gtest.h" +#include "test/util/logging.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +#define GET_XMM(__var, __xmm) \ + asm volatile("movq %%" #__xmm ", %0" : "=r"(__var)) +#define SET_XMM(__var, __xmm) asm volatile("movq %0, %%" #__xmm : : "r"(__var)) + +int parent, child; + +void sigusr1(int s, siginfo_t* siginfo, void* _uc) { + // Fork and clobber %xmm0. The fpstate should be restored by sigreturn(2) + // in both parent and child. + child = fork(); + TEST_CHECK_MSG(child >= 0, "fork failed"); + + uint64_t val = SIGUSR1; + SET_XMM(val, xmm0); +} + +TEST(FPSigTest, Fork) { + parent = getpid(); + pid_t parent_tid = gettid(); + + struct sigaction sa = {}; + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_SIGINFO; + sa.sa_sigaction = sigusr1; + ASSERT_THAT(sigaction(SIGUSR1, &sa, nullptr), SyscallSucceeds()); + + // The amd64 ABI specifies that the XMM register set is caller-saved. This + // implies that if there is any function call between SET_XMM and GET_XMM the + // compiler might save/restore xmm0 implicitly. This defeats the entire + // purpose of the test which is to verify that fpstate is restored by + // sigreturn(2). + // + // This is the reason why 'tgkill(getpid(), gettid(), SIGUSR1)' is implemented + // in inline assembly below. + // + // If the OS is broken and registers are clobbered by the child, using tgkill + // to signal the current thread increases the likelihood that this thread will + // be the one clobbered. 
+ + uint64_t expected = 0xdeadbeeffacefeed; + SET_XMM(expected, xmm0); + + asm volatile( + "movl %[killnr], %%eax;" + "movl %[parent], %%edi;" + "movl %[tid], %%esi;" + "movl %[sig], %%edx;" + "syscall;" + : + : [killnr] "i"(__NR_tgkill), [parent] "rm"(parent), + [tid] "rm"(parent_tid), [sig] "i"(SIGUSR1) + : "rax", "rdi", "rsi", "rdx", + // Clobbered by syscall. + "rcx", "r11"); + + uint64_t got; + GET_XMM(got, xmm0); + + if (getpid() == parent) { // Parent. + int status; + ASSERT_THAT(waitpid(child, &status, 0), SyscallSucceedsWithValue(child)); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0); + } + + // TEST_CHECK_MSG since this may run in the child. + TEST_CHECK_MSG(expected == got, "Bad xmm0 value"); + + if (getpid() != parent) { // Child. + _exit(0); + } +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/fpsig_nested.cc b/test/syscalls/linux/fpsig_nested.cc new file mode 100644 index 000000000..2fa40b42d --- /dev/null +++ b/test/syscalls/linux/fpsig_nested.cc @@ -0,0 +1,134 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This program verifies that application floating point state is restored +// correctly after a signal handler returns. It also verifies that this works +// with nested signals. 
+#include <sys/time.h> + +#include "gtest/gtest.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +#define GET_XMM(__var, __xmm) \ + asm volatile("movq %%" #__xmm ", %0" : "=r"(__var)) +#define SET_XMM(__var, __xmm) asm volatile("movq %0, %%" #__xmm : : "r"(__var)) + +int pid; +int tid; + +volatile uint64_t entryxmm[2] = {~0UL, ~0UL}; +volatile uint64_t exitxmm[2]; + +void sigusr2(int s, siginfo_t* siginfo, void* _uc) { + uint64_t val = SIGUSR2; + + // Record the value of %xmm0 on entry and then clobber it. + GET_XMM(entryxmm[1], xmm0); + SET_XMM(val, xmm0); + GET_XMM(exitxmm[1], xmm0); +} + +void sigusr1(int s, siginfo_t* siginfo, void* _uc) { + uint64_t val = SIGUSR1; + + // Record the value of %xmm0 on entry and then clobber it. + GET_XMM(entryxmm[0], xmm0); + SET_XMM(val, xmm0); + + // Send a SIGUSR2 to ourself. The signal mask is configured such that + // the SIGUSR2 handler will run before this handler returns. + asm volatile( + "movl %[killnr], %%eax;" + "movl %[pid], %%edi;" + "movl %[tid], %%esi;" + "movl %[sig], %%edx;" + "syscall;" + : + : [killnr] "i"(__NR_tgkill), [pid] "rm"(pid), [tid] "rm"(tid), + [sig] "i"(SIGUSR2) + : "rax", "rdi", "rsi", "rdx", + // Clobbered by syscall. + "rcx", "r11"); + + // Record value of %xmm0 again to verify that the nested signal handler + // does not clobber it. + GET_XMM(exitxmm[0], xmm0); +} + +TEST(FPSigTest, NestedSignals) { + pid = getpid(); + tid = gettid(); + + struct sigaction sa = {}; + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_SIGINFO; + sa.sa_sigaction = sigusr1; + ASSERT_THAT(sigaction(SIGUSR1, &sa, nullptr), SyscallSucceeds()); + + sa.sa_sigaction = sigusr2; + ASSERT_THAT(sigaction(SIGUSR2, &sa, nullptr), SyscallSucceeds()); + + // The amd64 ABI specifies that the XMM register set is caller-saved. 
This + // implies that if there is any function call between SET_XMM and GET_XMM the + // compiler might save/restore xmm0 implicitly. This defeats the entire + // purpose of the test which is to verify that fpstate is restored by + // sigreturn(2). + // + // This is the reason why 'tgkill(getpid(), gettid(), SIGUSR1)' is implemented + // in inline assembly below. + // + // If the OS is broken and registers are clobbered by the signal, using tgkill + // to signal the current thread ensures that this is the clobbered thread. + + uint64_t expected = 0xdeadbeeffacefeed; + SET_XMM(expected, xmm0); + + asm volatile( + "movl %[killnr], %%eax;" + "movl %[pid], %%edi;" + "movl %[tid], %%esi;" + "movl %[sig], %%edx;" + "syscall;" + : + : [killnr] "i"(__NR_tgkill), [pid] "rm"(pid), [tid] "rm"(tid), + [sig] "i"(SIGUSR1) + : "rax", "rdi", "rsi", "rdx", + // Clobbered by syscall. + "rcx", "r11"); + + uint64_t got; + GET_XMM(got, xmm0); + + // + // The checks below verifies the following: + // - signal handlers must called with a clean fpu state. + // - sigreturn(2) must restore fpstate of the interrupted context. + // + EXPECT_EQ(expected, got); + EXPECT_EQ(entryxmm[0], 0); + EXPECT_EQ(entryxmm[1], 0); + EXPECT_EQ(exitxmm[0], SIGUSR1); + EXPECT_EQ(exitxmm[1], SIGUSR2); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/fsync.cc b/test/syscalls/linux/fsync.cc new file mode 100644 index 000000000..536a73bf1 --- /dev/null +++ b/test/syscalls/linux/fsync.cc @@ -0,0 +1,55 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <fcntl.h> +#include <stdio.h> +#include <unistd.h> + +#include <string> + +#include "gtest/gtest.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +TEST(FsyncTest, TempFileSucceeds) { + std::string path = NewTempAbsPath(); + int fd; + EXPECT_THAT(fd = open(path.c_str(), O_RDWR | O_CREAT, 0666), + SyscallSucceeds()); + const std::string data = "some data to sync"; + EXPECT_THAT(write(fd, data.c_str(), data.size()), + SyscallSucceedsWithValue(data.size())); + EXPECT_THAT(fsync(fd), SyscallSucceeds()); + ASSERT_THAT(close(fd), SyscallSucceeds()); + ASSERT_THAT(unlink(path.c_str()), SyscallSucceeds()); +} + +TEST(FsyncTest, CannotFsyncOnUnopenedFd) { + int fd; + auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + ASSERT_THAT(fd = open(f.path().c_str(), O_RDONLY), SyscallSucceeds()); + ASSERT_THAT(close(fd), SyscallSucceeds()); + + // fd is now invalid. + EXPECT_THAT(fsync(fd), SyscallFailsWithErrno(EBADF)); +} +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/futex.cc b/test/syscalls/linux/futex.cc new file mode 100644 index 000000000..6fa284013 --- /dev/null +++ b/test/syscalls/linux/futex.cc @@ -0,0 +1,595 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <errno.h> +#include <linux/futex.h> +#include <linux/types.h> +#include <sys/syscall.h> +#include <sys/time.h> +#include <sys/types.h> +#include <unistd.h> + +#include <algorithm> +#include <atomic> +#include <memory> +#include <vector> + +#include "gtest/gtest.h" +#include "absl/memory/memory.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/util/cleanup.h" +#include "test/util/file_descriptor.h" +#include "test/util/memory_util.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" +#include "test/util/timer_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// Amount of time we wait for threads doing futex_wait to start running before +// doing futex_wake. +constexpr auto kWaiterStartupDelay = absl::Seconds(3); + +// Default timeout for waiters in tests where we expect a futex_wake to be +// ineffective. +constexpr auto kIneffectiveWakeTimeout = absl::Seconds(6); + +static_assert(kWaiterStartupDelay < kIneffectiveWakeTimeout, + "futex_wait will time out before futex_wake is called"); + +int futex_wait(bool priv, std::atomic<int>* uaddr, int val, + absl::Duration timeout = absl::InfiniteDuration()) { + int op = FUTEX_WAIT; + if (priv) { + op |= FUTEX_PRIVATE_FLAG; + } + + if (timeout == absl::InfiniteDuration()) { + return RetryEINTR(syscall)(SYS_futex, uaddr, op, val, nullptr); + } + + // FUTEX_WAIT doesn't adjust the timeout if it returns EINTR, so we have to do + // so. 
+ while (true) { + auto const timeout_ts = absl::ToTimespec(timeout); + MonotonicTimer timer; + timer.Start(); + int const ret = syscall(SYS_futex, uaddr, op, val, &timeout_ts); + if (ret != -1 || errno != EINTR) { + return ret; + } + timeout = std::max(timeout - timer.Duration(), absl::ZeroDuration()); + } +} + +int futex_wait_bitset(bool priv, std::atomic<int>* uaddr, int val, int bitset, + absl::Time deadline = absl::InfiniteFuture()) { + int op = FUTEX_WAIT_BITSET | FUTEX_CLOCK_REALTIME; + if (priv) { + op |= FUTEX_PRIVATE_FLAG; + } + + auto const deadline_ts = absl::ToTimespec(deadline); + return RetryEINTR(syscall)( + SYS_futex, uaddr, op, val, + deadline == absl::InfiniteFuture() ? nullptr : &deadline_ts, nullptr, + bitset); +} + +int futex_wake(bool priv, std::atomic<int>* uaddr, int count) { + int op = FUTEX_WAKE; + if (priv) { + op |= FUTEX_PRIVATE_FLAG; + } + return syscall(SYS_futex, uaddr, op, count); +} + +int futex_wake_bitset(bool priv, std::atomic<int>* uaddr, int count, + int bitset) { + int op = FUTEX_WAKE_BITSET; + if (priv) { + op |= FUTEX_PRIVATE_FLAG; + } + return syscall(SYS_futex, uaddr, op, count, nullptr, nullptr, bitset); +} + +int futex_wake_op(bool priv, std::atomic<int>* uaddr1, std::atomic<int>* uaddr2, + int nwake1, int nwake2, uint32_t sub_op) { + int op = FUTEX_WAKE_OP; + if (priv) { + op |= FUTEX_PRIVATE_FLAG; + } + return syscall(SYS_futex, uaddr1, op, nwake1, nwake2, uaddr2, sub_op); +} + +// Fixture for futex tests parameterized by whether to use private or shared +// futexes. +class PrivateAndSharedFutexTest : public ::testing::TestWithParam<bool> { + protected: + bool IsPrivate() const { return GetParam(); } + int PrivateFlag() const { return IsPrivate() ? FUTEX_PRIVATE_FLAG : 0; } +}; + +// FUTEX_WAIT with 0 timeout does not block. +TEST_P(PrivateAndSharedFutexTest, Wait_ZeroTimeout) { + struct timespec timeout = {}; + + // Don't use the futex_wait helper because it adjusts timeout. 
+ int a = 1; + EXPECT_THAT(syscall(SYS_futex, &a, FUTEX_WAIT | PrivateFlag(), a, &timeout), + SyscallFailsWithErrno(ETIMEDOUT)); +} + +TEST_P(PrivateAndSharedFutexTest, Wait_Timeout) { + std::atomic<int> a = ATOMIC_VAR_INIT(1); + + MonotonicTimer timer; + timer.Start(); + constexpr absl::Duration kTimeout = absl::Seconds(1); + EXPECT_THAT(futex_wait(IsPrivate(), &a, a, kTimeout), + SyscallFailsWithErrno(ETIMEDOUT)); + EXPECT_GE(timer.Duration(), kTimeout); +} + +TEST_P(PrivateAndSharedFutexTest, Wait_BitsetTimeout) { + std::atomic<int> a = ATOMIC_VAR_INIT(1); + + MonotonicTimer timer; + timer.Start(); + constexpr absl::Duration kTimeout = absl::Seconds(1); + EXPECT_THAT( + futex_wait_bitset(IsPrivate(), &a, a, 0xffffffff, absl::Now() + kTimeout), + SyscallFailsWithErrno(ETIMEDOUT)); + EXPECT_GE(timer.Duration(), kTimeout); +} + +TEST_P(PrivateAndSharedFutexTest, WaitBitset_NegativeTimeout) { + std::atomic<int> a = ATOMIC_VAR_INIT(1); + + MonotonicTimer timer; + timer.Start(); + EXPECT_THAT(futex_wait_bitset(IsPrivate(), &a, a, 0xffffffff, + absl::Now() - absl::Seconds(1)), + SyscallFailsWithErrno(ETIMEDOUT)); +} + +TEST_P(PrivateAndSharedFutexTest, Wait_WrongVal) { + std::atomic<int> a = ATOMIC_VAR_INIT(1); + EXPECT_THAT(futex_wait(IsPrivate(), &a, a + 1), + SyscallFailsWithErrno(EAGAIN)); +} + +TEST_P(PrivateAndSharedFutexTest, Wait_ZeroBitset) { + std::atomic<int> a = ATOMIC_VAR_INIT(1); + EXPECT_THAT(futex_wait_bitset(IsPrivate(), &a, a, 0), + SyscallFailsWithErrno(EINVAL)); +} + +TEST_P(PrivateAndSharedFutexTest, Wake1_NoRandomSave) { + constexpr int kInitialValue = 1; + std::atomic<int> a = ATOMIC_VAR_INIT(kInitialValue); + + // Prevent save/restore from interrupting futex_wait, which will cause it to + // return EAGAIN instead of the expected result if futex_wait is restarted + // after we change the value of a below. 
+ DisableSave ds; + ScopedThread thread([&] { + EXPECT_THAT(futex_wait(IsPrivate(), &a, kInitialValue), + SyscallSucceedsWithValue(0)); + }); + absl::SleepFor(kWaiterStartupDelay); + + // Change a so that if futex_wake happens before futex_wait, the latter + // returns EAGAIN instead of hanging the test. + a.fetch_add(1); + EXPECT_THAT(futex_wake(IsPrivate(), &a, 1), SyscallSucceedsWithValue(1)); +} + +TEST_P(PrivateAndSharedFutexTest, WakeAll_NoRandomSave) { + constexpr int kInitialValue = 1; + std::atomic<int> a = ATOMIC_VAR_INIT(kInitialValue); + + DisableSave ds; + constexpr int kThreads = 5; + std::vector<std::unique_ptr<ScopedThread>> threads; + threads.reserve(kThreads); + for (int i = 0; i < kThreads; i++) { + threads.push_back(absl::make_unique<ScopedThread>([&] { + EXPECT_THAT(futex_wait(IsPrivate(), &a, kInitialValue), + SyscallSucceeds()); + })); + } + absl::SleepFor(kWaiterStartupDelay); + + a.fetch_add(1); + EXPECT_THAT(futex_wake(IsPrivate(), &a, kThreads), + SyscallSucceedsWithValue(kThreads)); +} + +TEST_P(PrivateAndSharedFutexTest, WakeSome_NoRandomSave) { + constexpr int kInitialValue = 1; + std::atomic<int> a = ATOMIC_VAR_INIT(kInitialValue); + + DisableSave ds; + constexpr int kThreads = 5; + constexpr int kWokenThreads = 3; + static_assert(kWokenThreads < kThreads, + "can't wake more threads than are created"); + std::vector<std::unique_ptr<ScopedThread>> threads; + threads.reserve(kThreads); + std::vector<int> rets; + rets.reserve(kThreads); + std::vector<int> errs; + errs.reserve(kThreads); + for (int i = 0; i < kThreads; i++) { + rets.push_back(-1); + errs.push_back(0); + } + for (int i = 0; i < kThreads; i++) { + threads.push_back(absl::make_unique<ScopedThread>([&, i] { + rets[i] = + futex_wait(IsPrivate(), &a, kInitialValue, kIneffectiveWakeTimeout); + errs[i] = errno; + })); + } + absl::SleepFor(kWaiterStartupDelay); + + a.fetch_add(1); + EXPECT_THAT(futex_wake(IsPrivate(), &a, kWokenThreads), + SyscallSucceedsWithValue(kWokenThreads)); 
+ + int woken = 0; + int timedout = 0; + for (int i = 0; i < kThreads; i++) { + threads[i]->Join(); + if (rets[i] == 0) { + woken++; + } else if (errs[i] == ETIMEDOUT) { + timedout++; + } else { + ADD_FAILURE() << " thread " << i << ": returned " << rets[i] << ", errno " + << errs[i]; + } + } + EXPECT_EQ(woken, kWokenThreads); + EXPECT_EQ(timedout, kThreads - kWokenThreads); +} + +TEST_P(PrivateAndSharedFutexTest, WaitBitset_Wake_NoRandomSave) { + constexpr int kInitialValue = 1; + std::atomic<int> a = ATOMIC_VAR_INIT(kInitialValue); + + DisableSave ds; + ScopedThread thread([&] { + EXPECT_THAT(futex_wait_bitset(IsPrivate(), &a, kInitialValue, 0b01001000), + SyscallSucceeds()); + }); + absl::SleepFor(kWaiterStartupDelay); + + a.fetch_add(1); + EXPECT_THAT(futex_wake(IsPrivate(), &a, 1), SyscallSucceedsWithValue(1)); +} + +TEST_P(PrivateAndSharedFutexTest, Wait_WakeBitset_NoRandomSave) { + constexpr int kInitialValue = 1; + std::atomic<int> a = ATOMIC_VAR_INIT(kInitialValue); + + DisableSave ds; + ScopedThread thread([&] { + EXPECT_THAT(futex_wait(IsPrivate(), &a, kInitialValue), SyscallSucceeds()); + }); + absl::SleepFor(kWaiterStartupDelay); + + a.fetch_add(1); + EXPECT_THAT(futex_wake_bitset(IsPrivate(), &a, 1, 0b01001000), + SyscallSucceedsWithValue(1)); +} + +TEST_P(PrivateAndSharedFutexTest, WaitBitset_WakeBitsetMatch_NoRandomSave) { + constexpr int kInitialValue = 1; + std::atomic<int> a = ATOMIC_VAR_INIT(kInitialValue); + + constexpr int kBitset = 0b01001000; + + DisableSave ds; + ScopedThread thread([&] { + EXPECT_THAT(futex_wait_bitset(IsPrivate(), &a, kInitialValue, kBitset), + SyscallSucceeds()); + }); + absl::SleepFor(kWaiterStartupDelay); + + a.fetch_add(1); + EXPECT_THAT(futex_wake_bitset(IsPrivate(), &a, 1, kBitset), + SyscallSucceedsWithValue(1)); +} + +TEST_P(PrivateAndSharedFutexTest, WaitBitset_WakeBitsetNoMatch_NoRandomSave) { + constexpr int kInitialValue = 1; + std::atomic<int> a = ATOMIC_VAR_INIT(kInitialValue); + + constexpr int kWaitBitset 
= 0b01000001; + constexpr int kWakeBitset = 0b00101000; + static_assert((kWaitBitset & kWakeBitset) == 0, + "futex_wake_bitset will wake waiter"); + + DisableSave ds; + ScopedThread thread([&] { + EXPECT_THAT(futex_wait_bitset(IsPrivate(), &a, kInitialValue, kWaitBitset, + absl::Now() + kIneffectiveWakeTimeout), + SyscallFailsWithErrno(ETIMEDOUT)); + }); + absl::SleepFor(kWaiterStartupDelay); + + a.fetch_add(1); + EXPECT_THAT(futex_wake_bitset(IsPrivate(), &a, 1, kWakeBitset), + SyscallSucceedsWithValue(0)); +} + +TEST_P(PrivateAndSharedFutexTest, WakeOpCondSuccess_NoRandomSave) { + constexpr int kInitialValue = 1; + std::atomic<int> a = ATOMIC_VAR_INIT(kInitialValue); + std::atomic<int> b = ATOMIC_VAR_INIT(kInitialValue); + + DisableSave ds; + ScopedThread thread_a([&] { + EXPECT_THAT(futex_wait(IsPrivate(), &a, kInitialValue), SyscallSucceeds()); + }); + ScopedThread thread_b([&] { + EXPECT_THAT(futex_wait(IsPrivate(), &b, kInitialValue), SyscallSucceeds()); + }); + absl::SleepFor(kWaiterStartupDelay); + + a.fetch_add(1); + b.fetch_add(1); + // This futex_wake_op should: + // - Wake 1 waiter on a unconditionally. + // - Wake 1 waiter on b if b == kInitialValue + 1, which it is. + // - Do "b += 1". 
+ EXPECT_THAT(futex_wake_op(IsPrivate(), &a, &b, 1, 1, + FUTEX_OP(FUTEX_OP_ADD, 1, FUTEX_OP_CMP_EQ, + (kInitialValue + 1))), + SyscallSucceedsWithValue(2)); + EXPECT_EQ(b, kInitialValue + 2); +} + +TEST_P(PrivateAndSharedFutexTest, WakeOpCondFailure_NoRandomSave) { + constexpr int kInitialValue = 1; + std::atomic<int> a = ATOMIC_VAR_INIT(kInitialValue); + std::atomic<int> b = ATOMIC_VAR_INIT(kInitialValue); + + DisableSave ds; + ScopedThread thread_a([&] { + EXPECT_THAT(futex_wait(IsPrivate(), &a, kInitialValue), SyscallSucceeds()); + }); + ScopedThread thread_b([&] { + EXPECT_THAT( + futex_wait(IsPrivate(), &b, kInitialValue, kIneffectiveWakeTimeout), + SyscallFailsWithErrno(ETIMEDOUT)); + }); + absl::SleepFor(kWaiterStartupDelay); + + a.fetch_add(1); + b.fetch_add(1); + // This futex_wake_op should: + // - Wake 1 waiter on a unconditionally. + // - Wake 1 waiter on b if b == kInitialValue - 1, which it isn't. + // - Do "b += 1". + EXPECT_THAT(futex_wake_op(IsPrivate(), &a, &b, 1, 1, + FUTEX_OP(FUTEX_OP_ADD, 1, FUTEX_OP_CMP_EQ, + (kInitialValue - 1))), + SyscallSucceedsWithValue(1)); + EXPECT_EQ(b, kInitialValue + 2); +} + +TEST_P(PrivateAndSharedFutexTest, NoWakeInterprocessPrivateAnon_NoRandomSave) { + auto const mapping = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE)); + auto const ptr = static_cast<std::atomic<int>*>(mapping.ptr()); + constexpr int kInitialValue = 1; + ptr->store(kInitialValue); + + DisableSave ds; + pid_t const child_pid = fork(); + if (child_pid == 0) { + TEST_PCHECK(futex_wait(IsPrivate(), ptr, kInitialValue, + kIneffectiveWakeTimeout) == -1 && + errno == ETIMEDOUT); + _exit(0); + } + ASSERT_THAT(child_pid, SyscallSucceeds()); + absl::SleepFor(kWaiterStartupDelay); + + EXPECT_THAT(futex_wake(IsPrivate(), ptr, 1), SyscallSucceedsWithValue(0)); + + int status; + ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFEXITED(status) && 
WEXITSTATUS(status) == 0) + << " status " << status; +} + +TEST_P(PrivateAndSharedFutexTest, WakeAfterCOWBreak_NoRandomSave) { + // Use a futex on a non-stack mapping so we can be sure that the child process + // below isn't the one that breaks copy-on-write. + auto const mapping = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE)); + auto const ptr = static_cast<std::atomic<int>*>(mapping.ptr()); + constexpr int kInitialValue = 1; + ptr->store(kInitialValue); + + DisableSave ds; + ScopedThread thread([&] { + EXPECT_THAT(futex_wait(IsPrivate(), ptr, kInitialValue), SyscallSucceeds()); + }); + absl::SleepFor(kWaiterStartupDelay); + + pid_t const child_pid = fork(); + if (child_pid == 0) { + // Wait to be killed by the parent. + while (true) pause(); + } + ASSERT_THAT(child_pid, SyscallSucceeds()); + auto cleanup_child = Cleanup([&] { + EXPECT_THAT(kill(child_pid, SIGKILL), SyscallSucceeds()); + int status; + ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL) + << " status " << status; + }); + + // In addition to preventing a late futex_wait from sleeping, this breaks + // copy-on-write on the mapped page. + ptr->fetch_add(1); + EXPECT_THAT(futex_wake(IsPrivate(), ptr, 1), SyscallSucceedsWithValue(1)); +} + +TEST_P(PrivateAndSharedFutexTest, WakeWrongKind_NoRandomSave) { + constexpr int kInitialValue = 1; + std::atomic<int> a = ATOMIC_VAR_INIT(kInitialValue); + + DisableSave ds; + ScopedThread thread([&] { + EXPECT_THAT( + futex_wait(IsPrivate(), &a, kInitialValue, kIneffectiveWakeTimeout), + SyscallFailsWithErrno(ETIMEDOUT)); + }); + absl::SleepFor(kWaiterStartupDelay); + + a.fetch_add(1); + // The value of priv passed to futex_wake is the opposite of that passed to + // the futex_waiter; we expect this not to wake the waiter. 
+ EXPECT_THAT(futex_wake(!IsPrivate(), &a, 1), SyscallSucceedsWithValue(0)); +} + +INSTANTIATE_TEST_CASE_P(SharedPrivate, PrivateAndSharedFutexTest, + ::testing::Bool()); + +// Passing null as the address only works for private futexes. + +TEST(PrivateFutexTest, WakeOp0Set) { + std::atomic<int> a = ATOMIC_VAR_INIT(1); + + int futex_op = FUTEX_OP(FUTEX_OP_SET, 2, 0, 0); + EXPECT_THAT(futex_wake_op(true, nullptr, &a, 0, 0, futex_op), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(a, 2); +} + +TEST(PrivateFutexTest, WakeOp0Add) { + std::atomic<int> a = ATOMIC_VAR_INIT(1); + int futex_op = FUTEX_OP(FUTEX_OP_ADD, 1, 0, 0); + EXPECT_THAT(futex_wake_op(true, nullptr, &a, 0, 0, futex_op), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(a, 2); +} + +TEST(PrivateFutexTest, WakeOp0Or) { + std::atomic<int> a = ATOMIC_VAR_INIT(0b01); + int futex_op = FUTEX_OP(FUTEX_OP_OR, 0b10, 0, 0); + EXPECT_THAT(futex_wake_op(true, nullptr, &a, 0, 0, futex_op), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(a, 0b11); +} + +TEST(PrivateFutexTest, WakeOp0Andn) { + std::atomic<int> a = ATOMIC_VAR_INIT(0b11); + int futex_op = FUTEX_OP(FUTEX_OP_ANDN, 0b10, 0, 0); + EXPECT_THAT(futex_wake_op(true, nullptr, &a, 0, 0, futex_op), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(a, 0b01); +} + +TEST(PrivateFutexTest, WakeOp0Xor) { + std::atomic<int> a = ATOMIC_VAR_INIT(0b1010); + int futex_op = FUTEX_OP(FUTEX_OP_XOR, 0b1100, 0, 0); + EXPECT_THAT(futex_wake_op(true, nullptr, &a, 0, 0, futex_op), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(a, 0b0110); +} + +TEST(SharedFutexTest, WakeInterprocessSharedAnon_NoRandomSave) { + auto const mapping = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED)); + auto const ptr = static_cast<std::atomic<int>*>(mapping.ptr()); + constexpr int kInitialValue = 1; + ptr->store(kInitialValue); + + DisableSave ds; + pid_t const child_pid = fork(); + if (child_pid == 0) { + TEST_PCHECK(futex_wait(false, ptr, kInitialValue) == 0); + _exit(0); + } + 
ASSERT_THAT(child_pid, SyscallSucceeds()); + auto kill_child = Cleanup( + [&] { EXPECT_THAT(kill(child_pid, SIGKILL), SyscallSucceeds()); }); + absl::SleepFor(kWaiterStartupDelay); + + ptr->fetch_add(1); + // This is an ASSERT so that if it fails, we immediately abort the test (and + // kill the subprocess). + ASSERT_THAT(futex_wake(false, ptr, 1), SyscallSucceedsWithValue(1)); + + kill_child.Release(); + int status; + ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << " status " << status; +} + +TEST(SharedFutexTest, WakeInterprocessFile_NoRandomSave) { + auto const file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + ASSERT_THAT(truncate(file.path().c_str(), kPageSize), SyscallSucceeds()); + auto const fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR)); + auto const mapping = ASSERT_NO_ERRNO_AND_VALUE(Mmap( + nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd.get(), 0)); + auto const ptr = static_cast<std::atomic<int>*>(mapping.ptr()); + constexpr int kInitialValue = 1; + ptr->store(kInitialValue); + + DisableSave ds; + pid_t const child_pid = fork(); + if (child_pid == 0) { + TEST_PCHECK(futex_wait(false, ptr, kInitialValue) == 0); + _exit(0); + } + ASSERT_THAT(child_pid, SyscallSucceeds()); + auto kill_child = Cleanup( + [&] { EXPECT_THAT(kill(child_pid, SIGKILL), SyscallSucceeds()); }); + absl::SleepFor(kWaiterStartupDelay); + + ptr->fetch_add(1); + // This is an ASSERT so that if it fails, we immediately abort the test (and + // kill the subprocess). 
+ ASSERT_THAT(futex_wake(false, ptr, 1), SyscallSucceedsWithValue(1)); + + kill_child.Release(); + int status; + ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << " status " << status; +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/getcpu.cc b/test/syscalls/linux/getcpu.cc new file mode 100644 index 000000000..3a52b25fa --- /dev/null +++ b/test/syscalls/linux/getcpu.cc @@ -0,0 +1,40 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <sched.h> + +#include "gtest/gtest.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +TEST(GetcpuTest, IsValidCpuStress) { + const int num_cpus = NumCPUs(); + absl::Time deadline = absl::Now() + absl::Seconds(10); + while (absl::Now() < deadline) { + int cpu; + ASSERT_THAT(cpu = sched_getcpu(), SyscallSucceeds()); + ASSERT_LT(cpu, num_cpus); + } +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/getdents.cc b/test/syscalls/linux/getdents.cc new file mode 100644 index 000000000..5db580aa0 --- /dev/null +++ b/test/syscalls/linux/getdents.cc @@ -0,0 +1,485 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <dirent.h> +#include <errno.h> +#include <fcntl.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <string.h> +#include <sys/eventfd.h> +#include <sys/mman.h> +#include <sys/types.h> +#include <syscall.h> +#include <unistd.h> +#include <map> +#include <string> +#include <unordered_set> +#include <utility> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/strings/numbers.h" +#include "absl/strings/str_cat.h" +#include "test/util/file_descriptor.h" +#include "test/util/fs_util.h" +#include "test/util/posix_error.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +using ::testing::IsEmpty; +using ::testing::IsSupersetOf; +using ::testing::Not; +using ::testing::NotNull; + +namespace gvisor { +namespace testing { + +namespace { + +// New Linux dirent format. +struct linux_dirent64 { + uint64_t d_ino; // Inode number + int64_t d_off; // Offset to next linux_dirent64 + unsigned short d_reclen; // NOLINT, Length of this linux_dirent64 + unsigned char d_type; // NOLINT, File type + char d_name[0]; // Filename (null-terminated) +}; + +// Old Linux dirent format. +struct linux_dirent { + unsigned long d_ino; // NOLINT + unsigned long d_off; // NOLINT + unsigned short d_reclen; // NOLINT + char d_name[0]; +}; + +// Wraps a buffer to provide a set of dirents. +// T is the underlying dirent type. +template <typename T> +class DirentBuffer { + public: + // DirentBuffer manages the buffer. + explicit DirentBuffer(size_t size) + : managed_(true), actual_size_(size), reported_size_(size) { + data_ = new char[actual_size_]; + } + + // The buffer is managed externally. 
+ DirentBuffer(char* data, size_t actual_size, size_t reported_size) + : managed_(false), + data_(data), + actual_size_(actual_size), + reported_size_(reported_size) {} + + ~DirentBuffer() { + if (managed_) { + delete[] data_; + } + } + + T* Data() { return reinterpret_cast<T*>(data_); } + + T* Start(size_t read) { + read_ = read; + if (read_) { + return Data(); + } else { + return nullptr; + } + } + + T* Current() { return reinterpret_cast<T*>(&data_[off_]); } + + T* Next() { + size_t new_off = off_ + Current()->d_reclen; + if (new_off >= read_ || new_off >= actual_size_) { + return nullptr; + } + + off_ = new_off; + return Current(); + } + + size_t Size() { return reported_size_; } + + void Reset() { + off_ = 0; + read_ = 0; + memset(data_, 0, actual_size_); + } + + private: + bool managed_; + char* data_; + size_t actual_size_; + size_t reported_size_; + + size_t off_ = 0; + + size_t read_ = 0; +}; + +// Test for getdents/getdents64. +// T is the Linux dirent type. +template <typename T> +class GetdentsTest : public ::testing::Test { + public: + using LinuxDirentType = T; + using DirentBufferType = DirentBuffer<T>; + + protected: + void SetUp() override { + dir_ = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + fd_ = ASSERT_NO_ERRNO_AND_VALUE(Open(dir_.path(), O_RDONLY | O_DIRECTORY)); + } + + // Must be overridden with explicit specialization. See below. + int SyscallNum(); + + int Getdents(LinuxDirentType* dirp, unsigned int count) { + return RetryEINTR(syscall)(SyscallNum(), fd_.get(), dirp, count); + } + + // Fill directory with num files, named by number starting at 0. + void FillDirectory(size_t num) { + for (size_t i = 0; i < num; i++) { + auto name = JoinPath(dir_.path(), absl::StrCat(i)); + TEST_CHECK(CreateWithContents(name, "").ok()); + } + } + + // Fill directory with a given list of filenames. 
+ void FillDirectoryWithFiles(const std::vector<std::string>& filenames) { + for (const auto& filename : filenames) { + auto name = JoinPath(dir_.path(), filename); + TEST_CHECK(CreateWithContents(name, "").ok()); + } + } + + // Seek to the start of the directory. + PosixError SeekStart() { + constexpr off_t kStartOfFile = 0; + off_t offset = lseek(fd_.get(), kStartOfFile, SEEK_SET); + if (offset < 0) { + return PosixError(errno, absl::StrCat("error seeking to ", kStartOfFile)); + } + if (offset != kStartOfFile) { + return PosixError(EINVAL, absl::StrCat("tried to seek to ", kStartOfFile, + " but got ", offset)); + } + return NoError(); + } + + // Call getdents multiple times, reading all dirents and calling f on each. + // f has the type signature PosixError f(T*). + // If f returns a non-OK error, so does ReadDirents. + template <typename F> + PosixError ReadDirents(DirentBufferType* dirents, F const& f) { + int n; + do { + dirents->Reset(); + + n = Getdents(dirents->Data(), dirents->Size()); + MaybeSave(); + if (n < 0) { + return PosixError(errno, "getdents"); + } + + for (auto d = dirents->Start(n); d; d = dirents->Next()) { + RETURN_IF_ERRNO(f(d)); + } + } while (n > 0); + + return NoError(); + } + + // Call Getdents successively and count all entries. + int ReadAndCountAllEntries(DirentBufferType* dirents) { + int found = 0; + + EXPECT_NO_ERRNO(ReadDirents(dirents, [&](LinuxDirentType* d) { + found++; + return NoError(); + })); + + return found; + } + + private: + TempPath dir_; + FileDescriptor fd_; +}; + +// GUnit TYPED_TEST_CASE does not allow multiple template parameters, so we +// must use explicit template specialization to set the syscall number. +template <> +int GetdentsTest<struct linux_dirent>::SyscallNum() { + return SYS_getdents; +} + +template <> +int GetdentsTest<struct linux_dirent64>::SyscallNum() { + return SYS_getdents64; +} + +// Test both legacy getdents and getdents64. 
+typedef ::testing::Types<struct linux_dirent, struct linux_dirent64> + GetdentsTypes; +TYPED_TEST_CASE(GetdentsTest, GetdentsTypes); + +// N.B. TYPED_TESTs require explicitly using this-> to access members of +// GetdentsTest, since we are inside of a derived class template. + +TYPED_TEST(GetdentsTest, VerifyEntries) { + typename TestFixture::DirentBufferType dirents(1024); + + this->FillDirectory(2); + + // Map of all the entries we expect to find. + std::map<std::string, bool> found; + found["."] = false; + found[".."] = false; + found["0"] = false; + found["1"] = false; + + EXPECT_NO_ERRNO(this->ReadDirents( + &dirents, [&](typename TestFixture::LinuxDirentType* d) { + auto kv = found.find(d->d_name); + EXPECT_NE(kv, found.end()) << "Unexpected file: " << d->d_name; + if (kv != found.end()) { + EXPECT_FALSE(kv->second); + } + found[d->d_name] = true; + return NoError(); + })); + + for (auto& kv : found) { + EXPECT_TRUE(kv.second) << "File not found: " << kv.first; + } +} + +TYPED_TEST(GetdentsTest, VerifyPadding) { + typename TestFixture::DirentBufferType dirents(1024); + + // Create files with names of length 1 through 16. + std::vector<std::string> files; + std::string filename; + for (int i = 0; i < 16; ++i) { + absl::StrAppend(&filename, "a"); + files.push_back(filename); + } + this->FillDirectoryWithFiles(files); + + // We expect to find all the files, plus '.' and '..'. + const int expect_found = 2 + files.size(); + int found = 0; + + EXPECT_NO_ERRNO(this->ReadDirents( + &dirents, [&](typename TestFixture::LinuxDirentType* d) { + EXPECT_EQ(d->d_reclen % 8, 0) + << "Dirent " << d->d_name + << " had reclen that was not byte aligned: " << d->d_name; + found++; + return NoError(); + })); + + // Make sure we found all the files. + EXPECT_EQ(found, expect_found); +} + +// For a small directory, the provided buffer should be large enough +// for all entries. +TYPED_TEST(GetdentsTest, SmallDir) { + // . and .. should be in an otherwise empty directory. 
+ int expect = 2; + + // Add some actual files. + this->FillDirectory(2); + expect += 2; + + typename TestFixture::DirentBufferType dirents(256); + + EXPECT_EQ(expect, this->ReadAndCountAllEntries(&dirents)); +} + +// A directory with lots of files requires calling getdents multiple times. +TYPED_TEST(GetdentsTest, LargeDir) { + // . and .. should be in an otherwise empty directory. + int expect = 2; + + // Add some actual files. + this->FillDirectory(100); + expect += 100; + + typename TestFixture::DirentBufferType dirents(256); + + EXPECT_EQ(expect, this->ReadAndCountAllEntries(&dirents)); +} + +// If we lie about the size of the buffer, we should still be able to read the +// entries with the available space. +TYPED_TEST(GetdentsTest, PartialBuffer) { + // . and .. should be in an otherwise empty directory. + int expect = 2; + + // Add some actual files. + this->FillDirectory(100); + expect += 100; + + void* addr = mmap(0, 2 * kPageSize, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + ASSERT_NE(addr, MAP_FAILED); + + char* buf = reinterpret_cast<char*>(addr); + + // Guard page + EXPECT_THAT( + mprotect(reinterpret_cast<void*>(buf + kPageSize), kPageSize, PROT_NONE), + SyscallSucceeds()); + + // Limit space in buf to 256 bytes. + buf += kPageSize - 256; + + // Lie about the buffer. Even though we claim the buffer is 1 page, + // we should still get all of the dirents in the first 256 bytes. + typename TestFixture::DirentBufferType dirents(buf, 256, kPageSize); + + EXPECT_EQ(expect, this->ReadAndCountAllEntries(&dirents)); + + EXPECT_THAT(munmap(addr, 2 * kPageSize), SyscallSucceeds()); +} + +// Open many file descriptors, then scan through /proc/self/fd to find and close +// them all. (The latter is commonly used to handle races between fork/execve +// and the creation of unwanted non-O_CLOEXEC file descriptors.) This tests that +// getdents iterates correctly despite mutation of /proc/self/fd. 
+TYPED_TEST(GetdentsTest, ProcSelfFd) { + constexpr size_t kNfds = 10; + std::unordered_set<int> fds; + // Close whatever is still in fds on every exit path, including early returns + // from a failed ASSERT. The scan below erases each descriptor from the set + // before closing it, so nothing is ever closed twice. + struct FdSetCloser { + std::unordered_set<int>* fds; + ~FdSetCloser() { + for (int fd : *fds) close(fd); + } + } fd_closer{&fds}; + for (size_t i = 0; i < kNfds; i++) { + int fd; + ASSERT_THAT(fd = eventfd(0, 0), SyscallSucceeds()); + fds.insert(fd); + } + + const FileDescriptor proc_self_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/self/fd", O_RDONLY | O_DIRECTORY)); + + // Make the buffer very small since we want to iterate. + typename TestFixture::DirentBufferType dirents( + 2 * sizeof(typename TestFixture::LinuxDirentType)); + std::unordered_set<int> prev_fds; + while (true) { + dirents.Reset(); + int rv; + ASSERT_THAT(rv = RetryEINTR(syscall)(this->SyscallNum(), proc_self_fd.get(), + dirents.Data(), dirents.Size()), + SyscallSucceeds()); + if (rv == 0) { + break; + } + for (auto* d = dirents.Start(rv); d; d = dirents.Next()) { + int dfd; + if (!absl::SimpleAtoi(d->d_name, &dfd)) continue; + EXPECT_TRUE(prev_fds.insert(dfd).second) + << "Repeated observation of /proc/self/fd/" << dfd; + auto it = fds.find(dfd); + if (it != fds.end()) { + fds.erase(it); + EXPECT_THAT(close(dfd), SyscallSucceeds()); + } + } + } + + // Check that we closed every fd. + EXPECT_THAT(fds, ::testing::IsEmpty()); +} + +// Test that getdents returns ENOTDIR when called on a file. +TYPED_TEST(GetdentsTest, NotDir) { + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + auto fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY)); + + typename TestFixture::DirentBufferType dirents(256); + EXPECT_THAT(RetryEINTR(syscall)(this->SyscallNum(), fd.get(), dirents.Data(), + dirents.Size()), + SyscallFailsWithErrno(ENOTDIR)); +} + +// Test that SEEK_SET to 0 causes getdents to re-read the entries. +TYPED_TEST(GetdentsTest, SeekResetsCursor) { + // . and .. should be in an otherwise empty directory. + int expect = 2; + + // Add some files to the directory. 
+ this->FillDirectory(10); + expect += 10; + + typename TestFixture::DirentBufferType dirents(256); + + // We should get all the expected entries. + EXPECT_EQ(expect, this->ReadAndCountAllEntries(&dirents)); + + // Seek back to 0. + ASSERT_NO_ERRNO(this->SeekStart()); + + // We should get all the expected entries again. + EXPECT_EQ(expect, this->ReadAndCountAllEntries(&dirents)); +} + +// Some tests using the glibc readdir interface. +TEST(ReaddirTest, OpenDir) { + DIR* dev; + ASSERT_THAT(dev = opendir("/dev"), NotNull()); + EXPECT_THAT(closedir(dev), SyscallSucceeds()); +} + +TEST(ReaddirTest, RootContainsBasicDirectories) { + EXPECT_THAT(ListDir("/", true), + IsPosixErrorOkAndHolds(IsSupersetOf( + {"bin", "dev", "etc", "lib", "proc", "sbin", "usr"}))); +} + +TEST(ReaddirTest, Bug24096713Dev) { + auto contents = ASSERT_NO_ERRNO_AND_VALUE(ListDir("/dev", true)); + EXPECT_THAT(contents, Not(IsEmpty())); +} + +TEST(ReaddirTest, Bug24096713ProcTid) { + auto contents = ASSERT_NO_ERRNO_AND_VALUE( + ListDir(absl::StrCat("/proc/", syscall(SYS_gettid), "/"), true)); + EXPECT_THAT(contents, Not(IsEmpty())); +} + +TEST(ReaddirTest, Bug33429925Proc) { + auto contents = ASSERT_NO_ERRNO_AND_VALUE(ListDir("/proc", true)); + EXPECT_THAT(contents, Not(IsEmpty())); +} + +TEST(ReaddirTest, Bug35110122Root) { + auto contents = ASSERT_NO_ERRNO_AND_VALUE(ListDir("/", true)); + EXPECT_THAT(contents, Not(IsEmpty())); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/getrandom.cc b/test/syscalls/linux/getrandom.cc new file mode 100644 index 000000000..be5325497 --- /dev/null +++ b/test/syscalls/linux/getrandom.cc @@ -0,0 +1,61 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <sys/syscall.h> +#include <sys/types.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +#ifndef SYS_getrandom +#if defined(__x86_64__) +#define SYS_getrandom 318 +#elif defined(__i386__) +#define SYS_getrandom 355 +#else +#error "Unknown architecture" +#endif +#endif // SYS_getrandom + +bool SomeByteIsNonZero(char* random_bytes, int length) { + for (int i = 0; i < length; i++) { + if (random_bytes[i] != 0) { + return true; + } + } + return false; +} + +TEST(GetrandomTest, IsRandom) { + // This test calls get_random and makes sure that the array is filled in with + // something that is non-zero. Perhaps we get back \x00\x00\x00\x00\x00.... as + // a random result, but it's so unlikely that we'll just ignore this. + char random_bytes[64] = {}; + int n = syscall(SYS_getrandom, random_bytes, 64, 0); + SKIP_IF(!IsRunningOnGvisor() && n < 0 && errno == ENOSYS); + EXPECT_THAT(n, SyscallSucceeds()); + EXPECT_GT(n, 0); // Some bytes should be returned. + EXPECT_TRUE(SomeByteIsNonZero(random_bytes, n)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/getrusage.cc b/test/syscalls/linux/getrusage.cc new file mode 100644 index 000000000..1ae603858 --- /dev/null +++ b/test/syscalls/linux/getrusage.cc @@ -0,0 +1,177 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <signal.h>
#include <sys/mman.h>
#include <sys/resource.h>
#include <sys/types.h>
#include <sys/wait.h>

#include "gtest/gtest.h"
#include "absl/time/clock.h"
#include "absl/time/time.h"
#include "test/util/logging.h"
#include "test/util/memory_util.h"
#include "test/util/signal_util.h"
#include "test/util/test_util.h"

namespace gvisor {
namespace testing {

namespace {

// Forks a child that checks its own RUSAGE_SELF / RUSAGE_CHILDREN max RSS,
// reaps it with waitpid, and then checks that the parent sees a non-zero max
// RSS both for itself and for its (now reaped) children.
TEST(GetrusageTest, BasicFork) {
  pid_t pid = fork();
  if (pid == 0) {
    // Child: checks use TEST_PCHECK / TEST_CHECK rather than gtest assertions
    // (presumably because gtest failures cannot be reported from the forked
    // child -- confirm against test/util/logging.h).
    struct rusage rusage_self;
    TEST_PCHECK(getrusage(RUSAGE_SELF, &rusage_self) == 0);
    struct rusage rusage_children;
    TEST_PCHECK(getrusage(RUSAGE_CHILDREN, &rusage_children) == 0);
    // The child has consumed some memory.
    TEST_CHECK(rusage_self.ru_maxrss != 0);
    // The child has no children of its own.
    TEST_CHECK(rusage_children.ru_maxrss == 0);
    _exit(0);
  }
  // Parent: fork() must have succeeded, then reap the child so that its usage
  // is accounted to RUSAGE_CHILDREN.
  ASSERT_THAT(pid, SyscallSucceeds());
  int status;
  ASSERT_THAT(RetryEINTR(waitpid)(pid, &status, 0), SyscallSucceeds());
  struct rusage rusage_self;
  ASSERT_THAT(getrusage(RUSAGE_SELF, &rusage_self), SyscallSucceeds());
  struct rusage rusage_children;
  ASSERT_THAT(getrusage(RUSAGE_CHILDREN, &rusage_children), SyscallSucceeds());
  // The parent has consumed some memory.
  EXPECT_GT(rusage_self.ru_maxrss, 0);
  // The child has consumed some memory, and because it has exited we can get
  // its max RSS.
  EXPECT_GT(rusage_children.ru_maxrss, 0);
}

// Verifies that a process can get the max resident set size of its grandchild,
// i.e. that maxrss propagates correctly from children to waiting parents.
+TEST(GetrusageTest, Grandchild) { + constexpr int kGrandchildSizeKb = 1024; + pid_t pid = fork(); + if (pid == 0) { + pid = fork(); + if (pid == 0) { + int flags = MAP_ANONYMOUS | MAP_POPULATE | MAP_PRIVATE; + void *addr = + mmap(nullptr, kGrandchildSizeKb * 1024, PROT_WRITE, flags, -1, 0); + TEST_PCHECK(addr != MAP_FAILED); + } else { + int status; + TEST_PCHECK(RetryEINTR(waitpid)(pid, &status, 0) == pid); + } + _exit(0); + } + ASSERT_THAT(pid, SyscallSucceeds()); + int status; + ASSERT_THAT(RetryEINTR(waitpid)(pid, &status, 0), SyscallSucceeds()); + struct rusage rusage_self; + ASSERT_THAT(getrusage(RUSAGE_SELF, &rusage_self), SyscallSucceeds()); + struct rusage rusage_children; + ASSERT_THAT(getrusage(RUSAGE_CHILDREN, &rusage_children), SyscallSucceeds()); + // The parent has consumed some memory. + EXPECT_GT(rusage_self.ru_maxrss, 0); + // The child should consume next to no memory, but the grandchild will + // consume at least 1MB. Verify that usage bubbles up to the grandparent. + EXPECT_GT(rusage_children.ru_maxrss, kGrandchildSizeKb); +} + +// Verifies that processes ignoring SIGCHLD do not have updated child maxrss +// updated. +TEST(GetrusageTest, IgnoreSIGCHLD) { + struct sigaction sa; + sa.sa_handler = SIG_IGN; + sa.sa_flags = 0; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGCHLD, sa)); + pid_t pid = fork(); + if (pid == 0) { + struct rusage rusage_self; + TEST_PCHECK(getrusage(RUSAGE_SELF, &rusage_self) == 0); + // The child has consumed some memory. + TEST_CHECK(rusage_self.ru_maxrss != 0); + _exit(0); + } + ASSERT_THAT(pid, SyscallSucceeds()); + int status; + ASSERT_THAT(RetryEINTR(waitpid)(pid, &status, 0), + SyscallFailsWithErrno(ECHILD)); + struct rusage rusage_self; + ASSERT_THAT(getrusage(RUSAGE_SELF, &rusage_self), SyscallSucceeds()); + struct rusage rusage_children; + ASSERT_THAT(getrusage(RUSAGE_CHILDREN, &rusage_children), SyscallSucceeds()); + // The parent has consumed some memory. 
+ EXPECT_GT(rusage_self.ru_maxrss, 0); + // The child's maxrss should not have propagated up. + EXPECT_EQ(rusage_children.ru_maxrss, 0); +} + +// Verifies that zombie processes do not update their parent's maxrss. Only +// reaped processes should do this. +TEST(GetrusageTest, IgnoreZombie) { + pid_t pid = fork(); + if (pid == 0) { + struct rusage rusage_self; + TEST_PCHECK(getrusage(RUSAGE_SELF, &rusage_self) == 0); + struct rusage rusage_children; + TEST_PCHECK(getrusage(RUSAGE_CHILDREN, &rusage_children) == 0); + // The child has consumed some memory. + TEST_CHECK(rusage_self.ru_maxrss != 0); + // The child has no children of its own. + TEST_CHECK(rusage_children.ru_maxrss == 0); + _exit(0); + } + ASSERT_THAT(pid, SyscallSucceeds()); + // Give the child time to exit. Because we don't call wait, the child should + // remain a zombie. + absl::SleepFor(absl::Seconds(5)); + struct rusage rusage_self; + ASSERT_THAT(getrusage(RUSAGE_SELF, &rusage_self), SyscallSucceeds()); + struct rusage rusage_children; + ASSERT_THAT(getrusage(RUSAGE_CHILDREN, &rusage_children), SyscallSucceeds()); + // The parent has consumed some memory. + EXPECT_GT(rusage_self.ru_maxrss, 0); + // The child has consumed some memory, but hasn't been reaped. + EXPECT_EQ(rusage_children.ru_maxrss, 0); +} + +TEST(GetrusageTest, Wait4) { + pid_t pid = fork(); + if (pid == 0) { + struct rusage rusage_self; + TEST_PCHECK(getrusage(RUSAGE_SELF, &rusage_self) == 0); + struct rusage rusage_children; + TEST_PCHECK(getrusage(RUSAGE_CHILDREN, &rusage_children) == 0); + // The child has consumed some memory. + TEST_CHECK(rusage_self.ru_maxrss != 0); + // The child has no children of its own. 
+ TEST_CHECK(rusage_children.ru_maxrss == 0); + _exit(0); + } + ASSERT_THAT(pid, SyscallSucceeds()); + struct rusage rusage_children; + int status; + ASSERT_THAT(RetryEINTR(wait4)(pid, &status, 0, &rusage_children), + SyscallSucceeds()); + // The child has consumed some memory, and because it has exited we can get + // its max RSS. + EXPECT_GT(rusage_children.ru_maxrss, 0); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/inotify.cc b/test/syscalls/linux/inotify.cc new file mode 100644 index 000000000..62fc55c72 --- /dev/null +++ b/test/syscalls/linux/inotify.cc @@ -0,0 +1,1489 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <fcntl.h> +#include <libgen.h> +#include <sys/inotify.h> +#include <sys/ioctl.h> + +#include <list> +#include <string> +#include <vector> + +#include "absl/strings/str_cat.h" +#include "absl/strings/str_format.h" +#include "absl/strings/str_join.h" +#include "test/util/file_descriptor.h" +#include "test/util/fs_util.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { +namespace { + +using ::absl::StreamFormat; +using ::absl::StrFormat; + +constexpr int kBufSize = 1024; + +// C++-friendly version of struct inotify_event. 
+struct Event { + int32_t wd; + uint32_t mask; + uint32_t cookie; + uint32_t len; + std::string name; + + Event(uint32_t mask, int32_t wd, absl::string_view name, uint32_t cookie) + : wd(wd), + mask(mask), + cookie(cookie), + len(name.size()), + name(std::string(name)) {} + Event(uint32_t mask, int32_t wd, absl::string_view name) + : Event(mask, wd, name, 0) {} + Event(uint32_t mask, int32_t wd) : Event(mask, wd, "", 0) {} + Event() : Event(0, 0, "", 0) {} +}; + +// Prints the symbolic name for a struct inotify_event's 'mask' field. +std::string FlagString(uint32_t flags) { + std::vector<std::string> names; + +#define EMIT(target) \ + if (flags & target) { \ + names.push_back(#target); \ + flags &= ~target; \ + } + + EMIT(IN_ACCESS); + EMIT(IN_ATTRIB); + EMIT(IN_CLOSE_WRITE); + EMIT(IN_CLOSE_NOWRITE); + EMIT(IN_CREATE); + EMIT(IN_DELETE); + EMIT(IN_DELETE_SELF); + EMIT(IN_MODIFY); + EMIT(IN_MOVE_SELF); + EMIT(IN_MOVED_FROM); + EMIT(IN_MOVED_TO); + EMIT(IN_OPEN); + + EMIT(IN_DONT_FOLLOW); + EMIT(IN_EXCL_UNLINK); + EMIT(IN_ONESHOT); + EMIT(IN_ONLYDIR); + + EMIT(IN_IGNORED); + EMIT(IN_ISDIR); + EMIT(IN_Q_OVERFLOW); + EMIT(IN_UNMOUNT); + +#undef EMIT + + // If we have anything left over at the end, print it as a hex value. + if (flags) { + names.push_back(absl::StrCat("0x", absl::Hex(flags))); + } + + return absl::StrJoin(names, "|"); +} + +std::string DumpEvent(const Event& event) { + return StrFormat( + "%s, wd=%d%s%s", FlagString(event.mask), event.wd, + (event.len > 0) ? StrFormat(", name=%s", event.name) : "", + (event.cookie > 0) ? StrFormat(", cookie=%ud", event.cookie) : ""); +} + +std::string DumpEvents(const std::vector<Event>& events, int indent_level) { + std::stringstream ss; + ss << StreamFormat("%d event%s:\n", events.size(), + (events.size() > 1) ? 
"s" : ""); + int i = 0; + for (const Event& ev : events) { + ss << StreamFormat("%sevents[%d]: %s\n", std::string(indent_level, '\t'), i++, + DumpEvent(ev)); + } + return ss.str(); +} + +// A matcher which takes an expected list of events to match against another +// list of inotify events, in order. This is similar to the ElementsAre matcher, +// but displays more informative messages on mismatch. +class EventsAreMatcher + : public ::testing::MatcherInterface<std::vector<Event>> { + public: + explicit EventsAreMatcher(std::vector<Event> references) + : references_(std::move(references)) {} + + bool MatchAndExplain( + std::vector<Event> events, + ::testing::MatchResultListener* const listener) const override { + if (references_.size() != events.size()) { + *listener << StreamFormat("\n\tCount mismatch, got %s", + DumpEvents(events, 2)); + return false; + } + + bool success = true; + for (unsigned int i = 0; i < references_.size(); ++i) { + const Event& reference = references_[i]; + const Event& target = events[i]; + + if (target.mask != reference.mask || target.wd != reference.wd || + target.name != reference.name || target.cookie != reference.cookie) { + *listener << StreamFormat("\n\tMismatch at index %d, want %s, got %s,", + i, DumpEvent(reference), DumpEvent(target)); + success = false; + } + } + + if (!success) { + *listener << StreamFormat("\n\tIn total of %s", DumpEvents(events, 2)); + } + return success; + } + + void DescribeTo(::std::ostream* const os) const override { + *os << StreamFormat("%s", DumpEvents(references_, 1)); + } + + void DescribeNegationTo(::std::ostream* const os) const override { + *os << StreamFormat("mismatch from %s", DumpEvents(references_, 1)); + } + + private: + std::vector<Event> references_; +}; + +::testing::Matcher<std::vector<Event>> Are(std::vector<Event> events) { + return MakeMatcher(new EventsAreMatcher(std::move(events))); +} + +// Similar to the EventsAre matcher, but the order of events are ignored. 
+class UnorderedEventsAreMatcher + : public ::testing::MatcherInterface<std::vector<Event>> { + public: + explicit UnorderedEventsAreMatcher(std::vector<Event> references) + : references_(std::move(references)) {} + + bool MatchAndExplain( + std::vector<Event> events, + ::testing::MatchResultListener* const listener) const override { + if (references_.size() != events.size()) { + *listener << StreamFormat("\n\tCount mismatch, got %s", + DumpEvents(events, 2)); + return false; + } + + std::vector<Event> unmatched(references_); + + for (const Event& candidate : events) { + for (auto it = unmatched.begin(); it != unmatched.end();) { + const Event& reference = *it; + if (candidate.mask == reference.mask && candidate.wd == reference.wd && + candidate.name == reference.name && + candidate.cookie == reference.cookie) { + it = unmatched.erase(it); + break; + } else { + ++it; + } + } + } + + // Anything left unmatched? If so, the matcher fails. + if (!unmatched.empty()) { + *listener << StreamFormat("\n\tFailed to match %s", + DumpEvents(unmatched, 2)); + *listener << StreamFormat("\n\tIn total of %s", DumpEvents(events, 2)); + return false; + } + + return true; + } + + void DescribeTo(::std::ostream* const os) const override { + *os << StreamFormat("unordered %s", DumpEvents(references_, 1)); + } + + void DescribeNegationTo(::std::ostream* const os) const override { + *os << StreamFormat("mismatch from unordered %s", + DumpEvents(references_, 1)); + } + + private: + std::vector<Event> references_; +}; + +::testing::Matcher<std::vector<Event>> AreUnordered(std::vector<Event> events) { + return MakeMatcher(new UnorderedEventsAreMatcher(std::move(events))); +} + +// Reads events from an inotify fd until either EOF, or read returns EAGAIN. 
+PosixErrorOr<std::vector<Event>> DrainEvents(int fd) { + std::vector<Event> events; + while (true) { + int events_size = 0; + if (ioctl(fd, FIONREAD, &events_size) < 0) { + return PosixError(errno, "ioctl(FIONREAD) failed on inotify fd"); + } + // Deliberately use a buffer that is larger than necessary, expecting to + // only read events_size bytes. + std::vector<char> buf(events_size + kBufSize, 0); + const ssize_t readlen = read(fd, buf.data(), buf.size()); + MaybeSave(); + // Read error? + if (readlen < 0) { + if (errno == EAGAIN) { + // If EAGAIN, no more events at the moment. Return what we have so far. + return events; + } + // Some other read error. Return an error. Right now if we encounter this + // after already reading some events, they get lost. However, we don't + // expect to see any error, and the calling test will fail immediately if + // we signal an error anyways, so this is acceptable. + return PosixError(errno, "read() failed on inotify fd"); + } + if (readlen < static_cast<int>(sizeof(struct inotify_event))) { + // Impossibly short read. + return PosixError( + EIO, + "read() didn't return enough data represent even a single event"); + } + if (readlen != events_size) { + return PosixError(EINVAL, absl::StrCat("read ", readlen, + " bytes, expected ", events_size)); + } + if (readlen == 0) { + // EOF. + return events; + } + + // Normal read. + const char* cursor = buf.data(); + while (cursor < (buf.data() + readlen)) { + struct inotify_event event = {}; + memcpy(&event, cursor, sizeof(struct inotify_event)); + + Event ev; + ev.wd = event.wd; + ev.mask = event.mask; + ev.cookie = event.cookie; + ev.len = event.len; + if (event.len > 0) { + TEST_CHECK(static_cast<int>(sizeof(struct inotify_event) + event.len) <= + readlen); + ev.name = + std::string(cursor + offsetof(struct inotify_event, name)); // NOLINT + // Name field should always be smaller than event.len, otherwise we have + // a buffer overflow. 
The two sizes aren't equal because the std::string + // constructor will stop at the first null byte, while event.name may be + // padded up to event.len using multiple null bytes. + TEST_CHECK(ev.name.size() <= event.len); + } + + events.push_back(ev); + cursor += sizeof(struct inotify_event) + event.len; + } + } +} + +PosixErrorOr<FileDescriptor> InotifyInit1(int flags) { + int fd; + EXPECT_THAT(fd = inotify_init1(flags), SyscallSucceeds()); + if (fd < 0) { + return PosixError(errno, "inotify_init1() failed"); + } + return FileDescriptor(fd); +} + +PosixErrorOr<int> InotifyAddWatch(int fd, const std::string& path, uint32_t mask) { + int wd; + EXPECT_THAT(wd = inotify_add_watch(fd, path.c_str(), mask), + SyscallSucceeds()); + if (wd < 0) { + return PosixError(errno, "inotify_add_watch() failed"); + } + return wd; +} + +TEST(Inotify, InotifyFdNotWritable) { + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(0)); + EXPECT_THAT(write(fd.get(), "x", 1), SyscallFailsWithErrno(EBADF)); +} + +TEST(Inotify, NonBlockingReadReturnsEagain) { + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + std::vector<char> buf(kBufSize, 0); + + // The read below should return fail with EAGAIN because there is no data to + // read and we've specified IN_NONBLOCK. We're guaranteed that there is no + // data to read because we haven't registered any watches yet. + EXPECT_THAT(read(fd.get(), buf.data(), buf.size()), + SyscallFailsWithErrno(EAGAIN)); +} + +TEST(Inotify, AddWatchOnInvalidFdFails) { + // Garbage fd. + EXPECT_THAT(inotify_add_watch(-1, "/tmp", IN_ALL_EVENTS), + SyscallFailsWithErrno(EBADF)); + EXPECT_THAT(inotify_add_watch(1337, "/tmp", IN_ALL_EVENTS), + SyscallFailsWithErrno(EBADF)); + + // Non-inotify fds. 
+ EXPECT_THAT(inotify_add_watch(0, "/tmp", IN_ALL_EVENTS), + SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(inotify_add_watch(1, "/tmp", IN_ALL_EVENTS), + SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(inotify_add_watch(2, "/tmp", IN_ALL_EVENTS), + SyscallFailsWithErrno(EINVAL)); + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open("/tmp", O_RDONLY)); + EXPECT_THAT(inotify_add_watch(fd.get(), "/tmp", IN_ALL_EVENTS), + SyscallFailsWithErrno(EINVAL)); +} + +TEST(Inotify, RemovingWatchGeneratesEvent) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + + const int wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS)); + EXPECT_THAT(inotify_rm_watch(fd.get(), wd), SyscallSucceeds()); + + // Read events, ensure the first event is IN_IGNORED. + const std::vector<Event> events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + EXPECT_THAT(events, Are({Event(IN_IGNORED, wd)})); +} + +TEST(Inotify, CanDeleteFileAfterRemovingWatch) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + TempPath file1 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path())); + + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + const int wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS)); + + EXPECT_THAT(inotify_rm_watch(fd.get(), wd), SyscallSucceeds()); + file1.reset(); +} + +TEST(Inotify, CanRemoveWatchAfterDeletingFile) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + TempPath file1 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path())); + + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + const int wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS)); + + file1.reset(); + const std::vector<Event> events = + 
ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + EXPECT_THAT(events, Are({Event(IN_ATTRIB, wd), Event(IN_DELETE_SELF, wd), + Event(IN_IGNORED, wd)})); + + EXPECT_THAT(inotify_rm_watch(fd.get(), wd), SyscallFailsWithErrno(EINVAL)); +} + +TEST(Inotify, DuplicateWatchRemovalFails) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + const int wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS)); + + EXPECT_THAT(inotify_rm_watch(fd.get(), wd), SyscallSucceeds()); + EXPECT_THAT(inotify_rm_watch(fd.get(), wd), SyscallFailsWithErrno(EINVAL)); +} + +TEST(Inotify, ConcurrentFileDeletionAndWatchRemoval) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + const std::string filename = NewTempAbsPathInDir(root.path()); + + auto file_create_delete = [filename]() { + const DisableSave ds; // Too expensive. + for (int i = 0; i < 100; ++i) { + FileDescriptor file_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(filename, O_CREAT, S_IRUSR | S_IWUSR)); + file_fd.reset(); // Close before unlinking (although save is disabled). + EXPECT_THAT(unlink(filename.c_str()), SyscallSucceeds()); + } + }; + + const int shared_fd = fd.get(); // We need to pass it to the thread. + auto add_remove_watch = [shared_fd, filename]() { + for (int i = 0; i < 100; ++i) { + int wd = inotify_add_watch(shared_fd, filename.c_str(), IN_ALL_EVENTS); + MaybeSave(); + if (wd != -1) { + // Watch added successfully, try removal. + if (inotify_rm_watch(shared_fd, wd)) { + // If removal fails, the only acceptable reason is if the wd + // is invalid, which will be the case if we try to remove + // the watch after the file has been deleted. + EXPECT_EQ(errno, EINVAL); + } + } else { + // Add watch failed, this should only fail if the target file doesn't + // exist. 
+ EXPECT_EQ(errno, ENOENT); + } + } + }; + + ScopedThread t1(file_create_delete); + ScopedThread t2(add_remove_watch); +} + +TEST(Inotify, DeletingChildGeneratesEvents) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + TempPath file1 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path())); + + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + const int root_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS)); + const int file1_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS)); + + const std::string file1_path = file1.reset(); + + const std::vector<Event> events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT( + events, + AreUnordered({Event(IN_ATTRIB, file1_wd), Event(IN_DELETE_SELF, file1_wd), + Event(IN_IGNORED, file1_wd), + Event(IN_DELETE, root_wd, Basename(file1_path))})); +} + +TEST(Inotify, CreatingFileGeneratesEvents) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + const int wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS)); + + // Create a new file in the directory. + const TempPath file1 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path())); + const std::vector<Event> events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + + // The library function we use to create the new file opens it for writing to + // create it and sets permissions on it, so we expect the three extra events. 
+ ASSERT_THAT(events, Are({Event(IN_CREATE, wd, Basename(file1.path())), + Event(IN_OPEN, wd, Basename(file1.path())), + Event(IN_CLOSE_WRITE, wd, Basename(file1.path())), + Event(IN_ATTRIB, wd, Basename(file1.path()))})); +} + +TEST(Inotify, ReadingFileGeneratesAccessEvent) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + const TempPath file1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + root.path(), "some content", TempPath::kDefaultFileMode)); + + const FileDescriptor file1_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDONLY)); + const int wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS)); + + char buf; + EXPECT_THAT(read(file1_fd.get(), &buf, 1), SyscallSucceeds()); + + const std::vector<Event> events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT(events, Are({Event(IN_ACCESS, wd, Basename(file1.path()))})); +} + +TEST(Inotify, WritingFileGeneratesModifyEvent) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + const TempPath file1 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path())); + + const FileDescriptor file1_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_WRONLY)); + const int wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS)); + + const std::string data = "some content"; + EXPECT_THAT(write(file1_fd.get(), data.c_str(), data.length()), + SyscallSucceeds()); + + const std::vector<Event> events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT(events, Are({Event(IN_MODIFY, wd, Basename(file1.path()))})); +} + +TEST(Inotify, WatchSetAfterOpenReportsCloseFdEvent) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const FileDescriptor fd = + 
ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + const TempPath file1 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path())); + + FileDescriptor file1_fd_writable = + ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_WRONLY)); + FileDescriptor file1_fd_not_writable = + ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDONLY)); + const int wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS)); + + file1_fd_writable.reset(); // Close file1_fd_writable. + std::vector<Event> events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT(events, Are({Event(IN_CLOSE_WRITE, wd, Basename(file1.path()))})); + + file1_fd_not_writable.reset(); // Close file1_fd_not_writable. + events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT(events, + Are({Event(IN_CLOSE_NOWRITE, wd, Basename(file1.path()))})); +} + +TEST(Inotify, ChildrenDeletionInWatchedDirGeneratesEvent) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + + TempPath file1 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path())); + TempPath dir1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(root.path())); + + const int wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS)); + + const std::string file1_path = file1.reset(); + const std::string dir1_path = dir1.release(); + EXPECT_THAT(rmdir(dir1_path.c_str()), SyscallSucceeds()); + + const std::vector<Event> events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + + ASSERT_THAT(events, + Are({Event(IN_DELETE, wd, Basename(file1_path)), + Event(IN_DELETE | IN_ISDIR, wd, Basename(dir1_path))})); +} + +TEST(Inotify, WatchTargetDeletionGeneratesEvent) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + + const int wd = 
ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS)); + + EXPECT_THAT(rmdir(root.path().c_str()), SyscallSucceeds()); + const std::vector<Event> events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT(events, Are({Event(IN_DELETE_SELF, wd), Event(IN_IGNORED, wd)})); +} + +TEST(Inotify, MoveGeneratesEvents) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + + TempPath file1 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path())); + + const TempPath dir1 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(root.path())); + const TempPath dir2 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(root.path())); + + const int root_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS)); + const int dir1_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), dir1.path(), IN_ALL_EVENTS)); + const int dir2_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), dir2.path(), IN_ALL_EVENTS)); + // Test move from root -> root. + std::string newpath = NewTempAbsPathInDir(root.path()); + std::string oldpath = file1.release(); + EXPECT_THAT(rename(oldpath.c_str(), newpath.c_str()), SyscallSucceeds()); + file1.reset(newpath); + std::vector<Event> events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT( + events, + Are({Event(IN_MOVED_FROM, root_wd, Basename(oldpath), events[0].cookie), + Event(IN_MOVED_TO, root_wd, Basename(newpath), events[1].cookie)})); + EXPECT_NE(events[0].cookie, 0); + EXPECT_EQ(events[0].cookie, events[1].cookie); + uint32_t last_cookie = events[0].cookie; + + // Test move from root -> root/dir1. 
+ newpath = NewTempAbsPathInDir(dir1.path()); + oldpath = file1.release(); + EXPECT_THAT(rename(oldpath.c_str(), newpath.c_str()), SyscallSucceeds()); + file1.reset(newpath); + events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT( + events, + Are({Event(IN_MOVED_FROM, root_wd, Basename(oldpath), events[0].cookie), + Event(IN_MOVED_TO, dir1_wd, Basename(newpath), events[1].cookie)})); + // Cookies should be distinct between distinct rename events. + EXPECT_NE(events[0].cookie, last_cookie); + EXPECT_EQ(events[0].cookie, events[1].cookie); + last_cookie = events[0].cookie; + + // Test move from root/dir1 -> root/dir2. + newpath = NewTempAbsPathInDir(dir2.path()); + oldpath = file1.release(); + EXPECT_THAT(rename(oldpath.c_str(), newpath.c_str()), SyscallSucceeds()); + file1.reset(newpath); + events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT( + events, + Are({Event(IN_MOVED_FROM, dir1_wd, Basename(oldpath), events[0].cookie), + Event(IN_MOVED_TO, dir2_wd, Basename(newpath), events[1].cookie)})); + EXPECT_NE(events[0].cookie, last_cookie); + EXPECT_EQ(events[0].cookie, events[1].cookie); + last_cookie = events[0].cookie; +} + +TEST(Inotify, MoveWatchedTargetGeneratesEvents) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + + TempPath file1 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path())); + + const int root_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS)); + const int file1_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS)); + + const std::string newpath = NewTempAbsPathInDir(root.path()); + const std::string oldpath = file1.release(); + EXPECT_THAT(rename(oldpath.c_str(), newpath.c_str()), SyscallSucceeds()); + file1.reset(newpath); + const std::vector<Event> events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + 
ASSERT_THAT( + events, + Are({Event(IN_MOVED_FROM, root_wd, Basename(oldpath), events[0].cookie), + Event(IN_MOVED_TO, root_wd, Basename(newpath), events[1].cookie), + // Self move events do not have a cookie. + Event(IN_MOVE_SELF, file1_wd)})); + EXPECT_NE(events[0].cookie, 0); + EXPECT_EQ(events[0].cookie, events[1].cookie); +} + +TEST(Inotify, CoalesceEvents) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + + const TempPath file1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + root.path(), "some content", TempPath::kDefaultFileMode)); + + FileDescriptor file1_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDONLY)); + const int wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS)); + + // Read the file a few times. This will would generate multiple IN_ACCESS + // events but they should get coalesced to a single event. + char buf; + EXPECT_THAT(read(file1_fd.get(), &buf, 1), SyscallSucceeds()); + EXPECT_THAT(read(file1_fd.get(), &buf, 1), SyscallSucceeds()); + EXPECT_THAT(read(file1_fd.get(), &buf, 1), SyscallSucceeds()); + EXPECT_THAT(read(file1_fd.get(), &buf, 1), SyscallSucceeds()); + + // Use the close event verify that we haven't simply left the additional + // IN_ACCESS events unread. + file1_fd.reset(); // Close file1_fd. + + const std::string file1_name = std::string(Basename(file1.path())); + std::vector<Event> events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT(events, Are({Event(IN_ACCESS, wd, file1_name), + Event(IN_CLOSE_NOWRITE, wd, file1_name)})); + + // Now let's try interleaving other events into a stream of repeated events. 
+ file1_fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDWR)); + + EXPECT_THAT(read(file1_fd.get(), &buf, 1), SyscallSucceeds()); + EXPECT_THAT(read(file1_fd.get(), &buf, 1), SyscallSucceeds()); + EXPECT_THAT(write(file1_fd.get(), "x", 1), SyscallSucceeds()); + EXPECT_THAT(write(file1_fd.get(), "x", 1), SyscallSucceeds()); + EXPECT_THAT(write(file1_fd.get(), "x", 1), SyscallSucceeds()); + EXPECT_THAT(read(file1_fd.get(), &buf, 1), SyscallSucceeds()); + EXPECT_THAT(read(file1_fd.get(), &buf, 1), SyscallSucceeds()); + + file1_fd.reset(); // Close the file. + + events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT( + events, + Are({Event(IN_OPEN, wd, file1_name), Event(IN_ACCESS, wd, file1_name), + Event(IN_MODIFY, wd, file1_name), Event(IN_ACCESS, wd, file1_name), + Event(IN_CLOSE_WRITE, wd, file1_name)})); + + // Ensure events aren't coalesced if they are from different files. + const TempPath file2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + root.path(), "some content", TempPath::kDefaultFileMode)); + // Discard events resulting from creation of file2. + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + + file1_fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDONLY)); + FileDescriptor file2_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file2.path(), O_RDONLY)); + + EXPECT_THAT(read(file1_fd.get(), &buf, 1), SyscallSucceeds()); + EXPECT_THAT(read(file2_fd.get(), &buf, 1), SyscallSucceeds()); + EXPECT_THAT(read(file1_fd.get(), &buf, 1), SyscallSucceeds()); + EXPECT_THAT(read(file1_fd.get(), &buf, 1), SyscallSucceeds()); + + // Close both files. 
+ file1_fd.reset(); + file2_fd.reset(); + + events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + const std::string file2_name = std::string(Basename(file2.path())); + ASSERT_THAT( + events, + Are({Event(IN_OPEN, wd, file1_name), Event(IN_OPEN, wd, file2_name), + Event(IN_ACCESS, wd, file1_name), Event(IN_ACCESS, wd, file2_name), + Event(IN_ACCESS, wd, file1_name), + Event(IN_CLOSE_NOWRITE, wd, file1_name), + Event(IN_CLOSE_NOWRITE, wd, file2_name)})); +} + +TEST(Inotify, ClosingInotifyFdWithoutRemovingWatchesWorks) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + + const TempPath file1 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path())); + const FileDescriptor file1_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDONLY)); + + ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS)); + // Note: The check on close will happen in FileDescriptor::~FileDescriptor(). +} + +TEST(Inotify, NestedWatches) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + + const TempPath file1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + root.path(), "some content", TempPath::kDefaultFileMode)); + const FileDescriptor file1_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDONLY)); + + const int root_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS)); + const int file1_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS)); + + // Read from file1. This should generate an event for both watches. 
+ char buf; + EXPECT_THAT(read(file1_fd.get(), &buf, 1), SyscallSucceeds()); + + const std::vector<Event> events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT(events, Are({Event(IN_ACCESS, root_wd, Basename(file1.path())), + Event(IN_ACCESS, file1_wd)})); +} + +TEST(Inotify, ConcurrentThreadsGeneratingEvents) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + + std::vector<TempPath> files; + files.reserve(10); + for (int i = 0; i < 10; i++) { + files.emplace_back(ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + root.path(), "some content", TempPath::kDefaultFileMode))); + } + + auto test_thread = [&files]() { + uint32_t seed = time(nullptr); + for (int i = 0; i < 20; i++) { + const TempPath& file = files[rand_r(&seed) % files.size()]; + const FileDescriptor file_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_WRONLY)); + TEST_PCHECK(write(file_fd.get(), "x", 1) == 1); + } + }; + + ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS)); + + std::list<ScopedThread> threads; + for (int i = 0; i < 3; i++) { + threads.emplace_back(test_thread); + } + for (auto& t : threads) { + t.Join(); + } + + const std::vector<Event> events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + // 3 threads doing 20 iterations, 3 events per iteration (open, write, + // close). However, some events may be coalesced, and we can't reliably + // predict how they'll be coalesced since the test threads aren't + // synchronized. We can only check that we aren't getting unexpected events. 
+ for (const Event& ev : events) { + EXPECT_NE(ev.mask & (IN_OPEN | IN_MODIFY | IN_CLOSE_WRITE), 0); + } +} + +TEST(Inotify, ReadWithTooSmallBufferFails) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const TempPath file1 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path())); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + + const int wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS)); + + // Open the file to queue an event. This event will not have a filename, so + // reading from the inotify fd should return sizeof(struct inotify_event) + // bytes of data. + FileDescriptor file1_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDONLY)); + std::vector<char> buf(kBufSize, 0); + ssize_t readlen; + + // Try a buffer too small to hold any potential event. This is rejected + // outright without the event being dequeued. + EXPECT_THAT(read(fd.get(), buf.data(), sizeof(struct inotify_event) - 1), + SyscallFailsWithErrno(EINVAL)); + // Try a buffer just large enough. This should succeeed. + EXPECT_THAT( + readlen = read(fd.get(), buf.data(), sizeof(struct inotify_event)), + SyscallSucceeds()); + EXPECT_EQ(readlen, sizeof(struct inotify_event)); + // Event queue is now empty, the next read should return EAGAIN. + EXPECT_THAT(read(fd.get(), buf.data(), sizeof(struct inotify_event)), + SyscallFailsWithErrno(EAGAIN)); + + // Now put a watch on the directory, so that generated events contain a name. + EXPECT_THAT(inotify_rm_watch(fd.get(), wd), SyscallSucceeds()); + + // Drain the event generated from the watch removal. + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + + ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS)); + + file1_fd.reset(); // Close file to generate an event. + + // Try a buffer too small to hold any event and one too small to hold an event + // with a name. 
These should both fail without consuming the event. + EXPECT_THAT(read(fd.get(), buf.data(), sizeof(struct inotify_event) - 1), + SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(read(fd.get(), buf.data(), sizeof(struct inotify_event)), + SyscallFailsWithErrno(EINVAL)); + // Now try with a large enough buffer. This should return the one event. + EXPECT_THAT(readlen = read(fd.get(), buf.data(), buf.size()), + SyscallSucceeds()); + EXPECT_GE(readlen, + sizeof(struct inotify_event) + Basename(file1.path()).size()); + // With the single event read, the queue should once again be empty. + EXPECT_THAT(read(fd.get(), buf.data(), sizeof(struct inotify_event)), + SyscallFailsWithErrno(EAGAIN)); +} + +TEST(Inotify, BlockingReadOnInotifyFd) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(0)); + const TempPath file1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + root.path(), "some content", TempPath::kDefaultFileMode)); + + const FileDescriptor file1_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDONLY)); + + ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS)); + + // Spawn a thread performing a blocking read for new events on the inotify fd. + std::vector<char> buf(kBufSize, 0); + const int shared_fd = fd.get(); // The thread needs it. + ScopedThread t([shared_fd, &buf]() { + ssize_t readlen; + EXPECT_THAT(readlen = read(shared_fd, buf.data(), buf.size()), + SyscallSucceeds()); + }); + + // Perform a read on the watched file, which should generate an IN_ACCESS + // event, unblocking the event_reader thread. + char c; + EXPECT_THAT(read(file1_fd.get(), &c, 1), SyscallSucceeds()); + + // Wait for the thread to read the event and exit. + t.Join(); + + // Make sure the event we got back is sane. 
+ uint32_t event_mask; + memcpy(&event_mask, buf.data() + offsetof(struct inotify_event, mask), + sizeof(event_mask)); + EXPECT_EQ(event_mask, IN_ACCESS); +} + +TEST(Inotify, WatchOnRelativePath) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + const TempPath file1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + root.path(), "some content", TempPath::kDefaultFileMode)); + + const FileDescriptor file1_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDONLY)); + + // Change working directory to root. + const char* old_working_dir = get_current_dir_name(); + EXPECT_THAT(chdir(root.path().c_str()), SyscallSucceeds()); + + // Add a watch on file1 with a relative path. + const int wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), std::string(Basename(file1.path())), IN_ALL_EVENTS)); + + // Perform a read on file1, this should generate an IN_ACCESS event. + char c; + EXPECT_THAT(read(file1_fd.get(), &c, 1), SyscallSucceeds()); + + const std::vector<Event> events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + EXPECT_THAT(events, Are({Event(IN_ACCESS, wd)})); + + // Explicitly reset the working directory so that we don't continue to + // reference "root". Once the test ends, "root" will get unlinked. If we + // continue to hold a reference, random save/restore tests can fail if a save + // is triggered after "root" is unlinked; we can't save deleted fs objects + // with active references. 
+ EXPECT_THAT(chdir(old_working_dir), SyscallSucceeds()); +} + +TEST(Inotify, ZeroLengthReadWriteDoesNotGenerateEvent) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + + const char kContent[] = "some content"; + TempPath file1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + root.path(), kContent, TempPath::kDefaultFileMode)); + const int kContentSize = sizeof(kContent) - 1; + + const FileDescriptor file1_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDWR)); + + const int wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS)); + + std::vector<char> buf(kContentSize, 0); + // Read all available data. + ssize_t readlen; + EXPECT_THAT(readlen = read(file1_fd.get(), buf.data(), kContentSize), + SyscallSucceeds()); + EXPECT_EQ(readlen, kContentSize); + // Drain all events and make sure we got the IN_ACCESS for the read. + std::vector<Event> events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + EXPECT_THAT(events, Are({Event(IN_ACCESS, wd, Basename(file1.path()))})); + + // Now try read again. This should be a 0-length read, since we're at EOF. + char c; + EXPECT_THAT(readlen = read(file1_fd.get(), &c, 1), SyscallSucceeds()); + EXPECT_EQ(readlen, 0); + // We should have no new events. + events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + EXPECT_TRUE(events.empty()); + + // Try issuing a zero-length read. + EXPECT_THAT(readlen = read(file1_fd.get(), &c, 0), SyscallSucceeds()); + EXPECT_EQ(readlen, 0); + // We should have no new events. + events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + EXPECT_TRUE(events.empty()); + + // Try issuing a zero-length write. + ssize_t writelen; + EXPECT_THAT(writelen = write(file1_fd.get(), &c, 0), SyscallSucceeds()); + EXPECT_EQ(writelen, 0); + // We should have no new events. 
+ events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + EXPECT_TRUE(events.empty()); +} + +TEST(Inotify, ChmodGeneratesAttribEvent_NoRandomSave) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const TempPath file1 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path())); + + const FileDescriptor root_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(root.path(), O_RDONLY)); + const FileDescriptor file1_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDWR)); + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + + const int root_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS)); + const int file1_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS)); + + auto verify_chmod_events = [&]() { + std::vector<Event> events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT(events, Are({Event(IN_ATTRIB, root_wd, Basename(file1.path())), + Event(IN_ATTRIB, file1_wd)})); + }; + + // Don't do cooperative S/R tests for any of the {f}chmod* syscalls below, the + // test will always fail because nodes cannot be saved when they have stricted + // permissions than the original host node. + const DisableSave ds; + + // Chmod. + ASSERT_THAT(chmod(file1.path().c_str(), S_IWGRP), SyscallSucceeds()); + verify_chmod_events(); + + // Fchmod. + ASSERT_THAT(fchmod(file1_fd.get(), S_IRGRP | S_IWGRP), SyscallSucceeds()); + verify_chmod_events(); + + // Fchmodat. 
+ const std::string file1_basename = std::string(Basename(file1.path())); + ASSERT_THAT(fchmodat(root_fd.get(), file1_basename.c_str(), S_IWGRP, 0), + SyscallSucceeds()); + verify_chmod_events(); +} + +TEST(Inotify, TruncateGeneratesModifyEvent) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const TempPath file1 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path())); + const FileDescriptor file1_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDWR)); + + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + const int root_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS)); + const int file1_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS)); + + auto verify_truncate_events = [&]() { + std::vector<Event> events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT(events, Are({Event(IN_MODIFY, root_wd, Basename(file1.path())), + Event(IN_MODIFY, file1_wd)})); + }; + + // Truncate. + EXPECT_THAT(truncate(file1.path().c_str(), 4096), SyscallSucceeds()); + verify_truncate_events(); + + // Ftruncate. + EXPECT_THAT(ftruncate(file1_fd.get(), 8192), SyscallSucceeds()); + verify_truncate_events(); + + // No events if truncate fails. 
+ EXPECT_THAT(ftruncate(file1_fd.get(), -1), SyscallFailsWithErrno(EINVAL)); + const std::vector<Event> events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT(events, Are({})); +} + +TEST(Inotify, GetdentsGeneratesAccessEvent) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const TempPath file1 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path())); + + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + + ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS)); + ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS)); + + // This internally calls getdents(2). We also expect to see an open/close + // event for the dirfd. + ASSERT_NO_ERRNO_AND_VALUE(ListDir(root.path(), false)); + const std::vector<Event> events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + + // Linux only seems to generate access events on getdents() on some + // calls. Allow the test to pass even if it isn't generated. gVisor will + // always generate the IN_ACCESS event so the test will at least ensure gVisor + // behaves reasonably. + int i = 0; + EXPECT_EQ(events[i].mask, IN_OPEN | IN_ISDIR); + ++i; + if (IsRunningOnGvisor()) { + EXPECT_EQ(events[i].mask, IN_ACCESS | IN_ISDIR); + ++i; + } else { + if (events[i].mask == (IN_ACCESS | IN_ISDIR)) { + // Skip over the IN_ACCESS event on Linux, it only shows up some of the + // time so we can't assert its existence. 
+ ++i; + } + } + EXPECT_EQ(events[i].mask, IN_CLOSE_NOWRITE | IN_ISDIR); +} + +TEST(Inotify, MknodGeneratesCreateEvent) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + + const int wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS)); + + const TempPath file1(root.path() + "/file1"); + const int rc = mknod(file1.path().c_str(), S_IFREG, 0); + // mknod(2) is only supported on tmpfs in the sandbox. + SKIP_IF(IsRunningOnGvisor() && rc != 0); + ASSERT_THAT(rc, SyscallSucceeds()); + + const std::vector<Event> events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT(events, Are({Event(IN_CREATE, wd, Basename(file1.path()))})); +} + +TEST(Inotify, SymlinkGeneratesCreateEvent) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const TempPath file1 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path())); + const TempPath link1(NewTempAbsPathInDir(root.path())); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + + const int root_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS)); + ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS)); + + ASSERT_THAT(symlink(file1.path().c_str(), link1.path().c_str()), + SyscallSucceeds()); + + const std::vector<Event> events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + + ASSERT_THAT(events, Are({Event(IN_CREATE, root_wd, Basename(link1.path()))})); +} + +TEST(Inotify, LinkGeneratesAttribAndCreateEvents) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const TempPath file1 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path())); + const TempPath link1(root.path() + "/link1"); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + + const int root_wd = 
ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS)); + const int file1_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS)); + + const int rc = link(file1.path().c_str(), link1.path().c_str()); + // link(2) is only supported on tmpfs in the sandbox. + SKIP_IF(IsRunningOnGvisor() && rc != 0 && errno == EPERM); + ASSERT_THAT(rc, SyscallSucceeds()); + + const std::vector<Event> events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT(events, Are({Event(IN_ATTRIB, file1_wd), + Event(IN_CREATE, root_wd, Basename(link1.path()))})); +} + +TEST(Inotify, HardlinksReuseSameWatch) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + TempPath file1 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path())); + TempPath link1(root.path() + "/link1"); + const int rc = link(file1.path().c_str(), link1.path().c_str()); + // link(2) is only supported on tmpfs in the sandbox. + SKIP_IF(IsRunningOnGvisor() && rc != 0 && errno == EPERM); + ASSERT_THAT(rc, SyscallSucceeds()); + + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + + const int root_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS)); + const int file1_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS)); + const int link1_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), link1.path(), IN_ALL_EVENTS)); + + // The watch descriptors for watches on different links to the same file + // should be identical. 
+ EXPECT_NE(root_wd, file1_wd); + EXPECT_EQ(file1_wd, link1_wd); + + FileDescriptor file1_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_WRONLY)); + + std::vector<Event> events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT(events, + AreUnordered({Event(IN_OPEN, root_wd, Basename(file1.path())), + Event(IN_OPEN, file1_wd)})); + + // For the next step, we want to ensure all fds to the file are closed. Do + // that now and drain the resulting events. + file1_fd.reset(); + events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT(events, + Are({Event(IN_CLOSE_WRITE, root_wd, Basename(file1.path())), + Event(IN_CLOSE_WRITE, file1_wd)})); + + // Try removing the link and let's see what events show up. Note that after + // this, we still have a link to the file so the watch shouldn't be + // automatically removed. + const std::string link1_path = link1.reset(); + + events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT(events, Are({Event(IN_ATTRIB, link1_wd), + Event(IN_DELETE, root_wd, Basename(link1_path))})); + + // Now remove the other link. Since this is the last link to the file, the + // watch should be automatically removed. 
+ const std::string file1_path = file1.reset(); + + events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT( + events, + AreUnordered({Event(IN_ATTRIB, file1_wd), Event(IN_DELETE_SELF, file1_wd), + Event(IN_IGNORED, file1_wd), + Event(IN_DELETE, root_wd, Basename(file1_path))})); +} + +TEST(Inotify, MkdirGeneratesCreateEventWithDirFlag) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + const int root_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS)); + + const TempPath dir1(NewTempAbsPathInDir(root.path())); + ASSERT_THAT(mkdir(dir1.path().c_str(), 0777), SyscallSucceeds()); + + const std::vector<Event> events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT( + events, + Are({Event(IN_CREATE | IN_ISDIR, root_wd, Basename(dir1.path()))})); +} + +TEST(Inotify, MultipleInotifyInstancesAndWatchesAllGetEvents) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const TempPath file1 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path())); + + const FileDescriptor file1_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_WRONLY)); + constexpr int kNumFds = 30; + std::vector<FileDescriptor> inotify_fds; + + for (int i = 0; i < kNumFds; ++i) { + const DisableSave ds; // Too expensive. + inotify_fds.emplace_back( + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK))); + const FileDescriptor& fd = inotify_fds[inotify_fds.size() - 1]; // Back. + ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS)); + ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS)); + } + + const std::string data = "some content"; + EXPECT_THAT(write(file1_fd.get(), data.c_str(), data.length()), + SyscallSucceeds()); + + for (const FileDescriptor& fd : inotify_fds) { + const DisableSave ds; // Too expensive. 
+ const std::vector<Event> events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + if (events.size() >= 2) { + EXPECT_EQ(events[0].mask, IN_MODIFY); + EXPECT_EQ(events[0].wd, 1); + EXPECT_EQ(events[0].name, Basename(file1.path())); + EXPECT_EQ(events[1].mask, IN_MODIFY); + EXPECT_EQ(events[1].wd, 2); + EXPECT_EQ(events[1].name, ""); + } + } +} + +TEST(Inotify, EventsGoUpAtMostOneLevel) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const TempPath dir1 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(root.path())); + TempPath file1 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir1.path())); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + + ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS)); + const int dir1_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), dir1.path(), IN_ALL_EVENTS)); + + const std::string file1_path = file1.reset(); + + const std::vector<Event> events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT(events, Are({Event(IN_DELETE, dir1_wd, Basename(file1_path))})); +} + +TEST(Inotify, DuplicateWatchReturnsSameWatchDescriptor) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const TempPath file1 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path())); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + + const int wd1 = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS)); + const int wd2 = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS)); + + EXPECT_EQ(wd1, wd2); + + const FileDescriptor file1_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_WRONLY)); + const std::vector<Event> events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + // The watch shouldn't be duplicated, we only expect one event. 
+ ASSERT_THAT(events, Are({Event(IN_OPEN, wd1)})); +} + +TEST(Inotify, UnmatchedEventsAreDiscarded) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const TempPath file1 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path())); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + + ASSERT_NO_ERRNO_AND_VALUE(InotifyAddWatch(fd.get(), file1.path(), IN_ACCESS)); + + const FileDescriptor file1_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_WRONLY)); + + const std::vector<Event> events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + // We only asked for access events, the open event should be discarded. + ASSERT_THAT(events, Are({})); +} + +TEST(Inotify, AddWatchWithInvalidEventMaskFails) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + + EXPECT_THAT(inotify_add_watch(fd.get(), root.path().c_str(), 0), + SyscallFailsWithErrno(EINVAL)); +} + +TEST(Inotify, AddWatchOnInvalidPathFails) { + const TempPath nonexistent(NewTempAbsPath()); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + + // Non-existent path. + EXPECT_THAT( + inotify_add_watch(fd.get(), nonexistent.path().c_str(), IN_CREATE), + SyscallFailsWithErrno(ENOENT)); + + // Garbage path pointer. 
+ EXPECT_THAT(inotify_add_watch(fd.get(), nullptr, IN_CREATE), + SyscallFailsWithErrno(EFAULT)); +} + +TEST(Inotify, InOnlyDirFlagRespected) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const TempPath file1 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path())); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + + EXPECT_THAT( + inotify_add_watch(fd.get(), root.path().c_str(), IN_ACCESS | IN_ONLYDIR), + SyscallSucceeds()); + + EXPECT_THAT( + inotify_add_watch(fd.get(), file1.path().c_str(), IN_ACCESS | IN_ONLYDIR), + SyscallFailsWithErrno(ENOTDIR)); +} + +TEST(Inotify, MaskAddMergesWithExistingEventMask) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const TempPath file1 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path())); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + + FileDescriptor file1_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_WRONLY)); + + const int wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), file1.path(), IN_OPEN | IN_CLOSE_WRITE)); + + const std::string data = "some content"; + EXPECT_THAT(write(file1_fd.get(), data.c_str(), data.length()), + SyscallSucceeds()); + + // We shouldn't get any events, since IN_MODIFY wasn't in the event mask. + std::vector<Event> events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT(events, Are({})); + + // Add IN_MODIFY to event mask. + ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), file1.path(), IN_MODIFY | IN_MASK_ADD)); + + EXPECT_THAT(write(file1_fd.get(), data.c_str(), data.length()), + SyscallSucceeds()); + + // This time we should get the modify event. + events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT(events, Are({Event(IN_MODIFY, wd)})); + + // Now close the fd. 
If the modify event was added to the event mask rather + // than replacing the event mask we won't get the close event. + file1_fd.reset(); + events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT(events, Are({Event(IN_CLOSE_WRITE, wd)})); +} + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/ioctl.cc b/test/syscalls/linux/ioctl.cc new file mode 100644 index 000000000..bee0ba1b3 --- /dev/null +++ b/test/syscalls/linux/ioctl.cc @@ -0,0 +1,375 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <arpa/inet.h> +#include <errno.h> +#include <fcntl.h> +#include <net/if.h> +#include <netdb.h> +#include <signal.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <unistd.h> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "gtest/gtest.h" +#include "test/syscalls/linux/ip_socket_test_util.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/signal_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +bool CheckNonBlocking(int fd) { + int ret = fcntl(fd, F_GETFL, 0); + TEST_CHECK(ret != -1); + return (ret & O_NONBLOCK) == O_NONBLOCK; +} + +bool CheckCloExec(int fd) { + int ret = fcntl(fd, F_GETFD, 0); + TEST_CHECK(ret != -1); + return (ret & FD_CLOEXEC) == FD_CLOEXEC; +} + +class IoctlTest : public ::testing::Test { + protected: + void SetUp() override { + ASSERT_THAT(fd_ = open("/dev/null", O_RDONLY), SyscallSucceeds()); + } + + void TearDown() override { + if (fd_ >= 0) { + ASSERT_THAT(close(fd_), SyscallSucceeds()); + fd_ = -1; + } + } + + int fd() const { return fd_; } + + private: + int fd_ = -1; +}; + +TEST_F(IoctlTest, BadFileDescriptor) { + EXPECT_THAT(ioctl(-1 /* fd */, 0), SyscallFailsWithErrno(EBADF)); +} + +TEST_F(IoctlTest, InvalidControlNumber) { + EXPECT_THAT(ioctl(STDOUT_FILENO, 0), SyscallFailsWithErrno(ENOTTY)); +} + +TEST_F(IoctlTest, FIONBIOSucceeds) { + EXPECT_FALSE(CheckNonBlocking(fd())); + int set = 1; + EXPECT_THAT(ioctl(fd(), FIONBIO, &set), SyscallSucceeds()); + EXPECT_TRUE(CheckNonBlocking(fd())); + set = 0; + EXPECT_THAT(ioctl(fd(), FIONBIO, &set), SyscallSucceeds()); + EXPECT_FALSE(CheckNonBlocking(fd())); +} + +TEST_F(IoctlTest, FIONBIOFails) { + EXPECT_THAT(ioctl(fd(), FIONBIO, nullptr), SyscallFailsWithErrno(EFAULT)); +} + +TEST_F(IoctlTest, FIONCLEXSucceeds) { + EXPECT_THAT(ioctl(fd(), FIONCLEX), 
SyscallSucceeds()); + EXPECT_FALSE(CheckCloExec(fd())); +} + +TEST_F(IoctlTest, FIOCLEXSucceeds) { + EXPECT_THAT(ioctl(fd(), FIOCLEX), SyscallSucceeds()); + EXPECT_TRUE(CheckCloExec(fd())); +} + +TEST_F(IoctlTest, FIOASYNCFails) { + EXPECT_THAT(ioctl(fd(), FIOASYNC, nullptr), SyscallFailsWithErrno(EFAULT)); +} + +TEST_F(IoctlTest, FIOASYNCSucceeds) { + // Not all FDs support FIOASYNC. + const FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( + Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); + + int before = -1; + ASSERT_THAT(before = fcntl(s.get(), F_GETFL), SyscallSucceeds()); + + int set = 1; + EXPECT_THAT(ioctl(s.get(), FIOASYNC, &set), SyscallSucceeds()); + + int after_set = -1; + ASSERT_THAT(after_set = fcntl(s.get(), F_GETFL), SyscallSucceeds()); + EXPECT_EQ(after_set, before | O_ASYNC) << "before was " << before; + + set = 0; + EXPECT_THAT(ioctl(s.get(), FIOASYNC, &set), SyscallSucceeds()); + + ASSERT_THAT(fcntl(s.get(), F_GETFL), SyscallSucceedsWithValue(before)); +} + +/* Count of the number of SIGIOs handled. */ +static volatile int io_received = 0; + +void inc_io_handler(int sig, siginfo_t* siginfo, void* arg) { io_received++; } + +TEST_F(IoctlTest, FIOASYNCNoTarget) { + auto pair = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + // Count SIGIOs received. + io_received = 0; + struct sigaction sa; + sa.sa_sigaction = inc_io_handler; + sigfillset(&sa.sa_mask); + sa.sa_flags = SA_RESTART; + auto sa_cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGIO, sa)); + + // Actually allow SIGIO delivery. 
+ auto mask_cleanup = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, SIGIO)); + + int set = 1; + EXPECT_THAT(ioctl(pair->second_fd(), FIOASYNC, &set), SyscallSucceeds()); + + constexpr char kData[] = "abc"; + ASSERT_THAT(WriteFd(pair->first_fd(), kData, sizeof(kData)), + SyscallSucceedsWithValue(sizeof(kData))); + + EXPECT_EQ(io_received, 0); +} + +TEST_F(IoctlTest, FIOASYNCSelfTarget) { + // FIXME: gVisor erroneously sends SIGIO on close(2), which would + // kill the test when pair goes out of scope. Temporarily ignore SIGIO so that + // the close signal is ignored. + struct sigaction sa; + sa.sa_handler = SIG_IGN; + auto early_sa_cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGIO, sa)); + + auto pair = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + // Count SIGIOs received. + io_received = 0; + sa.sa_sigaction = inc_io_handler; + sigfillset(&sa.sa_mask); + sa.sa_flags = SA_RESTART; + auto sa_cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGIO, sa)); + + // Actually allow SIGIO delivery. + auto mask_cleanup = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, SIGIO)); + + int set = 1; + EXPECT_THAT(ioctl(pair->second_fd(), FIOASYNC, &set), SyscallSucceeds()); + + pid_t pid = getpid(); + EXPECT_THAT(ioctl(pair->second_fd(), FIOSETOWN, &pid), SyscallSucceeds()); + + constexpr char kData[] = "abc"; + ASSERT_THAT(WriteFd(pair->first_fd(), kData, sizeof(kData)), + SyscallSucceedsWithValue(sizeof(kData))); + + EXPECT_EQ(io_received, 1); +} + +// Equivalent to FIOASYNCSelfTarget except that FIOSETOWN is called before +// FIOASYNC. +TEST_F(IoctlTest, FIOASYNCSelfTarget2) { + // FIXME: gVisor erroneously sends SIGIO on close(2), which would + // kill the test when pair goes out of scope. Temporarily ignore SIGIO so that + // the close signal is ignored. 
+ struct sigaction sa; + sa.sa_handler = SIG_IGN; + auto early_sa_cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGIO, sa)); + + auto pair = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + // Count SIGIOs received. + io_received = 0; + sa.sa_sigaction = inc_io_handler; + sigfillset(&sa.sa_mask); + sa.sa_flags = SA_RESTART; + auto sa_cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGIO, sa)); + + // Actually allow SIGIO delivery. + auto mask_cleanup = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, SIGIO)); + + pid_t pid = getpid(); + EXPECT_THAT(ioctl(pair->second_fd(), FIOSETOWN, &pid), SyscallSucceeds()); + + int set = 1; + EXPECT_THAT(ioctl(pair->second_fd(), FIOASYNC, &set), SyscallSucceeds()); + + constexpr char kData[] = "abc"; + ASSERT_THAT(WriteFd(pair->first_fd(), kData, sizeof(kData)), + SyscallSucceedsWithValue(sizeof(kData))); + + EXPECT_EQ(io_received, 1); +} + +TEST_F(IoctlTest, FIOASYNCInvalidPID) { + auto pair = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + int set = 1; + ASSERT_THAT(ioctl(pair->second_fd(), FIOASYNC, &set), SyscallSucceeds()); + pid_t pid = INT_MAX; + // This succeeds (with behavior equivalent to a pid of 0) in Linux prior to + // f73127356f34 "fs/fcntl: return -ESRCH in f_setown when pid/pgid can't be + // found", and fails with ESRCH after that commit. + EXPECT_THAT(ioctl(pair->second_fd(), FIOSETOWN, &pid), + AnyOf(SyscallSucceeds(), SyscallFailsWithErrno(ESRCH))); +} + +TEST_F(IoctlTest, FIOASYNCUnsetTarget) { + auto pair = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + // Count SIGIOs received. + io_received = 0; + struct sigaction sa; + sa.sa_sigaction = inc_io_handler; + sigfillset(&sa.sa_mask); + sa.sa_flags = SA_RESTART; + auto sa_cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGIO, sa)); + + // Actually allow SIGIO delivery. 
+ auto mask_cleanup = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, SIGIO)); + + int set = 1; + EXPECT_THAT(ioctl(pair->second_fd(), FIOASYNC, &set), SyscallSucceeds()); + + pid_t pid = getpid(); + EXPECT_THAT(ioctl(pair->second_fd(), FIOSETOWN, &pid), SyscallSucceeds()); + + // Passing a PID of 0 unsets the target. + pid = 0; + EXPECT_THAT(ioctl(pair->second_fd(), FIOSETOWN, &pid), SyscallSucceeds()); + + constexpr char kData[] = "abc"; + ASSERT_THAT(WriteFd(pair->first_fd(), kData, sizeof(kData)), + SyscallSucceedsWithValue(sizeof(kData))); + + EXPECT_EQ(io_received, 0); +} + +using IoctlTestSIOCGIFCONF = SimpleSocketTest; + +TEST_P(IoctlTestSIOCGIFCONF, ValidateNoArrayGetsLength) { + auto fd = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + // Validate that no array can be used to get the length required. + struct ifconf ifconf = {}; + ASSERT_THAT(ioctl(fd->get(), SIOCGIFCONF, &ifconf), SyscallSucceeds()); + ASSERT_GT(ifconf.ifc_len, 0); +} + +// This test validates that we will only return a partial array list and not +// partial ifreq structs. +TEST_P(IoctlTestSIOCGIFCONF, ValidateNoPartialIfrsReturned) { + auto fd = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + struct ifreq ifr = {}; + struct ifconf ifconf = {}; + ifconf.ifc_len = sizeof(ifr) - 1; // One byte too few. + ifconf.ifc_ifcu.ifcu_req = &ifr; + + ASSERT_THAT(ioctl(fd->get(), SIOCGIFCONF, &ifconf), SyscallSucceeds()); + ASSERT_EQ(ifconf.ifc_len, 0); + ASSERT_EQ(ifr.ifr_name[0], '\0'); // Nothing is returned. + + ifconf.ifc_len = sizeof(ifreq); + ASSERT_THAT(ioctl(fd->get(), SIOCGIFCONF, &ifconf), SyscallSucceeds()); + ASSERT_GT(ifconf.ifc_len, 0); + ASSERT_NE(ifr.ifr_name[0], '\0'); // An interface can now be returned. +} + +TEST_P(IoctlTestSIOCGIFCONF, ValidateLoopbackIsPresent) { + auto fd = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + struct ifconf ifconf = {}; + struct ifreq ifr[10] = {}; // Storage for up to 10 interfaces. 
+ + ifconf.ifc_req = ifr; + ifconf.ifc_len = sizeof(ifr); + + ASSERT_THAT(ioctl(fd->get(), SIOCGIFCONF, &ifconf), SyscallSucceeds()); + size_t num_if = ifconf.ifc_len / sizeof(struct ifreq); + + // We should have at least one interface. + ASSERT_GE(num_if, 1); + + // One of the interfaces should be a loopback. + bool found_loopback = false; + for (size_t i = 0; i < num_if; ++i) { + if (strcmp(ifr[i].ifr_name, "lo") == 0) { + // SIOCGIFCONF returns the ipv4 address of the interface, let's check it. + ASSERT_EQ(ifr[i].ifr_addr.sa_family, AF_INET); + + // Validate the address is correct for loopback. + sockaddr_in* sin = reinterpret_cast<sockaddr_in*>(&ifr[i].ifr_addr); + ASSERT_EQ(htonl(sin->sin_addr.s_addr), INADDR_LOOPBACK); + + found_loopback = true; + break; + } + } + ASSERT_TRUE(found_loopback); +} + +std::vector<SocketKind> IoctlSocketTypes() { + return {SimpleSocket(AF_UNIX, SOCK_STREAM, 0), + SimpleSocket(AF_UNIX, SOCK_DGRAM, 0), + SimpleSocket(AF_INET, SOCK_STREAM, 0), + SimpleSocket(AF_INET6, SOCK_STREAM, 0), + SimpleSocket(AF_INET, SOCK_DGRAM, 0), + SimpleSocket(AF_INET6, SOCK_DGRAM, 0)}; +} + +INSTANTIATE_TEST_CASE_P(IoctlTest, IoctlTestSIOCGIFCONF, + ::testing::ValuesIn(IoctlSocketTypes())); + +} // namespace + +TEST_F(IoctlTest, FIOGETOWNSucceeds) { + const FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( + Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); + + int get = -1; + ASSERT_THAT(ioctl(s.get(), FIOGETOWN, &get), SyscallSucceeds()); + EXPECT_EQ(get, 0); +} + +TEST_F(IoctlTest, SIOCGPGRPSucceeds) { + const FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( + Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); + + int get = -1; + ASSERT_THAT(ioctl(s.get(), SIOCGPGRP, &get), SyscallSucceeds()); + EXPECT_EQ(get, 0); +} + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/ip_socket_test_util.cc b/test/syscalls/linux/ip_socket_test_util.cc new file mode 100644 index 000000000..1659d3d83 --- 
/dev/null +++ b/test/syscalls/linux/ip_socket_test_util.cc @@ -0,0 +1,78 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "test/syscalls/linux/ip_socket_test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +std::string DescribeSocketType(int type) { + return absl::StrCat(((type & SOCK_NONBLOCK) != 0) ? "non-blocking " : "", + ((type & SOCK_CLOEXEC) != 0) ? "close-on-exec " : ""); +} + +} // namespace + +SocketPairKind IPv6TCPAcceptBindSocketPair(int type) { + std::string description = + absl::StrCat(DescribeSocketType(type), "IPv6 TCP socket"); + return SocketPairKind{ + description, TCPAcceptBindSocketPairCreator(AF_INET6, type | SOCK_STREAM, + 0, /* dual_stack = */ false)}; +} + +SocketPairKind IPv4TCPAcceptBindSocketPair(int type) { + std::string description = + absl::StrCat(DescribeSocketType(type), "IPv4 TCP socket"); + return SocketPairKind{ + description, TCPAcceptBindSocketPairCreator(AF_INET, type | SOCK_STREAM, + 0, /* dual_stack = */ false)}; +} + +SocketPairKind DualStackTCPAcceptBindSocketPair(int type) { + std::string description = + absl::StrCat(DescribeSocketType(type), "dual stack TCP socket"); + return SocketPairKind{ + description, TCPAcceptBindSocketPairCreator(AF_INET6, type | SOCK_STREAM, + 0, /* dual_stack = */ true)}; +} + +SocketPairKind IPv6UDPBidirectionalBindSocketPair(int type) { + std::string description = + absl::StrCat(DescribeSocketType(type), "IPv6 UDP 
socket"); + return SocketPairKind{description, UDPBidirectionalBindSocketPairCreator( + AF_INET6, type | SOCK_DGRAM, 0, + /* dual_stack = */ false)}; +} + +SocketPairKind IPv4UDPBidirectionalBindSocketPair(int type) { + std::string description = + absl::StrCat(DescribeSocketType(type), "IPv4 UDP socket"); + return SocketPairKind{description, UDPBidirectionalBindSocketPairCreator( + AF_INET, type | SOCK_DGRAM, 0, + /* dual_stack = */ false)}; +} + +SocketPairKind DualStackUDPBidirectionalBindSocketPair(int type) { + std::string description = + absl::StrCat(DescribeSocketType(type), "dual stack UDP socket"); + return SocketPairKind{description, UDPBidirectionalBindSocketPairCreator( + AF_INET6, type | SOCK_DGRAM, 0, + /* dual_stack = */ true)}; +} + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/ip_socket_test_util.h b/test/syscalls/linux/ip_socket_test_util.h new file mode 100644 index 000000000..1e1400ecd --- /dev/null +++ b/test/syscalls/linux/ip_socket_test_util.h @@ -0,0 +1,57 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef GVISOR_TEST_SYSCALLS_IP_SOCKET_TEST_UTIL_H_ +#define GVISOR_TEST_SYSCALLS_IP_SOCKET_TEST_UTIL_H_ + +#include <string> +#include "test/syscalls/linux/socket_test_util.h" + +namespace gvisor { +namespace testing { + +// IPv6TCPAcceptBindSocketPair returns a SocketPairKind that represents +// SocketPairs created with bind() and accept() syscalls with AF_INET6 and the +// given type bound to the IPv6 loopback. +SocketPairKind IPv6TCPAcceptBindSocketPair(int type); + +// IPv4TCPAcceptBindSocketPair returns a SocketPairKind that represents +// SocketPairs created with bind() and accept() syscalls with AF_INET and the +// given type bound to the IPv4 loopback. +SocketPairKind IPv4TCPAcceptBindSocketPair(int type); + +// DualStackTCPAcceptBindSocketPair returns a SocketPairKind that represents +// SocketPairs created with bind() and accept() syscalls with AF_INET6 and the +// given type bound to the IPv4 loopback. +SocketPairKind DualStackTCPAcceptBindSocketPair(int type); + +// IPv6UDPBidirectionalBindSocketPair returns a SocketPairKind that represents +// SocketPairs created with bind() and connect() syscalls with AF_INET6 and the +// given type bound to the IPv6 loopback. +SocketPairKind IPv6UDPBidirectionalBindSocketPair(int type); + +// IPv4UDPBidirectionalBindSocketPair returns a SocketPairKind that represents +// SocketPairs created with bind() and connect() syscalls with AF_INET and the +// given type bound to the IPv4 loopback. +SocketPairKind IPv4UDPBidirectionalBindSocketPair(int type); + +// DualStackUDPBidirectionalBindSocketPair returns a SocketPairKind that +// represents SocketPairs created with bind() and connect() syscalls with +// AF_INET6 and the given type bound to the IPv4 loopback. 
+SocketPairKind DualStackUDPBidirectionalBindSocketPair(int type); + +} // namespace testing +} // namespace gvisor + +#endif // GVISOR_TEST_SYSCALLS_IP_SOCKET_TEST_UTIL_H_ diff --git a/test/syscalls/linux/itimer.cc b/test/syscalls/linux/itimer.cc new file mode 100644 index 000000000..ee5871cbe --- /dev/null +++ b/test/syscalls/linux/itimer.cc @@ -0,0 +1,342 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <signal.h> +#include <sys/socket.h> +#include <sys/time.h> +#include <sys/types.h> +#include <time.h> + +#include <atomic> +#include <functional> +#include <iostream> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/strings/string_view.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/util/file_descriptor.h" +#include "test/util/logging.h" +#include "test/util/multiprocess_util.h" +#include "test/util/posix_error.h" +#include "test/util/signal_util.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" +#include "test/util/timer_util.h" + +namespace gvisor { +namespace testing { +namespace { + +constexpr char kSIGALRMToMainThread[] = "--itimer_sigarlm_to_main_thread"; +constexpr char kSIGPROFFairnessActive[] = "--itimer_sigprof_fairness_active"; +constexpr char kSIGPROFFairnessIdle[] = "--itimer_sigprof_fairness_idle"; + +// Time period to be set for the itimers. 
+constexpr absl::Duration kPeriod = absl::Milliseconds(25); +// Total amount of time to spend per thread. +constexpr absl::Duration kTestDuration = absl::Seconds(20); +// Amount of spin iterations to perform as the minimum work item per thread. +// Chosen to be sub-millisecond range. +constexpr int kIterations = 10000000; +// Allow deviation in the number of samples. +constexpr double kNumSamplesDeviationRatio = 0.2; +constexpr double kNumSamplesMinRatio = 0.5; + +TEST(ItimerTest, ItimervalUpdatedBeforeExpiration) { + constexpr int kSleepSecs = 10; + constexpr int kAlarmSecs = 15; + static_assert( + kSleepSecs < kAlarmSecs, + "kSleepSecs must be less than kAlarmSecs for the test to be meaningful"); + constexpr int kMaxRemainingSecs = kAlarmSecs - kSleepSecs; + + // Install a no-op handler for SIGALRM. + struct sigaction sa = {}; + sigfillset(&sa.sa_mask); + sa.sa_handler = +[](int signo) {}; + auto const cleanup_sa = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGALRM, sa)); + + // Set an itimer-based alarm for kAlarmSecs from now. + struct itimerval itv = {}; + itv.it_value.tv_sec = kAlarmSecs; + auto const cleanup_itimer = + ASSERT_NO_ERRNO_AND_VALUE(ScopedItimer(ITIMER_REAL, itv)); + + // After sleeping for kSleepSecs, the itimer value should reflect the elapsed + // time even if it hasn't expired. 
+ absl::SleepFor(absl::Seconds(kSleepSecs)); + ASSERT_THAT(getitimer(ITIMER_REAL, &itv), SyscallSucceeds()); + EXPECT_TRUE( + itv.it_value.tv_sec < kMaxRemainingSecs || + (itv.it_value.tv_sec == kMaxRemainingSecs && itv.it_value.tv_usec == 0)) + << "Remaining time: " << itv.it_value.tv_sec << " seconds + " + << itv.it_value.tv_usec << " microseconds"; +} + +ABSL_CONST_INIT static thread_local std::atomic_int signal_test_num_samples = + ATOMIC_VAR_INIT(0); + +void SignalTestSignalHandler(int /*signum*/) { signal_test_num_samples++; } + +struct SignalTestResult { + int expected_total; + int main_thread_samples; + std::vector<int> worker_samples; +}; + +std::ostream& operator<<(std::ostream& os, const SignalTestResult& r) { + os << "{expected_total: " << r.expected_total + << ", main_thread_samples: " << r.main_thread_samples + << ", worker_samples: ["; + bool first = true; + for (int sample : r.worker_samples) { + if (!first) { + os << ", "; + } + os << sample; + first = false; + } + os << "]}"; + return os; +} + +// Starts two worker threads and itimer id and measures the number of signals +// delivered to each thread. +SignalTestResult ItimerSignalTest(int id, clock_t main_clock, + clock_t worker_clock, int signal, + absl::Duration sleep) { + signal_test_num_samples = 0; + + struct sigaction sa = {}; + sa.sa_handler = &SignalTestSignalHandler; + sa.sa_flags = SA_RESTART; + sigemptyset(&sa.sa_mask); + auto sigaction_cleanup = std::move(ScopedSigaction(signal, sa).ValueOrDie()); + + int socketfds[2]; + TEST_PCHECK(socketpair(AF_UNIX, SOCK_STREAM, 0, socketfds) == 0); + + // Do the spinning in the workers. + std::function<void*(int)> work = [&](int socket_fd) { + FileDescriptor fd(socket_fd); + + absl::Time finish = Now(worker_clock) + kTestDuration; + while (Now(worker_clock) < finish) { + // Blocked on read. + char c; + RetryEINTR(read)(fd.get(), &c, 1); + for (int i = 0; i < kIterations; i++) { + // Ensure compiler won't optimize this loop away. 
+ asm(""); + } + + if (sleep != absl::ZeroDuration()) { + // Sleep so that the entire process is idle for a while. + absl::SleepFor(sleep); + } + + // Unblock the other thread. + RetryEINTR(write)(fd.get(), &c, 1); + } + + return reinterpret_cast<void*>(signal_test_num_samples.load()); + }; + + ScopedThread th1( + static_cast<std::function<void*()>>(std::bind(work, socketfds[0]))); + ScopedThread th2( + static_cast<std::function<void*()>>(std::bind(work, socketfds[1]))); + + absl::Time start = Now(main_clock); + // Start the timer. + struct itimerval timer = {}; + timer.it_value = absl::ToTimeval(kPeriod); + timer.it_interval = absl::ToTimeval(kPeriod); + auto cleanup_itimer = std::move(ScopedItimer(id, timer).ValueOrDie()); + + // Unblock th1. + // + // N.B. th2 owns socketfds[1] but can't close it until it unblocks. + char c = 0; + TEST_CHECK(write(socketfds[1], &c, 1) == 1); + + SignalTestResult result; + + // Wait for the workers to be done and collect their sample counts. + result.worker_samples.push_back(reinterpret_cast<int64_t>(th1.Join())); + result.worker_samples.push_back(reinterpret_cast<int64_t>(th2.Join())); + cleanup_itimer.Release()(); + result.expected_total = (Now(main_clock) - start) / kPeriod; + result.main_thread_samples = signal_test_num_samples.load(); + + return result; +} + +int TestSIGALRMToMainThread() { + SignalTestResult result = + ItimerSignalTest(ITIMER_REAL, CLOCK_REALTIME, CLOCK_REALTIME, SIGALRM, + absl::ZeroDuration()); + + std::cerr << "result: " << result << std::endl; + + // ITIMER_REAL-generated SIGALRMs prefer to deliver to the thread group leader + // (but don't guarantee it), so we expect to see most samples on the main + // thread. + // + // Linux only guarantees timers will never expire before the requested time. + // Thus, we only check the upper bound and that there is at least one sample. 
+ TEST_CHECK(result.main_thread_samples <= result.expected_total); + TEST_CHECK(result.main_thread_samples > 0); + for (int num : result.worker_samples) { + TEST_CHECK_MSG(num <= 50, "worker received too many samples"); + } + + return 0; +} + +// Random save/restore is disabled as it introduces additional latency and +// unpredictable distribution patterns. +TEST(ItimerTest, DeliversSIGALRMToMainThread_NoRandomSave) { + pid_t child; + int execve_errno; + auto kill = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec("/proc/self/exe", {"/proc/self/exe", kSIGALRMToMainThread}, + {}, &child, &execve_errno)); + EXPECT_EQ(0, execve_errno); + + int status; + EXPECT_THAT(RetryEINTR(waitpid)(child, &status, 0), + SyscallSucceedsWithValue(child)); + + // Not required anymore. + kill.Release(); + + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) << status; +} + +// Signals are delivered to threads fairly. +// +// sleep indicates how long to sleep worker threads each iteration to make the +// entire process idle. +int TestSIGPROFFairness(absl::Duration sleep) { + SignalTestResult result = + ItimerSignalTest(ITIMER_PROF, CLOCK_PROCESS_CPUTIME_ID, + CLOCK_THREAD_CPUTIME_ID, SIGPROF, sleep); + + std::cerr << "result: " << result << std::endl; + + // The number of samples on the main thread should be very low as it did + // nothing. + TEST_CHECK(result.main_thread_samples < 60); + + // Both workers should get roughly equal number of samples. + TEST_CHECK(result.worker_samples.size() == 2); + + TEST_CHECK(result.expected_total > 0); + + // In an ideal world each thread would get exactly 50% of the signals, + // but since that's unlikely to happen we allow for them to get no less than + // kNumSamplesDeviationRatio of the total observed samples. 
+ TEST_CHECK_MSG(std::abs(result.worker_samples[0] - result.worker_samples[1]) < + ((result.worker_samples[0] + result.worker_samples[1]) * + kNumSamplesDeviationRatio), + "one worker received disproportionate share of samples"); + + return 0; +} + +// Random save/restore is disabled as it introduces additional latency and +// unpredictable distribution patterns. +TEST(ItimerTest, DeliversSIGPROFToThreadsRoughlyFairlyActive_NoRandomSave) { + pid_t child; + int execve_errno; + auto kill = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec("/proc/self/exe", {"/proc/self/exe", kSIGPROFFairnessActive}, + {}, &child, &execve_errno)); + EXPECT_EQ(0, execve_errno); + + int status; + EXPECT_THAT(RetryEINTR(waitpid)(child, &status, 0), + SyscallSucceedsWithValue(child)); + + // Not required anymore. + kill.Release(); + + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << "Exited with code: " << status; +} + +// Random save/restore is disabled as it introduces additional latency and +// unpredictable distribution patterns. +TEST(ItimerTest, DeliversSIGPROFToThreadsRoughlyFairlyIdle_NoRandomSave) { + pid_t child; + int execve_errno; + auto kill = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec("/proc/self/exe", {"/proc/self/exe", kSIGPROFFairnessIdle}, + {}, &child, &execve_errno)); + EXPECT_EQ(0, execve_errno); + + int status; + EXPECT_THAT(RetryEINTR(waitpid)(child, &status, 0), + SyscallSucceedsWithValue(child)); + + // Not required anymore. + kill.Release(); + + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << "Exited with code: " << status; +} + +} // namespace +} // namespace testing +} // namespace gvisor + +namespace { +void MaskSIGPIPE() { + // Always mask SIGPIPE as it's common and tests aren't expected to handle it. + // We don't take the TestInit() path so we must do this manually. 
+ struct sigaction sa = {}; + sa.sa_handler = SIG_IGN; + TEST_CHECK(sigaction(SIGPIPE, &sa, nullptr) == 0); +} +} // namespace + +int main(int argc, char** argv) { + // These tests require no background threads, so check for them before + // TestInit. + for (int i = 0; i < argc; i++) { + absl::string_view arg(argv[i]); + + if (arg == gvisor::testing::kSIGALRMToMainThread) { + MaskSIGPIPE(); + return gvisor::testing::TestSIGALRMToMainThread(); + } + if (arg == gvisor::testing::kSIGPROFFairnessActive) { + MaskSIGPIPE(); + return gvisor::testing::TestSIGPROFFairness(absl::ZeroDuration()); + } + if (arg == gvisor::testing::kSIGPROFFairnessIdle) { + MaskSIGPIPE(); + return gvisor::testing::TestSIGPROFFairness(absl::Milliseconds(10)); + } + } + + gvisor::testing::TestInit(&argc, &argv); + + return RUN_ALL_TESTS(); +} diff --git a/test/syscalls/linux/kill.cc b/test/syscalls/linux/kill.cc new file mode 100644 index 000000000..18ba8fb16 --- /dev/null +++ b/test/syscalls/linux/kill.cc @@ -0,0 +1,380 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <errno.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <unistd.h> + +#include <cerrno> +#include <csignal> + +#include "gtest/gtest.h" +#include "absl/synchronization/mutex.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/util/capability_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/logging.h" +#include "test/util/signal_util.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +DEFINE_int32(scratch_uid, 65534, "scratch UID"); +DEFINE_int32(scratch_gid, 65534, "scratch GID"); + +using ::testing::Ge; + +namespace gvisor { +namespace testing { + +namespace { + +TEST(KillTest, CanKillValidPid) { + // If pid is positive, then signal sig is sent to the process with the ID + // specified by pid. + EXPECT_THAT(kill(getpid(), 0), SyscallSucceeds()); + // If pid equals 0, then sig is sent to every process in the process group of + // the calling process. + EXPECT_THAT(kill(0, 0), SyscallSucceeds()); + + ScopedThread([] { EXPECT_THAT(kill(gettid(), 0), SyscallSucceeds()); }); +} + +void SigHandler(int sig, siginfo_t* info, void* context) { _exit(0); } + +// If pid equals -1, then sig is sent to every process for which the calling +// process has permission to send signals, except for process 1 (init). +TEST(KillTest, CanKillAllPIDs) { + int pipe_fds[2]; + ASSERT_THAT(pipe(pipe_fds), SyscallSucceeds()); + FileDescriptor read_fd(pipe_fds[0]); + FileDescriptor write_fd(pipe_fds[1]); + + pid_t pid = fork(); + if (pid == 0) { + read_fd.reset(); + + struct sigaction sa; + sa.sa_sigaction = SigHandler; + sigfillset(&sa.sa_mask); + sa.sa_flags = SA_SIGINFO; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGWINCH, sa)); + + // Indicate to the parent that we're ready. + write_fd.reset(); + + // Wait until we get the signal from the parent. 
+ while (true) { + pause(); + } + } + + EXPECT_THAT(pid, SyscallSucceeds()); + + write_fd.reset(); + + // Wait for the child to indicate that it's unmasked the signal by closing + // the write end. + char buf; + ASSERT_THAT(ReadFd(read_fd.get(), &buf, 1), SyscallSucceedsWithValue(0)); + + // Signal the child and wait for it to die with status 0, indicating that + // it got the expected signal. + EXPECT_THAT(kill(-1, SIGWINCH), SyscallSucceeds()); + + int status; + ASSERT_THAT(RetryEINTR(waitpid)(pid, &status, 0), + SyscallSucceedsWithValue(pid)); + EXPECT_TRUE(WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); +} + +TEST(KillTest, CannotKillInvalidPID) { + // We need an unused pid to verify that kill fails when given one. + // + // There is no way to guarantee that a PID is unused, but the PID of a + // recently exited process likely won't be reused soon. + pid_t fake_pid = fork(); + if (fake_pid == 0) { + _exit(0); + } + + EXPECT_THAT(fake_pid, SyscallSucceeds()); + + int status; + ASSERT_THAT(RetryEINTR(waitpid)(fake_pid, &status, 0), + SyscallSucceedsWithValue(fake_pid)); + EXPECT_TRUE(WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); + + EXPECT_THAT(kill(fake_pid, 0), SyscallFailsWithErrno(ESRCH)); +} + +TEST(KillTest, CannotUseInvalidSignal) { + EXPECT_THAT(kill(getpid(), 200), SyscallFailsWithErrno(EINVAL)); +} + +TEST(KillTest, CanKillRemoteProcess) { + pid_t pid = fork(); + if (pid == 0) { + while (true) { + pause(); + } + } + + EXPECT_THAT(pid, SyscallSucceeds()); + + EXPECT_THAT(kill(pid, SIGKILL), SyscallSucceeds()); + + int status; + ASSERT_THAT(RetryEINTR(waitpid)(pid, &status, 0), + SyscallSucceedsWithValue(pid)); + EXPECT_TRUE(WIFSIGNALED(status)); + EXPECT_EQ(SIGKILL, WTERMSIG(status)); +} + +TEST(KillTest, CanKillOwnProcess) { + EXPECT_THAT(kill(getpid(), 0), SyscallSucceeds()); +} + +// Verify that you can kill a process even using a tid from a thread other than +// the group leader. 
+TEST(KillTest, CannotKillTid) { + pid_t tid; + bool tid_available = false; + bool finished = false; + absl::Mutex mu; + ScopedThread t([&] { + mu.Lock(); + tid = gettid(); + tid_available = true; + mu.Await(absl::Condition(&finished)); + mu.Unlock(); + }); + mu.LockWhen(absl::Condition(&tid_available)); + EXPECT_THAT(kill(tid, 0), SyscallSucceeds()); + finished = true; + mu.Unlock(); +} + +TEST(KillTest, SetPgid) { + for (int i = 0; i < 10; i++) { + // The following is the normal pattern for creating a new process group. + // Both the parent and child process will call setpgid in order to avoid any + // race conditions. We do this ten times to catch races. + pid_t pid = fork(); + if (pid == 0) { + setpgid(0, 0); + while (true) { + pause(); + } + } + + EXPECT_THAT(pid, SyscallSucceeds()); + + // Set the child's group and exit. + ASSERT_THAT(setpgid(pid, pid), SyscallSucceeds()); + EXPECT_THAT(kill(pid, SIGKILL), SyscallSucceeds()); + + int status; + EXPECT_THAT(RetryEINTR(waitpid)(-pid, &status, 0), + SyscallSucceedsWithValue(pid)); + EXPECT_TRUE(WIFSIGNALED(status)); + EXPECT_EQ(SIGKILL, WTERMSIG(status)); + } +} + +TEST(KillTest, ProcessGroups) { + // Fork a new child. + // + // other_child is used as a placeholder process. We use this PID as our "does + // not exist" process group to ensure some amount of safety. (It is still + // possible to violate this assumption, but extremely unlikely.) + pid_t child = fork(); + if (child == 0) { + while (true) { + pause(); + } + } + EXPECT_THAT(child, SyscallSucceeds()); + + pid_t other_child = fork(); + if (other_child == 0) { + while (true) { + pause(); + } + } + + // Ensure the kill does not succeed without the new group. + EXPECT_THAT(kill(-child, SIGKILL), SyscallFailsWithErrno(ESRCH)); + + // Put the child in its own process group. + ASSERT_THAT(setpgid(child, child), SyscallSucceeds()); + + // This should not be allowed: you can only create a new group with the same + // id or join an existing one. 
The other_child group should not exist. + ASSERT_THAT(setpgid(child, other_child), SyscallFailsWithErrno(EPERM)); + + // Done with other_child; kill it. + EXPECT_THAT(kill(other_child, SIGKILL), SyscallSucceeds()); + int status; + EXPECT_THAT(RetryEINTR(waitpid)(other_child, &status, 0), SyscallSucceeds()); + + // Linux returns success for the no-op call. + ASSERT_THAT(setpgid(child, child), SyscallSucceeds()); + + // Kill the child's process group. + ASSERT_THAT(kill(-child, SIGKILL), SyscallSucceeds()); + + // Wait on the process group; ensure that the signal was as expected. + EXPECT_THAT(RetryEINTR(waitpid)(-child, &status, 0), + SyscallSucceedsWithValue(child)); + EXPECT_TRUE(WIFSIGNALED(status)); + EXPECT_EQ(SIGKILL, WTERMSIG(status)); + + // Try to kill the process group again; ensure that the wait fails. + EXPECT_THAT(kill(-child, SIGKILL), SyscallFailsWithErrno(ESRCH)); + EXPECT_THAT(RetryEINTR(waitpid)(-child, &status, 0), + SyscallFailsWithErrno(ECHILD)); +} + +TEST(KillTest, ChildDropsPrivsCannotKill) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SETUID))); + + int uid = FLAGS_scratch_uid; + int gid = FLAGS_scratch_gid; + + // Create the child that drops privileges and tries to kill the parent. + pid_t pid = fork(); + if (pid == 0) { + TEST_PCHECK(setresgid(gid, gid, gid) == 0); + MaybeSave(); + + TEST_PCHECK(setresuid(uid, uid, uid) == 0); + MaybeSave(); + + // setresuid should have dropped CAP_KILL. Make sure. + TEST_CHECK(!HaveCapability(CAP_KILL).ValueOrDie()); + + // Try to kill parent with every signal-sending syscall possible. 
+ pid_t parent = getppid(); + + TEST_CHECK(kill(parent, SIGKILL) < 0); + TEST_PCHECK_MSG(errno == EPERM, "kill failed with wrong errno"); + MaybeSave(); + + TEST_CHECK(tgkill(parent, parent, SIGKILL) < 0); + TEST_PCHECK_MSG(errno == EPERM, "tgkill failed with wrong errno"); + MaybeSave(); + + TEST_CHECK(syscall(SYS_tkill, parent, SIGKILL) < 0); + TEST_PCHECK_MSG(errno == EPERM, "tkill failed with wrong errno"); + MaybeSave(); + + siginfo_t uinfo; + uinfo.si_code = -1; // SI_QUEUE (allowed). + + TEST_CHECK(syscall(SYS_rt_sigqueueinfo, parent, SIGKILL, &uinfo) < 0); + TEST_PCHECK_MSG(errno == EPERM, "rt_sigqueueinfo failed with wrong errno"); + MaybeSave(); + + TEST_CHECK(syscall(SYS_rt_tgsigqueueinfo, parent, parent, SIGKILL, &uinfo) < + 0); + TEST_PCHECK_MSG(errno == EPERM, "rt_tgsigqueueinfo failed with wrong errno"); + MaybeSave(); + + _exit(0); + } + + EXPECT_THAT(pid, SyscallSucceeds()); + + int status; + EXPECT_THAT(RetryEINTR(waitpid)(pid, &status, 0), + SyscallSucceedsWithValue(pid)); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << "status = " << status; +} + +TEST(KillTest, CanSIGCONTSameSession) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SETUID))); + + pid_t stopped_child = fork(); + if (stopped_child == 0) { + raise(SIGSTOP); + _exit(0); + } + + EXPECT_THAT(stopped_child, SyscallSucceeds()); + + // Put the child in its own process group. The child and parent process + // groups also share a session. + ASSERT_THAT(setpgid(stopped_child, stopped_child), SyscallSucceeds()); + + // Make sure child stopped. + int status; + EXPECT_THAT(RetryEINTR(waitpid)(stopped_child, &status, WUNTRACED), + SyscallSucceedsWithValue(stopped_child)); + EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP) + << "status " << status; + + int uid = FLAGS_scratch_uid; + int gid = FLAGS_scratch_gid; + + // Drop privileges only in child process, or else this parent process won't be + // able to open some log files after the test ends. 
+ pid_t other_child = fork(); + if (other_child == 0) { + // Drop privileges. + TEST_PCHECK(setresgid(gid, gid, gid) == 0); + MaybeSave(); + + TEST_PCHECK(setresuid(uid, uid, uid) == 0); + MaybeSave(); + + // setresuid should have dropped CAP_KILL. + TEST_CHECK(!HaveCapability(CAP_KILL).ValueOrDie()); + + // Child 2 and child should now not share a thread group and any UIDs. + // Child 2 should have no privileges. That means any signal other than + // SIGCONT should fail. + TEST_CHECK(kill(stopped_child, SIGKILL) < 0); + TEST_PCHECK_MSG(errno == EPERM, "kill failed with wrong errno"); + MaybeSave(); + + TEST_PCHECK(kill(stopped_child, SIGCONT) == 0); + MaybeSave(); + + _exit(0); + } + + EXPECT_THAT(other_child, SyscallSucceeds()); + + // Make sure child exited normally. + EXPECT_THAT(RetryEINTR(waitpid)(stopped_child, &status, 0), + SyscallSucceedsWithValue(stopped_child)); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << "status " << status; + + // Make sure other_child exited normally. + EXPECT_THAT(RetryEINTR(waitpid)(other_child, &status, 0), + SyscallSucceedsWithValue(other_child)); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << "status " << status; +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/link.cc b/test/syscalls/linux/link.cc new file mode 100644 index 000000000..ed74437bc --- /dev/null +++ b/test/syscalls/linux/link.cc @@ -0,0 +1,291 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include <errno.h> +#include <fcntl.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include <string> + +#include "gtest/gtest.h" +#include "absl/strings/str_cat.h" +#include "test/util/capability_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/fs_util.h" +#include "test/util/posix_error.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +DEFINE_int32(scratch_uid, 65534, "scratch UID"); + +namespace gvisor { +namespace testing { + +namespace { + +// IsSameFile returns true if both filenames have the same device and inode. +bool IsSameFile(const std::string& f1, const std::string& f2) { + // Use lstat rather than stat, so that symlinks are not followed. + struct stat stat1 = {}; + EXPECT_THAT(lstat(f1.c_str(), &stat1), SyscallSucceeds()); + struct stat stat2 = {}; + EXPECT_THAT(lstat(f2.c_str(), &stat2), SyscallSucceeds()); + + return stat1.st_dev == stat2.st_dev && stat1.st_ino == stat2.st_ino; +} + +TEST(LinkTest, CanCreateLinkFile) { + auto oldfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const std::string newname = NewTempAbsPath(); + + // Get the initial link count. + uint64_t initial_link_count = ASSERT_NO_ERRNO_AND_VALUE(Links(oldfile.path())); + + EXPECT_THAT(link(oldfile.path().c_str(), newname.c_str()), SyscallSucceeds()); + + EXPECT_TRUE(IsSameFile(oldfile.path(), newname)); + + // Link count should be incremented. + EXPECT_THAT(Links(oldfile.path()), + IsPosixErrorOkAndHolds(initial_link_count + 1)); + + // Delete the link. + EXPECT_THAT(unlink(newname.c_str()), SyscallSucceeds()); + + // Link count should be back to initial. 
+ EXPECT_THAT(Links(oldfile.path()), + IsPosixErrorOkAndHolds(initial_link_count)); +} + +TEST(LinkTest, PermissionDenied) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_FOWNER))); + + // Make the file "unsafe" to link by making it only readable, but not + // writable. + const auto oldfile = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileMode(0400)); + const std::string newname = NewTempAbsPath(); + + // Do setuid in a separate thread so that after finishing this test, the + // process can still open files the test harness created before starting this + // test. Otherwise, the files are created by root (UID before the test), but + // cannot be opened by the `uid` set below after the test. After calling + // setuid(non-zero-UID), there is no way to get root privileges back. + ScopedThread([&] { + // Use syscall instead of glibc setuid wrapper because we want this setuid + // call to only apply to this task. POSIX threads, however, require that all + // threads have the same UIDs, so using the setuid wrapper sets all threads' + // real UID. + // Also drops capabilities. + EXPECT_THAT(syscall(SYS_setuid, FLAGS_scratch_uid), SyscallSucceeds()); + + EXPECT_THAT(link(oldfile.path().c_str(), newname.c_str()), + SyscallFailsWithErrno(EPERM)); + }); +} + +TEST(LinkTest, CannotLinkDirectory) { + auto olddir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const std::string newdir = NewTempAbsPath(); + + EXPECT_THAT(link(olddir.path().c_str(), newdir.c_str()), + SyscallFailsWithErrno(EPERM)); + + EXPECT_THAT(rmdir(olddir.path().c_str()), SyscallSucceeds()); +} + +TEST(LinkTest, CannotLinkWithSlash) { + auto oldfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + // Put a final "/" on newname. 
+ const std::string newname = absl::StrCat(NewTempAbsPath(), "/"); + + EXPECT_THAT(link(oldfile.path().c_str(), newname.c_str()), + SyscallFailsWithErrno(ENOENT)); +} + +TEST(LinkTest, OldnameIsEmpty) { + const std::string newname = NewTempAbsPath(); + EXPECT_THAT(link("", newname.c_str()), SyscallFailsWithErrno(ENOENT)); +} + +TEST(LinkTest, OldnameDoesNotExist) { + // oldname is a fresh temp path that is never created, so the link source + // is missing (as opposed to empty, which OldnameIsEmpty covers). + const std::string oldname = NewTempAbsPath(); + const std::string newname = NewTempAbsPath(); + EXPECT_THAT(link(oldname.c_str(), newname.c_str()), + SyscallFailsWithErrno(ENOENT)); +} + +TEST(LinkTest, NewnameCannotExist) { + const std::string newname = + JoinPath(GetAbsoluteTestTmpdir(), "thisdoesnotexist", "foo"); + EXPECT_THAT(link("/thisdoesnotmatter", newname.c_str()), + SyscallFailsWithErrno(ENOENT)); +} + +TEST(LinkTest, WithOldDirFD) { + const std::string oldname_parent = NewTempAbsPath(); + const std::string oldname_base = "child"; + const std::string oldname = JoinPath(oldname_parent, oldname_base); + const std::string newname = NewTempAbsPath(); + + // Create oldname_parent directory, and get an FD. + ASSERT_THAT(mkdir(oldname_parent.c_str(), 0777), SyscallSucceeds()); + const FileDescriptor oldname_parent_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(oldname_parent, O_DIRECTORY | O_RDONLY)); + + // Create oldname file. + const FileDescriptor oldname_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(oldname, O_CREAT | O_RDWR, 0666)); + + // Link oldname to newname, using oldname_parent_fd. 
+ EXPECT_THAT(linkat(oldname_parent_fd.get(), oldname_base.c_str(), AT_FDCWD, + newname.c_str(), 0), + SyscallSucceeds()); + + EXPECT_TRUE(IsSameFile(oldname, newname)); + + EXPECT_THAT(unlink(newname.c_str()), SyscallSucceeds()); + EXPECT_THAT(unlink(oldname.c_str()), SyscallSucceeds()); + EXPECT_THAT(rmdir(oldname_parent.c_str()), SyscallSucceeds()); +} + +TEST(LinkTest, BogusFlags) { + ASSERT_THAT(linkat(1, "foo", 2, "bar", 3), SyscallFailsWithErrno(EINVAL)); +} + +TEST(LinkTest, WithNewDirFD) { + auto oldfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const std::string newname_parent = NewTempAbsPath(); + const std::string newname_base = "child"; + const std::string newname = JoinPath(newname_parent, newname_base); + + // Create newname_parent directory, and get an FD. + EXPECT_THAT(mkdir(newname_parent.c_str(), 0777), SyscallSucceeds()); + const FileDescriptor newname_parent_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(newname_parent, O_DIRECTORY | O_RDONLY)); + + // Link newname to oldfile, using newname_parent_fd. + EXPECT_THAT(linkat(AT_FDCWD, oldfile.path().c_str(), newname_parent_fd.get(), + newname.c_str(), 0), + SyscallSucceeds()); + + EXPECT_TRUE(IsSameFile(oldfile.path(), newname)); + + EXPECT_THAT(unlink(newname.c_str()), SyscallSucceeds()); + EXPECT_THAT(rmdir(newname_parent.c_str()), SyscallSucceeds()); +} + +TEST(LinkTest, RelPathsWithNonDirFDs) { + auto oldfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + + // Create a file that will be passed as the directory fd for old/new names. + const std::string filename = NewTempAbsPath(); + const FileDescriptor file_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(filename, O_CREAT | O_RDWR, 0666)); + + // Using file_fd as olddirfd will fail. + EXPECT_THAT(linkat(file_fd.get(), "foo", AT_FDCWD, "bar", 0), + SyscallFailsWithErrno(ENOTDIR)); + + // Using file_fd as newdirfd will fail. 
+ EXPECT_THAT(linkat(AT_FDCWD, oldfile.path().c_str(), file_fd.get(), "bar", 0), + SyscallFailsWithErrno(ENOTDIR)); +} + +TEST(LinkTest, AbsPathsWithNonDirFDs) { + auto oldfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const std::string newname = NewTempAbsPath(); + + // Create a file that will be passed as the directory fd for old/new names. + const std::string filename = NewTempAbsPath(); + const FileDescriptor file_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(filename, O_CREAT | O_RDWR, 0666)); + + // Using file_fd as the dirfds is OK as long as paths are absolute. + EXPECT_THAT(linkat(file_fd.get(), oldfile.path().c_str(), file_fd.get(), + newname.c_str(), 0), + SyscallSucceeds()); +} + +TEST(LinkTest, LinkDoesNotFollowSymlinks) { + // Create oldfile, and oldsymlink which points to it. + auto oldfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const std::string oldsymlink = NewTempAbsPath(); + EXPECT_THAT(symlink(oldfile.path().c_str(), oldsymlink.c_str()), + SyscallSucceeds()); + + // Now hard link newname to oldsymlink. + const std::string newname = NewTempAbsPath(); + EXPECT_THAT(link(oldsymlink.c_str(), newname.c_str()), SyscallSucceeds()); + + // The link should not have resolved the symlink, so newname and oldsymlink + // are the same. + EXPECT_TRUE(IsSameFile(oldsymlink, newname)); + EXPECT_FALSE(IsSameFile(oldfile.path(), newname)); + + EXPECT_THAT(unlink(oldsymlink.c_str()), SyscallSucceeds()); + EXPECT_THAT(unlink(newname.c_str()), SyscallSucceeds()); +} + +TEST(LinkTest, LinkatDoesNotFollowSymlinkByDefault) { + // Create oldfile, and oldsymlink which points to it. + auto oldfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const std::string oldsymlink = NewTempAbsPath(); + EXPECT_THAT(symlink(oldfile.path().c_str(), oldsymlink.c_str()), + SyscallSucceeds()); + + // Now hard link newname to oldsymlink. 
+ const std::string newname = NewTempAbsPath(); + EXPECT_THAT( + linkat(AT_FDCWD, oldsymlink.c_str(), AT_FDCWD, newname.c_str(), 0), + SyscallSucceeds()); + + // The link should not have resolved the symlink, so newname and oldsymlink + // are the same. + EXPECT_TRUE(IsSameFile(oldsymlink, newname)); + EXPECT_FALSE(IsSameFile(oldfile.path(), newname)); + + EXPECT_THAT(unlink(oldsymlink.c_str()), SyscallSucceeds()); + EXPECT_THAT(unlink(newname.c_str()), SyscallSucceeds()); +} + +TEST(LinkTest, LinkatWithSymlinkFollow) { + // Create oldfile, and oldsymlink which points to it. + auto oldfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const std::string oldsymlink = NewTempAbsPath(); + ASSERT_THAT(symlink(oldfile.path().c_str(), oldsymlink.c_str()), + SyscallSucceeds()); + + // Now hard link newname to oldsymlink, and pass AT_SYMLINK_FOLLOW flag. + const std::string newname = NewTempAbsPath(); + ASSERT_THAT(linkat(AT_FDCWD, oldsymlink.c_str(), AT_FDCWD, newname.c_str(), + AT_SYMLINK_FOLLOW), + SyscallSucceeds()); + + // The link should have resolved the symlink, so oldfile and newname are the + // same. + EXPECT_TRUE(IsSameFile(oldfile.path(), newname)); + EXPECT_FALSE(IsSameFile(oldsymlink, newname)); + + EXPECT_THAT(unlink(oldsymlink.c_str()), SyscallSucceeds()); + EXPECT_THAT(unlink(newname.c_str()), SyscallSucceeds()); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/lseek.cc b/test/syscalls/linux/lseek.cc new file mode 100644 index 000000000..fb6a1546e --- /dev/null +++ b/test/syscalls/linux/lseek.cc @@ -0,0 +1,202 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <errno.h> +#include <fcntl.h> +#include <stdlib.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "test/util/file_descriptor.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +TEST(LseekTest, InvalidWhence) { + const std::string kFileData = "hello world\n"; + const TempPath path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), kFileData, TempPath::kDefaultFileMode)); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(path.path(), O_RDWR, 0644)); + + ASSERT_THAT(lseek(fd.get(), 0, -1), SyscallFailsWithErrno(EINVAL)); +} + +TEST(LseekTest, NegativeOffset) { + const std::string kFileData = "hello world\n"; + const TempPath path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), kFileData, TempPath::kDefaultFileMode)); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(path.path(), O_RDWR, 0644)); + + EXPECT_THAT(lseek(fd.get(), -(kFileData.length() + 1), SEEK_CUR), + SyscallFailsWithErrno(EINVAL)); +} + +// A 32-bit off_t is not large enough to represent an offset larger than +// maximum file size on standard file systems, so it isn't possible to cause +// overflow. +#ifdef __x86_64__ +TEST(LseekTest, Overflow) { + // HA! Classic Linux. We really should have an EOVERFLOW + // here, since we're seeking to something that cannot be + // represented.. but instead we are given an EINVAL. 
+ const std::string kFileData = "hello world\n"; + const TempPath path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), kFileData, TempPath::kDefaultFileMode)); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(path.path(), O_RDWR, 0644)); + EXPECT_THAT(lseek(fd.get(), 0x7fffffffffffffff, SEEK_END), + SyscallFailsWithErrno(EINVAL)); +} +#endif + +TEST(LseekTest, Set) { + const std::string kFileData = "hello world\n"; + const TempPath path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), kFileData, TempPath::kDefaultFileMode)); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(path.path(), O_RDWR, 0644)); + + char buf = '\0'; + EXPECT_THAT(lseek(fd.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0)); + ASSERT_THAT(read(fd.get(), &buf, 1), SyscallSucceedsWithValue(1)); + EXPECT_EQ(buf, kFileData.c_str()[0]); + EXPECT_THAT(lseek(fd.get(), 6, SEEK_SET), SyscallSucceedsWithValue(6)); + ASSERT_THAT(read(fd.get(), &buf, 1), SyscallSucceedsWithValue(1)); + EXPECT_EQ(buf, kFileData.c_str()[6]); +} + +TEST(LseekTest, Cur) { + const std::string kFileData = "hello world\n"; + const TempPath path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), kFileData, TempPath::kDefaultFileMode)); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(path.path(), O_RDWR, 0644)); + + char buf = '\0'; + EXPECT_THAT(lseek(fd.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0)); + ASSERT_THAT(read(fd.get(), &buf, 1), SyscallSucceedsWithValue(1)); + EXPECT_EQ(buf, kFileData.c_str()[0]); + EXPECT_THAT(lseek(fd.get(), 3, SEEK_CUR), SyscallSucceedsWithValue(4)); + ASSERT_THAT(read(fd.get(), &buf, 1), SyscallSucceedsWithValue(1)); + EXPECT_EQ(buf, kFileData.c_str()[4]); +} + +TEST(LseekTest, End) { + const std::string kFileData = "hello world\n"; + const TempPath path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), kFileData, 
TempPath::kDefaultFileMode)); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(path.path(), O_RDWR, 0644)); + + char buf = '\0'; + EXPECT_THAT(lseek(fd.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0)); + ASSERT_THAT(read(fd.get(), &buf, 1), SyscallSucceedsWithValue(1)); + EXPECT_EQ(buf, kFileData.c_str()[0]); + EXPECT_THAT(lseek(fd.get(), -2, SEEK_END), SyscallSucceedsWithValue(10)); + ASSERT_THAT(read(fd.get(), &buf, 1), SyscallSucceedsWithValue(1)); + EXPECT_EQ(buf, kFileData.c_str()[kFileData.length() - 2]); +} + +TEST(LseekTest, InvalidFD) { + EXPECT_THAT(lseek(-1, 0, SEEK_SET), SyscallFailsWithErrno(EBADF)); +} + +TEST(LseekTest, DirCurEnd) { + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open("/tmp", O_RDONLY)); + ASSERT_THAT(lseek(fd.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(0)); +} + +TEST(LseekTest, ProcDir) { + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/self", O_RDONLY)); + ASSERT_THAT(lseek(fd.get(), 0, SEEK_CUR), SyscallSucceeds()); + ASSERT_THAT(lseek(fd.get(), 0, SEEK_END), SyscallSucceeds()); +} + +TEST(LseekTest, ProcFile) { + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/meminfo", O_RDONLY)); + ASSERT_THAT(lseek(fd.get(), 0, SEEK_CUR), SyscallSucceeds()); + ASSERT_THAT(lseek(fd.get(), 0, SEEK_END), SyscallFailsWithErrno(EINVAL)); +} + +TEST(LseekTest, SysDir) { + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/sys/devices", O_RDONLY)); + ASSERT_THAT(lseek(fd.get(), 0, SEEK_CUR), SyscallSucceeds()); + ASSERT_THAT(lseek(fd.get(), 0, SEEK_END), SyscallSucceeds()); +} + +TEST(LseekTest, SeekCurrentDir) { + // From include/linux/fs.h. 
+ constexpr loff_t MAX_LFS_FILESIZE = 0x7fffffffffffffff; + + char* dir = get_current_dir_name(); + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(dir, O_RDONLY)); + + ASSERT_THAT(lseek(fd.get(), 0, SEEK_CUR), SyscallSucceeds()); + ASSERT_THAT(lseek(fd.get(), 0, SEEK_END), + // Some filesystems (like ext4) allow lseek(SEEK_END) on a + // directory and return MAX_LFS_FILESIZE, others return EINVAL. + AnyOf(SyscallSucceedsWithValue(MAX_LFS_FILESIZE), + SyscallFailsWithErrno(EINVAL))); + free(dir); +} + +TEST(LseekTest, ProcStatTwice) { + const FileDescriptor fd1 = + ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/stat", O_RDONLY)); + const FileDescriptor fd2 = + ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/stat", O_RDONLY)); + + ASSERT_THAT(lseek(fd1.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(0)); + ASSERT_THAT(lseek(fd1.get(), 0, SEEK_END), SyscallFailsWithErrno(EINVAL)); + ASSERT_THAT(lseek(fd1.get(), 1000, SEEK_CUR), SyscallSucceeds()); + // Check that just because we moved fd1, fd2 doesn't move. + ASSERT_THAT(lseek(fd2.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(0)); + + const FileDescriptor fd3 = + ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/stat", O_RDONLY)); + ASSERT_THAT(lseek(fd3.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(0)); +} + +TEST(LseekTest, EtcPasswdDup) { + const FileDescriptor fd1 = + ASSERT_NO_ERRNO_AND_VALUE(Open("/etc/passwd", O_RDONLY)); + const FileDescriptor fd2 = ASSERT_NO_ERRNO_AND_VALUE(fd1.Dup()); + + ASSERT_THAT(lseek(fd1.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(0)); + ASSERT_THAT(lseek(fd2.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(0)); + ASSERT_THAT(lseek(fd1.get(), 1000, SEEK_CUR), SyscallSucceeds()); + // Check that fd2 moved along with fd1: dup'd FDs share one file offset. 
+ ASSERT_THAT(lseek(fd2.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(1000)); + + const FileDescriptor fd3 = ASSERT_NO_ERRNO_AND_VALUE(fd1.Dup()); + ASSERT_THAT(lseek(fd3.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(1000)); +} + +// TODO: Add tests where we have donated in sockets. + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/madvise.cc b/test/syscalls/linux/madvise.cc new file mode 100644 index 000000000..a79c8c75d --- /dev/null +++ b/test/syscalls/linux/madvise.cc @@ -0,0 +1,142 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <fcntl.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#include <string> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "gtest/gtest.h" +#include "test/util/file_descriptor.h" +#include "test/util/logging.h" +#include "test/util/memory_util.h" +#include "test/util/posix_error.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +void ExpectAllMappingBytes(Mapping const& m, char c) { + auto const v = m.view(); + for (size_t i = 0; i < kPageSize; i++) { + ASSERT_EQ(v[i], c) << "at offset " << i; + } +} + +// Equivalent to ExpectAllMappingBytes but async-signal-safe and with less +// helpful failure messages. 
+void CheckAllMappingBytes(Mapping const& m, char c) { + auto const v = m.view(); + for (size_t i = 0; i < kPageSize; i++) { + TEST_CHECK_MSG(v[i] == c, "mapping contains wrong value"); + } +} + +TEST(MadviseDontneedTest, ZerosPrivateAnonPage) { + auto m = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE)); + ExpectAllMappingBytes(m, 0); + memset(m.ptr(), 1, m.len()); + ExpectAllMappingBytes(m, 1); + ASSERT_THAT(madvise(m.ptr(), m.len(), MADV_DONTNEED), SyscallSucceeds()); + ExpectAllMappingBytes(m, 0); +} + +TEST(MadviseDontneedTest, ZerosCOWAnonPageInCallerOnly) { + auto m = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE)); + ExpectAllMappingBytes(m, 0); + memset(m.ptr(), 2, m.len()); + ExpectAllMappingBytes(m, 2); + + // Do madvise in a child process. + pid_t pid = fork(); + CheckAllMappingBytes(m, 2); + if (pid == 0) { + TEST_PCHECK(madvise(m.ptr(), m.len(), MADV_DONTNEED) == 0); + CheckAllMappingBytes(m, 0); + _exit(0); + } + + ASSERT_THAT(pid, SyscallSucceeds()); + + int status = 0; + ASSERT_THAT(waitpid(-1, &status, 0), SyscallSucceedsWithValue(pid)); + EXPECT_TRUE(WIFEXITED(status)); + EXPECT_EQ(WEXITSTATUS(status), 0); + // The child's madvise should not have affected the parent. 
+ ExpectAllMappingBytes(m, 2); +} + +TEST(MadviseDontneedTest, DoesNotModifySharedAnonPage) { + auto m = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED)); + ExpectAllMappingBytes(m, 0); + memset(m.ptr(), 3, m.len()); + ExpectAllMappingBytes(m, 3); + ASSERT_THAT(madvise(m.ptr(), m.len(), MADV_DONTNEED), SyscallSucceeds()); + ExpectAllMappingBytes(m, 3); +} + +TEST(MadviseDontneedTest, CleansPrivateFilePage) { + TempPath f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + /* parent = */ GetAbsoluteTestTmpdir(), + /* content = */ std::string(kPageSize, 4), TempPath::kDefaultFileMode)); + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDWR)); + + Mapping m = ASSERT_NO_ERRNO_AND_VALUE(Mmap( + nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd.get(), 0)); + ExpectAllMappingBytes(m, 4); + memset(m.ptr(), 5, m.len()); + ExpectAllMappingBytes(m, 5); + ASSERT_THAT(madvise(m.ptr(), m.len(), MADV_DONTNEED), SyscallSucceeds()); + ExpectAllMappingBytes(m, 4); +} + +TEST(MadviseDontneedTest, DoesNotModifySharedFilePage) { + TempPath f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + /* parent = */ GetAbsoluteTestTmpdir(), + /* content = */ std::string(kPageSize, 6), TempPath::kDefaultFileMode)); + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDWR)); + + Mapping m = ASSERT_NO_ERRNO_AND_VALUE(Mmap( + nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd.get(), 0)); + ExpectAllMappingBytes(m, 6); + memset(m.ptr(), 7, m.len()); + ExpectAllMappingBytes(m, 7); + ASSERT_THAT(madvise(m.ptr(), m.len(), MADV_DONTNEED), SyscallSucceeds()); + ExpectAllMappingBytes(m, 7); +} + +TEST(MadviseDontneedTest, IgnoresPermissions) { + auto m = + ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_NONE, MAP_PRIVATE)); + EXPECT_THAT(madvise(m.ptr(), m.len(), MADV_DONTNEED), SyscallSucceeds()); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git 
a/test/syscalls/linux/memory_accounting.cc b/test/syscalls/linux/memory_accounting.cc new file mode 100644 index 000000000..b4b680c34 --- /dev/null +++ b/test/syscalls/linux/memory_accounting.cc @@ -0,0 +1,99 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <sys/mman.h> +#include <map> + +#include "gtest/gtest.h" +#include "gtest/gtest.h" +#include "absl/strings/match.h" +#include "absl/strings/numbers.h" +#include "absl/strings/str_format.h" +#include "absl/strings/str_split.h" +#include "test/util/fs_util.h" +#include "test/util/posix_error.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { +namespace { + +using ::absl::StrFormat; + +// AnonUsageFromMeminfo scrapes the current anonymous memory usage from +// /proc/meminfo and returns it in bytes. +PosixErrorOr<uint64_t> AnonUsageFromMeminfo() { + ASSIGN_OR_RETURN_ERRNO(auto meminfo, GetContents("/proc/meminfo")); + std::vector<std::string> lines(absl::StrSplit(meminfo, '\n')); + + // Try to find AnonPages line, the format is AnonPages:\\s+(\\d+) kB\n. + for (const auto& line : lines) { + if (!absl::StartsWith(line, "AnonPages:")) { + continue; + } + + std::vector<std::string> parts( + absl::StrSplit(line, ' ', absl::SkipEmpty())); + if (parts.size() == 3) { + // The size is the second field, let's try to parse it as a number. 
+ ASSIGN_OR_RETURN_ERRNO(auto anon_kb, Atoi<uint64_t>(parts[1])); + return anon_kb * 1024; + } + + return PosixError(EINVAL, "AnonPages field in /proc/meminfo was malformed"); + } + + return PosixError(EINVAL, "AnonPages field not found in /proc/meminfo"); +} + +TEST(MemoryAccounting, AnonAccountingPreservedOnSaveRestore) { + // This test isn't meaningful on Linux. /proc/meminfo reports system-wide + // memory usage, which can change arbitrarily in Linux from other activity on + // the machine. In gvisor, this test is the only thing running on the + // "machine", so values in /proc/meminfo accurately reflect the memory used by + // the test. + SKIP_IF(!IsRunningOnGvisor()); + + uint64_t anon_initial = ASSERT_NO_ERRNO_AND_VALUE(AnonUsageFromMeminfo()); + + // Cause some anonymous memory usage. + uint64_t map_bytes = Megabytes(512); + char* mem = + static_cast<char*>(mmap(nullptr, map_bytes, PROT_READ | PROT_WRITE, + MAP_POPULATE | MAP_ANON | MAP_PRIVATE, -1, 0)); + ASSERT_NE(mem, MAP_FAILED) + << "Map failed, errno: " << errno << " (" << strerror(errno) << ")."; + + // Write something to each page to prevent them from being decommitted on + // S/R. Zero pages are dropped on save. + for (uint64_t i = 0; i < map_bytes; i += kPageSize) { + mem[i] = 'a'; + } + + uint64_t anon_after_alloc = ASSERT_NO_ERRNO_AND_VALUE(AnonUsageFromMeminfo()); + EXPECT_THAT(anon_after_alloc, + EquivalentWithin(anon_initial + map_bytes, 0.03)); + + // We have many implicit S/R cycles from scraping /proc/meminfo throughout the + // test, but throw an explicit S/R in here as well. + MaybeSave(); + + // Usage should remain the same across S/R. 
+ uint64_t anon_after_sr = ASSERT_NO_ERRNO_AND_VALUE(AnonUsageFromMeminfo()); + EXPECT_THAT(anon_after_sr, EquivalentWithin(anon_after_alloc, 0.03)); +} + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/mempolicy.cc b/test/syscalls/linux/mempolicy.cc new file mode 100644 index 000000000..9f8033bdf --- /dev/null +++ b/test/syscalls/linux/mempolicy.cc @@ -0,0 +1,258 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <errno.h> +#include <sys/syscall.h> + +#include "gtest/gtest.h" +#include "absl/memory/memory.h" +#include "test/util/cleanup.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +#define BITS_PER_BYTE 8 + +#define MPOL_F_STATIC_NODES (1 << 15) +#define MPOL_F_RELATIVE_NODES (1 << 14) +#define MPOL_DEFAULT 0 +#define MPOL_PREFERRED 1 +#define MPOL_BIND 2 +#define MPOL_INTERLEAVE 3 +#define MPOL_MAX MPOL_INTERLEAVE +#define MPOL_F_NODE (1 << 0) +#define MPOL_F_ADDR (1 << 1) +#define MPOL_F_MEMS_ALLOWED (1 << 2) +#define MPOL_MF_STRICT (1 << 0) +#define MPOL_MF_MOVE (1 << 1) +#define MPOL_MF_MOVE_ALL (1 << 2) + +int get_mempolicy(int *policy, uint64_t *nmask, uint64_t maxnode, void *addr, + int flags) { + return syscall(__NR_get_mempolicy, policy, nmask, maxnode, addr, flags); +} + +int set_mempolicy(int mode, uint64_t *nmask, uint64_t maxnode) { + return syscall(__NR_set_mempolicy, mode, nmask, maxnode); +} + +// Creates a cleanup object that resets the calling thread's mempolicy to the +// system default when the calling scope ends. +Cleanup ScopedMempolicy() { + return Cleanup([] { + EXPECT_THAT(set_mempolicy(MPOL_DEFAULT, nullptr, 0), SyscallSucceeds()); + }); +} + +// Temporarily change the memory policy for the calling thread within the +// caller's scope. 
+PosixErrorOr<Cleanup> ScopedSetMempolicy(int mode, uint64_t *nmask, + uint64_t maxnode) { + if (set_mempolicy(mode, nmask, maxnode)) { + return PosixError(errno, "set_mempolicy"); + } + return ScopedMempolicy(); +} + +TEST(MempolicyTest, CheckDefaultPolicy) { + int mode = 0; + uint64_t nodemask = 0; + ASSERT_THAT(get_mempolicy(&mode, &nodemask, sizeof(nodemask) * BITS_PER_BYTE, + nullptr, 0), + SyscallSucceeds()); + + EXPECT_EQ(MPOL_DEFAULT, mode); + EXPECT_EQ(0x0, nodemask); +} + +TEST(MempolicyTest, PolicyPreservedAfterSetMempolicy) { + uint64_t nodemask = 0x1; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSetMempolicy( + MPOL_BIND, &nodemask, sizeof(nodemask) * BITS_PER_BYTE)); + + int mode = 0; + uint64_t nodemask_after = 0x0; + ASSERT_THAT(get_mempolicy(&mode, &nodemask_after, + sizeof(nodemask_after) * BITS_PER_BYTE, nullptr, 0), + SyscallSucceeds()); + EXPECT_EQ(MPOL_BIND, mode); + EXPECT_EQ(0x1, nodemask_after); + + // Try throw in some mode flags. + for (auto mode_flag : {MPOL_F_STATIC_NODES, MPOL_F_RELATIVE_NODES}) { + auto cleanup2 = ASSERT_NO_ERRNO_AND_VALUE( + ScopedSetMempolicy(MPOL_INTERLEAVE | mode_flag, &nodemask, + sizeof(nodemask) * BITS_PER_BYTE)); + mode = 0; + nodemask_after = 0x0; + ASSERT_THAT( + get_mempolicy(&mode, &nodemask_after, + sizeof(nodemask_after) * BITS_PER_BYTE, nullptr, 0), + SyscallSucceeds()); + EXPECT_EQ(MPOL_INTERLEAVE | mode_flag, mode); + EXPECT_EQ(0x1, nodemask_after); + } +} + +TEST(MempolicyTest, SetMempolicyRejectsInvalidInputs) { + auto cleanup = ScopedMempolicy(); + uint64_t nodemask; + + if (IsRunningOnGvisor()) { + // Invalid nodemask, we only support a single node on gvisor. + nodemask = 0x4; + ASSERT_THAT(set_mempolicy(MPOL_DEFAULT, &nodemask, + sizeof(nodemask) * BITS_PER_BYTE), + SyscallFailsWithErrno(EINVAL)); + } + + nodemask = 0x1; + + // Invalid mode. + ASSERT_THAT(set_mempolicy(7439, &nodemask, sizeof(nodemask) * BITS_PER_BYTE), + SyscallFailsWithErrno(EINVAL)); + + // Invalid nodemask size. 
+ ASSERT_THAT(set_mempolicy(MPOL_DEFAULT, &nodemask, 0), + SyscallFailsWithErrno(EINVAL)); + + // Invalid mode flag. + ASSERT_THAT( + set_mempolicy(MPOL_DEFAULT | MPOL_F_STATIC_NODES | MPOL_F_RELATIVE_NODES, + &nodemask, sizeof(nodemask) * BITS_PER_BYTE), + SyscallFailsWithErrno(EINVAL)); + + // MPOL_INTERLEAVE with empty nodemask. + nodemask = 0x0; + ASSERT_THAT(set_mempolicy(MPOL_INTERLEAVE, &nodemask, + sizeof(nodemask) * BITS_PER_BYTE), + SyscallFailsWithErrno(EINVAL)); +} + +// The manpages specify that the nodemask provided to set_mempolicy are +// considered empty if the nodemask pointer is null, or if the nodemask size is +// 0. We use a policy which accepts both empty and non-empty nodemasks +// (MPOL_PREFERRED), a policy which requires a non-empty nodemask (MPOL_BIND), +// and a policy which completely ignores the nodemask (MPOL_DEFAULT) to verify +// argument checking around nodemasks. +TEST(MempolicyTest, EmptyNodemaskOnSet) { + auto cleanup = ScopedMempolicy(); + + EXPECT_THAT(set_mempolicy(MPOL_DEFAULT, nullptr, 1), SyscallSucceeds()); + EXPECT_THAT(set_mempolicy(MPOL_BIND, nullptr, 1), + SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(set_mempolicy(MPOL_PREFERRED, nullptr, 1), SyscallSucceeds()); + + uint64_t nodemask = 0x1; + EXPECT_THAT(set_mempolicy(MPOL_DEFAULT, &nodemask, 0), + SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(set_mempolicy(MPOL_BIND, &nodemask, 0), + SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(set_mempolicy(MPOL_PREFERRED, &nodemask, 0), + SyscallFailsWithErrno(EINVAL)); +} + +TEST(MempolicyTest, QueryAvailableNodes) { + uint64_t nodemask = 0; + ASSERT_THAT( + get_mempolicy(nullptr, &nodemask, sizeof(nodemask) * BITS_PER_BYTE, + nullptr, MPOL_F_MEMS_ALLOWED), + SyscallSucceeds()); + // We can only be sure there is a single node if running on gvisor. + if (IsRunningOnGvisor()) { + EXPECT_EQ(0x1, nodemask); + } + + // MPOL_F_ADDR and MPOL_F_NODE flags may not be combined with + // MPOL_F_MEMS_ALLLOWED. 
+ for (auto flags : + {MPOL_F_MEMS_ALLOWED | MPOL_F_ADDR, MPOL_F_MEMS_ALLOWED | MPOL_F_NODE, + MPOL_F_MEMS_ALLOWED | MPOL_F_ADDR | MPOL_F_NODE}) { + ASSERT_THAT(get_mempolicy(nullptr, &nodemask, + sizeof(nodemask) * BITS_PER_BYTE, nullptr, flags), + SyscallFailsWithErrno(EINVAL)); + } +} + +TEST(MempolicyTest, GetMempolicyQueryNodeForAddress) { + uint64_t dummy_stack_address; + auto dummy_heap_address = absl::make_unique<uint64_t>(); + int mode; + + for (auto ptr : {&dummy_stack_address, dummy_heap_address.get()}) { + mode = -1; + ASSERT_THAT( + get_mempolicy(&mode, nullptr, 0, ptr, MPOL_F_ADDR | MPOL_F_NODE), + SyscallSucceeds()); + // If we're not running on gvisor, the address may be allocated on a + // different numa node. + if (IsRunningOnGvisor()) { + EXPECT_EQ(0, mode); + } + } + + void* invalid_address = reinterpret_cast<void*>(-1); + + // Invalid address. + ASSERT_THAT(get_mempolicy(&mode, nullptr, 0, invalid_address, + MPOL_F_ADDR | MPOL_F_NODE), + SyscallFailsWithErrno(EFAULT)); + + // Invalid mode pointer. + ASSERT_THAT(get_mempolicy(reinterpret_cast<int*>(invalid_address), nullptr, 0, + &dummy_stack_address, MPOL_F_ADDR | MPOL_F_NODE), + SyscallFailsWithErrno(EFAULT)); +} + +TEST(MempolicyTest, GetMempolicyCanOmitPointers) { + int mode; + uint64_t nodemask; + + // Omit nodemask pointer. + ASSERT_THAT(get_mempolicy(&mode, nullptr, 0, nullptr, 0), SyscallSucceeds()); + // Omit mode pointer. + ASSERT_THAT(get_mempolicy(nullptr, &nodemask, + sizeof(nodemask) * BITS_PER_BYTE, nullptr, 0), + SyscallSucceeds()); + // Omit both pointers. + ASSERT_THAT(get_mempolicy(nullptr, nullptr, 0, nullptr, 0), + SyscallSucceeds()); +} + +TEST(MempolicyTest, GetMempolicyNextInterleaveNode) { + int mode; + // Policy for thread not yet set to MPOL_INTERLEAVE, can't query for + // the next node which will be used for allocation. 
+ ASSERT_THAT(get_mempolicy(&mode, nullptr, 0, nullptr, MPOL_F_NODE), + SyscallFailsWithErrno(EINVAL)); + + // Set default policy for thread to MPOL_INTERLEAVE. + uint64_t nodemask = 0x1; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSetMempolicy( + MPOL_INTERLEAVE, &nodemask, sizeof(nodemask) * BITS_PER_BYTE)); + + mode = -1; + ASSERT_THAT(get_mempolicy(&mode, nullptr, 0, nullptr, MPOL_F_NODE), + SyscallSucceeds()); + EXPECT_EQ(0, mode); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/mincore.cc b/test/syscalls/linux/mincore.cc new file mode 100644 index 000000000..c572bf5ec --- /dev/null +++ b/test/syscalls/linux/mincore.cc @@ -0,0 +1,96 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <errno.h> +#include <stdint.h> +#include <string.h> +#include <sys/mman.h> +#include <unistd.h> + +#include <algorithm> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "test/util/memory_util.h" +#include "test/util/posix_error.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +size_t CountSetLSBs(std::vector<unsigned char> const& vec) { + return std::count_if(begin(vec), end(vec), + [](unsigned char c) { return (c & 1) != 0; }); +} + +TEST(MincoreTest, DirtyAnonPagesAreResident) { + constexpr size_t kTestPageCount = 10; + auto const kTestMappingBytes = kTestPageCount * kPageSize; + auto m = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kTestMappingBytes, PROT_READ | PROT_WRITE, MAP_PRIVATE)); + memset(m.ptr(), 0, m.len()); + + std::vector<unsigned char> vec(kTestPageCount, 0); + ASSERT_THAT(mincore(m.ptr(), kTestMappingBytes, vec.data()), + SyscallSucceeds()); + EXPECT_EQ(kTestPageCount, CountSetLSBs(vec)); +} + +TEST(MincoreTest, UnalignedAddressFails) { + // Map and touch two pages, then try to mincore the second half of the first + // page + the first half of the second page. Both pages are mapped, but + // mincore should return EINVAL due to the misaligned start address. + constexpr size_t kTestPageCount = 2; + auto const kTestMappingBytes = kTestPageCount * kPageSize; + auto m = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kTestMappingBytes, PROT_READ | PROT_WRITE, MAP_PRIVATE)); + memset(m.ptr(), 0, m.len()); + + std::vector<unsigned char> vec(kTestPageCount, 0); + EXPECT_THAT(mincore(reinterpret_cast<void*>(m.addr() + kPageSize / 2), + kPageSize, vec.data()), + SyscallFailsWithErrno(EINVAL)); +} + +TEST(MincoreTest, UnalignedLengthSucceedsAndIsRoundedUp) { + // Map and touch two pages, then try to mincore the first page + the first + // half of the second page. mincore should silently round up the length to + // include both pages. 
+ constexpr size_t kTestPageCount = 2; + auto const kTestMappingBytes = kTestPageCount * kPageSize; + auto m = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kTestMappingBytes, PROT_READ | PROT_WRITE, MAP_PRIVATE)); + memset(m.ptr(), 0, m.len()); + + std::vector<unsigned char> vec(kTestPageCount, 0); + ASSERT_THAT(mincore(m.ptr(), kPageSize + kPageSize / 2, vec.data()), + SyscallSucceeds()); + EXPECT_EQ(kTestPageCount, CountSetLSBs(vec)); +} + +TEST(MincoreTest, ZeroLengthSucceedsAndAllowsAnyVecBelowTaskSize) { + EXPECT_THAT(mincore(nullptr, 0, nullptr), SyscallSucceeds()); +} + +TEST(MincoreTest, InvalidLengthFails) { + EXPECT_THAT(mincore(nullptr, -1, nullptr), SyscallFailsWithErrno(ENOMEM)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/mkdir.cc b/test/syscalls/linux/mkdir.cc new file mode 100644 index 000000000..84db45eb3 --- /dev/null +++ b/test/syscalls/linux/mkdir.cc @@ -0,0 +1,96 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <fcntl.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "test/syscalls/linux/temp_umask.h" +#include "test/util/capability_util.h" +#include "test/util/fs_util.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +class MkdirTest : public ::testing::Test { + protected: + // SetUp creates various configurations of files. + void SetUp() override { dirname_ = NewTempAbsPath(); } + + // TearDown unlinks created files. + void TearDown() override { + // FIXME: We don't currently implement rmdir. + // We do this unconditionally because there's no harm in trying. + rmdir(dirname_.c_str()); + } + + std::string dirname_; +}; + +TEST_F(MkdirTest, DISABLED_CanCreateReadbleDir) { + ASSERT_THAT(mkdir(dirname_.c_str(), 0444), SyscallSucceeds()); + ASSERT_THAT( + open(JoinPath(dirname_, "anything").c_str(), O_RDWR | O_CREAT, 0666), + SyscallFailsWithErrno(EACCES)); +} + +TEST_F(MkdirTest, CanCreateWritableDir) { + ASSERT_THAT(mkdir(dirname_.c_str(), 0777), SyscallSucceeds()); + std::string filename = JoinPath(dirname_, "anything"); + int fd; + ASSERT_THAT(fd = open(filename.c_str(), O_RDWR | O_CREAT, 0666), + SyscallSucceeds()); + EXPECT_THAT(close(fd), SyscallSucceeds()); + ASSERT_THAT(unlink(filename.c_str()), SyscallSucceeds()); +} + +TEST_F(MkdirTest, HonorsUmask) { + constexpr mode_t kMask = 0111; + TempUmask mask(kMask); + ASSERT_THAT(mkdir(dirname_.c_str(), 0777), SyscallSucceeds()); + struct stat statbuf; + ASSERT_THAT(stat(dirname_.c_str(), &statbuf), SyscallSucceeds()); + EXPECT_EQ(0777 & ~kMask, statbuf.st_mode & 0777); +} + +TEST_F(MkdirTest, HonorsUmask2) { + constexpr mode_t kMask = 0142; + TempUmask mask(kMask); + ASSERT_THAT(mkdir(dirname_.c_str(), 0777), SyscallSucceeds()); + struct stat statbuf; + ASSERT_THAT(stat(dirname_.c_str(), &statbuf), SyscallSucceeds()); + EXPECT_EQ(0777 & ~kMask, statbuf.st_mode & 0777); +} 
+ +TEST_F(MkdirTest, FailsOnDirWithoutWritePerms) { + // Drop capabilities that allow us to override file and directory permissions. + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false)); + + auto parent = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateDirWith(GetAbsoluteTestTmpdir(), 0555)); + auto dir = JoinPath(parent.path(), "foo"); + ASSERT_THAT(mkdir(dir.c_str(), 0777), SyscallFailsWithErrno(EACCES)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/mknod.cc b/test/syscalls/linux/mknod.cc new file mode 100644 index 000000000..361ca299b --- /dev/null +++ b/test/syscalls/linux/mknod.cc @@ -0,0 +1,173 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <errno.h> +#include <fcntl.h> +#include <sys/stat.h> +#include <sys/un.h> +#include <unistd.h> + +#include <vector> + +#include "gtest/gtest.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +TEST(MknodTest, RegularFile) { + std::string const node0 = NewTempAbsPathInDir("/tmp"); + std::string const node1 = NewTempAbsPathInDir("/tmp"); + ASSERT_THAT(mknod(node0.c_str(), S_IFREG, 0), SyscallSucceeds()); + ASSERT_THAT(mknod(node1.c_str(), 0, 0), SyscallSucceeds()); +} + +TEST(MknodTest, MknodAtRegularFile) { + std::string const fifo_relpath = NewTempRelPath(); + std::string const fifo = JoinPath("/tmp", fifo_relpath); + int dirfd; + ASSERT_THAT(dirfd = open("/tmp", O_RDONLY), SyscallSucceeds()); + ASSERT_THAT(mknodat(dirfd, fifo_relpath.c_str(), S_IFIFO | S_IRUSR, 0), + SyscallSucceeds()); + EXPECT_THAT(close(dirfd), SyscallSucceeds()); + + struct stat st; + ASSERT_THAT(stat(fifo.c_str(), &st), SyscallSucceeds()); + EXPECT_TRUE(S_ISFIFO(st.st_mode)); +} + +TEST(MknodTest, MknodOnExistingPathFails) { + std::string const file = NewTempAbsPathInDir("/tmp"); + std::string const slink = NewTempAbsPathInDir("/tmp"); + int fd; + ASSERT_THAT(fd = open(file.c_str(), O_CREAT | O_RDWR, S_IRUSR | S_IWUSR), + SyscallSucceeds()); + EXPECT_THAT(close(fd), SyscallSucceeds()); + ASSERT_THAT(symlink(file.c_str(), slink.c_str()), SyscallSucceeds()); + + EXPECT_THAT(mknod(file.c_str(), S_IFREG, 0), SyscallFailsWithErrno(EEXIST)); + EXPECT_THAT(mknod(file.c_str(), S_IFIFO, 0), SyscallFailsWithErrno(EEXIST)); + EXPECT_THAT(mknod(slink.c_str(), S_IFREG, 0), SyscallFailsWithErrno(EEXIST)); + EXPECT_THAT(mknod(slink.c_str(), S_IFIFO, 0), SyscallFailsWithErrno(EEXIST)); +} + +TEST(MknodTest, UnimplementedTypesReturnError) { + if (IsRunningOnGvisor()) { + ASSERT_THAT(mknod("/tmp/a_socket", S_IFSOCK, 0), + SyscallFailsWithErrno(EOPNOTSUPP)); + } + // These will fail on 
linux as well since we don't have CAP_MKNOD. + ASSERT_THAT(mknod("/tmp/a_chardev", S_IFCHR, 0), + SyscallFailsWithErrno(EPERM)); + ASSERT_THAT(mknod("/tmp/a_blkdev", S_IFBLK, 0), SyscallFailsWithErrno(EPERM)); +} + +TEST(MknodTest, Fifo) { + std::string const fifo = NewTempAbsPathInDir("/tmp"); + ASSERT_THAT(mknod(fifo.c_str(), S_IFIFO | S_IRUSR | S_IWUSR, 0), + SyscallSucceeds()); + + struct stat st; + ASSERT_THAT(stat(fifo.c_str(), &st), SyscallSucceeds()); + EXPECT_TRUE(S_ISFIFO(st.st_mode)); + + std::string msg = "some string"; + std::vector<char> buf(512); + + // Read-end of the pipe. + ScopedThread t([&fifo, &buf, &msg]() { + int fd; + ASSERT_THAT(fd = open(fifo.c_str(), O_RDONLY), SyscallSucceeds()); + EXPECT_THAT(read(fd, buf.data(), buf.size()), + SyscallSucceedsWithValue(msg.length())); + EXPECT_EQ(msg, std::string(buf.data())); + EXPECT_THAT(close(fd), SyscallSucceeds()); + }); + + // Write-end of the pipe. + int wfd; + ASSERT_THAT(wfd = open(fifo.c_str(), O_WRONLY), SyscallSucceeds()); + EXPECT_THAT(write(wfd, msg.c_str(), msg.length()), + SyscallSucceedsWithValue(msg.length())); + EXPECT_THAT(close(wfd), SyscallSucceeds()); +} + +TEST(MknodTest, FifoOtrunc) { + std::string const fifo = NewTempAbsPathInDir("/tmp"); + ASSERT_THAT(mknod(fifo.c_str(), S_IFIFO | S_IRUSR | S_IWUSR, 0), + SyscallSucceeds()); + + struct stat st = {}; + ASSERT_THAT(stat(fifo.c_str(), &st), SyscallSucceeds()); + EXPECT_TRUE(S_ISFIFO(st.st_mode)); + + std::string msg = "some string"; + std::vector<char> buf(512); + // Read-end of the pipe. 
+ ScopedThread t([&fifo, &buf, &msg]() { + int fd; + ASSERT_THAT(fd = open(fifo.c_str(), O_RDONLY), SyscallSucceeds()); + EXPECT_THAT(read(fd, buf.data(), buf.size()), + SyscallSucceedsWithValue(msg.length())); + EXPECT_EQ(msg, std::string(buf.data())); + EXPECT_THAT(close(fd), SyscallSucceeds()); + }); + + int wfd; + ASSERT_THAT(wfd = open(fifo.c_str(), O_TRUNC | O_WRONLY), SyscallSucceeds()); + EXPECT_THAT(write(wfd, msg.c_str(), msg.length()), + SyscallSucceedsWithValue(msg.length())); + EXPECT_THAT(close(wfd), SyscallSucceeds()); +} + +TEST(MknodTest, FifoTruncNoOp) { + std::string const fifo = NewTempAbsPathInDir("/tmp"); + ASSERT_THAT(mknod(fifo.c_str(), S_IFIFO | S_IRUSR | S_IWUSR, 0), + SyscallSucceeds()); + + EXPECT_THAT(truncate(fifo.c_str(), 0), SyscallFailsWithErrno(EINVAL)); + + struct stat st = {}; + ASSERT_THAT(stat(fifo.c_str(), &st), SyscallSucceeds()); + EXPECT_TRUE(S_ISFIFO(st.st_mode)); + + std::string msg = "some string"; + std::vector<char> buf(512); + // Read-end of the pipe. 
+ ScopedThread t([&fifo, &buf, &msg]() { + int rfd = 0; + ASSERT_THAT(rfd = open(fifo.c_str(), O_RDONLY), SyscallSucceeds()); + EXPECT_THAT(ReadFd(rfd, buf.data(), buf.size()), + SyscallSucceedsWithValue(msg.length())); + EXPECT_EQ(msg, std::string(buf.data())); + EXPECT_THAT(close(rfd), SyscallSucceeds()); + }); + + int wfd = 0; + ASSERT_THAT(wfd = open(fifo.c_str(), O_TRUNC | O_WRONLY), SyscallSucceeds()); + EXPECT_THAT(ftruncate(wfd, 0), SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(WriteFd(wfd, msg.c_str(), msg.length()), + SyscallSucceedsWithValue(msg.length())); + EXPECT_THAT(ftruncate(wfd, 0), SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(close(wfd), SyscallSucceeds()); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/mmap.cc b/test/syscalls/linux/mmap.cc new file mode 100644 index 000000000..afe060d33 --- /dev/null +++ b/test/syscalls/linux/mmap.cc @@ -0,0 +1,1714 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <errno.h> +#include <fcntl.h> +#include <linux/magic.h> +#include <linux/unistd.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/resource.h> +#include <sys/statfs.h> +#include <sys/syscall.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/strings/escaping.h" +#include "absl/strings/str_split.h" +#include "test/util/cleanup.h" +#include "test/util/file_descriptor.h" +#include "test/util/fs_util.h" +#include "test/util/memory_util.h" +#include "test/util/multiprocess_util.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +using ::testing::Gt; + +namespace gvisor { +namespace testing { + +namespace { + +PosixErrorOr<int64_t> VirtualMemorySize() { + ASSIGN_OR_RETURN_ERRNO(auto contents, GetContents("/proc/self/statm")); + std::vector<std::string> parts = absl::StrSplit(contents, ' '); + if (parts.empty()) { + return PosixError(EINVAL, "Unable to parse /proc/self/statm"); + } + ASSIGN_OR_RETURN_ERRNO(auto pages, Atoi<int64_t>(parts[0])); + return pages * getpagesize(); +} + +class MMapTest : public ::testing::Test { + protected: + // Unmap mapping, if one was made. + void TearDown() override { + if (addr_) { + EXPECT_THAT(Unmap(), SyscallSucceeds()); + } + } + + // Remembers mapping, so it can be automatically unmapped. + uintptr_t Map(uintptr_t addr, size_t length, int prot, int flags, int fd, + off_t offset) { + void* ret = + mmap(reinterpret_cast<void*>(addr), length, prot, flags, fd, offset); + + if (ret != MAP_FAILED) { + addr_ = ret; + length_ = length; + } + + return reinterpret_cast<uintptr_t>(ret); + } + + // Unmap previous mapping + int Unmap() { + if (!addr_) { + return -1; + } + + int ret = munmap(addr_, length_); + + addr_ = nullptr; + length_ = 0; + + return ret; + } + + // Msync the mapping. 
+ int Msync() { return msync(addr_, length_, MS_SYNC); } + + // Mlock the mapping. + int Mlock() { return mlock(addr_, length_); } + + // Munlock the mapping. + int Munlock() { return munlock(addr_, length_); } + + int Protect(uintptr_t addr, size_t length, int prot) { + return mprotect(reinterpret_cast<void*>(addr), length, prot); + } + + void* addr_ = nullptr; + size_t length_ = 0; +}; + +// Matches if arg contains the same contents as std::string str. +MATCHER_P(EqualsMemory, str, "") { + if (0 == memcmp(arg, str.c_str(), str.size())) { + return true; + } + + *result_listener << "Memory did not match. Got:\n" + << absl::BytesToHexString( + std::string(static_cast<char*>(arg), str.size())) + << "Want:\n" + << absl::BytesToHexString(str); + return false; +} + +// We can't map pipes, but for different reasons. +TEST_F(MMapTest, MapPipe) { + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + EXPECT_THAT(Map(0, kPageSize, PROT_READ, MAP_PRIVATE, fds[0], 0), + SyscallFailsWithErrno(ENODEV)); + EXPECT_THAT(Map(0, kPageSize, PROT_READ, MAP_PRIVATE, fds[1], 0), + SyscallFailsWithErrno(EACCES)); + ASSERT_THAT(close(fds[0]), SyscallSucceeds()); + ASSERT_THAT(close(fds[1]), SyscallSucceeds()); +} + +// It's very common to mmap /dev/zero because anonymous mappings aren't part +// of POSIX although they are widely supported. So a zero initialized memory +// region would actually come from a "file backed" /dev/zero mapping. +TEST_F(MMapTest, MapDevZeroShared) { + // This test will verify that we're able to map a page backed by /dev/zero + // as MAP_SHARED. + const FileDescriptor dev_zero = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR)); + + // Test that we can create a RW SHARED mapping of /dev/zero. + ASSERT_THAT( + Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, dev_zero.get(), 0), + SyscallSucceeds()); +} + +TEST_F(MMapTest, MapDevZeroPrivate) { + // This test will verify that we're able to map a page backed by /dev/zero + // as MAP_PRIVATE. 
+ const FileDescriptor dev_zero = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR)); + + // Test that we can create a RW SHARED mapping of /dev/zero. + ASSERT_THAT( + Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE, dev_zero.get(), 0), + SyscallSucceeds()); +} + +TEST_F(MMapTest, MapDevZeroNoPersistence) { + // This test will verify that two independent mappings of /dev/zero do not + // appear to reference the same "backed file." + + const FileDescriptor dev_zero1 = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR)); + const FileDescriptor dev_zero2 = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR)); + + ASSERT_THAT( + Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, dev_zero1.get(), 0), + SyscallSucceeds()); + + // Create a second mapping via the second /dev/zero fd. + void* psec_map = mmap(nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, + dev_zero2.get(), 0); + ASSERT_THAT(reinterpret_cast<intptr_t>(psec_map), SyscallSucceeds()); + + // Always unmap. + auto cleanup_psec_map = Cleanup( + [&] { EXPECT_THAT(munmap(psec_map, kPageSize), SyscallSucceeds()); }); + + // Verify that we have independently addressed pages. + ASSERT_NE(psec_map, addr_); + + std::string buf_zero(kPageSize, 0x00); + std::string buf_ones(kPageSize, 0xFF); + + // Verify the first is actually all zeros after mmap. + EXPECT_THAT(addr_, EqualsMemory(buf_zero)); + + // Let's fill in the first mapping with 0xFF. + memcpy(addr_, buf_ones.data(), kPageSize); + + // Verify that the memcpy actually stuck in the page. + EXPECT_THAT(addr_, EqualsMemory(buf_ones)); + + // Verify that it didn't affect the second page which should be all zeros. + EXPECT_THAT(psec_map, EqualsMemory(buf_zero)); +} + +TEST_F(MMapTest, MapDevZeroSharedMultiplePages) { + // This will test that we're able to map /dev/zero over multiple pages. + const FileDescriptor dev_zero = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR)); + + // Test that we can create a RW SHARED mapping of /dev/zero. 
+ ASSERT_THAT(Map(0, kPageSize * 2, PROT_READ | PROT_WRITE, MAP_PRIVATE, + dev_zero.get(), 0), + SyscallSucceeds()); + + std::string buf_zero(kPageSize * 2, 0x00); + std::string buf_ones(kPageSize * 2, 0xFF); + + // Verify the two pages are actually all zeros after mmap. + EXPECT_THAT(addr_, EqualsMemory(buf_zero)); + + // Fill out the pages with all ones. + memcpy(addr_, buf_ones.data(), kPageSize * 2); + + // Verify that the memcpy actually stuck in the pages. + EXPECT_THAT(addr_, EqualsMemory(buf_ones)); +} + +TEST_F(MMapTest, MapDevZeroSharedFdNoPersistence) { + // This test will verify that two independent mappings of /dev/zero do not + // appear to reference the same "backed file" even when mapped from the + // same initial fd. + const FileDescriptor dev_zero = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR)); + + ASSERT_THAT( + Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, dev_zero.get(), 0), + SyscallSucceeds()); + + // Create a second mapping via the same fd. + void* psec_map = mmap(nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, + dev_zero.get(), 0); + ASSERT_THAT(reinterpret_cast<int64_t>(psec_map), SyscallSucceeds()); + + // Always unmap. + auto cleanup_psec_map = Cleanup( + [&] { ASSERT_THAT(munmap(psec_map, kPageSize), SyscallSucceeds()); }); + + // Verify that we have independently addressed pages. + ASSERT_NE(psec_map, addr_); + + std::string buf_zero(kPageSize, 0x00); + std::string buf_ones(kPageSize, 0xFF); + + // Verify the first is actually all zeros after mmap. + EXPECT_THAT(addr_, EqualsMemory(buf_zero)); + + // Let's fill in the first mapping with 0xFF. + memcpy(addr_, buf_ones.data(), kPageSize); + + // Verify that the memcpy actually stuck in the page. + EXPECT_THAT(addr_, EqualsMemory(buf_ones)); + + // Verify that it didn't affect the second page which should be all zeros. 
+ EXPECT_THAT(psec_map, EqualsMemory(buf_zero)); +} + +TEST_F(MMapTest, MapDevZeroSegfaultAfterUnmap) { + SetupGvisorDeathTest(); + + // This test will verify that we're able to map a page backed by /dev/zero + // as MAP_SHARED and after it's unmapped any access results in a SIGSEGV. + // This test is redundant but given the special nature of /dev/zero mappings + // it doesn't hurt. + const FileDescriptor dev_zero = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR)); + + const auto rest = [&] { + // Test that we can create a RW SHARED mapping of /dev/zero. + TEST_PCHECK(Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, + dev_zero.get(), + 0) != reinterpret_cast<uintptr_t>(MAP_FAILED)); + + // Confirm that accesses after the unmap result in a SIGSEGV. + // + // N.B. We depend on this process being single-threaded to ensure there + // can't be another mmap to map addr before the dereference below. + void* addr_saved = addr_; // Unmap resets addr_. + TEST_PCHECK(Unmap() == 0); + *reinterpret_cast<volatile int*>(addr_saved) = 0xFF; + }; + + EXPECT_THAT(InForkedProcess(rest), + IsPosixErrorOkAndHolds(W_EXITCODE(0, SIGSEGV))); +} + +TEST_F(MMapTest, MapDevZeroUnaligned) { + const FileDescriptor dev_zero = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR)); + const size_t size = kPageSize + kPageSize / 2; + const std::string buf_zero(size, 0x00); + + ASSERT_THAT( + Map(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, dev_zero.get(), 0), + SyscallSucceeds()); + EXPECT_THAT(addr_, EqualsMemory(buf_zero)); + ASSERT_THAT(Unmap(), SyscallSucceeds()); + + ASSERT_THAT( + Map(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE, dev_zero.get(), 0), + SyscallSucceeds()); + EXPECT_THAT(addr_, EqualsMemory(buf_zero)); +} + +// We can't map _some_ character devices. 
+TEST_F(MMapTest, MapCharDevice) { + const FileDescriptor cdevfd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/random", 0, 0)); + EXPECT_THAT(Map(0, kPageSize, PROT_READ, MAP_PRIVATE, cdevfd.get(), 0), + SyscallFailsWithErrno(ENODEV)); +} + +// We can't map directories. +TEST_F(MMapTest, MapDirectory) { + const FileDescriptor dirfd = + ASSERT_NO_ERRNO_AND_VALUE(Open(GetAbsoluteTestTmpdir(), 0, 0)); + EXPECT_THAT(Map(0, kPageSize, PROT_READ, MAP_PRIVATE, dirfd.get(), 0), + SyscallFailsWithErrno(ENODEV)); +} + +// We can map *something* +TEST_F(MMapTest, MapAnything) { + EXPECT_THAT(Map(0, kPageSize, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallSucceedsWithValue(Gt(0))); +} + +// Map length < PageSize allowed +TEST_F(MMapTest, SmallMap) { + EXPECT_THAT(Map(0, 128, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallSucceeds()); +} + +// Hint address doesn't break anything. +// Note: there is no requirement we actually get the hint address +TEST_F(MMapTest, HintAddress) { + EXPECT_THAT( + Map(0x30000000, kPageSize, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallSucceeds()); +} + +// MAP_FIXED gives us exactly the requested address +TEST_F(MMapTest, MapFixed) { + EXPECT_THAT(Map(0x30000000, kPageSize, PROT_NONE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0), + SyscallSucceedsWithValue(0x30000000)); +} + +// 64-bit addresses work too +#ifdef __x86_64__ +TEST_F(MMapTest, MapFixed64) { + EXPECT_THAT(Map(0x300000000000, kPageSize, PROT_NONE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0), + SyscallSucceedsWithValue(0x300000000000)); +} +#endif + +// MAP_STACK allowed. +// There isn't a good way to verify it did anything. +TEST_F(MMapTest, MapStack) { + EXPECT_THAT(Map(0, kPageSize, PROT_NONE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0), + SyscallSucceeds()); +} + +// MAP_LOCKED allowed. +// There isn't a good way to verify it did anything. 
+TEST_F(MMapTest, MapLocked) { + EXPECT_THAT(Map(0, kPageSize, PROT_NONE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_LOCKED, -1, 0), + SyscallSucceeds()); +} + +// MAP_PRIVATE or MAP_SHARED must be passed +TEST_F(MMapTest, NotPrivateOrShared) { + EXPECT_THAT(Map(0, kPageSize, PROT_NONE, MAP_ANONYMOUS, -1, 0), + SyscallFailsWithErrno(EINVAL)); +} + +// Only one of MAP_PRIVATE or MAP_SHARED may be passed +TEST_F(MMapTest, PrivateAndShared) { + EXPECT_THAT(Map(0, kPageSize, PROT_NONE, + MAP_PRIVATE | MAP_SHARED | MAP_ANONYMOUS, -1, 0), + SyscallFailsWithErrno(EINVAL)); +} + +TEST_F(MMapTest, FixedAlignment) { + // Addr must be page aligned (MAP_FIXED) + EXPECT_THAT(Map(0x30000001, kPageSize, PROT_NONE, + MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS, -1, 0), + SyscallFailsWithErrno(EINVAL)); +} + +// Non-MAP_FIXED address does not need to be page aligned +TEST_F(MMapTest, NonFixedAlignment) { + EXPECT_THAT( + Map(0x30000001, kPageSize, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallSucceeds()); +} + +// Length = 0 results in EINVAL. +TEST_F(MMapTest, InvalidLength) { + EXPECT_THAT(Map(0, 0, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallFailsWithErrno(EINVAL)); +} + +// Bad fd not allowed. +TEST_F(MMapTest, BadFd) { + EXPECT_THAT(Map(0, kPageSize, PROT_NONE, MAP_PRIVATE, 999, 0), + SyscallFailsWithErrno(EBADF)); +} + +// Mappings are writable. +TEST_F(MMapTest, ProtWrite) { + uint64_t addr; + constexpr uint8_t kFirstWord[] = {42, 42, 42, 42}; + + EXPECT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallSucceeds()); + + // This shouldn't cause a SIGSEGV. + memset(reinterpret_cast<void*>(addr), 42, kPageSize); + + // The written data should actually be there. + EXPECT_EQ( + 0, memcmp(reinterpret_cast<void*>(addr), kFirstWord, sizeof(kFirstWord))); +} + +// "Write-only" mappings are writable *and* readable. 
+TEST_F(MMapTest, ProtWriteOnly) { + uint64_t addr; + constexpr uint8_t kFirstWord[] = {42, 42, 42, 42}; + + EXPECT_THAT( + addr = Map(0, kPageSize, PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallSucceeds()); + + // This shouldn't cause a SIGSEGV. + memset(reinterpret_cast<void*>(addr), 42, kPageSize); + + // The written data should actually be there. + EXPECT_EQ( + 0, memcmp(reinterpret_cast<void*>(addr), kFirstWord, sizeof(kFirstWord))); +} + +// "Write-only" mappings are readable. +// +// This is distinct from above to ensure the page is accessible even if the +// initial fault is a write fault. +TEST_F(MMapTest, ProtWriteOnlyReadable) { + uint64_t addr; + constexpr uint64_t kFirstWord = 0; + + EXPECT_THAT( + addr = Map(0, kPageSize, PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallSucceeds()); + + EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), &kFirstWord, + sizeof(kFirstWord))); +} + +// Mappings are writable after mprotect from PROT_NONE to PROT_READ|PROT_WRITE. +TEST_F(MMapTest, ProtectProtWrite) { + uint64_t addr; + constexpr uint8_t kFirstWord[] = {42, 42, 42, 42}; + + EXPECT_THAT( + addr = Map(0, kPageSize, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallSucceeds()); + + ASSERT_THAT(Protect(addr, kPageSize, PROT_READ | PROT_WRITE), + SyscallSucceeds()); + + // This shouldn't cause a SIGSEGV. + memset(reinterpret_cast<void*>(addr), 42, kPageSize); + + // The written data should actually be there. 
+ EXPECT_EQ( + 0, memcmp(reinterpret_cast<void*>(addr), kFirstWord, sizeof(kFirstWord))); +} + +// SIGSEGV raised when reading PROT_NONE memory +TEST_F(MMapTest, ProtNoneDeath) { + SetupGvisorDeathTest(); + + uintptr_t addr; + + ASSERT_THAT( + addr = Map(0, kPageSize, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallSucceeds()); + + EXPECT_EXIT(*reinterpret_cast<volatile int*>(addr), + ::testing::KilledBySignal(SIGSEGV), ""); +} + +// SIGSEGV raised when writing PROT_READ only memory +TEST_F(MMapTest, ReadOnlyDeath) { + SetupGvisorDeathTest(); + + uintptr_t addr; + + ASSERT_THAT( + addr = Map(0, kPageSize, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallSucceeds()); + + EXPECT_EXIT(*reinterpret_cast<volatile int*>(addr) = 42, + ::testing::KilledBySignal(SIGSEGV), ""); +} + +// Writable mapping mprotect'd to read-only should not be writable. +TEST_F(MMapTest, MprotectReadOnlyDeath) { + SetupGvisorDeathTest(); + + uintptr_t addr; + + ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallSucceeds()); + + volatile int* val = reinterpret_cast<int*>(addr); + + // Copy to ensure page is mapped in. + *val = 42; + + ASSERT_THAT(Protect(addr, kPageSize, PROT_READ), SyscallSucceeds()); + + // Now it shouldn't be writable. + EXPECT_EXIT(*val = 0, ::testing::KilledBySignal(SIGSEGV), ""); +} + +// Verify that calling mprotect an address that's not page aligned fails. +TEST_F(MMapTest, MprotectNotPageAligned) { + uintptr_t addr; + + ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallSucceeds()); + ASSERT_THAT(Protect(addr + 1, kPageSize - 1, PROT_READ), + SyscallFailsWithErrno(EINVAL)); +} + +// Verify that calling mprotect with an absurdly huge length fails. 
+TEST_F(MMapTest, MprotectHugeLength) { + uintptr_t addr; + + ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallSucceeds()); + ASSERT_THAT(Protect(addr, static_cast<size_t>(-1), PROT_READ), + SyscallFailsWithErrno(ENOMEM)); +} + +#if defined(__x86_64__) || defined(__i386__) +// This code is equivalent in 32 and 64-bit mode +const uint8_t machine_code[] = { + 0xb8, 0x2a, 0x00, 0x00, 0x00, // movl $42, %eax + 0xc3, // retq +}; + +// PROT_EXEC allows code execution +TEST_F(MMapTest, ProtExec) { + uintptr_t addr; + uint32_t (*func)(void); + + EXPECT_THAT(addr = Map(0, kPageSize, PROT_EXEC | PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallSucceeds()); + + memcpy(reinterpret_cast<void*>(addr), machine_code, sizeof(machine_code)); + + func = reinterpret_cast<uint32_t (*)(void)>(addr); + + EXPECT_EQ(42, func()); +} + +// No PROT_EXEC disallows code execution +TEST_F(MMapTest, NoProtExecDeath) { + SetupGvisorDeathTest(); + + uintptr_t addr; + uint32_t (*func)(void); + + EXPECT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallSucceeds()); + + memcpy(reinterpret_cast<void*>(addr), machine_code, sizeof(machine_code)); + + func = reinterpret_cast<uint32_t (*)(void)>(addr); + + EXPECT_EXIT(func(), ::testing::KilledBySignal(SIGSEGV), ""); +} +#endif + +TEST_F(MMapTest, NoExceedLimitData) { + void* prevbrk; + void* target_brk; + struct rlimit setlim; + + prevbrk = sbrk(0); + ASSERT_NE(-1, reinterpret_cast<intptr_t>(prevbrk)); + target_brk = reinterpret_cast<char*>(prevbrk) + 1; + + setlim.rlim_cur = RLIM_INFINITY; + setlim.rlim_max = RLIM_INFINITY; + ASSERT_THAT(setrlimit(RLIMIT_DATA, &setlim), SyscallSucceeds()); + EXPECT_THAT(brk(target_brk), SyscallSucceedsWithValue(0)); +} + +TEST_F(MMapTest, ExceedLimitData) { + // To unit test this more precisely, we'd need access to the mm's start_brk + // and end_brk, which we don't have direct access to 
:/ + void* prevbrk; + void* target_brk; + struct rlimit setlim; + + prevbrk = sbrk(0); + ASSERT_NE(-1, reinterpret_cast<intptr_t>(prevbrk)); + target_brk = reinterpret_cast<char*>(prevbrk) + 8192; + + setlim.rlim_cur = 0; + setlim.rlim_max = RLIM_INFINITY; + // Set RLIMIT_DATA very low so any subsequent brk() calls fail. + // Reset RLIMIT_DATA during teardown step. + ASSERT_THAT(setrlimit(RLIMIT_DATA, &setlim), SyscallSucceeds()); + EXPECT_THAT(brk(target_brk), SyscallFailsWithErrno(ENOMEM)); + // Teardown step... + setlim.rlim_cur = RLIM_INFINITY; + ASSERT_THAT(setrlimit(RLIMIT_DATA, &setlim), SyscallSucceeds()); +} + +TEST_F(MMapTest, ExceedLimitDataPrlimit) { + // To unit test this more precisely, we'd need access to the mm's start_brk + // and end_brk, which we don't have direct access to :/ + void* prevbrk; + void* target_brk; + struct rlimit setlim; + + prevbrk = sbrk(0); + ASSERT_NE(-1, reinterpret_cast<intptr_t>(prevbrk)); + target_brk = reinterpret_cast<char*>(prevbrk) + 8192; + + setlim.rlim_cur = 0; + setlim.rlim_max = RLIM_INFINITY; + // Set RLIMIT_DATA very low so any subsequent brk() calls fail. + // Reset RLIMIT_DATA during teardown step. + ASSERT_THAT(prlimit(0, RLIMIT_DATA, &setlim, nullptr), SyscallSucceeds()); + EXPECT_THAT(brk(target_brk), SyscallFailsWithErrno(ENOMEM)); + // Teardown step... + setlim.rlim_cur = RLIM_INFINITY; + ASSERT_THAT(setrlimit(RLIMIT_DATA, &setlim), SyscallSucceeds()); +} + +TEST_F(MMapTest, ExceedLimitDataPrlimitPID) { + // To unit test this more precisely, we'd need access to the mm's start_brk + // and end_brk, which we don't have direct access to :/ + void* prevbrk; + void* target_brk; + struct rlimit setlim; + + prevbrk = sbrk(0); + ASSERT_NE(-1, reinterpret_cast<intptr_t>(prevbrk)); + target_brk = reinterpret_cast<char*>(prevbrk) + 8192; + + setlim.rlim_cur = 0; + setlim.rlim_max = RLIM_INFINITY; + // Set RLIMIT_DATA very low so any subsequent brk() calls fail. + // Reset RLIMIT_DATA during teardown step. 
+ ASSERT_THAT(prlimit(syscall(__NR_gettid), RLIMIT_DATA, &setlim, nullptr), + SyscallSucceeds()); + EXPECT_THAT(brk(target_brk), SyscallFailsWithErrno(ENOMEM)); + // Teardown step... + setlim.rlim_cur = RLIM_INFINITY; + ASSERT_THAT(setrlimit(RLIMIT_DATA, &setlim), SyscallSucceeds()); +} + +TEST_F(MMapTest, NoExceedLimitAS) { + constexpr uint64_t kAllocBytes = 200 << 20; + // Add some headroom to the AS limit in case of e.g. unexpected stack + // expansion. + constexpr uint64_t kExtraASBytes = kAllocBytes + (20 << 20); + static_assert(kAllocBytes < kExtraASBytes, + "test depends on allocation not exceeding AS limit"); + + auto vss = ASSERT_NO_ERRNO_AND_VALUE(VirtualMemorySize()); + struct rlimit setlim; + setlim.rlim_cur = vss + kExtraASBytes; + setlim.rlim_max = RLIM_INFINITY; + ASSERT_THAT(setrlimit(RLIMIT_AS, &setlim), SyscallSucceeds()); + EXPECT_THAT( + Map(0, kAllocBytes, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallSucceedsWithValue(Gt(0))); +} + +TEST_F(MMapTest, ExceedLimitAS) { + constexpr uint64_t kAllocBytes = 200 << 20; + // Add some headroom to the AS limit in case of e.g. unexpected stack + // expansion. + constexpr uint64_t kExtraASBytes = 20 << 20; + static_assert(kAllocBytes > kExtraASBytes, + "test depends on allocation exceeding AS limit"); + + auto vss = ASSERT_NO_ERRNO_AND_VALUE(VirtualMemorySize()); + struct rlimit setlim; + setlim.rlim_cur = vss + kExtraASBytes; + setlim.rlim_max = RLIM_INFINITY; + ASSERT_THAT(setrlimit(RLIMIT_AS, &setlim), SyscallSucceeds()); + EXPECT_THAT( + Map(0, kAllocBytes, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallFailsWithErrno(ENOMEM)); +} + +// Tests that setting an anonymous mmap to PROT_NONE doesn't free the memory. 
+TEST_F(MMapTest, SettingProtNoneDoesntFreeMemory) { + uintptr_t addr; + constexpr uint8_t kFirstWord[] = {42, 42, 42, 42}; + + EXPECT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallSucceedsWithValue(Gt(0))); + + memset(reinterpret_cast<void*>(addr), 42, kPageSize); + + ASSERT_THAT(Protect(addr, kPageSize, PROT_NONE), SyscallSucceeds()); + ASSERT_THAT(Protect(addr, kPageSize, PROT_READ | PROT_WRITE), + SyscallSucceeds()); + + // The written data should still be there. + EXPECT_EQ( + 0, memcmp(reinterpret_cast<void*>(addr), kFirstWord, sizeof(kFirstWord))); +} + +constexpr char kFileContents[] = "Hello World!"; + +class MMapFileTest : public MMapTest { + protected: + FileDescriptor fd_; + std::string filename_; + + // Open a file for read/write + void SetUp() override { + MMapTest::SetUp(); + + filename_ = NewTempAbsPath(); + fd_ = ASSERT_NO_ERRNO_AND_VALUE(Open(filename_, O_CREAT | O_RDWR, 0644)); + + // Extend file so it can be written once mapped. Deliberately make the file + // only half a page in size, so we can test what happens when we access the + // second half. + // Use ftruncate(2) once the sentry supports it. + char zero = 0; + size_t count = 0; + do { + const DisableSave ds; // saving 2048 times is slow and useless. + Write(&zero, 1), SyscallSucceedsWithValue(1); + } while (++count < (kPageSize / 2)); + ASSERT_THAT(lseek(fd_.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0)); + } + + // Close and delete file + void TearDown() override { + MMapTest::TearDown(); + fd_.reset(); // Make sure the files is closed before we unlink it. 
+ ASSERT_THAT(unlink(filename_.c_str()), SyscallSucceeds()); + } + + ssize_t Read(char* buf, size_t count) { + ssize_t len = 0; + do { + ssize_t ret = read(fd_.get(), buf, count); + if (ret < 0) { + return ret; + } else if (ret == 0) { + return len; + } + + len += ret; + buf += ret; + } while (len < static_cast<ssize_t>(count)); + + return len; + } + + ssize_t Write(const char* buf, size_t count) { + ssize_t len = 0; + do { + ssize_t ret = write(fd_.get(), buf, count); + if (ret < 0) { + return ret; + } else if (ret == 0) { + return len; + } + + len += ret; + buf += ret; + } while (len < static_cast<ssize_t>(count)); + + return len; + } +}; + +// MAP_POPULATE allowed. +// There isn't a good way to verify it actually did anything. +// +// FIXME: Parameterize. +TEST_F(MMapFileTest, MapPopulate) { + ASSERT_THAT( + Map(0, kPageSize, PROT_READ, MAP_PRIVATE | MAP_POPULATE, fd_.get(), 0), + SyscallSucceeds()); +} + +// MAP_POPULATE on a short file. +// +// FIXME: Parameterize. +TEST_F(MMapFileTest, MapPopulateShort) { + ASSERT_THAT(Map(0, 2 * kPageSize, PROT_READ, MAP_PRIVATE | MAP_POPULATE, + fd_.get(), 0), + SyscallSucceeds()); +} + +// Read contents from mapped file. +TEST_F(MMapFileTest, Read) { + size_t len = strlen(kFileContents); + ASSERT_EQ(len, Write(kFileContents, len)); + + uintptr_t addr; + ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ, MAP_PRIVATE, fd_.get(), 0), + SyscallSucceeds()); + + EXPECT_THAT(reinterpret_cast<char*>(addr), + EqualsMemory(std::string(kFileContents))); +} + +// Map at an offset. 
+TEST_F(MMapFileTest, MapOffset) { + ASSERT_THAT(lseek(fd_.get(), kPageSize, SEEK_SET), SyscallSucceeds()); + + size_t len = strlen(kFileContents); + ASSERT_EQ(len, Write(kFileContents, len)); + + uintptr_t addr; + ASSERT_THAT( + addr = Map(0, kPageSize, PROT_READ, MAP_PRIVATE, fd_.get(), kPageSize), + SyscallSucceeds()); + + EXPECT_THAT(reinterpret_cast<char*>(addr), + EqualsMemory(std::string(kFileContents))); +} + +TEST_F(MMapFileTest, MapOffsetBeyondEnd) { + SetupGvisorDeathTest(); + + uintptr_t addr; + ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE, + fd_.get(), 10 * kPageSize), + SyscallSucceeds()); + + // Touching the memory causes SIGBUS. + size_t len = strlen(kFileContents); + EXPECT_EXIT(std::copy(kFileContents, kFileContents + len, + reinterpret_cast<volatile char*>(addr)), + ::testing::KilledBySignal(SIGBUS), ""); +} + +// Verify mmap fails when sum of length and offset overflows. +TEST_F(MMapFileTest, MapLengthPlusOffsetOverflows) { + const size_t length = static_cast<size_t>(-kPageSize); + const off_t offset = kPageSize; + ASSERT_THAT(Map(0, length, PROT_READ, MAP_PRIVATE, fd_.get(), offset), + SyscallFailsWithErrno(ENOMEM)); +} + +// MAP_PRIVATE PROT_WRITE is allowed on read-only FDs. +TEST_F(MMapFileTest, WritePrivateOnReadOnlyFd) { + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(filename_, O_RDONLY)); + + uintptr_t addr; + EXPECT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE, + fd.get(), 0), + SyscallSucceeds()); + + // Touch the page to ensure the kernel didn't lie about writability. + size_t len = strlen(kFileContents); + std::copy(kFileContents, kFileContents + len, + reinterpret_cast<volatile char*>(addr)); +} + +// MAP_PRIVATE PROT_READ is not allowed on write-only FDs. 
+TEST_F(MMapFileTest, ReadPrivateOnWriteOnlyFd) { + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(filename_, O_WRONLY)); + + uintptr_t addr; + EXPECT_THAT(addr = Map(0, kPageSize, PROT_READ, MAP_PRIVATE, fd.get(), 0), + SyscallFailsWithErrno(EACCES)); +} + +// MAP_SHARED PROT_WRITE not allowed on read-only FDs. +TEST_F(MMapFileTest, WriteSharedOnReadOnlyFd) { + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(filename_, O_RDONLY)); + + uintptr_t addr; + EXPECT_THAT( + addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd.get(), 0), + SyscallFailsWithErrno(EACCES)); +} + +// MAP_SHARED PROT_READ not allowed on write-only FDs. +// +// FIXME: Parameterize. +TEST_F(MMapFileTest, ReadSharedOnWriteOnlyFd) { + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(filename_, O_WRONLY)); + + uintptr_t addr; + EXPECT_THAT(addr = Map(0, kPageSize, PROT_READ, MAP_SHARED, fd.get(), 0), + SyscallFailsWithErrno(EACCES)); +} + +// MAP_SHARED PROT_WRITE not allowed on write-only FDs. +// The FD must always be readable. +// +// FIXME: Parameterize. +TEST_F(MMapFileTest, WriteSharedOnWriteOnlyFd) { + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(filename_, O_WRONLY)); + + uintptr_t addr; + EXPECT_THAT(addr = Map(0, kPageSize, PROT_WRITE, MAP_SHARED, fd.get(), 0), + SyscallFailsWithErrno(EACCES)); +} + +// Overwriting the contents of a file mapped MAP_SHARED PROT_READ +// should cause the new data to be reflected in the mapping. +TEST_F(MMapFileTest, ReadSharedConsistentWithOverwrite) { + // Start from scratch. + EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds()); + + // Expand the file to two pages and dirty them. + std::string bufA(kPageSize, 'a'); + ASSERT_THAT(Write(bufA.c_str(), bufA.size()), + SyscallSucceedsWithValue(bufA.size())); + std::string bufB(kPageSize, 'b'); + ASSERT_THAT(Write(bufB.c_str(), bufB.size()), + SyscallSucceedsWithValue(bufB.size())); + + // Map the page. 
+ uintptr_t addr; + ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0), + SyscallSucceeds()); + + // Check that the mapping contains the right file data. + EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), bufA.c_str(), kPageSize)); + EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr + kPageSize), bufB.c_str(), + kPageSize)); + + // Start at the beginning of the file. + ASSERT_THAT(lseek(fd_.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0)); + + // Swap the write pattern. + ASSERT_THAT(Write(bufB.c_str(), bufB.size()), + SyscallSucceedsWithValue(bufB.size())); + ASSERT_THAT(Write(bufA.c_str(), bufA.size()), + SyscallSucceedsWithValue(bufA.size())); + + // Check that the mapping got updated. + EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), bufB.c_str(), kPageSize)); + EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr + kPageSize), bufA.c_str(), + kPageSize)); +} + +// Partially overwriting a file mapped MAP_SHARED PROT_READ should be reflected +// in the mapping. +TEST_F(MMapFileTest, ReadSharedConsistentWithPartialOverwrite) { + // Start from scratch. + EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds()); + + // Expand the file to two pages and dirty them. + std::string bufA(kPageSize, 'a'); + ASSERT_THAT(Write(bufA.c_str(), bufA.size()), + SyscallSucceedsWithValue(bufA.size())); + std::string bufB(kPageSize, 'b'); + ASSERT_THAT(Write(bufB.c_str(), bufB.size()), + SyscallSucceedsWithValue(bufB.size())); + + // Map the page. + uintptr_t addr; + ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0), + SyscallSucceeds()); + + // Check that the mapping contains the right file data. + EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), bufA.c_str(), kPageSize)); + EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr + kPageSize), bufB.c_str(), + kPageSize)); + + // Start at the beginning of the file. 
+ ASSERT_THAT(lseek(fd_.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0)); + + // Do a partial overwrite, spanning both pages. + std::string bufC(kPageSize + (kPageSize / 2), 'c'); + ASSERT_THAT(Write(bufC.c_str(), bufC.size()), + SyscallSucceedsWithValue(bufC.size())); + + // Check that the mapping got updated. + EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), bufC.c_str(), + kPageSize + (kPageSize / 2))); + EXPECT_EQ(0, + memcmp(reinterpret_cast<void*>(addr + kPageSize + (kPageSize / 2)), + bufB.c_str(), kPageSize / 2)); +} + +// Overwriting a file mapped MAP_SHARED PROT_READ should be reflected in the +// mapping and the file. +TEST_F(MMapFileTest, ReadSharedConsistentWithWriteAndFile) { + // Start from scratch. + EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds()); + + // Expand the file to two full pages and dirty it. + std::string bufA(2 * kPageSize, 'a'); + ASSERT_THAT(Write(bufA.c_str(), bufA.size()), + SyscallSucceedsWithValue(bufA.size())); + + // Map only the first page. + uintptr_t addr; + ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0), + SyscallSucceeds()); + + // Prepare to overwrite the file contents. + ASSERT_THAT(lseek(fd_.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0)); + + // Overwrite everything, beyond the mapped portion. + std::string bufB(2 * kPageSize, 'b'); + ASSERT_THAT(Write(bufB.c_str(), bufB.size()), + SyscallSucceedsWithValue(bufB.size())); + + // What the mapped portion should now look like. + std::string bufMapped(kPageSize, 'b'); + + // Expect that the mapped portion is consistent. + EXPECT_EQ( + 0, memcmp(reinterpret_cast<void*>(addr), bufMapped.c_str(), kPageSize)); + + // Prepare to read the entire file contents. + ASSERT_THAT(lseek(fd_.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0)); + + // Expect that the file was fully updated. 
+ std::vector<char> bufFile(2 * kPageSize); + ASSERT_THAT(Read(bufFile.data(), bufFile.size()), + SyscallSucceedsWithValue(bufFile.size())); + // Cast to void* to avoid EXPECT_THAT assuming bufFile.data() is a + // NUL-terminated C std::string. EXPECT_THAT will try to print a char* as a C + // std::string, possibly overruning the buffer. + EXPECT_THAT(reinterpret_cast<void*>(bufFile.data()), EqualsMemory(bufB)); +} + +// Write data to mapped file. +TEST_F(MMapFileTest, WriteShared) { + uintptr_t addr; + ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, + fd_.get(), 0), + SyscallSucceeds()); + + size_t len = strlen(kFileContents); + memcpy(reinterpret_cast<void*>(addr), kFileContents, len); + + // The file may not actually be updated until munmap is called. + ASSERT_THAT(Unmap(), SyscallSucceeds()); + + std::vector<char> buf(len); + ASSERT_THAT(Read(buf.data(), buf.size()), + SyscallSucceedsWithValue(buf.size())); + // Cast to void* to avoid EXPECT_THAT assuming buf.data() is a + // NUL-terminated C std::string. EXPECT_THAT will try to print a char* as a C + // std::string, possibly overruning the buffer. + EXPECT_THAT(reinterpret_cast<void*>(buf.data()), + EqualsMemory(std::string(kFileContents))); +} + +// Write data to portion of mapped page beyond the end of the file. +// These writes are not reflected in the file. +TEST_F(MMapFileTest, WriteSharedBeyondEnd) { + // The file is only half of a page. We map an entire page. Writes to the + // end of the mapping must not be reflected in the file. + uintptr_t addr; + ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, + fd_.get(), 0), + SyscallSucceeds()); + + // First half; this is reflected in the file. + std::string first(kPageSize / 2, 'A'); + memcpy(reinterpret_cast<void*>(addr), first.c_str(), first.size()); + + // Second half; this is not reflected in the file. 
+ std::string second(kPageSize / 2, 'B'); + memcpy(reinterpret_cast<void*>(addr + kPageSize / 2), second.c_str(), + second.size()); + + // The file may not actually be updated until munmap is called. + ASSERT_THAT(Unmap(), SyscallSucceeds()); + + // Big enough to fit the entire page, if the writes are mistakenly written to + // the file. + std::vector<char> buf(kPageSize); + + // Only the first half is in the file. + ASSERT_THAT(Read(buf.data(), buf.size()), + SyscallSucceedsWithValue(first.size())); + // Cast to void* to avoid EXPECT_THAT assuming buf.data() is a + // NUL-terminated C std::string. EXPECT_THAT will try to print a char* as a C + // std::string, possibly overruning the buffer. + EXPECT_THAT(reinterpret_cast<void*>(buf.data()), EqualsMemory(first)); +} + +// The portion of a mapped page that becomes part of the file after a truncate +// is reflected in the file. +TEST_F(MMapFileTest, WriteSharedTruncateUp) { + // The file is only half of a page. We map an entire page. Writes to the + // end of the mapping must not be reflected in the file. + uintptr_t addr; + ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, + fd_.get(), 0), + SyscallSucceeds()); + + // First half; this is reflected in the file. + std::string first(kPageSize / 2, 'A'); + memcpy(reinterpret_cast<void*>(addr), first.c_str(), first.size()); + + // Second half; this is not reflected in the file now (see + // WriteSharedBeyondEnd), but will be after the truncate. + std::string second(kPageSize / 2, 'B'); + memcpy(reinterpret_cast<void*>(addr + kPageSize / 2), second.c_str(), + second.size()); + + // Extend the file to a full page. The second half of the page will be + // reflected in the file. + EXPECT_THAT(ftruncate(fd_.get(), kPageSize), SyscallSucceeds()); + + // The file may not actually be updated until munmap is called. + ASSERT_THAT(Unmap(), SyscallSucceeds()); + + // The whole page is in the file. 
+ std::vector<char> buf(kPageSize); + ASSERT_THAT(Read(buf.data(), buf.size()), + SyscallSucceedsWithValue(buf.size())); + // Cast to void* to avoid EXPECT_THAT assuming buf.data() is a + // NUL-terminated C std::string. EXPECT_THAT will try to print a char* as a C + // std::string, possibly overruning the buffer. + EXPECT_THAT(reinterpret_cast<void*>(buf.data()), EqualsMemory(first)); + EXPECT_THAT(reinterpret_cast<void*>(buf.data() + kPageSize / 2), + EqualsMemory(second)); +} + +TEST_F(MMapFileTest, ReadSharedTruncateDownThenUp) { + // Start from scratch. + EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds()); + + // Expand the file to a full page and dirty it. + std::string buf(kPageSize, 'a'); + ASSERT_THAT(Write(buf.c_str(), buf.size()), + SyscallSucceedsWithValue(buf.size())); + + // Map the page. + uintptr_t addr; + ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0), + SyscallSucceeds()); + + // Check that the memory contains he file data. + EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), buf.c_str(), kPageSize)); + + // Truncate down, then up. + EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds()); + EXPECT_THAT(ftruncate(fd_.get(), kPageSize), SyscallSucceeds()); + + // Check that the memory was zeroed. + std::string zeroed(kPageSize, '\0'); + EXPECT_EQ(0, + memcmp(reinterpret_cast<void*>(addr), zeroed.c_str(), kPageSize)); + + // The file may not actually be updated until msync is called. + ASSERT_THAT(Msync(), SyscallSucceeds()); + + // Prepare to read the entire file contents. + ASSERT_THAT(lseek(fd_.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0)); + + // Expect that the file is fully updated. + std::vector<char> bufFile(kPageSize); + ASSERT_THAT(Read(bufFile.data(), bufFile.size()), + SyscallSucceedsWithValue(bufFile.size())); + EXPECT_EQ(0, memcmp(bufFile.data(), zeroed.c_str(), kPageSize)); +} + +TEST_F(MMapFileTest, WriteSharedTruncateDownThenUp) { + // The file is only half of a page. We map an entire page. 
Writes to the + // end of the mapping must not be reflected in the file. + uintptr_t addr; + ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, + fd_.get(), 0), + SyscallSucceeds()); + + // First half; this will be deleted by truncate(0). + std::string first(kPageSize / 2, 'A'); + memcpy(reinterpret_cast<void*>(addr), first.c_str(), first.size()); + + // Truncate down, then up. + EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds()); + EXPECT_THAT(ftruncate(fd_.get(), kPageSize), SyscallSucceeds()); + + // The whole page is zeroed in memory. + std::string zeroed(kPageSize, '\0'); + EXPECT_EQ(0, + memcmp(reinterpret_cast<void*>(addr), zeroed.c_str(), kPageSize)); + + // The file may not actually be updated until munmap is called. + ASSERT_THAT(Unmap(), SyscallSucceeds()); + + // The whole file is also zeroed. + std::vector<char> buf(kPageSize); + ASSERT_THAT(Read(buf.data(), buf.size()), + SyscallSucceedsWithValue(buf.size())); + // Cast to void* to avoid EXPECT_THAT assuming buf.data() is a + // NUL-terminated C std::string. EXPECT_THAT will try to print a char* as a C + // std::string, possibly overruning the buffer. + EXPECT_THAT(reinterpret_cast<void*>(buf.data()), EqualsMemory(zeroed)); +} + +TEST_F(MMapFileTest, ReadSharedTruncateSIGBUS) { + SetupGvisorDeathTest(); + + // Start from scratch. + EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds()); + + // Expand the file to a full page and dirty it. + std::string buf(kPageSize, 'a'); + ASSERT_THAT(Write(buf.c_str(), buf.size()), + SyscallSucceedsWithValue(buf.size())); + + // Map the page. + uintptr_t addr; + ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0), + SyscallSucceeds()); + + // Check that the mapping contains the file data. + EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), buf.c_str(), kPageSize)); + + // Truncate down. + EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds()); + + // Accessing the truncated region should cause a SIGBUS. 
+ std::vector<char> in(kPageSize); + EXPECT_EXIT( + std::copy(reinterpret_cast<volatile char*>(addr), + reinterpret_cast<volatile char*>(addr) + kPageSize, in.data()), + ::testing::KilledBySignal(SIGBUS), ""); +} + +TEST_F(MMapFileTest, WriteSharedTruncateSIGBUS) { + SetupGvisorDeathTest(); + + uintptr_t addr; + ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, + fd_.get(), 0), + SyscallSucceeds()); + + // Touch the memory to be sure it really is mapped. + size_t len = strlen(kFileContents); + memcpy(reinterpret_cast<void*>(addr), kFileContents, len); + + // Truncate down. + EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds()); + + // Accessing the truncated file should cause a SIGBUS. + EXPECT_EXIT(std::copy(kFileContents, kFileContents + len, + reinterpret_cast<volatile char*>(addr)), + ::testing::KilledBySignal(SIGBUS), ""); +} + +TEST_F(MMapFileTest, ReadSharedTruncatePartialPage) { + // Start from scratch. + EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds()); + + // Dirty the file. + std::string buf(kPageSize, 'a'); + ASSERT_THAT(Write(buf.c_str(), buf.size()), + SyscallSucceedsWithValue(buf.size())); + + // Map a page. + uintptr_t addr; + ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0), + SyscallSucceeds()); + + // Truncate to half of the page. + EXPECT_THAT(ftruncate(fd_.get(), kPageSize / 2), SyscallSucceeds()); + + // First half of the page untouched. + EXPECT_EQ(0, + memcmp(reinterpret_cast<void*>(addr), buf.data(), kPageSize / 2)); + + // Second half is zeroed. + std::string zeroed(kPageSize / 2, '\0'); + EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr + kPageSize / 2), + zeroed.c_str(), kPageSize / 2)); +} + +// Page can still be accessed and contents are intact after truncating a partial +// page. +TEST_F(MMapFileTest, WriteSharedTruncatePartialPage) { + // Expand the file to a full page. 
+ EXPECT_THAT(ftruncate(fd_.get(), kPageSize), SyscallSucceeds()); + + uintptr_t addr; + ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, + fd_.get(), 0), + SyscallSucceeds()); + + // Fill the entire page. + std::string contents(kPageSize, 'A'); + memcpy(reinterpret_cast<void*>(addr), contents.c_str(), contents.size()); + + // Truncate half of the page. + EXPECT_THAT(ftruncate(fd_.get(), kPageSize / 2), SyscallSucceeds()); + + // First half of the page untouched. + EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), contents.c_str(), + kPageSize / 2)); + + // Second half zeroed. + std::string zeroed(kPageSize / 2, '\0'); + EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr + kPageSize / 2), + zeroed.c_str(), kPageSize / 2)); +} + +// MAP_PRIVATE writes are not carried through to the underlying file. +TEST_F(MMapFileTest, WritePrivate) { + uintptr_t addr; + ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE, + fd_.get(), 0), + SyscallSucceeds()); + + size_t len = strlen(kFileContents); + memcpy(reinterpret_cast<void*>(addr), kFileContents, len); + + // The file should not be updated, but if it mistakenly is, it may not be + // until after munmap is called. + ASSERT_THAT(Unmap(), SyscallSucceeds()); + + std::vector<char> buf(len); + ASSERT_THAT(Read(buf.data(), buf.size()), + SyscallSucceedsWithValue(buf.size())); + // Cast to void* to avoid EXPECT_THAT assuming buf.data() is a + // NUL-terminated C std::string. EXPECT_THAT will try to print a char* as a C + // std::string, possibly overruning the buffer. + EXPECT_THAT(reinterpret_cast<void*>(buf.data()), + EqualsMemory(std::string(len, '\0'))); +} + +// SIGBUS raised when writing past end of file to a private mapping. +// +// FIXME: Parameterize. 
+TEST_F(MMapFileTest, SigBusDeathWritePrivate) { + SetupGvisorDeathTest(); + + uintptr_t addr; + ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE, + fd_.get(), 0), + SyscallSucceeds()); + + // MMapFileTest makes a file kPageSize/2 long. The entire first page will be + // accessible. Write just beyond that. + size_t len = strlen(kFileContents); + EXPECT_EXIT(std::copy(kFileContents, kFileContents + len, + reinterpret_cast<volatile char*>(addr + kPageSize)), + ::testing::KilledBySignal(SIGBUS), ""); +} + +// SIGBUS raised when reading past end of file on a shared mapping. +// +// FIXME: Parameterize. +TEST_F(MMapFileTest, SigBusDeathReadShared) { + SetupGvisorDeathTest(); + + uintptr_t addr; + ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0), + SyscallSucceeds()); + + // MMapFileTest makes a file kPageSize/2 long. The entire first page will be + // accessible. Read just beyond that. + std::vector<char> in(kPageSize); + EXPECT_EXIT( + std::copy(reinterpret_cast<volatile char*>(addr + kPageSize), + reinterpret_cast<volatile char*>(addr + kPageSize) + kPageSize, + in.data()), + ::testing::KilledBySignal(SIGBUS), ""); +} + +// SIGBUS raised when reading past end of file on a shared mapping. +// +// FIXME: Parameterize. +TEST_F(MMapFileTest, SigBusDeathWriteShared) { + SetupGvisorDeathTest(); + + uintptr_t addr; + ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, + fd_.get(), 0), + SyscallSucceeds()); + + // MMapFileTest makes a file kPageSize/2 long. The entire first page will be + // accessible. Write just beyond that. + size_t len = strlen(kFileContents); + EXPECT_EXIT(std::copy(kFileContents, kFileContents + len, + reinterpret_cast<volatile char*>(addr + kPageSize)), + ::testing::KilledBySignal(SIGBUS), ""); +} + +// Tests that SIGBUS is not raised when writing to a file-mapped page before +// EOF, even if part of the mapping extends beyond EOF. 
+TEST_F(MMapFileTest, NoSigBusOnPagesBeforeEOF) { + uintptr_t addr; + ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE, + fd_.get(), 0), + SyscallSucceeds()); + + // The test passes if this survives. + size_t len = strlen(kFileContents); + std::copy(kFileContents, kFileContents + len, + reinterpret_cast<volatile char*>(addr)); +} + +// Tests that SIGBUS is not raised when writing to a file-mapped page containing +// EOF, *after* the EOF for a private mapping. +TEST_F(MMapFileTest, NoSigBusOnPageContainingEOFWritePrivate) { + uintptr_t addr; + ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE, + fd_.get(), 0), + SyscallSucceeds()); + + // The test passes if this survives. (Technically addr+kPageSize/2 is already + // beyond EOF, but +1 to check for fencepost errors.) + size_t len = strlen(kFileContents); + std::copy(kFileContents, kFileContents + len, + reinterpret_cast<volatile char*>(addr + (kPageSize / 2) + 1)); +} + +// Tests that SIGBUS is not raised when reading from a file-mapped page +// containing EOF, *after* the EOF for a shared mapping. +// +// FIXME: Parameterize. +TEST_F(MMapFileTest, NoSigBusOnPageContainingEOFReadShared) { + uintptr_t addr; + ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0), + SyscallSucceeds()); + + // The test passes if this survives. (Technically addr+kPageSize/2 is already + // beyond EOF, but +1 to check for fencepost errors.) + auto* start = reinterpret_cast<volatile char*>(addr + (kPageSize / 2) + 1); + size_t len = strlen(kFileContents); + std::vector<char> in(len); + std::copy(start, start + len, in.data()); +} + +// Tests that SIGBUS is not raised when writing to a file-mapped page containing +// EOF, *after* the EOF for a shared mapping. +// +// FIXME: Parameterize. 
+TEST_F(MMapFileTest, NoSigBusOnPageContainingEOFWriteShared) { + uintptr_t addr; + ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, + fd_.get(), 0), + SyscallSucceeds()); + + // The test passes if this survives. (Technically addr+kPageSize/2 is already + // beyond EOF, but +1 to check for fencepost errors.) + size_t len = strlen(kFileContents); + std::copy(kFileContents, kFileContents + len, + reinterpret_cast<volatile char*>(addr + (kPageSize / 2) + 1)); +} + +// Tests that reading from writable shared file-mapped pages succeeds. +// +// On most platforms this is trivial, but when the file is mapped via the sentry +// page cache (which does not yet support writing to shared mappings), a bug +// caused reads to fail unnecessarily on such mappings. +TEST_F(MMapFileTest, ReadingWritableSharedFilePageSucceeds) { + uintptr_t addr; + size_t len = strlen(kFileContents); + + ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, + fd_.get(), 0), + SyscallSucceeds()); + + std::vector<char> buf(kPageSize); + // The test passes if this survives. + std::copy(reinterpret_cast<volatile char*>(addr), + reinterpret_cast<volatile char*>(addr) + len, buf.data()); +} + +// Tests that EFAULT is returned when invoking a syscall that requires the OS to +// read past end of file (resulting in a fault in sentry context in the gVisor +// case). +TEST_F(MMapFileTest, InternalSigBus) { + uintptr_t addr; + ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE, + fd_.get(), 0), + SyscallSucceeds()); + + // This depends on the fact that gVisor implements pipes internally. 
+ int pipefd[2]; + ASSERT_THAT(pipe(pipefd), SyscallSucceeds()); + EXPECT_THAT( + write(pipefd[1], reinterpret_cast<void*>(addr + kPageSize), kPageSize), + SyscallFailsWithErrno(EFAULT)); + + EXPECT_THAT(close(pipefd[0]), SyscallSucceeds()); + EXPECT_THAT(close(pipefd[1]), SyscallSucceeds()); +} + +// Like InternalSigBus, but test the WriteZerosAt path by reading from +// /dev/zero to a shared mapping (so that the SIGBUS isn't caught during +// copy-on-write breaking). +TEST_F(MMapFileTest, InternalSigBusZeroing) { + uintptr_t addr; + ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, + fd_.get(), 0), + SyscallSucceeds()); + + const FileDescriptor dev_zero = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDONLY)); + EXPECT_THAT(read(dev_zero.get(), reinterpret_cast<void*>(addr + kPageSize), + kPageSize), + SyscallFailsWithErrno(EFAULT)); +} + +// Checks that mmaps with a length of uint64_t(-PAGE_SIZE + 1) or greater do not +// induce a sentry panic (due to "rounding up" to 0). +TEST_F(MMapTest, HugeLength) { + EXPECT_THAT(Map(0, static_cast<uint64_t>(-kPageSize + 1), PROT_NONE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallFailsWithErrno(ENOMEM)); +} + +// Tests for a specific gVisor MM caching bug. +TEST_F(MMapTest, AccessCOWInvalidatesCachedSegments) { + auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + auto fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDWR)); + auto zero_fd = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDONLY)); + + // Get a two-page private mapping and fill it with 1s. + uintptr_t addr; + ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallSucceeds()); + memset(addr_, 1, 2 * kPageSize); + MaybeSave(); + + // Fork to make the mapping copy-on-write. + pid_t const pid = fork(); + if (pid == 0) { + // The child process waits for the parent to SIGKILL it. 
+ while (true) { + pause(); + } + } + ASSERT_THAT(pid, SyscallSucceeds()); + auto cleanup_child = Cleanup([&] { + EXPECT_THAT(kill(pid, SIGKILL), SyscallSucceeds()); + int status; + EXPECT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid)); + }); + + // Induce a read-only Access of the first page of the mapping, which will not + // cause a copy. The usermem.Segment should be cached. + ASSERT_THAT(PwriteFd(fd.get(), addr_, kPageSize, 0), + SyscallSucceedsWithValue(kPageSize)); + + // Induce a writable Access of both pages of the mapping. This should + // invalidate the cached Segment. + ASSERT_THAT(PreadFd(zero_fd.get(), addr_, 2 * kPageSize, 0), + SyscallSucceedsWithValue(2 * kPageSize)); + + // Induce a read-only Access of the first page of the mapping again. It should + // read the 0s that were stored in the mapping by the read from /dev/zero. If + // the read failed to invalidate the cached Segment, it will instead read the + // 1s in the stale page. + ASSERT_THAT(PwriteFd(fd.get(), addr_, kPageSize, 0), + SyscallSucceedsWithValue(kPageSize)); + std::vector<char> buf(kPageSize); + ASSERT_THAT(PreadFd(fd.get(), buf.data(), kPageSize, 0), + SyscallSucceedsWithValue(kPageSize)); + for (size_t i = 0; i < kPageSize; i++) { + ASSERT_EQ(0, buf[i]) << "at offset " << i; + } +} + +TEST_F(MMapTest, NoReserve) { + const size_t kSize = 10 * 1 << 20; // 10M + uintptr_t addr; + ASSERT_THAT(addr = Map(0, kSize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0), + SyscallSucceeds()); + EXPECT_GT(addr, 0); + + // Check that every page can be read/written. Technically, writing to memory + // could SIGSEGV in case there is no more memory available. In gVisor it + // would never happen though because NORESERVE is ignored. In Linux, it's + // possible to fail, but allocation is small enough that it's highly likely + // to succeed. 
+ for (size_t j = 0; j < kSize; j += kPageSize) { + EXPECT_EQ(0, reinterpret_cast<char*>(addr)[j]); + reinterpret_cast<char*>(addr)[j] = j; + } +} + +// Map more than the gVisor page-cache map unit (64k) and ensure that +// it is consistent with reading from the file. +TEST_F(MMapFileTest, Bug38498194) { + // Choose a sufficiently large map unit. + constexpr int kSize = 4 * 1024 * 1024; + EXPECT_THAT(ftruncate(fd_.get(), kSize), SyscallSucceeds()); + + // Map a large enough region so that multiple internal segments + // are created to back the mapping. + uintptr_t addr; + ASSERT_THAT( + addr = Map(0, kSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd_.get(), 0), + SyscallSucceeds()); + + std::vector<char> expect(kSize, 'a'); + std::copy(expect.data(), expect.data() + expect.size(), + reinterpret_cast<volatile char*>(addr)); + + // Trigger writeback for gVisor. In Linux pages stay cached until + // it can't hold onto them anymore. + ASSERT_THAT(Unmap(), SyscallSucceeds()); + + std::vector<char> buf(kSize); + ASSERT_THAT(Read(buf.data(), buf.size()), + SyscallSucceedsWithValue(buf.size())); + EXPECT_EQ(buf, expect) << std::string(buf.data(), buf.size()); +} + +// Tests that reading from a file to a memory mapping of the same file does not +// deadlock. +TEST_F(MMapFileTest, SelfRead) { + uintptr_t addr; + ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, + fd_.get(), 0), + SyscallSucceeds()); + EXPECT_THAT(Read(reinterpret_cast<char*>(addr), kPageSize / 2), + SyscallSucceedsWithValue(kPageSize / 2)); + // The resulting file contents are poorly-specified and irrelevant. +} + +// Tests that writing to a file from a memory mapping of the same file does not +// deadlock. 
+TEST_F(MMapFileTest, SelfWrite) { + uintptr_t addr; + ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0), + SyscallSucceeds()); + EXPECT_THAT(Write(reinterpret_cast<char*>(addr), kPageSize / 2), + SyscallSucceedsWithValue(kPageSize / 2)); + // The resulting file contents are poorly-specified and irrelevant. +} + +TEST(MMapDeathTest, TruncateAfterCOWBreak) { + SetupGvisorDeathTest(); + + // Create and map a single-page file. + auto const temp_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + auto const fd = ASSERT_NO_ERRNO_AND_VALUE(Open(temp_file.path(), O_RDWR)); + ASSERT_THAT(ftruncate(fd.get(), kPageSize), SyscallSucceeds()); + auto const mapping = ASSERT_NO_ERRNO_AND_VALUE(Mmap( + nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd.get(), 0)); + + // Write to this mapping, causing the page to be copied for write. + memset(mapping.ptr(), 'a', mapping.len()); + MaybeSave(); // Trigger a co-operative save cycle. + + // Truncate the file and expect it to invalidate the copied page. + ASSERT_THAT(ftruncate(fd.get(), 0), SyscallSucceeds()); + EXPECT_EXIT(*reinterpret_cast<volatile char*>(mapping.ptr()), + ::testing::KilledBySignal(SIGBUS), ""); +} + +// Conditional on MAP_32BIT. +#ifdef __x86_64__ + +TEST(MMapNoFixtureTest, Map32Bit) { + auto const mapping = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kPageSize, PROT_NONE, MAP_PRIVATE | MAP_32BIT)); + EXPECT_LT(mapping.addr(), static_cast<uintptr_t>(1) << 32); + EXPECT_LE(mapping.endaddr(), static_cast<uintptr_t>(1) << 32); +} + +#endif // defined(__x86_64__) + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/mount.cc b/test/syscalls/linux/mount.cc new file mode 100644 index 000000000..76da8b75a --- /dev/null +++ b/test/syscalls/linux/mount.cc @@ -0,0 +1,302 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <sys/mount.h> +#include <sys/stat.h> +#include <unistd.h> +#include <functional> +#include <memory> +#include <string> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/strings/string_view.h" +#include "absl/time/time.h" +#include "test/util/capability_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/fs_util.h" +#include "test/util/mount_util.h" +#include "test/util/posix_error.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +TEST(MountTest, MountBadFilesystem) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))); + + // Linux expects a valid target before it checks the file system name. + auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + EXPECT_THAT(mount("", dir.path().c_str(), "foobar", 0, ""), + SyscallFailsWithErrno(ENODEV)); +} + +TEST(MountTest, MountInvalidTarget) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))); + + auto const dir = NewTempAbsPath(); + EXPECT_THAT(mount("", dir.c_str(), "tmpfs", 0, ""), + SyscallFailsWithErrno(ENOENT)); +} + +TEST(MountTest, MountPermDenied) { + // Clear CAP_SYS_ADMIN. + if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))) { + EXPECT_NO_ERRNO(SetCapability(CAP_SYS_ADMIN, false)); + } + + // Linux expects a valid target before checking capability. 
+ auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + EXPECT_THAT(mount("", dir.path().c_str(), "", 0, ""), + SyscallFailsWithErrno(EPERM)); +} + +TEST(MountTest, UmountPermDenied) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))); + + auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + auto const mount = + ASSERT_NO_ERRNO_AND_VALUE(Mount("", dir.path(), "tmpfs", 0, "", 0)); + + // Drop privileges in another thread, so we can still unmount the mounted + // directory. + ScopedThread([&]() { + EXPECT_NO_ERRNO(SetCapability(CAP_SYS_ADMIN, false)); + EXPECT_THAT(umount(dir.path().c_str()), SyscallFailsWithErrno(EPERM)); + }); +} + +TEST(MountTest, MountOverBusy) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))); + + auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + auto const fd = ASSERT_NO_ERRNO_AND_VALUE( + Open(JoinPath(dir.path(), "foo"), O_CREAT | O_RDWR, 0777)); + + // Should be able to mount over a busy directory. + ASSERT_NO_ERRNO_AND_VALUE(Mount("", dir.path(), "tmpfs", 0, "", 0)); +} + +TEST(MountTest, OpenFileBusy) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))); + + auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + auto const mount = ASSERT_NO_ERRNO_AND_VALUE( + Mount("", dir.path(), "tmpfs", 0, "mode=0700", 0)); + auto const fd = ASSERT_NO_ERRNO_AND_VALUE( + Open(JoinPath(dir.path(), "foo"), O_CREAT | O_RDWR, 0777)); + + // An open file should prevent unmounting. + EXPECT_THAT(umount(dir.path().c_str()), SyscallFailsWithErrno(EBUSY)); +} + +TEST(MountTest, UmountDetach) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))); + + // structure: + // + // dir (mount point) + // subdir + // file + // + // We show that we can walk around in the mount after detach-unmount dir. + // + // We show that even though dir is unreachable from outside the mount, we can + // still reach dir's (former) parent! 
+ auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + + const struct stat before = ASSERT_NO_ERRNO_AND_VALUE(Stat(dir.path())); + auto mount = + ASSERT_NO_ERRNO_AND_VALUE(Mount("", dir.path(), "tmpfs", 0, "mode=0700", + /* umountflags= */ MNT_DETACH)); + const struct stat after = ASSERT_NO_ERRNO_AND_VALUE(Stat(dir.path())); + EXPECT_NE(before.st_ino, after.st_ino); + + // Create files in the new mount. + constexpr char kContents[] = "no no no"; + auto const subdir = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(dir.path())); + auto const file = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateFileWith(dir.path(), kContents, 0777)); + + auto const dir_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(subdir.path(), O_RDONLY | O_DIRECTORY)); + auto const fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY)); + + // Unmount the tmpfs. + mount.Release()(); + + const struct stat after2 = ASSERT_NO_ERRNO_AND_VALUE(Stat(dir.path())); + EXPECT_EQ(before.st_ino, after2.st_ino); + + // Can still read file after unmounting. + std::vector<char> buf(sizeof(kContents)); + EXPECT_THAT(ReadFd(fd.get(), buf.data(), buf.size()), SyscallSucceeds()); + + // Walk to dir. + auto const mounted_dir = ASSERT_NO_ERRNO_AND_VALUE( + OpenAt(dir_fd.get(), "..", O_DIRECTORY | O_RDONLY)); + // Walk to dir/file. + auto const fd_again = ASSERT_NO_ERRNO_AND_VALUE( + OpenAt(mounted_dir.get(), std::string(Basename(file.path())), O_RDONLY)); + + std::vector<char> buf2(sizeof(kContents)); + EXPECT_THAT(ReadFd(fd_again.get(), buf2.data(), buf2.size()), + SyscallSucceeds()); + EXPECT_EQ(buf, buf2); + + // Walking outside the unmounted realm should still work, too! 
+ auto const dir_parent = ASSERT_NO_ERRNO_AND_VALUE( + OpenAt(mounted_dir.get(), "..", O_DIRECTORY | O_RDONLY)); +} + +TEST(MountTest, ActiveSubmountBusy) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))); + + auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + auto const mount1 = ASSERT_NO_ERRNO_AND_VALUE( + Mount("", dir.path(), "tmpfs", 0, "mode=0700", 0)); + + auto const dir2 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(dir.path())); + auto const mount2 = + ASSERT_NO_ERRNO_AND_VALUE(Mount("", dir2.path(), "tmpfs", 0, "", 0)); + + // Since dir now has an active submount, should not be able to unmount. + EXPECT_THAT(umount(dir.path().c_str()), SyscallFailsWithErrno(EBUSY)); +} + +TEST(MountTest, MountTmpfs) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))); + + auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const struct stat before = ASSERT_NO_ERRNO_AND_VALUE(Stat(dir.path())); + + { + auto const mount = ASSERT_NO_ERRNO_AND_VALUE( + Mount("", dir.path(), "tmpfs", 0, "mode=0700", 0)); + + const struct stat s = ASSERT_NO_ERRNO_AND_VALUE(Stat(dir.path())); + EXPECT_EQ(s.st_mode, S_IFDIR | 0700); + EXPECT_NE(s.st_ino, before.st_ino); + + EXPECT_NO_ERRNO(Open(JoinPath(dir.path(), "foo"), O_CREAT | O_RDWR, 0777)); + } + + // Now that dir is unmounted again, we should have the old inode back. + const struct stat after = ASSERT_NO_ERRNO_AND_VALUE(Stat(dir.path())); + EXPECT_EQ(before.st_ino, after.st_ino); +} + +TEST(MountTest, MountTmpfsMagicValIgnored) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))); + + auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + + auto const mount = ASSERT_NO_ERRNO_AND_VALUE( + Mount("", dir.path(), "tmpfs", MS_MGC_VAL, "mode=0700", 0)); +} + +// Passing nullptr to data is equivalent to "". 
+TEST(MountTest, NullData) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))); + + auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + + EXPECT_THAT(mount("", dir.path().c_str(), "tmpfs", 0, nullptr), + SyscallSucceeds()); + EXPECT_THAT(umount2(dir.path().c_str(), 0), SyscallSucceeds()); +} + +TEST(MountTest, MountReadonly) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))); + + auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + auto const mount = ASSERT_NO_ERRNO_AND_VALUE( + Mount("", dir.path(), "tmpfs", MS_RDONLY, "mode=0777", 0)); + + const struct stat s = ASSERT_NO_ERRNO_AND_VALUE(Stat(dir.path())); + EXPECT_EQ(s.st_mode, S_IFDIR | 0777); + + std::string const filename = JoinPath(dir.path(), "foo"); + EXPECT_THAT(open(filename.c_str(), O_RDWR | O_CREAT, 0777), + SyscallFailsWithErrno(EROFS)); +} + +PosixErrorOr<absl::Time> ATime(absl::string_view file) { + struct stat s = {}; + if (stat(std::string(file).c_str(), &s) == -1) { + return PosixError(errno, "stat failed"); + } + return absl::TimeFromTimespec(s.st_atim); +} + +// FIXME: Disabled until tmpfs stops using Handle, as only the gofer +// and host file system respect the MS_NOATIME flag. +TEST(MountTest, DISABLED_MountNoAtime) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))); + + auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + auto const mount = ASSERT_NO_ERRNO_AND_VALUE( + Mount("", dir.path(), "tmpfs", MS_NOATIME, "mode=0777", 0)); + + std::string const contents = "No no no, don't follow the instructions!"; + auto const file = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateFileWith(dir.path(), contents, 0777)); + + absl::Time const before = ASSERT_NO_ERRNO_AND_VALUE(ATime(file.path())); + + // Reading from the file should change the atime, but the MS_NOATIME flag + // should prevent that. 
+ auto const fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR)); + char buf[100]; + int read_n; + ASSERT_THAT(read_n = read(fd.get(), buf, sizeof(buf)), SyscallSucceeds()); + EXPECT_EQ(std::string(buf, read_n), contents); + + absl::Time const after = ASSERT_NO_ERRNO_AND_VALUE(ATime(file.path())); + + // Expect that atime hasn't changed. + EXPECT_EQ(before, after); +} + +TEST(MountTest, RenameRemoveMountPoint) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))); + + auto const dir_parent = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + auto const dir = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(dir_parent.path())); + auto const new_dir = NewTempAbsPath(); + + auto const mount = + ASSERT_NO_ERRNO_AND_VALUE(Mount("", dir.path(), "tmpfs", 0, "", 0)); + + ASSERT_THAT(rename(dir.path().c_str(), new_dir.c_str()), + SyscallFailsWithErrno(EBUSY)); + + ASSERT_THAT(rmdir(dir.path().c_str()), SyscallFailsWithErrno(EBUSY)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/mremap.cc b/test/syscalls/linux/mremap.cc new file mode 100644 index 000000000..ededab336 --- /dev/null +++ b/test/syscalls/linux/mremap.cc @@ -0,0 +1,514 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <errno.h> +#include <string.h> +#include <sys/mman.h> + +#include <string> + +#include "gmock/gmock.h" +#include "absl/strings/string_view.h" +#include "test/util/file_descriptor.h" +#include "test/util/logging.h" +#include "test/util/memory_util.h" +#include "test/util/multiprocess_util.h" +#include "test/util/posix_error.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +using ::testing::_; + +namespace gvisor { +namespace testing { + +namespace { + +// Wrapper for mremap that returns a PosixErrorOr<>, since the return type of +// void* isn't directly compatible with SyscallSucceeds. +PosixErrorOr<void*> Mremap(void* old_address, size_t old_size, size_t new_size, + int flags, void* new_address) { + void* rv = mremap(old_address, old_size, new_size, flags, new_address); + if (rv == MAP_FAILED) { + return PosixError(errno, "mremap failed"); + } + return rv; +} + +// Returns true if the page containing addr is mapped. +bool IsMapped(uintptr_t addr) { + int const rv = msync(reinterpret_cast<void*>(addr & ~(kPageSize - 1)), + kPageSize, MS_ASYNC); + if (rv == 0) { + return true; + } + TEST_PCHECK_MSG(errno == ENOMEM, "msync failed with unexpected errno"); + return false; +} + +// Fixture for mremap tests parameterized by mmap flags. +using MremapParamTest = ::testing::TestWithParam<int>; + +TEST_P(MremapParamTest, Noop) { + Mapping const m = + ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_NONE, GetParam())); + + ASSERT_THAT(Mremap(m.ptr(), kPageSize, kPageSize, 0, nullptr), + IsPosixErrorOkAndHolds(m.ptr())); + EXPECT_TRUE(IsMapped(m.addr())); +} + +TEST_P(MremapParamTest, InPlace_ShrinkingWholeVMA) { + Mapping const m = + ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(2 * kPageSize, PROT_NONE, GetParam())); + + const auto rest = [&] { + // N.B. we must be in a single-threaded subprocess to ensure a + // background thread doesn't concurrently map the second page. 
+ void* addr = mremap(m.ptr(), 2 * kPageSize, kPageSize, 0, nullptr); + TEST_PCHECK_MSG(addr != MAP_FAILED, "mremap failed"); + TEST_CHECK(addr == m.ptr()); + MaybeSave(); + + TEST_CHECK(IsMapped(m.addr())); + TEST_CHECK(!IsMapped(m.addr() + kPageSize)); + }; + + EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0)); +} + +TEST_P(MremapParamTest, InPlace_ShrinkingPartialVMA) { + Mapping const m = + ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(3 * kPageSize, PROT_NONE, GetParam())); + + const auto rest = [&] { + void* addr = mremap(m.ptr(), 2 * kPageSize, kPageSize, 0, nullptr); + TEST_PCHECK_MSG(addr != MAP_FAILED, "mremap failed"); + TEST_CHECK(addr == m.ptr()); + MaybeSave(); + + TEST_CHECK(IsMapped(m.addr())); + TEST_CHECK(!IsMapped(m.addr() + kPageSize)); + TEST_CHECK(IsMapped(m.addr() + 2 * kPageSize)); + }; + + EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0)); +} + +TEST_P(MremapParamTest, InPlace_ShrinkingAcrossVMAs) { + Mapping const m = + ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(3 * kPageSize, PROT_READ, GetParam())); + // Changing permissions on the first page forces it to become a separate vma. + ASSERT_THAT(mprotect(m.ptr(), kPageSize, PROT_NONE), SyscallSucceeds()); + + const auto rest = [&] { + // Both old_size and new_size now span two vmas; mremap + // shouldn't care. + void* addr = mremap(m.ptr(), 3 * kPageSize, 2 * kPageSize, 0, nullptr); + TEST_PCHECK_MSG(addr != MAP_FAILED, "mremap failed"); + TEST_CHECK(addr == m.ptr()); + MaybeSave(); + + TEST_CHECK(IsMapped(m.addr())); + TEST_CHECK(IsMapped(m.addr() + kPageSize)); + TEST_CHECK(!IsMapped(m.addr() + 2 * kPageSize)); + }; + + EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0)); +} + +TEST_P(MremapParamTest, InPlace_ExpansionSuccess) { + Mapping const m = + ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(2 * kPageSize, PROT_NONE, GetParam())); + + const auto rest = [&] { + // Unmap the second page so that the first can be expanded back into it. + // + // N.B. 
// we must be in a single-threaded subprocess to ensure a
    // background thread doesn't concurrently map this page.
    TEST_PCHECK(
        munmap(reinterpret_cast<void*>(m.addr() + kPageSize), kPageSize) == 0);
    MaybeSave();

    void* addr = mremap(m.ptr(), kPageSize, 2 * kPageSize, 0, nullptr);
    TEST_PCHECK_MSG(addr != MAP_FAILED, "mremap failed");
    TEST_CHECK(addr == m.ptr());
    MaybeSave();

    TEST_CHECK(IsMapped(m.addr()));
    TEST_CHECK(IsMapped(m.addr() + kPageSize));
  };

  EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0));
}

// An in-place expansion (flags == 0) must fail with ENOMEM when a page inside
// the requested range is still mapped, and must leave every page as it was.
TEST_P(MremapParamTest, InPlace_ExpansionFailure) {
  Mapping const m =
      ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(3 * kPageSize, PROT_NONE, GetParam()));

  const auto rest = [&] {
    // Unmap the second page, leaving a one-page hole. Trying to expand the
    // first page to three pages should fail since the original third page
    // is still mapped.
    TEST_PCHECK(
        munmap(reinterpret_cast<void*>(m.addr() + kPageSize), kPageSize) == 0);
    MaybeSave();

    void* addr = mremap(m.ptr(), kPageSize, 3 * kPageSize, 0, nullptr);
    TEST_CHECK_MSG(addr == MAP_FAILED, "mremap unexpectedly succeeded");
    TEST_PCHECK_MSG(errno == ENOMEM, "mremap failed with wrong errno");
    MaybeSave();

    // The failed call must not have changed which pages are mapped.
    TEST_CHECK(IsMapped(m.addr()));
    TEST_CHECK(!IsMapped(m.addr() + kPageSize));
    TEST_CHECK(IsMapped(m.addr() + 2 * kPageSize));
  };

  EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0));
}

// With MREMAP_MAYMOVE, an expansion that cannot be satisfied in place
// relocates the mapping instead of failing.
TEST_P(MremapParamTest, MayMove_Expansion) {
  Mapping const m =
      ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(3 * kPageSize, PROT_NONE, GetParam()));

  const auto rest = [&] {
    // Unmap the second page, leaving a one-page hole. Trying to expand the
    // first page to three pages with MREMAP_MAYMOVE should force the
    // mapping to be relocated since the original third page is still
    // mapped.
    TEST_PCHECK(
        munmap(reinterpret_cast<void*>(m.addr() + kPageSize), kPageSize) == 0);
    MaybeSave();

    void* addr2 =
        mremap(m.ptr(), kPageSize, 3 * kPageSize, MREMAP_MAYMOVE, nullptr);
    TEST_PCHECK_MSG(addr2 != MAP_FAILED, "mremap failed");
    MaybeSave();

    const Mapping m2 = Mapping(addr2, 3 * kPageSize);
    TEST_CHECK(m.addr() != m2.addr());

    // The first source page moved away, the hole is still a hole, and the
    // original third page was not touched.
    TEST_CHECK(!IsMapped(m.addr()));
    TEST_CHECK(!IsMapped(m.addr() + kPageSize));
    TEST_CHECK(IsMapped(m.addr() + 2 * kPageSize));
    TEST_CHECK(IsMapped(m2.addr()));
    TEST_CHECK(IsMapped(m2.addr() + kPageSize));
    TEST_CHECK(IsMapped(m2.addr() + 2 * kPageSize));
  };

  EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0));
}

// MREMAP_FIXED with a destination equal to the source is rejected with EINVAL
// and leaves the source mapped.
TEST_P(MremapParamTest, Fixed_SourceAndDestinationCannotOverlap) {
  Mapping const m =
      ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_NONE, GetParam()));

  ASSERT_THAT(Mremap(m.ptr(), kPageSize, kPageSize,
                     MREMAP_MAYMOVE | MREMAP_FIXED, m.ptr()),
              PosixErrorIs(EINVAL, _));
  EXPECT_TRUE(IsMapped(m.addr()));
}

// MREMAP_FIXED moves a one-page mapping into a hole at the requested
// destination address.
TEST_P(MremapParamTest, Fixed_SameSize) {
  Mapping const src =
      ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_NONE, GetParam()));
  Mapping const dst =
      ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_NONE, GetParam()));

  const auto rest = [&] {
    // Unmap dst to create a hole.
    TEST_PCHECK(munmap(dst.ptr(), kPageSize) == 0);
    MaybeSave();

    void* addr = mremap(src.ptr(), kPageSize, kPageSize,
                        MREMAP_MAYMOVE | MREMAP_FIXED, dst.ptr());
    TEST_PCHECK_MSG(addr != MAP_FAILED, "mremap failed");
    TEST_CHECK(addr == dst.ptr());
    MaybeSave();

    TEST_CHECK(!IsMapped(src.addr()));
    TEST_CHECK(IsMapped(dst.addr()));
  };

  EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0));
}

TEST_P(MremapParamTest, Fixed_SameSize_Unmapping) {
  // Like the Fixed_SameSize case, but expect mremap to unmap the destination
  // automatically.
  Mapping const src =
      ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_NONE, GetParam()));
  Mapping const dst =
      ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_NONE, GetParam()));

  const auto rest = [&] {
    void* addr = mremap(src.ptr(), kPageSize, kPageSize,
                        MREMAP_MAYMOVE | MREMAP_FIXED, dst.ptr());
    TEST_PCHECK_MSG(addr != MAP_FAILED, "mremap failed");
    TEST_CHECK(addr == dst.ptr());
    MaybeSave();

    TEST_CHECK(!IsMapped(src.addr()));
    TEST_CHECK(IsMapped(dst.addr()));
  };

  EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0));
}

// Moving a whole two-page mapping while shrinking it to one page leaves only
// the first destination page mapped and nothing at the source.
TEST_P(MremapParamTest, Fixed_ShrinkingWholeVMA) {
  Mapping const src =
      ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(2 * kPageSize, PROT_NONE, GetParam()));
  Mapping const dst =
      ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(2 * kPageSize, PROT_NONE, GetParam()));

  const auto rest = [&] {
    // Unmap dst so we can check that mremap does not keep the
    // second page.
    TEST_PCHECK(munmap(dst.ptr(), 2 * kPageSize) == 0);
    MaybeSave();

    void* addr = mremap(src.ptr(), 2 * kPageSize, kPageSize,
                        MREMAP_MAYMOVE | MREMAP_FIXED, dst.ptr());
    TEST_PCHECK_MSG(addr != MAP_FAILED, "mremap failed");
    TEST_CHECK(addr == dst.ptr());
    MaybeSave();

    TEST_CHECK(!IsMapped(src.addr()));
    TEST_CHECK(!IsMapped(src.addr() + kPageSize));
    TEST_CHECK(IsMapped(dst.addr()));
    TEST_CHECK(!IsMapped(dst.addr() + kPageSize));
  };

  EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0));
}

// Like Fixed_ShrinkingWholeVMA, but the range being moved is only the first
// two pages of a three-page mapping; the third source page must stay mapped.
TEST_P(MremapParamTest, Fixed_ShrinkingPartialVMA) {
  Mapping const src =
      ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(3 * kPageSize, PROT_NONE, GetParam()));
  Mapping const dst =
      ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(2 * kPageSize, PROT_NONE, GetParam()));

  const auto rest = [&] {
    // Unmap dst so we can check that mremap does not keep the
    // second page.
    TEST_PCHECK(munmap(dst.ptr(), 2 * kPageSize) == 0);
    MaybeSave();

    void* addr = mremap(src.ptr(), 2 * kPageSize, kPageSize,
                        MREMAP_MAYMOVE | MREMAP_FIXED, dst.ptr());
    TEST_PCHECK_MSG(addr != MAP_FAILED, "mremap failed");
    TEST_CHECK(addr == dst.ptr());
    MaybeSave();

    TEST_CHECK(!IsMapped(src.addr()));
    TEST_CHECK(!IsMapped(src.addr() + kPageSize));
    TEST_CHECK(IsMapped(src.addr() + 2 * kPageSize));
    TEST_CHECK(IsMapped(dst.addr()));
    TEST_CHECK(!IsMapped(dst.addr() + kPageSize));
  };

  EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0));
}

// A shrinking MREMAP_FIXED move whose source range spans two vmas is expected
// to fail with EFAULT, but only after the side effects checked below.
TEST_P(MremapParamTest, Fixed_ShrinkingAcrossVMAs) {
  Mapping const src =
      ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(3 * kPageSize, PROT_READ, GetParam()));
  // Changing permissions on the first page forces it to become a separate vma.
  ASSERT_THAT(mprotect(src.ptr(), kPageSize, PROT_NONE), SyscallSucceeds());
  Mapping const dst =
      ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(2 * kPageSize, PROT_NONE, GetParam()));

  const auto rest = [&] {
    // Unlike flags=0, MREMAP_FIXED requires that [old_address,
    // old_address+new_size) only spans a single vma.
    void* addr = mremap(src.ptr(), 3 * kPageSize, 2 * kPageSize,
                        MREMAP_MAYMOVE | MREMAP_FIXED, dst.ptr());
    TEST_CHECK_MSG(addr == MAP_FAILED, "mremap unexpectedly succeeded");
    TEST_PCHECK_MSG(errno == EFAULT, "mremap failed with wrong errno");
    MaybeSave();

    TEST_CHECK(IsMapped(src.addr()));
    TEST_CHECK(IsMapped(src.addr() + kPageSize));
    // Despite failing, mremap should have unmapped [old_address+new_size,
    // old_address+old_size) (i.e. the third page).
    TEST_CHECK(!IsMapped(src.addr() + 2 * kPageSize));
    // Despite failing, mremap should have unmapped the destination pages.
    TEST_CHECK(!IsMapped(dst.addr()));
    TEST_CHECK(!IsMapped(dst.addr() + kPageSize));
  };

  EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0));
}

// MREMAP_FIXED can grow a mapping while moving it; every page of the
// destination range must end up mapped.
TEST_P(MremapParamTest, Fixed_Expansion) {
  Mapping const src =
      ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_NONE, GetParam()));
  Mapping const dst =
      ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(2 * kPageSize, PROT_NONE, GetParam()));

  const auto rest = [&] {
    // Unmap dst so we can check that mremap actually maps all pages
    // at the destination.
    TEST_PCHECK(munmap(dst.ptr(), 2 * kPageSize) == 0);
    MaybeSave();

    void* addr = mremap(src.ptr(), kPageSize, 2 * kPageSize,
                        MREMAP_MAYMOVE | MREMAP_FIXED, dst.ptr());
    TEST_PCHECK_MSG(addr != MAP_FAILED, "mremap failed");
    TEST_CHECK(addr == dst.ptr());
    MaybeSave();

    TEST_CHECK(!IsMapped(src.addr()));
    TEST_CHECK(IsMapped(dst.addr()));
    TEST_CHECK(IsMapped(dst.addr() + kPageSize));
  };

  EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0));
}

// The parameter is the mmap sharing flag passed to MmapAnon, so every
// MremapParamTest case runs once with MAP_PRIVATE and once with MAP_SHARED.
INSTANTIATE_TEST_CASE_P(PrivateShared, MremapParamTest,
                        ::testing::Values(MAP_PRIVATE, MAP_SHARED));

// mremap with old_size == 0 only works with MAP_SHARED after Linux 4.14
// (dba58d3b8c50 "mm/mremap: fail map duplication attempts for private
// mappings").

// Without MREMAP_MAYMOVE, an old_size == 0 request is expected to fail with
// ENOMEM.
TEST(MremapTest, InPlace_Copy) {
  Mapping const m =
      ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_NONE, MAP_SHARED));
  EXPECT_THAT(Mremap(m.ptr(), 0, kPageSize, 0, nullptr),
              PosixErrorIs(ENOMEM, _));
}

// With MREMAP_MAYMOVE, an old_size == 0 request yields a second mapping at a
// different address while the original stays mapped.
TEST(MremapTest, MayMove_Copy) {
  Mapping const m =
      ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_NONE, MAP_SHARED));

  // Remainder of this test executes in a subprocess to ensure that if mremap
  // incorrectly removes m, it is not remapped by another thread.
  const auto rest = [&] {
    void* ptr = mremap(m.ptr(), 0, kPageSize, MREMAP_MAYMOVE, nullptr);
    MaybeSave();
    TEST_PCHECK_MSG(ptr != MAP_FAILED, "mremap failed");
    TEST_CHECK(ptr != m.ptr());
    TEST_CHECK(IsMapped(m.addr()));
    TEST_CHECK(IsMapped(reinterpret_cast<uintptr_t>(ptr)));
  };
  EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0));
}

// Same as MayMove_Copy, but the copy is placed at a caller-chosen address via
// MREMAP_FIXED.
TEST(MremapTest, MustMove_Copy) {
  Mapping const src =
      ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_NONE, MAP_SHARED));
  Mapping const dst =
      ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_NONE, MAP_PRIVATE));

  // Remainder of this test executes in a subprocess to ensure that if mremap
  // incorrectly removes src, it is not remapped by another thread.
  const auto rest = [&] {
    void* ptr = mremap(src.ptr(), 0, kPageSize, MREMAP_MAYMOVE | MREMAP_FIXED,
                       dst.ptr());
    MaybeSave();
    TEST_PCHECK_MSG(ptr != MAP_FAILED, "mremap failed");
    TEST_CHECK(ptr == dst.ptr());
    TEST_CHECK(IsMapped(src.addr()));
    TEST_CHECK(IsMapped(dst.addr()));
  };
  EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0));
}

// Asserts that every byte of v equals c. ASSERT_EQ returns from this helper at
// the first mismatch, whose offset is included in the failure message.
void ExpectAllBytesAre(absl::string_view v, char c) {
  for (size_t i = 0; i < v.size(); i++) {
    ASSERT_EQ(v[i], c) << "at offset " << i;
  }
}

TEST(MremapTest, ExpansionPreservesCOWPagesAndExposesNewFilePages) {
  // Create a file with 3 pages. The first is filled with 'a', the second is
  // filled with 'b', and the third is filled with 'c'.
  TempPath const file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
  const FileDescriptor fd =
      ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR));
  ASSERT_THAT(WriteFd(fd.get(), std::string(kPageSize, 'a').c_str(), kPageSize),
              SyscallSucceedsWithValue(kPageSize));
  ASSERT_THAT(WriteFd(fd.get(), std::string(kPageSize, 'b').c_str(), kPageSize),
              SyscallSucceedsWithValue(kPageSize));
  ASSERT_THAT(WriteFd(fd.get(), std::string(kPageSize, 'c').c_str(), kPageSize),
              SyscallSucceedsWithValue(kPageSize));

  // Create a private mapping of the first 2 pages, and fill the second page
  // with 'd'.
  Mapping const src = ASSERT_NO_ERRNO_AND_VALUE(Mmap(nullptr, 2 * kPageSize,
                                                     PROT_READ | PROT_WRITE,
                                                     MAP_PRIVATE, fd.get(), 0));
  memset(reinterpret_cast<void*>(src.addr() + kPageSize), 'd', kPageSize);
  MaybeSave();

  // Move the mapping while expanding it to 3 pages. The resulting mapping
  // should contain the original first page of the file (filled with 'a'),
  // followed by the private copy of the second page (filled with 'd'), followed
  // by the newly-mapped third page of the file (filled with 'c').
  Mapping const dst = ASSERT_NO_ERRNO_AND_VALUE(
      MmapAnon(3 * kPageSize, PROT_NONE, MAP_PRIVATE));
  ASSERT_THAT(Mremap(src.ptr(), 2 * kPageSize, 3 * kPageSize,
                     MREMAP_MAYMOVE | MREMAP_FIXED, dst.ptr()),
              IsPosixErrorOkAndHolds(dst.ptr()));
  auto const v = dst.view();
  ExpectAllBytesAre(v.substr(0, kPageSize), 'a');
  ExpectAllBytesAre(v.substr(kPageSize, kPageSize), 'd');
  ExpectAllBytesAre(v.substr(2 * kPageSize, kPageSize), 'c');
}

TEST(MremapDeathTest, SharedAnon) {
  SetupGvisorDeathTest();

  // Reserve 4 pages of address space.
  Mapping const reserved = ASSERT_NO_ERRNO_AND_VALUE(
      MmapAnon(4 * kPageSize, PROT_NONE, MAP_PRIVATE));

  // Create a 2-page shared anonymous mapping at the beginning of the
  // reservation. Fill the first page with 'a' and the second with 'b'.
  Mapping const m = ASSERT_NO_ERRNO_AND_VALUE(
      Mmap(reserved.ptr(), 2 * kPageSize, PROT_READ | PROT_WRITE,
           MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0));
  memset(m.ptr(), 'a', kPageSize);
  memset(reinterpret_cast<void*>(m.addr() + kPageSize), 'b', kPageSize);
  MaybeSave();

  // Shrink the mapping to 1 page in-place.
  ASSERT_THAT(Mremap(m.ptr(), 2 * kPageSize, kPageSize, 0, m.ptr()),
              IsPosixErrorOkAndHolds(m.ptr()));

  // Expand the mapping to 3 pages, moving it forward by 1 page in the process
  // since the old and new mappings can't overlap.
  void* const new_m = reinterpret_cast<void*>(m.addr() + kPageSize);
  ASSERT_THAT(Mremap(m.ptr(), kPageSize, 3 * kPageSize,
                     MREMAP_MAYMOVE | MREMAP_FIXED, new_m),
              IsPosixErrorOkAndHolds(new_m));

  // The first 2 pages of the mapping should still contain the data we wrote
  // (i.e. shrinking should not have discarded the second page's data), while
  // touching the third page should raise SIGBUS.
  auto const v =
      absl::string_view(static_cast<char const*>(new_m), 3 * kPageSize);
  ExpectAllBytesAre(v.substr(0, kPageSize), 'a');
  ExpectAllBytesAre(v.substr(kPageSize, kPageSize), 'b');
  EXPECT_EXIT(ExpectAllBytesAre(v.substr(2 * kPageSize, kPageSize), '\0'),
              ::testing::KilledBySignal(SIGBUS), "");
}

}  // namespace

}  // namespace testing
}  // namespace gvisor
diff --git a/test/syscalls/linux/msync.cc b/test/syscalls/linux/msync.cc
new file mode 100644
index 000000000..0ddc621aa
--- /dev/null
+++ b/test/syscalls/linux/msync.cc
@@ -0,0 +1,145 @@
// Copyright 2018 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <sys/mman.h>
#include <unistd.h>

#include <functional>
#include <string>
#include <utility>
#include <vector>

#include "test/util/file_descriptor.h"
#include "test/util/memory_util.h"
#include "test/util/posix_error.h"
#include "test/util/temp_path.h"
#include "test/util/test_util.h"

namespace gvisor {
namespace testing {

namespace {

// Parameters for msync tests. Use a std::tuple so we can use
// ::testing::Combine.
using MsyncTestParam =
    std::tuple<int,                                     // msync flags
               std::function<PosixErrorOr<Mapping>()>   // returns mapping to
                                                        // msync
               >;

// Common fixture: exposes the two halves of the test parameter as the msync
// flags and a freshly created mapping.
class MsyncParameterizedTest : public ::testing::TestWithParam<MsyncTestParam> {
 protected:
  // Flags to pass to msync(2) (element 0 of the parameter tuple).
  int msync_flags() const { return std::get<0>(GetParam()); }

  // Invokes the mapping factory (element 1 of the parameter tuple) and returns
  // its result; each call runs the factory again.
  PosixErrorOr<Mapping> GetMapping() const {
    auto rv = std::get<1>(GetParam())();
    return rv;
  }
};

// All valid msync(2) flag combinations (not including MS_INVALIDATE, which
// gVisor doesn't implement).
constexpr std::initializer_list<int> kMsyncFlags = {MS_SYNC, MS_ASYNC, 0};

// Returns functions that return mappings that should be successfully
// msync()able.
//
// For each combination of {read-only, writable} x {MAP_PRIVATE, MAP_SHARED}
// two factories are produced: one for an anonymous mapping and one for a
// mapping of a newly created temporary file.
std::vector<std::function<PosixErrorOr<Mapping>()>> SyncableMappings() {
  std::vector<std::function<PosixErrorOr<Mapping>()>> funcs;
  for (bool const writable : {false, true}) {
    for (int const mflags : {MAP_PRIVATE, MAP_SHARED}) {
      int const prot = PROT_READ | (writable ? PROT_WRITE : 0);
      int const oflags = O_CREAT | (writable ? O_RDWR : O_RDONLY);
      funcs.push_back([=] {
        return MmapAnon(kPageSize, prot, mflags);
      });
      funcs.push_back([=]() -> PosixErrorOr<Mapping> {
        std::string const path = NewTempAbsPath();
        ASSIGN_OR_RETURN_ERRNO(auto fd, Open(path, oflags, 0644));
        // Don't unlink the file since that breaks save/restore. Just let the
        // test infrastructure clean up all of our temporary files when we're
        // done.
        return Mmap(nullptr, kPageSize, prot, mflags, fd.get(), 0);
      });
    }
  }
  return funcs;
}

// Mapping factory for tests that must not create a mapping: always returns an
// EINVAL error so that an unexpected GetMapping() call is reported.
PosixErrorOr<Mapping> NoMappings() {
  return PosixError(EINVAL, "unexpected attempt to create a mapping");
}

// "Fixture" for msync tests that hold for all valid flags, but do not create
// mappings.
using MsyncNoMappingTest = MsyncParameterizedTest;

TEST_P(MsyncNoMappingTest, UnmappedAddressWithZeroLengthSucceeds) {
  EXPECT_THAT(msync(nullptr, 0, msync_flags()), SyscallSucceeds());
}

TEST_P(MsyncNoMappingTest, UnmappedAddressWithNonzeroLengthFails) {
  EXPECT_THAT(msync(nullptr, kPageSize, msync_flags()),
              SyscallFailsWithErrno(ENOMEM));
}

INSTANTIATE_TEST_CASE_P(All, MsyncNoMappingTest,
                        ::testing::Combine(::testing::ValuesIn(kMsyncFlags),
                                           ::testing::Values(NoMappings)));

// "Fixture" for msync tests that are not parameterized by msync flags, but do
// create mappings.
using MsyncNoFlagsTest = MsyncParameterizedTest;

// MS_SYNC and MS_ASYNC together are expected to be rejected with EINVAL.
TEST_P(MsyncNoFlagsTest, BothSyncAndAsyncFails) {
  auto m = ASSERT_NO_ERRNO_AND_VALUE(GetMapping());
  EXPECT_THAT(msync(m.ptr(), m.len(), MS_SYNC | MS_ASYNC),
              SyscallFailsWithErrno(EINVAL));
}

INSTANTIATE_TEST_CASE_P(
    All, MsyncNoFlagsTest,
    ::testing::Combine(::testing::Values(0),  // ignored
                       ::testing::ValuesIn(SyncableMappings())));

// "Fixture" for msync tests parameterized by both msync flags and sources of
// mappings.
using MsyncFullParamTest = MsyncParameterizedTest;

TEST_P(MsyncFullParamTest, NormallySucceeds) {
  auto m = ASSERT_NO_ERRNO_AND_VALUE(GetMapping());
  EXPECT_THAT(msync(m.ptr(), m.len(), msync_flags()), SyscallSucceeds());
}

// A length that is not a multiple of the page size is accepted.
TEST_P(MsyncFullParamTest, UnalignedLengthSucceeds) {
  auto m = ASSERT_NO_ERRNO_AND_VALUE(GetMapping());
  EXPECT_THAT(msync(m.ptr(), m.len() - 1, msync_flags()), SyscallSucceeds());
}

// A start address that is not page-aligned is expected to fail with EINVAL.
TEST_P(MsyncFullParamTest, UnalignedAddressFails) {
  auto m = ASSERT_NO_ERRNO_AND_VALUE(GetMapping());
  EXPECT_THAT(
      msync(reinterpret_cast<void*>(m.addr() + 1), m.len() - 1, msync_flags()),
      SyscallFailsWithErrno(EINVAL));
}

INSTANTIATE_TEST_CASE_P(
    All, MsyncFullParamTest,
    ::testing::Combine(::testing::ValuesIn(kMsyncFlags),
                       ::testing::ValuesIn(SyncableMappings())));

}  // namespace

}  // namespace testing
}  // namespace gvisor
diff --git a/test/syscalls/linux/munmap.cc b/test/syscalls/linux/munmap.cc
new file mode 100644
index 000000000..e20039950
--- /dev/null
+++ b/test/syscalls/linux/munmap.cc
@@ -0,0 +1,53 @@
// Copyright 2018 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
+ +#include <sys/mman.h> + +#include "gtest/gtest.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +class MunmapTest : public ::testing::Test { + protected: + void SetUp() override { + m_ = mmap(nullptr, kPageSize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(MAP_FAILED, m_); + } + + void* m_ = nullptr; +}; + +TEST_F(MunmapTest, HappyCase) { + EXPECT_THAT(munmap(m_, kPageSize), SyscallSucceeds()); +} + +TEST_F(MunmapTest, ZeroLength) { + EXPECT_THAT(munmap(m_, 0), SyscallFailsWithErrno(EINVAL)); +} + +TEST_F(MunmapTest, LastPageRoundUp) { + // Attempt to unmap up to and including the last page. + EXPECT_THAT(munmap(m_, static_cast<size_t>(-kPageSize + 1)), + SyscallFailsWithErrno(EINVAL)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/open.cc b/test/syscalls/linux/open.cc new file mode 100644 index 000000000..5770680cd --- /dev/null +++ b/test/syscalls/linux/open.cc @@ -0,0 +1,340 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <errno.h> +#include <fcntl.h> +#include <linux/capability.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "test/syscalls/linux/file_base.h" +#include "test/util/capability_util.h" +#include "test/util/cleanup.h" +#include "test/util/file_descriptor.h" +#include "test/util/fs_util.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// This test is currently very rudimentary. +// +// There are plenty of extra cases to cover once the sentry supports them. +// +// Different types of opens: +// * O_CREAT +// * O_DIRECTORY +// * O_NOFOLLOW +// * O_PATH <- Will we ever support this? +// +// Special operations on open: +// * O_EXCL +// +// Special files: +// * Blocking behavior for a named pipe. +// +// Different errors: +// * EACCES +// * EEXIST +// * ENAMETOOLONG +// * ELOOP +// * ENOTDIR +// * EPERM +class OpenTest : public FileTest { + void SetUp() override { + FileTest::SetUp(); + + ASSERT_THAT( + write(test_file_fd_.get(), test_data_.c_str(), test_data_.length()), + SyscallSucceedsWithValue(test_data_.length())); + EXPECT_THAT(lseek(test_file_fd_.get(), 0, SEEK_SET), SyscallSucceeds()); + } + + public: + const std::string test_data_ = "hello world\n"; +}; + +TEST_F(OpenTest, ReadOnly) { + char buf; + const FileDescriptor ro_file = + ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDONLY)); + + EXPECT_THAT(read(ro_file.get(), &buf, 1), SyscallSucceedsWithValue(1)); + EXPECT_THAT(lseek(ro_file.get(), 0, SEEK_SET), SyscallSucceeds()); + EXPECT_THAT(write(ro_file.get(), &buf, 1), SyscallFailsWithErrno(EBADF)); +} + +TEST_F(OpenTest, WriteOnly) { + char buf; + const FileDescriptor wo_file = + ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_WRONLY)); + + EXPECT_THAT(read(wo_file.get(), &buf, 1), SyscallFailsWithErrno(EBADF)); + EXPECT_THAT(lseek(wo_file.get(), 0, SEEK_SET), SyscallSucceeds()); 
+ EXPECT_THAT(write(wo_file.get(), &buf, 1), SyscallSucceedsWithValue(1)); +} + +TEST_F(OpenTest, ReadWrite) { + char buf; + const FileDescriptor rw_file = + ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR)); + + EXPECT_THAT(read(rw_file.get(), &buf, 1), SyscallSucceedsWithValue(1)); + EXPECT_THAT(lseek(rw_file.get(), 0, SEEK_SET), SyscallSucceeds()); + EXPECT_THAT(write(rw_file.get(), &buf, 1), SyscallSucceedsWithValue(1)); +} + +TEST_F(OpenTest, RelPath) { + auto name = std::string(Basename(test_file_name_)); + + ASSERT_THAT(chdir(GetAbsoluteTestTmpdir().c_str()), SyscallSucceeds()); + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(name, O_RDONLY)); +} + +TEST_F(OpenTest, AbsPath) { + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDONLY)); +} + +TEST_F(OpenTest, AtRelPath) { + auto name = std::string(Basename(test_file_name_)); + const FileDescriptor dirfd = ASSERT_NO_ERRNO_AND_VALUE( + Open(GetAbsoluteTestTmpdir(), O_RDONLY | O_DIRECTORY)); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(OpenAt(dirfd.get(), name, O_RDONLY)); +} + +TEST_F(OpenTest, AtAbsPath) { + const FileDescriptor dirfd = ASSERT_NO_ERRNO_AND_VALUE( + Open(GetAbsoluteTestTmpdir(), O_RDONLY | O_DIRECTORY)); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(OpenAt(dirfd.get(), test_file_name_, O_RDONLY)); +} + +TEST_F(OpenTest, OpenNoFollowSymlink) { + const std::string link_path = JoinPath(GetAbsoluteTestTmpdir(), "link"); + ASSERT_THAT(symlink(test_file_name_.c_str(), link_path.c_str()), + SyscallSucceeds()); + auto cleanup = Cleanup([link_path]() { + EXPECT_THAT(unlink(link_path.c_str()), SyscallSucceeds()); + }); + + // Open will succeed without O_NOFOLLOW and fails with O_NOFOLLOW. 
+ const FileDescriptor fd2 = + ASSERT_NO_ERRNO_AND_VALUE(Open(link_path, O_RDONLY)); + ASSERT_THAT(open(link_path.c_str(), O_RDONLY | O_NOFOLLOW), + SyscallFailsWithErrno(ELOOP)); +} + +TEST_F(OpenTest, OpenNoFollowStillFollowsLinksInPath) { + // We will create the following structure: + // tmp_folder/real_folder/file + // tmp_folder/sym_folder -> tmp_folder/real_folder + // + // We will then open tmp_folder/sym_folder/file with O_NOFOLLOW and it + // should succeed as O_NOFOLLOW only applies to the final path component. + auto tmp_path = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(GetAbsoluteTestTmpdir())); + auto sym_path = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateSymlinkTo(GetAbsoluteTestTmpdir(), tmp_path.path())); + auto file_path = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(tmp_path.path())); + + auto path_via_symlink = JoinPath(sym_path.path(), Basename(file_path.path())); + const FileDescriptor fd2 = + ASSERT_NO_ERRNO_AND_VALUE(Open(path_via_symlink, O_RDONLY | O_NOFOLLOW)); +} + +TEST_F(OpenTest, Fault) { + char* totally_not_null = nullptr; + ASSERT_THAT(open(totally_not_null, O_RDONLY), SyscallFailsWithErrno(EFAULT)); +} + +TEST_F(OpenTest, AppendOnly) { + // First write some data to the fresh file. + const int64_t kBufSize = 1024; + std::vector<char> buf(kBufSize, 'a'); + + FileDescriptor fd0 = ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR)); + + std::fill(buf.begin(), buf.end(), 'a'); + EXPECT_THAT(WriteFd(fd0.get(), buf.data(), buf.size()), + SyscallSucceedsWithValue(buf.size())); + fd0.reset(); // Close the file early. + + // Next get two handles to the same file. We open two files because we want + // to make sure that appending is respected between them. 
+ const FileDescriptor fd1 = + ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR | O_APPEND)); + EXPECT_THAT(lseek(fd1.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(0)); + + const FileDescriptor fd2 = + ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR | O_APPEND)); + EXPECT_THAT(lseek(fd2.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(0)); + + // Then try to write to the first file and make sure the bytes are appended. + EXPECT_THAT(WriteFd(fd1.get(), buf.data(), buf.size()), + SyscallSucceedsWithValue(buf.size())); + + // Check that the size of the file is correct and that the offset has been + // incremented to that size. + struct stat s0; + EXPECT_THAT(fstat(fd1.get(), &s0), SyscallSucceeds()); + EXPECT_EQ(s0.st_size, kBufSize * 2); + EXPECT_THAT(lseek(fd1.get(), 0, SEEK_CUR), + SyscallSucceedsWithValue(kBufSize * 2)); + + // Then try to write to the second file and make sure the bytes are appended. + EXPECT_THAT(WriteFd(fd2.get(), buf.data(), buf.size()), + SyscallSucceedsWithValue(buf.size())); + + // Check that the size of the file is correct and that the offset has been + // incremented to that size. + struct stat s1; + EXPECT_THAT(fstat(fd2.get(), &s1), SyscallSucceeds()); + EXPECT_EQ(s1.st_size, kBufSize * 3); + EXPECT_THAT(lseek(fd2.get(), 0, SEEK_CUR), + SyscallSucceedsWithValue(kBufSize * 3)); +} + +TEST_F(OpenTest, Truncate) { + { + // First write some data to the new file and close it. + FileDescriptor fd0 = + ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_WRONLY)); + std::vector<char> orig(10, 'a'); + EXPECT_THAT(WriteFd(fd0.get(), orig.data(), orig.size()), + SyscallSucceedsWithValue(orig.size())); + } + + // Then open with truncate and verify that offset is set to 0. + const FileDescriptor fd1 = + ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR | O_TRUNC)); + EXPECT_THAT(lseek(fd1.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(0)); + + // Then write less data to the file and ensure the old content is gone. 
+ std::vector<char> want(5, 'b'); + EXPECT_THAT(WriteFd(fd1.get(), want.data(), want.size()), + SyscallSucceedsWithValue(want.size())); + + struct stat stat; + EXPECT_THAT(fstat(fd1.get(), &stat), SyscallSucceeds()); + EXPECT_EQ(stat.st_size, want.size()); + EXPECT_THAT(lseek(fd1.get(), 0, SEEK_CUR), + SyscallSucceedsWithValue(want.size())); + + // Read the data and ensure only the latest write is in the file. + std::vector<char> got(want.size() + 1, 'c'); + ASSERT_THAT(pread(fd1.get(), got.data(), got.size(), 0), + SyscallSucceedsWithValue(want.size())); + EXPECT_EQ(memcmp(want.data(), got.data(), want.size()), 0) + << "rbuf=" << got.data(); + EXPECT_EQ(got.back(), 'c'); // Last byte should not have been modified. +} + +TEST_F(OpenTest, NameTooLong) { + char buf[4097] = {}; + memset(buf, 'a', 4097); + EXPECT_THAT(open(buf, O_RDONLY), SyscallFailsWithErrno(ENAMETOOLONG)); +} + +TEST_F(OpenTest, DotsFromRoot) { + const FileDescriptor rootfd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/", O_RDONLY | O_DIRECTORY)); + const FileDescriptor other_rootfd = + ASSERT_NO_ERRNO_AND_VALUE(OpenAt(rootfd.get(), "..", O_RDONLY)); +} + +TEST_F(OpenTest, DirectoryWritableFails) { + ASSERT_THAT(open(GetAbsoluteTestTmpdir().c_str(), O_RDWR), + SyscallFailsWithErrno(EISDIR)); +} + +TEST_F(OpenTest, FileNotDirectory) { + // Create a file and try to open it with O_DIRECTORY. + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + ASSERT_THAT(open(file.path().c_str(), O_RDONLY | O_DIRECTORY), + SyscallFailsWithErrno(ENOTDIR)); +} + +TEST_F(OpenTest, Null) { + char c = '\0'; + ASSERT_THAT(open(&c, O_RDONLY), SyscallFailsWithErrno(ENOENT)); +} + +// NOTE: While the man pages specify that this behavior should be +// undefined, Linux truncates the file on opening read only if we have write +// permission, so we will too. 
TEST_F(OpenTest, CanTruncateReadOnly) {
  const FileDescriptor fd1 =
      ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDONLY | O_TRUNC));

  struct stat stat;
  EXPECT_THAT(fstat(fd1.get(), &stat), SyscallSucceeds());
  EXPECT_EQ(stat.st_size, 0);
}

// If we don't have write permission on the file, opening with
// O_TRUNC should fail, and the file contents must be left intact.
TEST_F(OpenTest, CanTruncateReadOnlyNoWritePermission) {
  // Drop capabilities that allow us to override file permissions.
  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));

  const DisableSave ds;  // Permissions are dropped.
  // Leave only the read bits set, removing write permission.
  ASSERT_THAT(chmod(test_file_name_.c_str(), S_IRUSR | S_IRGRP),
              SyscallSucceeds());

  ASSERT_THAT(open(test_file_name_.c_str(), O_RDONLY | O_TRUNC),
              SyscallFailsWithErrno(EACCES));

  const FileDescriptor fd1 =
      ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDONLY));

  // The failed open must not have truncated the file.
  struct stat stat;
  EXPECT_THAT(fstat(fd1.get(), &stat), SyscallSucceeds());
  EXPECT_EQ(stat.st_size, test_data_.size());
}

// If we don't have read permission but have write permission, opening O_WRONLY
// and O_TRUNC should succeed.
TEST_F(OpenTest, CanTruncateWriteOnlyNoReadPermission) {
  const DisableSave ds;  // Permissions are dropped.

  EXPECT_THAT(fchmod(test_file_fd_.get(), S_IWUSR | S_IWGRP),
              SyscallSucceeds());

  const FileDescriptor fd1 =
      ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_WRONLY | O_TRUNC));

  // Restore read permission so the result can be inspected through a
  // read-only descriptor.
  EXPECT_THAT(fchmod(test_file_fd_.get(), S_IRUSR | S_IRGRP),
              SyscallSucceeds());

  const FileDescriptor fd2 =
      ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDONLY));

  struct stat stat;
  EXPECT_THAT(fstat(fd2.get(), &stat), SyscallSucceeds());
  EXPECT_EQ(stat.st_size, 0);
}

}  // namespace

}  // namespace testing
}  // namespace gvisor
diff --git a/test/syscalls/linux/open_create.cc b/test/syscalls/linux/open_create.cc
new file mode 100644
index 000000000..b2cbd63d1
--- /dev/null
+++ b/test/syscalls/linux/open_create.cc
@@ -0,0 +1,130 @@
// Copyright 2018 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
+ +#include <errno.h> +#include <fcntl.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "test/syscalls/linux/temp_umask.h" +#include "test/util/capability_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/fs_util.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { +// open(2) with O_RDWR | O_CREAT on a path in the test tmpdir succeeds. +TEST(CreateTest, TmpFile) { + int fd; + // ASSERT rather than EXPECT: the descriptor is used by close() below. + ASSERT_THAT(fd = open(JoinPath(GetAbsoluteTestTmpdir(), "a").c_str(), + O_RDWR | O_CREAT, 0666), + SyscallSucceeds()); + EXPECT_THAT(close(fd), SyscallSucceeds()); +} + +// O_CREAT without O_EXCL succeeds both when the file is first created and +// when it already exists. +TEST(CreateTest, ExistingFile) { + int fd; + ASSERT_THAT( + fd = open(JoinPath(GetAbsoluteTestTmpdir(), "ExistingFile").c_str(), + O_RDWR | O_CREAT, 0666), + SyscallSucceeds()); + EXPECT_THAT(close(fd), SyscallSucceeds()); + + ASSERT_THAT( + fd = open(JoinPath(GetAbsoluteTestTmpdir(), "ExistingFile").c_str(), + O_RDWR | O_CREAT, 0666), + SyscallSucceeds()); + EXPECT_THAT(close(fd), SyscallSucceeds()); +} + +// openat(2) with O_CREAT creates a file relative to a directory descriptor. +TEST(CreateTest, CreateAtFile) { + int dirfd; + ASSERT_THAT(dirfd = open(GetAbsoluteTestTmpdir().c_str(), O_DIRECTORY, 0666), + SyscallSucceeds()); + int fd; + EXPECT_THAT(fd = openat(dirfd, "CreateAtFile", O_RDWR | O_CREAT, 0666), + SyscallSucceeds()); + // Close the created file as well so its descriptor does not leak. + if (fd >= 0) { + EXPECT_THAT(close(fd), SyscallSucceeds()); + } + EXPECT_THAT(close(dirfd), SyscallSucceeds()); +} + +TEST(CreateTest, HonorsUmask_NoRandomSave) { + const DisableSave ds; // file cannot be re-opened as writable. 
+ TempUmask mask(0222); + int fd; + ASSERT_THAT( + fd = open(JoinPath(GetAbsoluteTestTmpdir(), "UmaskedFile").c_str(), + O_RDWR | O_CREAT, 0666), + SyscallSucceeds()); + struct stat statbuf; + ASSERT_THAT(fstat(fd, &statbuf), SyscallSucceeds()); + EXPECT_EQ(0444, statbuf.st_mode & 0777); + EXPECT_THAT(close(fd), SyscallSucceeds()); +} + +TEST(CreateTest, CreateExclusively) { + std::string filename = NewTempAbsPath(); + + int fd; + ASSERT_THAT(fd = open(filename.c_str(), O_CREAT | O_RDWR, 0644), + SyscallSucceeds()); + EXPECT_THAT(close(fd), SyscallSucceeds()); + + EXPECT_THAT(open(filename.c_str(), O_CREAT | O_EXCL | O_RDWR, 0644), + SyscallFailsWithErrno(EEXIST)); +} + +TEST(CreateTest, CreateFailsOnUnpermittedDir) { + // Make sure we don't have CAP_DAC_OVERRIDE, since that allows the user to + // always override directory permissions. + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + ASSERT_THAT(open("/foo", O_CREAT | O_RDWR, 0644), + SyscallFailsWithErrno(EACCES)); +} + +TEST(CreateTest, CreateFailsOnDirWithoutWritePerms) { + // Make sure we don't have CAP_DAC_OVERRIDE, since that allows the user to + // always override directory permissions. + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + auto parent = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateDirWith(GetAbsoluteTestTmpdir(), 0555)); + auto file = JoinPath(parent.path(), "foo"); + ASSERT_THAT(open(file.c_str(), O_CREAT | O_RDWR, 0644), + SyscallFailsWithErrno(EACCES)); +} + +// A file originally created RW, but opened RO can later be opened RW. +TEST(CreateTest, OpenCreateROThenRW) { + TempPath file(NewTempAbsPath()); + + // Create a RW file, but only open it RO. + FileDescriptor fd1 = ASSERT_NO_ERRNO_AND_VALUE( + Open(file.path(), O_CREAT | O_EXCL | O_RDONLY, 0644)); + + // Now get a RW FD. + FileDescriptor fd2 = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR)); + + // fd1 is not writable, but fd2 is. 
+ char c = 'a'; + EXPECT_THAT(WriteFd(fd1.get(), &c, 1), SyscallFailsWithErrno(EBADF)); + EXPECT_THAT(WriteFd(fd2.get(), &c, 1), SyscallSucceedsWithValue(1)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/partial_bad_buffer.cc b/test/syscalls/linux/partial_bad_buffer.cc new file mode 100644 index 000000000..073a6b8c1 --- /dev/null +++ b/test/syscalls/linux/partial_bad_buffer.cc @@ -0,0 +1,305 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <errno.h> +#include <fcntl.h> +#include <sys/mman.h> +#include <sys/syscall.h> +#include <sys/uio.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "test/util/fs_util.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +using ::testing::Gt; + +namespace gvisor { +namespace testing { + +namespace { + +constexpr char kMessage[] = "hello world"; + +// PartialBadBufferTest checks the result of various IO syscalls when passed a +// buffer that does not have the space specified in the syscall (most of it is +// PROT_NONE). Linux is annoyingly inconsistent among different syscalls, so we +// test all of them. +class PartialBadBufferTest : public ::testing::Test { + protected: + void SetUp() override { + // Create and open a directory for getdents cases. 
+ directory_ = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + ASSERT_THAT( + directory_fd_ = open(directory_.path().c_str(), O_RDONLY | O_DIRECTORY), + SyscallSucceeds()); + + // Create and open a normal file, placing it in the directory + // so the getdents cases have some dirents. + name_ = JoinPath(directory_.path(), "a"); + ASSERT_THAT(fd_ = open(name_.c_str(), O_RDWR | O_CREAT, 0644), + SyscallSucceeds()); + + // Write some initial data. + size_t size = sizeof(kMessage) - 1; + EXPECT_THAT(WriteFd(fd_, &kMessage, size), SyscallSucceedsWithValue(size)); + + ASSERT_THAT(lseek(fd_, 0, SEEK_SET), SyscallSucceeds()); + + addr_ = mmap(0, 2 * kPageSize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(addr_, MAP_FAILED); + char* buf = reinterpret_cast<char*>(addr_); + + // Guard page for our read to run into. + ASSERT_THAT(mprotect(reinterpret_cast<void*>(buf + kPageSize), kPageSize, + PROT_NONE), + SyscallSucceeds()); + + // Leave only one free byte in the buffer. + bad_buffer_ = buf + kPageSize - 1; + } + + void TearDown() override { + EXPECT_THAT(munmap(addr_, 2 * kPageSize), SyscallSucceeds()) << addr_; + EXPECT_THAT(close(fd_), SyscallSucceeds()); + EXPECT_THAT(unlink(name_.c_str()), SyscallSucceeds()); + EXPECT_THAT(close(directory_fd_), SyscallSucceeds()); + } + + // Return buffer with n bytes of free space. + // N.B. this is the same buffer used to back bad_buffer_. + char* FreeBytes(size_t n) { + // The accessible region is the first kPageSize bytes of the mapping, so + // bound n by kPageSize rather than a hard-coded page size. + TEST_CHECK(n <= static_cast<size_t>(kPageSize)); + return reinterpret_cast<char*>(addr_) + kPageSize - n; + } + + std::string name_; + int fd_; + TempPath directory_; + int directory_fd_; + void* addr_; + char* bad_buffer_; +}; + +// We do both "big" and "small" tests to try to hit the "zero copy" and +// non-"zero copy" paths, which have different code paths for handling faults. 
+ +TEST_F(PartialBadBufferTest, ReadBig) { + EXPECT_THAT(RetryEINTR(read)(fd_, bad_buffer_, kPageSize), + SyscallSucceedsWithValue(1)); + EXPECT_EQ('h', bad_buffer_[0]); +} + +TEST_F(PartialBadBufferTest, ReadSmall) { + EXPECT_THAT(RetryEINTR(read)(fd_, bad_buffer_, 10), + SyscallSucceedsWithValue(1)); + EXPECT_EQ('h', bad_buffer_[0]); +} + +TEST_F(PartialBadBufferTest, PreadBig) { + EXPECT_THAT(RetryEINTR(pread)(fd_, bad_buffer_, kPageSize, 0), + SyscallSucceedsWithValue(1)); + EXPECT_EQ('h', bad_buffer_[0]); +} + +TEST_F(PartialBadBufferTest, PreadSmall) { + EXPECT_THAT(RetryEINTR(pread)(fd_, bad_buffer_, 10, 0), + SyscallSucceedsWithValue(1)); + EXPECT_EQ('h', bad_buffer_[0]); +} + +TEST_F(PartialBadBufferTest, ReadvBig) { + struct iovec vec; + vec.iov_base = bad_buffer_; + vec.iov_len = kPageSize; + + EXPECT_THAT(RetryEINTR(readv)(fd_, &vec, 1), SyscallSucceedsWithValue(1)); + EXPECT_EQ('h', bad_buffer_[0]); +} + +TEST_F(PartialBadBufferTest, ReadvSmall) { + struct iovec vec; + vec.iov_base = bad_buffer_; + vec.iov_len = 10; + + EXPECT_THAT(RetryEINTR(readv)(fd_, &vec, 1), SyscallSucceedsWithValue(1)); + EXPECT_EQ('h', bad_buffer_[0]); +} + +TEST_F(PartialBadBufferTest, PreadvBig) { + struct iovec vec; + vec.iov_base = bad_buffer_; + vec.iov_len = kPageSize; + + EXPECT_THAT(RetryEINTR(preadv)(fd_, &vec, 1, 0), SyscallSucceedsWithValue(1)); + EXPECT_EQ('h', bad_buffer_[0]); +} + +TEST_F(PartialBadBufferTest, PreadvSmall) { + struct iovec vec; + vec.iov_base = bad_buffer_; + vec.iov_len = 10; + + EXPECT_THAT(RetryEINTR(preadv)(fd_, &vec, 1, 0), SyscallSucceedsWithValue(1)); + EXPECT_EQ('h', bad_buffer_[0]); +} + +TEST_F(PartialBadBufferTest, WriteBig) { + // FIXME: The sentry write syscalls will return immediately + // if Access returns an error, but Access may not return an error + // and the sentry will instead perform a partial write. 
+ SKIP_IF(IsRunningOnGvisor()); + + EXPECT_THAT(RetryEINTR(write)(fd_, bad_buffer_, kPageSize), + SyscallFailsWithErrno(EFAULT)); +} + +TEST_F(PartialBadBufferTest, WriteSmall) { + // FIXME: The sentry write syscalls will return immediately + // if Access returns an error, but Access may not return an error + // and the sentry will instead perform a partial write. + SKIP_IF(IsRunningOnGvisor()); + + EXPECT_THAT(RetryEINTR(write)(fd_, bad_buffer_, 10), + SyscallFailsWithErrno(EFAULT)); +} + +TEST_F(PartialBadBufferTest, PwriteBig) { + // FIXME: The sentry write syscalls will return immediately + // if Access returns an error, but Access may not return an error + // and the sentry will instead perform a partial write. + SKIP_IF(IsRunningOnGvisor()); + + EXPECT_THAT(RetryEINTR(pwrite)(fd_, bad_buffer_, kPageSize, 0), + SyscallFailsWithErrno(EFAULT)); +} + +TEST_F(PartialBadBufferTest, PwriteSmall) { + // FIXME: The sentry write syscalls will return immediately + // if Access returns an error, but Access may not return an error + // and the sentry will instead perform a partial write. + SKIP_IF(IsRunningOnGvisor()); + + EXPECT_THAT(RetryEINTR(pwrite)(fd_, bad_buffer_, 10, 0), + SyscallFailsWithErrno(EFAULT)); +} + +TEST_F(PartialBadBufferTest, WritevBig) { + // FIXME: The sentry write syscalls will return immediately + // if Access returns an error, but Access may not return an error + // and the sentry will instead perform a partial write. + SKIP_IF(IsRunningOnGvisor()); + + struct iovec vec; + vec.iov_base = bad_buffer_; + vec.iov_len = kPageSize; + + EXPECT_THAT(RetryEINTR(writev)(fd_, &vec, 1), SyscallFailsWithErrno(EFAULT)); +} + +TEST_F(PartialBadBufferTest, WritevSmall) { + // FIXME: The sentry write syscalls will return immediately + // if Access returns an error, but Access may not return an error + // and the sentry will instead perform a partial write. 
+ SKIP_IF(IsRunningOnGvisor()); + + struct iovec vec; + vec.iov_base = bad_buffer_; + vec.iov_len = 10; + + EXPECT_THAT(RetryEINTR(writev)(fd_, &vec, 1), SyscallFailsWithErrno(EFAULT)); +} + +TEST_F(PartialBadBufferTest, PwritevBig) { + // FIXME: The sentry write syscalls will return immediately + // if Access returns an error, but Access may not return an error + // and the sentry will instead perform a partial write. + SKIP_IF(IsRunningOnGvisor()); + + struct iovec vec; + vec.iov_base = bad_buffer_; + vec.iov_len = kPageSize; + + EXPECT_THAT(RetryEINTR(pwritev)(fd_, &vec, 1, 0), + SyscallFailsWithErrno(EFAULT)); +} + +TEST_F(PartialBadBufferTest, PwritevSmall) { + // FIXME: The sentry write syscalls will return immediately + // if Access returns an error, but Access may not return an error + // and the sentry will instead perform a partial write. + SKIP_IF(IsRunningOnGvisor()); + + struct iovec vec; + vec.iov_base = bad_buffer_; + vec.iov_len = 10; + + EXPECT_THAT(RetryEINTR(pwritev)(fd_, &vec, 1, 0), + SyscallFailsWithErrno(EFAULT)); +} + +// getdents returns EFAULT when you claim the buffer is large enough, but +// it actually isn't. +TEST_F(PartialBadBufferTest, GetdentsBig) { + EXPECT_THAT(RetryEINTR(syscall)(SYS_getdents64, directory_fd_, bad_buffer_, + kPageSize), + SyscallFailsWithErrno(EFAULT)); +} + +// getdents returns EINVAL when you claim the buffer is too small. +TEST_F(PartialBadBufferTest, GetdentsSmall) { + EXPECT_THAT( + RetryEINTR(syscall)(SYS_getdents64, directory_fd_, bad_buffer_, 10), + SyscallFailsWithErrno(EINVAL)); +} + +// getdents will write entries into a buffer if there is space before it faults. +TEST_F(PartialBadBufferTest, GetdentsOneEntry) { + // 30 bytes is enough for one (small) entry. 
+ char* buf = FreeBytes(30); + + EXPECT_THAT( + RetryEINTR(syscall)(SYS_getdents64, directory_fd_, buf, kPageSize), + SyscallSucceedsWithValue(Gt(0))); +} + +// Verify that when write returns EFAULT the kernel hasn't silently written +// the initial valid bytes. +TEST_F(PartialBadBufferTest, WriteEfaultIsntPartial) { + // FIXME: The sentry write syscalls will return immediately + // if Access returns an error, but Access may not return an error + // and the sentry will instead perform a partial write. + SKIP_IF(IsRunningOnGvisor()); + + bad_buffer_[0] = 'A'; + EXPECT_THAT(RetryEINTR(write)(fd_, bad_buffer_, 10), + SyscallFailsWithErrno(EFAULT)); + + // Zero-filled, and the last byte is never handed to pread, so the data read + // back is always NUL-terminated for EXPECT_STREQ. + char buf[255] = {}; + + EXPECT_THAT(RetryEINTR(pread)(fd_, buf, sizeof(buf) - 1, 0), + SyscallSucceedsWithValue(sizeof(kMessage) - 1)); + + // 'A' has not been written. + EXPECT_STREQ(buf, kMessage); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/pause.cc b/test/syscalls/linux/pause.cc new file mode 100644 index 000000000..4e1148c24 --- /dev/null +++ b/test/syscalls/linux/pause.cc @@ -0,0 +1,88 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <errno.h> +#include <signal.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <unistd.h> + +#include <atomic> + +#include "gtest/gtest.h" +#include "absl/synchronization/mutex.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/util/signal_util.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +void NoopSignalHandler(int sig, siginfo_t* info, void* context) {} + +} // namespace + +TEST(PauseTest, OnlyReturnsWhenSignalHandled) { + // Zero-initialize so sa_flags is not indeterminate when SIGUSR1 is set to + // SIG_IGN below. + struct sigaction sa = {}; + sigfillset(&sa.sa_mask); + + // Ensure that SIGUSR1 is ignored. + sa.sa_handler = SIG_IGN; + ASSERT_THAT(sigaction(SIGUSR1, &sa, nullptr), SyscallSucceeds()); + + // Register a handler for SIGUSR2. + sa.sa_sigaction = NoopSignalHandler; + sa.sa_flags = SA_SIGINFO; + ASSERT_THAT(sigaction(SIGUSR2, &sa, nullptr), SyscallSucceeds()); + + // The child sets their own tid. + absl::Mutex mu; + pid_t child_tid = 0; + bool child_tid_available = false; + std::atomic<int> sent_signal{0}; + std::atomic<int> waking_signal{0}; + ScopedThread t([&] { + mu.Lock(); + child_tid = gettid(); + child_tid_available = true; + mu.Unlock(); + EXPECT_THAT(pause(), SyscallFailsWithErrno(EINTR)); + waking_signal.store(sent_signal.load()); + }); + mu.Lock(); + mu.Await(absl::Condition(&child_tid_available)); + mu.Unlock(); + + // Wait a bit to let the child enter pause(). + absl::SleepFor(absl::Seconds(3)); + + // The child should not be woken by SIGUSR1. + sent_signal.store(SIGUSR1); + ASSERT_THAT(tgkill(getpid(), child_tid, SIGUSR1), SyscallSucceeds()); + absl::SleepFor(absl::Seconds(3)); + + // The child should be woken by SIGUSR2. 
+ sent_signal.store(SIGUSR2); + ASSERT_THAT(tgkill(getpid(), child_tid, SIGUSR2), SyscallSucceeds()); + absl::SleepFor(absl::Seconds(3)); + + EXPECT_EQ(SIGUSR2, waking_signal.load()); +} + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/pipe.cc b/test/syscalls/linux/pipe.cc new file mode 100644 index 000000000..4731157e8 --- /dev/null +++ b/test/syscalls/linux/pipe.cc @@ -0,0 +1,480 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <fcntl.h> /* Obtain O_* constant definitions */ +#include <signal.h> +#include <sys/ioctl.h> +#include <sys/stat.h> +#include <sys/uio.h> +#include <unistd.h> + +#include <vector> + +#include "gtest/gtest.h" +#include "gtest/gtest.h" +#include "absl/strings/str_cat.h" +#include "absl/synchronization/notification.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/util/file_descriptor.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// Buffer size of a pipe. +// +// TODO: Get this from F_GETPIPE_SZ. +constexpr int kPipeSize = 65536; + +class PipeTest : public ::testing::Test { + public: + static void SetUpTestCase() { + // Tests intentionally generate SIGPIPE. + TEST_PCHECK(signal(SIGPIPE, SIG_IGN) != SIG_ERR); + } + + static void TearDownTestCase() { + TEST_PCHECK(signal(SIGPIPE, SIG_DFL) != SIG_ERR); + } +}; + +TEST_F(PipeTest, Basic) { + // fds[0] is read end, fds[1] is write end. 
+ int fds[2]; + int i = 0x12345678; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + + // Ensure that the inode number is the same for each end. + struct stat rst; + ASSERT_THAT(fstat(fds[0], &rst), SyscallSucceeds()); + struct stat wst; + ASSERT_THAT(fstat(fds[1], &wst), SyscallSucceeds()); + EXPECT_EQ(rst.st_ino, wst.st_ino); + + ASSERT_THAT(write(fds[0], &i, sizeof(i)), SyscallFailsWithErrno(EBADF)); + ASSERT_THAT(read(fds[1], &i, sizeof(i)), SyscallFailsWithErrno(EBADF)); + + ASSERT_THAT(write(fds[1], &i, sizeof(i)), + SyscallSucceedsWithValue(sizeof(i))); + int j; + ASSERT_THAT(read(fds[0], &j, sizeof(j)), SyscallSucceedsWithValue(sizeof(j))); + EXPECT_EQ(i, j); + + ASSERT_THAT(fcntl(fds[0], F_GETFL), SyscallSucceeds()); + ASSERT_THAT(fcntl(fds[1], F_GETFL), SyscallSucceedsWithValue(O_WRONLY)); + + ASSERT_THAT(close(fds[0]), SyscallSucceeds()); + ASSERT_THAT(close(fds[1]), SyscallSucceeds()); +} + +TEST_F(PipeTest, BasicCloExec) { + // fds[0] is read end, fds[1] is write end. + int fds[2]; + int i = 0x12345678; + ASSERT_THAT(pipe2(fds, O_CLOEXEC), SyscallSucceeds()); + + ASSERT_THAT(write(fds[0], &i, sizeof(i)), SyscallFailsWithErrno(EBADF)); + ASSERT_THAT(read(fds[1], &i, sizeof(i)), SyscallFailsWithErrno(EBADF)); + + ASSERT_THAT(write(fds[1], &i, sizeof(i)), + SyscallSucceedsWithValue(sizeof(i))); + int j; + ASSERT_THAT(read(fds[0], &j, sizeof(j)), SyscallSucceedsWithValue(sizeof(j))); + EXPECT_EQ(i, j); + + ASSERT_THAT(fcntl(fds[0], F_GETFL), SyscallSucceeds()); + ASSERT_THAT(fcntl(fds[1], F_GETFL), SyscallSucceeds()); + + ASSERT_THAT(close(fds[0]), SyscallSucceeds()); + ASSERT_THAT(close(fds[1]), SyscallSucceeds()); +} + +TEST_F(PipeTest, BasicNoBlock) { + // fds[0] is read end, fds[1] is write end. 
+ int fds[2]; + int i = 0x12345678; + ASSERT_THAT(pipe2(fds, O_NONBLOCK), SyscallSucceeds()); + + ASSERT_THAT(write(fds[0], &i, sizeof(i)), SyscallFailsWithErrno(EBADF)); + ASSERT_THAT(read(fds[1], &i, sizeof(i)), SyscallFailsWithErrno(EBADF)); + + ASSERT_THAT(read(fds[0], &i, sizeof(i)), SyscallFailsWithErrno(EWOULDBLOCK)); + ASSERT_THAT(write(fds[1], &i, sizeof(i)), + SyscallSucceedsWithValue(sizeof(i))); + int j; + ASSERT_THAT(read(fds[0], &j, sizeof(j)), SyscallSucceedsWithValue(sizeof(j))); + EXPECT_EQ(i, j); + ASSERT_THAT(read(fds[0], &i, sizeof(i)), SyscallFailsWithErrno(EWOULDBLOCK)); + + ASSERT_THAT(fcntl(fds[0], F_GETFL), SyscallSucceedsWithValue(O_NONBLOCK)); + ASSERT_THAT(fcntl(fds[1], F_GETFL), + SyscallSucceedsWithValue(O_NONBLOCK | O_WRONLY)); + + ASSERT_THAT(close(fds[0]), SyscallSucceeds()); + ASSERT_THAT(close(fds[1]), SyscallSucceeds()); +} + +TEST_F(PipeTest, BasicBothOptions) { + // fds[0] is read end, fds[1] is write end. + int fds[2]; + int i = 0x12345678; + ASSERT_THAT(pipe2(fds, O_NONBLOCK | O_CLOEXEC), SyscallSucceeds()); + + ASSERT_THAT(write(fds[0], &i, sizeof(i)), SyscallFailsWithErrno(EBADF)); + ASSERT_THAT(read(fds[1], &i, sizeof(i)), SyscallFailsWithErrno(EBADF)); + + ASSERT_THAT(read(fds[0], &i, sizeof(i)), SyscallFailsWithErrno(EWOULDBLOCK)); + ASSERT_THAT(write(fds[1], &i, sizeof(i)), + SyscallSucceedsWithValue(sizeof(i))); + int j; + ASSERT_THAT(read(fds[0], &j, sizeof(j)), SyscallSucceedsWithValue(sizeof(j))); + EXPECT_EQ(i, j); + ASSERT_THAT(read(fds[0], &i, sizeof(i)), SyscallFailsWithErrno(EWOULDBLOCK)); + + ASSERT_THAT(fcntl(fds[0], F_GETFL), SyscallSucceedsWithValue(O_NONBLOCK)); + ASSERT_THAT(fcntl(fds[1], F_GETFL), + SyscallSucceedsWithValue(O_NONBLOCK | O_WRONLY)); + + ASSERT_THAT(close(fds[0]), SyscallSucceeds()); + ASSERT_THAT(close(fds[1]), SyscallSucceeds()); +} + +TEST_F(PipeTest, BasicBadOptions) { + int fds[2]; + ASSERT_THAT(pipe2(fds, 0xDEAD), SyscallFailsWithErrno(EINVAL)); +} + +TEST_F(PipeTest, Seek) { + // 
fds[0] is read end, fds[1] is write end. + int fds[2]; + int i = 0x12345678; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + + ASSERT_THAT(lseek(fds[0], 0, SEEK_CUR), SyscallFailsWithErrno(ESPIPE)); + ASSERT_THAT(lseek(fds[1], 0, SEEK_CUR), SyscallFailsWithErrno(ESPIPE)); + ASSERT_THAT(lseek(fds[0], 0, SEEK_SET), SyscallFailsWithErrno(ESPIPE)); + ASSERT_THAT(lseek(fds[0], 4, SEEK_SET), SyscallFailsWithErrno(ESPIPE)); + ASSERT_THAT(lseek(fds[1], 0, SEEK_SET), SyscallFailsWithErrno(ESPIPE)); + ASSERT_THAT(lseek(fds[1], 4, SEEK_SET), SyscallFailsWithErrno(ESPIPE)); + + ASSERT_THAT(lseek(fds[0], 0, SEEK_CUR), SyscallFailsWithErrno(ESPIPE)); + ASSERT_THAT(lseek(fds[0], 4, SEEK_CUR), SyscallFailsWithErrno(ESPIPE)); + ASSERT_THAT(lseek(fds[1], 0, SEEK_CUR), SyscallFailsWithErrno(ESPIPE)); + ASSERT_THAT(lseek(fds[1], 4, SEEK_CUR), SyscallFailsWithErrno(ESPIPE)); + + ASSERT_THAT(write(fds[1], &i, sizeof(i)), + SyscallSucceedsWithValue(sizeof(i))); + int j; + + ASSERT_THAT(lseek(fds[0], 0, SEEK_SET), SyscallFailsWithErrno(ESPIPE)); + ASSERT_THAT(lseek(fds[0], 4, SEEK_SET), SyscallFailsWithErrno(ESPIPE)); + ASSERT_THAT(lseek(fds[1], 0, SEEK_SET), SyscallFailsWithErrno(ESPIPE)); + ASSERT_THAT(lseek(fds[1], 4, SEEK_SET), SyscallFailsWithErrno(ESPIPE)); + + ASSERT_THAT(lseek(fds[0], 0, SEEK_CUR), SyscallFailsWithErrno(ESPIPE)); + ASSERT_THAT(lseek(fds[0], 4, SEEK_CUR), SyscallFailsWithErrno(ESPIPE)); + ASSERT_THAT(lseek(fds[1], 0, SEEK_CUR), SyscallFailsWithErrno(ESPIPE)); + ASSERT_THAT(lseek(fds[1], 4, SEEK_CUR), SyscallFailsWithErrno(ESPIPE)); + + ASSERT_THAT(read(fds[0], &j, sizeof(j)), SyscallSucceedsWithValue(sizeof(j))); + EXPECT_EQ(i, j); + + ASSERT_THAT(fcntl(fds[0], F_GETFL), SyscallSucceeds()); + ASSERT_THAT(fcntl(fds[1], F_GETFL), SyscallSucceedsWithValue(O_WRONLY)); + + ASSERT_THAT(close(fds[0]), SyscallSucceeds()); + ASSERT_THAT(close(fds[1]), SyscallSucceeds()); +} + +TEST_F(PipeTest, AbsoluteOffsetSyscallsFail) { + // Syscalls for IO at absolute offsets fail 
because pipes are not seekable. + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + + std::vector<char> buf(4096); + // Point the iovec at a valid buffer instead of passing indeterminate + // contents to the kernel. + struct iovec iov = {buf.data(), buf.size()}; + + EXPECT_THAT(pread(fds[1], buf.data(), buf.size(), 0), + SyscallFailsWithErrno(ESPIPE)); + EXPECT_THAT(pwrite(fds[0], buf.data(), buf.size(), 0), + SyscallFailsWithErrno(ESPIPE)); + EXPECT_THAT(preadv(fds[1], &iov, 1, 0), SyscallFailsWithErrno(ESPIPE)); + EXPECT_THAT(pwritev(fds[0], &iov, 1, 0), SyscallFailsWithErrno(ESPIPE)); + + EXPECT_THAT(close(fds[0]), SyscallSucceeds()); + EXPECT_THAT(close(fds[1]), SyscallSucceeds()); +} + +TEST_F(PipeTest, WriterSideCloses) { + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + int rfd = fds[0]; + int i = 123; + ScopedThread t([rfd]() { + int j; + ASSERT_THAT(read(rfd, &j, sizeof(j)), SyscallSucceedsWithValue(sizeof(j))); + // This will return when the close() completes. + ASSERT_THAT(read(rfd, &j, sizeof(j)), SyscallSucceeds()); + // This will return straight away. + ASSERT_THAT(read(rfd, &j, sizeof(j)), SyscallSucceeds()); + }); + // Sleep a bit so the thread can block. + absl::SleepFor(absl::Seconds(1.0)); + ASSERT_THAT(write(fds[1], &i, sizeof(i)), + SyscallSucceedsWithValue(sizeof(i))); + // Sleep a bit so the thread can block again. 
+ absl::SleepFor(absl::Seconds(3.0)); + ASSERT_THAT(close(fds[1]), SyscallSucceeds()); + t.Join(); + + ASSERT_THAT(close(fds[0]), SyscallSucceeds()); +} + +TEST_F(PipeTest, WriterSideClosesReadDataFirst) { + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + int i = 123; + ASSERT_THAT(write(fds[1], &i, sizeof(i)), + SyscallSucceedsWithValue(sizeof(i))); + ASSERT_THAT(close(fds[1]), SyscallSucceeds()); + int j; + ASSERT_THAT(read(fds[0], &j, sizeof(j)), SyscallSucceedsWithValue(sizeof(j))); + ASSERT_EQ(j, i); + ASSERT_THAT(read(fds[0], &j, sizeof(j)), SyscallSucceeds()); + + ASSERT_THAT(close(fds[0]), SyscallSucceeds()); +} + +TEST_F(PipeTest, ReaderSideCloses) { + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + ASSERT_THAT(close(fds[0]), SyscallSucceeds()); + int i = 123; + ASSERT_THAT(write(fds[1], &i, sizeof(i)), SyscallFailsWithErrno(EPIPE)); + + ASSERT_THAT(close(fds[1]), SyscallSucceeds()); +} + +TEST_F(PipeTest, CloseTwice) { + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + ASSERT_THAT(close(fds[0]), SyscallSucceeds()); + ASSERT_THAT(close(fds[1]), SyscallSucceeds()); + ASSERT_THAT(close(fds[0]), SyscallFailsWithErrno(EBADF)); + ASSERT_THAT(close(fds[1]), SyscallFailsWithErrno(EBADF)); + + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + ASSERT_THAT(close(fds[1]), SyscallSucceeds()); + ASSERT_THAT(close(fds[0]), SyscallSucceeds()); + ASSERT_THAT(close(fds[0]), SyscallFailsWithErrno(EBADF)); + ASSERT_THAT(close(fds[1]), SyscallFailsWithErrno(EBADF)); +} + +// Blocking write returns EPIPE when read end is closed if nothing has been +// written. +TEST_F(PipeTest, BlockWriteClosed) { + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + int wfd = fds[1]; + + absl::Notification notify; + ScopedThread t([wfd, &notify]() { + std::vector<char> buf(kPipeSize); + // Exactly fill the pipe buffer. 
+ ASSERT_THAT(WriteFd(wfd, buf.data(), buf.size()), + SyscallSucceedsWithValue(buf.size())); + + notify.Notify(); + + // Attempt to write one more byte. Blocks. + // N.B. Don't use WriteFd, we don't want a retry. + ASSERT_THAT(write(wfd, buf.data(), 1), SyscallFailsWithErrno(EPIPE)); + }); + + notify.WaitForNotification(); + absl::SleepFor(absl::Seconds(1.0)); + ASSERT_THAT(close(fds[0]), SyscallSucceeds()); + + t.Join(); + + ASSERT_THAT(close(fds[1]), SyscallSucceeds()); +} + +// Blocking write returns EPIPE when read end is closed even if something has +// been written. +// +// FIXME: Pipe writes blocking early allows S/R to interrupt the +// write(2) call before the buffer is full. Then the next call will return +// non-zero instead of EPIPE. +TEST_F(PipeTest, BlockPartialWriteClosed_NoRandomSave) { + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + int wfd = fds[1]; + + ScopedThread t([wfd]() { + std::vector<char> buf(2 * kPipeSize); + // Write more than fits in the buffer. Blocks then returns partial write + // when the other end is closed. The next call returns EPIPE. + if (IsRunningOnGvisor()) { + // FIXME: Pipe writes block early on gVisor, resulting in a + // shorter than expected partial write. + ASSERT_THAT(write(wfd, buf.data(), buf.size()), + SyscallSucceedsWithValue(::testing::Gt(0))); + } else { + ASSERT_THAT(write(wfd, buf.data(), buf.size()), + SyscallSucceedsWithValue(kPipeSize)); + } + ASSERT_THAT(write(wfd, buf.data(), buf.size()), + SyscallFailsWithErrno(EPIPE)); + }); + + // Leave time for write to become blocked. 
+ absl::SleepFor(absl::Seconds(1.0)); + + ASSERT_THAT(close(fds[0]), SyscallSucceeds()); + + t.Join(); + + ASSERT_THAT(close(fds[1]), SyscallSucceeds()); +} + +TEST_F(PipeTest, ReadFromClosedFd_NoRandomSave) { + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + int rfd = fds[0]; + absl::Notification notify; + ScopedThread t([rfd, &notify]() { + int f; + notify.Notify(); + ASSERT_THAT(read(rfd, &f, sizeof(f)), SyscallSucceedsWithValue(sizeof(f))); + ASSERT_EQ(123, f); + }); + notify.WaitForNotification(); + // Make sure that the thread gets to read(). + absl::SleepFor(absl::Seconds(5.0)); + { + // We cannot save/restore here as the read end of pipe is closed but there + // is ongoing read() above. We will not be able to restart the read() + // successfully in restore run since the read fd is closed. + const DisableSave ds; + ASSERT_THAT(close(fds[0]), SyscallSucceeds()); + int i = 123; + ASSERT_THAT(write(fds[1], &i, sizeof(i)), + SyscallSucceedsWithValue(sizeof(i))); + t.Join(); + } + ASSERT_THAT(close(fds[1]), SyscallSucceeds()); +} + +TEST_F(PipeTest, FionRead) { + // fds[0] is read end, fds[1] is write end. + int fds[2]; + int data[2] = {0x12345678, 0x9101112}; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + + int n = -1; + EXPECT_THAT(ioctl(fds[0], FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_EQ(n, 0); + n = -1; + EXPECT_THAT(ioctl(fds[1], FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_EQ(n, 0); + + EXPECT_THAT(write(fds[1], data, sizeof(data)), + SyscallSucceedsWithValue(sizeof(data))); + + n = -1; + EXPECT_THAT(ioctl(fds[0], FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_EQ(n, sizeof(data)); + n = -1; + EXPECT_THAT(ioctl(fds[1], FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_EQ(n, sizeof(data)); + + // Close both ends so the descriptors do not leak into later tests. 
+ EXPECT_THAT(close(fds[0]), SyscallSucceeds()); + EXPECT_THAT(close(fds[1]), SyscallSucceeds()); +} + +// Test that opening an empty anonymous pipe RDONLY via /proc/self/fd/N does not +// block waiting for a writer. 
+TEST_F(PipeTest, OpenViaProcSelfFD) { + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + FileDescriptor rfd(fds[0]); + FileDescriptor wfd(fds[1]); + + // Close the write end of the pipe. reset() closes the descriptor; release() + // would only give up ownership and leave the write end open. + wfd.reset(); + + // Open other side via /proc/self/fd. It should not block. + FileDescriptor proc_self_fd = ASSERT_NO_ERRNO_AND_VALUE( + Open(absl::StrCat("/proc/self/fd/", fds[0]), O_RDONLY)); +} + +// Test that opening and reading from an anonymous pipe (with existing writes) +// RDONLY via /proc/self/fd/N returns the existing data. +TEST_F(PipeTest, OpenViaProcSelfFDWithWrites) { + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + FileDescriptor rfd(fds[0]); + FileDescriptor wfd(fds[1]); + + // Write to the pipe and then close the write fd. + char data = 'x'; + ASSERT_THAT(write(fds[1], &data, 1), SyscallSucceedsWithValue(1)); + // reset() actually closes the descriptor; release() would leak it open. + wfd.reset(); + + // Open read side via /proc/self/fd, and read from it. + FileDescriptor proc_self_fd = ASSERT_NO_ERRNO_AND_VALUE( + Open(absl::StrCat("/proc/self/fd/", fds[0]), O_RDONLY)); + char got; + ASSERT_THAT(read(proc_self_fd.get(), &got, 1), SyscallSucceedsWithValue(1)); + + // We should get what we sent. + EXPECT_EQ(got, data); +} + +TEST_F(PipeTest, LargeFile) { + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + FileDescriptor rfd(fds[0]); + FileDescriptor wfd(fds[1]); + + int rflags; + EXPECT_THAT(rflags = fcntl(rfd.get(), F_GETFL), SyscallSucceeds()); + + // The kernel did *not* set O_LARGEFILE. + EXPECT_EQ(rflags, 0); +} + +// Test that accessing /proc/<PID>/fd/<FD> correctly decrements the refcount of +// that file descriptor. 
+TEST_F(PipeTest, ProcFDReleasesFile) { + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + FileDescriptor rfd(fds[0]); + FileDescriptor wfd(fds[1]); + + // Stat the pipe FD, which shouldn't alter the refcount of the write end of + // the pipe. 
+ struct stat wst; + ASSERT_THAT(lstat(absl::StrCat("/proc/self/fd/", wfd.get()).c_str(), &wst), + SyscallSucceeds()); + + // Close the write end of the pipe and ensure that read indicates EOF. + wfd.reset(); + char buf; + ASSERT_THAT(read(rfd.get(), &buf, 1), SyscallSucceedsWithValue(0)); +} + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/poll.cc b/test/syscalls/linux/poll.cc new file mode 100644 index 000000000..897fd0bec --- /dev/null +++ b/test/syscalls/linux/poll.cc @@ -0,0 +1,279 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <poll.h> +#include <sys/resource.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <algorithm> + +#include "gtest/gtest.h" +#include "absl/synchronization/notification.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/syscalls/linux/base_poll_test.h" +#include "test/util/file_descriptor.h" +#include "test/util/logging.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { +namespace { + +class PollTest : public BasePollTest { + protected: + void SetUp() override { BasePollTest::SetUp(); } + void TearDown() override { BasePollTest::TearDown(); } +}; + +TEST_F(PollTest, InvalidFds) { + // fds is invalid because it's null, but we tell poll the length is non-zero. 
+ EXPECT_THAT(poll(nullptr, 1, 1), SyscallFailsWithErrno(EFAULT)); + EXPECT_THAT(poll(nullptr, -1, 1), SyscallFailsWithErrno(EINVAL)); +} + +TEST_F(PollTest, NullFds) { + EXPECT_THAT(poll(nullptr, 0, 10), SyscallSucceeds()); +} + +TEST_F(PollTest, ZeroTimeout) { + EXPECT_THAT(poll(nullptr, 0, 0), SyscallSucceeds()); +} + +// If random S/R interrupts the poll, SIGALRM may be delivered before poll +// restarts, causing the poll to hang forever. +TEST_F(PollTest, NegativeTimeout_NoRandomSave) { + // Negative timeout means wait forever so set a timer. + SetTimer(absl::Milliseconds(100)); + EXPECT_THAT(poll(nullptr, 0, -1), SyscallFailsWithErrno(EINTR)); + EXPECT_TRUE(TimerFired()); +} + +TEST_F(PollTest, NonBlockingEventPOLLIN) { + // Create a pipe. + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + + FileDescriptor fd0(fds[0]); + FileDescriptor fd1(fds[1]); + + // Write some data to the pipe. + char s[] = "foo\n"; + ASSERT_THAT(WriteFd(fd1.get(), s, strlen(s) + 1), SyscallSucceeds()); + + // Poll on the reader fd with POLLIN event. + struct pollfd poll_fd = {fd0.get(), POLLIN, 0}; + EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, 0), SyscallSucceedsWithValue(1)); + + // Should trigger POLLIN event. + EXPECT_EQ(poll_fd.revents & POLLIN, POLLIN); +} + +TEST_F(PollTest, BlockingEventPOLLIN) { + // Create a pipe. + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + + FileDescriptor fd0(fds[0]); + FileDescriptor fd1(fds[1]); + + // Start a blocking poll on the read fd. + absl::Notification notify; + ScopedThread t([&fd0, &notify]() { + notify.Notify(); + + // Poll on the reader fd with POLLIN event. + struct pollfd poll_fd = {fd0.get(), POLLIN, 0}; + EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, -1), SyscallSucceedsWithValue(1)); + + // Should trigger POLLIN event. + EXPECT_EQ(poll_fd.revents & POLLIN, POLLIN); + }); + + notify.WaitForNotification(); + absl::SleepFor(absl::Seconds(1.0)); + + // Write some data to the pipe. 
+ char s[] = "foo\n"; + ASSERT_THAT(WriteFd(fd1.get(), s, strlen(s) + 1), SyscallSucceeds()); +} + +TEST_F(PollTest, NonBlockingEventPOLLHUP) { + // Create a pipe. + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + + FileDescriptor fd0(fds[0]); + FileDescriptor fd1(fds[1]); + + // Close the writer fd. + fd1.reset(); + + // Poll on the reader fd with POLLIN event. + struct pollfd poll_fd = {fd0.get(), POLLIN, 0}; + EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, 0), SyscallSucceedsWithValue(1)); + + // Should trigger POLLHUP event. + EXPECT_EQ(poll_fd.revents & POLLHUP, POLLHUP); + + // Should not trigger POLLIN event. + EXPECT_EQ(poll_fd.revents & POLLIN, 0); +} + +TEST_F(PollTest, BlockingEventPOLLHUP) { + // Create a pipe. + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + + FileDescriptor fd0(fds[0]); + FileDescriptor fd1(fds[1]); + + // Start a blocking poll on the read fd. + absl::Notification notify; + ScopedThread t([&fd0, &notify]() { + notify.Notify(); + + // Poll on the reader fd with POLLIN event. + struct pollfd poll_fd = {fd0.get(), POLLIN, 0}; + EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, -1), SyscallSucceedsWithValue(1)); + + // Should trigger POLLHUP event. + EXPECT_EQ(poll_fd.revents & POLLHUP, POLLHUP); + + // Should not trigger POLLIN event. + EXPECT_EQ(poll_fd.revents & POLLIN, 0); + }); + + notify.WaitForNotification(); + absl::SleepFor(absl::Seconds(1.0)); + + // Close the writer fd without writing any data. + fd1.reset(); +} + +TEST_F(PollTest, NonBlockingEventPOLLERR) { + // Create a pipe. + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + + FileDescriptor fd0(fds[0]); + FileDescriptor fd1(fds[1]); + + // Close the reader fd. + fd0.reset(); + + // Poll on the writer fd with POLLOUT event. + struct pollfd poll_fd = {fd1.get(), POLLOUT, 0}; + EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, 0), SyscallSucceedsWithValue(1)); + + // Should trigger POLLERR event. 
+ EXPECT_EQ(poll_fd.revents & POLLERR, POLLERR); + + // Should also trigger POLLOUT event. + EXPECT_EQ(poll_fd.revents & POLLOUT, POLLOUT); +} + +// This test will validate that if an FD is already ready on some event, whether +// it's POLLIN or POLLOUT it will not immediately return unless that's actually +// what the caller was interested in. +TEST_F(PollTest, ImmediatelyReturnOnlyOnPollEvents) { + // Create a pipe. + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + + FileDescriptor fd0(fds[0]); + FileDescriptor fd1(fds[1]); + + // Wait for read related event on the write side of the pipe, since a write + // is possible on fds[1] it would mean that POLLOUT would return immediately. + // We should make sure that we're not woken up with that state that we didn't + // specifically request. + constexpr int kTimeoutMs = 100; + struct pollfd poll_fd = {fd1.get(), POLLIN | POLLPRI | POLLRDHUP, 0}; + EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, kTimeoutMs), + SyscallSucceedsWithValue(0)); // We should timeout. + EXPECT_EQ(poll_fd.revents, 0); // Nothing should be in returned events. + + // Now let's poll on POLLOUT and we should get back 1 fd as being ready and + // it should contain POLLOUT in the revents. + poll_fd.events = POLLOUT; + EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, kTimeoutMs), + SyscallSucceedsWithValue(1)); // 1 fd should have an event. + EXPECT_EQ(poll_fd.revents, POLLOUT); // POLLOUT should be in revents. +} + +// This test validates that poll(2) while data is available immediately returns. +TEST_F(PollTest, PollLevelTriggered) { + int fds[2] = {}; + ASSERT_THAT(socketpair(AF_UNIX, SOCK_STREAM, /*protocol=*/0, fds), + SyscallSucceeds()); + + FileDescriptor fd0(fds[0]); + FileDescriptor fd1(fds[1]); + + // Write two bytes to the socket. + const char* kBuf = "aa"; + ASSERT_THAT(RetryEINTR(send)(fd0.get(), kBuf, /*len=*/2, /*flags=*/0), + SyscallSucceedsWithValue(2)); // 2 bytes should be written. 
+ + // Poll(2) should immediately return as there is data available to read. + constexpr int kInfiniteTimeout = -1; + struct pollfd poll_fd = {fd1.get(), POLLIN, 0}; + ASSERT_THAT(RetryEINTR(poll)(&poll_fd, /*nfds=*/1, kInfiniteTimeout), + SyscallSucceedsWithValue(1)); // 1 fd should be ready to read. + EXPECT_NE(poll_fd.revents & POLLIN, 0); + + // Read a single byte. + char read_byte = 0; + ASSERT_THAT(RetryEINTR(recv)(fd1.get(), &read_byte, /*len=*/1, /*flags=*/0), + SyscallSucceedsWithValue(1)); // 1 byte should be read. + ASSERT_EQ(read_byte, 'a'); // We should have read a single 'a'. + + // Create a separate pollfd for our second poll. + struct pollfd poll_fd_after = {fd1.get(), POLLIN, 0}; + + // Poll(2) should again immediately return since we only read one byte. + ASSERT_THAT(RetryEINTR(poll)(&poll_fd_after, /*nfds=*/1, kInfiniteTimeout), + SyscallSucceedsWithValue(1)); // 1 fd should be ready to read. + EXPECT_NE(poll_fd_after.revents & POLLIN, 0); +} + +TEST_F(PollTest, Nfds) { + // Stash value of RLIMIT_NOFILES. + struct rlimit rlim; + TEST_PCHECK(getrlimit(RLIMIT_NOFILE, &rlim) == 0); + rlim_t max_fds = rlim.rlim_cur; + + // Create the biggest possible pollfd array such that each element is valid. + // + // Each entry in the 'fds' array refers to stdout (fd=1) and polls for + // "writable" events (events=POLLOUT). This essentially guarantees that the + // poll() is a no-op and allows negative testing of the 'nfds' parameter. + std::vector<struct pollfd> fds(max_fds, {.fd = 1, .events = POLLOUT}); + + // Verify that 'nfds' up to RLIMIT_NOFILE are allowed. + EXPECT_THAT(RetryEINTR(poll)(fds.data(), 1, 1), SyscallSucceedsWithValue(1)); + EXPECT_THAT(RetryEINTR(poll)(fds.data(), max_fds / 2, 1), + SyscallSucceedsWithValue(max_fds / 2)); + EXPECT_THAT(RetryEINTR(poll)(fds.data(), max_fds, 1), + SyscallSucceedsWithValue(max_fds)); + + // If 'nfds' exceeds RLIMIT_NOFILE then it must fail with EINVAL. 
+ EXPECT_THAT(poll(fds.data(), max_fds + 1, 1), SyscallFailsWithErrno(EINVAL)); +} + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/ppoll.cc b/test/syscalls/linux/ppoll.cc new file mode 100644 index 000000000..f8c388c00 --- /dev/null +++ b/test/syscalls/linux/ppoll.cc @@ -0,0 +1,155 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <poll.h> +#include <signal.h> +#include <sys/syscall.h> +#include <sys/time.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "absl/time/time.h" +#include "test/syscalls/linux/base_poll_test.h" +#include "test/util/signal_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { +namespace { + +// Linux and glibc have a different idea of the sizeof sigset_t. When calling +// the syscall directly, use what the kernel expects. +unsigned kSigsetSize = SIGRTMAX / 8; + +// Linux ppoll(2) differs from the glibc wrapper function in that Linux updates +// the timeout with the amount of time remaining. In order to test this behavior +// we need to use the syscall directly. 
+int syscallPpoll(struct pollfd* fds, nfds_t nfds, struct timespec* timeout_ts, + const sigset_t* sigmask, unsigned mask_size) { + return syscall(SYS_ppoll, fds, nfds, timeout_ts, sigmask, mask_size); +} + +class PpollTest : public BasePollTest { + protected: + void SetUp() override { BasePollTest::SetUp(); } + void TearDown() override { BasePollTest::TearDown(); } +}; + +TEST_F(PpollTest, InvalidFds) { + // fds is invalid because it's null, but we tell ppoll the length is non-zero. + struct timespec timeout = {}; + sigset_t sigmask; + TEST_PCHECK(sigemptyset(&sigmask) == 0); + EXPECT_THAT(syscallPpoll(nullptr, 1, &timeout, &sigmask, kSigsetSize), + SyscallFailsWithErrno(EFAULT)); + EXPECT_THAT(syscallPpoll(nullptr, -1, &timeout, &sigmask, kSigsetSize), + SyscallFailsWithErrno(EINVAL)); +} + +// See that when fds is null, ppoll behaves like sleep. +TEST_F(PpollTest, NullFds) { + struct timespec timeout = absl::ToTimespec(absl::Milliseconds(10)); + ASSERT_THAT(syscallPpoll(nullptr, 0, &timeout, nullptr, 0), + SyscallSucceeds()); + EXPECT_EQ(timeout.tv_sec, 0); + EXPECT_EQ(timeout.tv_nsec, 0); +} + +TEST_F(PpollTest, ZeroTimeout) { + struct timespec timeout = {}; + ASSERT_THAT(syscallPpoll(nullptr, 0, &timeout, nullptr, 0), + SyscallSucceeds()); + EXPECT_EQ(timeout.tv_sec, 0); + EXPECT_EQ(timeout.tv_nsec, 0); +} + +// If random S/R interrupts the ppoll, SIGALRM may be delivered before ppoll +// restarts, causing the ppoll to hang forever. +TEST_F(PpollTest, NoTimeout_NoRandomSave) { + // When there's no timeout, ppoll may never return so set a timer. + SetTimer(absl::Milliseconds(100)); + // See that we get interrupted by the timer. 
+ ASSERT_THAT(syscallPpoll(nullptr, 0, nullptr, nullptr, 0), + SyscallFailsWithErrno(EINTR)); + EXPECT_TRUE(TimerFired()); +} + +TEST_F(PpollTest, InvalidTimeoutNegative) { + struct timespec timeout = absl::ToTimespec(absl::Nanoseconds(-1)); + EXPECT_THAT(syscallPpoll(nullptr, 0, &timeout, nullptr, 0), + SyscallFailsWithErrno(EINVAL)); +} + +TEST_F(PpollTest, InvalidTimeoutNotNormalized) { + struct timespec timeout = {0, 1000000001}; + EXPECT_THAT(syscallPpoll(nullptr, 0, &timeout, nullptr, 0), + SyscallFailsWithErrno(EINVAL)); +} + +TEST_F(PpollTest, InvalidMaskSize) { + struct timespec timeout = {}; + sigset_t sigmask; + TEST_PCHECK(sigemptyset(&sigmask) == 0); + EXPECT_THAT(syscallPpoll(nullptr, 0, &timeout, &sigmask, 128), + SyscallFailsWithErrno(EINVAL)); +} + +// Verify that signals blocked by the ppoll mask (that would otherwise be +// allowed) do not interrupt ppoll. +TEST_F(PpollTest, SignalMaskBlocksSignal) { + absl::Duration duration(absl::Seconds(30)); + struct timespec timeout = absl::ToTimespec(duration); + absl::Duration timer_duration(absl::Seconds(10)); + + // Call with a mask that blocks SIGALRM. See that ppoll is not interrupted + // (i.e. returns 0) and that upon completion, the timer has fired. + sigset_t mask; + ASSERT_THAT(sigprocmask(0, nullptr, &mask), SyscallSucceeds()); + TEST_PCHECK(sigaddset(&mask, SIGALRM) == 0); + SetTimer(timer_duration); + MaybeSave(); + ASSERT_FALSE(TimerFired()); + ASSERT_THAT(syscallPpoll(nullptr, 0, &timeout, &mask, kSigsetSize), + SyscallSucceeds()); + EXPECT_TRUE(TimerFired()); + EXPECT_EQ(absl::DurationFromTimespec(timeout), absl::Duration()); +} + +// Verify that signals allowed by the ppoll mask (that would otherwise be +// blocked) interrupt ppoll. 
+TEST_F(PpollTest, SignalMaskAllowsSignal) { + absl::Duration duration(absl::Seconds(30)); + struct timespec timeout = absl::ToTimespec(duration); + absl::Duration timer_duration(absl::Seconds(10)); + + sigset_t mask; + ASSERT_THAT(sigprocmask(0, nullptr, &mask), SyscallSucceeds()); + + // Block SIGALRM. + auto cleanup = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_BLOCK, SIGALRM)); + + // Call with a mask that unblocks SIGALRM. See that ppoll is interrupted. + SetTimer(timer_duration); + MaybeSave(); + ASSERT_FALSE(TimerFired()); + ASSERT_THAT(syscallPpoll(nullptr, 0, &timeout, &mask, kSigsetSize), + SyscallFailsWithErrno(EINTR)); + EXPECT_TRUE(TimerFired()); + EXPECT_GT(absl::DurationFromTimespec(timeout), absl::Duration()); +} + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/prctl.cc b/test/syscalls/linux/prctl.cc new file mode 100644 index 000000000..44f3df6a3 --- /dev/null +++ b/test/syscalls/linux/prctl.cc @@ -0,0 +1,171 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <sys/prctl.h> +#include <sys/ptrace.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> +#include <string> + +#include "gtest/gtest.h" +#include "test/util/capability_util.h" +#include "test/util/multiprocess_util.h" +#include "test/util/posix_error.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +DEFINE_bool(prctl_no_new_privs_test_child, false, + "If true, exit with the return value of prctl(PR_GET_NO_NEW_PRIVS) " + "plus an offset (see test source)."); + +namespace gvisor { +namespace testing { + +namespace { + +TEST(PrctlTest, NameInitialized) { + const size_t name_length = 20; + char name[name_length] = {}; + ASSERT_THAT(prctl(PR_GET_NAME, name), SyscallSucceeds()); + ASSERT_NE(std::string(name), ""); +} + +TEST(PrctlTest, SetNameLongName) { + const size_t name_length = 20; + const std::string long_name(name_length, 'A'); + ASSERT_THAT(prctl(PR_SET_NAME, long_name.c_str()), SyscallSucceeds()); + char truncated_name[name_length] = {}; + ASSERT_THAT(prctl(PR_GET_NAME, truncated_name), SyscallSucceeds()); + const size_t truncated_length = 15; + ASSERT_EQ(long_name.substr(0, truncated_length), std::string(truncated_name)); +} + +// Offset added to exit code from test child to distinguish from other abnormal +// exits. +constexpr int kPrctlNoNewPrivsTestChildExitBase = 100; + +TEST(PrctlTest, NoNewPrivsPreservedAcrossCloneForkAndExecve) { + // Check if no_new_privs is already set. If it is, we can still test that it's + // preserved across clone/fork/execve, but we also expect it to still be set + // at the end of the test. Otherwise, call prctl(PR_SET_NO_NEW_PRIVS) so as + // not to contaminate the original thread. 
+ int no_new_privs; + ASSERT_THAT(no_new_privs = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0), + SyscallSucceeds()); + ScopedThread([] { + ASSERT_THAT(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0), SyscallSucceeds()); + EXPECT_THAT(prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0), + SyscallSucceedsWithValue(1)); + ScopedThread([] { + EXPECT_THAT(prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0), + SyscallSucceedsWithValue(1)); + // Note that these ASSERT_*s failing will only return from this thread, + // but this is the intended behavior. + pid_t child_pid = -1; + int execve_errno = 0; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec("/proc/self/exe", + {"/proc/self/exe", "--prctl_no_new_privs_test_child"}, {}, + nullptr, &child_pid, &execve_errno)); + + ASSERT_GT(child_pid, 0); + ASSERT_EQ(execve_errno, 0); + + int status = 0; + ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), + SyscallSucceeds()); + ASSERT_TRUE(WIFEXITED(status)); + ASSERT_EQ(WEXITSTATUS(status), kPrctlNoNewPrivsTestChildExitBase + 1); + + EXPECT_THAT(prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0), + SyscallSucceedsWithValue(1)); + }); + EXPECT_THAT(prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0), + SyscallSucceedsWithValue(1)); + }); + EXPECT_THAT(prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0), + SyscallSucceedsWithValue(no_new_privs)); +} + +TEST(PrctlTest, PDeathSig) { + pid_t child_pid; + + // Make the new process' parent a separate thread since the parent death + // signal fires when the parent *thread* exits. + ScopedThread([&] { + child_pid = fork(); + TEST_CHECK(child_pid >= 0); + if (child_pid == 0) { + // In child process. + TEST_CHECK(prctl(PR_SET_PDEATHSIG, SIGKILL) >= 0); + int signo; + TEST_CHECK(prctl(PR_GET_PDEATHSIG, &signo) >= 0); + TEST_CHECK(signo == SIGKILL); + // Enable tracing, then raise SIGSTOP and expect our parent to suppress + // it. + TEST_CHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) >= 0); + raise(SIGSTOP); + // Sleep until killed by our parent death signal. sleep(3) is + // async-signal-safe, absl::SleepFor isn't. 
+ while (true) { + sleep(10); + } + } + // In parent process. + + // Wait for the child to send itself SIGSTOP and enter signal-delivery-stop. + int status; + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP) + << "status = " << status; + + // Suppress the SIGSTOP and detach from the child. + ASSERT_THAT(ptrace(PTRACE_DETACH, child_pid, 0, 0), SyscallSucceeds()); + }); + + // The child should have been killed by its parent death SIGKILL. + int status; + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL) + << "status = " << status; +} + +// This test is to validate that calling prctl with PR_SET_MM without the +// CAP_SYS_RESOURCE returns EPERM. +TEST(PrctlTest, InvalidPrSetMM) { + if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_RESOURCE))) { + ASSERT_NO_ERRNO(SetCapability(CAP_SYS_RESOURCE, + false)); // Drop capability to test below. + } + ASSERT_THAT(prctl(PR_SET_MM, 0, 0, 0, 0), SyscallFailsWithErrno(EPERM)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor + +int main(int argc, char** argv) { + gvisor::testing::TestInit(&argc, &argv); + + if (FLAGS_prctl_no_new_privs_test_child) { + exit(gvisor::testing::kPrctlNoNewPrivsTestChildExitBase + + prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0)); + } + + return RUN_ALL_TESTS(); +} diff --git a/test/syscalls/linux/prctl_setuid.cc b/test/syscalls/linux/prctl_setuid.cc new file mode 100644 index 000000000..c1b561464 --- /dev/null +++ b/test/syscalls/linux/prctl_setuid.cc @@ -0,0 +1,262 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <sched.h> +#include <sys/prctl.h> +#include <string> + +#include "gtest/gtest.h" +#include "test/util/capability_util.h" +#include "test/util/logging.h" +#include "test/util/multiprocess_util.h" +#include "test/util/posix_error.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +DEFINE_int32(scratch_uid, 65534, "scratch UID"); +// This flag is used to verify that after an exec PR_GET_KEEPCAPS +// returns 0, the return code will be offset by kPrGetKeepCapsExitBase. +DEFINE_bool(prctl_pr_get_keepcaps, false, + "If true the test will verify that prctl with pr_get_keepcaps" + "returns 0. The test will exit with the result of that check."); + +// These tests exist seperately from prctl because we need to start +// them as root. Setuid() has the behavior that permissions are fully +// removed if one of the UIDs were 0 before a setuid() call. This +// behavior can be changed by using PR_SET_KEEPCAPS and that is what +// is tested here. +// +// Reference setuid(2): +// The setuid() function checks the effective user ID of +// the caller and if it is the superuser, all process-related user ID's +// are set to uid. After this has occurred, it is impossible for the +// program to regain root privileges. +// +// Thus, a set-user-ID-root program wishing to temporarily drop root +// privileges, assume the identity of an unprivileged user, and then +// regain root privileges afterward cannot use setuid(). You can +// accomplish this with seteuid(2). 
+namespace gvisor { +namespace testing { + +// Offset added to exit code from test child to distinguish from other abnormal +// exits. +constexpr int kPrGetKeepCapsExitBase = 100; + +namespace { + +class PrctlKeepCapsSetuidTest : public ::testing::Test { + protected: + void SetUp() override { + // PR_GET_KEEPCAPS will only return 0 or 1 (on success). + ASSERT_THAT(original_keepcaps_ = prctl(PR_GET_KEEPCAPS, 0, 0, 0, 0), + SyscallSucceeds()); + ASSERT_TRUE(original_keepcaps_ == 0 || original_keepcaps_ == 1); + } + + void TearDown() override { + // Restore PR_SET_KEEPCAPS. + ASSERT_THAT(prctl(PR_SET_KEEPCAPS, original_keepcaps_, 0, 0, 0), + SyscallSucceeds()); + + // Verify that it was restored. + ASSERT_THAT(prctl(PR_GET_KEEPCAPS, 0, 0, 0, 0), + SyscallSucceedsWithValue(original_keepcaps_)); + } + + // The original keep caps value exposed so tests can use it if they need. + int original_keepcaps_ = 0; +}; + +// This test will verify that a bad value, eg. not 0 or 1 for +// PR_SET_KEEPCAPS will return EINVAL as required by prctl(2). +TEST_F(PrctlKeepCapsSetuidTest, PrctlBadArgsToKeepCaps) { + ASSERT_THAT(prctl(PR_SET_KEEPCAPS, 2, 0, 0, 0), + SyscallFailsWithErrno(EINVAL)); +} + +// This test will verify that a setuid(2) without PR_SET_KEEPCAPS will cause +// all capabilities to be dropped. +TEST_F(PrctlKeepCapsSetuidTest, SetUidNoKeepCaps) { + // getuid(2) never fails. + if (getuid() != 0) { + SKIP_IF(!IsRunningOnGvisor()); + FAIL() << "User is not root on gvisor platform."; + } + + // Do setuid in a separate thread so that after finishing this test, the + // process can still open files the test harness created before starting + // this test. Otherwise, the files are created by root (UID before the + // test), but cannot be opened by the `uid` set below after the test. After + // calling setuid(non-zero-UID), there is no way to get root privileges + // back. + ScopedThread([] { + // Start by verifying we have a capability. 
+ TEST_CHECK(HaveCapability(CAP_SYS_ADMIN).ValueOrDie()); + + // Verify that PR_GET_KEEPCAPS is disabled. + ASSERT_THAT(prctl(PR_GET_KEEPCAPS, 0, 0, 0, 0), + SyscallSucceedsWithValue(0)); + + // Use syscall instead of glibc setuid wrapper because we want this setuid + // call to only apply to this task. POSIX threads, however, require that + // all threads have the same UIDs, so using the setuid wrapper sets all + // threads' real UID. + EXPECT_THAT(syscall(SYS_setuid, FLAGS_scratch_uid), SyscallSucceeds()); + + // Verify that we changed uid. + EXPECT_THAT(getuid(), SyscallSucceedsWithValue(FLAGS_scratch_uid)); + + // Verify we lost the capability in the effective set, this always happens. + TEST_CHECK(!HaveCapability(CAP_SYS_ADMIN).ValueOrDie()); + + // We should have also lost it in the permitted set by the setuid() so + // SetCapability should fail when we try to add it back to the effective set + ASSERT_FALSE(SetCapability(CAP_SYS_ADMIN, true).ok()); + }); +} + +// This test will verify that a setuid with PR_SET_KEEPCAPS will cause +// capabilities to be retained after we switch away from the root user. +TEST_F(PrctlKeepCapsSetuidTest, SetUidKeepCaps) { + // getuid(2) never fails. + if (getuid() != 0) { + SKIP_IF(!IsRunningOnGvisor()); + FAIL() << "User is not root on gvisor platform."; + } + + // Do setuid in a separate thread so that after finishing this test, the + // process can still open files the test harness created before starting + // this test. Otherwise, the files are created by root (UID before the + // test), but cannot be opened by the `uid` set below after the test. After + // calling setuid(non-zero-UID), there is no way to get root privileges + // back. + ScopedThread([] { + // Start by verifying we have a capability. + TEST_CHECK(HaveCapability(CAP_SYS_ADMIN).ValueOrDie()); + + // Set PR_SET_KEEPCAPS. + ASSERT_THAT(prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0), SyscallSucceeds()); + + // Verify PR_SET_KEEPCAPS was set before we proceed. 
+ ASSERT_THAT(prctl(PR_GET_KEEPCAPS, 0, 0, 0, 0), + SyscallSucceedsWithValue(1)); + + // Use syscall instead of glibc setuid wrapper because we want this setuid + // call to only apply to this task. POSIX threads, however, require that + // all threads have the same UIDs, so using the setuid wrapper sets all + // threads' real UID. + EXPECT_THAT(syscall(SYS_setuid, FLAGS_scratch_uid), SyscallSucceeds()); + + // Verify that we changed uid. + EXPECT_THAT(getuid(), SyscallSucceedsWithValue(FLAGS_scratch_uid)); + + // Verify we lost the capability in the effective set, this always happens. + TEST_CHECK(!HaveCapability(CAP_SYS_ADMIN).ValueOrDie()); + + // We lost the capability in the effective set, but it will still + // exist in the permitted set so we can elevate the capability. + ASSERT_NO_ERRNO(SetCapability(CAP_SYS_ADMIN, true)); + + // Verify we got back the capability in the effective set. + TEST_CHECK(HaveCapability(CAP_SYS_ADMIN).ValueOrDie()); + }); +} + +// This test will verify that PR_SET_KEEPCAPS is not retained +// across an execve. According to prctl(2): +// "The "keep capabilities" value will be reset to 0 on subsequent +// calls to execve(2)." +TEST_F(PrctlKeepCapsSetuidTest, NoKeepCapsAfterExec) { + ASSERT_THAT(prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0), SyscallSucceeds()); + + // Verify PR_SET_KEEPCAPS was set before we proceed. + ASSERT_THAT(prctl(PR_GET_KEEPCAPS, 0, 0, 0, 0), SyscallSucceedsWithValue(1)); + + pid_t child_pid = -1; + int execve_errno = 0; + // Do an exec and then verify that PR_GET_KEEPCAPS returns 0 + // see the body of main below. 
+ auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(ForkAndExec( + "/proc/self/exe", {"/proc/self/exe", "--prctl_pr_get_keepcaps"}, {}, + nullptr, &child_pid, &execve_errno)); + + ASSERT_GT(child_pid, 0); + ASSERT_EQ(execve_errno, 0); + + int status = 0; + ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds()); + ASSERT_TRUE(WIFEXITED(status)); + // PR_SET_KEEPCAPS should have been cleared by the exec. + // Success should return gvisor::testing::kPrGetKeepCapsExitBase + 0 + ASSERT_EQ(WEXITSTATUS(status), kPrGetKeepCapsExitBase); +} + +TEST_F(PrctlKeepCapsSetuidTest, NoKeepCapsAfterNewUserNamespace) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanCreateUserNamespace())); + + // Fork to avoid changing the user namespace of the original test process. + pid_t const child_pid = fork(); + + if (child_pid == 0) { + // Verify that the keepcaps flag is set to 0 when we change user namespaces. + TEST_PCHECK(prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0) == 0); + MaybeSave(); + + TEST_PCHECK(prctl(PR_GET_KEEPCAPS, 0, 0, 0, 0) == 1); + MaybeSave(); + + TEST_PCHECK(unshare(CLONE_NEWUSER) == 0); + MaybeSave(); + + TEST_PCHECK(prctl(PR_GET_KEEPCAPS, 0, 0, 0, 0) == 0); + MaybeSave(); + + _exit(0); + } + + int status; + ASSERT_THAT(child_pid, SyscallSucceeds()); + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << "status = " << status; +} + +// This test will verify that PR_SET_KEEPCAPS and PR_GET_KEEPCAPS work correctly +TEST_F(PrctlKeepCapsSetuidTest, PrGetKeepCaps) { + // Set PR_SET_KEEPCAPS to the negation of the original. + ASSERT_THAT(prctl(PR_SET_KEEPCAPS, !original_keepcaps_, 0, 0, 0), + SyscallSucceeds()); + + // Verify it was set. 
+ ASSERT_THAT(prctl(PR_GET_KEEPCAPS, 0, 0, 0, 0), + SyscallSucceedsWithValue(!original_keepcaps_)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor + +int main(int argc, char** argv) { + gvisor::testing::TestInit(&argc, &argv); + + if (FLAGS_prctl_pr_get_keepcaps) { + return gvisor::testing::kPrGetKeepCapsExitBase + + prctl(PR_GET_KEEPCAPS, 0, 0, 0, 0); + } + + return RUN_ALL_TESTS(); +} diff --git a/test/syscalls/linux/pread64.cc b/test/syscalls/linux/pread64.cc new file mode 100644 index 000000000..4e5bcfcde --- /dev/null +++ b/test/syscalls/linux/pread64.cc @@ -0,0 +1,152 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <errno.h> +#include <fcntl.h> +#include <sys/mman.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "gtest/gtest.h" +#include "test/util/file_descriptor.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +class Pread64Test : public ::testing::Test { + void SetUp() override { + name_ = NewTempAbsPath(); + ASSERT_NO_ERRNO_AND_VALUE(Open(name_, O_CREAT, 0644)); + } + + void TearDown() override { unlink(name_.c_str()); } + + public: + std::string name_; +}; + +TEST(Pread64TestNoTempFile, BadFileDescriptor) { + char buf[1024]; + EXPECT_THAT(pread64(-1, buf, 1024, 0), SyscallFailsWithErrno(EBADF)); +} + +TEST_F(Pread64Test, ZeroBuffer) { + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(name_, O_RDWR)); + + char msg[] = "hello world"; + EXPECT_THAT(pwrite64(fd.get(), msg, strlen(msg), 0), + SyscallSucceedsWithValue(strlen(msg))); + + char buf[10]; + EXPECT_THAT(pread64(fd.get(), buf, 0, 0), SyscallSucceedsWithValue(0)); +} + +TEST_F(Pread64Test, BadBuffer) { + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(name_, O_RDWR)); + + char msg[] = "hello world"; + EXPECT_THAT(pwrite64(fd.get(), msg, strlen(msg), 0), + SyscallSucceedsWithValue(strlen(msg))); + + char* bad_buffer = nullptr; + EXPECT_THAT(pread64(fd.get(), bad_buffer, 1024, 0), + SyscallFailsWithErrno(EFAULT)); +} + +TEST_F(Pread64Test, WriteOnlyNotReadable) { + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(name_, O_WRONLY)); + + char buf[1024]; + EXPECT_THAT(pread64(fd.get(), buf, 1024, 0), SyscallFailsWithErrno(EBADF)); +} + +TEST_F(Pread64Test, DirNotReadable) { + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(GetAbsoluteTestTmpdir(), O_RDONLY)); + + char buf[1024]; + EXPECT_THAT(pread64(fd.get(), buf, 1024, 0), SyscallFailsWithErrno(EISDIR)); +} + +TEST_F(Pread64Test, BadOffset) { + const FileDescriptor fd = 
ASSERT_NO_ERRNO_AND_VALUE(Open(name_, O_RDONLY)); + + char buf[1024]; + EXPECT_THAT(pread64(fd.get(), buf, 1024, -1), SyscallFailsWithErrno(EINVAL)); +} + +TEST_F(Pread64Test, OffsetNotIncremented) { + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(name_, O_RDWR)); + + char msg[] = "hello world"; + EXPECT_THAT(write(fd.get(), msg, strlen(msg)), + SyscallSucceedsWithValue(strlen(msg))); + int offset; + EXPECT_THAT(offset = lseek(fd.get(), 0, SEEK_CUR), SyscallSucceeds()); + + char buf1[1024]; + EXPECT_THAT(pread64(fd.get(), buf1, 1024, 0), + SyscallSucceedsWithValue(strlen(msg))); + EXPECT_THAT(lseek(fd.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(offset)); + + char buf2[1024]; + EXPECT_THAT(pread64(fd.get(), buf2, 1024, 3), + SyscallSucceedsWithValue(strlen(msg) - 3)); + EXPECT_THAT(lseek(fd.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(offset)); +} + +TEST_F(Pread64Test, EndOfFile) { + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(name_, O_RDONLY)); + + char buf[1024]; + EXPECT_THAT(pread64(fd.get(), buf, 1024, 0), SyscallSucceedsWithValue(0)); +} + +TEST(Pread64TestNoTempFile, CantReadSocketPair_NoRandomSave) { + int sock_fds[2]; + EXPECT_THAT(socketpair(AF_UNIX, SOCK_STREAM, 0, sock_fds), SyscallSucceeds()); + + char buf[1024]; + EXPECT_THAT(pread64(sock_fds[0], buf, 1024, 0), + SyscallFailsWithErrno(ESPIPE)); + EXPECT_THAT(pread64(sock_fds[1], buf, 1024, 0), + SyscallFailsWithErrno(ESPIPE)); + + EXPECT_THAT(close(sock_fds[0]), SyscallSucceeds()); + EXPECT_THAT(close(sock_fds[1]), SyscallSucceeds()); +} + +TEST(Pread64TestNoTempFile, CantReadPipe) { + char buf[1024]; + + int pipe_fds[2]; + EXPECT_THAT(pipe(pipe_fds), SyscallSucceeds()); + + EXPECT_THAT(pread64(pipe_fds[0], buf, 1024, 0), + SyscallFailsWithErrno(ESPIPE)); + + EXPECT_THAT(close(pipe_fds[0]), SyscallSucceeds()); + EXPECT_THAT(close(pipe_fds[1]), SyscallSucceeds()); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/preadv.cc 
b/test/syscalls/linux/preadv.cc new file mode 100644 index 000000000..8d3aed43c --- /dev/null +++ b/test/syscalls/linux/preadv.cc @@ -0,0 +1,94 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <sys/syscall.h> +#include <sys/types.h> +#include <sys/uio.h> +#include <sys/wait.h> +#include <unistd.h> + +#include <atomic> +#include <string> + +#include "gtest/gtest.h" +#include "gtest/gtest.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/util/file_descriptor.h" +#include "test/util/logging.h" +#include "test/util/memory_util.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" +#include "test/util/timer_util.h" + +namespace gvisor { +namespace testing { + +namespace { +// Stress test: repeatedly preadv's a file into a private mapping for about +// five seconds while a second thread repeatedly forks and reaps short-lived +// children. +TEST(PreadvTest, MMConcurrencyStress) { + // Fill a one-page file with zeroes (the contents don't really matter). + const auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + /* parent = */ GetAbsoluteTestTmpdir(), + /* content = */ std::string(kPageSize, 0), TempPath::kDefaultFileMode)); + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDONLY)); + + // Get a one-page private mapping to read into. + const Mapping m = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE)); + + // Repeatedly fork in a separate thread to force the mapping to become + // copy-on-write. 
+ std::atomic<bool> done(false); + const ScopedThread t([&] { + while (!done.load()) { + const pid_t pid = fork(); + TEST_CHECK(pid >= 0); + if (pid == 0) { + // In child. The parent was obviously multithreaded, so it's neither + // safe nor necessary to do much more than exit. + syscall(SYS_exit_group, 0); + } + int status; + ASSERT_THAT(RetryEINTR(waitpid)(pid, &status, 0), + SyscallSucceedsWithValue(pid)); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << "status = " << status; + } + }); + + // Repeatedly read into the mapping, split across two half-page iovecs. + struct iovec iov[2]; + iov[0].iov_base = m.ptr(); + iov[0].iov_len = kPageSize / 2; + iov[1].iov_base = reinterpret_cast<void*>(m.addr() + kPageSize / 2); + iov[1].iov_len = kPageSize / 2; + constexpr absl::Duration kTestDuration = absl::Seconds(5); + const absl::Time end = absl::Now() + kTestDuration; + while (absl::Now() < end) { + // Among other causes, save/restore cycles may cause interruptions resulting + // in partial reads, so we don't expect any particular return value. + EXPECT_THAT(RetryEINTR(preadv)(fd.get(), iov, 2, 0), SyscallSucceeds()); + } + + // Stop the other thread. NOTE(review): `t` is presumably joined by the + // ScopedThread destructor at scope exit -- confirm. + done.store(true); + + // The test passes if it neither deadlocks nor crashes the OS. +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/preadv2.cc b/test/syscalls/linux/preadv2.cc new file mode 100644 index 000000000..642eed624 --- /dev/null +++ b/test/syscalls/linux/preadv2.cc @@ -0,0 +1,217 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include <fcntl.h> +#include <stdlib.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <sys/uio.h> +#include <sys/wait.h> +#include <unistd.h> + +#include <atomic> +#include <string> +#include <vector> + +#include "gtest/gtest.h" +#include "gtest/gtest.h" +#include "test/syscalls/linux/file_base.h" +#include "test/syscalls/linux/readv_common.h" +#include "test/util/file_descriptor.h" +#include "test/util/memory_util.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// Fallback for headers that do not define SYS_preadv2; only the x86-64 number +// (327) is provided, any other architecture is a compile error. +#ifndef SYS_preadv2 +#if defined(__x86_64__) +#define SYS_preadv2 327 +#else +#error "Unknown architecture" +#endif +#endif // SYS_preadv2 + +// Fallback for headers that do not define RWF_HIPRI. +#ifndef RWF_HIPRI +#define RWF_HIPRI 0x1 +#endif // RWF_HIPRI + +// Length in bytes of the generated file content and of the read buffers. +constexpr int kBufSize = 1024; + +// Returns a kBufSize-character string of repeating decimal digits +// ("0123456789012..."). +std::string SetContent() { + std::string content; + for (int i = 0; i < kBufSize; i++) { + content += static_cast<char>((i % 10) + '0'); + } + return content; +} + +// This test is the base case where we call preadv2 with offset 0 and no flags. 
+TEST(Preadv2Test, TestBaseCall) { + if (!IsRunningOnGvisor()) { + SKIP_BEFORE_KERNEL(/*major_version=*/4, /*minor_version=*/6); + } + std::string content = SetContent(); + + const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), content, TempPath::kDefaultFileMode)); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY)); + + std::vector<char> buf(kBufSize); + struct iovec iov; + iov.iov_base = buf.data(); + iov.iov_len = buf.size(); + + EXPECT_THAT(syscall(SYS_preadv2, fd.get(), &iov, /*iov_cnt=*/ 1, + /*offset=*/0, /*flags=*/0), + SyscallSucceedsWithValue(kBufSize)); + + EXPECT_EQ(content, std::string(buf.data(), buf.size())); +} + +// This test is where we call preadv2 with a positive offset and no flags. 
+TEST(Preadv2Test, TestValidPositiveOffset) { + if (!IsRunningOnGvisor()) { + SKIP_BEFORE_KERNEL(/*major_version=*/4, /*minor_version=*/6); + } + std::string content = SetContent(); + const std::string prefix = "0"; + + const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), prefix + content, TempPath::kDefaultFileMode)); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY)); + + std::vector<char> buf(kBufSize, '0'); + struct iovec iov; + iov.iov_base = buf.data(); + iov.iov_len = buf.size(); + + EXPECT_THAT(syscall(SYS_preadv2, fd.get(), &iov, /*iov_cnt=*/1, + /*offset=*/prefix.size(), /*flags=*/0), + SyscallSucceedsWithValue(kBufSize)); + + EXPECT_EQ(content, std::string(buf.data(), buf.size())); +} + +// This test calls preadv2 with -1 as the offset, which should make the read +// use the current file offset, so the test first seeks past the prefix +// (prefix.size() bytes) prior to calling preadv2. +TEST(Preadv2Test, TestNegativeOneOffset) { + if (!IsRunningOnGvisor()) { + SKIP_BEFORE_KERNEL(/*major_version=*/4, /*minor_version=*/6); + } + std::string content = SetContent(); + const std::string prefix = "231"; + + const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), prefix + content, TempPath::kDefaultFileMode)); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY)); + ASSERT_THAT(lseek(fd.get(), prefix.size(), SEEK_SET), + SyscallSucceedsWithValue(prefix.size())); + + std::vector<char> buf(kBufSize, '0'); + struct iovec iov; + iov.iov_base = buf.data(); + iov.iov_len = buf.size(); + + EXPECT_THAT(syscall(SYS_preadv2, fd.get(), &iov, /*iov_cnt=*/1, + /*offset=*/static_cast<off_t>(-1), /*flags=*/0), + SyscallSucceedsWithValue(kBufSize)); + + EXPECT_EQ(content, std::string(buf.data(), buf.size())); +} + +// This test calls preadv2 with an invalid flag. 
+TEST(Preadv2Test, TestInvalidFlag) { + if (!IsRunningOnGvisor()) { + SKIP_BEFORE_KERNEL(/*major_version=*/4, /*minor_version=*/6); + } + + const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), "", TempPath::kDefaultFileMode)); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY | O_DIRECT)); + + struct iovec iov; + + EXPECT_THAT(syscall(SYS_preadv2, fd.get(), &iov, /*iov_cnt=*/1, + /*offset=*/0, /*flags=*/RWF_HIPRI << 1), + SyscallFailsWithErrno(EINVAL)); +} + +// This test calls preadv2 with an invalid offset (-8) and expects EINVAL. +// NOTE(review): iov is passed uninitialized here; presumably the call is +// rejected before iov is read -- confirm. +TEST(Preadv2Test, TestInvalidOffset) { + if (!IsRunningOnGvisor()) { + SKIP_BEFORE_KERNEL(/*major_version=*/4, /*minor_version=*/6); + } + + const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), "", TempPath::kDefaultFileMode)); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY | O_DIRECT)); + struct iovec iov; + + EXPECT_THAT(syscall(SYS_preadv2, fd.get(), &iov, /*iov_cnt=*/1, + /*offset=*/static_cast<off_t>(-8), /*flags=*/RWF_HIPRI), + SyscallFailsWithErrno(EINVAL)); +} + +// This test calls preadv2 on a file opened O_WRONLY and expects EBADF. +TEST(Preadv2Test, TestUnreadableFile) { + if (!IsRunningOnGvisor()) { + SKIP_BEFORE_KERNEL(/*major_version=*/4, /*minor_version=*/6); + } + + const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), "", TempPath::kDefaultFileMode)); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_WRONLY)); + struct iovec iov; + + EXPECT_THAT(syscall(SYS_preadv2, fd.get(), &iov, /*iov_cnt=*/1, + /*offset=*/0, /*flags=*/0), + SyscallFailsWithErrno(EBADF)); +} + +// Calling preadv2 with a non-negative offset calls preadv. 
Calling preadv with +// an unseekable file is not allowed. A pipe is used for an unseekable file. 
+TEST(Preadv2Test, TestUnseekableFile) { + if (!IsRunningOnGvisor()) { + SKIP_BEFORE_KERNEL(/*major_version=*/4, /*minor_version=*/6); + } + + int pipe_fds[2]; + + ASSERT_THAT(pipe(pipe_fds), SyscallSucceeds()); + + struct iovec iov; + + EXPECT_THAT(syscall(SYS_preadv2, pipe_fds[0], &iov, /*iov_cnt=*/1, + /*offset=*/2, /*flags=*/0), + SyscallFailsWithErrno(ESPIPE)); + + EXPECT_THAT(close(pipe_fds[0]), SyscallSucceeds()); + EXPECT_THAT(close(pipe_fds[1]), SyscallSucceeds()); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/priority.cc b/test/syscalls/linux/priority.cc new file mode 100644 index 000000000..69a58a422 --- /dev/null +++ b/test/syscalls/linux/priority.cc @@ -0,0 +1,215 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <sys/resource.h> +#include <sys/time.h> +#include <sys/types.h> +#include <unistd.h> + +#include <string> +#include <vector> + +#include "gtest/gtest.h" +#include "absl/strings/numbers.h" +#include "absl/strings/str_split.h" +#include "test/util/capability_util.h" +#include "test/util/fs_util.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +// NOTE(review): the tests in this file use errno, INT_MAX and +// syscall(__NR_gettid), but <errno.h>, <limits.h> and <sys/syscall.h> are not +// included directly above -- presumably they arrive transitively; confirm. + +namespace gvisor { +namespace testing { + +namespace { + +// These tests are for both the getpriority(2) and setpriority(2) syscalls. +// These tests are very rudimentary because getpriority and setpriority +// have not yet been fully implemented. 
+ +// getpriority(2) succeeds for the calling process (who=0). +TEST(GetpriorityTest, Implemented) { + // "getpriority() can legitimately return the value -1, it is necessary to + // clear the external variable errno prior to the call" + errno = 0; + EXPECT_THAT(getpriority(PRIO_PROCESS, /*who=*/0), SyscallSucceeds()); +} + +// An invalid `which` value (3) is expected to fail with EINVAL. +TEST(GetpriorityTest, InvalidWhich) { + errno = 0; + EXPECT_THAT(getpriority(/*which=*/3, /*who=*/0), + SyscallFailsWithErrno(EINVAL)); +} + +// Process is found when which=PRIO_PROCESS and who is our own pid. +TEST(GetpriorityTest, ValidWho) { + errno = 0; + EXPECT_THAT(getpriority(PRIO_PROCESS, getpid()), SyscallSucceeds()); +} + +// Process is not found (ESRCH) when which=PRIO_PROCESS and who is a pid that +// is assumed to be unused. +TEST(GetpriorityTest, InvalidWho) { + errno = 0; + // Flaky, but it's tough to avoid a race condition when finding an unused pid + EXPECT_THAT(getpriority(PRIO_PROCESS, /*who=*/INT_MAX - 1), + SyscallFailsWithErrno(ESRCH)); +} + +// setpriority(2) succeeds for the calling process (who=0). Skipped without +// CAP_SYS_NICE. +TEST(SetpriorityTest, Implemented) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_NICE))); + + // No need to clear errno for setpriority(): + // "The setpriority() call returns 0 if there is no error, or -1 if there is" + EXPECT_THAT(setpriority(PRIO_PROCESS, /*who=*/0, /*nice=*/16), + SyscallSucceeds()); +} + +// An invalid `which` value (3) is expected to fail with EINVAL. +// NOTE(review): this test is registered under suite "Setpriority" while its +// siblings use "SetpriorityTest" -- confirm whether that is intentional. +TEST(Setpriority, InvalidWhich) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_NICE))); + + EXPECT_THAT(setpriority(/*which=*/3, /*who=*/0, /*nice=*/16), + SyscallFailsWithErrno(EINVAL)); +} + +// Process is found when which=PRIO_PROCESS and who is our own pid. 
+TEST(SetpriorityTest, ValidWho) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_NICE))); + + EXPECT_THAT(setpriority(PRIO_PROCESS, getpid(), /*nice=*/16), + SyscallSucceeds()); +} + +// niceval is within the range [-20, 19]: the value set must be read back +// unchanged by getpriority. +TEST(SetpriorityTest, InsideRange) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_NICE))); + + // Set 0 < niceval < 19 + int nice = 12; + EXPECT_THAT(setpriority(PRIO_PROCESS, getpid(), nice), 
SyscallSucceeds()); + + errno = 0; + EXPECT_THAT(getpriority(PRIO_PROCESS, getpid()), + SyscallSucceedsWithValue(nice)); + + // Set -20 < niceval < 0 + nice = -12; + EXPECT_THAT(setpriority(PRIO_PROCESS, getpid(), nice), SyscallSucceeds()); + + errno = 0; + EXPECT_THAT(getpriority(PRIO_PROCESS, getpid()), + SyscallSucceedsWithValue(nice)); +} + +// Verify that priority/niceness are exposed via /proc/self/stat. +TEST(SetpriorityTest, NicenessExposedViaProcfs) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_NICE))); + + constexpr int kNiceVal = 12; + ASSERT_THAT(setpriority(PRIO_PROCESS, getpid(), kNiceVal), SyscallSucceeds()); + + errno = 0; + ASSERT_THAT(getpriority(PRIO_PROCESS, getpid()), + SyscallSucceedsWithValue(kNiceVal)); + + // Now verify we can read that same value via /proc/self/stat. + std::string proc_stat; + ASSERT_NO_ERRNO(GetContents("/proc/self/stat", &proc_stat)); + std::vector<std::string> pieces = absl::StrSplit(proc_stat, ' '); + ASSERT_GT(pieces.size(), 20); + + // The 19th space-separated field (index 18) is parsed as the nice value. + // NOTE(review): splitting on ' ' assumes the comm field contains no + // spaces -- confirm. + int niceness_procfs = 0; + ASSERT_TRUE(absl::SimpleAtoi(pieces[18], &niceness_procfs)); + EXPECT_EQ(niceness_procfs, kNiceVal); +} + +// In the kernel's implementation, values outside the range of [-20, 19] are +// truncated to these minimum and maximum values. 
See +// https://elixir.bootlin.com/linux/v4.4/source/kernel/sys.c#L190 +TEST(SetpriorityTest, OutsideRange) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_NICE))); + + // Set niceval > 19 + EXPECT_THAT(setpriority(PRIO_PROCESS, getpid(), /*nice=*/100), + SyscallSucceeds()); + + errno = 0; + // Test niceval truncated to 19 + EXPECT_THAT(getpriority(PRIO_PROCESS, getpid()), + SyscallSucceedsWithValue(/*maxnice=*/19)); + + // Set niceval < -20 + EXPECT_THAT(setpriority(PRIO_PROCESS, getpid(), /*nice=*/-100), + SyscallSucceeds()); + + errno = 0; + // Test niceval truncated to -20 + EXPECT_THAT(getpriority(PRIO_PROCESS, getpid()), + SyscallSucceedsWithValue(/*minnice=*/-20)); +} + +// Process is not found (ESRCH) when which=PRIO_PROCESS and who is a pid that +// is assumed to be unused. +TEST(SetpriorityTest, InvalidWho) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_NICE))); + + // Flaky, but it's tough to avoid a race condition when finding an unused pid + EXPECT_THAT(setpriority(PRIO_PROCESS, + /*who=*/INT_MAX - 1, + /*nice=*/16), + SyscallFailsWithErrno(ESRCH)); +} + +// Nice succeeds, correctly modifies (or in this case does not +// modify) priority of process. +TEST(SetpriorityTest, NiceSucceeds) { + errno = 0; + const int priority_before = getpriority(PRIO_PROCESS, /*who=*/0); + ASSERT_THAT(nice(/*inc=*/0), SyscallSucceeds()); + + // nice(0) should not change priority + EXPECT_EQ(priority_before, getpriority(PRIO_PROCESS, /*who=*/0)); +} + +// Threads resulting from clone() maintain parent's priority. +// Changes to child priority do not affect parent's priority. 
+TEST(GetpriorityTest, CloneMaintainsPriority) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_NICE))); + + constexpr int kParentPriority = 16; + constexpr int kChildPriority = 14; + ASSERT_THAT(setpriority(PRIO_PROCESS, getpid(), kParentPriority), + SyscallSucceeds()); + + ScopedThread([kParentPriority, kChildPriority]() { + // Check that priority equals that of parent thread + pid_t my_tid; + EXPECT_THAT(my_tid = 
syscall(__NR_gettid), SyscallSucceeds()); + EXPECT_THAT(getpriority(PRIO_PROCESS, my_tid), + SyscallSucceedsWithValue(kParentPriority)); + + // Change the child thread's priority + EXPECT_THAT(setpriority(PRIO_PROCESS, my_tid, kChildPriority), + SyscallSucceeds()); + }); + + // Check that parent's priority remained the same even though + // the child's priority was altered + EXPECT_EQ(kParentPriority, getpriority(PRIO_PROCESS, syscall(__NR_gettid))); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/priority_execve.cc b/test/syscalls/linux/priority_execve.cc new file mode 100644 index 000000000..5604bd3d0 --- /dev/null +++ b/test/syscalls/linux/priority_execve.cc @@ -0,0 +1,42 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/resource.h> +#include <sys/time.h> +#include <sys/types.h> +#include <unistd.h> + +// Helper program: exits with status 20 - getpriority() for its own pid, or +// with 0 if getpriority() fails. +int main(int argc, char** argv, char** envp) { + errno = 0; + int prio = getpriority(PRIO_PROCESS, getpid()); + + // NOTE: getpriority() can legitimately return negative values + // in the range [-20, 0). If errno is set, exit with a value that + // could not be reached by a valid priority. Valid exit values + // for the test are in the range [1, 40], so we'll use 0. 
+ if (errno != 0) { + printf("getpriority() failed with errno = %d\n", errno); + exit(0); + } + + // Used by test to verify priority is being maintained through + // calls to execve(). Since prio should always be in the range + // [-20, 19], we offset by 20 so as not to have negative exit codes. + exit(20 - prio); + + // Not reached: exit() above does not return. + return 0; +} diff --git a/test/syscalls/linux/proc.cc b/test/syscalls/linux/proc.cc new file mode 100644 index 000000000..e64df97b0 --- /dev/null +++ b/test/syscalls/linux/proc.cc @@ -0,0 +1,1830 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <elf.h> +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <sched.h> +#include <signal.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/prctl.h> +#include <sys/stat.h> +#include <sys/utsname.h> +#include <syscall.h> +#include <unistd.h> + +#include <algorithm> +#include <atomic> +#include <functional> +#include <map> +#include <memory> +#include <string> +#include <utility> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/strings/ascii.h" +#include "absl/strings/match.h" +#include "absl/strings/numbers.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_split.h" +#include "absl/strings/string_view.h" +#include "absl/synchronization/mutex.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/util/capability_util.h" +#include "test/util/cleanup.h" +#include "test/util/file_descriptor.h" +#include "test/util/fs_util.h" +#include "test/util/memory_util.h" +#include "test/util/posix_error.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" +#include "test/util/timer_util.h" + +// NOTE: No, this isn't really a syscall but this is a really simple +// way to get it tested on both gVisor, PTrace and Linux. + +using ::testing::AllOf; +using ::testing::ContainerEq; +using ::testing::Contains; +using ::testing::ContainsRegex; +using ::testing::Gt; +using ::testing::HasSubstr; +using ::testing::IsSupersetOf; +using ::testing::Pair; +using ::testing::UnorderedElementsAre; +using ::testing::UnorderedElementsAreArray; + +// Exported by glibc. +extern char** environ; + +namespace gvisor { +namespace testing { +namespace { + +// O_LARGEFILE as defined by Linux. glibc tries to be clever by setting it to 0 +// because "it isn't needed", even though Linux can return it via F_GETFL. 
+constexpr int kOLargeFile = 00100000; + +// Takes the subprocess pid. +// If it returns !OK, WithSubprocess returns immediately. +using SubprocessCallback = std::function<PosixError(int)>; + +std::vector<std::string> saved_argv; // NOLINT + +// Helper function to dump /proc/{pid}/status and check the +// state data. State should = "Z" for zombied or "RSD" for +// running, interruptible sleeping (S), or uninterruptible sleep +// (D). +void CompareProcessState(absl::string_view state, int pid) { + auto status_file = ASSERT_NO_ERRNO_AND_VALUE( + GetContents(absl::StrCat("/proc/", pid, "/status"))); + EXPECT_THAT(status_file, ContainsRegex(absl::StrCat("State:.[", state, + "]\\s+\\(\\w+\\)"))); +} + +// Run callbacks while a subprocess is running, zombied, and/or exited. +// A null callback skips that phase. +PosixError WithSubprocess(SubprocessCallback const& running, + SubprocessCallback const& zombied, + SubprocessCallback const& exited) { + int pipe_fds[2] = {}; + if (pipe(pipe_fds) < 0) { + return PosixError(errno, "pipe"); + } + + int child_pid = fork(); + if (child_pid < 0) { + return PosixError(errno, "fork"); + } + + if (child_pid == 0) { + close(pipe_fds[0]); // Close the read end. + const DisableSave ds; // Timing issues. + + // Write to the pipe to tell the parent we're ready. + char buf = 'a'; + int res = 0; + res = WriteFd(pipe_fds[1], &buf, sizeof(buf)); + TEST_CHECK_MSG(res == sizeof(buf), "Write failure in subprocess"); + + while (true) { + SleepSafe(absl::Milliseconds(100)); + } + __builtin_unreachable(); + } + + close(pipe_fds[1]); // Close the write end. + // NOTE(review): the parent never closes pipe_fds[0] in this function, and + // neither end is closed on the fork() failure path above. 
+ + int status = 0; + auto wait_cleanup = Cleanup([child_pid, &status] { + EXPECT_THAT(waitpid(child_pid, &status, 0), SyscallSucceeds()); + }); + auto kill_cleanup = Cleanup([child_pid] { + EXPECT_THAT(kill(child_pid, SIGKILL), SyscallSucceeds()); + }); + + // Wait for the child to signal readiness over the pipe. 
+ char buf = 0; + int res = ReadFd(pipe_fds[0], &buf, sizeof(buf)); + if (res < 0) { + return PosixError(errno, "Read from pipe"); + } else if (res == 0) { + return PosixError(EPIPE, "Unable to read from pipe: EOF"); + } + + if (running) { + // The first arg, RSD, refers to a "running process", or a process with a + // state of Running (R), Interruptible Sleep (S) or Uninterruptible + // Sleep (D). + CompareProcessState("RSD", child_pid); + RETURN_IF_ERRNO(running(child_pid)); + } + + // Kill the process. + kill_cleanup.Release()(); + siginfo_t info; + // Wait until the child process has exited (WEXITED flag) but don't + // reap the child (WNOWAIT flag). + // NOTE(review): the return value of waitid() is not checked. + waitid(P_PID, child_pid, &info, WNOWAIT | WEXITED); + + if (zombied) { + // Arg of "Z" refers to a Zombied Process. + CompareProcessState("Z", child_pid); + RETURN_IF_ERRNO(zombied(child_pid)); + } + + // Wait on the process. + wait_cleanup.Release()(); + // If the process is reaped, then this should return + // with ECHILD. + EXPECT_THAT(waitpid(child_pid, &status, WNOHANG), + SyscallFailsWithErrno(ECHILD)); + + if (exited) { + RETURN_IF_ERRNO(exited(child_pid)); + } + + return NoError(); +} + +// Open and access the file returned by name while a subprocess is running. +PosixError AccessWhileRunning(std::function<std::string(int pid)> name, int flags, + std::function<void(int fd)> access) { + FileDescriptor fd; + return WithSubprocess( + [&](int pid) -> PosixError { + // Running. + ASSIGN_OR_RETURN_ERRNO(fd, Open(name(pid), flags)); + + access(fd.get()); + return NoError(); + }, + nullptr, nullptr); +} + +// Open the file returned by name while a subprocess is running, then access +// it once the subprocess is zombied. 
+PosixError AccessWhileZombied(std::function<std::string(int pid)> name, int flags, + std::function<void(int fd)> access) { + FileDescriptor fd; + return WithSubprocess( + [&](int pid) -> PosixError { + // Running. + ASSIGN_OR_RETURN_ERRNO(fd, Open(name(pid), flags)); + return NoError(); + }, + [&](int pid) -> PosixError { + // Zombied. 
+ access(fd.get()); + return NoError(); + }, + nullptr); +} + +// Open the file returned by name while a subprocess is running, then access +// it once the subprocess has exited. +PosixError AccessWhileExited(std::function<std::string(int pid)> name, int flags, + std::function<void(int fd)> access) { + FileDescriptor fd; + return WithSubprocess( + [&](int pid) -> PosixError { + // Running. + ASSIGN_OR_RETURN_ERRNO(fd, Open(name(pid), flags)); + return NoError(); + }, + nullptr, + [&](int pid) -> PosixError { + // Exited. + access(fd.get()); + return NoError(); + }); +} + +// ReadFd(fd=/proc/PID/basename) while PID is running. Returns ReadFd's result +// with errno set to the value observed right after the read. +int ReadWhileRunning(std::string const& basename, void* buf, size_t count) { + int ret = 0; + int err = 0; + EXPECT_NO_ERRNO(AccessWhileRunning( + [&](int pid) -> std::string { + return absl::StrCat("/proc/", pid, "/", basename); + }, + O_RDONLY, + [&](int fd) { + ret = ReadFd(fd, buf, count); + err = errno; + })); + errno = err; + return ret; +} + +// ReadFd(fd=/proc/PID/basename) while PID is zombied. Returns ReadFd's result +// with errno set to the value observed right after the read. +int ReadWhileZombied(std::string const& basename, void* buf, size_t count) { + int ret = 0; + int err = 0; + EXPECT_NO_ERRNO(AccessWhileZombied( + [&](int pid) -> std::string { + return absl::StrCat("/proc/", pid, "/", basename); + }, + O_RDONLY, + [&](int fd) { + ret = ReadFd(fd, buf, count); + err = errno; + })); + errno = err; + return ret; +} + +// ReadFd(fd=/proc/PID/basename) while PID is exited. Returns ReadFd's result +// with errno set to the value observed right after the read. 
+int ReadWhileExited(std::string const& basename, void* buf, size_t count) { + int ret = 0; + int err = 0; + EXPECT_NO_ERRNO(AccessWhileExited( + [&](int pid) -> std::string { + return absl::StrCat("/proc/", pid, "/", basename); + }, + O_RDONLY, + [&](int fd) { + ret = ReadFd(fd, buf, count); + err = errno; + })); + errno = err; + return ret; +} + +// readlinkat(fd=/proc/PID/, basename) while PID is running. 
+int ReadlinkWhileRunning(std::string const& basename, char* buf, size_t count) { + int ret = 0; + int err = 0; + EXPECT_NO_ERRNO(AccessWhileRunning( + [&](int pid) -> std::string { return absl::StrCat("/proc/", pid, "/"); }, + O_DIRECTORY, + [&](int fd) { + ret = readlinkat(fd, basename.c_str(), buf, count); + err = errno; + })); + errno = err; + return ret; +} + +// readlinkat(fd=/proc/PID/, basename) while PID is zombied. +int ReadlinkWhileZombied(std::string const& basename, char* buf, size_t count) { + int ret = 0; + int err = 0; + EXPECT_NO_ERRNO(AccessWhileZombied( + [&](int pid) -> std::string { return absl::StrCat("/proc/", pid, "/"); }, + O_DIRECTORY, + [&](int fd) { + ret = readlinkat(fd, basename.c_str(), buf, count); + err = errno; + })); + errno = err; + return ret; +} + +// readlinkat(fd=/proc/PID/, basename) while PID is exited. +int ReadlinkWhileExited(std::string const& basename, char* buf, size_t count) { + int ret = 0; + int err = 0; + EXPECT_NO_ERRNO(AccessWhileExited( + [&](int pid) -> std::string { return absl::StrCat("/proc/", pid, "/"); }, + O_DIRECTORY, + [&](int fd) { + ret = readlinkat(fd, basename.c_str(), buf, count); + err = errno; + })); + errno = err; + return ret; +} + +TEST(ProcSelfTest, IsThreadGroupLeader) { + ScopedThread([] { + const pid_t tgid = getpid(); + const pid_t tid = syscall(SYS_gettid); + EXPECT_NE(tgid, tid); + auto link = ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/self")); + EXPECT_EQ(link, absl::StrCat(tgid)); + }); +} + +TEST(ProcThreadSelfTest, Basic) { + const pid_t tgid = getpid(); + const pid_t tid = syscall(SYS_gettid); + EXPECT_EQ(tgid, tid); + auto link_threadself = + ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/thread-self")); + EXPECT_EQ(link_threadself, absl::StrCat(tgid, "/task/", tid)); + // Just read one file inside thread-self to ensure that the link is valid. 
+ auto link_threadself_exe = + ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/thread-self/exe")); + auto link_procself_exe = + ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/self/exe")); + EXPECT_EQ(link_threadself_exe, link_procself_exe); +} + +TEST(ProcThreadSelfTest, Thread) { + ScopedThread([] { + const pid_t tgid = getpid(); + const pid_t tid = syscall(SYS_gettid); + EXPECT_NE(tgid, tid); + auto link_threadself = + ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/thread-self")); + + EXPECT_EQ(link_threadself, absl::StrCat(tgid, "/task/", tid)); + // Just read one file inside thread-self to ensure that the link is valid. + auto link_threadself_exe = + ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/thread-self/exe")); + auto link_procself_exe = + ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/self/exe")); + EXPECT_EQ(link_threadself_exe, link_procself_exe); + // A thread should not have "/proc/<tid>/task". + struct stat s; + EXPECT_THAT(stat("/proc/thread-self/task", &s), + SyscallFailsWithErrno(ENOENT)); + }); +} + +// Returns the /proc/PID/maps entry for the MAP_PRIVATE | MAP_ANONYMOUS mapping +// m with start address addr and length len. +std::string AnonymousMapsEntry(uintptr_t addr, size_t len, int prot) { + return absl::StrCat(absl::Hex(addr, absl::PadSpec::kZeroPad8), "-", + absl::Hex(addr + len, absl::PadSpec::kZeroPad8), " ", + prot & PROT_READ ? "r" : "-", + prot & PROT_WRITE ? "w" : "-", + prot & PROT_EXEC ? 
"x" : "-", "p 00000000 00:00 0 "); +} + +std::string AnonymousMapsEntryForMapping(const Mapping& m, int prot) { + return AnonymousMapsEntry(m.addr(), m.len(), prot); +} + +PosixErrorOr<std::map<uint64_t, uint64_t>> ReadProcSelfAuxv() { + std::string auxv_file; + RETURN_IF_ERRNO(GetContents("/proc/self/auxv", &auxv_file)); + const Elf64_auxv_t* auxv_data = + reinterpret_cast<const Elf64_auxv_t*>(auxv_file.data()); + std::map<uint64_t, uint64_t> auxv_entries; + for (int i = 0; auxv_data[i].a_type != AT_NULL; i++) { + auto a_type = auxv_data[i].a_type; + EXPECT_EQ(0, auxv_entries.count(a_type)) << "a_type: " << a_type; + auxv_entries.emplace(a_type, auxv_data[i].a_un.a_val); + } + return auxv_entries; +} + +TEST(ProcSelfAuxv, EntryPresence) { + auto auxv_entries = ASSERT_NO_ERRNO_AND_VALUE(ReadProcSelfAuxv()); + + EXPECT_EQ(auxv_entries.count(AT_ENTRY), 1); + EXPECT_EQ(auxv_entries.count(AT_PHDR), 1); + EXPECT_EQ(auxv_entries.count(AT_PHENT), 1); + EXPECT_EQ(auxv_entries.count(AT_PHNUM), 1); + EXPECT_EQ(auxv_entries.count(AT_BASE), 1); + EXPECT_EQ(auxv_entries.count(AT_CLKTCK), 1); + EXPECT_EQ(auxv_entries.count(AT_RANDOM), 1); + EXPECT_EQ(auxv_entries.count(AT_EXECFN), 1); + EXPECT_EQ(auxv_entries.count(AT_PAGESZ), 1); + EXPECT_EQ(auxv_entries.count(AT_SYSINFO_EHDR), 1); +} + +TEST(ProcSelfAuxv, EntryValues) { + auto proc_auxv = ASSERT_NO_ERRNO_AND_VALUE(ReadProcSelfAuxv()); + + // We need to find the ELF auxiliary vector. The section of memory pointed to + // by envp contains some pointers to non-null pointers, followed by a single + // pointer to a null pointer, followed by the auxiliary vector. 
+ char** envpi = environ; + while (*envpi) { + ++envpi; + } + + const Elf64_auxv_t* envp_auxv = + reinterpret_cast<const Elf64_auxv_t*>(envpi + 1); + int i; + for (i = 0; envp_auxv[i].a_type != AT_NULL; i++) { + auto a_type = envp_auxv[i].a_type; + EXPECT_EQ(proc_auxv.count(a_type), 1); + EXPECT_EQ(proc_auxv[a_type], envp_auxv[i].a_un.a_val) + << "a_type: " << a_type; + } + EXPECT_EQ(i, proc_auxv.size()); +} + +// Just open and read /proc/self/maps, check that we can find [stack] +TEST(ProcSelfMaps, Basic) { + auto proc_self_maps = + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps")); + + std::vector<std::string> strings = absl::StrSplit(proc_self_maps, '\n'); + std::vector<std::string> stacks; + // Make sure there's a stack in there. + for (const auto& str : strings) { + if (str.find("[stack]") != std::string::npos) { + stacks.push_back(str); + } + } + ASSERT_EQ(1, stacks.size()) << "[stack] not found in: " << proc_self_maps; + // Linux pads to 73 characters then we add 7. + EXPECT_EQ(80, stacks[0].length()); +} + +TEST(ProcSelfMaps, Map1) { + Mapping mapping = + ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_READ, MAP_PRIVATE)); + auto proc_self_maps = + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps")); + std::vector<std::string> strings = absl::StrSplit(proc_self_maps, '\n'); + std::vector<std::string> addrs; + // Make sure if is listed. + for (const auto& str : strings) { + if (str == AnonymousMapsEntryForMapping(mapping, PROT_READ)) { + addrs.push_back(str); + } + } + ASSERT_EQ(1, addrs.size()); +} + +TEST(ProcSelfMaps, Map2) { + // NOTE: The permissions must be different or the pages will get merged. 
+ Mapping map1 = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kPageSize, PROT_READ | PROT_EXEC, MAP_PRIVATE)); + Mapping map2 = + ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_WRITE, MAP_PRIVATE)); + + auto proc_self_maps = + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps")); + std::vector<std::string> strings = absl::StrSplit(proc_self_maps, '\n'); + std::vector<std::string> addrs; + // Make sure if is listed. + for (const auto& str : strings) { + if (str == AnonymousMapsEntryForMapping(map1, PROT_READ | PROT_EXEC)) { + addrs.push_back(str); + } + } + ASSERT_EQ(1, addrs.size()); + addrs.clear(); + for (const auto& str : strings) { + if (str == AnonymousMapsEntryForMapping(map2, PROT_WRITE)) { + addrs.push_back(str); + } + } + ASSERT_EQ(1, addrs.size()); +} + +TEST(ProcSelfMaps, MapUnmap) { + Mapping map1 = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kPageSize, PROT_READ | PROT_EXEC, MAP_PRIVATE)); + Mapping map2 = + ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_WRITE, MAP_PRIVATE)); + + auto proc_self_maps = + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps")); + std::vector<std::string> strings = absl::StrSplit(proc_self_maps, '\n'); + std::vector<std::string> addrs; + // Make sure if is listed. + for (const auto& str : strings) { + if (str == AnonymousMapsEntryForMapping(map1, PROT_READ | PROT_EXEC)) { + addrs.push_back(str); + } + } + ASSERT_EQ(1, addrs.size()) << proc_self_maps; + addrs.clear(); + for (const auto& str : strings) { + if (str == AnonymousMapsEntryForMapping(map2, PROT_WRITE)) { + addrs.push_back(str); + } + } + ASSERT_EQ(1, addrs.size()); + + map2.reset(); + + // Read it again. + proc_self_maps = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps")); + strings = absl::StrSplit(proc_self_maps, '\n'); + // First entry should be there. 
+ addrs.clear(); + for (const auto& str : strings) { + if (str == AnonymousMapsEntryForMapping(map1, PROT_READ | PROT_EXEC)) { + addrs.push_back(str); + } + } + ASSERT_EQ(1, addrs.size()); + addrs.clear(); + // But not the second. + for (const auto& str : strings) { + if (str == AnonymousMapsEntryForMapping(map2, PROT_WRITE)) { + addrs.push_back(str); + } + } + ASSERT_EQ(0, addrs.size()); +} + +TEST(ProcSelfMaps, Mprotect) { + if (!IsRunningOnGvisor()) { + // FIXME: Linux's mprotect() sometimes fails to merge VMAs in this + // case. + LOG(WARNING) << "Skipping test on Linux"; + return; + } + + // Reserve 5 pages of address space. + Mapping m = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(5 * kPageSize, PROT_NONE, MAP_PRIVATE)); + + // Change the permissions on the middle 3 pages. (The first and last pages may + // be merged with other vmas on either side, so they aren't tested directly; + // they just ensure that the middle 3 pages are bracketed by VMAs with + // incompatible permissions.) + ASSERT_THAT(mprotect(reinterpret_cast<void*>(m.addr() + kPageSize), + 3 * kPageSize, PROT_READ), + SyscallSucceeds()); + + // Check that the middle 3 pages make up a single VMA. + auto proc_self_maps = + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps")); + std::vector<std::string> strings = absl::StrSplit(proc_self_maps, '\n'); + EXPECT_THAT(strings, Contains(AnonymousMapsEntry(m.addr() + kPageSize, + 3 * kPageSize, PROT_READ))); + + // Change the permissions on the middle page only. + ASSERT_THAT(mprotect(reinterpret_cast<void*>(m.addr() + 2 * kPageSize), + kPageSize, PROT_READ | PROT_WRITE), + SyscallSucceeds()); + + // Check that the single VMA has been split into 3 VMAs. 
+ proc_self_maps = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps")); + strings = absl::StrSplit(proc_self_maps, '\n'); + EXPECT_THAT( + strings, + IsSupersetOf( + {AnonymousMapsEntry(m.addr() + kPageSize, kPageSize, PROT_READ), + AnonymousMapsEntry(m.addr() + 2 * kPageSize, kPageSize, + PROT_READ | PROT_WRITE), + AnonymousMapsEntry(m.addr() + 3 * kPageSize, kPageSize, + PROT_READ)})); + + // Change the permissions on the middle page back. + ASSERT_THAT(mprotect(reinterpret_cast<void*>(m.addr() + 2 * kPageSize), + kPageSize, PROT_READ), + SyscallSucceeds()); + + // Check that the 3 VMAs have been merged back into a single VMA. + proc_self_maps = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps")); + strings = absl::StrSplit(proc_self_maps, '\n'); + EXPECT_THAT(strings, Contains(AnonymousMapsEntry(m.addr() + kPageSize, + 3 * kPageSize, PROT_READ))); +} + +TEST(ProcSelfFd, OpenFd) { + int pipe_fds[2]; + ASSERT_THAT(pipe2(pipe_fds, O_CLOEXEC), SyscallSucceeds()); + + // Reopen the write end. + const std::string path = absl::StrCat("/proc/self/fd/", pipe_fds[1]); + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(path, O_WRONLY)); + + // Ensure that a read/write works. + const std::string data = "hello"; + std::unique_ptr<char[]> buffer(new char[data.size()]); + EXPECT_THAT(write(fd.get(), data.c_str(), data.size()), + SyscallSucceedsWithValue(5)); + EXPECT_THAT(read(pipe_fds[0], buffer.get(), data.size()), + SyscallSucceedsWithValue(5)); + EXPECT_EQ(strncmp(buffer.get(), data.c_str(), data.size()), 0); + + // Cleanup. + ASSERT_THAT(close(pipe_fds[0]), SyscallSucceeds()); + ASSERT_THAT(close(pipe_fds[1]), SyscallSucceeds()); +} + +TEST(ProcSelfFdInfo, CorrectFds) { + // Make sure there is at least one open file. + auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDONLY)); + + // Get files in /proc/self/fd. 
+ auto fd_files = ASSERT_NO_ERRNO_AND_VALUE(ListDir("/proc/self/fd", false)); + + // Get files in /proc/self/fdinfo. + auto fdinfo_files = + ASSERT_NO_ERRNO_AND_VALUE(ListDir("/proc/self/fdinfo", false)); + + // They should contain the same fds. + EXPECT_THAT(fd_files, UnorderedElementsAreArray(fdinfo_files)); + + // Both should contain fd. + auto fd_s = absl::StrCat(fd.get()); + EXPECT_THAT(fd_files, Contains(fd_s)); +} + +TEST(ProcSelfFdInfo, Flags) { + std::string path = NewTempAbsPath(); + + // Create file here with O_CREAT to test that O_CREAT does not appear in + // fdinfo flags. + int flags = O_CREAT | O_RDWR | O_APPEND | O_CLOEXEC; + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(path, flags, 0644)); + + // Automatically delete path. + TempPath temp_path(path); + + // O_CREAT does not appear in fdinfo flags. + flags &= ~O_CREAT; + + // O_LARGEFILE always appears (on x86_64). + flags |= kOLargeFile; + + auto fd_info = ASSERT_NO_ERRNO_AND_VALUE( + GetContents(absl::StrCat("/proc/self/fdinfo/", fd.get()))); + EXPECT_THAT(fd_info, HasSubstr(absl::StrFormat("flags:\t%#o", flags))); +} + +TEST(ProcSelfExe, Absolute) { + auto exe = ASSERT_NO_ERRNO_AND_VALUE( + ReadLink(absl::StrCat("/proc/", getpid(), "/exe"))); + EXPECT_EQ(exe[0], '/'); +} + +// Sanity check for /proc/cpuinfo fields that must be present. +TEST(ProcCpuinfo, RequiredFieldsArePresent) { + std::string proc_cpuinfo = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/cpuinfo")); + ASSERT_FALSE(proc_cpuinfo.empty()); + std::vector<std::string> cpuinfo_fields = absl::StrSplit(proc_cpuinfo, '\n'); + + // This list of "required" fields is taken from reading the file + // arch/x86/kernel/cpu/proc.c and seeing which fields will be unconditionally + // printed by the kernel. 
+ static const char* required_fields[] = { + "processor", + "vendor_id", + "cpu family", + "model\t\t:", + "model name", + "stepping", + "cpu MHz", + "fpu\t\t:", + "fpu_exception", + "cpuid level", + "wp", + "bogomips", + "clflush size", + "cache_alignment", + "address sizes", + "power management", + }; + + // Check that the usual fields are there. We don't really care about the + // contents. + for (const std::string& field : required_fields) { + EXPECT_THAT(proc_cpuinfo, HasSubstr(field)); + } +} + +// Sanity checks that uptime is present. +TEST(ProcUptime, IsPresent) { + std::string proc_uptime = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/uptime")); + ASSERT_FALSE(proc_uptime.empty()); + std::vector<std::string> uptime_parts = absl::StrSplit(proc_uptime, ' '); + + // Parse once. + double uptime0, uptime1, idletime0, idletime1; + ASSERT_TRUE(absl::SimpleAtod(uptime_parts[0], &uptime0)); + ASSERT_TRUE(absl::SimpleAtod(uptime_parts[1], &idletime0)); + + // Sleep for one second. + absl::SleepFor(absl::Seconds(1)); + + // Parse again. + proc_uptime = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/uptime")); + ASSERT_FALSE(proc_uptime.empty()); + uptime_parts = absl::StrSplit(proc_uptime, ' '); + ASSERT_TRUE(absl::SimpleAtod(uptime_parts[0], &uptime1)); + ASSERT_TRUE(absl::SimpleAtod(uptime_parts[1], &idletime1)); + + // Sanity check. + // + // We assert that between 0.99 and 59.99 seconds have passed. If more than a + // minute has passed, then we must be executing really, really slowly. 
+ EXPECT_GE(uptime0, 0.0); + EXPECT_GE(idletime0, 0.0); + EXPECT_GT(uptime1, uptime0); + EXPECT_GE(uptime1, uptime0 + 0.99); + EXPECT_LE(uptime1, uptime0 + 59.99); + EXPECT_GE(idletime1, idletime0); +} + +TEST(ProcMeminfo, ContainsBasicFields) { + std::string proc_meminfo = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/meminfo")); + EXPECT_THAT(proc_meminfo, AllOf(ContainsRegex(R"(MemTotal:\s+[0-9]+ kB)"), + ContainsRegex(R"(MemFree:\s+[0-9]+ kB)"))); +} + +TEST(ProcStat, ContainsBasicFields) { + std::string proc_stat = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/stat")); + + std::vector<std::string> names; + for (auto const& line : absl::StrSplit(proc_stat, '\n')) { + std::vector<std::string> fields = + absl::StrSplit(line, ' ', absl::SkipWhitespace()); + if (fields.empty()) { + continue; + } + names.push_back(fields[0]); + } + + EXPECT_THAT(names, + IsSupersetOf({"cpu", "intr", "ctxt", "btime", "processes", + "procs_running", "procs_blocked", "softirq"})); +} + +TEST(ProcStat, EndsWithNewline) { + std::string proc_stat = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/stat")); + EXPECT_EQ(proc_stat.back(), '\n'); +} + +TEST(ProcStat, Fields) { + std::string proc_stat = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/stat")); + + std::vector<std::string> names; + for (auto const& line : absl::StrSplit(proc_stat, '\n')) { + std::vector<std::string> fields = + absl::StrSplit(line, ' ', absl::SkipWhitespace()); + if (fields.empty()) { + continue; + } + + if (absl::StartsWith(fields[0], "cpu")) { + // As of Linux 3.11, each CPU entry has 10 fields, plus the name. + EXPECT_GE(fields.size(), 11) << proc_stat; + } else if (fields[0] == "ctxt") { + // Single field. + EXPECT_EQ(fields.size(), 2) << proc_stat; + } else if (fields[0] == "btime") { + // Single field. + EXPECT_EQ(fields.size(), 2) << proc_stat; + } else if (fields[0] == "itime") { + // Single field. + ASSERT_EQ(fields.size(), 2) << proc_stat; + // This is the only floating point field. 
+ double val; + EXPECT_TRUE(absl::SimpleAtod(fields[1], &val)) << proc_stat; + continue; + } else if (fields[0] == "processes") { + // Single field. + EXPECT_EQ(fields.size(), 2) << proc_stat; + } else if (fields[0] == "procs_running") { + // Single field. + EXPECT_EQ(fields.size(), 2) << proc_stat; + } else if (fields[0] == "procs_blocked") { + // Single field. + EXPECT_EQ(fields.size(), 2) << proc_stat; + } else if (fields[0] == "softirq") { + // As of Linux 3.11, there are 10 softirqs. 12 fields for name + total. + EXPECT_GE(fields.size(), 12) << proc_stat; + } + + // All fields besides itime are valid base 10 numbers. + for (size_t i = 1; i < fields.size(); i++) { + uint64_t val; + EXPECT_TRUE(absl::SimpleAtoi(fields[i], &val)) << proc_stat; + } + } +} + +TEST(ProcLoadavg, EndsWithNewline) { + std::string proc_loadvg = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/loadavg")); + EXPECT_EQ(proc_loadvg.back(), '\n'); +} + +TEST(ProcLoadavg, Fields) { + std::string proc_loadvg = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/loadavg")); + std::vector<std::string> lines = absl::StrSplit(proc_loadvg, '\n'); + + // Single line. + EXPECT_EQ(lines.size(), 2) << proc_loadvg; + + std::vector<std::string> fields = + absl::StrSplit(lines[0], absl::ByAnyChar(" /"), absl::SkipWhitespace()); + + // Six fields. + EXPECT_EQ(fields.size(), 6) << proc_loadvg; + + double val; + uint64_t val2; + // First three fields are floating point numbers. + EXPECT_TRUE(absl::SimpleAtod(fields[0], &val)) << proc_loadvg; + EXPECT_TRUE(absl::SimpleAtod(fields[1], &val)) << proc_loadvg; + EXPECT_TRUE(absl::SimpleAtod(fields[2], &val)) << proc_loadvg; + // Rest of the fields are valid base 10 numbers. + EXPECT_TRUE(absl::SimpleAtoi(fields[3], &val2)) << proc_loadvg; + EXPECT_TRUE(absl::SimpleAtoi(fields[4], &val2)) << proc_loadvg; + EXPECT_TRUE(absl::SimpleAtoi(fields[5], &val2)) << proc_loadvg; +} + +// NOTE: Tests in priority.cc also check certain priority related fields in +// /proc/self/stat. 
+ +class ProcPidStatTest : public ::testing::TestWithParam<std::string> {}; + +TEST_P(ProcPidStatTest, HasBasicFields) { + std::string proc_pid_stat = ASSERT_NO_ERRNO_AND_VALUE( + GetContents(absl::StrCat("/proc/", GetParam(), "/stat"))); + + ASSERT_FALSE(proc_pid_stat.empty()); + std::vector<std::string> fields = absl::StrSplit(proc_pid_stat, ' '); + ASSERT_GE(fields.size(), 24); + EXPECT_EQ(absl::StrCat(getpid()), fields[0]); + // fields[1] is the thread name. + EXPECT_EQ("R", fields[2]); // task state + EXPECT_EQ(absl::StrCat(getppid()), fields[3]); + + uint64_t vss; + ASSERT_TRUE(absl::SimpleAtoi(fields[22], &vss)); + EXPECT_GT(vss, 0); + + uint64_t rss; + ASSERT_TRUE(absl::SimpleAtoi(fields[23], &rss)); + EXPECT_GT(rss, 0); +} + +INSTANTIATE_TEST_CASE_P(SelfAndNumericPid, ProcPidStatTest, + ::testing::Values("self", absl::StrCat(getpid()))); + +using ProcPidStatmTest = ::testing::TestWithParam<std::string>; + +TEST_P(ProcPidStatmTest, HasBasicFields) { + std::string proc_pid_statm = ASSERT_NO_ERRNO_AND_VALUE( + GetContents(absl::StrCat("/proc/", GetParam(), "/statm"))); + ASSERT_FALSE(proc_pid_statm.empty()); + std::vector<std::string> fields = absl::StrSplit(proc_pid_statm, ' '); + ASSERT_GE(fields.size(), 7); + + uint64_t vss; + ASSERT_TRUE(absl::SimpleAtoi(fields[0], &vss)); + EXPECT_GT(vss, 0); + + uint64_t rss; + ASSERT_TRUE(absl::SimpleAtoi(fields[1], &rss)); + EXPECT_GT(rss, 0); +} + +INSTANTIATE_TEST_CASE_P(SelfAndNumericPid, ProcPidStatmTest, + ::testing::Values("self", absl::StrCat(getpid()))); + +PosixErrorOr<uint64_t> CurrentRSS() { + ASSIGN_OR_RETURN_ERRNO(auto proc_self_stat, GetContents("/proc/self/stat")); + if (proc_self_stat.empty()) { + return PosixError(EINVAL, "empty /proc/self/stat"); + } + + std::vector<std::string> fields = absl::StrSplit(proc_self_stat, ' '); + if (fields.size() < 24) { + return PosixError( + EINVAL, + absl::StrCat("/proc/self/stat has too few fields: ", proc_self_stat)); + } + + uint64_t rss; + if 
(!absl::SimpleAtoi(fields[23], &rss)) { + return PosixError( + EINVAL, absl::StrCat("/proc/self/stat RSS field is not a number: ", + fields[23])); + } + + // RSS is given in number of pages. + return rss * kPageSize; +} + +// The size of mapping created by MapPopulateRSS. +constexpr uint64_t kMappingSize = 100 << 20; + +// Tolerance on RSS comparisons to account for background thread mappings, +// reclaimed pages, newly faulted pages, etc. +constexpr uint64_t kRSSTolerance = 5 << 20; + +// Capture RSS before and after an anonymous mapping with passed prot. +void MapPopulateRSS(int prot, uint64_t* before, uint64_t* after) { + *before = ASSERT_NO_ERRNO_AND_VALUE(CurrentRSS()); + + // N.B. The kernel asynchronously accumulates per-task RSS counters into the + // mm RSS, which is exposed by /proc/PID/stat. Task exit is a synchronization + // point (kernel/exit.c:do_exit -> sync_mm_rss), so perform the mapping on + // another thread to ensure it is reflected in RSS after the thread exits. + Mapping mapping; + ScopedThread t([&mapping, prot] { + mapping = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kMappingSize, prot, MAP_PRIVATE | MAP_POPULATE)); + }); + t.Join(); + + *after = ASSERT_NO_ERRNO_AND_VALUE(CurrentRSS()); +} + +// TODO: Test for PROT_READ + MAP_POPULATE anonymous mappings. Their +// semantics are more subtle: +// +// Small pages -> Zero page mapped, not counted in RSS +// (mm/memory.c:do_anonymous_page). +// +// Huge pages (THP enabled, use_zero_page=0) -> Pages committed +// (mm/memory.c:__handle_mm_fault -> create_huge_pmd). +// +// Huge pages (THP enabled, use_zero_page=1) -> Zero page mapped, not counted in +// RSS (mm/huge_memory.c:do_huge_pmd_anonymous_page). + +// PROT_WRITE + MAP_POPULATE anonymous mappings are always committed. +TEST(ProcSelfStat, PopulateWriteRSS) { + uint64_t before, after; + MapPopulateRSS(PROT_READ | PROT_WRITE, &before, &after); + + // Mapping is committed. 
+ EXPECT_NEAR(before + kMappingSize, after, kRSSTolerance); +} + +// PROT_NONE + MAP_POPULATE anonymous mappings are never committed. +TEST(ProcSelfStat, PopulateNoneRSS) { + uint64_t before, after; + MapPopulateRSS(PROT_NONE, &before, &after); + + // Mapping not committed. + EXPECT_NEAR(before, after, kRSSTolerance); +} + +// Returns the calling thread's name. +PosixErrorOr<std::string> ThreadName() { + // "The buffer should allow space for up to 16 bytes; the returned std::string + // will be null-terminated if it is shorter than that." - prctl(2). But we + // always want the thread name to be null-terminated. + char thread_name[17]; + int rc = prctl(PR_GET_NAME, thread_name, 0, 0, 0); + MaybeSave(); + if (rc < 0) { + return PosixError(errno, "prctl(PR_GET_NAME)"); + } + thread_name[16] = '\0'; + return std::string(thread_name); +} + +// Parses the contents of a /proc/[pid]/status file into a collection of +// key-value pairs. +PosixErrorOr<std::map<std::string, std::string>> ParseProcStatus( + absl::string_view status_str) { + std::map<std::string, std::string> fields; + for (absl::string_view const line : + absl::StrSplit(status_str, '\n', absl::SkipWhitespace())) { + const std::pair<absl::string_view, absl::string_view> kv = + absl::StrSplit(line, absl::MaxSplits(":\t", 1)); + if (kv.first.empty()) { + return PosixError( + EINVAL, absl::StrCat("failed to parse key in line \"", line, "\"")); + } + std::string key(kv.first); + if (fields.count(key)) { + return PosixError(EINVAL, + absl::StrCat("duplicate key \"", kv.first, "\"")); + } + std::string value(kv.second); + absl::StripLeadingAsciiWhitespace(&value); + fields.emplace(std::move(key), std::move(value)); + } + return fields; +} + +TEST(ParseProcStatusTest, ParsesSimpleStatusFileWithMixedWhitespaceCorrectly) { + EXPECT_THAT( + ParseProcStatus( + "Name:\tinit\nState:\tS (sleeping)\nCapEff:\t 0000001fffffffff\n"), + IsPosixErrorOkAndHolds(UnorderedElementsAre( + Pair("Name", "init"), Pair("State", "S 
(sleeping)"), + Pair("CapEff", "0000001fffffffff")))); +} + +TEST(ParseProcStatusTest, DetectsDuplicateKeys) { + auto proc_status_or = ParseProcStatus("Name:\tfoo\nName:\tfoo\n"); + EXPECT_THAT(proc_status_or, + PosixErrorIs(EINVAL, ::testing::StrEq("duplicate key \"Name\""))); +} + +TEST(ParseProcStatusTest, DetectsMissingTabs) { + EXPECT_THAT(ParseProcStatus("Name:foo\nPid: 1\n"), + IsPosixErrorOkAndHolds(UnorderedElementsAre(Pair("Name:foo", ""), + Pair("Pid: 1", "")))); +} + +TEST(ProcPidStatusTest, HasBasicFields) { + // Do this on a separate thread since we want tgid != tid. + ScopedThread([] { + const pid_t tgid = getpid(); + const pid_t tid = syscall(SYS_gettid); + EXPECT_NE(tgid, tid); + const auto thread_name = ASSERT_NO_ERRNO_AND_VALUE(ThreadName()); + + std::string status_str = ASSERT_NO_ERRNO_AND_VALUE( + GetContents(absl::StrCat("/proc/", tid, "/status"))); + + ASSERT_FALSE(status_str.empty()); + const auto status = ASSERT_NO_ERRNO_AND_VALUE(ParseProcStatus(status_str)); + EXPECT_THAT(status, IsSupersetOf({Pair("Name", thread_name), + Pair("Tgid", absl::StrCat(tgid)), + Pair("Pid", absl::StrCat(tid)), + Pair("PPid", absl::StrCat(getppid()))})); + }); +} + +TEST(ProcPidStatusTest, StateRunning) { + // Task must be running when reading the file. + const pid_t tid = syscall(SYS_gettid); + std::string status_str = ASSERT_NO_ERRNO_AND_VALUE( + GetContents(absl::StrCat("/proc/", tid, "/status"))); + + EXPECT_THAT(ParseProcStatus(status_str), + IsPosixErrorOkAndHolds(Contains(Pair("State", "R (running)")))); +} + +TEST(ProcPidStatusTest, StateSleeping_NoRandomSave) { + // Starts a child process that blocks and checks that State is sleeping. + auto res = WithSubprocess( + [&](int pid) -> PosixError { + // Because this test is timing based we will disable cooperative saving + // and the test itself also has random saving disabled. + const DisableSave ds; + // Try multiple times in case the child isn't sleeping when status file + // is read. 
+ MonotonicTimer timer; + timer.Start(); + for (;;) { + ASSIGN_OR_RETURN_ERRNO( + std::string status_str, + GetContents(absl::StrCat("/proc/", pid, "/status"))); + ASSIGN_OR_RETURN_ERRNO(auto map, ParseProcStatus(status_str)); + if (map["State"] == std::string("S (sleeping)")) { + // Test passed! + return NoError(); + } + if (timer.Duration() > absl::Seconds(10)) { + return PosixError(ETIMEDOUT, "Timeout waiting for child to sleep"); + } + absl::SleepFor(absl::Milliseconds(10)); + } + }, + nullptr, nullptr); + ASSERT_NO_ERRNO(res); +} + +TEST(ProcPidStatusTest, ValuesAreTabDelimited) { + std::string status_str = + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/status")); + ASSERT_FALSE(status_str.empty()); + for (absl::string_view const line : + absl::StrSplit(status_str, '\n', absl::SkipWhitespace())) { + EXPECT_NE(std::string::npos, line.find(":\t")); + } +} + +// Threads properly counts running threads. +// +// TODO: Test zombied threads while the thread group leader is still +// running with generalized fork and clone children from the wait test. +TEST(ProcPidStatusTest, Threads) { + char buf[4096] = {}; + EXPECT_THAT(ReadWhileRunning("status", buf, sizeof(buf) - 1), + SyscallSucceedsWithValue(Gt(0))); + + auto status = ASSERT_NO_ERRNO_AND_VALUE(ParseProcStatus(buf)); + auto it = status.find("Threads"); + ASSERT_NE(it, status.end()); + int threads = -1; + EXPECT_TRUE(absl::SimpleAtoi(it->second, &threads)) + << "Threads value " << it->second << " is not a number"; + // Don't make assumptions about the exact number of threads, as it may not be + // constant. 
+ EXPECT_GE(threads, 1); + + memset(buf, 0, sizeof(buf)); + EXPECT_THAT(ReadWhileZombied("status", buf, sizeof(buf) - 1), + SyscallSucceedsWithValue(Gt(0))); + + status = ASSERT_NO_ERRNO_AND_VALUE(ParseProcStatus(buf)); + it = status.find("Threads"); + ASSERT_NE(it, status.end()); + threads = -1; + EXPECT_TRUE(absl::SimpleAtoi(it->second, &threads)) + << "Threads value " << it->second << " is not a number"; + // There must be only the thread group leader remaining, zombied. + EXPECT_EQ(threads, 1); +} + +// Returns true if all characters in s are digits. +bool IsDigits(absl::string_view s) { + return std::all_of(s.begin(), s.end(), absl::ascii_isdigit); +} + +TEST(ProcPidStatTest, VSSRSS) { + std::string status_str = + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/status")); + ASSERT_FALSE(status_str.empty()); + auto status = ASSERT_NO_ERRNO_AND_VALUE(ParseProcStatus(status_str)); + + const auto vss_it = status.find("VmSize"); + ASSERT_NE(vss_it, status.end()); + + absl::string_view vss_str(vss_it->second); + + // Room for the " kB" suffix plus at least one digit. + ASSERT_GT(vss_str.length(), 3); + EXPECT_TRUE(absl::EndsWith(vss_str, " kB")); + // Everything else is part of a number. + EXPECT_TRUE(IsDigits(vss_str.substr(0, vss_str.length() - 3))) << vss_str; + // ... which is not 0. + EXPECT_NE('0', vss_str[0]); + + const auto rss_it = status.find("VmRSS"); + ASSERT_NE(rss_it, status.end()); + + absl::string_view rss_str(rss_it->second); + + // Room for the " kB" suffix plus at least one digit. + ASSERT_GT(rss_str.length(), 3); + EXPECT_TRUE(absl::EndsWith(rss_str, " kB")); + // Everything else is part of a number. + EXPECT_TRUE(IsDigits(rss_str.substr(0, rss_str.length() - 3))) << rss_str; + // ... which is not 0. + EXPECT_NE('0', rss_str[0]); +} + +// Parse an array of NUL-terminated char* arrays, returning a vector of strings. 
+std::vector<std::string> ParseNulTerminatedStrings(std::string contents) { + EXPECT_EQ('\0', contents.back()); + // The split will leave an empty std::string if the NUL-byte remains, so pop it. + contents.pop_back(); + + return absl::StrSplit(contents, '\0'); +} + +TEST(ProcPidCmdline, MatchesArgv) { + std::vector<std::string> proc_cmdline = ParseNulTerminatedStrings( + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/cmdline"))); + EXPECT_THAT(saved_argv, ContainerEq(proc_cmdline)); +} + +TEST(ProcPidEnviron, MatchesEnviron) { + std::vector<std::string> proc_environ = ParseNulTerminatedStrings( + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/environ"))); + // Get the environment from the environ variable, which we will compare with + // /proc/self/environ. + std::vector<std::string> env; + for (char** v = environ; *v; v++) { + env.push_back(*v); + } + EXPECT_THAT(env, ContainerEq(proc_environ)); +} + +TEST(ProcPidCmdline, SubprocessForkSameCmdline) { + std::vector<std::string> proc_cmdline_parent; + std::vector<std::string> proc_cmdline; + proc_cmdline_parent = ParseNulTerminatedStrings( + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/cmdline"))); + auto res = WithSubprocess( + [&](int pid) -> PosixError { + ASSIGN_OR_RETURN_ERRNO( + auto raw_cmdline, + GetContents(absl::StrCat("/proc/", pid, "/cmdline"))); + proc_cmdline = ParseNulTerminatedStrings(raw_cmdline); + return NoError(); + }, + nullptr, nullptr); + ASSERT_NO_ERRNO(res); + + for (size_t i = 0; i < proc_cmdline_parent.size(); i++) { + EXPECT_EQ(proc_cmdline_parent[i], proc_cmdline[i]); + } +} + +// Test whether /proc/PID/ symlinks can be read for a running process. 
+TEST(ProcPidSymlink, SubprocessRunning) { + char buf[1]; + + EXPECT_THAT(ReadlinkWhileRunning("exe", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(ReadlinkWhileRunning("ns/net", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(ReadlinkWhileRunning("ns/pid", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(ReadlinkWhileRunning("ns/user", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); +} +// FIXME: Inconsistent behavior between gVisor and linux +// on proc files. +TEST(ProcPidSymlink, SubprocessZombied) { + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false)); + + char buf[1]; + + int want = EACCES; + if (!IsRunningOnGvisor()) { + auto version = ASSERT_NO_ERRNO_AND_VALUE(GetKernelVersion()); + if (version.major == 4 && version.minor > 3) { + want = ENOENT; + } + } + + EXPECT_THAT(ReadlinkWhileZombied("exe", buf, sizeof(buf)), + SyscallFailsWithErrno(want)); + + if (!IsRunningOnGvisor()) { + EXPECT_THAT(ReadlinkWhileZombied("ns/net", buf, sizeof(buf)), + SyscallFailsWithErrno(want)); + } + + // FIXME: Inconsistent behavior between gVisor and linux + // on proc files. + // 4.17 & gVisor: Syscall succeeds and returns 1 + // EXPECT_THAT(ReadlinkWhileZombied("ns/pid", buf, sizeof(buf)), + // SyscallFailsWithErrno(EACCES)); + + // FIXME: Inconsistent behavior between gVisor and linux + // on proc files. + // 4.17 & gVisor: Syscall succeeds and returns 1. + // EXPECT_THAT(ReadlinkWhileZombied("ns/user", buf, sizeof(buf)), + // SyscallFailsWithErrno(EACCES)); +} + +// Test whether /proc/PID/ symlinks can be read for an exited process. +TEST(ProcPidSymlink, SubprocessExited) { + // FIXME: These all succeed on gVisor. 
+ SKIP_IF(IsRunningOnGvisor()); + + char buf[1]; + + EXPECT_THAT(ReadlinkWhileExited("exe", buf, sizeof(buf)), + SyscallFailsWithErrno(ESRCH)); + + EXPECT_THAT(ReadlinkWhileExited("ns/net", buf, sizeof(buf)), + SyscallFailsWithErrno(ESRCH)); + + EXPECT_THAT(ReadlinkWhileExited("ns/pid", buf, sizeof(buf)), + SyscallFailsWithErrno(ESRCH)); + + EXPECT_THAT(ReadlinkWhileExited("ns/user", buf, sizeof(buf)), + SyscallFailsWithErrno(ESRCH)); +} + +// /proc/PID/exe points to the correct binary. +TEST(ProcPidExe, Subprocess) { + auto link = ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/self/exe")); + auto expected_absolute_path = + ASSERT_NO_ERRNO_AND_VALUE(MakeAbsolute(link, "")); + + char actual[PATH_MAX + 1] = {}; + ASSERT_THAT(ReadlinkWhileRunning("exe", actual, sizeof(actual)), + SyscallSucceedsWithValue(Gt(0))); + EXPECT_EQ(actual, expected_absolute_path); +} + +// Test whether /proc/PID/ files can be read for a running process. +TEST(ProcPidFile, SubprocessRunning) { + char buf[1]; + + EXPECT_THAT(ReadWhileRunning("auxv", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(ReadWhileRunning("cmdline", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(ReadWhileRunning("comm", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(ReadWhileRunning("gid_map", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(ReadWhileRunning("io", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(ReadWhileRunning("maps", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(ReadWhileRunning("stat", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(ReadWhileRunning("status", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(ReadWhileRunning("uid_map", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); +} + +// Test whether /proc/PID/ files can be read for a zombie process. 
+TEST(ProcPidFile, SubprocessZombie) { + char buf[1]; + // 4.17: Succeeds and returns 1 + // gVisor: Succeds and returns 0 + EXPECT_THAT(ReadWhileZombied("auxv", buf, sizeof(buf)), SyscallSucceeds()); + + EXPECT_THAT(ReadWhileZombied("cmdline", buf, sizeof(buf)), + SyscallSucceedsWithValue(0)); + + EXPECT_THAT(ReadWhileZombied("comm", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(ReadWhileZombied("gid_map", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(ReadWhileZombied("maps", buf, sizeof(buf)), + SyscallSucceedsWithValue(0)); + + EXPECT_THAT(ReadWhileZombied("stat", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(ReadWhileZombied("status", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(ReadWhileZombied("uid_map", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + // FIXME: Inconsistent behavior between gVisor and linux + // on proc files. + // gVisor & 4.17: Succeeds and returns 1. + // EXPECT_THAT(ReadWhileZombied("io", buf, sizeof(buf)), + // SyscallFailsWithErrno(EACCES)); +} + +// Test whether /proc/PID/ files can be read for an exited process. +TEST(ProcPidFile, SubprocessExited) { + char buf[1]; + + // FIXME: Inconsistent behavior between kernels + // gVisor: Fails with ESRCH. + // 4.17: Succeeds and returns 1. + // EXPECT_THAT(ReadWhileExited("auxv", buf, sizeof(buf)), + // SyscallFailsWithErrno(ESRCH)); + + EXPECT_THAT(ReadWhileExited("cmdline", buf, sizeof(buf)), + SyscallFailsWithErrno(ESRCH)); + + if (!IsRunningOnGvisor()) { + // FIXME: Succeeds on gVisor. + EXPECT_THAT(ReadWhileExited("comm", buf, sizeof(buf)), + SyscallFailsWithErrno(ESRCH)); + } + + EXPECT_THAT(ReadWhileExited("gid_map", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + if (!IsRunningOnGvisor()) { + // FIXME: Succeeds on gVisor. 
+ EXPECT_THAT(ReadWhileExited("io", buf, sizeof(buf)), + SyscallFailsWithErrno(ESRCH)); + } + + if (!IsRunningOnGvisor()) { + // FIXME: Returns EOF on gVisor. + EXPECT_THAT(ReadWhileExited("maps", buf, sizeof(buf)), + SyscallFailsWithErrno(ESRCH)); + } + + if (!IsRunningOnGvisor()) { + // FIXME: Succeeds on gVisor. + EXPECT_THAT(ReadWhileExited("stat", buf, sizeof(buf)), + SyscallFailsWithErrno(ESRCH)); + } + + if (!IsRunningOnGvisor()) { + // FIXME: Succeeds on gVisor. + EXPECT_THAT(ReadWhileExited("status", buf, sizeof(buf)), + SyscallFailsWithErrno(ESRCH)); + } + + EXPECT_THAT(ReadWhileExited("uid_map", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); +} + +PosixError DirContainsImpl(absl::string_view path, + const std::vector<std::string>& targets, bool strict) { + ASSIGN_OR_RETURN_ERRNO(auto listing, ListDir(path, false)); + bool success = true; + + for (auto& expected_entry : targets) { + auto cursor = std::find(listing.begin(), listing.end(), expected_entry); + if (cursor == listing.end()) { + success = false; + } + } + + if (!success) { + return PosixError( + ENOENT, + absl::StrCat("Failed to find one or more paths in '", path, "'")); + } + + if (strict) { + if (targets.size() != listing.size()) { + return PosixError( + EINVAL, + absl::StrCat("Expected to find ", targets.size(), " elements in '", + path, "', but found ", listing.size())); + } + } + + return NoError(); +} + +PosixError DirContains(absl::string_view path, + const std::vector<std::string>& targets) { + return DirContainsImpl(path, targets, false); +} + +PosixError DirContainsExactly(absl::string_view path, + const std::vector<std::string>& targets) { + return DirContainsImpl(path, targets, true); +} + +PosixError EventuallyDirContainsExactly(absl::string_view path, + const std::vector<std::string>& targets) { + constexpr int kRetryCount = 100; + const absl::Duration kRetryDelay = absl::Milliseconds(100); + + for (int i = 0; i < kRetryCount; ++i) { + auto res = 
DirContainsExactly(path, targets); + if (res.ok()) { + return res; + } else if (i < kRetryCount - 1) { + // Sleep if this isn't the final iteration. + absl::SleepFor(kRetryDelay); + } + } + return PosixError(ETIMEDOUT, + "Timed out while waiting for directory to contain files "); +} + +TEST(ProcTask, Basic) { + EXPECT_NO_ERRNO( + DirContains("/proc/self/task", {".", "..", absl::StrCat(getpid())})); +} + +std::vector<std::string> TaskFiles(const std::vector<std::string>& initial_contents, + const std::vector<pid_t>& pids) { + return VecCat<std::string>( + initial_contents, + ApplyVec<std::string>([](const pid_t p) { return absl::StrCat(p); }, pids)); +} + +std::vector<std::string> TaskFiles(const std::vector<pid_t>& pids) { + return TaskFiles({".", "..", absl::StrCat(getpid())}, pids); +} + +// Helper class for creating a new task in the current thread group. +class BlockingChild { + public: + BlockingChild() : thread_([=] { Start(); }) {} + ~BlockingChild() { Join(); } + + pid_t Tid() const { + absl::MutexLock ml(&mu_); + mu_.Await(absl::Condition(&tid_ready_)); + return tid_; + } + + void Join() { Stop(); } + + private: + void Start() { + absl::MutexLock ml(&mu_); + tid_ = syscall(__NR_gettid); + tid_ready_ = true; + mu_.Await(absl::Condition(&stop_)); + } + + void Stop() { + absl::MutexLock ml(&mu_); + stop_ = true; + } + + mutable absl::Mutex mu_; + bool stop_ GUARDED_BY(mu_) = false; + pid_t tid_; + bool tid_ready_ GUARDED_BY(mu_) = false; + + // Must be last to ensure that the destructor for the thread is run before + // any other member of the object is destroyed. 
+ ScopedThread thread_; +}; + +TEST(ProcTask, NewThreadAppears) { + auto initial = ASSERT_NO_ERRNO_AND_VALUE(ListDir("/proc/self/task", false)); + BlockingChild child1; + EXPECT_NO_ERRNO(DirContainsExactly("/proc/self/task", + TaskFiles(initial, {child1.Tid()}))); +} + +TEST(ProcTask, KilledThreadsDisappear) { + auto initial = ASSERT_NO_ERRNO_AND_VALUE(ListDir("/proc/self/task/", false)); + + BlockingChild child1; + EXPECT_NO_ERRNO(DirContainsExactly("/proc/self/task", + TaskFiles(initial, {child1.Tid()}))); + + // Stat child1's task file. + struct stat statbuf; + const std::string child1_task_file = + absl::StrCat("/proc/self/task/", child1.Tid()); + EXPECT_THAT(stat(child1_task_file.c_str(), &statbuf), SyscallSucceeds()); + + BlockingChild child2; + EXPECT_NO_ERRNO(DirContainsExactly( + "/proc/self/task", TaskFiles(initial, {child1.Tid(), child2.Tid()}))); + + BlockingChild child3; + BlockingChild child4; + BlockingChild child5; + EXPECT_NO_ERRNO(DirContainsExactly( + "/proc/self/task", + TaskFiles(initial, {child1.Tid(), child2.Tid(), child3.Tid(), + child4.Tid(), child5.Tid()}))); + + child2.Join(); + EXPECT_NO_ERRNO(EventuallyDirContainsExactly( + "/proc/self/task", TaskFiles(initial, {child1.Tid(), child3.Tid(), + child4.Tid(), child5.Tid()}))); + + child1.Join(); + child4.Join(); + EXPECT_NO_ERRNO(EventuallyDirContainsExactly( + "/proc/self/task", TaskFiles(initial, {child3.Tid(), child5.Tid()}))); + + // Stat child1's task file again. This time it should fail. 
+ EXPECT_THAT(stat(child1_task_file.c_str(), &statbuf), + SyscallFailsWithErrno(ENOENT)); + + child3.Join(); + child5.Join(); + EXPECT_NO_ERRNO(EventuallyDirContainsExactly("/proc/self/task", initial)); +} + +TEST(ProcTask, ChildTaskDir) { + BlockingChild child1; + EXPECT_NO_ERRNO(DirContains("/proc/self/task", TaskFiles({child1.Tid()}))); + EXPECT_NO_ERRNO(DirContains(absl::StrCat("/proc/", child1.Tid(), "/task"), + TaskFiles({child1.Tid()}))); +} + +PosixError VerifyPidDir(std::string path) { + return DirContains(path, {"exe", "fd", "io", "maps", "ns", "stat", "status"}); +} + +TEST(ProcTask, VerifyTaskDir) { + EXPECT_NO_ERRNO(VerifyPidDir("/proc/self")); + + EXPECT_NO_ERRNO(VerifyPidDir(absl::StrCat("/proc/self/task/", getpid()))); + BlockingChild child1; + EXPECT_NO_ERRNO(VerifyPidDir(absl::StrCat("/proc/self/task/", child1.Tid()))); + + // Only the first level of task directories should contain the 'task' + // directory. That is: + // + // /proc/1234/task <- should exist + // /proc/1234/task/1234/task <- should not exist + // /proc/1234/task/1235/task <- should not exist (where 1235 is in the same + // thread group as 1234). + EXPECT_FALSE( + DirContains(absl::StrCat("/proc/self/task/", getpid()), {"task"}).ok()) + << "Found 'task' directory in an inner directory."; +} + +TEST(ProcTask, TaskDirCannotBeDeleted) { + // Drop capabilities that allow us to override file and directory permissions. + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + + EXPECT_THAT(rmdir("/proc/self/task"), SyscallFails()); + EXPECT_THAT(rmdir(absl::StrCat("/proc/self/task/", getpid()).c_str()), + SyscallFailsWithErrno(EACCES)); +} + +TEST(ProcTask, TaskDirHasCorrectMetadata) { + struct stat st; + EXPECT_THAT(stat("/proc/self/task", &st), SyscallSucceeds()); + EXPECT_TRUE(S_ISDIR(st.st_mode)); + + // Verify file is readable and executable by everyone. 
+ mode_t expected_permissions = + S_IRUSR | S_IXUSR | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH; + mode_t permissions = st.st_mode & (S_IRWXU | S_IRWXG | S_IRWXO); + EXPECT_EQ(expected_permissions, permissions); +} + +TEST(ProcTask, TaskDirCanSeekToEnd) { + const FileDescriptor dirfd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/self/task", O_RDONLY)); + EXPECT_THAT(lseek(dirfd.get(), 0, SEEK_END), SyscallSucceeds()); +} + +TEST(ProcTask, VerifyTaskDirNlinks) { + // A task directory will have 3 links if the taskgroup has a single + // thread. For example, the following shows where the links to + // '/proc/12345/task comes' from for a single threaded process with pid 12345: + // + // /proc/12345/task <-- 1 link for the directory itself + // . <-- link from "." + // .. + // 12345 + // . + // .. <-- link from ".." to parent. + // <other contents of a task dir> + // + // We can't assert an absolute number of links since we don't control how many + // threads the test framework spawns. Instead, we'll ensure creating a new + // thread increases the number of links as expected. + + // Once we reach the test body, we can count on the thread count being stable + // unless we spawn a new one. + uint64_t initial_links = ASSERT_NO_ERRNO_AND_VALUE(Links("/proc/self/task")); + ASSERT_GE(initial_links, 3); + + // For each new subtask, we should gain a new link. 
+ BlockingChild child1; + EXPECT_THAT(Links("/proc/self/task"), + IsPosixErrorOkAndHolds(initial_links + 1)); + BlockingChild child2; + EXPECT_THAT(Links("/proc/self/task"), + IsPosixErrorOkAndHolds(initial_links + 2)); +} + +TEST(ProcTask, CommContainsThreadNameAndTrailingNewline) { + constexpr char kThreadName[] = "TestThread12345"; + ASSERT_THAT(prctl(PR_SET_NAME, kThreadName), SyscallSucceeds()); + + auto thread_name = ASSERT_NO_ERRNO_AND_VALUE( + GetContents(JoinPath("/proc", absl::StrCat(getpid()), "task", + absl::StrCat(syscall(SYS_gettid)), "comm"))); + EXPECT_EQ(absl::StrCat(kThreadName, "\n"), thread_name); +} + +TEST(ProcTaskNs, NsDirExistsAndHasCorrectMetadata) { + EXPECT_NO_ERRNO(DirContains("/proc/self/ns", {"net", "pid", "user"})); + + // Let's just test the 'pid' entry, all of them are very similar. + struct stat st; + EXPECT_THAT(lstat("/proc/self/ns/pid", &st), SyscallSucceeds()); + EXPECT_TRUE(S_ISLNK(st.st_mode)); + + auto link = ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/self/ns/pid")); + EXPECT_THAT(link, ::testing::StartsWith("pid:[")); +} + +TEST(ProcTaskNs, AccessOnNsNodeSucceeds) { + EXPECT_THAT(access("/proc/self/ns/pid", F_OK), SyscallSucceeds()); +} + +TEST(ProcSysKernelHostname, Exists) { + EXPECT_THAT(open("/proc/sys/kernel/hostname", O_RDONLY), SyscallSucceeds()); +} + +TEST(ProcSysKernelHostname, MatchesUname) { + struct utsname buf; + EXPECT_THAT(uname(&buf), SyscallSucceeds()); + const std::string hostname = absl::StrCat(buf.nodename, "\n"); + auto procfs_hostname = + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/sys/kernel/hostname")); + EXPECT_EQ(procfs_hostname, hostname); +} + +TEST(ProcSysVmMmapMinAddr, HasNumericValue) { + const std::string mmap_min_addr_str = + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/sys/vm/mmap_min_addr")); + uintptr_t mmap_min_addr; + EXPECT_TRUE(absl::SimpleAtoi(mmap_min_addr_str, &mmap_min_addr)) + << "/proc/sys/vm/mmap_min_addr does not contain a numeric value: " + << mmap_min_addr_str; +} + 
+// /proc/sys/vm/overcommit_memory must contain a value that parses as an +// integer. +TEST(ProcSysVmOvercommitMemory, HasNumericValue) { + const std::string overcommit_memory_str = + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/sys/vm/overcommit_memory")); + uintptr_t overcommit_memory; + // On failure, report the raw file contents: overcommit_memory is + // uninitialized here and unspecified once SimpleAtoi fails. + EXPECT_TRUE(absl::SimpleAtoi(overcommit_memory_str, &overcommit_memory)) + << "/proc/sys/vm/overcommit_memory does not contain a numeric value: " + << overcommit_memory_str; +} + +// Check that link for proc fd entries point to the target node, not the +// symlink itself. +TEST(ProcTaskFd, FstatatFollowsSymlink) { + const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY)); + + struct stat sproc = {}; + EXPECT_THAT( + fstatat(-1, absl::StrCat("/proc/self/fd/", fd.get()).c_str(), &sproc, 0), + SyscallSucceeds()); + + struct stat sfile = {}; + EXPECT_THAT(fstatat(-1, file.path().c_str(), &sfile, 0), SyscallSucceeds()); + + // If fstatat follows the fd symlink, the device and inode numbers should + // match at a minimum. 
+ EXPECT_EQ(sproc.st_dev, sfile.st_dev); + EXPECT_EQ(sproc.st_ino, sfile.st_ino); + EXPECT_EQ(0, memcmp(&sfile, &sproc, sizeof(sfile))); +} + +TEST(ProcFilesystems, Bug65172365) { + std::string proc_filesystems = + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/filesystems")); + ASSERT_FALSE(proc_filesystems.empty()); +} + +TEST(ProcFilesystems, PresenceOfShmMaxMniAll) { + uint64_t shmmax = 0; + uint64_t shmall = 0; + uint64_t shmmni = 0; + std::string proc_file; + proc_file = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/sys/kernel/shmmax")); + ASSERT_FALSE(proc_file.empty()); + ASSERT_TRUE(absl::SimpleAtoi(proc_file, &shmmax)); + proc_file = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/sys/kernel/shmall")); + ASSERT_FALSE(proc_file.empty()); + ASSERT_TRUE(absl::SimpleAtoi(proc_file, &shmall)); + proc_file = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/sys/kernel/shmmni")); + ASSERT_FALSE(proc_file.empty()); + ASSERT_TRUE(absl::SimpleAtoi(proc_file, &shmmni)); + + ASSERT_GT(shmmax, 0); + ASSERT_GT(shmall, 0); + ASSERT_GT(shmmni, 0); + ASSERT_LE(shmall, shmmax); + + // These values should never be higher than this by default, for more + // information see uapi/linux/shm.h + ASSERT_LE(shmmax, ULONG_MAX - (1UL << 24)); + ASSERT_LE(shmall, ULONG_MAX - (1UL << 24)); +} + +// Check that /proc/mounts is a symlink to self/mounts. +TEST(ProcMounts, IsSymlink) { + auto link = ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/mounts")); + EXPECT_EQ(link, "self/mounts"); +} + +// Check that /proc/self/mounts looks something like a real mounts file. +TEST(ProcSelfMounts, RequiredFieldsArePresent) { + auto mounts = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/mounts")); + EXPECT_THAT(mounts, + AllOf( + // Root mount. + ContainsRegex(R"(\S+ / \S+ (rw|ro)\S* [0-9]+ [0-9]+\s)"), + // Proc mount. 
+ ContainsRegex(R"(\S+ /proc \S+ rw\S* [0-9]+ [0-9]+\s)"))); +} +} // namespace +} // namespace testing +} // namespace gvisor + +int main(int argc, char** argv) { + for (int i = 0; i < argc; ++i) { + gvisor::testing::saved_argv.emplace_back(std::string(argv[i])); + } + + gvisor::testing::TestInit(&argc, &argv); + return RUN_ALL_TESTS(); +} diff --git a/test/syscalls/linux/proc_net.cc b/test/syscalls/linux/proc_net.cc new file mode 100644 index 000000000..6060d0644 --- /dev/null +++ b/test/syscalls/linux/proc_net.cc @@ -0,0 +1,59 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "gtest/gtest.h" +#include "gtest/gtest.h" +#include "test/util/file_descriptor.h" +#include "test/util/fs_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { +namespace { + +TEST(ProcNetIfInet6, Format) { + auto ifinet6 = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/net/if_inet6")); + EXPECT_THAT(ifinet6, + ::testing::MatchesRegex( + // Ex: "00000000000000000000000000000001 01 80 10 80 lo\n" + "^([a-f\\d]{32}( [a-f\\d]{2}){4} +[a-z][a-z\\d]*\\n)+$")); +} + +TEST(ProcSysNetIpv4Sack, Exists) { + EXPECT_THAT(open("/proc/sys/net/ipv4/tcp_sack", O_RDONLY), SyscallSucceeds()); +} + +TEST(ProcSysNetIpv4Sack, CanReadAndWrite) { + auto const fd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/sys/net/ipv4/tcp_sack", O_RDWR)); + + char buf; + EXPECT_THAT(PreadFd(fd.get(), &buf, sizeof(buf), 0), + SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_TRUE(buf == '0' || buf == '1') << "unexpected tcp_sack: " << buf; + + char to_write = (buf == '1') ? '0' : '1'; + EXPECT_THAT(PwriteFd(fd.get(), &to_write, sizeof(to_write), 0), + SyscallSucceedsWithValue(sizeof(to_write))); + + buf = 0; + EXPECT_THAT(PreadFd(fd.get(), &buf, sizeof(buf), 0), + SyscallSucceedsWithValue(sizeof(buf))); + EXPECT_EQ(buf, to_write); +} + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/pselect.cc b/test/syscalls/linux/pselect.cc new file mode 100644 index 000000000..3294f6c14 --- /dev/null +++ b/test/syscalls/linux/pselect.cc @@ -0,0 +1,190 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <signal.h> +#include <sys/select.h> + +#include "gtest/gtest.h" +#include "absl/time/time.h" +#include "test/syscalls/linux/base_poll_test.h" +#include "test/util/signal_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { +namespace { + +struct MaskWithSize { + sigset_t* mask; + size_t mask_size; +}; + +// Linux and glibc have a different idea of the sizeof sigset_t. When calling +// the syscall directly, use what the kernel expects. +unsigned kSigsetSize = SIGRTMAX / 8; + +// Linux pselect(2) differs from the glibc wrapper function in that Linux +// updates the timeout with the amount of time remaining. In order to test this +// behavior we need to use the syscall directly. +int syscallPselect6(int nfds, fd_set* readfds, fd_set* writefds, + fd_set* exceptfds, struct timespec* timeout, + const MaskWithSize* mask_with_size) { + return syscall(SYS_pselect6, nfds, readfds, writefds, exceptfds, timeout, + mask_with_size); +} + +class PselectTest : public BasePollTest { + protected: + void SetUp() override { BasePollTest::SetUp(); } + void TearDown() override { BasePollTest::TearDown(); } +}; + +// See that when there are no FD sets, pselect behaves like sleep. 
+TEST_F(PselectTest, NullFds) { + struct timespec timeout = absl::ToTimespec(absl::Milliseconds(10)); + ASSERT_THAT(syscallPselect6(0, nullptr, nullptr, nullptr, &timeout, nullptr), + SyscallSucceeds()); + EXPECT_EQ(timeout.tv_sec, 0); + EXPECT_EQ(timeout.tv_nsec, 0); + + timeout = absl::ToTimespec(absl::Milliseconds(10)); + ASSERT_THAT(syscallPselect6(1, nullptr, nullptr, nullptr, &timeout, nullptr), + SyscallSucceeds()); + EXPECT_EQ(timeout.tv_sec, 0); + EXPECT_EQ(timeout.tv_nsec, 0); +} + +TEST_F(PselectTest, ClosedFds) { + fd_set read_set; + FD_ZERO(&read_set); + int fd; + ASSERT_THAT(fd = dup(1), SyscallSucceeds()); + ASSERT_THAT(close(fd), SyscallSucceeds()); + FD_SET(fd, &read_set); + struct timespec timeout = absl::ToTimespec(absl::Milliseconds(10)); + EXPECT_THAT( + syscallPselect6(fd + 1, &read_set, nullptr, nullptr, &timeout, nullptr), + SyscallFailsWithErrno(EBADF)); +} + +TEST_F(PselectTest, ZeroTimeout) { + struct timespec timeout = {}; + ASSERT_THAT(syscallPselect6(1, nullptr, nullptr, nullptr, &timeout, nullptr), + SyscallSucceeds()); + EXPECT_EQ(timeout.tv_sec, 0); + EXPECT_EQ(timeout.tv_nsec, 0); +} + +// If random S/R interrupts the pselect, SIGALRM may be delivered before pselect +// restarts, causing the pselect to hang forever. +TEST_F(PselectTest, NoTimeout_NoRandomSave) { + // When there's no timeout, pselect may never return so set a timer. + SetTimer(absl::Milliseconds(100)); + // See that we get interrupted by the timer. 
+ ASSERT_THAT(syscallPselect6(1, nullptr, nullptr, nullptr, nullptr, nullptr), + SyscallFailsWithErrno(EINTR)); + EXPECT_TRUE(TimerFired()); +} + +TEST_F(PselectTest, InvalidTimeoutNegative) { + struct timespec timeout = absl::ToTimespec(absl::Seconds(-1)); + ASSERT_THAT(syscallPselect6(1, nullptr, nullptr, nullptr, &timeout, nullptr), + SyscallFailsWithErrno(EINVAL)); + EXPECT_EQ(timeout.tv_sec, -1); + EXPECT_EQ(timeout.tv_nsec, 0); +} + +TEST_F(PselectTest, InvalidTimeoutNotNormalized) { + struct timespec timeout = {0, 1000000001}; + ASSERT_THAT(syscallPselect6(1, nullptr, nullptr, nullptr, &timeout, nullptr), + SyscallFailsWithErrno(EINVAL)); + EXPECT_EQ(timeout.tv_sec, 0); + EXPECT_EQ(timeout.tv_nsec, 1000000001); +} + +TEST_F(PselectTest, EmptySigMaskInvalidMaskSize) { + struct timespec timeout = {}; + MaskWithSize invalid = {nullptr, 7}; + EXPECT_THAT(syscallPselect6(0, nullptr, nullptr, nullptr, &timeout, &invalid), + SyscallSucceeds()); +} + +TEST_F(PselectTest, EmptySigMaskValidMaskSize) { + struct timespec timeout = {}; + MaskWithSize invalid = {nullptr, 8}; + EXPECT_THAT(syscallPselect6(0, nullptr, nullptr, nullptr, &timeout, &invalid), + SyscallSucceeds()); +} + +TEST_F(PselectTest, InvalidMaskSize) { + struct timespec timeout = {}; + sigset_t sigmask; + ASSERT_THAT(sigemptyset(&sigmask), SyscallSucceeds()); + MaskWithSize invalid = {&sigmask, 7}; + EXPECT_THAT(syscallPselect6(1, nullptr, nullptr, nullptr, &timeout, &invalid), + SyscallFailsWithErrno(EINVAL)); +} + +// Verify that signals blocked by the pselect mask (that would otherwise be +// allowed) do not interrupt pselect. +TEST_F(PselectTest, SignalMaskBlocksSignal) { + absl::Duration duration(absl::Seconds(30)); + struct timespec timeout = absl::ToTimespec(duration); + absl::Duration timer_duration(absl::Seconds(10)); + + // Call with a mask that blocks SIGALRM. See that pselect is not interrupted + // (i.e. returns 0) and that upon completion, the timer has fired. 
+ sigset_t mask; + ASSERT_THAT(sigprocmask(0, nullptr, &mask), SyscallSucceeds()); + ASSERT_THAT(sigaddset(&mask, SIGALRM), SyscallSucceeds()); + MaskWithSize mask_with_size = {&mask, kSigsetSize}; + SetTimer(timer_duration); + MaybeSave(); + ASSERT_FALSE(TimerFired()); + ASSERT_THAT( + syscallPselect6(1, nullptr, nullptr, nullptr, &timeout, &mask_with_size), + SyscallSucceeds()); + EXPECT_TRUE(TimerFired()); + EXPECT_EQ(absl::DurationFromTimespec(timeout), absl::Duration()); +} + +// Verify that signals allowed by the pselect mask (that would otherwise be +// blocked) interrupt pselect. +TEST_F(PselectTest, SignalMaskAllowsSignal) { + absl::Duration duration = absl::Seconds(30); + struct timespec timeout = absl::ToTimespec(duration); + absl::Duration timer_duration = absl::Seconds(10); + + sigset_t mask; + ASSERT_THAT(sigprocmask(0, nullptr, &mask), SyscallSucceeds()); + + // Block SIGALRM. + auto cleanup = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_BLOCK, SIGALRM)); + + // Call with a mask that unblocks SIGALRM. See that pselect is interrupted. + MaskWithSize mask_with_size = {&mask, kSigsetSize}; + SetTimer(timer_duration); + MaybeSave(); + ASSERT_FALSE(TimerFired()); + ASSERT_THAT( + syscallPselect6(1, nullptr, nullptr, nullptr, &timeout, &mask_with_size), + SyscallFailsWithErrno(EINTR)); + EXPECT_TRUE(TimerFired()); + EXPECT_GT(absl::DurationFromTimespec(timeout), absl::Duration()); +} + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/ptrace.cc b/test/syscalls/linux/ptrace.cc new file mode 100644 index 000000000..d3b3b8b02 --- /dev/null +++ b/test/syscalls/linux/ptrace.cc @@ -0,0 +1,948 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <elf.h> +#include <signal.h> +#include <stddef.h> +#include <sys/ptrace.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/user.h> +#include <sys/wait.h> +#include <unistd.h> + +#include <utility> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/util/logging.h" +#include "test/util/multiprocess_util.h" +#include "test/util/signal_util.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// Sends sig to the current process with tgkill(2). +// +// glibc's raise(2) may change the signal mask before sending the signal. These +// extra syscalls make tests of syscall, signal interception, etc. difficult to +// write. +void RaiseSignal(int sig) { + pid_t pid = getpid(); + TEST_PCHECK(pid > 0); + pid_t tid = gettid(); + TEST_PCHECK(tid > 0); + TEST_PCHECK(tgkill(pid, tid, sig) == 0); +} + +// Returns the Yama ptrace scope. +PosixErrorOr<int> YamaPtraceScope() { + constexpr char kYamaPtraceScopePath[] = "/proc/sys/kernel/yama/ptrace_scope"; + + ASSIGN_OR_RETURN_ERRNO(bool exists, Exists(kYamaPtraceScopePath)); + if (!exists) { + // File doesn't exist means no Yama, so the scope is disabled -> 0. 
+ return 0; + } + + std::string contents; + RETURN_IF_ERRNO(GetContents(kYamaPtraceScopePath, &contents)); + + int scope; + if (!absl::SimpleAtoi(contents, &scope)) { + return PosixError(EINVAL, absl::StrCat(contents, ": not a valid number")); + } + + return scope; +} + +TEST(PtraceTest, AttachSelf) { + EXPECT_THAT(ptrace(PTRACE_ATTACH, gettid(), 0, 0), + SyscallFailsWithErrno(EPERM)); +} + +TEST(PtraceTest, AttachSameThreadGroup) { + pid_t const tid = gettid(); + ScopedThread([&] { + EXPECT_THAT(ptrace(PTRACE_ATTACH, tid, 0, 0), SyscallFailsWithErrno(EPERM)); + }); +} + +TEST(PtraceTest, AttachParent_PeekData_PokeData_SignalSuppression) { + // Yama prevents attaching to a parent. Skip the test if the scope is anything + // except disabled. + SKIP_IF(ASSERT_NO_ERRNO_AND_VALUE(YamaPtraceScope()) > 0); + + constexpr long kBeforePokeDataValue = 10; + constexpr long kAfterPokeDataValue = 20; + + volatile long word = kBeforePokeDataValue; + + pid_t const child_pid = fork(); + if (child_pid == 0) { + // In child process. + + // Attach to the parent. + pid_t const parent_pid = getppid(); + TEST_PCHECK(ptrace(PTRACE_ATTACH, parent_pid, 0, 0) == 0); + MaybeSave(); + + // Block until the parent enters signal-delivery-stop as a result of the + // SIGSTOP sent by PTRACE_ATTACH. + int status; + TEST_PCHECK(waitpid(parent_pid, &status, 0) == parent_pid); + MaybeSave(); + TEST_CHECK(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP); + + // Replace the value of word in the parent process with kAfterPokeDataValue. + long const parent_word = ptrace(PTRACE_PEEKDATA, parent_pid, &word, 0); + MaybeSave(); + TEST_CHECK(parent_word == kBeforePokeDataValue); + TEST_PCHECK( + ptrace(PTRACE_POKEDATA, parent_pid, &word, kAfterPokeDataValue) == 0); + MaybeSave(); + + // Detach from the parent and suppress the SIGSTOP. If the SIGSTOP is not + // suppressed, the parent will hang in group-stop, causing the test to time + // out. 
+ TEST_PCHECK(ptrace(PTRACE_DETACH, parent_pid, 0, 0) == 0); + MaybeSave(); + _exit(0); + } + // In parent process. + ASSERT_THAT(child_pid, SyscallSucceeds()); + + // Wait for the child to complete. + int status; + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << " status " << status; + + // Check that the child's PTRACE_POKEDATA was effective. + EXPECT_EQ(kAfterPokeDataValue, word); +} + +TEST(PtraceTest, GetSigMask) { + // <sys/user.h> doesn't define these until Linux 4.4, even though the features + // were added in 3.11. + constexpr auto kPtraceGetSigMask = static_cast<enum __ptrace_request>(0x420a); + constexpr auto kPtraceSetSigMask = static_cast<enum __ptrace_request>(0x420b); + // glibc and the Linux kernel define a sigset_t with different sizes. To avoid + // creating a kernel_sigset_t and recreating all the modification functions + // (sigemptyset, etc), we just hardcode the kernel sigset size. + constexpr int kSizeofKernelSigset = 8; + constexpr int kBlockSignal = SIGUSR1; + sigset_t blocked; + sigemptyset(&blocked); + sigaddset(&blocked, kBlockSignal); + + pid_t const child_pid = fork(); + if (child_pid == 0) { + // In child process. + + // Install a signal handler for kBlockSignal to avoid termination and block + // it. + TEST_PCHECK(signal(kBlockSignal, +[](int signo) {}) != SIG_ERR); + MaybeSave(); + TEST_PCHECK(sigprocmask(SIG_SETMASK, &blocked, nullptr) == 0); + MaybeSave(); + + // Enable tracing. + TEST_PCHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) == 0); + MaybeSave(); + + // This should be blocked. + RaiseSignal(kBlockSignal); + + // This should be suppressed by parent, who will change signal mask in the + // meantime, which means kBlockSignal should be delivered once this resumes. + RaiseSignal(SIGSTOP); + + _exit(0); + } + // In parent process. 
+ ASSERT_THAT(child_pid, SyscallSucceeds()); + + // Wait for the child to send itself SIGSTOP and enter signal-delivery-stop. + int status; + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP) + << " status " << status; + + // Get current signal mask. + sigset_t set; + EXPECT_THAT(ptrace(kPtraceGetSigMask, child_pid, kSizeofKernelSigset, &set), + SyscallSucceeds()); + EXPECT_THAT(blocked, EqualsSigset(set)); + + // Try to get current signal mask with bad size argument. + EXPECT_THAT(ptrace(kPtraceGetSigMask, child_pid, 0, nullptr), + SyscallFailsWithErrno(EINVAL)); + + // Try to set bad signal mask. + sigset_t* bad_addr = reinterpret_cast<sigset_t*>(-1); + EXPECT_THAT( + ptrace(kPtraceSetSigMask, child_pid, kSizeofKernelSigset, bad_addr), + SyscallFailsWithErrno(EFAULT)); + + // Set signal mask to empty set. + sigset_t set1; + sigemptyset(&set1); + EXPECT_THAT(ptrace(kPtraceSetSigMask, child_pid, kSizeofKernelSigset, &set1), + SyscallSucceeds()); + + // Suppress SIGSTOP and resume the child. It should re-enter + // signal-delivery-stop for kBlockSignal. + ASSERT_THAT(ptrace(PTRACE_CONT, child_pid, 0, 0), SyscallSucceeds()); + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == kBlockSignal) + << " status " << status; + + ASSERT_THAT(ptrace(PTRACE_CONT, child_pid, 0, 0), SyscallSucceeds()); + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + // Let's see that process exited normally. + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << " status " << status; +} + +TEST(PtraceTest, GetSiginfo_SetSiginfo_SignalInjection) { + constexpr int kOriginalSigno = SIGUSR1; + constexpr int kInjectedSigno = SIGUSR2; + + pid_t const child_pid = fork(); + if (child_pid == 0) { + // In child process. + + // Override all signal handlers. 
+ struct sigaction sa = {}; + sa.sa_handler = +[](int signo) { _exit(signo); }; + TEST_PCHECK(sigfillset(&sa.sa_mask) == 0); + for (int signo = 1; signo < 32; signo++) { + if (signo == SIGKILL || signo == SIGSTOP) { + continue; + } + TEST_PCHECK(sigaction(signo, &sa, nullptr) == 0); + } + for (int signo = SIGRTMIN; signo <= SIGRTMAX; signo++) { + TEST_PCHECK(sigaction(signo, &sa, nullptr) == 0); + } + + // Unblock all signals. + TEST_PCHECK(sigprocmask(SIG_UNBLOCK, &sa.sa_mask, nullptr) == 0); + MaybeSave(); + + // Send ourselves kOriginalSignal while ptraced and exit with the signal we + // actually receive via the signal handler, if any, or 0 if we don't receive + // a signal. + TEST_PCHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) == 0); + MaybeSave(); + RaiseSignal(kOriginalSigno); + _exit(0); + } + // In parent process. + ASSERT_THAT(child_pid, SyscallSucceeds()); + + // Wait for the child to send itself kOriginalSigno and enter + // signal-delivery-stop. + int status; + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == kOriginalSigno) + << " status " << status; + + siginfo_t siginfo = {}; + ASSERT_THAT(ptrace(PTRACE_GETSIGINFO, child_pid, 0, &siginfo), + SyscallSucceeds()); + EXPECT_EQ(kOriginalSigno, siginfo.si_signo); + EXPECT_EQ(SI_TKILL, siginfo.si_code); + + // Replace the signal with kInjectedSigno, and check that the child exits + // with kInjectedSigno, indicating that signal injection was successful. 
+ siginfo.si_signo = kInjectedSigno; + ASSERT_THAT(ptrace(PTRACE_SETSIGINFO, child_pid, 0, &siginfo), + SyscallSucceeds()); + ASSERT_THAT(ptrace(PTRACE_DETACH, child_pid, 0, kInjectedSigno), + SyscallSucceeds()); + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == kInjectedSigno) + << " status " << status; +} + +TEST(PtraceTest, SIGKILLDoesNotCauseSignalDeliveryStop) { + pid_t const child_pid = fork(); + if (child_pid == 0) { + // In child process. + TEST_PCHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) == 0); + MaybeSave(); + RaiseSignal(SIGKILL); + TEST_CHECK_MSG(false, "Survived SIGKILL?"); + _exit(1); + } + // In parent process. + ASSERT_THAT(child_pid, SyscallSucceeds()); + + // Expect the child to die to SIGKILL without entering signal-delivery-stop. + int status; + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL) + << " status " << status; +} + +TEST(PtraceTest, PtraceKill) { + constexpr int kOriginalSigno = SIGUSR1; + + pid_t const child_pid = fork(); + if (child_pid == 0) { + // In child process. + TEST_PCHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) == 0); + MaybeSave(); + + // PTRACE_KILL only works if tracee has entered signal-delivery-stop. + RaiseSignal(kOriginalSigno); + TEST_CHECK_MSG(false, "Failed to kill the process?"); + _exit(0); + } + // In parent process. + ASSERT_THAT(child_pid, SyscallSucceeds()); + + // Wait for the child to send itself kOriginalSigno and enter + // signal-delivery-stop. + int status; + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == kOriginalSigno) + << " status " << status; + + ASSERT_THAT(ptrace(PTRACE_KILL, child_pid, 0, 0), SyscallSucceeds()); + + // Expect the child to die with SIGKILL. 
+ ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL) + << " status " << status; +} + +TEST(PtraceTest, GetRegSet) { + pid_t const child_pid = fork(); + if (child_pid == 0) { + // In child process. + + // Enable tracing. + TEST_PCHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) == 0); + MaybeSave(); + + // Use kill explicitly because we check the syscall argument register below. + kill(getpid(), SIGSTOP); + + _exit(0); + } + // In parent process. + ASSERT_THAT(child_pid, SyscallSucceeds()); + + // Wait for the child to send itself SIGSTOP and enter signal-delivery-stop. + int status; + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP) + << " status " << status; + + // Get the general registers. + struct user_regs_struct regs; + struct iovec iov; + iov.iov_base = ®s; + iov.iov_len = sizeof(regs); + EXPECT_THAT(ptrace(PTRACE_GETREGSET, child_pid, NT_PRSTATUS, &iov), + SyscallSucceeds()); + + // Read exactly the full register set. + EXPECT_EQ(iov.iov_len, sizeof(regs)); + +#ifdef __x86_64__ + // Child called kill(2), with SIGSTOP as arg 2. + EXPECT_EQ(regs.rsi, SIGSTOP); +#endif + + // Suppress SIGSTOP and resume the child. + ASSERT_THAT(ptrace(PTRACE_CONT, child_pid, 0, 0), SyscallSucceeds()); + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + // Let's see that process exited normally. + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << " status " << status; +} + +TEST(PtraceTest, AttachingConvertsGroupStopToPtraceStop) { + pid_t const child_pid = fork(); + if (child_pid == 0) { + // In child process. + while (true) { + pause(); + } + } + // In parent process. + ASSERT_THAT(child_pid, SyscallSucceeds()); + + // SIGSTOP the child and wait for it to stop. 
+ ASSERT_THAT(kill(child_pid, SIGSTOP), SyscallSucceeds()); + int status; + ASSERT_THAT(waitpid(child_pid, &status, WUNTRACED), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP) + << " status " << status; + + // Attach to the child and expect it to re-enter a traced group-stop despite + // already being stopped. + ASSERT_THAT(ptrace(PTRACE_ATTACH, child_pid, 0, 0), SyscallSucceeds()); + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP) + << " status " << status; + + // Verify that the child is ptrace-stopped by checking that it can receive + // ptrace commands requiring a ptrace-stop. + EXPECT_THAT(ptrace(PTRACE_SETOPTIONS, child_pid, 0, 0), SyscallSucceeds()); + + // Group-stop is distinguished from signal-delivery-stop by PTRACE_GETSIGINFO + // failing with EINVAL. + siginfo_t siginfo = {}; + EXPECT_THAT(ptrace(PTRACE_GETSIGINFO, child_pid, 0, &siginfo), + SyscallFailsWithErrno(EINVAL)); + + // Detach from the child and expect it to stay stopped without a notification. + ASSERT_THAT(ptrace(PTRACE_DETACH, child_pid, 0, 0), SyscallSucceeds()); + ASSERT_THAT(waitpid(child_pid, &status, WUNTRACED | WNOHANG), + SyscallSucceedsWithValue(0)); + + // Sending it SIGCONT should cause it to leave its stop. + ASSERT_THAT(kill(child_pid, SIGCONT), SyscallSucceeds()); + ASSERT_THAT(waitpid(child_pid, &status, WCONTINUED), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFCONTINUED(status)) << " status " << status; + + // Clean up the child. + ASSERT_THAT(kill(child_pid, SIGKILL), SyscallSucceeds()); + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL) + << " status " << status; +} + +// Fixture for tests parameterized by whether or not to use PTRACE_O_TRACEEXEC. 
+class PtraceExecveTest : public ::testing::TestWithParam<bool> { + protected: + bool TraceExec() const { return GetParam(); } +}; + +TEST_P(PtraceExecveTest, Execve_GetRegs_PeekUser_SIGKILL_TraceClone_TraceExit) { + pid_t const child_pid = fork(); + if (child_pid == 0) { + // In child process. + + // Enable tracing, then raise SIGSTOP and expect our parent to suppress it. + TEST_PCHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) == 0); + MaybeSave(); + RaiseSignal(SIGSTOP); + MaybeSave(); + + // Call execve in a non-leader thread. + ExecveArray const owned_child_argv = {"/proc/self/exe"}; + char* const* const child_argv = owned_child_argv.get(); + ScopedThread t([&] { + execve(child_argv[0], child_argv, /* envp = */ nullptr); + TEST_CHECK_MSG(false, "Survived execve? (thread)"); + }); + t.Join(); + TEST_CHECK_MSG(false, "Survived execve? (main)"); + _exit(1); + } + // In parent process. + ASSERT_THAT(child_pid, SyscallSucceeds()); + + // Wait for the child to send itself SIGSTOP and enter signal-delivery-stop. + int status; + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP) + << " status " << status; + + // Enable PTRACE_O_TRACECLONE so we can get the ID of the child's non-leader + // thread, PTRACE_O_TRACEEXIT so we can observe the leader's death, and + // PTRACE_O_TRACEEXEC if required by the test. (The leader doesn't call + // execve, but options should be inherited across clone.) + long opts = PTRACE_O_TRACECLONE | PTRACE_O_TRACEEXIT; + if (TraceExec()) { + opts |= PTRACE_O_TRACEEXEC; + } + ASSERT_THAT(ptrace(PTRACE_SETOPTIONS, child_pid, 0, opts), SyscallSucceeds()); + + // Suppress the SIGSTOP and wait for the child's leader thread to report + // PTRACE_EVENT_CLONE. Get the new thread's ID from the event. 
+ ASSERT_THAT(ptrace(PTRACE_CONT, child_pid, 0, 0), SyscallSucceeds()); + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_EQ(SIGTRAP | (PTRACE_EVENT_CLONE << 8), status >> 8); + unsigned long eventmsg; + ASSERT_THAT(ptrace(PTRACE_GETEVENTMSG, child_pid, 0, &eventmsg), + SyscallSucceeds()); + pid_t const nonleader_tid = eventmsg; + pid_t const leader_tid = child_pid; + + // The new thread should be ptraced and in signal-delivery-stop by SIGSTOP due + // to PTRACE_O_TRACECLONE. + // + // Before bf959931ddb88c4e4366e96dd22e68fa0db9527c "wait/ptrace: assume __WALL + // if the child is traced" (4.7) , waiting on it requires __WCLONE since, as a + // non-leader, its termination signal is 0. After, a standard wait is + // sufficient. + ASSERT_THAT(waitpid(nonleader_tid, &status, __WCLONE), + SyscallSucceedsWithValue(nonleader_tid)); + EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP) + << " status " << status; + + // Resume both child threads. + for (pid_t const tid : {leader_tid, nonleader_tid}) { + ASSERT_THAT(ptrace(PTRACE_CONT, tid, 0, 0), SyscallSucceeds()); + } + + // The non-leader child thread should call execve, causing the leader thread + // to enter PTRACE_EVENT_EXIT with an apparent exit code of 0. At this point, + // the leader has not yet exited, so the non-leader should be blocked in + // execve. + ASSERT_THAT(waitpid(leader_tid, &status, 0), + SyscallSucceedsWithValue(leader_tid)); + EXPECT_EQ(SIGTRAP | (PTRACE_EVENT_EXIT << 8), status >> 8); + ASSERT_THAT(ptrace(PTRACE_GETEVENTMSG, leader_tid, 0, &eventmsg), + SyscallSucceeds()); + EXPECT_TRUE(WIFEXITED(eventmsg) && WEXITSTATUS(eventmsg) == 0) + << " eventmsg " << eventmsg; + EXPECT_THAT(waitpid(nonleader_tid, &status, __WCLONE | WNOHANG), + SyscallSucceedsWithValue(0)); + + // Allow the leader to continue exiting. 
This should allow the non-leader to + // complete its execve, causing the original leader to be reaped without + // further notice and the non-leader to steal its ID. + ASSERT_THAT(ptrace(PTRACE_CONT, leader_tid, 0, 0), SyscallSucceeds()); + ASSERT_THAT(waitpid(leader_tid, &status, 0), + SyscallSucceedsWithValue(leader_tid)); + if (TraceExec()) { + // If PTRACE_O_TRACEEXEC was enabled, the execing thread should be in + // PTRACE_EVENT_EXEC-stop, with the event message set to its old thread ID. + EXPECT_EQ(SIGTRAP | (PTRACE_EVENT_EXEC << 8), status >> 8); + ASSERT_THAT(ptrace(PTRACE_GETEVENTMSG, leader_tid, 0, &eventmsg), + SyscallSucceeds()); + EXPECT_EQ(nonleader_tid, eventmsg); + } else { + // Otherwise, the execing thread should have received SIGTRAP and should now + // be in signal-delivery-stop. + EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) + << " status " << status; + } + +#ifdef __x86_64__ + { + // CS should be 0x33, indicating an 64-bit binary. + constexpr uint64_t kAMD64UserCS = 0x33; + EXPECT_THAT(ptrace(PTRACE_PEEKUSER, leader_tid, + offsetof(struct user_regs_struct, cs), 0), + SyscallSucceedsWithValue(kAMD64UserCS)); + struct user_regs_struct regs = {}; + ASSERT_THAT(ptrace(PTRACE_GETREGS, leader_tid, 0, ®s), + SyscallSucceeds()); + EXPECT_EQ(kAMD64UserCS, regs.cs); + } +#endif // defined(__x86_64__) + + // PTRACE_O_TRACEEXIT should have been inherited across execve. Send SIGKILL, + // which should end the PTRACE_EVENT_EXEC-stop or signal-delivery-stop and + // leave the child in PTRACE_EVENT_EXIT-stop. 
+ ASSERT_THAT(kill(leader_tid, SIGKILL), SyscallSucceeds()); + ASSERT_THAT(waitpid(leader_tid, &status, 0), + SyscallSucceedsWithValue(leader_tid)); + EXPECT_EQ(SIGTRAP | (PTRACE_EVENT_EXIT << 8), status >> 8); + ASSERT_THAT(ptrace(PTRACE_GETEVENTMSG, leader_tid, 0, &eventmsg), + SyscallSucceeds()); + EXPECT_TRUE(WIFSIGNALED(eventmsg) && WTERMSIG(eventmsg) == SIGKILL) + << " eventmsg " << eventmsg; + + // End the PTRACE_EVENT_EXIT stop, allowing the child to exit. + ASSERT_THAT(ptrace(PTRACE_CONT, leader_tid, 0, 0), SyscallSucceeds()); + ASSERT_THAT(waitpid(leader_tid, &status, 0), + SyscallSucceedsWithValue(leader_tid)); + EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL) + << " status " << status; +} + +INSTANTIATE_TEST_CASE_P(TraceExec, PtraceExecveTest, ::testing::Bool()); + +// This test has expectations on when syscall-enter/exit-stops occur that are +// violated if saving occurs, since saving interrupts all syscalls, causing +// premature syscall-exit. +TEST(PtraceTest, + ExitWhenParentIsNotTracer_Syscall_TraceVfork_TraceVforkDone_NoRandomSave) { + constexpr int kExitTraceeExitCode = 99; + + pid_t const child_pid = fork(); + if (child_pid == 0) { + // In child process. + + // Block SIGCHLD so it doesn't interrupt wait4. + sigset_t mask; + TEST_PCHECK(sigemptyset(&mask) == 0); + TEST_PCHECK(sigaddset(&mask, SIGCHLD) == 0); + TEST_PCHECK(sigprocmask(SIG_SETMASK, &mask, nullptr) == 0); + MaybeSave(); + + // Enable tracing, then raise SIGSTOP and expect our parent to suppress it. + TEST_PCHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) == 0); + MaybeSave(); + RaiseSignal(SIGSTOP); + MaybeSave(); + + // Spawn a vfork child that exits immediately, and reap it. Don't save + // after vfork since the parent expects to see wait4 as the next syscall. 
+ pid_t const pid = vfork(); + if (pid == 0) { + _exit(kExitTraceeExitCode); + } + TEST_PCHECK_MSG(pid > 0, "vfork failed"); + + int status; + TEST_PCHECK(wait4(pid, &status, 0, nullptr) > 0); + MaybeSave(); + TEST_CHECK(WIFEXITED(status) && WEXITSTATUS(status) == kExitTraceeExitCode); + _exit(0); + } + // In parent process. + ASSERT_THAT(child_pid, SyscallSucceeds()); + + // Wait for the child to send itself SIGSTOP and enter signal-delivery-stop. + int status; + ASSERT_THAT(child_pid, SyscallSucceeds()); + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP) + << " status " << status; + + // Enable PTRACE_O_TRACEVFORK so we can get the ID of the grandchild, + // PTRACE_O_TRACEVFORKDONE so we can observe PTRACE_EVENT_VFORK_DONE, and + // PTRACE_O_TRACESYSGOOD so syscall-enter/exit-stops are unambiguously + // indicated by a stop signal of SIGTRAP|0x80 rather than just SIGTRAP. + ASSERT_THAT(ptrace(PTRACE_SETOPTIONS, child_pid, 0, + PTRACE_O_TRACEVFORK | PTRACE_O_TRACEVFORKDONE | + PTRACE_O_TRACESYSGOOD), + SyscallSucceeds()); + + // Suppress the SIGSTOP and wait for the child to report PTRACE_EVENT_VFORK. + // Get the new process' ID from the event. + ASSERT_THAT(ptrace(PTRACE_CONT, child_pid, 0, 0), SyscallSucceeds()); + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_EQ(SIGTRAP | (PTRACE_EVENT_VFORK << 8), status >> 8); + unsigned long eventmsg; + ASSERT_THAT(ptrace(PTRACE_GETEVENTMSG, child_pid, 0, &eventmsg), + SyscallSucceeds()); + pid_t const grandchild_pid = eventmsg; + + // The grandchild should be traced by us and in signal-delivery-stop by + // SIGSTOP due to PTRACE_O_TRACEVFORK. This allows us to wait on it even + // though we're not its parent. 
+ ASSERT_THAT(waitpid(grandchild_pid, &status, 0), + SyscallSucceedsWithValue(grandchild_pid)); + EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP) + << " status " << status; + + // Resume the child with PTRACE_SYSCALL. Since the grandchild is still in + // signal-delivery-stop, the child should remain in vfork() waiting for the + // grandchild to exec or exit. + ASSERT_THAT(ptrace(PTRACE_SYSCALL, child_pid, 0, 0), SyscallSucceeds()); + absl::SleepFor(absl::Seconds(1)); + ASSERT_THAT(waitpid(child_pid, &status, WNOHANG), + SyscallSucceedsWithValue(0)); + + // Suppress the grandchild's SIGSTOP and wait for the grandchild to exit. Pass + // WNOWAIT to waitid() so that we don't acknowledge the grandchild's exit yet. + ASSERT_THAT(ptrace(PTRACE_CONT, grandchild_pid, 0, 0), SyscallSucceeds()); + siginfo_t siginfo = {}; + ASSERT_THAT(waitid(P_PID, grandchild_pid, &siginfo, WEXITED | WNOWAIT), + SyscallSucceeds()); + EXPECT_EQ(SIGCHLD, siginfo.si_signo); + EXPECT_EQ(CLD_EXITED, siginfo.si_code); + EXPECT_EQ(kExitTraceeExitCode, siginfo.si_status); + EXPECT_EQ(grandchild_pid, siginfo.si_pid); + EXPECT_EQ(getuid(), siginfo.si_uid); + + // The child should now be in PTRACE_EVENT_VFORK_DONE stop. The event + // message should still be the grandchild's PID. + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_EQ(SIGTRAP | (PTRACE_EVENT_VFORK_DONE << 8), status >> 8); + ASSERT_THAT(ptrace(PTRACE_GETEVENTMSG, child_pid, 0, &eventmsg), + SyscallSucceeds()); + EXPECT_EQ(grandchild_pid, eventmsg); + + // Resume the child with PTRACE_SYSCALL again and expect it to enter + // syscall-exit-stop for vfork() or clone(), either of which should return the + // grandchild's PID from the syscall. Aside from PTRACE_O_TRACESYSGOOD, + // syscall-stops are distinguished from signal-delivery-stop by + // PTRACE_GETSIGINFO returning a siginfo for which si_code == SIGTRAP or + // SIGTRAP|0x80. 
+ ASSERT_THAT(ptrace(PTRACE_SYSCALL, child_pid, 0, 0), SyscallSucceeds()); + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == (SIGTRAP | 0x80)) + << " status " << status; + ASSERT_THAT(ptrace(PTRACE_GETSIGINFO, child_pid, 0, &siginfo), + SyscallSucceeds()); + EXPECT_TRUE(siginfo.si_code == SIGTRAP || siginfo.si_code == (SIGTRAP | 0x80)) + << "si_code = " << siginfo.si_code; +#ifdef __x86_64__ + { + struct user_regs_struct regs = {}; + ASSERT_THAT(ptrace(PTRACE_GETREGS, child_pid, 0, ®s), SyscallSucceeds()); + EXPECT_TRUE(regs.orig_rax == SYS_vfork || regs.orig_rax == SYS_clone) + << "orig_rax = " << regs.orig_rax; + EXPECT_EQ(grandchild_pid, regs.rax); + } +#endif // defined(__x86_64__) + + // After this point, the child will be making wait4 syscalls that will be + // interrupted by saving, so saving is not permitted. Note that this is + // explicitly released below once the grandchild exits. + DisableSave ds; + + // Resume the child with PTRACE_SYSCALL again and expect it to enter + // syscall-enter-stop for wait4(). + ASSERT_THAT(ptrace(PTRACE_SYSCALL, child_pid, 0, 0), SyscallSucceeds()); + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == (SIGTRAP | 0x80)) + << " status " << status; + ASSERT_THAT(ptrace(PTRACE_GETSIGINFO, child_pid, 0, &siginfo), + SyscallSucceeds()); + EXPECT_TRUE(siginfo.si_code == SIGTRAP || siginfo.si_code == (SIGTRAP | 0x80)) + << "si_code = " << siginfo.si_code; +#ifdef __x86_64__ + { + EXPECT_THAT(ptrace(PTRACE_PEEKUSER, child_pid, + offsetof(struct user_regs_struct, orig_rax), 0), + SyscallSucceedsWithValue(SYS_wait4)); + } +#endif // defined(__x86_64__) + + // Resume the child with PTRACE_SYSCALL again. Since the grandchild is + // waiting for the tracer (us) to acknowledge its exit first, wait4 should + // block. 
+ ASSERT_THAT(ptrace(PTRACE_SYSCALL, child_pid, 0, 0), SyscallSucceeds()); + absl::SleepFor(absl::Seconds(1)); + ASSERT_THAT(waitpid(child_pid, &status, WNOHANG), + SyscallSucceedsWithValue(0)); + + // Acknowledge the grandchild's exit. + ASSERT_THAT(waitpid(grandchild_pid, &status, 0), + SyscallSucceedsWithValue(grandchild_pid)); + ds.reset(); + + // Now the child should enter syscall-exit-stop for wait4, returning with the + // grandchild's PID. + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == (SIGTRAP | 0x80)) + << " status " << status; +#ifdef __x86_64__ + { + struct user_regs_struct regs = {}; + ASSERT_THAT(ptrace(PTRACE_GETREGS, child_pid, 0, ®s), SyscallSucceeds()); + EXPECT_EQ(SYS_wait4, regs.orig_rax); + EXPECT_EQ(grandchild_pid, regs.rax); + } +#endif // defined(__x86_64__) + + // Detach from the child and wait for it to exit. + ASSERT_THAT(ptrace(PTRACE_DETACH, child_pid, 0, 0), SyscallSucceeds()); + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << " status " << status; +} + +// These tests requires knowledge of architecture-specific syscall convention. +#ifdef __x86_64__ +TEST(PtraceTest, Sysemu_PokeUser) { + constexpr int kSysemuHelperFirstExitCode = 126; + constexpr uint64_t kSysemuInjectedExitGroupReturn = 42; + + pid_t const child_pid = fork(); + if (child_pid == 0) { + // In child process. + + // Enable tracing, then raise SIGSTOP and expect our parent to suppress it. + TEST_PCHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) == 0); + RaiseSignal(SIGSTOP); + + // Try to exit_group, expecting the tracer to skip the syscall and set its + // own return value. + int const rv = syscall(SYS_exit_group, kSysemuHelperFirstExitCode); + TEST_PCHECK_MSG(rv == kSysemuInjectedExitGroupReturn, + "exit_group returned incorrect value"); + + _exit(0); + } + // In parent process. 
+ ASSERT_THAT(child_pid, SyscallSucceeds()); + + // Wait for the child to send itself SIGSTOP and enter signal-delivery-stop. + int status; + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP) + << " status " << status; + + // Suppress the SIGSTOP and wait for the child to enter syscall-enter-stop + // for its first exit_group syscall. glibc doesn't necessarily define + // PTRACE_SYSEMU. + constexpr auto kPtraceSysemu = static_cast<__ptrace_request>(31); + ASSERT_THAT(ptrace(kPtraceSysemu, child_pid, 0, 0), SyscallSucceeds()); + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) + << " status " << status; + + struct user_regs_struct regs = {}; + ASSERT_THAT(ptrace(PTRACE_GETREGS, child_pid, 0, ®s), SyscallSucceeds()); + EXPECT_EQ(SYS_exit_group, regs.orig_rax); + EXPECT_EQ(-ENOSYS, regs.rax); + EXPECT_EQ(kSysemuHelperFirstExitCode, regs.rdi); + + // Replace the exit_group return value, then resume the child, which should + // automatically skip the syscall. + ASSERT_THAT( + ptrace(PTRACE_POKEUSER, child_pid, offsetof(struct user_regs_struct, rax), + kSysemuInjectedExitGroupReturn), + SyscallSucceeds()); + ASSERT_THAT(ptrace(PTRACE_DETACH, child_pid, 0, 0), SyscallSucceeds()); + + // The child should validate the injected return value and then exit normally. + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << " status " << status; +} + +// This test also cares about syscall-exit-stop. +TEST(PtraceTest, ERESTART_NoRandomSave) { + constexpr int kSigno = SIGUSR1; + + pid_t const child_pid = fork(); + if (child_pid == 0) { + // In child process. + + // Ignore, but unblock, kSigno. 
+ struct sigaction sa = {}; + sa.sa_handler = SIG_IGN; + TEST_PCHECK(sigfillset(&sa.sa_mask) == 0); + TEST_PCHECK(sigaction(kSigno, &sa, nullptr) == 0); + MaybeSave(); + TEST_PCHECK(sigprocmask(SIG_UNBLOCK, &sa.sa_mask, nullptr) == 0); + MaybeSave(); + + // Enable tracing, then raise SIGSTOP and expect our parent to suppress it. + TEST_PCHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) == 0); + RaiseSignal(SIGSTOP); + + // Invoke the pause syscall, which normally should not return until we + // receive a signal that "either terminates the process or causes the + // invocation of a signal-catching function". + pause(); + + _exit(0); + } + ASSERT_THAT(child_pid, SyscallSucceeds()); + + // Wait for the child to send itself SIGSTOP and enter signal-delivery-stop. + int status; + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP) + << " status " << status; + + // After this point, the child's pause syscall will be interrupted by saving, + // so saving is not permitted. Note that this is explicitly released below + // once the child is stopped. + DisableSave ds; + + // Suppress the SIGSTOP and wait for the child to enter syscall-enter-stop for + // its pause syscall. + ASSERT_THAT(ptrace(PTRACE_SYSCALL, child_pid, 0, 0), SyscallSucceeds()); + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) + << " status " << status; + + struct user_regs_struct regs = {}; + ASSERT_THAT(ptrace(PTRACE_GETREGS, child_pid, 0, ®s), SyscallSucceeds()); + EXPECT_EQ(SYS_pause, regs.orig_rax); + EXPECT_EQ(-ENOSYS, regs.rax); + + // Resume the child with PTRACE_SYSCALL and expect it to block in the pause + // syscall. 
+ ASSERT_THAT(ptrace(PTRACE_SYSCALL, child_pid, 0, 0), SyscallSucceeds()); + absl::SleepFor(absl::Seconds(1)); + ASSERT_THAT(waitpid(child_pid, &status, WNOHANG), + SyscallSucceedsWithValue(0)); + + // Send the child kSigno, causing it to return ERESTARTNOHAND and enter + // syscall-exit-stop from the pause syscall. + constexpr int ERESTARTNOHAND = 514; + ASSERT_THAT(kill(child_pid, kSigno), SyscallSucceeds()); + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) + << " status " << status; + ds.reset(); + + ASSERT_THAT(ptrace(PTRACE_GETREGS, child_pid, 0, ®s), SyscallSucceeds()); + EXPECT_EQ(SYS_pause, regs.orig_rax); + EXPECT_EQ(-ERESTARTNOHAND, regs.rax); + + // Replace the return value from pause with 0, causing pause to not be + // restarted despite kSigno being ignored. + ASSERT_THAT(ptrace(PTRACE_POKEUSER, child_pid, + offsetof(struct user_regs_struct, rax), 0), + SyscallSucceeds()); + + // Detach from the child and wait for it to exit. + ASSERT_THAT(ptrace(PTRACE_DETACH, child_pid, 0, 0), SyscallSucceeds()); + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << " status " << status; +} +#endif // defined(__x86_64__) + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/pty.cc b/test/syscalls/linux/pty.cc new file mode 100644 index 000000000..253aa26ba --- /dev/null +++ b/test/syscalls/linux/pty.cc @@ -0,0 +1,1230 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <fcntl.h> +#include <linux/major.h> +#include <poll.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <termios.h> +#include <unistd.h> +#include <iostream> + +#include "gtest/gtest.h" +#include "absl/base/macros.h" +#include "absl/strings/str_cat.h" +#include "absl/synchronization/notification.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/util/file_descriptor.h" +#include "test/util/posix_error.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +using ::testing::AnyOf; +using ::testing::Contains; +using ::testing::Eq; +using ::testing::Not; + +// Tests Unix98 pseudoterminals. +// +// These tests assume that /dev/ptmx exists and is associated with a devpts +// filesystem mounted at /dev/pts/. While a Linux distribution could +// theoretically place those anywhere, glibc expects those locations, so they +// are effectively fixed. + +// Minor device number for an unopened ptmx file. +constexpr int kPtmxMinor = 2; + +// The timeout when polling for data from a pty. When data is written to one end +// of a pty, Linux asynchronously makes it available to the other end, so we +// have to wait. +constexpr absl::Duration kTimeout = absl::Seconds(20); + +// The maximum line size in bytes returned per read from a pty file. +constexpr int kMaxLineSize = 4096; + +// glibc defines its own, different, version of struct termios. We care about +// what the kernel does, not glibc. 
// Number of control characters in the kernel's struct termios.
#define KERNEL_NCCS 19

// Kernel-side layout of struct termios, declared here so the tests do not
// depend on glibc's differing definition.
struct kernel_termios {
  tcflag_t c_iflag;          // Input mode flags.
  tcflag_t c_oflag;          // Output mode flags.
  tcflag_t c_cflag;          // Control mode flags.
  tcflag_t c_lflag;          // Local mode flags.
  cc_t c_line;               // Line discipline.
  cc_t c_cc[KERNEL_NCCS];    // Control characters, indexed by V* constants.
};

// Bytewise equality of two kernel_termios values.
//
// memcmp also compares any padding bytes, so both operands should be fully
// initialized (e.g. value-initialized with `= {}`) before being compared.
bool operator==(struct kernel_termios const& a,
                struct kernel_termios const& b) {
  return memcmp(&a, &b, sizeof(a)) == 0;
}

// Returns the termios-style control character for the passed character.
//
// e.g., for Ctrl-C, i.e., ^C, call ControlCharacter('C').
//
// Standard control characters are ASCII bytes 0 through 31.
constexpr char ControlCharacter(char c) {
  // A is 1, B is 2, etc.
  return c - 'A' + 1;
}

// Returns the printable character the given control character represents.
//
// This is the inverse of ControlCharacter: 1 maps to 'A', 2 to 'B', etc.
constexpr char FromControlCharacter(char c) { return c + 'A' - 1; }

// Returns true if c is a control character.
//
// Standard control characters are ASCII bytes 0 through 31. The lower bound is
// checked explicitly because char may be signed, in which case bytes 0x80-0xff
// are negative and would otherwise be misclassified as control characters.
constexpr bool IsControlCharacter(char c) { return c >= 0 && c <= 31; }

// Describes one named value of a bit field within a flags word.
struct Field {
  const char* name;  // Label to print when the field matches.
  uint64_t mask;     // Bits of the flags word that belong to this field.
  uint64_t value;    // Value the masked bits must equal for a match.
};

// ParseFields returns a std::string representation of value, using the names in
// fields.
+std::string ParseFields(const Field* fields, size_t len, uint64_t value) { + bool first = true; + std::string s; + for (size_t i = 0; i < len; i++) { + const Field f = fields[i]; + if ((value & f.mask) == f.value) { + if (!first) { + s += "|"; + } + s += f.name; + first = false; + value &= ~f.mask; + } + } + + if (value) { + if (!first) { + s += "|"; + } + absl::StrAppend(&s, value); + } + + return s; +} + +const Field kIflagFields[] = { + {"IGNBRK", IGNBRK, IGNBRK}, {"BRKINT", BRKINT, BRKINT}, + {"IGNPAR", IGNPAR, IGNPAR}, {"PARMRK", PARMRK, PARMRK}, + {"INPCK", INPCK, INPCK}, {"ISTRIP", ISTRIP, ISTRIP}, + {"INLCR", INLCR, INLCR}, {"IGNCR", IGNCR, IGNCR}, + {"ICRNL", ICRNL, ICRNL}, {"IUCLC", IUCLC, IUCLC}, + {"IXON", IXON, IXON}, {"IXANY", IXANY, IXANY}, + {"IXOFF", IXOFF, IXOFF}, {"IMAXBEL", IMAXBEL, IMAXBEL}, + {"IUTF8", IUTF8, IUTF8}, +}; + +const Field kOflagFields[] = { + {"OPOST", OPOST, OPOST}, {"OLCUC", OLCUC, OLCUC}, + {"ONLCR", ONLCR, ONLCR}, {"OCRNL", OCRNL, OCRNL}, + {"ONOCR", ONOCR, ONOCR}, {"ONLRET", ONLRET, ONLRET}, + {"OFILL", OFILL, OFILL}, {"OFDEL", OFDEL, OFDEL}, + {"NL0", NLDLY, NL0}, {"NL1", NLDLY, NL1}, + {"CR0", CRDLY, CR0}, {"CR1", CRDLY, CR1}, + {"CR2", CRDLY, CR2}, {"CR3", CRDLY, CR3}, + {"TAB0", TABDLY, TAB0}, {"TAB1", TABDLY, TAB1}, + {"TAB2", TABDLY, TAB2}, {"TAB3", TABDLY, TAB3}, + {"BS0", BSDLY, BS0}, {"BS1", BSDLY, BS1}, + {"FF0", FFDLY, FF0}, {"FF1", FFDLY, FF1}, + {"VT0", VTDLY, VT0}, {"VT1", VTDLY, VT1}, + {"XTABS", XTABS, XTABS}, +}; + +#ifndef IBSHIFT +// Shift from CBAUD to CIBAUD. 
+#define IBSHIFT 16 +#endif + +const Field kCflagFields[] = { + {"B0", CBAUD, B0}, + {"B50", CBAUD, B50}, + {"B75", CBAUD, B75}, + {"B110", CBAUD, B110}, + {"B134", CBAUD, B134}, + {"B150", CBAUD, B150}, + {"B200", CBAUD, B200}, + {"B300", CBAUD, B300}, + {"B600", CBAUD, B600}, + {"B1200", CBAUD, B1200}, + {"B1800", CBAUD, B1800}, + {"B2400", CBAUD, B2400}, + {"B4800", CBAUD, B4800}, + {"B9600", CBAUD, B9600}, + {"B19200", CBAUD, B19200}, + {"B38400", CBAUD, B38400}, + {"CS5", CSIZE, CS5}, + {"CS6", CSIZE, CS6}, + {"CS7", CSIZE, CS7}, + {"CS8", CSIZE, CS8}, + {"CSTOPB", CSTOPB, CSTOPB}, + {"CREAD", CREAD, CREAD}, + {"PARENB", PARENB, PARENB}, + {"PARODD", PARODD, PARODD}, + {"HUPCL", HUPCL, HUPCL}, + {"CLOCAL", CLOCAL, CLOCAL}, + {"B57600", CBAUD, B57600}, + {"B115200", CBAUD, B115200}, + {"B230400", CBAUD, B230400}, + {"B460800", CBAUD, B460800}, + {"B500000", CBAUD, B500000}, + {"B576000", CBAUD, B576000}, + {"B921600", CBAUD, B921600}, + {"B1000000", CBAUD, B1000000}, + {"B1152000", CBAUD, B1152000}, + {"B1500000", CBAUD, B1500000}, + {"B2000000", CBAUD, B2000000}, + {"B2500000", CBAUD, B2500000}, + {"B3000000", CBAUD, B3000000}, + {"B3500000", CBAUD, B3500000}, + {"B4000000", CBAUD, B4000000}, + {"CMSPAR", CMSPAR, CMSPAR}, + {"CRTSCTS", CRTSCTS, CRTSCTS}, + {"IB0", CIBAUD, B0 << IBSHIFT}, + {"IB50", CIBAUD, B50 << IBSHIFT}, + {"IB75", CIBAUD, B75 << IBSHIFT}, + {"IB110", CIBAUD, B110 << IBSHIFT}, + {"IB134", CIBAUD, B134 << IBSHIFT}, + {"IB150", CIBAUD, B150 << IBSHIFT}, + {"IB200", CIBAUD, B200 << IBSHIFT}, + {"IB300", CIBAUD, B300 << IBSHIFT}, + {"IB600", CIBAUD, B600 << IBSHIFT}, + {"IB1200", CIBAUD, B1200 << IBSHIFT}, + {"IB1800", CIBAUD, B1800 << IBSHIFT}, + {"IB2400", CIBAUD, B2400 << IBSHIFT}, + {"IB4800", CIBAUD, B4800 << IBSHIFT}, + {"IB9600", CIBAUD, B9600 << IBSHIFT}, + {"IB19200", CIBAUD, B19200 << IBSHIFT}, + {"IB38400", CIBAUD, B38400 << IBSHIFT}, + {"IB57600", CIBAUD, B57600 << IBSHIFT}, + {"IB115200", CIBAUD, B115200 << IBSHIFT}, + {"IB230400", 
CIBAUD, B230400 << IBSHIFT},
+    {"IB460800", CIBAUD, B460800 << IBSHIFT},
+    {"IB500000", CIBAUD, B500000 << IBSHIFT},
+    {"IB576000", CIBAUD, B576000 << IBSHIFT},
+    {"IB921600", CIBAUD, B921600 << IBSHIFT},
+    {"IB1000000", CIBAUD, B1000000 << IBSHIFT},
+    {"IB1152000", CIBAUD, B1152000 << IBSHIFT},
+    {"IB1500000", CIBAUD, B1500000 << IBSHIFT},
+    {"IB2000000", CIBAUD, B2000000 << IBSHIFT},
+    {"IB2500000", CIBAUD, B2500000 << IBSHIFT},
+    {"IB3000000", CIBAUD, B3000000 << IBSHIFT},
+    {"IB3500000", CIBAUD, B3500000 << IBSHIFT},
+    {"IB4000000", CIBAUD, B4000000 << IBSHIFT},
+};
+
+// Name/mask/value table for the c_lflag bits, consumed by ParseFields when
+// printing a kernel_termios.
+const Field kLflagFields[] = {
+    {"ISIG", ISIG, ISIG},          {"ICANON", ICANON, ICANON},
+    {"XCASE", XCASE, XCASE},       {"ECHO", ECHO, ECHO},
+    {"ECHOE", ECHOE, ECHOE},       {"ECHOK", ECHOK, ECHOK},
+    {"ECHONL", ECHONL, ECHONL},    {"NOFLSH", NOFLSH, NOFLSH},
+    {"TOSTOP", TOSTOP, TOSTOP},    {"ECHOCTL", ECHOCTL, ECHOCTL},
+    {"ECHOPRT", ECHOPRT, ECHOPRT}, {"ECHOKE", ECHOKE, ECHOKE},
+    {"FLUSHO", FLUSHO, FLUSHO},    {"PENDIN", PENDIN, PENDIN},
+    {"IEXTEN", IEXTEN, IEXTEN},    {"EXTPROC", EXTPROC, EXTPROC},
+};
+
+// Formats a single c_cc entry for test output.
+//
+// Graphic characters and space are returned unchanged; tab, CR, LF and NUL
+// use C-style escapes; characters accepted by IsControlCharacter are printed
+// as '^' followed by FromControlCharacter(c); anything else is printed as a
+// "\x" hex escape.
+std::string FormatCC(char c) {
+  // The <cctype> classifiers have undefined behavior for negative arguments
+  // other than EOF, and plain char may be signed, so convert through
+  // unsigned char before classifying.
+  if (isgraph(static_cast<unsigned char>(c))) {
+    return std::string(1, c);
+  } else if (c == ' ') {
+    return " ";
+  } else if (c == '\t') {
+    return "\\t";
+  } else if (c == '\r') {
+    return "\\r";
+  } else if (c == '\n') {
+    return "\\n";
+  } else if (c == '\0') {
+    return "\\0";
+  } else if (IsControlCharacter(c)) {
+    return absl::StrCat("^", std::string(1, FromControlCharacter(c)));
+  }
+  return absl::StrCat("\\x", absl::Hex(c));
+}
+
+// Pretty-prints a kernel_termios so that gtest failure messages show the
+// decoded flag names and the individual control characters.
+//
+// VTIME and VMIN are printed as integers; every other c_cc entry goes
+// through FormatCC.
+std::ostream& operator<<(std::ostream& os, struct kernel_termios const& a) {
+  os << "{ c_iflag = "
+     << ParseFields(kIflagFields, ABSL_ARRAYSIZE(kIflagFields), a.c_iflag);
+  os << ", c_oflag = "
+     << ParseFields(kOflagFields, ABSL_ARRAYSIZE(kOflagFields), a.c_oflag);
+  os << ", c_cflag = "
+     << ParseFields(kCflagFields, ABSL_ARRAYSIZE(kCflagFields), a.c_cflag);
+  os << ", c_lflag = "
+     << ParseFields(kLflagFields, ABSL_ARRAYSIZE(kLflagFields), a.c_lflag);
+  os << ", c_line = " << a.c_line;
+  os << ", c_cc = { [VINTR] = '" << FormatCC(a.c_cc[VINTR]);
+  os << "', [VQUIT] = '" << FormatCC(a.c_cc[VQUIT]);
+  os << "', [VERASE] = '" << FormatCC(a.c_cc[VERASE]);
+  os << "', [VKILL] = '" << FormatCC(a.c_cc[VKILL]);
+  os << "', [VEOF] = '" << FormatCC(a.c_cc[VEOF]);
+  os << "', [VTIME] = '" << static_cast<int>(a.c_cc[VTIME]);
+  os << "', [VMIN] = " << static_cast<int>(a.c_cc[VMIN]);
+  os << ", [VSWTC] = '" << FormatCC(a.c_cc[VSWTC]);
+  os << "', [VSTART] = '" << FormatCC(a.c_cc[VSTART]);
+  os << "', [VSTOP] = '" << FormatCC(a.c_cc[VSTOP]);
+  os << "', [VSUSP] = '" << FormatCC(a.c_cc[VSUSP]);
+  os << "', [VEOL] = '" << FormatCC(a.c_cc[VEOL]);
+  os << "', [VREPRINT] = '" << FormatCC(a.c_cc[VREPRINT]);
+  os << "', [VDISCARD] = '" << FormatCC(a.c_cc[VDISCARD]);
+  os << "', [VWERASE] = '" << FormatCC(a.c_cc[VWERASE]);
+  os << "', [VLNEXT] = '" << FormatCC(a.c_cc[VLNEXT]);
+  os << "', [VEOL2] = '" << FormatCC(a.c_cc[VEOL2]);
+  os << "'}";
+  return os;
+}
+
+// Return the default termios settings for a new terminal.
+struct kernel_termios DefaultTermios() {
+  struct kernel_termios t = {};
+  t.c_iflag = IXON | ICRNL;
+  t.c_oflag = OPOST | ONLCR;
+  t.c_cflag = B38400 | CSIZE | CS8 | CREAD;
+  t.c_lflag = ISIG | ICANON | ECHO | ECHOE | ECHOK | ECHOCTL | ECHOKE | IEXTEN;
+  t.c_line = 0;
+  t.c_cc[VINTR] = ControlCharacter('C');
+  t.c_cc[VQUIT] = ControlCharacter('\\');
+  t.c_cc[VERASE] = '\x7f';
+  t.c_cc[VKILL] = ControlCharacter('U');
+  t.c_cc[VEOF] = ControlCharacter('D');
+  t.c_cc[VTIME] = '\0';
+  t.c_cc[VMIN] = 1;
+  t.c_cc[VSWTC] = '\0';
+  t.c_cc[VSTART] = ControlCharacter('Q');
+  t.c_cc[VSTOP] = ControlCharacter('S');
+  t.c_cc[VSUSP] = ControlCharacter('Z');
+  t.c_cc[VEOL] = '\0';
+  t.c_cc[VREPRINT] = ControlCharacter('R');
+  t.c_cc[VDISCARD] = ControlCharacter('O');
+  t.c_cc[VWERASE] = ControlCharacter('W');
+  t.c_cc[VLNEXT] = ControlCharacter('V');
+  t.c_cc[VEOL2] = '\0';
+  return t;
+}
+
+// PollAndReadFd tries to read count bytes from fd into buf within timeout.
+//
+// Returns as soon as count bytes have been read. If the timeout expires
+// first, returns the partial byte count if any bytes were read, or an
+// ETIMEDOUT error ("Poll timed out") if none were. poll or read failures are
+// returned as errors immediately.
+//
+// fd must be non-blocking.
+PosixErrorOr<size_t> PollAndReadFd(int fd, void* buf, size_t count,
+                                   absl::Duration timeout) {
+  absl::Time end = absl::Now() + timeout;
+
+  size_t completed = 0;
+  absl::Duration remaining;
+  while ((remaining = end - absl::Now()) > absl::ZeroDuration()) {
+    struct pollfd pfd = {fd, POLLIN, 0};
+    int ret = RetryEINTR(poll)(&pfd, 1, absl::ToInt64Milliseconds(remaining));
+    if (ret < 0) {
+      return PosixError(errno, "poll failed");
+    } else if (ret == 0) {
+      // Timed out. The loop condition re-checks the deadline and exits.
+      continue;
+    } else if (ret != 1) {
+      return PosixError(EINVAL, absl::StrCat("Bad poll ret ", ret));
+    }
+
+    // Append after whatever earlier iterations already read.
+    ssize_t n =
+        ReadFd(fd, static_cast<char*>(buf) + completed, count - completed);
+    if (n < 0) {
+      return PosixError(errno, "read failed");
+    }
+    completed += n;
+    if (completed >= count) {
+      return completed;
+    }
+  }
+
+  if (completed) {
+    return completed;
+  }
+  return PosixError(ETIMEDOUT, "Poll timed out");
+}
+
+// Opens the slave end of the passed master as R/W and nonblocking.
+//
+// Looks up the pty index with TIOCGPTN, unlocks the slave with TIOCSPTLCK,
+// then opens /dev/pts/<index>. Returns the failing ioctl's errno on error.
+PosixErrorOr<FileDescriptor> OpenSlave(const FileDescriptor& master) {
+  // Get pty index.
+  int n = 0;
+  int ret = ioctl(master.get(), TIOCGPTN, &n);
+  if (ret < 0) {
+    return PosixError(errno, "ioctl(TIOCGPTN) failed");
+  }
+
+  // Unlock pts.
+  int unlock = 0;
+  ret = ioctl(master.get(), TIOCSPTLCK, &unlock);
+  if (ret < 0) {
+    return PosixError(errno, "ioctl(TIOCSPTLCK) failed");
+  }
+
+  return Open(absl::StrCat("/dev/pts/", n), O_RDWR | O_NONBLOCK);
+}
+
+TEST(BasicPtyTest, StatUnopenedMaster) {
+  struct stat s;
+  ASSERT_THAT(stat("/dev/ptmx", &s), SyscallSucceeds());
+
+  EXPECT_EQ(s.st_rdev, makedev(TTYAUX_MAJOR, kPtmxMinor));
+  EXPECT_EQ(s.st_size, 0);
+  EXPECT_EQ(s.st_blocks, 0);
+
+  // ptmx attached to a specific devpts mount uses block size 1024. See
+  // fs/devpts/inode.c:devpts_fill_super.
+  //
+  // The global ptmx device uses the block size of the filesystem it is created
+  // on (which is usually 4096 for disk filesystems).
+  EXPECT_THAT(s.st_blksize, AnyOf(Eq(1024), Eq(4096)));
+}
+
+// Waits for count bytes to be readable from fd. Unlike poll, which can return
+// before all data is moved into a pty's read buffer, this function waits for
+// all count bytes to become readable.
+PosixErrorOr<int> WaitUntilReceived(int fd, int count) { + int buffered = -1; + absl::Duration remaining; + absl::Time end = absl::Now() + kTimeout; + while ((remaining = end - absl::Now()) > absl::ZeroDuration()) { + if (ioctl(fd, FIONREAD, &buffered) < 0) { + return PosixError(errno, "failed FIONREAD ioctl"); + } + if (buffered >= count) { + return buffered; + } + absl::SleepFor(absl::Milliseconds(500)); + } + return PosixError( + ETIMEDOUT, + absl::StrFormat( + "FIONREAD timed out, receiving only %d of %d expected bytes", + buffered, count)); +} + +// Verifies that there is nothing left to read from fd. +void ExpectFinished(const FileDescriptor& fd) { + // Nothing more to read. + char c; + EXPECT_THAT(ReadFd(fd.get(), &c, 1), SyscallFailsWithErrno(EAGAIN)); +} + +// Verifies that we can read expected bytes from fd into buf. +void ExpectReadable(const FileDescriptor& fd, int expected, char* buf) { + size_t n = ASSERT_NO_ERRNO_AND_VALUE( + PollAndReadFd(fd.get(), buf, expected, kTimeout)); + EXPECT_EQ(expected, n); +} + +TEST(BasicPtyTest, OpenMasterSlave) { + FileDescriptor master = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR)); + FileDescriptor slave = ASSERT_NO_ERRNO_AND_VALUE(OpenSlave(master)); +} + +// The slave entry in /dev/pts/ disappears when the master is closed, even if +// the slave is still open. +TEST(BasicPtyTest, SlaveEntryGoneAfterMasterClose) { + FileDescriptor master = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR)); + FileDescriptor slave = ASSERT_NO_ERRNO_AND_VALUE(OpenSlave(master)); + + // Get pty index. 
+ int index = -1; + ASSERT_THAT(ioctl(master.get(), TIOCGPTN, &index), SyscallSucceeds()); + + std::string path = absl::StrCat("/dev/pts/", index); + + struct stat st; + EXPECT_THAT(stat(path.c_str(), &st), SyscallSucceeds()); + + master.reset(); + + EXPECT_THAT(stat(path.c_str(), &st), SyscallFailsWithErrno(ENOENT)); +} + +TEST(BasicPtyTest, Getdents) { + FileDescriptor master1 = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR)); + int index1 = -1; + ASSERT_THAT(ioctl(master1.get(), TIOCGPTN, &index1), SyscallSucceeds()); + FileDescriptor slave1 = ASSERT_NO_ERRNO_AND_VALUE(OpenSlave(master1)); + + FileDescriptor master2 = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR)); + int index2 = -1; + ASSERT_THAT(ioctl(master2.get(), TIOCGPTN, &index2), SyscallSucceeds()); + FileDescriptor slave2 = ASSERT_NO_ERRNO_AND_VALUE(OpenSlave(master2)); + + // The directory contains ptmx, index1, and index2. (Plus any additional PTYs + // unrelated to this test.) + + std::vector<std::string> contents = + ASSERT_NO_ERRNO_AND_VALUE(ListDir("/dev/pts/", true)); + EXPECT_THAT(contents, Contains(absl::StrCat(index1))); + EXPECT_THAT(contents, Contains(absl::StrCat(index2))); + + master2.reset(); + + // The directory contains ptmx and index1, but not index2 since the master is + // closed. (Plus any additional PTYs unrelated to this test.) + + contents = ASSERT_NO_ERRNO_AND_VALUE(ListDir("/dev/pts/", true)); + EXPECT_THAT(contents, Contains(absl::StrCat(index1))); + EXPECT_THAT(contents, Not(Contains(absl::StrCat(index2)))); + + // N.B. devpts supports legacy "single-instance" mode and new "multi-instance" + // mode. In legacy mode, devpts does not contain a "ptmx" device (the distro + // must use mknod to create it somewhere, presumably /dev/ptmx). + // Multi-instance mode does include a "ptmx" device tied to that mount. + // + // We don't check for the presence or absence of "ptmx", as distros vary in + // their usage of the two modes. 
+} + +class PtyTest : public ::testing::Test { + protected: + void SetUp() override { + master_ = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR | O_NONBLOCK)); + slave_ = ASSERT_NO_ERRNO_AND_VALUE(OpenSlave(master_)); + } + + void DisableCanonical() { + struct kernel_termios t = {}; + EXPECT_THAT(ioctl(slave_.get(), TCGETS, &t), SyscallSucceeds()); + t.c_lflag &= ~ICANON; + EXPECT_THAT(ioctl(slave_.get(), TCSETS, &t), SyscallSucceeds()); + } + + void EnableCanonical() { + struct kernel_termios t = {}; + EXPECT_THAT(ioctl(slave_.get(), TCGETS, &t), SyscallSucceeds()); + t.c_lflag |= ICANON; + EXPECT_THAT(ioctl(slave_.get(), TCSETS, &t), SyscallSucceeds()); + } + + // Master and slave ends of the PTY. Non-blocking. + FileDescriptor master_; + FileDescriptor slave_; +}; + +// Master to slave sanity test. +TEST_F(PtyTest, WriteMasterToSlave) { + // N.B. by default, the slave reads nothing until the master writes a newline. + constexpr char kBuf[] = "hello\n"; + + EXPECT_THAT(WriteFd(master_.get(), kBuf, sizeof(kBuf) - 1), + SyscallSucceedsWithValue(sizeof(kBuf) - 1)); + + // Linux moves data from the master to the slave via async work scheduled via + // tty_flip_buffer_push. Since it is asynchronous, the data may not be + // available for reading immediately. Instead we must poll and assert that it + // becomes available "soon". + + char buf[sizeof(kBuf)] = {}; + ExpectReadable(slave_, sizeof(buf) - 1, buf); + + EXPECT_EQ(memcmp(buf, kBuf, sizeof(kBuf)), 0); +} + +// Slave to master sanity test. +TEST_F(PtyTest, WriteSlaveToMaster) { + // N.B. by default, the master reads nothing until the slave writes a newline, + // and the master gets a carriage return. + constexpr char kInput[] = "hello\n"; + constexpr char kExpected[] = "hello\r\n"; + + EXPECT_THAT(WriteFd(slave_.get(), kInput, sizeof(kInput) - 1), + SyscallSucceedsWithValue(sizeof(kInput) - 1)); + + // Linux moves data from the master to the slave via async work scheduled via + // tty_flip_buffer_push. 
Since it is asynchronous, the data may not be + // available for reading immediately. Instead we must poll and assert that it + // becomes available "soon". + + char buf[sizeof(kExpected)] = {}; + ExpectReadable(master_, sizeof(buf) - 1, buf); + + EXPECT_EQ(memcmp(buf, kExpected, sizeof(kExpected)), 0); +} + +// Both the master and slave report the standard default termios settings. +// +// Note that TCGETS on the master actually redirects to the slave (see comment +// on MasterTermiosUnchangable). +TEST_F(PtyTest, DefaultTermios) { + struct kernel_termios t = {}; + EXPECT_THAT(ioctl(slave_.get(), TCGETS, &t), SyscallSucceeds()); + EXPECT_EQ(t, DefaultTermios()); + + EXPECT_THAT(ioctl(master_.get(), TCGETS, &t), SyscallSucceeds()); + EXPECT_EQ(t, DefaultTermios()); +} + +// Changing termios from the master actually affects the slave. +// +// TCSETS on the master actually redirects to the slave (see comment on +// MasterTermiosUnchangable). +TEST_F(PtyTest, TermiosAffectsSlave) { + struct kernel_termios master_termios = {}; + EXPECT_THAT(ioctl(master_.get(), TCGETS, &master_termios), SyscallSucceeds()); + master_termios.c_lflag ^= ICANON; + EXPECT_THAT(ioctl(master_.get(), TCSETS, &master_termios), SyscallSucceeds()); + + struct kernel_termios slave_termios = {}; + EXPECT_THAT(ioctl(slave_.get(), TCGETS, &slave_termios), SyscallSucceeds()); + EXPECT_EQ(master_termios, slave_termios); +} + +// The master end of the pty has termios: +// +// struct kernel_termios t = { +// .c_iflag = 0; +// .c_oflag = 0; +// .c_cflag = B38400 | CS8 | CREAD; +// .c_lflag = 0; +// .c_cc = /* same as DefaultTermios */ +// } +// +// (From drivers/tty/pty.c:unix98_pty_init) +// +// All termios control ioctls on the master actually redirect to the slave +// (drivers/tty/tty_ioctl.c:tty_mode_ioctl), making it impossible to change the +// master termios. +// +// Verify this by setting ICRNL (which rewrites input \r to \n) and verify that +// it has no effect on the master. 
+TEST_F(PtyTest, MasterTermiosUnchangable) {
+  char c = '\r';
+  ASSERT_THAT(WriteFd(slave_.get(), &c, 1), SyscallSucceedsWithValue(1));
+
+  ExpectReadable(master_, 1, &c);
+  EXPECT_EQ(c, '\r');  // ICRNL had no effect!
+
+  ExpectFinished(master_);
+}
+
+// ICRNL rewrites input \r to \n.
+TEST_F(PtyTest, TermiosICRNL) {
+  struct kernel_termios t = DefaultTermios();
+  t.c_iflag |= ICRNL;
+  t.c_lflag &= ~ICANON;  // for byte-by-byte reading.
+  ASSERT_THAT(ioctl(slave_.get(), TCSETS, &t), SyscallSucceeds());
+
+  char c = '\r';
+  ASSERT_THAT(WriteFd(master_.get(), &c, 1), SyscallSucceedsWithValue(1));
+
+  ExpectReadable(slave_, 1, &c);
+  EXPECT_EQ(c, '\n');
+
+  ExpectFinished(slave_);
+}
+
+// ONLCR rewrites output \n to \r\n.
+TEST_F(PtyTest, TermiosONLCR) {
+  struct kernel_termios t = DefaultTermios();
+  t.c_oflag |= ONLCR;
+  t.c_lflag &= ~ICANON;  // for byte-by-byte reading.
+  ASSERT_THAT(ioctl(slave_.get(), TCSETS, &t), SyscallSucceeds());
+
+  char c = '\n';
+  ASSERT_THAT(WriteFd(slave_.get(), &c, 1), SyscallSucceedsWithValue(1));
+
+  // Extra byte for NUL for EXPECT_STREQ.
+  char buf[3] = {};
+  ExpectReadable(master_, 2, buf);
+  EXPECT_STREQ(buf, "\r\n");
+
+  // The master is the end we read from, so verify that exactly two bytes were
+  // produced there; the slave must have nothing to read either.
+  ExpectFinished(master_);
+  ExpectFinished(slave_);
+}
+
+// With IGNCR set, a \r written to the master never becomes readable on the
+// slave.
+TEST_F(PtyTest, TermiosIGNCR) {
+  struct kernel_termios t = DefaultTermios();
+  t.c_iflag |= IGNCR;
+  t.c_lflag &= ~ICANON;  // for byte-by-byte reading.
+  ASSERT_THAT(ioctl(slave_.get(), TCSETS, &t), SyscallSucceeds());
+
+  char c = '\r';
+  ASSERT_THAT(WriteFd(master_.get(), &c, 1), SyscallSucceedsWithValue(1));
+
+  // Nothing to read.
+  ASSERT_THAT(PollAndReadFd(slave_.get(), &c, 1, kTimeout),
+              PosixErrorIs(ETIMEDOUT, ::testing::StrEq("Poll timed out")));
+}
+
+// Test that we can successfully poll for readable data from the slave.
+TEST_F(PtyTest, TermiosPollSlave) {
+  struct kernel_termios t = DefaultTermios();
+  t.c_iflag |= IGNCR;
+  t.c_lflag &= ~ICANON;  // for byte-by-byte reading.
+  ASSERT_THAT(ioctl(slave_.get(), TCSETS, &t), SyscallSucceeds());
+
+  absl::Notification notify;
+  int sfd = slave_.get();
+  // notify is captured by reference; th is declared after it and joins before
+  // notify is destroyed.
+  ScopedThread th([sfd, &notify]() {
+    notify.Notify();
+
+    // Poll on the reader fd with POLLIN event.
+    struct pollfd poll_fd = {sfd, POLLIN, 0};
+    EXPECT_THAT(
+        RetryEINTR(poll)(&poll_fd, 1, absl::ToInt64Milliseconds(kTimeout)),
+        SyscallSucceedsWithValue(1));
+
+    // Should trigger POLLIN event.
+    EXPECT_EQ(poll_fd.revents & POLLIN, POLLIN);
+  });
+
+  notify.WaitForNotification();
+  // Sleep ensures that poll begins waiting before we write to the FD.
+  absl::SleepFor(absl::Seconds(1));
+
+  char s[] = "foo\n";
+  ASSERT_THAT(WriteFd(master_.get(), s, strlen(s) + 1), SyscallSucceeds());
+}
+
+// Test that we can successfully poll for readable data from the master.
+TEST_F(PtyTest, TermiosPollMaster) {
+  struct kernel_termios t = DefaultTermios();
+  t.c_iflag |= IGNCR;
+  t.c_lflag &= ~ICANON;  // for byte-by-byte reading.
+  ASSERT_THAT(ioctl(master_.get(), TCSETS, &t), SyscallSucceeds());
+
+  absl::Notification notify;
+  int mfd = master_.get();
+  // notify is captured by reference; th is declared after it and joins before
+  // notify is destroyed.
+  ScopedThread th([mfd, &notify]() {
+    notify.Notify();
+
+    // Poll on the reader fd with POLLIN event.
+    struct pollfd poll_fd = {mfd, POLLIN, 0};
+    EXPECT_THAT(
+        RetryEINTR(poll)(&poll_fd, 1, absl::ToInt64Milliseconds(kTimeout)),
+        SyscallSucceedsWithValue(1));
+
+    // Should trigger POLLIN event.
+    EXPECT_EQ(poll_fd.revents & POLLIN, POLLIN);
+  });
+
+  notify.WaitForNotification();
+  // Sleep ensures that poll begins waiting before we write to the FD.
+  absl::SleepFor(absl::Seconds(1));
+
+  char s[] = "foo\n";
+  ASSERT_THAT(WriteFd(slave_.get(), s, strlen(s) + 1), SyscallSucceeds());
+}
+
+// INLCR rewrites input \n to \r.
+TEST_F(PtyTest, TermiosINLCR) {
+  struct kernel_termios t = DefaultTermios();
+  t.c_iflag |= INLCR;
+  t.c_lflag &= ~ICANON;  // for byte-by-byte reading.
+  ASSERT_THAT(ioctl(slave_.get(), TCSETS, &t), SyscallSucceeds());
+
+  char c = '\n';
+  ASSERT_THAT(WriteFd(master_.get(), &c, 1), SyscallSucceedsWithValue(1));
+
+  ExpectReadable(slave_, 1, &c);
+  EXPECT_EQ(c, '\r');
+
+  ExpectFinished(slave_);
+}
+
+// ONOCR suppresses an output \r while the terminal is at column 0.
+TEST_F(PtyTest, TermiosONOCR) {
+  struct kernel_termios t = DefaultTermios();
+  t.c_oflag |= ONOCR;
+  t.c_lflag &= ~ICANON;  // for byte-by-byte reading.
+  ASSERT_THAT(ioctl(slave_.get(), TCSETS, &t), SyscallSucceeds());
+
+  // The terminal is at column 0, so there should be no CR to read.
+  char c = '\r';
+  ASSERT_THAT(WriteFd(slave_.get(), &c, 1), SyscallSucceedsWithValue(1));
+
+  // Nothing to read.
+  ASSERT_THAT(PollAndReadFd(master_.get(), &c, 1, kTimeout),
+              PosixErrorIs(ETIMEDOUT, ::testing::StrEq("Poll timed out")));
+
+  // This time the column is greater than 0, so we should be able to read the CR
+  // out of the other end.
+  constexpr char kInput[] = "foo\r";
+  constexpr int kInputSize = sizeof(kInput) - 1;
+  ASSERT_THAT(WriteFd(slave_.get(), kInput, kInputSize),
+              SyscallSucceedsWithValue(kInputSize));
+
+  char buf[kInputSize] = {};
+  ExpectReadable(master_, kInputSize, buf);
+
+  EXPECT_EQ(memcmp(buf, kInput, kInputSize), 0);
+
+  ExpectFinished(master_);
+
+  // Terminal should be at column 0 again, so no CR can be read.
+  ASSERT_THAT(WriteFd(slave_.get(), &c, 1), SyscallSucceedsWithValue(1));
+
+  // Nothing to read.
+  ASSERT_THAT(PollAndReadFd(master_.get(), &c, 1, kTimeout),
+              PosixErrorIs(ETIMEDOUT, ::testing::StrEq("Poll timed out")));
+}
+
+// OCRNL rewrites output \r to \n.
+TEST_F(PtyTest, TermiosOCRNL) {
+  struct kernel_termios t = DefaultTermios();
+  t.c_oflag |= OCRNL;
+  t.c_lflag &= ~ICANON;  // for byte-by-byte reading.
+  ASSERT_THAT(ioctl(slave_.get(), TCSETS, &t), SyscallSucceeds());
+
+  // Write a CR from the slave; the master should see it as a single NL.
+  char c = '\r';
+  ASSERT_THAT(WriteFd(slave_.get(), &c, 1), SyscallSucceedsWithValue(1));
+
+  ExpectReadable(master_, 1, &c);
+  EXPECT_EQ(c, '\n');
+
+  ExpectFinished(master_);
+}
+
+// Tests that VEOL is disabled when we start, and that we can set it to enable
+// it.
+TEST_F(PtyTest, VEOLTermination) {
+  // Write a few bytes ending with '\0', and confirm that we can't read.
+  constexpr char kInput[] = "hello";
+  ASSERT_THAT(WriteFd(master_.get(), kInput, sizeof(kInput)),
+              SyscallSucceedsWithValue(sizeof(kInput)));
+  char buf[sizeof(kInput)] = {};
+  ASSERT_THAT(PollAndReadFd(slave_.get(), buf, sizeof(kInput), kTimeout),
+              PosixErrorIs(ETIMEDOUT, ::testing::StrEq("Poll timed out")));
+
+  // Set the EOL character to '=' and write it.
+  constexpr char delim = '=';
+  struct kernel_termios t = DefaultTermios();
+  t.c_cc[VEOL] = delim;
+  ASSERT_THAT(ioctl(slave_.get(), TCSETS, &t), SyscallSucceeds());
+  ASSERT_THAT(WriteFd(master_.get(), &delim, 1), SyscallSucceedsWithValue(1));
+
+  // Now we can read, as sending EOL caused the line to become available.
+  ExpectReadable(slave_, sizeof(kInput), buf);
+  EXPECT_EQ(memcmp(buf, kInput, sizeof(kInput)), 0);
+
+  ExpectReadable(slave_, 1, buf);
+  EXPECT_EQ(buf[0], '=');
+
+  ExpectFinished(slave_);
+}
+
+// Tests that we can write more than the 4096 character limit, then a
+// terminating character, then read out just the first 4095 bytes plus the
+// terminator.
+TEST_F(PtyTest, CanonBigWrite) {
+  constexpr int kWriteLen = kMaxLineSize + 4;
+  char input[kWriteLen];
+  memset(input, 'M', kWriteLen - 1);
+  input[kWriteLen - 1] = '\n';
+  ASSERT_THAT(WriteFd(master_.get(), input, kWriteLen),
+              SyscallSucceedsWithValue(kWriteLen));
+
+  // We can read the line.
+  char buf[kMaxLineSize] = {};
+  ExpectReadable(slave_, kMaxLineSize, buf);
+
+  ExpectFinished(slave_);
+}
+
+// Tests that data written in canonical mode can be read immediately once
+// switched to noncanonical mode.
+TEST_F(PtyTest, SwitchCanonToNoncanon) { + // Write a few bytes without a terminating character, switch to noncanonical + // mode, and read them. + constexpr char kInput[] = "hello"; + ASSERT_THAT(WriteFd(master_.get(), kInput, sizeof(kInput)), + SyscallSucceedsWithValue(sizeof(kInput))); + + // Nothing available yet. + char buf[sizeof(kInput)] = {}; + ASSERT_THAT(PollAndReadFd(slave_.get(), buf, sizeof(kInput), kTimeout), + PosixErrorIs(ETIMEDOUT, ::testing::StrEq("Poll timed out"))); + + DisableCanonical(); + + ExpectReadable(slave_, sizeof(kInput), buf); + EXPECT_STREQ(buf, kInput); + + ExpectFinished(slave_); +} + +TEST_F(PtyTest, SwitchCanonToNonCanonNewline) { + // Write a few bytes with a terminating character. + constexpr char kInput[] = "hello\n"; + ASSERT_THAT(WriteFd(master_.get(), kInput, sizeof(kInput)), + SyscallSucceedsWithValue(sizeof(kInput))); + + DisableCanonical(); + + // We can read the line. + char buf[sizeof(kInput)] = {}; + ExpectReadable(slave_, sizeof(kInput), buf); + EXPECT_STREQ(buf, kInput); + + ExpectFinished(slave_); +} + +TEST_F(PtyTest, SwitchNoncanonToCanonNewlineBig) { + DisableCanonical(); + + // Write more than the maximum line size, then write a delimiter. + constexpr int kWriteLen = 4100; + char input[kWriteLen]; + memset(input, 'M', kWriteLen); + ASSERT_THAT(WriteFd(master_.get(), input, kWriteLen), + SyscallSucceedsWithValue(kWriteLen)); + // Wait for the input queue to fill. + ASSERT_NO_ERRNO(WaitUntilReceived(slave_.get(), kMaxLineSize - 1)); + constexpr char delim = '\n'; + ASSERT_THAT(WriteFd(master_.get(), &delim, 1), SyscallSucceedsWithValue(1)); + + EnableCanonical(); + + // We can read the line. + char buf[kMaxLineSize] = {}; + ExpectReadable(slave_, kMaxLineSize - 1, buf); + + // We can also read the remaining characters. 
+ ExpectReadable(slave_, 6, buf); + + ExpectFinished(slave_); +} + +TEST_F(PtyTest, SwitchNoncanonToCanonNoNewline) { + DisableCanonical(); + + // Write a few bytes without a terminating character. + // mode, and read them. + constexpr char kInput[] = "hello"; + ASSERT_THAT(WriteFd(master_.get(), kInput, sizeof(kInput) - 1), + SyscallSucceedsWithValue(sizeof(kInput) - 1)); + + ASSERT_NO_ERRNO(WaitUntilReceived(slave_.get(), sizeof(kInput) - 1)); + EnableCanonical(); + + // We can read the line. + char buf[sizeof(kInput)] = {}; + ExpectReadable(slave_, sizeof(kInput) - 1, buf); + EXPECT_STREQ(buf, kInput); + + ExpectFinished(slave_); +} + +TEST_F(PtyTest, SwitchNoncanonToCanonNoNewlineBig) { + DisableCanonical(); + + // Write a few bytes without a terminating character. + // mode, and read them. + constexpr int kWriteLen = 4100; + char input[kWriteLen]; + memset(input, 'M', kWriteLen); + ASSERT_THAT(WriteFd(master_.get(), input, kWriteLen), + SyscallSucceedsWithValue(kWriteLen)); + + ASSERT_NO_ERRNO(WaitUntilReceived(slave_.get(), kMaxLineSize - 1)); + EnableCanonical(); + + // We can read the line. + char buf[kMaxLineSize] = {}; + ExpectReadable(slave_, kMaxLineSize - 1, buf); + + ExpectFinished(slave_); +} + +// Tests that we can write over the 4095 noncanonical limit, then read out +// everything. +TEST_F(PtyTest, NoncanonBigWrite) { + DisableCanonical(); + + // Write well over the 4095 internal buffer limit. + constexpr char kInput = 'M'; + constexpr int kInputSize = kMaxLineSize * 2; + for (int i = 0; i < kInputSize; i++) { + // This makes too many syscalls for save/restore. + const DisableSave ds; + ASSERT_THAT(WriteFd(master_.get(), &kInput, sizeof(kInput)), + SyscallSucceedsWithValue(sizeof(kInput))); + } + + // We should be able to read out everything. Sleep a bit so that Linux has a + // chance to move data from the master to the slave. 
+ ASSERT_NO_ERRNO(WaitUntilReceived(slave_.get(), kMaxLineSize - 1)); + for (int i = 0; i < kInputSize; i++) { + // This makes too many syscalls for save/restore. + const DisableSave ds; + char c; + ExpectReadable(slave_, 1, &c); + ASSERT_EQ(c, kInput); + } + + ExpectFinished(slave_); +} + +// ICANON doesn't make input available until a line delimiter is typed. +// +// Test newline. +TEST_F(PtyTest, TermiosICANONNewline) { + char input[3] = {'a', 'b', 'c'}; + ASSERT_THAT(WriteFd(master_.get(), input, sizeof(input)), + SyscallSucceedsWithValue(sizeof(input))); + + // Extra bytes for newline (written later) and NUL for EXPECT_STREQ. + char buf[5] = {}; + + // Nothing available yet. + ASSERT_THAT(PollAndReadFd(slave_.get(), buf, sizeof(input), kTimeout), + PosixErrorIs(ETIMEDOUT, ::testing::StrEq("Poll timed out"))); + + char delim = '\n'; + ASSERT_THAT(WriteFd(master_.get(), &delim, 1), SyscallSucceedsWithValue(1)); + + // Now it is available. + ASSERT_NO_ERRNO(WaitUntilReceived(slave_.get(), sizeof(input) + 1)); + ExpectReadable(slave_, sizeof(input) + 1, buf); + EXPECT_STREQ(buf, "abc\n"); + + ExpectFinished(slave_); +} + +// ICANON doesn't make input available until a line delimiter is typed. +// +// Test EOF (^D). +TEST_F(PtyTest, TermiosICANONEOF) { + char input[3] = {'a', 'b', 'c'}; + ASSERT_THAT(WriteFd(master_.get(), input, sizeof(input)), + SyscallSucceedsWithValue(sizeof(input))); + + // Extra byte for NUL for EXPECT_STREQ. + char buf[4] = {}; + + // Nothing available yet. + ASSERT_THAT(PollAndReadFd(slave_.get(), buf, sizeof(input), kTimeout), + PosixErrorIs(ETIMEDOUT, ::testing::StrEq("Poll timed out"))); + char delim = ControlCharacter('D'); + ASSERT_THAT(WriteFd(master_.get(), &delim, 1), SyscallSucceedsWithValue(1)); + + // Now it is available. Note that ^D is not included. + ExpectReadable(slave_, sizeof(input), buf); + EXPECT_STREQ(buf, "abc"); + + ExpectFinished(slave_); +} + +// ICANON limits us to 4096 bytes including a terminating character. 
Anything +// after the 4095th character is discarded (although still processed for +// signals and echoing). +TEST_F(PtyTest, CanonDiscard) { + constexpr char kInput = 'M'; + constexpr int kInputSize = 4100; + constexpr int kIter = 3; + + // A few times write more than the 4096 character maximum, then a newline. + constexpr char delim = '\n'; + for (int i = 0; i < kIter; i++) { + // This makes too many syscalls for save/restore. + const DisableSave ds; + for (int i = 0; i < kInputSize; i++) { + ASSERT_THAT(WriteFd(master_.get(), &kInput, sizeof(kInput)), + SyscallSucceedsWithValue(sizeof(kInput))); + } + ASSERT_THAT(WriteFd(master_.get(), &delim, 1), SyscallSucceedsWithValue(1)); + } + + // There should be multiple truncated lines available to read. + for (int i = 0; i < kIter; i++) { + char buf[kInputSize] = {}; + ExpectReadable(slave_, kMaxLineSize, buf); + EXPECT_EQ(buf[kMaxLineSize - 1], delim); + EXPECT_EQ(buf[kMaxLineSize - 2], kInput); + } + + ExpectFinished(slave_); +} + +TEST_F(PtyTest, CanonMultiline) { + constexpr char kInput1[] = "GO\n"; + constexpr char kInput2[] = "BLUE\n"; + + // Write both lines. + ASSERT_THAT(WriteFd(master_.get(), kInput1, sizeof(kInput1) - 1), + SyscallSucceedsWithValue(sizeof(kInput1) - 1)); + ASSERT_THAT(WriteFd(master_.get(), kInput2, sizeof(kInput2) - 1), + SyscallSucceedsWithValue(sizeof(kInput2) - 1)); + + // Get the first line. + char line1[8] = {}; + ExpectReadable(slave_, sizeof(kInput1) - 1, line1); + EXPECT_STREQ(line1, kInput1); + + // Get the second line. + char line2[8] = {}; + ExpectReadable(slave_, sizeof(kInput2) - 1, line2); + EXPECT_STREQ(line2, kInput2); + + ExpectFinished(slave_); +} + +TEST_F(PtyTest, SwitchNoncanonToCanonMultiline) { + DisableCanonical(); + + constexpr char kInput1[] = "GO\n"; + constexpr char kInput2[] = "BLUE\n"; + constexpr char kExpected[] = "GO\nBLUE\n"; + + // Write both lines. 
+ ASSERT_THAT(WriteFd(master_.get(), kInput1, sizeof(kInput1) - 1), + SyscallSucceedsWithValue(sizeof(kInput1) - 1)); + ASSERT_THAT(WriteFd(master_.get(), kInput2, sizeof(kInput2) - 1), + SyscallSucceedsWithValue(sizeof(kInput2) - 1)); + + ASSERT_NO_ERRNO( + WaitUntilReceived(slave_.get(), sizeof(kInput1) + sizeof(kInput2) - 2)); + EnableCanonical(); + + // Get all together as one line. + char line[9] = {}; + ExpectReadable(slave_, 8, line); + EXPECT_STREQ(line, kExpected); + + ExpectFinished(slave_); +} + +TEST_F(PtyTest, SwitchTwiceMultiline) { + std::string kInputs[] = {"GO\n", "BLUE\n", "!"}; + std::string kExpected = "GO\nBLUE\n!"; + + // Write each line. + for (std::string input : kInputs) { + ASSERT_THAT(WriteFd(master_.get(), input.c_str(), input.size()), + SyscallSucceedsWithValue(input.size())); + } + + DisableCanonical(); + // All written characters have to make it into the input queue before + // canonical mode is re-enabled. If the final '!' character hasn't been + // enqueued before canonical mode is re-enabled, it won't be readable. + ASSERT_NO_ERRNO(WaitUntilReceived(slave_.get(), kExpected.size())); + EnableCanonical(); + + // Get all together as one line. + char line[10] = {}; + ExpectReadable(slave_, 9, line); + EXPECT_STREQ(line, kExpected.c_str()); + + ExpectFinished(slave_); +} + +TEST_F(PtyTest, QueueSize) { + // Write the line. + constexpr char kInput1[] = "GO\n"; + ASSERT_THAT(WriteFd(master_.get(), kInput1, sizeof(kInput1) - 1), + SyscallSucceedsWithValue(sizeof(kInput1) - 1)); + ASSERT_NO_ERRNO(WaitUntilReceived(slave_.get(), sizeof(kInput1) - 1)); + + // Ensure that writing more (beyond what is readable) does not impact the + // readable size. 
+ char input[kMaxLineSize]; + memset(input, 'M', kMaxLineSize); + ASSERT_THAT(WriteFd(master_.get(), input, kMaxLineSize), + SyscallSucceedsWithValue(kMaxLineSize)); + int inputBufSize = ASSERT_NO_ERRNO_AND_VALUE( + WaitUntilReceived(slave_.get(), sizeof(kInput1) - 1)); + EXPECT_EQ(inputBufSize, sizeof(kInput1) - 1); +} + +TEST_F(PtyTest, PartialBadBuffer) { + // Allocate 2 pages. + void* addr = mmap(nullptr, 2 * kPageSize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(addr, MAP_FAILED); + char* buf = reinterpret_cast<char*>(addr); + + // Guard the 2nd page for our read to run into. + ASSERT_THAT( + mprotect(reinterpret_cast<void*>(buf + kPageSize), kPageSize, PROT_NONE), + SyscallSucceeds()); + + // Leave only one free byte in the buffer. + char* bad_buffer = buf + kPageSize - 1; + + // Write to the master. + constexpr char kBuf[] = "hello\n"; + constexpr size_t size = sizeof(kBuf) - 1; + EXPECT_THAT(WriteFd(master_.get(), kBuf, size), + SyscallSucceedsWithValue(size)); + + // Read from the slave into bad_buffer. + ASSERT_NO_ERRNO(WaitUntilReceived(slave_.get(), size)); + EXPECT_THAT(ReadFd(slave_.get(), bad_buffer, size), + SyscallFailsWithErrno(EFAULT)); + + EXPECT_THAT(munmap(addr, 2 * kPageSize), SyscallSucceeds()) << addr; +} + +TEST_F(PtyTest, SimpleEcho) { + constexpr char kInput[] = "Mr. 
Eko"; + EXPECT_THAT(WriteFd(master_.get(), kInput, strlen(kInput)), + SyscallSucceedsWithValue(strlen(kInput))); + + char buf[100] = {}; + ExpectReadable(master_, strlen(kInput), buf); + + EXPECT_STREQ(buf, kInput); + ExpectFinished(master_); +} + +TEST_F(PtyTest, GetWindowSize) { + struct winsize ws; + ASSERT_THAT(ioctl(slave_.get(), TIOCGWINSZ, &ws), SyscallSucceeds()); + EXPECT_EQ(ws.ws_row, 0); + EXPECT_EQ(ws.ws_col, 0); +} + +TEST_F(PtyTest, SetSlaveWindowSize) { + constexpr uint16_t kRows = 343; + constexpr uint16_t kCols = 2401; + struct winsize ws = {.ws_row = kRows, .ws_col = kCols}; + ASSERT_THAT(ioctl(slave_.get(), TIOCSWINSZ, &ws), SyscallSucceeds()); + + struct winsize retrieved_ws = {}; + ASSERT_THAT(ioctl(master_.get(), TIOCGWINSZ, &retrieved_ws), + SyscallSucceeds()); + EXPECT_EQ(retrieved_ws.ws_row, kRows); + EXPECT_EQ(retrieved_ws.ws_col, kCols); +} + +TEST_F(PtyTest, SetMasterWindowSize) { + constexpr uint16_t kRows = 343; + constexpr uint16_t kCols = 2401; + struct winsize ws = {.ws_row = kRows, .ws_col = kCols}; + ASSERT_THAT(ioctl(master_.get(), TIOCSWINSZ, &ws), SyscallSucceeds()); + + struct winsize retrieved_ws = {}; + ASSERT_THAT(ioctl(slave_.get(), TIOCGWINSZ, &retrieved_ws), + SyscallSucceeds()); + EXPECT_EQ(retrieved_ws.ws_row, kRows); + EXPECT_EQ(retrieved_ws.ws_col, kCols); +} + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/pwrite64.cc b/test/syscalls/linux/pwrite64.cc new file mode 100644 index 000000000..60ae6de1f --- /dev/null +++ b/test/syscalls/linux/pwrite64.cc @@ -0,0 +1,79 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <errno.h> +#include <fcntl.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "gtest/gtest.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// This test is currently very rudimentary. +// +// TODO: +// * bad buffer states (EFAULT). +// * bad fds (wrong permission, wrong type of file, EBADF). +// * check offset is not incremented. +// * check for EOF. +// * writing to pipes, symlinks, special files. +class Pwrite64 : public ::testing::Test { + void SetUp() override { + name_ = NewTempAbsPath(); + int fd; + ASSERT_THAT(fd = open(name_.c_str(), O_CREAT, 0644), SyscallSucceeds()); + EXPECT_THAT(close(fd), SyscallSucceeds()); + } + + void TearDown() override { unlink(name_.c_str()); } + + public: + std::string name_; +}; + +TEST_F(Pwrite64, AppendOnly) { + int fd; + ASSERT_THAT(fd = open(name_.c_str(), O_APPEND | O_RDWR), SyscallSucceeds()); + constexpr int64_t kBufSize = 1024; + std::vector<char> buf(kBufSize); + std::fill(buf.begin(), buf.end(), 'a'); + EXPECT_THAT(PwriteFd(fd, buf.data(), buf.size(), 0), + SyscallSucceedsWithValue(buf.size())); + EXPECT_THAT(lseek(fd, 0, SEEK_CUR), SyscallSucceedsWithValue(0)); + EXPECT_THAT(close(fd), SyscallSucceeds()); +} + +TEST_F(Pwrite64, InvalidArgs) { + int fd; + ASSERT_THAT(fd = open(name_.c_str(), O_APPEND | O_RDWR), SyscallSucceeds()); + constexpr int64_t kBufSize = 1024; + std::vector<char> buf(kBufSize); + std::fill(buf.begin(), buf.end(), 'a'); + 
EXPECT_THAT(PwriteFd(fd, buf.data(), buf.size(), -1), + SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(close(fd), SyscallSucceeds()); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/read.cc b/test/syscalls/linux/read.cc new file mode 100644 index 000000000..eb1b5bc10 --- /dev/null +++ b/test/syscalls/linux/read.cc @@ -0,0 +1,117 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <fcntl.h> +#include <unistd.h> +#include <vector> + +#include "gtest/gtest.h" +#include "test/util/file_descriptor.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +class ReadTest : public ::testing::Test { + void SetUp() override { + name_ = NewTempAbsPath(); + int fd; + ASSERT_THAT(fd = open(name_.c_str(), O_CREAT, 0644), SyscallSucceeds()); + ASSERT_THAT(close(fd), SyscallSucceeds()); + } + + void TearDown() override { unlink(name_.c_str()); } + + public: + std::string name_; +}; + +TEST_F(ReadTest, ZeroBuffer) { + int fd; + ASSERT_THAT(fd = open(name_.c_str(), O_RDWR), SyscallSucceeds()); + + char msg[] = "hello world"; + EXPECT_THAT(PwriteFd(fd, msg, strlen(msg), 0), + SyscallSucceedsWithValue(strlen(msg))); + + char buf[10]; + EXPECT_THAT(ReadFd(fd, buf, 0), SyscallSucceedsWithValue(0)); + EXPECT_THAT(close(fd), SyscallSucceeds()); +} + +TEST_F(ReadTest, EmptyFileReturnsZeroAtEOF) { + int fd; + 
ASSERT_THAT(fd = open(name_.c_str(), O_RDWR), SyscallSucceeds()); + + char eof_buf[10]; + EXPECT_THAT(ReadFd(fd, eof_buf, 10), SyscallSucceedsWithValue(0)); + EXPECT_THAT(close(fd), SyscallSucceeds()); +} + +TEST_F(ReadTest, EofAfterRead) { + int fd; + ASSERT_THAT(fd = open(name_.c_str(), O_RDWR), SyscallSucceeds()); + + // Write some bytes to be read. + constexpr char kMessage[] = "hello world"; + EXPECT_THAT(PwriteFd(fd, kMessage, sizeof(kMessage), 0), + SyscallSucceedsWithValue(sizeof(kMessage))); + + // Read all of the bytes at once. + char buf[sizeof(kMessage)]; + EXPECT_THAT(ReadFd(fd, buf, sizeof(kMessage)), + SyscallSucceedsWithValue(sizeof(kMessage))); + + // Read again with a non-zero buffer and expect EOF. + char eof_buf[10]; + EXPECT_THAT(ReadFd(fd, eof_buf, 10), SyscallSucceedsWithValue(0)); + EXPECT_THAT(close(fd), SyscallSucceeds()); +} + +TEST_F(ReadTest, DevNullReturnsEof) { + int fd; + ASSERT_THAT(fd = open("/dev/null", O_RDONLY), SyscallSucceeds()); + std::vector<char> buf(1); + EXPECT_THAT(ReadFd(fd, buf.data(), 1), SyscallSucceedsWithValue(0)); + EXPECT_THAT(close(fd), SyscallSucceeds()); +} + +const int kReadSize = 128 * 1024; + +// Do not allow random save as it could lead to partial reads. 
+TEST_F(ReadTest, CanReadFullyFromDevZero_NoRandomSave) { + int fd; + ASSERT_THAT(fd = open("/dev/zero", O_RDONLY), SyscallSucceeds()); + + std::vector<char> buf(kReadSize, 1); + EXPECT_THAT(ReadFd(fd, buf.data(), kReadSize), + SyscallSucceedsWithValue(kReadSize)); + EXPECT_THAT(close(fd), SyscallSucceeds()); + EXPECT_EQ(std::vector<char>(kReadSize, 0), buf); +} + +TEST_F(ReadTest, ReadDirectoryFails) { + const FileDescriptor file = + ASSERT_NO_ERRNO_AND_VALUE(Open(GetAbsoluteTestTmpdir(), O_RDONLY)); + std::vector<char> buf(1); + EXPECT_THAT(ReadFd(file.get(), buf.data(), 1), SyscallFailsWithErrno(EISDIR)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/readv.cc b/test/syscalls/linux/readv.cc new file mode 100644 index 000000000..0b933673a --- /dev/null +++ b/test/syscalls/linux/readv.cc @@ -0,0 +1,293 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <sys/types.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "gtest/gtest.h" +#include "test/syscalls/linux/file_base.h" +#include "test/syscalls/linux/readv_common.h" +#include "test/util/file_descriptor.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" +#include "test/util/timer_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +class ReadvTest : public FileTest { + void SetUp() override { + FileTest::SetUp(); + + ASSERT_THAT(write(test_file_fd_.get(), kReadvTestData, kReadvTestDataSize), + SyscallSucceedsWithValue(kReadvTestDataSize)); + ASSERT_THAT(lseek(test_file_fd_.get(), 0, SEEK_SET), + SyscallSucceedsWithValue(0)); + ASSERT_THAT(write(test_pipe_[1], kReadvTestData, kReadvTestDataSize), + SyscallSucceedsWithValue(kReadvTestDataSize)); + } +}; + +TEST_F(ReadvTest, ReadOneBufferPerByte_File) { + ReadOneBufferPerByte(test_file_fd_.get()); +} + +TEST_F(ReadvTest, ReadOneBufferPerByte_Pipe) { + ReadOneBufferPerByte(test_pipe_[0]); +} + +TEST_F(ReadvTest, ReadOneHalfAtATime_File) { + ReadOneHalfAtATime(test_file_fd_.get()); +} + +TEST_F(ReadvTest, ReadOneHalfAtATime_Pipe) { + ReadOneHalfAtATime(test_pipe_[0]); +} + +TEST_F(ReadvTest, ReadAllOneBuffer_File) { + ReadAllOneBuffer(test_file_fd_.get()); +} + +TEST_F(ReadvTest, ReadAllOneBuffer_Pipe) { ReadAllOneBuffer(test_pipe_[0]); } + +TEST_F(ReadvTest, ReadAllOneLargeBuffer_File) { + ReadAllOneLargeBuffer(test_file_fd_.get()); +} + +TEST_F(ReadvTest, ReadAllOneLargeBuffer_Pipe) { + ReadAllOneLargeBuffer(test_pipe_[0]); +} + +TEST_F(ReadvTest, ReadBuffersOverlapping_File) { + ReadBuffersOverlapping(test_file_fd_.get()); +} + +TEST_F(ReadvTest, ReadBuffersOverlapping_Pipe) { + ReadBuffersOverlapping(test_pipe_[0]); +} + +TEST_F(ReadvTest, ReadBuffersDiscontinuous_File) { + ReadBuffersDiscontinuous(test_file_fd_.get()); +} + +TEST_F(ReadvTest, ReadBuffersDiscontinuous_Pipe) { + 
ReadBuffersDiscontinuous(test_pipe_[0]); +} + +TEST_F(ReadvTest, ReadIovecsCompletelyFilled_File) { + ReadIovecsCompletelyFilled(test_file_fd_.get()); +} + +TEST_F(ReadvTest, ReadIovecsCompletelyFilled_Pipe) { + ReadIovecsCompletelyFilled(test_pipe_[0]); +} + +TEST_F(ReadvTest, BadFileDescriptor) { + char buffer[1024]; + struct iovec iov[1]; + iov[0].iov_base = buffer; + iov[0].iov_len = 1024; + + // iovcnt must describe the one-element iov array; the invalid fd (-1) is + // what this test expects to be rejected with EBADF. + ASSERT_THAT(readv(-1, iov, 1), SyscallFailsWithErrno(EBADF)); +} + +TEST_F(ReadvTest, BadIovecsPointer_File) { + ASSERT_THAT(readv(test_file_fd_.get(), nullptr, 1), + SyscallFailsWithErrno(EFAULT)); +} + +TEST_F(ReadvTest, BadIovecsPointer_Pipe) { + ASSERT_THAT(readv(test_pipe_[0], nullptr, 1), SyscallFailsWithErrno(EFAULT)); +} + +TEST_F(ReadvTest, BadIovecBase_File) { + struct iovec iov[1]; + iov[0].iov_base = nullptr; + iov[0].iov_len = 1024; + ASSERT_THAT(readv(test_file_fd_.get(), iov, 1), + SyscallFailsWithErrno(EFAULT)); +} + +TEST_F(ReadvTest, BadIovecBase_Pipe) { + struct iovec iov[1]; + iov[0].iov_base = nullptr; + iov[0].iov_len = 1024; + ASSERT_THAT(readv(test_pipe_[0], iov, 1), SyscallFailsWithErrno(EFAULT)); +} + +TEST_F(ReadvTest, ZeroIovecs_File) { + struct iovec iov[1]; + iov[0].iov_base = 0; + iov[0].iov_len = 0; + ASSERT_THAT(readv(test_file_fd_.get(), iov, 1), SyscallSucceeds()); +} + +TEST_F(ReadvTest, ZeroIovecs_Pipe) { + struct iovec iov[1]; + iov[0].iov_base = 0; + iov[0].iov_len = 0; + ASSERT_THAT(readv(test_pipe_[0], iov, 1), SyscallSucceeds()); +} + +TEST_F(ReadvTest, NotReadable_File) { + char buffer[1024]; + struct iovec iov[1]; + iov[0].iov_base = buffer; + iov[0].iov_len = 1024; + + std::string wronly_file = NewTempAbsPath(); + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE( + Open(wronly_file, O_CREAT | O_WRONLY, S_IRUSR | S_IWUSR)); + ASSERT_THAT(readv(fd.get(), iov, 1), SyscallFailsWithErrno(EBADF)); + fd.reset(); // Close before unlinking. 
+ ASSERT_THAT(unlink(wronly_file.c_str()), SyscallSucceeds()); +} + +TEST_F(ReadvTest, NotReadable_Pipe) { + char buffer[1024]; + struct iovec iov[1]; + iov[0].iov_base = buffer; + iov[0].iov_len = 1024; + ASSERT_THAT(readv(test_pipe_[1], iov, 1), SyscallFailsWithErrno(EBADF)); +} + +TEST_F(ReadvTest, DirNotReadable) { + char buffer[1024]; + struct iovec iov[1]; + iov[0].iov_base = buffer; + iov[0].iov_len = 1024; + + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(GetAbsoluteTestTmpdir(), O_RDONLY)); + ASSERT_THAT(readv(fd.get(), iov, 1), SyscallFailsWithErrno(EISDIR)); +} + +TEST_F(ReadvTest, OffsetIncremented) { + char* buffer = reinterpret_cast<char*>(malloc(kReadvTestDataSize)); + struct iovec iov[1]; + iov[0].iov_base = buffer; + iov[0].iov_len = kReadvTestDataSize; + + ASSERT_THAT(readv(test_file_fd_.get(), iov, 1), + SyscallSucceedsWithValue(kReadvTestDataSize)); + ASSERT_THAT(lseek(test_file_fd_.get(), 0, SEEK_CUR), + SyscallSucceedsWithValue(kReadvTestDataSize)); + + free(buffer); +} + +TEST_F(ReadvTest, EndOfFile) { + char* buffer = reinterpret_cast<char*>(malloc(kReadvTestDataSize)); + struct iovec iov[1]; + iov[0].iov_base = buffer; + iov[0].iov_len = kReadvTestDataSize; + ASSERT_THAT(readv(test_file_fd_.get(), iov, 1), + SyscallSucceedsWithValue(kReadvTestDataSize)); + free(buffer); + + buffer = reinterpret_cast<char*>(malloc(kReadvTestDataSize)); + iov[0].iov_base = buffer; + iov[0].iov_len = kReadvTestDataSize; + ASSERT_THAT(readv(test_file_fd_.get(), iov, 1), SyscallSucceedsWithValue(0)); + free(buffer); +} + +TEST_F(ReadvTest, WouldBlock_Pipe) { + struct iovec iov[1]; + iov[0].iov_base = reinterpret_cast<char*>(malloc(kReadvTestDataSize)); + iov[0].iov_len = kReadvTestDataSize; + ASSERT_THAT(readv(test_pipe_[0], iov, 1), + SyscallSucceedsWithValue(kReadvTestDataSize)); + free(iov[0].iov_base); + + iov[0].iov_base = reinterpret_cast<char*>(malloc(kReadvTestDataSize)); + ASSERT_THAT(readv(test_pipe_[0], iov, 1), 
SyscallFailsWithErrno(EAGAIN)); + free(iov[0].iov_base); +} + +TEST_F(ReadvTest, ZeroBuffer) { + char buf[10]; + struct iovec iov[1]; + iov[0].iov_base = buf; + iov[0].iov_len = 0; + ASSERT_THAT(readv(test_pipe_[0], iov, 1), SyscallSucceedsWithValue(0)); +} + +TEST_F(ReadvTest, NullIovecInNonemptyArray) { + std::vector<char> buf(kReadvTestDataSize); + struct iovec iov[2]; + iov[0].iov_base = nullptr; + iov[0].iov_len = 0; + iov[1].iov_base = buf.data(); + iov[1].iov_len = kReadvTestDataSize; + ASSERT_THAT(readv(test_file_fd_.get(), iov, 2), + SyscallSucceedsWithValue(kReadvTestDataSize)); +} + +TEST_F(ReadvTest, IovecOutsideTaskAddressRangeInNonemptyArray) { + std::vector<char> buf(kReadvTestDataSize); + struct iovec iov[2]; + iov[0].iov_base = reinterpret_cast<void*>(~static_cast<uintptr_t>(0)); + iov[0].iov_len = 0; + iov[1].iov_base = buf.data(); + iov[1].iov_len = kReadvTestDataSize; + ASSERT_THAT(readv(test_file_fd_.get(), iov, 2), + SyscallFailsWithErrno(EFAULT)); +} + +// This test depends on the maximum extent of a single readv() syscall, so +// we can't tolerate interruption from saving. +TEST(ReadvTestNoFixture, TruncatedAtMax_NoRandomSave) { + // Ensure that we won't be interrupted by ITIMER_PROF. + struct itimerval itv = {}; + auto const cleanup_itimer = + ASSERT_NO_ERRNO_AND_VALUE(ScopedItimer(ITIMER_PROF, itv)); + + // From Linux's include/linux/fs.h. + size_t const MAX_RW_COUNT = INT_MAX & ~(kPageSize - 1); + + // Create an iovec array with 3 segments pointing to consecutive parts of a + // buffer. The first covers all but the last three pages, and should be + // written to in its entirety. The second covers the last page before + // MAX_RW_COUNT and the first page after; only the first page should be + // written to. The third covers the last page of the buffer, and should be + // skipped entirely. 
+ size_t const kBufferSize = MAX_RW_COUNT + 2 * kPageSize; + size_t const kFirstOffset = MAX_RW_COUNT - kPageSize; + size_t const kSecondOffset = MAX_RW_COUNT + kPageSize; + // The buffer is too big to fit on the stack. + std::vector<char> buf(kBufferSize); + struct iovec iov[3]; + iov[0].iov_base = buf.data(); + iov[0].iov_len = kFirstOffset; + iov[1].iov_base = buf.data() + kFirstOffset; + iov[1].iov_len = kSecondOffset - kFirstOffset; + iov[2].iov_base = buf.data() + kSecondOffset; + iov[2].iov_len = kBufferSize - kSecondOffset; + + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDONLY)); + EXPECT_THAT(readv(fd.get(), iov, 3), SyscallSucceedsWithValue(MAX_RW_COUNT)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/readv_common.cc b/test/syscalls/linux/readv_common.cc new file mode 100644 index 000000000..349b80d7f --- /dev/null +++ b/test/syscalls/linux/readv_common.cc @@ -0,0 +1,180 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <errno.h> +#include <fcntl.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "gtest/gtest.h" +#include "test/syscalls/linux/file_base.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +extern const char kReadvTestData[] = + "127.0.0.1 localhost" + "" + "# The following lines are desirable for IPv6 capable hosts" + "::1 ip6-localhost ip6-loopback" + "fe00::0 ip6-localnet" + "ff00::0 ip6-mcastprefix" + "ff02::1 ip6-allnodes" + "ff02::2 ip6-allrouters" + "ff02::3 ip6-allhosts" + "192.168.1.100 a" + "93.184.216.34 foo.bar.example.com xcpu"; +extern const size_t kReadvTestDataSize = sizeof(kReadvTestData); + +static void ReadAllOneProvidedBuffer(int fd, std::vector<char>* buffer) { + struct iovec iovs[1]; + iovs[0].iov_base = buffer->data(); + iovs[0].iov_len = kReadvTestDataSize; + + ASSERT_THAT(readv(fd, iovs, 1), SyscallSucceedsWithValue(kReadvTestDataSize)); + + std::pair<struct iovec*, int> iovec_desc(iovs, 1); + EXPECT_THAT(iovec_desc, MatchesStringLength(kReadvTestDataSize)); + EXPECT_THAT(iovec_desc, MatchesStringValue(kReadvTestData)); +} + +void ReadAllOneBuffer(int fd) { + std::vector<char> buffer(kReadvTestDataSize); + ReadAllOneProvidedBuffer(fd, &buffer); +} + +void ReadAllOneLargeBuffer(int fd) { + std::vector<char> buffer(10 * kReadvTestDataSize); + ReadAllOneProvidedBuffer(fd, &buffer); +} + +void ReadOneHalfAtATime(int fd) { + int len0 = kReadvTestDataSize / 2; + int len1 = kReadvTestDataSize - len0; + std::vector<char> buffer0(len0); + std::vector<char> buffer1(len1); + + struct iovec iovs[2]; + iovs[0].iov_base = buffer0.data(); + iovs[0].iov_len = len0; + iovs[1].iov_base = buffer1.data(); + iovs[1].iov_len = len1; + + ASSERT_THAT(readv(fd, iovs, 2), SyscallSucceedsWithValue(kReadvTestDataSize)); + + std::pair<struct iovec*, int> iovec_desc(iovs, 2); + EXPECT_THAT(iovec_desc, MatchesStringLength(kReadvTestDataSize)); + EXPECT_THAT(iovec_desc, 
MatchesStringValue(kReadvTestData)); +} + +void ReadOneBufferPerByte(int fd) { + std::vector<char> buffer(kReadvTestDataSize); + std::vector<struct iovec> iovs(kReadvTestDataSize); + char* buffer_ptr = buffer.data(); + struct iovec* iovs_ptr = iovs.data(); + + for (int i = 0; i < static_cast<int>(kReadvTestDataSize); i++) { + struct iovec iov = { + .iov_base = &buffer_ptr[i], + .iov_len = 1, + }; + iovs_ptr[i] = iov; + } + + ASSERT_THAT(readv(fd, iovs_ptr, kReadvTestDataSize), + SyscallSucceedsWithValue(kReadvTestDataSize)); + + std::pair<struct iovec*, int> iovec_desc(iovs.data(), kReadvTestDataSize); + EXPECT_THAT(iovec_desc, MatchesStringLength(kReadvTestDataSize)); + EXPECT_THAT(iovec_desc, MatchesStringValue(kReadvTestData)); +} + +void ReadBuffersOverlapping(int fd) { + // overlap the first overlap_bytes. + int overlap_bytes = 8; + std::vector<char> buffer(kReadvTestDataSize); + + // overlapping causes us to get more data. + int expected_size = kReadvTestDataSize + overlap_bytes; + std::vector<char> expected(expected_size); + char* expected_ptr = expected.data(); + memcpy(expected_ptr, &kReadvTestData[overlap_bytes], overlap_bytes); + // Only kReadvTestDataSize - overlap_bytes bytes exist past the offset, so + // copy exactly that many; the final overlap_bytes of `expected` keep the + // zero fill provided by the vector constructor. + memcpy(&expected_ptr[overlap_bytes], &kReadvTestData[overlap_bytes], + kReadvTestDataSize - overlap_bytes); + + struct iovec iovs[2]; + iovs[0].iov_base = buffer.data(); + iovs[0].iov_len = overlap_bytes; + iovs[1].iov_base = buffer.data(); + iovs[1].iov_len = kReadvTestDataSize; + + ASSERT_THAT(readv(fd, iovs, 2), SyscallSucceedsWithValue(kReadvTestDataSize)); + + std::pair<struct iovec*, int> iovec_desc(iovs, 2); + EXPECT_THAT(iovec_desc, MatchesStringLength(expected_size)); + EXPECT_THAT(iovec_desc, MatchesStringValue(expected_ptr)); +} + +void ReadBuffersDiscontinuous(int fd) { + // Each iov is 1 byte separated by 1 byte. 
+ std::vector<char> buffer(kReadvTestDataSize * 2); + std::vector<struct iovec> iovs(kReadvTestDataSize); + + char* buffer_ptr = buffer.data(); + struct iovec* iovs_ptr = iovs.data(); + + for (int i = 0; i < static_cast<int>(kReadvTestDataSize); i++) { + struct iovec iov = { + .iov_base = &buffer_ptr[i * 2], + .iov_len = 1, + }; + iovs_ptr[i] = iov; + } + + ASSERT_THAT(readv(fd, iovs_ptr, kReadvTestDataSize), + SyscallSucceedsWithValue(kReadvTestDataSize)); + + std::pair<struct iovec*, int> iovec_desc(iovs.data(), kReadvTestDataSize); + EXPECT_THAT(iovec_desc, MatchesStringLength(kReadvTestDataSize)); + EXPECT_THAT(iovec_desc, MatchesStringValue(kReadvTestData)); +} + +void ReadIovecsCompletelyFilled(int fd) { + int half = kReadvTestDataSize / 2; + std::vector<char> buffer(kReadvTestDataSize); + char* buffer_ptr = buffer.data(); + memset(buffer.data(), '\0', kReadvTestDataSize); + + struct iovec iovs[2]; + iovs[0].iov_base = buffer.data(); + iovs[0].iov_len = half; + iovs[1].iov_base = &buffer_ptr[half]; + iovs[1].iov_len = half; + + ASSERT_THAT(readv(fd, iovs, 2), SyscallSucceedsWithValue(half * 2)); + + std::pair<struct iovec*, int> iovec_desc(iovs, 2); + EXPECT_THAT(iovec_desc, MatchesStringLength(half * 2)); + EXPECT_THAT(iovec_desc, MatchesStringValue(kReadvTestData)); + + char* str = static_cast<char*>(iovs[0].iov_base); + str[iovs[0].iov_len - 1] = '\0'; + ASSERT_EQ(half - 1, strlen(str)); +} + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/readv_common.h b/test/syscalls/linux/readv_common.h new file mode 100644 index 000000000..e261d545a --- /dev/null +++ b/test/syscalls/linux/readv_common.h @@ -0,0 +1,61 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GVISOR_TEST_SYSCALLS_READV_COMMON_H_ +#define GVISOR_TEST_SYSCALLS_READV_COMMON_H_ + +#include <stddef.h> + +namespace gvisor { +namespace testing { + +// A NUL-terminated string containing the data used by tests using the following +// test helpers. +extern const char kReadvTestData[]; + +// The size of kReadvTestData, including the terminating NUL. +extern const size_t kReadvTestDataSize; + +// ReadAllOneBuffer asserts that it can read kReadvTestData from an fd using +// exactly one iovec. +void ReadAllOneBuffer(int fd); + +// ReadAllOneLargeBuffer asserts that it can read kReadvTestData from an fd +// using exactly one iovec containing an overly large buffer. +void ReadAllOneLargeBuffer(int fd); + +// ReadOneHalfAtATime asserts that it can read kReadvTestData from an fd using +// exactly two iovecs that are roughly equivalent in size. +void ReadOneHalfAtATime(int fd); + +// ReadOneBufferPerByte asserts that it can read kReadvTestData from an fd +// using one iovec per byte. +void ReadOneBufferPerByte(int fd); + +// ReadBuffersOverlapping asserts that it can read kReadvTestData from an fd +// where two iovecs are overlapping. +void ReadBuffersOverlapping(int fd); + +// ReadBuffersDiscontinuous asserts that it can read kReadvTestData from an fd +// where each iovec is discontinuous from the next by 1 byte. +void ReadBuffersDiscontinuous(int fd); + +// ReadIovecsCompletelyFilled asserts that the previous iovec is completely +// filled before moving onto the next. 
+void ReadIovecsCompletelyFilled(int fd); + +} // namespace testing +} // namespace gvisor + +#endif // GVISOR_TEST_SYSCALLS_READV_COMMON_H_ diff --git a/test/syscalls/linux/readv_socket.cc b/test/syscalls/linux/readv_socket.cc new file mode 100644 index 000000000..2c129b7e8 --- /dev/null +++ b/test/syscalls/linux/readv_socket.cc @@ -0,0 +1,182 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <errno.h> +#include <fcntl.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "gtest/gtest.h" +#include "test/syscalls/linux/file_base.h" +#include "test/syscalls/linux/readv_common.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +class ReadvSocketTest : public SocketTest { + void SetUp() override { + SocketTest::SetUp(); + ASSERT_THAT( + write(test_unix_stream_socket_[1], kReadvTestData, kReadvTestDataSize), + SyscallSucceedsWithValue(kReadvTestDataSize)); + ASSERT_THAT( + write(test_unix_dgram_socket_[1], kReadvTestData, kReadvTestDataSize), + SyscallSucceedsWithValue(kReadvTestDataSize)); + ASSERT_THAT(write(test_unix_seqpacket_socket_[1], kReadvTestData, + kReadvTestDataSize), + SyscallSucceedsWithValue(kReadvTestDataSize)); + // FIXME: Enable when possible. 
+ // ASSERT_THAT(write(test_tcp_socket_[1], kReadvTestData, + // kReadvTestDataSize), + // SyscallSucceedsWithValue(kReadvTestDataSize)); + } +}; + +TEST_F(ReadvSocketTest, ReadOneBufferPerByte_StreamSocket) { + ReadOneBufferPerByte(test_unix_stream_socket_[0]); +} + +TEST_F(ReadvSocketTest, ReadOneBufferPerByte_DgramSocket) { + ReadOneBufferPerByte(test_unix_dgram_socket_[0]); +} + +TEST_F(ReadvSocketTest, ReadOneBufferPerByte_SeqPacketSocket) { + ReadOneBufferPerByte(test_unix_seqpacket_socket_[0]); +} + +TEST_F(ReadvSocketTest, ReadOneHalfAtATime_StreamSocket) { + ReadOneHalfAtATime(test_unix_stream_socket_[0]); +} + +TEST_F(ReadvSocketTest, ReadOneHalfAtATime_DgramSocket) { + ReadOneHalfAtATime(test_unix_dgram_socket_[0]); +} + +TEST_F(ReadvSocketTest, ReadAllOneBuffer_StreamSocket) { + ReadAllOneBuffer(test_unix_stream_socket_[0]); +} + +TEST_F(ReadvSocketTest, ReadAllOneBuffer_DgramSocket) { + ReadAllOneBuffer(test_unix_dgram_socket_[0]); +} + +TEST_F(ReadvSocketTest, ReadAllOneLargeBuffer_StreamSocket) { + ReadAllOneLargeBuffer(test_unix_stream_socket_[0]); +} + +TEST_F(ReadvSocketTest, ReadAllOneLargeBuffer_DgramSocket) { + ReadAllOneLargeBuffer(test_unix_dgram_socket_[0]); +} + +TEST_F(ReadvSocketTest, ReadBuffersOverlapping_StreamSocket) { + ReadBuffersOverlapping(test_unix_stream_socket_[0]); +} + +TEST_F(ReadvSocketTest, ReadBuffersOverlapping_DgramSocket) { + ReadBuffersOverlapping(test_unix_dgram_socket_[0]); +} + +TEST_F(ReadvSocketTest, ReadBuffersDiscontinuous_StreamSocket) { + ReadBuffersDiscontinuous(test_unix_stream_socket_[0]); +} + +TEST_F(ReadvSocketTest, ReadBuffersDiscontinuous_DgramSocket) { + ReadBuffersDiscontinuous(test_unix_dgram_socket_[0]); +} + +TEST_F(ReadvSocketTest, ReadIovecsCompletelyFilled_StreamSocket) { + ReadIovecsCompletelyFilled(test_unix_stream_socket_[0]); +} + +TEST_F(ReadvSocketTest, ReadIovecsCompletelyFilled_DgramSocket) { + ReadIovecsCompletelyFilled(test_unix_dgram_socket_[0]); +} + +TEST_F(ReadvSocketTest, 
BadIovecsPointer_StreamSocket) { + ASSERT_THAT(readv(test_unix_stream_socket_[0], nullptr, 1), + SyscallFailsWithErrno(EFAULT)); +} + +TEST_F(ReadvSocketTest, BadIovecsPointer_DgramSocket) { + ASSERT_THAT(readv(test_unix_dgram_socket_[0], nullptr, 1), + SyscallFailsWithErrno(EFAULT)); +} + +TEST_F(ReadvSocketTest, BadIovecBase_StreamSocket) { + struct iovec iov[1]; + iov[0].iov_base = nullptr; + iov[0].iov_len = 1024; + ASSERT_THAT(readv(test_unix_stream_socket_[0], iov, 1), + SyscallFailsWithErrno(EFAULT)); +} + +TEST_F(ReadvSocketTest, BadIovecBase_DgramSocket) { + struct iovec iov[1]; + iov[0].iov_base = nullptr; + iov[0].iov_len = 1024; + ASSERT_THAT(readv(test_unix_dgram_socket_[0], iov, 1), + SyscallFailsWithErrno(EFAULT)); +} + +TEST_F(ReadvSocketTest, ZeroIovecs_StreamSocket) { + struct iovec iov[1]; + iov[0].iov_base = 0; + iov[0].iov_len = 0; + ASSERT_THAT(readv(test_unix_stream_socket_[0], iov, 1), SyscallSucceeds()); +} + +TEST_F(ReadvSocketTest, ZeroIovecs_DgramSocket) { + struct iovec iov[1]; + iov[0].iov_base = 0; + iov[0].iov_len = 0; + ASSERT_THAT(readv(test_unix_dgram_socket_[0], iov, 1), SyscallSucceeds()); +} + +TEST_F(ReadvSocketTest, WouldBlock_StreamSocket) { + struct iovec iov[1]; + iov[0].iov_base = reinterpret_cast<char*>(malloc(kReadvTestDataSize)); + iov[0].iov_len = kReadvTestDataSize; + ASSERT_THAT(readv(test_unix_stream_socket_[0], iov, 1), + SyscallSucceedsWithValue(kReadvTestDataSize)); + free(iov[0].iov_base); + + iov[0].iov_base = reinterpret_cast<char*>(malloc(kReadvTestDataSize)); + ASSERT_THAT(readv(test_unix_stream_socket_[0], iov, 1), + SyscallFailsWithErrno(EAGAIN)); + free(iov[0].iov_base); +} + +TEST_F(ReadvSocketTest, WouldBlock_DgramSocket) { + struct iovec iov[1]; + iov[0].iov_base = reinterpret_cast<char*>(malloc(kReadvTestDataSize)); + iov[0].iov_len = kReadvTestDataSize; + ASSERT_THAT(readv(test_unix_dgram_socket_[0], iov, 1), + SyscallSucceedsWithValue(kReadvTestDataSize)); + free(iov[0].iov_base); + + 
iov[0].iov_base = reinterpret_cast<char*>(malloc(kReadvTestDataSize)); + ASSERT_THAT(readv(test_unix_dgram_socket_[0], iov, 1), + SyscallFailsWithErrno(EAGAIN)); + free(iov[0].iov_base); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/rename.cc b/test/syscalls/linux/rename.cc new file mode 100644 index 000000000..f4c877a00 --- /dev/null +++ b/test/syscalls/linux/rename.cc @@ -0,0 +1,373 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <fcntl.h> +#include <stdio.h> +#include <string> + +#include "gtest/gtest.h" +#include "gtest/gtest.h" +#include "absl/strings/string_view.h" +#include "test/util/capability_util.h" +#include "test/util/cleanup.h" +#include "test/util/file_descriptor.h" +#include "test/util/fs_util.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +TEST(RenameTest, RootToAnything) { + ASSERT_THAT(rename("/", "/bin"), SyscallFailsWithErrno(EBUSY)); +} + +TEST(RenameTest, AnythingToRoot) { + ASSERT_THAT(rename("/bin", "/"), SyscallFailsWithErrno(EBUSY)); +} + +TEST(RenameTest, SourceIsAncestorOfTarget) { + auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + auto subdir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(dir.path())); + ASSERT_THAT(rename(dir.path().c_str(), subdir.path().c_str()), + SyscallFailsWithErrno(EINVAL)); + + // Try an even deeper directory. + auto deep_subdir = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(subdir.path())); + ASSERT_THAT(rename(dir.path().c_str(), deep_subdir.path().c_str()), + SyscallFailsWithErrno(EINVAL)); +} + +TEST(RenameTest, TargetIsAncestorOfSource) { + auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + auto subdir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(dir.path())); + ASSERT_THAT(rename(subdir.path().c_str(), dir.path().c_str()), + SyscallFailsWithErrno(ENOTEMPTY)); + + // Try an even deeper directory. 
+  auto deep_subdir =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(subdir.path()));
+  ASSERT_THAT(rename(deep_subdir.path().c_str(), dir.path().c_str()),
+              SyscallFailsWithErrno(ENOTEMPTY));
+}
+
+// rename(2) with identical source and destination paths succeeds for a file.
+TEST(RenameTest, FileToSelf) {
+  auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  EXPECT_THAT(rename(f.path().c_str(), f.path().c_str()), SyscallSucceeds());
+}
+
+// rename(2) with identical source and destination paths succeeds for a
+// directory.
+TEST(RenameTest, DirectoryToSelf) {
+  auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  EXPECT_THAT(rename(f.path().c_str(), f.path().c_str()), SyscallSucceeds());
+}
+
+// Renames a file to a fresh path from NewTempAbsPath(); afterwards the old
+// path must be gone and the new path must exist.
+TEST(RenameTest, FileToSameDirectory) {
+  auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  std::string const newpath = NewTempAbsPath();
+  ASSERT_THAT(rename(f.path().c_str(), newpath.c_str()), SyscallSucceeds());
+  // Re-point the TempPath at the new location (presumably so that its cleanup
+  // tracks the renamed file rather than the stale path).
+  std::string const oldpath = f.release();
+  f.reset(newpath);
+  EXPECT_THAT(Exists(oldpath), IsPosixErrorOkAndHolds(false));
+  EXPECT_THAT(Exists(newpath), IsPosixErrorOkAndHolds(true));
+}
+
+// Same as FileToSameDirectory, but the renamed object is a directory.
+TEST(RenameTest, DirectoryToSameDirectory) {
+  auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  std::string const newpath = NewTempAbsPath();
+  ASSERT_THAT(rename(dir.path().c_str(), newpath.c_str()), SyscallSucceeds());
+  std::string const oldpath = dir.release();
+  dir.reset(newpath);
+  EXPECT_THAT(Exists(oldpath), IsPosixErrorOkAndHolds(false));
+  EXPECT_THAT(Exists(newpath), IsPosixErrorOkAndHolds(true));
+}
+
+// Moves a file from dir1/dir2 up into dir1.
+TEST(RenameTest, FileToParentDirectory) {
+  auto dir1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  auto dir2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(dir1.path()));
+  auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir2.path()));
+  std::string const newpath = NewTempAbsPathInDir(dir1.path());
+  ASSERT_THAT(rename(f.path().c_str(), newpath.c_str()), SyscallSucceeds());
+  std::string const oldpath = f.release();
+  f.reset(newpath);
+  EXPECT_THAT(Exists(oldpath), IsPosixErrorOkAndHolds(false));
+  EXPECT_THAT(Exists(newpath), IsPosixErrorOkAndHolds(true));
+}
+
+// Moves dir3 from dir1/dir2 up into dir1 and checks that the result is still
+// a directory.
+TEST(RenameTest, DirectoryToParentDirectory) {
+  auto dir1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  auto dir2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(dir1.path()));
+  auto dir3 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(dir2.path()));
+  EXPECT_THAT(IsDirectory(dir3.path()), IsPosixErrorOkAndHolds(true));
+  std::string const newpath = NewTempAbsPathInDir(dir1.path());
+  ASSERT_THAT(rename(dir3.path().c_str(), newpath.c_str()), SyscallSucceeds());
+  std::string const oldpath = dir3.release();
+  dir3.reset(newpath);
+  EXPECT_THAT(Exists(oldpath), IsPosixErrorOkAndHolds(false));
+  EXPECT_THAT(Exists(newpath), IsPosixErrorOkAndHolds(true));
+  EXPECT_THAT(IsDirectory(newpath), IsPosixErrorOkAndHolds(true));
+}
+
+// Moves a file from dir1 down into dir1/dir2.
+TEST(RenameTest, FileToChildDirectory) {
+  auto dir1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  auto dir2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(dir1.path()));
+  auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir1.path()));
+  std::string const newpath = NewTempAbsPathInDir(dir2.path());
+  ASSERT_THAT(rename(f.path().c_str(), newpath.c_str()), SyscallSucceeds());
+  std::string const oldpath = f.release();
+  f.reset(newpath);
+  EXPECT_THAT(Exists(oldpath), IsPosixErrorOkAndHolds(false));
+  EXPECT_THAT(Exists(newpath), IsPosixErrorOkAndHolds(true));
+}
+
+// Moves dir3 from dir1 into its sibling dir2 and checks that the result is
+// still a directory.
+TEST(RenameTest, DirectoryToChildDirectory) {
+  auto dir1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  auto dir2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(dir1.path()));
+  auto dir3 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(dir1.path()));
+  std::string const newpath = NewTempAbsPathInDir(dir2.path());
+  ASSERT_THAT(rename(dir3.path().c_str(), newpath.c_str()), SyscallSucceeds());
+  std::string const oldpath = dir3.release();
+  dir3.reset(newpath);
+  EXPECT_THAT(Exists(oldpath), IsPosixErrorOkAndHolds(false));
+  EXPECT_THAT(Exists(newpath), IsPosixErrorOkAndHolds(true));
+  EXPECT_THAT(IsDirectory(newpath), IsPosixErrorOkAndHolds(true));
+}
+
+// Moving a directory to a path underneath one of its own subdirectories is
+// expected to fail with EINVAL.
+TEST(RenameTest, DirectoryToOwnChildDirectory) {
+  auto dir1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  auto dir2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(dir1.path()));
+  std::string const newpath = NewTempAbsPathInDir(dir2.path());
+  ASSERT_THAT(rename(dir1.path().c_str(), newpath.c_str()),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+// Renaming f1 onto the existing file f2 replaces it: f1's path disappears and
+// f2's path now holds f1's contents ("first").
+TEST(RenameTest, FileOverwritesFile) {
+  auto f1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), "first", TempPath::kDefaultFileMode));
+  auto f2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), "second", TempPath::kDefaultFileMode));
+  ASSERT_THAT(rename(f1.path().c_str(), f2.path().c_str()), SyscallSucceeds());
+  EXPECT_THAT(Exists(f1.path()), IsPosixErrorOkAndHolds(false));
+
+  // f1's path no longer exists, so drop it from the TempPath.
+  f1.release();
+  std::string f2_contents;
+  ASSERT_NO_ERRNO(GetContents(f2.path(), &f2_contents));
+  EXPECT_EQ("first", f2_contents);
+}
+
+// Renaming a source path that was never created fails with ENOENT.
+TEST(RenameTest, FileDoesNotExist) {
+  auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const std::string source = JoinPath(dir.path(), "source");
+  const std::string dest = JoinPath(dir.path(), "dest");
+  ASSERT_THAT(rename(source.c_str(), dest.c_str()),
+              SyscallFailsWithErrno(ENOENT));
+}
+
+// Renaming a file onto an existing directory fails with EISDIR.
+TEST(RenameTest, FileDoesNotOverwriteDirectory) {
+  auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  ASSERT_THAT(rename(f.path().c_str(), dir.path().c_str()),
+              SyscallFailsWithErrno(EISDIR));
+}
+
+// Renaming a directory onto an existing file fails with ENOTDIR.
+TEST(RenameTest, DirectoryDoesNotOverwriteFile) {
+  auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  ASSERT_THAT(rename(dir.path().c_str(), f.path().c_str()),
+              SyscallFailsWithErrno(ENOTDIR));
+}
+
+// Renaming dir1 (which contains f) onto the empty directory dir2 succeeds;
+// f is then found under dir2 with the same basename.
+TEST(RenameTest, DirectoryOverwritesEmptyDirectory) {
+  auto dir1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir1.path()));
+  auto dir2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  EXPECT_THAT(rename(dir1.path().c_str(), dir2.path().c_str()),
+              SyscallSucceeds());
+  EXPECT_THAT(Exists(dir1.path()), IsPosixErrorOkAndHolds(false));
+  dir1.release();
+  EXPECT_THAT(Exists(JoinPath(dir2.path(), Basename(f.path()))),
+              IsPosixErrorOkAndHolds(true));
+  f.release();
+}
+
+// Paths ending in "/." or "/.." are rejected with EBUSY whether they appear
+// as the source or as the destination.
+TEST(RenameTest, FailsWithDots) {
+  auto dir1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  auto dir2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  auto dir1_dot = absl::StrCat(dir1.path(), "/.");
+  auto dir2_dot = absl::StrCat(dir2.path(), "/.");
+  auto dir1_dot_dot = absl::StrCat(dir1.path(), "/..");
+  auto dir2_dot_dot = absl::StrCat(dir2.path(), "/..");
+
+  // Try with dot paths in the first argument
+  EXPECT_THAT(rename(dir1_dot.c_str(), dir2.path().c_str()),
+              SyscallFailsWithErrno(EBUSY));
+  EXPECT_THAT(rename(dir1_dot_dot.c_str(), dir2.path().c_str()),
+              SyscallFailsWithErrno(EBUSY));
+
+  // Try with dot paths in the second argument
+  EXPECT_THAT(rename(dir1.path().c_str(), dir2_dot.c_str()),
+              SyscallFailsWithErrno(EBUSY));
+  EXPECT_THAT(rename(dir1.path().c_str(), dir2_dot_dot.c_str()),
+              SyscallFailsWithErrno(EBUSY));
+}
+
+// Renaming a directory onto a directory that still contains a file fails
+// with ENOTEMPTY.
+TEST(RenameTest, DirectoryDoesNotOverwriteNonemptyDirectory) {
+  auto dir1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  auto f1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir1.path()));
+  auto dir2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  auto f2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir2.path()));
+  ASSERT_THAT(rename(dir1.path().c_str(), dir2.path().c_str()),
+              SyscallFailsWithErrno(ENOTEMPTY));
+}
+
+// Moving a file out of a directory whose mode is 0555 (no write bit) fails
+// with EACCES.
+TEST(RenameTest, FailsWhenOldParentNotWritable) {
+  // Drop capabilities that allow us to override file and directory permissions.
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false));
+
+  auto dir1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  auto f1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir1.path()));
+  auto dir2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  // dir1 is not writable.
+  ASSERT_THAT(chmod(dir1.path().c_str(), 0555), SyscallSucceeds());
+
+  std::string const newpath = NewTempAbsPathInDir(dir2.path());
+  EXPECT_THAT(rename(f1.path().c_str(), newpath.c_str()),
+              SyscallFailsWithErrno(EACCES));
+}
+
+// Moving a file into a directory created with mode 0555 (no write bit) fails
+// with EACCES.
+TEST(RenameTest, FailsWhenNewParentNotWritable) {
+  // Drop capabilities that allow us to override file and directory permissions.
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false));
+
+  auto dir1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  auto f1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir1.path()));
+  // dir2 is not writable.
+  auto dir2 = ASSERT_NO_ERRNO_AND_VALUE(
+      TempPath::CreateDirWith(GetAbsoluteTestTmpdir(), 0555));
+
+  std::string const newpath = NewTempAbsPathInDir(dir2.path());
+  EXPECT_THAT(rename(f1.path().c_str(), newpath.c_str()),
+              SyscallFailsWithErrno(EACCES));
+}
+
+// Equivalent to FailsWhenNewParentNotWritable, but with a destination file
+// to overwrite.
+TEST(RenameTest, OverwriteFailsWhenNewParentNotWritable) {
+  // Drop capabilities that allow us to override file and directory permissions.
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false));
+
+  auto dir1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  auto f1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir1.path()));
+
+  // dir2 is not writable.
+  auto dir2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  auto f2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir2.path()));
+  ASSERT_THAT(chmod(dir2.path().c_str(), 0555), SyscallSucceeds());
+
+  EXPECT_THAT(rename(f1.path().c_str(), f2.path().c_str()),
+              SyscallFailsWithErrno(EACCES));
+}
+
+// If the parent directory of source is not accessible, rename returns EACCES
+// because the user cannot determine if source exists.
+TEST(RenameTest, FileDoesNotExistWhenNewParentNotExecutable) {
+  // Drop capabilities that allow us to override file and directory permissions.
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false));
+
+  // No execute permission.
+  auto dir = ASSERT_NO_ERRNO_AND_VALUE(
+      TempPath::CreateDirWith(GetAbsoluteTestTmpdir(), 0400));
+
+  const std::string source = JoinPath(dir.path(), "source");
+  const std::string dest = JoinPath(dir.path(), "dest");
+  ASSERT_THAT(rename(source.c_str(), dest.c_str()),
+              SyscallFailsWithErrno(EACCES));
+}
+
+// Same scenario as DirectoryOverwritesEmptyDirectory, but an fd on the source
+// directory is held open across the rename.
+TEST(RenameTest, DirectoryWithOpenFdOverwritesEmptyDirectory) {
+  auto dir1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir1.path()));
+  auto dir2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+
+  // Get an fd on dir1
+  int fd;
+  ASSERT_THAT(fd = open(dir1.path().c_str(), O_DIRECTORY), SyscallSucceeds());
+  auto close_f = Cleanup([fd] {
+    // Close the fd on dir1.
+    EXPECT_THAT(close(fd), SyscallSucceeds());
+  });
+
+  EXPECT_THAT(rename(dir1.path().c_str(), dir2.path().c_str()),
+              SyscallSucceeds());
+
+  const std::string new_f_path = JoinPath(dir2.path(), Basename(f.path()));
+
+  auto remove_f = Cleanup([&] {
+    // Delete f in its new location.
+    ASSERT_NO_ERRNO(Delete(new_f_path));
+    f.release();
+  });
+
+  EXPECT_THAT(Exists(dir1.path()), IsPosixErrorOkAndHolds(false));
+  dir1.release();
+  EXPECT_THAT(Exists(new_f_path), IsPosixErrorOkAndHolds(true));
+}
+
+// An fd opened before a rename keeps reading the same contents after the file
+// is moved to a second and then a third directory.
+TEST(RenameTest, FileWithOpenFd) {
+  TempPath root_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  TempPath dir1 =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(root_dir.path()));
+  TempPath dir2 =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(root_dir.path()));
+  TempPath dir3 =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(root_dir.path()));
+
+  // Create file in dir1.
+  constexpr char kContents[] = "foo";
+  auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      dir1.path(), kContents, TempPath::kDefaultFileMode));
+
+  // Get fd on file.
+  const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDWR));
+
+  // Move f to dir2.
+  const std::string path2 = NewTempAbsPathInDir(dir2.path());
+  ASSERT_THAT(rename(f.path().c_str(), path2.c_str()), SyscallSucceeds());
+
+  // Read f's kContents. The read asks for sizeof(kContents) bytes but only
+  // sizeof(kContents) - 1 are expected back (the array's trailing NUL is not
+  // counted).
+  char buf[sizeof(kContents)];
+  EXPECT_THAT(PreadFd(fd.get(), &buf, sizeof(kContents), 0),
+              SyscallSucceedsWithValue(sizeof(kContents) - 1));
+  EXPECT_EQ(absl::string_view(buf, sizeof(buf) - 1), kContents);
+
+  // Move f to dir3.
+  const std::string path3 = NewTempAbsPathInDir(dir3.path());
+  ASSERT_THAT(rename(path2.c_str(), path3.c_str()), SyscallSucceeds());
+
+  // Read f's kContents.
+  EXPECT_THAT(PreadFd(fd.get(), &buf, sizeof(kContents), 0),
+              SyscallSucceedsWithValue(sizeof(kContents) - 1));
+  EXPECT_EQ(absl::string_view(buf, sizeof(buf) - 1), kContents);
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/rlimits.cc b/test/syscalls/linux/rlimits.cc
new file mode 100644
index 000000000..0072285f9
--- /dev/null
+++ b/test/syscalls/linux/rlimits.cc
@@ -0,0 +1,61 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sys/resource.h>
+#include <sys/time.h>
+
+#include "test/util/capability_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// Only runs on gVisor and only when CAP_SYS_RESOURCE is held: raising the
+// RLIMIT_NOFILE hard limit by one is expected to be refused with EPERM.
+TEST(RlimitTest, SetRlimitHigher) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_RESOURCE)));
+  SKIP_IF(!IsRunningOnGvisor());
+
+  struct rlimit rl = {};
+  EXPECT_THAT(getrlimit(RLIMIT_NOFILE, &rl), SyscallSucceeds());
+
+  // Even with CAP_SYS_RESOURCE, gVisor does not allow setting a higher rlimit.
+  rl.rlim_max++;
+  EXPECT_THAT(setrlimit(RLIMIT_NOFILE, &rl), SyscallFailsWithErrno(EPERM));
+}
+
+// Without CAP_SYS_RESOURCE: setting {cur=1000, max=20000} is expected to
+// succeed and read back unchanged, while then raising max to 100000 is
+// expected to fail with EPERM.
+// NOTE(review): the first setrlimit assumes the inherited hard limit is at
+// least 20000 - confirm for the environments this runs in.
+TEST(RlimitTest, UnprivilegedSetRlimit) {
+  // Drop privileges if necessary.
+  if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_RESOURCE))) {
+    EXPECT_NO_ERRNO(SetCapability(CAP_SYS_RESOURCE, false));
+  }
+
+  struct rlimit rl = {};
+  rl.rlim_cur = 1000;
+  rl.rlim_max = 20000;
+  EXPECT_THAT(setrlimit(RLIMIT_NOFILE, &rl), SyscallSucceeds());
+
+  struct rlimit rl2 = {};
+  EXPECT_THAT(getrlimit(RLIMIT_NOFILE, &rl2), SyscallSucceeds());
+  EXPECT_EQ(rl.rlim_cur, rl2.rlim_cur);
+  EXPECT_EQ(rl.rlim_max, rl2.rlim_max);
+
+  rl.rlim_max = 100000;
+  EXPECT_THAT(setrlimit(RLIMIT_NOFILE, &rl), SyscallFailsWithErrno(EPERM));
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/rtsignal.cc b/test/syscalls/linux/rtsignal.cc
new file mode 100644
index 000000000..1f2fed7cc
--- /dev/null
+++ b/test/syscalls/linux/rtsignal.cc
@@ -0,0 +1,172 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <cerrno>
+#include <csignal>
+
+#include "gtest/gtest.h"
+#include "test/util/cleanup.h"
+#include "test/util/logging.h"
+#include "test/util/posix_error.h"
+#include "test/util/signal_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// saved_info is set by the handler.
+siginfo_t saved_info;
+
+// has_saved_info is set to true by the handler.
+volatile bool has_saved_info; + +void SigHandler(int sig, siginfo_t* info, void* context) { + // Copy to the given info. + saved_info = *info; + has_saved_info = true; +} + +void ClearSavedInfo() { + // Clear the cached info. + memset(&saved_info, 0, sizeof(saved_info)); + has_saved_info = false; +} + +PosixErrorOr<Cleanup> SetupSignalHandler(int sig) { + struct sigaction sa; + sa.sa_sigaction = SigHandler; + sigfillset(&sa.sa_mask); + sa.sa_flags = SA_SIGINFO; + return ScopedSigaction(sig, sa); +} + +class RtSignalTest : public ::testing::Test { + protected: + void SetUp() override { + action_cleanup_ = ASSERT_NO_ERRNO_AND_VALUE(SetupSignalHandler(SIGUSR1)); + mask_cleanup_ = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, SIGUSR1)); + } + + void TearDown() override { ClearSavedInfo(); } + + private: + Cleanup action_cleanup_; + Cleanup mask_cleanup_; +}; + +static int rt_sigqueueinfo(pid_t tgid, int sig, siginfo_t* uinfo) { + int ret; + do { + // NOTE: rt_sigqueueinfo(2) could return EAGAIN for RT signals. + ret = syscall(SYS_rt_sigqueueinfo, tgid, sig, uinfo); + } while (ret == -1 && errno == EAGAIN); + return ret; +} + +TEST_F(RtSignalTest, InvalidTID) { + siginfo_t uinfo; + // Depending on the kernel version, these calls may fail with + // ESRCH (goobunutu machines) or EPERM (production machines). Thus, + // the test simply ensures that they do fail. + EXPECT_THAT(rt_sigqueueinfo(-1, SIGUSR1, &uinfo), SyscallFails()); + EXPECT_FALSE(has_saved_info); + EXPECT_THAT(rt_sigqueueinfo(0, SIGUSR1, &uinfo), SyscallFails()); + EXPECT_FALSE(has_saved_info); +} + +TEST_F(RtSignalTest, InvalidCodes) { + siginfo_t uinfo; + + // We need a child for the code checks to apply. If the process is delivering + // to itself, then it can use whatever codes it wants and they will go + // through. + pid_t child = fork(); + if (child == 0) { + _exit(1); + } + ASSERT_THAT(child, SyscallSucceeds()); + + // These are not allowed for child processes. 
+ uinfo.si_code = 0; // SI_USER. + EXPECT_THAT(rt_sigqueueinfo(child, SIGUSR1, &uinfo), + SyscallFailsWithErrno(EPERM)); + uinfo.si_code = 0x80; // SI_KERNEL. + EXPECT_THAT(rt_sigqueueinfo(child, SIGUSR1, &uinfo), + SyscallFailsWithErrno(EPERM)); + uinfo.si_code = -6; // SI_TKILL. + EXPECT_THAT(rt_sigqueueinfo(child, SIGUSR1, &uinfo), + SyscallFailsWithErrno(EPERM)); + uinfo.si_code = -1; // SI_QUEUE (allowed). + EXPECT_THAT(rt_sigqueueinfo(child, SIGUSR1, &uinfo), SyscallSucceeds()); + + // Join the child process. + EXPECT_THAT(waitpid(child, nullptr, 0), SyscallSucceeds()); +} + +TEST_F(RtSignalTest, ValueDelivered) { + siginfo_t uinfo; + uinfo.si_code = -1; // SI_QUEUE (allowed). + uinfo.si_errno = 0x1234; + + EXPECT_EQ(saved_info.si_errno, 0x0); + EXPECT_THAT(rt_sigqueueinfo(getpid(), SIGUSR1, &uinfo), SyscallSucceeds()); + EXPECT_TRUE(has_saved_info); + EXPECT_EQ(saved_info.si_errno, 0x1234); +} + +TEST_F(RtSignalTest, SignoMatch) { + auto action2_cleanup = ASSERT_NO_ERRNO_AND_VALUE(SetupSignalHandler(SIGUSR2)); + auto mask2_cleanup = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, SIGUSR2)); + + siginfo_t uinfo; + uinfo.si_code = -1; // SI_QUEUE (allowed). + + EXPECT_THAT(rt_sigqueueinfo(getpid(), SIGUSR1, &uinfo), SyscallSucceeds()); + EXPECT_TRUE(has_saved_info); + EXPECT_EQ(saved_info.si_signo, SIGUSR1); + + ClearSavedInfo(); + + EXPECT_THAT(rt_sigqueueinfo(getpid(), SIGUSR2, &uinfo), SyscallSucceeds()); + EXPECT_TRUE(has_saved_info); + EXPECT_EQ(saved_info.si_signo, SIGUSR2); +} + +} // namespace + +} // namespace testing +} // namespace gvisor + +int main(int argc, char** argv) { + // These tests depend on delivering SIGUSR1/2 to the main thread (so they can + // synchronously check has_saved_info). Block these so that any other threads + // created by TestInit will also have them blocked. 
+ sigset_t set; + sigemptyset(&set); + sigaddset(&set, SIGUSR1); + sigaddset(&set, SIGUSR2); + TEST_PCHECK(sigprocmask(SIG_BLOCK, &set, nullptr) == 0); + + gvisor::testing::TestInit(&argc, &argv); + + return RUN_ALL_TESTS(); +} diff --git a/test/syscalls/linux/sched.cc b/test/syscalls/linux/sched.cc new file mode 100644 index 000000000..60cb6c443 --- /dev/null +++ b/test/syscalls/linux/sched.cc @@ -0,0 +1,71 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <errno.h> +#include <sched.h> + +#include "gtest/gtest.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// In linux, pid is limited to 29 bits because how futex is implemented. 
+constexpr int kImpossiblePID = (1 << 29) + 1; + +TEST(SchedGetparamTest, ReturnsZero) { + struct sched_param param; + EXPECT_THAT(sched_getparam(getpid(), ¶m), SyscallSucceeds()); + EXPECT_EQ(param.sched_priority, 0); + EXPECT_THAT(sched_getparam(/*pid=*/0, ¶m), SyscallSucceeds()); + EXPECT_EQ(param.sched_priority, 0); +} + +TEST(SchedGetparamTest, InvalidPIDReturnsEINVAL) { + struct sched_param param; + EXPECT_THAT(sched_getparam(/*pid=*/-1, ¶m), + SyscallFailsWithErrno(EINVAL)); +} + +TEST(SchedGetparamTest, ImpossiblePIDReturnsESRCH) { + struct sched_param param; + EXPECT_THAT(sched_getparam(kImpossiblePID, ¶m), + SyscallFailsWithErrno(ESRCH)); +} + +TEST(SchedGetparamTest, NullParamReturnsEINVAL) { + EXPECT_THAT(sched_getparam(0, nullptr), SyscallFailsWithErrno(EINVAL)); +} + +TEST(SchedGetschedulerTest, ReturnsSchedOther) { + EXPECT_THAT(sched_getscheduler(getpid()), + SyscallSucceedsWithValue(SCHED_OTHER)); + EXPECT_THAT(sched_getscheduler(/*pid=*/0), + SyscallSucceedsWithValue(SCHED_OTHER)); +} + +TEST(SchedGetschedulerTest, ReturnsEINVAL) { + EXPECT_THAT(sched_getscheduler(/*pid=*/-1), SyscallFailsWithErrno(EINVAL)); +} + +TEST(SchedGetschedulerTest, ReturnsESRCH) { + EXPECT_THAT(sched_getscheduler(kImpossiblePID), SyscallFailsWithErrno(ESRCH)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/sched_yield.cc b/test/syscalls/linux/sched_yield.cc new file mode 100644 index 000000000..fc45aa5c2 --- /dev/null +++ b/test/syscalls/linux/sched_yield.cc @@ -0,0 +1,33 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <sched.h> + +#include "gtest/gtest.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +TEST(SchedYieldTest, Success) { + EXPECT_THAT(sched_yield(), SyscallSucceeds()); + EXPECT_THAT(sched_yield(), SyscallSucceeds()); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/seccomp.cc b/test/syscalls/linux/seccomp.cc new file mode 100644 index 000000000..d6ac166a4 --- /dev/null +++ b/test/syscalls/linux/seccomp.cc @@ -0,0 +1,374 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <errno.h> +#include <linux/audit.h> +#include <linux/filter.h> +#include <linux/seccomp.h> +#include <pthread.h> +#include <sched.h> +#include <signal.h> +#include <string.h> +#include <sys/prctl.h> +#include <sys/syscall.h> +#include <time.h> +#include <unistd.h> +#include <atomic> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/macros.h" +#include "test/util/logging.h" +#include "test/util/memory_util.h" +#include "test/util/multiprocess_util.h" +#include "test/util/posix_error.h" +#include "test/util/proc_util.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +#ifndef SYS_SECCOMP +#define SYS_SECCOMP 1 +#endif + +namespace gvisor { +namespace testing { + +namespace { + +// A syscall not implemented by Linux that we don't expect to be called. +constexpr uint32_t kFilteredSyscall = SYS_vserver; + +// Applies a seccomp-bpf filter that returns `filtered_result` for +// `sysno` and allows all other syscalls. Async-signal-safe. +void ApplySeccompFilter(uint32_t sysno, uint32_t filtered_result, + uint32_t flags = 0) { + // "Prior to [PR_SET_SECCOMP], the task must call prctl(PR_SET_NO_NEW_PRIVS, + // 1) or run with CAP_SYS_ADMIN privileges in its namespace." - + // Documentation/prctl/seccomp_filter.txt + // + // prctl(PR_SET_NO_NEW_PRIVS, 1) may be called repeatedly; calls after the + // first are no-ops. 
+ TEST_PCHECK(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) == 0); + MaybeSave(); + + struct sock_filter filter[] = { + // A = seccomp_data.arch + BPF_STMT(BPF_LD | BPF_ABS | BPF_W, 4), + // if (A != AUDIT_ARCH_X86_64) goto kill + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, AUDIT_ARCH_X86_64, 0, 4), + // A = seccomp_data.nr + BPF_STMT(BPF_LD | BPF_ABS | BPF_W, 0), + // if (A != sysno) goto allow + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, sysno, 0, 1), + // return filtered_result + BPF_STMT(BPF_RET | BPF_K, filtered_result), + // allow: return SECCOMP_RET_ALLOW + BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW), + // kill: return SECCOMP_RET_KILL + BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL), + }; + struct sock_fprog prog; + prog.len = ABSL_ARRAYSIZE(filter); + prog.filter = filter; + if (flags) { + TEST_CHECK(syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER, flags, &prog) == + 0); + } else { + TEST_PCHECK(prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0) == 0); + } + MaybeSave(); +} + +// Wrapper for sigaction. Async-signal-safe. +void RegisterSignalHandler(int signum, + void (*handler)(int, siginfo_t*, void*)) { + struct sigaction sa = {}; + sa.sa_sigaction = handler; + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_SIGINFO; + TEST_PCHECK(sigaction(signum, &sa, nullptr) == 0); + MaybeSave(); +} + +// All of the following tests execute in a subprocess to ensure that each test +// is run in a separate process. This avoids cross-contamination of seccomp +// state between tests, and is necessary to ensure that test processes killed +// by SECCOMP_RET_KILL are single-threaded (since SECCOMP_RET_KILL only kills +// the offending thread, not the whole thread group). + +TEST(SeccompTest, RetKillCausesDeathBySIGSYS) { + pid_t const pid = fork(); + if (pid == 0) { + // Register a signal handler for SIGSYS that we don't expect to be invoked. 
+ RegisterSignalHandler(SIGSYS, +[](int, siginfo_t*, void*) { _exit(1); }); + ApplySeccompFilter(kFilteredSyscall, SECCOMP_RET_KILL); + syscall(kFilteredSyscall); + TEST_CHECK_MSG(false, "Survived invocation of test syscall"); + } + ASSERT_THAT(pid, SyscallSucceeds()); + int status; + ASSERT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid)); + EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGSYS) + << "status " << status; +} + +TEST(SeccompTest, RetKillOnlyKillsOneThread) { + Mapping stack = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(2 * kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE)); + + pid_t const pid = fork(); + if (pid == 0) { + // Register a signal handler for SIGSYS that we don't expect to be invoked. + RegisterSignalHandler(SIGSYS, +[](int, siginfo_t*, void*) { _exit(1); }); + ApplySeccompFilter(kFilteredSyscall, SECCOMP_RET_KILL); + // Pass CLONE_VFORK to block the original thread in the child process until + // the clone thread exits with SIGSYS. + // + // N.B. clone(2) is not officially async-signal-safe, but at minimum glibc's + // x86_64 implementation is safe. See glibc + // sysdeps/unix/sysv/linux/x86_64/clone.S. + clone( + +[](void* arg) { + syscall(kFilteredSyscall); // should kill the thread + _exit(1); // should be unreachable + return 2; // should be very unreachable, shut up the compiler + }, + stack.endptr(), + CLONE_FILES | CLONE_FS | CLONE_SIGHAND | CLONE_THREAD | CLONE_VM | + CLONE_VFORK, + nullptr); + _exit(0); + } + ASSERT_THAT(pid, SyscallSucceeds()); + int status; + ASSERT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid)); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << "status " << status; +} + +TEST(SeccompTest, RetTrapCausesSIGSYS) { + pid_t const pid = fork(); + if (pid == 0) { + constexpr uint16_t kTrapValue = 0xdead; + RegisterSignalHandler(SIGSYS, +[](int signo, siginfo_t* info, void*) { + // This is a signal handler, so we must stay async-signal-safe. 
+ TEST_CHECK(info->si_signo == SIGSYS); + TEST_CHECK(info->si_code == SYS_SECCOMP); + TEST_CHECK(info->si_errno == kTrapValue); + TEST_CHECK(info->si_call_addr != nullptr); + TEST_CHECK(info->si_syscall == kFilteredSyscall); + TEST_CHECK(info->si_arch == AUDIT_ARCH_X86_64); + _exit(0); + }); + ApplySeccompFilter(kFilteredSyscall, SECCOMP_RET_TRAP | kTrapValue); + syscall(kFilteredSyscall); + TEST_CHECK_MSG(false, "Survived invocation of test syscall"); + } + ASSERT_THAT(pid, SyscallSucceeds()); + int status; + ASSERT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid)); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << "status " << status; +} + +constexpr uint64_t kVsyscallTimeEntry = 0xffffffffff600400; + +time_t vsyscall_time(time_t* t) { + return reinterpret_cast<time_t (*)(time_t*)>(kVsyscallTimeEntry)(t); +} + +TEST(SeccompTest, SeccompAppliesToVsyscall) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsVsyscallEnabled())); + + pid_t const pid = fork(); + if (pid == 0) { + constexpr uint16_t kTrapValue = 0xdead; + RegisterSignalHandler(SIGSYS, +[](int signo, siginfo_t* info, void*) { + // This is a signal handler, so we must stay async-signal-safe. + TEST_CHECK(info->si_signo == SIGSYS); + TEST_CHECK(info->si_code == SYS_SECCOMP); + TEST_CHECK(info->si_errno == kTrapValue); + TEST_CHECK(info->si_call_addr != nullptr); + TEST_CHECK(info->si_syscall == SYS_time); + TEST_CHECK(info->si_arch == AUDIT_ARCH_X86_64); + _exit(0); + }); + ApplySeccompFilter(SYS_time, SECCOMP_RET_TRAP | kTrapValue); + vsyscall_time(nullptr); // Should result in death. 
+ TEST_CHECK_MSG(false, "Survived invocation of test syscall"); + } + ASSERT_THAT(pid, SyscallSucceeds()); + int status; + ASSERT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid)); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << "status " << status; +} + +TEST(SeccompTest, RetTraceWithoutPtracerReturnsENOSYS) { + pid_t const pid = fork(); + if (pid == 0) { + ApplySeccompFilter(kFilteredSyscall, SECCOMP_RET_TRACE); + TEST_CHECK(syscall(kFilteredSyscall) == -1 && errno == ENOSYS); + _exit(0); + } + ASSERT_THAT(pid, SyscallSucceeds()); + int status; + ASSERT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid)); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << "status " << status; +} + +TEST(SeccompTest, RetErrnoReturnsErrno) { + pid_t const pid = fork(); + if (pid == 0) { + // ENOTNAM: "Not a XENIX named type file" + ApplySeccompFilter(kFilteredSyscall, SECCOMP_RET_ERRNO | ENOTNAM); + TEST_CHECK(syscall(kFilteredSyscall) == -1 && errno == ENOTNAM); + _exit(0); + } + ASSERT_THAT(pid, SyscallSucceeds()); + int status; + ASSERT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid)); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << "status " << status; +} + +TEST(SeccompTest, RetAllowAllowsSyscall) { + pid_t const pid = fork(); + if (pid == 0) { + ApplySeccompFilter(kFilteredSyscall, SECCOMP_RET_ALLOW); + TEST_CHECK(syscall(kFilteredSyscall) == -1 && errno == ENOSYS); + _exit(0); + } + ASSERT_THAT(pid, SyscallSucceeds()); + int status; + ASSERT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid)); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << "status " << status; +} + +// This test will validate that TSYNC will apply to all threads. 
+TEST(SeccompTest, TsyncAppliesToAllThreads) { + Mapping stack = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(2 * kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE)); + + // We don't want to apply this policy to other test runner threads, so fork. + const pid_t pid = fork(); + + if (pid == 0) { + // First check that we receive a ENOSYS before the policy is applied. + TEST_CHECK(syscall(kFilteredSyscall) == -1 && errno == ENOSYS); + + // N.B. clone(2) is not officially async-signal-safe, but at minimum glibc's + // x86_64 implementation is safe. See glibc + // sysdeps/unix/sysv/linux/x86_64/clone.S. + clone( + +[](void* arg) { + ApplySeccompFilter(kFilteredSyscall, SECCOMP_RET_ERRNO | ENOTNAM, + SECCOMP_FILTER_FLAG_TSYNC); + return 0; + }, + stack.endptr(), + CLONE_FILES | CLONE_FS | CLONE_SIGHAND | CLONE_THREAD | CLONE_VM | + CLONE_VFORK, + nullptr); + + // Because we're using CLONE_VFORK this thread will be blocked until + // the second thread has released resources to our virtual memory, since + // we're not execing that will happen on _exit. + + // Now verify that the policy applied to this thread too. + TEST_CHECK(syscall(kFilteredSyscall) == -1 && errno == ENOTNAM); + _exit(0); + } + + ASSERT_THAT(pid, SyscallSucceeds()); + int status = 0; + ASSERT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid)); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << "status " << status; +} + +// This test will validate that seccomp(2) rejects unsupported flags. +TEST(SeccompTest, SeccompRejectsUnknownFlags) { + constexpr uint32_t kInvalidFlag = 123; + ASSERT_THAT( + syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER, kInvalidFlag, nullptr), + SyscallFailsWithErrno(EINVAL)); +} + +TEST(SeccompTest, LeastPermissiveFilterReturnValueApplies) { + // This is RetKillCausesDeathBySIGSYS, plus extra filters before and after the + // one that causes the kill that should be ignored. 
+ pid_t const pid = fork(); + if (pid == 0) { + RegisterSignalHandler(SIGSYS, +[](int, siginfo_t*, void*) { _exit(1); }); + ApplySeccompFilter(kFilteredSyscall, SECCOMP_RET_TRACE); + ApplySeccompFilter(kFilteredSyscall, SECCOMP_RET_KILL); + ApplySeccompFilter(kFilteredSyscall, SECCOMP_RET_ERRNO | ENOTNAM); + syscall(kFilteredSyscall); + TEST_CHECK_MSG(false, "Survived invocation of test syscall"); + } + ASSERT_THAT(pid, SyscallSucceeds()); + int status; + ASSERT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid)); + EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGSYS) + << "status " << status; +} + +// Passed as argv[1] to cause the test binary to invoke kFilteredSyscall and +// exit. Not a real flag since flag parsing happens during initialization, +// which may create threads. +constexpr char kInvokeFilteredSyscallFlag[] = "--seccomp_test_child"; + +TEST(SeccompTest, FiltersPreservedAcrossForkAndExecve) { + ExecveArray const grandchild_argv( + {"/proc/self/exe", kInvokeFilteredSyscallFlag}); + + pid_t const pid = fork(); + if (pid == 0) { + ApplySeccompFilter(kFilteredSyscall, SECCOMP_RET_KILL); + pid_t const grandchild_pid = fork(); + if (grandchild_pid == 0) { + execve(grandchild_argv.get()[0], grandchild_argv.get(), + /* envp = */ nullptr); + TEST_PCHECK_MSG(false, "execve failed"); + } + int status; + TEST_PCHECK(waitpid(grandchild_pid, &status, 0) == grandchild_pid); + TEST_CHECK(WIFSIGNALED(status) && WTERMSIG(status) == SIGSYS); + _exit(0); + } + ASSERT_THAT(pid, SyscallSucceeds()); + int status; + ASSERT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid)); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << "status " << status; +} + +} // namespace + +} // namespace testing +} // namespace gvisor + +int main(int argc, char** argv) { + if (argc >= 2 && + strcmp(argv[1], gvisor::testing::kInvokeFilteredSyscallFlag) == 0) { + syscall(gvisor::testing::kFilteredSyscall); + exit(0); + } + + 
gvisor::testing::TestInit(&argc, &argv); + return RUN_ALL_TESTS(); +} diff --git a/test/syscalls/linux/select.cc b/test/syscalls/linux/select.cc new file mode 100644 index 000000000..6b6fa9217 --- /dev/null +++ b/test/syscalls/linux/select.cc @@ -0,0 +1,128 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <limits.h> +#include <signal.h> +#include <sys/select.h> + +#include "gtest/gtest.h" +#include "gtest/gtest.h" +#include "absl/time/time.h" +#include "test/syscalls/linux/base_poll_test.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { +namespace { + +class SelectTest : public BasePollTest { + protected: + void SetUp() override { BasePollTest::SetUp(); } + void TearDown() override { BasePollTest::TearDown(); } +}; + +// See that when there are no FD sets, select behaves like sleep. 
+TEST_F(SelectTest, NullFds) { + struct timeval timeout = absl::ToTimeval(absl::Milliseconds(10)); + ASSERT_THAT(select(0, nullptr, nullptr, nullptr, &timeout), + SyscallSucceeds()); + EXPECT_EQ(timeout.tv_sec, 0); + EXPECT_EQ(timeout.tv_usec, 0); + + timeout = absl::ToTimeval(absl::Milliseconds(10)); + ASSERT_THAT(select(1, nullptr, nullptr, nullptr, &timeout), + SyscallSucceeds()); + EXPECT_EQ(timeout.tv_sec, 0); + EXPECT_EQ(timeout.tv_usec, 0); +} + +TEST_F(SelectTest, NegativeNfds) { + EXPECT_THAT(select(-1, nullptr, nullptr, nullptr, nullptr), + SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(select(-100000, nullptr, nullptr, nullptr, nullptr), + SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(select(INT_MIN, nullptr, nullptr, nullptr, nullptr), + SyscallFailsWithErrno(EINVAL)); +} + +TEST_F(SelectTest, ClosedFds) { + fd_set read_set; + FD_ZERO(&read_set); + int fd; + ASSERT_THAT(fd = dup(1), SyscallSucceeds()); + ASSERT_THAT(close(fd), SyscallSucceeds()); + FD_SET(fd, &read_set); + struct timeval timeout = absl::ToTimeval(absl::Milliseconds(10)); + EXPECT_THAT(select(fd + 1, &read_set, nullptr, nullptr, &timeout), + SyscallFailsWithErrno(EBADF)); +} + +TEST_F(SelectTest, ZeroTimeout) { + struct timeval timeout = {}; + EXPECT_THAT(select(1, nullptr, nullptr, nullptr, &timeout), + SyscallSucceeds()); + // Ignore timeout as its value is now undefined. +} + +// If random S/R interrupts the select, SIGALRM may be delivered before select +// restarts, causing the select to hang forever. +TEST_F(SelectTest, NoTimeout_NoRandomSave) { + // When there's no timeout, select may never return so set a timer. + SetTimer(absl::Milliseconds(100)); + // See that we get interrupted by the timer. 
+ ASSERT_THAT(select(1, nullptr, nullptr, nullptr, nullptr), + SyscallFailsWithErrno(EINTR)); + EXPECT_TRUE(TimerFired()); +} + +TEST_F(SelectTest, InvalidTimeoutNegative) { + struct timeval timeout = absl::ToTimeval(absl::Microseconds(-1)); + EXPECT_THAT(select(1, nullptr, nullptr, nullptr, &timeout), + SyscallFailsWithErrno(EINVAL)); + // Ignore timeout as its value is now undefined. +} + +// Verify that a signal interrupts select. +// +// If random S/R interrupts the select, SIGALRM may be delivered before select +// restarts, causing the select to hang forever. +TEST_F(SelectTest, InterruptedBySignal_NoRandomSave) { + absl::Duration duration(absl::Seconds(5)); + struct timeval timeout = absl::ToTimeval(duration); + SetTimer(absl::Milliseconds(100)); + ASSERT_FALSE(TimerFired()); + ASSERT_THAT(select(1, nullptr, nullptr, nullptr, &timeout), + SyscallFailsWithErrno(EINTR)); + EXPECT_TRUE(TimerFired()); + // Ignore timeout as its value is now undefined. +} + +TEST_F(SelectTest, IgnoreBitsAboveNfds) { + // fd_set is a bit array with at least FD_SETSIZE bits. Test that bits + // corresponding to file descriptors above nfds are ignored. + fd_set read_set; + FD_ZERO(&read_set); + constexpr int kNfds = 1; + for (int fd = kNfds; fd < FD_SETSIZE; fd++) { + FD_SET(fd, &read_set); + } + // Pass a zero timeout so that select returns immediately. + struct timeval timeout = {}; + EXPECT_THAT(select(kNfds, &read_set, nullptr, nullptr, &timeout), + SyscallSucceedsWithValue(0)); +} + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/semaphore.cc b/test/syscalls/linux/semaphore.cc new file mode 100644 index 000000000..12e33732d --- /dev/null +++ b/test/syscalls/linux/semaphore.cc @@ -0,0 +1,438 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <sys/ipc.h> +#include <sys/sem.h> +#include <sys/types.h> +#include <atomic> +#include <cerrno> +#include <ctime> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/macros.h" +#include "absl/synchronization/mutex.h" +#include "absl/time/clock.h" +#include "test/util/capability_util.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { +namespace { + +class AutoSem { + public: + explicit AutoSem(int id) : id_(id) {} + ~AutoSem() { + if (id_ >= 0) { + EXPECT_THAT(semctl(id_, 0, IPC_RMID), SyscallSucceeds()); + } + } + + int release() { + int old = id_; + id_ = -1; + return old; + } + + int get() { return id_; } + + private: + int id_ = -1; +}; + +TEST(SemaphoreTest, SemGet) { + // Test creation and lookup. + AutoSem sem(semget(1, 10, IPC_CREAT)); + ASSERT_THAT(sem.get(), SyscallSucceeds()); + EXPECT_THAT(semget(1, 10, IPC_CREAT), SyscallSucceedsWithValue(sem.get())); + EXPECT_THAT(semget(1, 9, IPC_CREAT), SyscallSucceedsWithValue(sem.get())); + + // Creation and lookup failure cases. + EXPECT_THAT(semget(1, 11, IPC_CREAT), SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(semget(1, -1, IPC_CREAT), SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(semget(1, 10, IPC_CREAT | IPC_EXCL), + SyscallFailsWithErrno(EEXIST)); + EXPECT_THAT(semget(2, 1, 0), SyscallFailsWithErrno(ENOENT)); + EXPECT_THAT(semget(2, 0, IPC_CREAT), SyscallFailsWithErrno(EINVAL)); + + // Private semaphores never conflict. 
+ AutoSem sem2(semget(IPC_PRIVATE, 1, 0)); + AutoSem sem3(semget(IPC_PRIVATE, 1, 0)); + ASSERT_THAT(sem2.get(), SyscallSucceeds()); + EXPECT_NE(sem.get(), sem2.get()); + ASSERT_THAT(sem3.get(), SyscallSucceeds()); + EXPECT_NE(sem3.get(), sem2.get()); +} + +// Tests simple operations that shouldn't block in a single-thread. +TEST(SemaphoreTest, SemOpSingleNoBlock) { + AutoSem sem(semget(IPC_PRIVATE, 1, 0600 | IPC_CREAT)); + ASSERT_THAT(sem.get(), SyscallSucceeds()); + + struct sembuf buf = {}; + buf.sem_op = 1; + ASSERT_THAT(semop(sem.get(), &buf, 1), SyscallSucceeds()); + + buf.sem_op = -1; + ASSERT_THAT(semop(sem.get(), &buf, 1), SyscallSucceeds()); + + buf.sem_op = 0; + ASSERT_THAT(semop(sem.get(), &buf, 1), SyscallSucceeds()); + + // Error cases with invalid values. + ASSERT_THAT(semop(sem.get() + 1, &buf, 1), SyscallFailsWithErrno(EINVAL)); + + buf.sem_num = 1; + ASSERT_THAT(semop(sem.get(), &buf, 1), SyscallFailsWithErrno(EFBIG)); + + ASSERT_THAT(semop(sem.get(), nullptr, 0), SyscallFailsWithErrno(EINVAL)); +} + +// Tests multiple operations that shouldn't block in a single-thread. 
+TEST(SemaphoreTest, SemOpMultiNoBlock) { + AutoSem sem(semget(IPC_PRIVATE, 4, 0600 | IPC_CREAT)); + ASSERT_THAT(sem.get(), SyscallSucceeds()); + + struct sembuf bufs[5] = {}; + bufs[0].sem_num = 0; + bufs[0].sem_op = 10; + bufs[0].sem_flg = 0; + + bufs[1].sem_num = 1; + bufs[1].sem_op = 2; + bufs[1].sem_flg = 0; + + bufs[2].sem_num = 2; + bufs[2].sem_op = 3; + bufs[2].sem_flg = 0; + + bufs[3].sem_num = 0; + bufs[3].sem_op = -5; + bufs[3].sem_flg = 0; + + bufs[4].sem_num = 2; + bufs[4].sem_op = 2; + bufs[4].sem_flg = 0; + + ASSERT_THAT(semop(sem.get(), bufs, ABSL_ARRAYSIZE(bufs)), SyscallSucceeds()); + + ASSERT_THAT(semctl(sem.get(), 0, GETVAL), SyscallSucceedsWithValue(5)); + ASSERT_THAT(semctl(sem.get(), 1, GETVAL), SyscallSucceedsWithValue(2)); + ASSERT_THAT(semctl(sem.get(), 2, GETVAL), SyscallSucceedsWithValue(5)); + ASSERT_THAT(semctl(sem.get(), 3, GETVAL), SyscallSucceedsWithValue(0)); + + for (auto& b : bufs) { + b.sem_op = -b.sem_op; + } + // 0 and 3 order must be reversed, otherwise it will block. + std::swap(bufs[0].sem_op, bufs[3].sem_op); + ASSERT_THAT(RetryEINTR(semop)(sem.get(), bufs, ABSL_ARRAYSIZE(bufs)), + SyscallSucceeds()); + + // All semaphores should be back to 0 now. + for (size_t i = 0; i < 4; ++i) { + ASSERT_THAT(semctl(sem.get(), i, GETVAL), SyscallSucceedsWithValue(0)); + } +} + +// Makes a best effort attempt to ensure that operation would block. 
+TEST(SemaphoreTest, SemOpBlock) { + AutoSem sem(semget(IPC_PRIVATE, 1, 0600 | IPC_CREAT)); + ASSERT_THAT(sem.get(), SyscallSucceeds()); + + std::atomic<int> blocked = ATOMIC_VAR_INIT(1); + ScopedThread th([&sem, &blocked] { + absl::SleepFor(absl::Milliseconds(100)); + ASSERT_EQ(blocked.load(), 1); + + struct sembuf buf = {}; + buf.sem_op = 1; + ASSERT_THAT(RetryEINTR(semop)(sem.get(), &buf, 1), SyscallSucceeds()); + }); + + struct sembuf buf = {}; + buf.sem_op = -1; + ASSERT_THAT(RetryEINTR(semop)(sem.get(), &buf, 1), SyscallSucceeds()); + blocked.store(0); +} + +// Tests that IPC_NOWAIT returns with no wait. +TEST(SemaphoreTest, SemOpNoBlock) { + AutoSem sem(semget(IPC_PRIVATE, 1, 0600 | IPC_CREAT)); + ASSERT_THAT(sem.get(), SyscallSucceeds()); + + struct sembuf buf = {}; + buf.sem_flg = IPC_NOWAIT; + + buf.sem_op = -1; + ASSERT_THAT(semop(sem.get(), &buf, 1), SyscallFailsWithErrno(EAGAIN)); + + buf.sem_op = 1; + ASSERT_THAT(semop(sem.get(), &buf, 1), SyscallSucceeds()); + + buf.sem_op = 0; + ASSERT_THAT(semop(sem.get(), &buf, 1), SyscallFailsWithErrno(EAGAIN)); +} + +// Test runs 2 threads, one signals the other waits the same number of times. +TEST(SemaphoreTest, SemOpSimple) { + AutoSem sem(semget(IPC_PRIVATE, 1, 0600 | IPC_CREAT)); + ASSERT_THAT(sem.get(), SyscallSucceeds()); + + constexpr size_t kLoops = 100; + ScopedThread th([&sem] { + struct sembuf buf = {}; + buf.sem_op = 1; + for (size_t i = 0; i < kLoops; i++) { + // Sleep to prevent making all increments in one shot without letting + // the waiter wait. + absl::SleepFor(absl::Milliseconds(1)); + ASSERT_THAT(semop(sem.get(), &buf, 1), SyscallSucceeds()); + } + }); + + struct sembuf buf = {}; + buf.sem_op = -1; + for (size_t i = 0; i < kLoops; i++) { + ASSERT_THAT(RetryEINTR(semop)(sem.get(), &buf, 1), SyscallSucceeds()); + } +} + +// Tests that semaphore can be removed while there are waiters. +// NoRandomSave: Test relies on timing that random save throws off. 
+TEST(SemaphoreTest, SemOpRemoveWithWaiter_NoRandomSave) { + AutoSem sem(semget(IPC_PRIVATE, 2, 0600 | IPC_CREAT)); + ASSERT_THAT(sem.get(), SyscallSucceeds()); + + ScopedThread th([&sem] { + absl::SleepFor(absl::Milliseconds(250)); + ASSERT_THAT(semctl(sem.release(), 0, IPC_RMID), SyscallSucceeds()); + }); + + // This must happen before IPC_RMID runs above. Otherwise it fails with EINVAL + // instead because the semaphire has already been removed. + struct sembuf buf = {}; + buf.sem_op = -1; + ASSERT_THAT(RetryEINTR(semop)(sem.get(), &buf, 1), + SyscallFailsWithErrno(EIDRM)); +} + +// Semaphore isn't fair. It will execute any waiter that can satisfy the +// request even if it gets in front of other waiters. +TEST(SemaphoreTest, SemOpBestFitExecution) { + AutoSem sem(semget(IPC_PRIVATE, 1, 0600 | IPC_CREAT)); + ASSERT_THAT(sem.get(), SyscallSucceeds()); + + ScopedThread th([&sem] { + struct sembuf buf = {}; + buf.sem_op = -2; + ASSERT_THAT(RetryEINTR(semop)(sem.get(), &buf, 1), SyscallFails()); + // Ensure that wait will only unblock when the semaphore is removed. On + // EINTR retry it may race with deletion and return EINVAL. + ASSERT_TRUE(errno == EIDRM || errno == EINVAL) << "errno=" << errno; + }); + + // Ensures that '-1' below will unblock even though '-10' above is waiting + // for the same semaphore. + for (size_t i = 0; i < 10; ++i) { + struct sembuf buf = {}; + buf.sem_op = 1; + ASSERT_THAT(RetryEINTR(semop)(sem.get(), &buf, 1), SyscallSucceeds()); + + absl::SleepFor(absl::Milliseconds(10)); + + buf.sem_op = -1; + ASSERT_THAT(RetryEINTR(semop)(sem.get(), &buf, 1), SyscallSucceeds()); + } + + ASSERT_THAT(semctl(sem.release(), 0, IPC_RMID), SyscallSucceeds()); +} + +// Executes random operations in multiple threads and verify correctness. 
+TEST(SemaphoreTest, SemOpRandom) { + // Don't do cooperative S/R tests because there are too many syscalls in + // this test, + const DisableSave ds; + + AutoSem sem(semget(IPC_PRIVATE, 1, 0600 | IPC_CREAT)); + ASSERT_THAT(sem.get(), SyscallSucceeds()); + + // Protects the seed below. + absl::Mutex mutex; + uint32_t seed = time(nullptr); + + int count = 0; // Tracks semaphore value. + bool done = false; // Tells waiters to stop after signal threads are done. + + // These threads will wait in a loop. + std::unique_ptr<ScopedThread> decs[5]; + for (auto& dec : decs) { + dec = absl::make_unique<ScopedThread>([&sem, &mutex, &count, &seed, &done] { + for (size_t i = 0; i < 500; ++i) { + int16_t val; + { + absl::MutexLock l(&mutex); + if (done) { + return; + } + val = (rand_r(&seed) % 10 + 1); // Rand between 1 and 10. + count -= val; + } + struct sembuf buf = {}; + buf.sem_op = -val; + ASSERT_THAT(RetryEINTR(semop)(sem.get(), &buf, 1), SyscallSucceeds()); + absl::SleepFor(absl::Milliseconds(val * 2)); + } + }); + } + + // These threads will wait for zero in a loop. + std::unique_ptr<ScopedThread> zeros[5]; + for (auto& zero : zeros) { + zero = absl::make_unique<ScopedThread>([&sem, &mutex, &done] { + for (size_t i = 0; i < 500; ++i) { + { + absl::MutexLock l(&mutex); + if (done) { + return; + } + } + struct sembuf buf = {}; + buf.sem_op = 0; + ASSERT_THAT(RetryEINTR(semop)(sem.get(), &buf, 1), SyscallSucceeds()); + absl::SleepFor(absl::Milliseconds(10)); + } + }); + } + + // These threads will signal in a loop. + std::unique_ptr<ScopedThread> incs[5]; + for (auto& inc : incs) { + inc = absl::make_unique<ScopedThread>([&sem, &mutex, &count, &seed] { + for (size_t i = 0; i < 500; ++i) { + int16_t val; + { + absl::MutexLock l(&mutex); + val = (rand_r(&seed) % 10 + 1); // Rand between 1 and 10. 
+ count += val; + } + struct sembuf buf = {}; + buf.sem_op = val; + ASSERT_THAT(semop(sem.get(), &buf, 1), SyscallSucceeds()); + absl::SleepFor(absl::Milliseconds(val * 2)); + } + }); + } + + // First wait for signal threads to be done. + for (auto& inc : incs) { + inc->Join(); + } + + // Now there could be waiters blocked (remember operations are random). + // Notify waiters that we're done and signal semaphore just the right amount. + { + absl::MutexLock l(&mutex); + done = true; + struct sembuf buf = {}; + buf.sem_op = -count; + ASSERT_THAT(semop(sem.get(), &buf, 1), SyscallSucceeds()); + } + + // Now all waiters should unblock and exit. + for (auto& dec : decs) { + dec->Join(); + } + for (auto& zero : zeros) { + zero->Join(); + } +} + +TEST(SemaphoreTest, SemOpNamespace) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))); + + AutoSem sem(semget(123, 1, 0600 | IPC_CREAT | IPC_EXCL)); + ASSERT_THAT(sem.get(), SyscallSucceeds()); + + ScopedThread([]() { + EXPECT_THAT(unshare(CLONE_NEWIPC), SyscallSucceeds()); + AutoSem sem(semget(123, 1, 0600 | IPC_CREAT | IPC_EXCL)); + ASSERT_THAT(sem.get(), SyscallSucceeds()); + }); +} + +TEST(SemaphoreTest, SemCtlVal) { + AutoSem sem(semget(IPC_PRIVATE, 1, 0600 | IPC_CREAT)); + ASSERT_THAT(sem.get(), SyscallSucceeds()); + + // Semaphore must start with 0. + EXPECT_THAT(semctl(sem.get(), 0, GETVAL), SyscallSucceedsWithValue(0)); + + // Increase value and ensure waiters are woken up. + ScopedThread th([&sem] { + struct sembuf buf = {}; + buf.sem_op = -10; + ASSERT_THAT(RetryEINTR(semop)(sem.get(), &buf, 1), SyscallSucceeds()); + }); + + ASSERT_THAT(semctl(sem.get(), 0, SETVAL, 9), SyscallSucceeds()); + EXPECT_THAT(semctl(sem.get(), 0, GETVAL), SyscallSucceedsWithValue(9)); + + ASSERT_THAT(semctl(sem.get(), 0, SETVAL, 20), SyscallSucceeds()); + const int value = semctl(sem.get(), 0, GETVAL); + // 10 or 20 because it could have raced with waiter above. 
+ EXPECT_TRUE(value == 10 || value == 20) << "value=" << value; + th.Join(); + + // Set it back to 0 and ensure that waiters are woken up. + ScopedThread thZero([&sem] { + struct sembuf buf = {}; + buf.sem_op = 0; + ASSERT_THAT(RetryEINTR(semop)(sem.get(), &buf, 1), SyscallSucceeds()); + }); + ASSERT_THAT(semctl(sem.get(), 0, SETVAL, 0), SyscallSucceeds()); + EXPECT_THAT(semctl(sem.get(), 0, GETVAL), SyscallSucceedsWithValue(0)); + thZero.Join(); +} + +TEST(SemaphoreTest, SemIpcSet) { + // Drop CAP_IPC_OWNER which allows us to bypass semaphore permissions. + ASSERT_NO_ERRNO(SetCapability(CAP_IPC_OWNER, false)); + + AutoSem sem(semget(IPC_PRIVATE, 1, 0600 | IPC_CREAT)); + ASSERT_THAT(sem.get(), SyscallSucceeds()); + + struct semid_ds semid = {}; + semid.sem_perm.uid = getuid(); + semid.sem_perm.gid = getgid(); + + // Make semaphore readonly and check that signal fails. + semid.sem_perm.mode = 0400; + EXPECT_THAT(semctl(sem.get(), 0, IPC_SET, &semid), SyscallSucceeds()); + struct sembuf buf = {}; + buf.sem_op = 1; + ASSERT_THAT(semop(sem.get(), &buf, 1), SyscallFailsWithErrno(EACCES)); + + // Make semaphore writeonly and check that wait for zero fails. + semid.sem_perm.mode = 0200; + EXPECT_THAT(semctl(sem.get(), 0, IPC_SET, &semid), SyscallSucceeds()); + buf.sem_op = 0; + ASSERT_THAT(semop(sem.get(), &buf, 1), SyscallFailsWithErrno(EACCES)); +} + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/sendfile.cc b/test/syscalls/linux/sendfile.cc new file mode 100644 index 000000000..92b7b9478 --- /dev/null +++ b/test/syscalls/linux/sendfile.cc @@ -0,0 +1,409 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <fcntl.h> +#include <sys/sendfile.h> +#include <unistd.h> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/strings/string_view.h" +#include "test/util/file_descriptor.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +TEST(SendFileTest, SendZeroBytes) { + // Create temp files. + const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + + // Open the input file as read only. + const FileDescriptor inf = + ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY)); + + // Open the output file as write only. + const FileDescriptor outf = + ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_WRONLY)); + + // Send data and verify that sendfile returns the correct value. + EXPECT_THAT(sendfile(outf.get(), inf.get(), nullptr, 0), + SyscallSucceedsWithValue(0)); +} + +TEST(SendFileTest, SendTrivially) { + // Create temp files. + constexpr char kData[] = "To be, or not to be, that is the question:"; + constexpr int kDataSize = sizeof(kData) - 1; + const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), kData, TempPath::kDefaultFileMode)); + const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + + // Open the input file as read only. + const FileDescriptor inf = + ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY)); + + // Open the output file as write only. 
+ FileDescriptor outf; + outf = ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_WRONLY)); + + // Send data and verify that sendfile returns the correct value. + int bytes_sent; + EXPECT_THAT(bytes_sent = sendfile(outf.get(), inf.get(), nullptr, kDataSize), + SyscallSucceedsWithValue(kDataSize)); + + // Close outf to avoid leak. + outf.reset(); + + // Open the output file as read only. + outf = ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_RDONLY)); + + // Verify that the output file has the correct data. + char actual[kDataSize]; + ASSERT_THAT(read(outf.get(), &actual, bytes_sent), + SyscallSucceedsWithValue(kDataSize)); + EXPECT_EQ(kData, absl::string_view(actual, bytes_sent)); +} + +TEST(SendFileTest, SendTriviallyWithBothFilesReadWrite) { + // Create temp files. + constexpr char kData[] = "Whether 'tis nobler in the mind to suffer"; + constexpr int kDataSize = sizeof(kData) - 1; + const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), kData, TempPath::kDefaultFileMode)); + const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + + // Open the input file as readwrite. + const FileDescriptor inf = + ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDWR)); + + // Open the output file as readwrite. + FileDescriptor outf; + outf = ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_RDWR)); + + // Send data and verify that sendfile returns the correct value. + int bytes_sent; + EXPECT_THAT(bytes_sent = sendfile(outf.get(), inf.get(), nullptr, kDataSize), + SyscallSucceedsWithValue(kDataSize)); + + // Close outf to avoid leak. + outf.reset(); + + // Open the output file as read only. + outf = ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_RDONLY)); + + // Verify that the output file has the correct data. 
+ char actual[kDataSize]; + ASSERT_THAT(read(outf.get(), &actual, bytes_sent), + SyscallSucceedsWithValue(kDataSize)); + EXPECT_EQ(kData, absl::string_view(actual, bytes_sent)); +} + +TEST(SendFileTest, SendAndUpdateFileOffset) { + // Create temp files. + // Test input std::string length must be > 2 AND even. + constexpr char kData[] = "The slings and arrows of outrageous fortune,"; + constexpr int kDataSize = sizeof(kData) - 1; + constexpr int kHalfDataSize = kDataSize / 2; + const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), kData, TempPath::kDefaultFileMode)); + const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + + // Open the input file as read only. + const FileDescriptor inf = + ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY)); + + // Open the output file as write only. + FileDescriptor outf; + outf = ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_WRONLY)); + + // Send data and verify that sendfile returns the correct value. + int bytes_sent; + EXPECT_THAT( + bytes_sent = sendfile(outf.get(), inf.get(), nullptr, kHalfDataSize), + SyscallSucceedsWithValue(kHalfDataSize)); + + // Close outf to avoid leak. + outf.reset(); + + // Open the output file as read only. + outf = ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_RDONLY)); + + // Verify that the output file has the correct data. 
+ char actual[kHalfDataSize]; + ASSERT_THAT(read(outf.get(), &actual, bytes_sent), + SyscallSucceedsWithValue(kHalfDataSize)); + EXPECT_EQ(absl::string_view(kData, kHalfDataSize), + absl::string_view(actual, bytes_sent)); + + // Verify that the input file offset has been updated + ASSERT_THAT(read(inf.get(), &actual, kDataSize - bytes_sent), + SyscallSucceedsWithValue(kHalfDataSize)); + EXPECT_EQ( + absl::string_view(kData + kDataSize - bytes_sent, kDataSize - bytes_sent), + absl::string_view(actual, kHalfDataSize)); +} + +TEST(SendFileTest, SendAndUpdateFileOffsetFromNonzeroStartingPoint) { + // Create temp files. + // Test input std::string length must be > 2 AND divisible by 4. + constexpr char kData[] = "The slings and arrows of outrageous fortune,"; + constexpr int kDataSize = sizeof(kData) - 1; + constexpr int kHalfDataSize = kDataSize / 2; + constexpr int kQuarterDataSize = kHalfDataSize / 2; + const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), kData, TempPath::kDefaultFileMode)); + const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + + // Open the input file as read only. + const FileDescriptor inf = + ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY)); + + // Open the output file as write only. + FileDescriptor outf; + outf = ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_WRONLY)); + + // Read a quarter of the data from the infile which should update the file + // offset, we don't actually care about the data so it goes into the garbage. + char garbage[kQuarterDataSize]; + ASSERT_THAT(read(inf.get(), &garbage, kQuarterDataSize), + SyscallSucceedsWithValue(kQuarterDataSize)); + + // Send data and verify that sendfile returns the correct value. + int bytes_sent; + EXPECT_THAT( + bytes_sent = sendfile(outf.get(), inf.get(), nullptr, kHalfDataSize), + SyscallSucceedsWithValue(kHalfDataSize)); + + // Close out_fd to avoid leak. 
+ outf.reset(); + + // Open the output file as read only. + outf = ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_RDONLY)); + + // Verify that the output file has the correct data. + char actual[kHalfDataSize]; + ASSERT_THAT(read(outf.get(), &actual, bytes_sent), + SyscallSucceedsWithValue(kHalfDataSize)); + EXPECT_EQ(absl::string_view(kData + kQuarterDataSize, kHalfDataSize), + absl::string_view(actual, bytes_sent)); + + // Verify that the input file offset has been updated + ASSERT_THAT(read(inf.get(), &actual, kQuarterDataSize), + SyscallSucceedsWithValue(kQuarterDataSize)); + + EXPECT_EQ( + absl::string_view(kData + kDataSize - kQuarterDataSize, kQuarterDataSize), + absl::string_view(actual, kQuarterDataSize)); +} + +TEST(SendFileTest, SendAndUpdateGivenOffset) { + // Create temp files. + // Test input std::string length must be >= 4 AND divisible by 4. + constexpr char kData[] = "Or to take Arms against a Sea of troubles,"; + constexpr int kDataSize = sizeof(kData) + 1; + constexpr int kHalfDataSize = kDataSize / 2; + constexpr int kQuarterDataSize = kHalfDataSize / 2; + constexpr int kThreeFourthsDataSize = 3 * kDataSize / 4; + + const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), kData, TempPath::kDefaultFileMode)); + const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + + // Open the input file as read only. + const FileDescriptor inf = + ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY)); + + // Open the output file as write only. + FileDescriptor outf; + outf = ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_WRONLY)); + + // Create offset for sending. + off_t offset = kQuarterDataSize; + + // Send data and verify that sendfile returns the correct value. + int bytes_sent; + EXPECT_THAT( + bytes_sent = sendfile(outf.get(), inf.get(), &offset, kHalfDataSize), + SyscallSucceedsWithValue(kHalfDataSize)); + + // Close out_fd to avoid leak. 
+ outf.reset(); + + // Open the output file as read only. + outf = ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_RDONLY)); + + // Verify that the output file has the correct data. + char actual[kHalfDataSize]; + ASSERT_THAT(read(outf.get(), &actual, bytes_sent), + SyscallSucceedsWithValue(kHalfDataSize)); + EXPECT_EQ(absl::string_view(kData + kQuarterDataSize, kHalfDataSize), + absl::string_view(actual, bytes_sent)); + + // Verify that the input file offset has NOT been updated. + ASSERT_THAT(read(inf.get(), &actual, kHalfDataSize), + SyscallSucceedsWithValue(kHalfDataSize)); + EXPECT_EQ(absl::string_view(kData, kHalfDataSize), + absl::string_view(actual, kHalfDataSize)); + + // Verify that the offset pointer has been updated. + EXPECT_EQ(offset, kThreeFourthsDataSize); +} + +TEST(SendFileTest, DoNotSendfileIfOutfileIsAppendOnly) { + // Create temp files. + constexpr char kData[] = "And by opposing end them: to die, to sleep"; + constexpr int kDataSize = sizeof(kData) - 1; + + const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), kData, TempPath::kDefaultFileMode)); + const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + + // Open the input file as read only. + const FileDescriptor inf = + ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY)); + + // Open the output file as append only. + const FileDescriptor outf = + ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_APPEND)); + + // Send data and verify that sendfile returns the correct errno. + EXPECT_THAT(sendfile(outf.get(), inf.get(), nullptr, kDataSize), + SyscallFailsWithErrno(EBADF)); +} + +TEST(SendFileTest, DoNotSendfileIfOutfileIsNotWritable) { + // Create temp files. 
+ constexpr char kData[] = "No more; and by a sleep, to say we end"; + constexpr int kDataSize = sizeof(kData) - 1; + + const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), kData, TempPath::kDefaultFileMode)); + const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + + // Open the input file as read only. + const FileDescriptor inf = + ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY)); + + // Open the output file as read only. + const FileDescriptor outf = + ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_RDONLY)); + + // Send data and verify that sendfile returns the correct errno. + EXPECT_THAT(sendfile(outf.get(), inf.get(), nullptr, kDataSize), + SyscallFailsWithErrno(EBADF)); +} + +TEST(SendFileTest, DoNotSendfileIfInfileIsNotReadable) { + // Create temp files. + constexpr char kData[] = "the heart-ache, and the thousand natural shocks"; + constexpr int kDataSize = sizeof(kData) - 1; + + const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), kData, TempPath::kDefaultFileMode)); + const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + + // Open the input file as write only. + const FileDescriptor inf = + ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_WRONLY)); + + // Open the output file as write only. + const FileDescriptor outf = + ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_WRONLY)); + + // Send data and verify that sendfile returns the correct errno. + EXPECT_THAT(sendfile(outf.get(), inf.get(), nullptr, kDataSize), + SyscallFailsWithErrno(EBADF)); +} + +TEST(SendFileTest, DoNotSendANegativeNumberOfBytes) { + // Create temp files. + constexpr char kData[] = "that Flesh is heir to? 
'Tis a consummation"; + + const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), kData, TempPath::kDefaultFileMode)); + const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + + // Open the input file as read only. + const FileDescriptor inf = + ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY)); + + // Open the output file as write only. + const FileDescriptor outf = + ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_WRONLY)); + + // Send data and verify that sendfile returns the correct errno. + EXPECT_THAT(sendfile(outf.get(), inf.get(), nullptr, -1), + SyscallFailsWithErrno(EINVAL)); +} + +TEST(SendFileTest, SendTheCorrectNumberOfBytesEvenIfWeTryToSendTooManyBytes) { + // Create temp files. + constexpr char kData[] = "devoutly to be wished. To die, to sleep,"; + constexpr int kDataSize = sizeof(kData) - 1; + + const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), kData, TempPath::kDefaultFileMode)); + const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + + // Open the input file as read only. + const FileDescriptor inf = + ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY)); + + // Open the output file as write only. + FileDescriptor outf; + outf = ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_WRONLY)); + + // Send data and verify that sendfile returns the correct value. + int bytes_sent; + EXPECT_THAT( + bytes_sent = sendfile(outf.get(), inf.get(), nullptr, kDataSize + 100), + SyscallSucceedsWithValue(kDataSize)); + + // Close outf to avoid leak. + outf.reset(); + + // Open the output file as read only. + outf = ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_RDONLY)); + + // Verify that the output file has the correct data. 
+ char actual[kDataSize]; + ASSERT_THAT(read(outf.get(), &actual, bytes_sent), + SyscallSucceedsWithValue(kDataSize)); + EXPECT_EQ(kData, absl::string_view(actual, bytes_sent)); +} + +TEST(SendFileTest, SendToNotARegularFile) { + // Make temp input directory and open as read only. + const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const FileDescriptor inf = + ASSERT_NO_ERRNO_AND_VALUE(Open(dir.path(), O_RDONLY)); + + // Make temp output file and open as write only. + const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const FileDescriptor outf = + ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_WRONLY)); + + // Receive an error since a directory is not a regular file. + EXPECT_THAT(sendfile(outf.get(), inf.get(), nullptr, 0), + SyscallFailsWithErrno(EINVAL)); +} +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/sendfile_socket.cc b/test/syscalls/linux/sendfile_socket.cc new file mode 100644 index 000000000..7010dc211 --- /dev/null +++ b/test/syscalls/linux/sendfile_socket.cc @@ -0,0 +1,156 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <arpa/inet.h> +#include <netinet/in.h> +#include <sys/sendfile.h> +#include <sys/socket.h> +#include <unistd.h> +#include <vector> + +#include "gtest/gtest.h" +#include "absl/strings/string_view.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { +namespace { + +// Sends large file to exercise the path that read and writes data multiple +// times, esp. when more data is read than can be written. +TEST(SendFileTest, SendMultiple) { + std::vector<char> data(5 * 1024 * 1024); + RandomizeBuffer(data.data(), data.size()); + + // Create temp files. + const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), absl::string_view(data.data(), data.size()), + TempPath::kDefaultFileMode)); + const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + + // Use a socket for target file to make the write window small. + const FileDescriptor server(socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)); + ASSERT_THAT(server.get(), SyscallSucceeds()); + + struct sockaddr_in server_addr = {}; + server_addr.sin_family = AF_INET; + server_addr.sin_addr.s_addr = INADDR_ANY; + ASSERT_THAT( + bind(server.get(), reinterpret_cast<struct sockaddr *>(&server_addr), + sizeof(server_addr)), + SyscallSucceeds()); + ASSERT_THAT(listen(server.get(), 1), SyscallSucceeds()); + + // Thread that reads data from socket and dumps to a file. + ScopedThread th([&server, &out_file, &server_addr] { + socklen_t addrlen = sizeof(server_addr); + const FileDescriptor fd(RetryEINTR(accept)( + server.get(), reinterpret_cast<struct sockaddr *>(&server_addr), + &addrlen)); + ASSERT_THAT(fd.get(), SyscallSucceeds()); + + FileDescriptor outf = + ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_WRONLY)); + + // Read until socket is closed. 
+ char buf[10240]; + for (int cnt = 0;; cnt++) { + int r = RetryEINTR(read)(fd.get(), buf, sizeof(buf)); + // We cannot afford to save on every read() call. + if (cnt % 1000 == 0) { + ASSERT_THAT(r, SyscallSucceeds()); + } else { + const DisableSave ds; + ASSERT_THAT(r, SyscallSucceeds()); + } + if (r == 0) { + // EOF + break; + } + int w = RetryEINTR(write)(outf.get(), buf, r); + // We cannot afford to save on every write() call. + if (cnt % 1010 == 0) { + ASSERT_THAT(w, SyscallSucceedsWithValue(r)); + } else { + const DisableSave ds; + ASSERT_THAT(w, SyscallSucceedsWithValue(r)); + } + } + }); + + // Open the input file as read only. + const FileDescriptor inf = + ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY)); + + FileDescriptor outf(socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)); + ASSERT_THAT(outf.get(), SyscallSucceeds()); + + // Get the port bound by the listening socket. + socklen_t addrlen = sizeof(server_addr); + ASSERT_THAT(getsockname(server.get(), + reinterpret_cast<sockaddr *>(&server_addr), &addrlen), + SyscallSucceeds()); + + struct sockaddr_in addr = {}; + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = inet_addr("127.0.0.1"); + addr.sin_port = server_addr.sin_port; + LOG(INFO) << "Connecting on port=" << server_addr.sin_port; + ASSERT_THAT( + RetryEINTR(connect)( + outf.get(), reinterpret_cast<struct sockaddr *>(&addr), sizeof(addr)), + SyscallSucceeds()); + + int cnt = 0; + for (size_t sent = 0; sent < data.size(); cnt++) { + const size_t remain = data.size() - sent; + LOG(INFO) << "sendfile, size=" << data.size() << ", sent=" << sent + << ", remain=" << remain; + + // Send data and verify that sendfile returns the correct value. + int res = sendfile(outf.get(), inf.get(), nullptr, remain); + // We cannot afford to save on every sendfile() call. 
+ if (cnt % 120 == 0) { + MaybeSave(); + } + if (res == 0) { + // EOF + break; + } + if (res > 0) { + sent += res; + } else { + ASSERT_TRUE(errno == EINTR || errno == EAGAIN) << "errno=" << errno; + } + } + + // Close socket to stop thread. + outf.reset(); + th.Join(); + + // Verify that the output file has the correct data. + outf = ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_RDONLY)); + std::vector<char> actual(data.size(), '\0'); + ASSERT_THAT(RetryEINTR(read)(outf.get(), actual.data(), actual.size()), + SyscallSucceedsWithValue(actual.size())); + ASSERT_EQ(memcmp(data.data(), actual.data(), data.size()), 0); +} + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/shm.cc b/test/syscalls/linux/shm.cc new file mode 100644 index 000000000..9f57476c9 --- /dev/null +++ b/test/syscalls/linux/shm.cc @@ -0,0 +1,445 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <stdio.h> + +#include <sys/ipc.h> +#include <sys/mman.h> +#include <sys/shm.h> +#include <sys/types.h> + +#include "absl/time/clock.h" +#include "test/util/multiprocess_util.h" +#include "test/util/posix_error.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { +namespace { + +using ::testing::_; + +const uint64_t kAllocSize = kPageSize * 128ULL; + +PosixErrorOr<int> Shmget(key_t key, size_t size, int shmflg) { + int id = shmget(key, size, shmflg); + if (id == -1) { + return PosixError(errno, "shmget() failed"); + } + return id; +} + +PosixErrorOr<char*> Shmat(int shmid, const void* shmaddr, int shmflg) { + const intptr_t addr = + reinterpret_cast<intptr_t>(shmat(shmid, shmaddr, shmflg)); + if (addr == -1) { + return PosixError(errno, "shmat() failed"); + } + return reinterpret_cast<char*>(addr); +} + +PosixError Shmdt(const char* shmaddr) { + const int ret = shmdt(shmaddr); + if (ret == -1) { + return PosixError(errno, "shmdt() failed"); + } + return NoError(); +} + +template <typename T> +PosixErrorOr<int> Shmctl(int shmid, int cmd, T* buf) { + int ret = shmctl(shmid, cmd, reinterpret_cast<struct shmid_ds*>(buf)); + if (ret == -1) { + return PosixError(errno, "shmctl() failed"); + } + return ret; +} + +TEST(ShmTest, AttachDetach) { + const int id = ASSERT_NO_ERRNO_AND_VALUE( + Shmget(IPC_PRIVATE, kAllocSize, IPC_CREAT | 0777)); + struct shmid_ds attr; + ASSERT_NO_ERRNO(Shmctl(id, IPC_STAT, &attr)); + EXPECT_EQ(attr.shm_segsz, kAllocSize); + EXPECT_EQ(attr.shm_nattch, 0); + + const char* addr = ASSERT_NO_ERRNO_AND_VALUE(Shmat(id, nullptr, 0)); + ASSERT_NO_ERRNO(Shmctl(id, IPC_STAT, &attr)); + EXPECT_EQ(attr.shm_nattch, 1); + + const char* addr2 = ASSERT_NO_ERRNO_AND_VALUE(Shmat(id, nullptr, 0)); + ASSERT_NO_ERRNO(Shmctl(id, IPC_STAT, &attr)); + EXPECT_EQ(attr.shm_nattch, 2); + + ASSERT_NO_ERRNO(Shmdt(addr)); + ASSERT_NO_ERRNO(Shmctl(id, IPC_STAT, &attr)); + EXPECT_EQ(attr.shm_nattch, 1); + + 
ASSERT_NO_ERRNO(Shmdt(addr2)); + ASSERT_NO_ERRNO(Shmctl(id, IPC_STAT, &attr)); + EXPECT_EQ(attr.shm_nattch, 0); +} + +TEST(ShmTest, LookupByKey) { + const TempPath keyfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const key_t key = ftok(keyfile.path().c_str(), 1); + const int id = + ASSERT_NO_ERRNO_AND_VALUE(Shmget(key, kAllocSize, IPC_CREAT | 0777)); + const int id2 = ASSERT_NO_ERRNO_AND_VALUE(Shmget(key, kAllocSize, 0777)); + EXPECT_EQ(id, id2); +} + +TEST(ShmTest, DetachedSegmentsPersist) { + const int id = ASSERT_NO_ERRNO_AND_VALUE( + Shmget(IPC_PRIVATE, kAllocSize, IPC_CREAT | 0777)); + char* addr = ASSERT_NO_ERRNO_AND_VALUE(Shmat(id, nullptr, 0)); + addr[0] = 'x'; + ASSERT_NO_ERRNO(Shmdt(addr)); + + // We should be able to re-attach to the same segment and get our data back. + addr = ASSERT_NO_ERRNO_AND_VALUE(Shmat(id, nullptr, 0)); + EXPECT_EQ(addr[0], 'x'); + ASSERT_NO_ERRNO(Shmdt(addr)); +} + +TEST(ShmTest, MultipleDetachFails) { + const int id = ASSERT_NO_ERRNO_AND_VALUE( + Shmget(IPC_PRIVATE, kAllocSize, IPC_CREAT | 0777)); + const char* addr = ASSERT_NO_ERRNO_AND_VALUE(Shmat(id, nullptr, 0)); + ASSERT_NO_ERRNO(Shmdt(addr)); + EXPECT_THAT(Shmdt(addr), PosixErrorIs(EINVAL, _)); +} + +TEST(ShmTest, IpcStat) { + const TempPath keyfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const key_t key = ftok(keyfile.path().c_str(), 1); + + const time_t start = time(nullptr); + + const int id = + ASSERT_NO_ERRNO_AND_VALUE(Shmget(key, kAllocSize, IPC_CREAT | 0777)); + + const uid_t uid = getuid(); + const gid_t gid = getgid(); + const pid_t pid = getpid(); + + struct shmid_ds attr; + ASSERT_NO_ERRNO(Shmctl(id, IPC_STAT, &attr)); + + EXPECT_EQ(attr.shm_perm.__key, key); + EXPECT_EQ(attr.shm_perm.uid, uid); + EXPECT_EQ(attr.shm_perm.gid, gid); + EXPECT_EQ(attr.shm_perm.cuid, uid); + EXPECT_EQ(attr.shm_perm.cgid, gid); + EXPECT_EQ(attr.shm_perm.mode, 0777); + + EXPECT_EQ(attr.shm_segsz, kAllocSize); + + EXPECT_EQ(attr.shm_atime, 0); + 
EXPECT_EQ(attr.shm_dtime, 0); + + // Change time is set on creation. + EXPECT_GE(attr.shm_ctime, start); + + EXPECT_EQ(attr.shm_cpid, pid); + EXPECT_EQ(attr.shm_lpid, 0); + + EXPECT_EQ(attr.shm_nattch, 0); + + // The timestamps only have a resolution of seconds; slow down so we actually + // see the timestamps change. + absl::SleepFor(absl::Seconds(1)); + const time_t pre_attach = time(nullptr); + + const char* addr = ASSERT_NO_ERRNO_AND_VALUE(Shmat(id, nullptr, 0)); + ASSERT_NO_ERRNO(Shmctl(id, IPC_STAT, &attr)); + + EXPECT_GE(attr.shm_atime, pre_attach); + EXPECT_EQ(attr.shm_dtime, 0); + EXPECT_LT(attr.shm_ctime, pre_attach); + EXPECT_EQ(attr.shm_lpid, pid); + EXPECT_EQ(attr.shm_nattch, 1); + + absl::SleepFor(absl::Seconds(1)); + const time_t pre_detach = time(nullptr); + + ASSERT_NO_ERRNO(Shmdt(addr)); + ASSERT_NO_ERRNO(Shmctl(id, IPC_STAT, &attr)); + + EXPECT_LT(attr.shm_atime, pre_detach); + EXPECT_GE(attr.shm_dtime, pre_detach); + EXPECT_LT(attr.shm_ctime, pre_detach); + EXPECT_EQ(attr.shm_lpid, pid); + EXPECT_EQ(attr.shm_nattch, 0); +} + +TEST(ShmTest, ShmStat) { + // This test relies on the segment we create to be the first one on the + // system, causing it to occupy slot 1. We can't reasonably expect this on a + // general Linux host. + SKIP_IF(!IsRunningOnGvisor()); + + ASSERT_NO_ERRNO(Shmget(IPC_PRIVATE, kAllocSize, IPC_CREAT | 0777)); + struct shmid_ds attr; + ASSERT_NO_ERRNO(Shmctl(1, SHM_STAT, &attr)); + // This does the same thing as IPC_STAT, so only test that the syscall + // succeeds here. +} + +TEST(ShmTest, IpcInfo) { + struct shminfo info; + ASSERT_NO_ERRNO(Shmctl(0, IPC_INFO, &info)); + + EXPECT_EQ(info.shmmin, 1); // This is always 1, according to the man page. 
+ EXPECT_GT(info.shmmax, info.shmmin); + EXPECT_GT(info.shmmni, 0); + EXPECT_GT(info.shmseg, 0); + EXPECT_GT(info.shmall, 0); +} + +TEST(ShmTest, ShmInfo) { + struct shm_info info; + + // We generally can't know what other processes on a linux machine + // does with shared memory segments, so we can't test specific + // numbers on Linux. When running under gvisor, we're guaranteed to + // be the only ones using shm, so we can easily verify machine-wide + // numbers. + if (IsRunningOnGvisor()) { + ASSERT_NO_ERRNO(Shmctl(0, SHM_INFO, &info)); + EXPECT_EQ(info.used_ids, 0); + EXPECT_EQ(info.shm_tot, 0); + EXPECT_EQ(info.shm_rss, 0); + EXPECT_EQ(info.shm_swp, 0); + } + + const int id = ASSERT_NO_ERRNO_AND_VALUE( + Shmget(IPC_PRIVATE, kAllocSize, IPC_CREAT | 0777)); + const char* addr = ASSERT_NO_ERRNO_AND_VALUE(Shmat(id, nullptr, 0)); + + ASSERT_NO_ERRNO(Shmctl(1, SHM_INFO, &info)); + + if (IsRunningOnGvisor()) { + ASSERT_NO_ERRNO(Shmctl(id, SHM_INFO, &info)); + EXPECT_EQ(info.used_ids, 1); + EXPECT_EQ(info.shm_tot, kAllocSize / kPageSize); + EXPECT_EQ(info.shm_rss, kAllocSize / kPageSize); + EXPECT_EQ(info.shm_swp, 0); // Gvisor currently never swaps. 
+ } + + ASSERT_NO_ERRNO(Shmdt(addr)); +} + +TEST(ShmTest, ShmCtlSet) { + const int id = ASSERT_NO_ERRNO_AND_VALUE( + Shmget(IPC_PRIVATE, kAllocSize, IPC_CREAT | 0777)); + const char* addr = ASSERT_NO_ERRNO_AND_VALUE(Shmat(id, nullptr, 0)); + + struct shmid_ds attr; + ASSERT_NO_ERRNO(Shmctl(id, IPC_STAT, &attr)); + ASSERT_EQ(attr.shm_perm.mode, 0777); + + attr.shm_perm.mode = 0766; + ASSERT_NO_ERRNO(Shmctl(id, IPC_SET, &attr)); + + ASSERT_NO_ERRNO(Shmctl(id, IPC_STAT, &attr)); + ASSERT_EQ(attr.shm_perm.mode, 0766); + + ASSERT_NO_ERRNO(Shmdt(addr)); +} + +TEST(ShmTest, RemovedSegmentsAreMarkedDeleted) { + const int id = ASSERT_NO_ERRNO_AND_VALUE( + Shmget(IPC_PRIVATE, kAllocSize, IPC_CREAT | 0777)); + const char* addr = ASSERT_NO_ERRNO_AND_VALUE(Shmat(id, nullptr, 0)); + ASSERT_NO_ERRNO(Shmctl<void>(id, IPC_RMID, nullptr)); + struct shmid_ds attr; + ASSERT_NO_ERRNO(Shmctl(id, IPC_STAT, &attr)); + EXPECT_NE(attr.shm_perm.mode & SHM_DEST, 0); + ASSERT_NO_ERRNO(Shmdt(addr)); +} + +TEST(ShmTest, RemovedSegmentsAreDestroyed) { + const int id = ASSERT_NO_ERRNO_AND_VALUE( + Shmget(IPC_PRIVATE, kAllocSize, IPC_CREAT | 0777)); + const char* addr = ASSERT_NO_ERRNO_AND_VALUE(Shmat(id, nullptr, 0)); + + const uint64_t alloc_pages = kAllocSize / kPageSize; + + struct shm_info info; + ASSERT_NO_ERRNO(Shmctl(1, SHM_INFO, &info)); + const uint64_t before = info.shm_tot; + + ASSERT_NO_ERRNO(Shmctl<void>(id, IPC_RMID, nullptr)); + ASSERT_NO_ERRNO(Shmdt(addr)); + + ASSERT_NO_ERRNO(Shmctl(1, SHM_INFO, &info)); + const uint64_t after = info.shm_tot; + EXPECT_EQ(after, before - alloc_pages); +} + +TEST(ShmTest, AllowsAttachToRemovedSegmentWithRefs) { + const int id = ASSERT_NO_ERRNO_AND_VALUE( + Shmget(IPC_PRIVATE, kAllocSize, IPC_CREAT | 0777)); + const char* addr = ASSERT_NO_ERRNO_AND_VALUE(Shmat(id, nullptr, 0)); + ASSERT_NO_ERRNO(Shmctl<void>(id, IPC_RMID, nullptr)); + const char* addr2 = ASSERT_NO_ERRNO_AND_VALUE(Shmat(id, nullptr, 0)); + ASSERT_NO_ERRNO(Shmdt(addr)); + 
ASSERT_NO_ERRNO(Shmdt(addr2)); +} + +TEST(ShmTest, RemovedSegmentsAreNotDiscoverable) { + const TempPath keyfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const key_t key = ftok(keyfile.path().c_str(), 1); + const int id = + ASSERT_NO_ERRNO_AND_VALUE(Shmget(key, kAllocSize, IPC_CREAT | 0777)); + ASSERT_NO_ERRNO(Shmctl<void>(id, IPC_RMID, nullptr)); + EXPECT_THAT(Shmget(key, kAllocSize, 0777), PosixErrorIs(ENOENT, _)); +} + +TEST(ShmDeathTest, ReadonlySegment) { + SetupGvisorDeathTest(); + const int id = ASSERT_NO_ERRNO_AND_VALUE( + Shmget(IPC_PRIVATE, kAllocSize, IPC_CREAT | 0777)); + char* addr = ASSERT_NO_ERRNO_AND_VALUE(Shmat(id, nullptr, SHM_RDONLY)); + // Reading succeeds. + static_cast<void>(addr[0]); + // Writing fails. + EXPECT_EXIT(addr[0] = 'x', ::testing::KilledBySignal(SIGSEGV), ""); +} + +TEST(ShmDeathTest, SegmentNotAccessibleAfterDetach) { + // This test is susceptible to races with concurrent mmaps running in parallel + // gtest threads since the test relies on the address freed during a shm + // segment destruction to remain unused. We run the test body in a forked + // child to guarantee a single-threaded context to avoid this. + + SetupGvisorDeathTest(); + + const auto rest = [&] { + const int id = ASSERT_NO_ERRNO_AND_VALUE( + Shmget(IPC_PRIVATE, kAllocSize, IPC_CREAT | 0777)); + char* addr = ASSERT_NO_ERRNO_AND_VALUE(Shmat(id, nullptr, 0)); + addr[0] = 'x'; + ASSERT_NO_ERRNO(Shmdt(addr)); + + // This access should cause a SIGSEGV. 
+ addr[0] = 'x'; + }; + + EXPECT_THAT(InForkedProcess(rest), + IsPosixErrorOkAndHolds(W_EXITCODE(0, SIGSEGV))); +} + +TEST(ShmTest, RequestingSegmentSmallerThanSHMMINFails) { + struct shminfo info; + ASSERT_NO_ERRNO(Shmctl(0, IPC_INFO, &info)); + const uint64_t size = info.shmmin - 1; + EXPECT_THAT(Shmget(IPC_PRIVATE, size, IPC_CREAT | 0777), + PosixErrorIs(EINVAL, _)); +} + +TEST(ShmTest, RequestingSegmentLargerThanSHMMAXFails) { + struct shminfo info; + ASSERT_NO_ERRNO(Shmctl(0, IPC_INFO, &info)); + const uint64_t size = info.shmmax + kPageSize; + EXPECT_THAT(Shmget(IPC_PRIVATE, size, IPC_CREAT | 0777), + PosixErrorIs(EINVAL, _)); +} + +TEST(ShmTest, RequestingUnalignedSizeSucceeds) { + EXPECT_NO_ERRNO(Shmget(IPC_PRIVATE, 4097, IPC_CREAT | 0777)); +} + +TEST(ShmTest, RequestingDuplicateCreationFails) { + const TempPath keyfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const key_t key = ftok(keyfile.path().c_str(), 1); + ASSERT_NO_ERRNO_AND_VALUE( + Shmget(key, kAllocSize, IPC_CREAT | IPC_EXCL | 0777)); + EXPECT_THAT(Shmget(key, kAllocSize, IPC_CREAT | IPC_EXCL | 0777), + PosixErrorIs(EEXIST, _)); +} + +TEST(ShmTest, SegmentsSizeFixedOnCreation) { + const TempPath keyfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const key_t key = ftok(keyfile.path().c_str(), 1); + + // Base segment. + const int id = + ASSERT_NO_ERRNO_AND_VALUE(Shmget(key, kAllocSize, IPC_CREAT | 0777)); + + // Ask for the same segment at half size. This succeeds. + const int id2 = ASSERT_NO_ERRNO_AND_VALUE(Shmget(key, kAllocSize / 2, 0777)); + + // Ask for the same segment at double size. + EXPECT_THAT(Shmget(key, kAllocSize * 2, 0777), PosixErrorIs(EINVAL, _)); + + char* addr = ASSERT_NO_ERRNO_AND_VALUE(Shmat(id, nullptr, 0)); + char* addr2 = ASSERT_NO_ERRNO_AND_VALUE(Shmat(id2, nullptr, 0)); + + // We have 2 different maps... + EXPECT_NE(addr, addr2); + + // ... And both maps are kAllocSize bytes; despite asking for a half-sized + // segment for the second map. 
+ addr[kAllocSize - 1] = 'x'; + addr2[kAllocSize - 1] = 'x'; + + ASSERT_NO_ERRNO(Shmdt(addr)); + ASSERT_NO_ERRNO(Shmdt(addr2)); +} + +TEST(ShmTest, PartialUnmap) { + const int id = ASSERT_NO_ERRNO_AND_VALUE( + Shmget(IPC_PRIVATE, kAllocSize, IPC_CREAT | 0777)); + char* addr = ASSERT_NO_ERRNO_AND_VALUE(Shmat(id, nullptr, 0)); + EXPECT_THAT(munmap(addr + (kAllocSize / 4), kAllocSize / 2), + SyscallSucceeds()); + ASSERT_NO_ERRNO(Shmdt(addr)); +} + +// Check that sentry does not panic when asked for a zero-length private shm +// segment. +TEST(ShmTest, GracefullyFailOnZeroLenSegmentCreation) { + EXPECT_THAT(Shmget(IPC_PRIVATE, 0, 0), PosixErrorIs(EINVAL, _)); +} + +TEST(ShmTest, NoDestructionOfAttachedSegmentWithMultipleRmid) { + const int id = ASSERT_NO_ERRNO_AND_VALUE( + Shmget(IPC_PRIVATE, kAllocSize, IPC_CREAT | 0777)); + char* addr = ASSERT_NO_ERRNO_AND_VALUE(Shmat(id, nullptr, 0)); + char* addr2 = ASSERT_NO_ERRNO_AND_VALUE(Shmat(id, nullptr, 0)); + + // There should be 2 refs to the segment from the 2 attachments, and a single + // self-reference. Mark the segment as destroyed more than 3 times through + // shmctl(RMID). If there's a bug with the ref counting, this should cause the + // count to drop to zero. + for (int i = 0; i < 6; ++i) { + ASSERT_NO_ERRNO(Shmctl<void>(id, IPC_RMID, nullptr)); + } + + // Segment should remain accessible. + addr[0] = 'x'; + ASSERT_NO_ERRNO(Shmdt(addr)); + + // Segment should remain accessible even after one of the two attachments are + // detached. + addr2[0] = 'x'; + ASSERT_NO_ERRNO(Shmdt(addr2)); +} + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/sigaction.cc b/test/syscalls/linux/sigaction.cc new file mode 100644 index 000000000..cdd2dbf31 --- /dev/null +++ b/test/syscalls/linux/sigaction.cc @@ -0,0 +1,70 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <signal.h> + +#include "gtest/gtest.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +TEST(SigactionTest, GetLessThanOrEqualToZeroFails) { + struct sigaction act; + memset(&act, 0, sizeof(act)); + ASSERT_THAT(sigaction(-1, NULL, &act), SyscallFailsWithErrno(EINVAL)); + ASSERT_THAT(sigaction(0, NULL, &act), SyscallFailsWithErrno(EINVAL)); +} + +TEST(SigactionTest, SetLessThanOrEqualToZeroFails) { + struct sigaction act; + memset(&act, 0, sizeof(act)); + ASSERT_THAT(sigaction(0, &act, NULL), SyscallFailsWithErrno(EINVAL)); + ASSERT_THAT(sigaction(0, &act, NULL), SyscallFailsWithErrno(EINVAL)); +} + +TEST(SigactionTest, GetGreaterThanMaxFails) { + struct sigaction act; + memset(&act, 0, sizeof(act)); + ASSERT_THAT(sigaction(SIGRTMAX + 1, NULL, &act), + SyscallFailsWithErrno(EINVAL)); +} + +TEST(SigactionTest, SetGreaterThanMaxFails) { + struct sigaction act; + memset(&act, 0, sizeof(act)); + ASSERT_THAT(sigaction(SIGRTMAX + 1, &act, NULL), + SyscallFailsWithErrno(EINVAL)); +} + +TEST(SigactionTest, SetSigkillFails) { + struct sigaction act; + memset(&act, 0, sizeof(act)); + ASSERT_THAT(sigaction(SIGKILL, NULL, &act), SyscallSucceeds()); + ASSERT_THAT(sigaction(SIGKILL, &act, NULL), SyscallFailsWithErrno(EINVAL)); +} + +TEST(SigactionTest, SetSigstopFails) { + struct sigaction act; + memset(&act, 0, sizeof(act)); + ASSERT_THAT(sigaction(SIGSTOP, NULL, &act), SyscallSucceeds()); + ASSERT_THAT(sigaction(SIGSTOP, &act, NULL), SyscallFailsWithErrno(EINVAL)); +} + +} // namespace + +} // namespace 
testing +} // namespace gvisor diff --git a/test/syscalls/linux/sigaltstack.cc b/test/syscalls/linux/sigaltstack.cc new file mode 100644 index 000000000..fa991545c --- /dev/null +++ b/test/syscalls/linux/sigaltstack.cc @@ -0,0 +1,274 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <errno.h> +#include <signal.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> + +#include <functional> +#include <vector> + +#include "gtest/gtest.h" +#include "gtest/gtest.h" +#include "test/util/cleanup.h" +#include "test/util/fs_util.h" +#include "test/util/multiprocess_util.h" +#include "test/util/posix_error.h" +#include "test/util/signal_util.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +PosixErrorOr<Cleanup> ScopedSigaltstack(stack_t const& stack) { + stack_t old_stack; + int rc = sigaltstack(&stack, &old_stack); + MaybeSave(); + if (rc < 0) { + return PosixError(errno, "sigaltstack failed"); + } + return Cleanup([old_stack] { + EXPECT_THAT(sigaltstack(&old_stack, nullptr), SyscallSucceeds()); + }); +} + +volatile bool got_signal = false; +volatile int sigaltstack_errno = 0; +volatile int ss_flags = 0; + +void sigaltstack_handler(int sig, siginfo_t* siginfo, void* arg) { + got_signal = true; + + stack_t stack; + int ret = sigaltstack(nullptr, &stack); + MaybeSave(); + if (ret < 0) { + sigaltstack_errno = errno; + return; + } + 
ss_flags = stack.ss_flags; +} + +TEST(SigaltstackTest, Success) { + std::vector<char> stack_mem(SIGSTKSZ); + stack_t stack = {}; + stack.ss_sp = stack_mem.data(); + stack.ss_size = stack_mem.size(); + auto const cleanup_sigstack = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaltstack(stack)); + + struct sigaction sa = {}; + sa.sa_sigaction = sigaltstack_handler; + sigfillset(&sa.sa_mask); + sa.sa_flags = SA_SIGINFO | SA_ONSTACK; + auto const cleanup_sa = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGUSR1, sa)); + + // Send signal to this thread, as sigaltstack is per-thread. + EXPECT_THAT(tgkill(getpid(), gettid(), SIGUSR1), SyscallSucceeds()); + + EXPECT_TRUE(got_signal); + EXPECT_EQ(sigaltstack_errno, 0); + EXPECT_NE(0, ss_flags & SS_ONSTACK); +} + +TEST(SigaltstackTest, ResetByExecve) { + std::vector<char> stack_mem(SIGSTKSZ); + stack_t stack = {}; + stack.ss_sp = stack_mem.data(); + stack.ss_size = stack_mem.size(); + auto const cleanup_sigstack = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaltstack(stack)); + + std::string full_path; + char* test_src = getenv("TEST_SRCDIR"); + if (test_src) { + full_path = JoinPath(test_src, "../../linux/sigaltstack_check"); + } + ASSERT_FALSE(full_path.empty()); + + pid_t child_pid = -1; + int execve_errno = 0; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec(full_path, {"sigaltstack_check"}, {}, nullptr, &child_pid, + &execve_errno)); + + ASSERT_GT(child_pid, 0); + ASSERT_EQ(execve_errno, 0); + + int status = 0; + ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds()); + ASSERT_TRUE(WIFEXITED(status)); + ASSERT_EQ(WEXITSTATUS(status), 0); +} + +volatile bool badhandler_on_sigaltstack = true; // Set by the handler. +char* volatile badhandler_low_water_mark = nullptr; // Set by the handler. +volatile uint8_t badhandler_recursive_faults = 0; // Consumed by the handler. 
+ +void badhandler(int sig, siginfo_t* siginfo, void* arg) { + char stack_var = 0; + char* current_ss = &stack_var; + + stack_t stack; + int ret = sigaltstack(nullptr, &stack); + if (ret < 0 || (stack.ss_flags & SS_ONSTACK) != SS_ONSTACK) { + // We should always be marked as being on the stack. Don't allow this to hit + // the bottom if this is ever not true (the main test will fail as a + // result, but we still need to unwind the recursive faults). + badhandler_on_sigaltstack = false; + } + if (current_ss < badhandler_low_water_mark) { + // Record the low point for the signal stack. We never expected this to be + // before stack bottom, but this is asserted in the actual test. + badhandler_low_water_mark = current_ss; + } + if (badhandler_recursive_faults > 0) { + badhandler_recursive_faults--; + Fault(); + } + FixupFault(reinterpret_cast<ucontext*>(arg)); +} + +TEST(SigaltstackTest, WalksOffBottom) { + // This test marks the upper half of the stack_mem array as the signal stack. + // It asserts that when a fault occurs in the handler (already on the signal + // stack), we eventually continue to fault our way off the stack. We should + // not revert to the top of the signal stack when we fall off the bottom and + // the signal stack should remain "in use". When we fall off the signal stack, + // we should have an unconditional signal delivered and not start using the + // first part of the stack_mem array. + std::vector<char> stack_mem(SIGSTKSZ * 2); + stack_t stack = {}; + stack.ss_sp = stack_mem.data() + SIGSTKSZ; // See above: upper half. + stack.ss_size = SIGSTKSZ; // Only one half the array. + auto const cleanup_sigstack = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaltstack(stack)); + + // Setup the handler: this must be for SIGSEGV, and it must allow proper + // nesting (no signal mask, no defer) so that we can trigger multiple times. 
+ // + // When we walk off the bottom of the signal stack and force signal delivery + // of a SIGSEGV, the handler will revert to the default behavior (kill). + struct sigaction sa = {}; + sa.sa_sigaction = badhandler; + sa.sa_flags = SA_SIGINFO | SA_ONSTACK | SA_NODEFER; + auto const cleanup_sa = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGSEGV, sa)); + + // Trigger a single fault. + badhandler_low_water_mark = + reinterpret_cast<char*>(&stack.ss_sp) + SIGSTKSZ; // Expected top. + badhandler_recursive_faults = 0; // Disable refault. + Fault(); + EXPECT_TRUE(badhandler_on_sigaltstack); + EXPECT_THAT(sigaltstack(nullptr, &stack), SyscallSucceeds()); + EXPECT_EQ(stack.ss_flags & SS_ONSTACK, 0); + EXPECT_LT(badhandler_low_water_mark, + reinterpret_cast<char*>(stack.ss_sp) + 2 * SIGSTKSZ); + EXPECT_GT(badhandler_low_water_mark, reinterpret_cast<char*>(stack.ss_sp)); + + // Trigger two faults. + char* prev_low_water_mark = badhandler_low_water_mark; // Previous top. + badhandler_recursive_faults = 1; // One refault. + Fault(); + ASSERT_TRUE(badhandler_on_sigaltstack); + EXPECT_THAT(sigaltstack(nullptr, &stack), SyscallSucceeds()); + EXPECT_EQ(stack.ss_flags & SS_ONSTACK, 0); + EXPECT_LT(badhandler_low_water_mark, prev_low_water_mark); + EXPECT_GT(badhandler_low_water_mark, reinterpret_cast<char*>(stack.ss_sp)); + + // Calculate the stack growth for a fault, and set the recursive faults to + // ensure that the signal handler stack required exceeds our marked stack area + // by a minimal amount. It should remain in the valid stack_mem area so that + // we can test the signal is forced merely by going out of the signal stack + // bounds, not by a genuine fault. + uintptr_t frame_size = + static_cast<uintptr_t>(prev_low_water_mark - badhandler_low_water_mark); + badhandler_recursive_faults = (SIGSTKSZ + frame_size) / frame_size; + EXPECT_EXIT(Fault(), ::testing::KilledBySignal(SIGSEGV), ""); +} + +volatile int setonstack_retval = 0; // Set by the handler. 
+volatile int setonstack_errno = 0; // Set by the handler. + +void setonstack(int sig, siginfo_t* siginfo, void* arg) { + char stack_mem[SIGSTKSZ]; + stack_t stack = {}; + stack.ss_sp = &stack_mem[0]; + stack.ss_size = SIGSTKSZ; + setonstack_retval = sigaltstack(&stack, nullptr); + setonstack_errno = errno; + FixupFault(reinterpret_cast<ucontext*>(arg)); +} + +TEST(SigaltstackTest, SetWhileOnStack) { + // Reserve twice as much stack here, since the handler will allocate a vector + // of size SIGTKSZ and attempt to set the sigaltstack to that value. + std::vector<char> stack_mem(2 * SIGSTKSZ); + stack_t stack = {}; + stack.ss_sp = stack_mem.data(); + stack.ss_size = stack_mem.size(); + auto const cleanup_sigstack = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaltstack(stack)); + + // See above. + struct sigaction sa = {}; + sa.sa_sigaction = setonstack; + sa.sa_flags = SA_SIGINFO | SA_ONSTACK; + auto const cleanup_sa = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGSEGV, sa)); + + // Trigger a fault. + Fault(); + + // The set should have failed. + EXPECT_EQ(setonstack_retval, -1); + EXPECT_EQ(setonstack_errno, EPERM); +} + +TEST(SigaltstackTest, SetCurrentStack) { + // This is executed as an exit test because once the signal stack is set to + // the local stack, there's no good way to unwind. We don't want to taint the + // test of any other tests that might run within this process. + EXPECT_EXIT( + { + char stack_value = 0; + stack_t stack = {}; + stack.ss_sp = &stack_value - kPageSize; // Lower than current level. + stack.ss_size = 2 * kPageSize; // => &stack_value +/- kPageSize. + TEST_CHECK(sigaltstack(&stack, nullptr) == 0); + TEST_CHECK(sigaltstack(nullptr, &stack) == 0); + TEST_CHECK((stack.ss_flags & SS_ONSTACK) != 0); + + // Should not be able to change the stack (even no-op). + TEST_CHECK(sigaltstack(&stack, nullptr) == -1 && errno == EPERM); + + // Should not be able to disable the stack. 
+ stack.ss_flags = SS_DISABLE; + TEST_CHECK(sigaltstack(&stack, nullptr) == -1 && errno == EPERM); + exit(0); + }, + ::testing::ExitedWithCode(0), ""); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/sigaltstack_check.cc b/test/syscalls/linux/sigaltstack_check.cc new file mode 100644 index 000000000..b71f812a8 --- /dev/null +++ b/test/syscalls/linux/sigaltstack_check.cc @@ -0,0 +1,33 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Checks that there is no alternate signal stack by default. +// +// Used by a test in sigaltstack.cc. +#include <errno.h> +#include <signal.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> + +#include "test/util/logging.h" + +int main(int /* argc */, char** /* argv */) { + stack_t stack; + TEST_CHECK(sigaltstack(nullptr, &stack) >= 0); + TEST_CHECK(stack.ss_flags == SS_DISABLE); + TEST_CHECK(stack.ss_sp == 0); + TEST_CHECK(stack.ss_size == 0); + return 0; +} diff --git a/test/syscalls/linux/sigiret.cc b/test/syscalls/linux/sigiret.cc new file mode 100644 index 000000000..1b7cecccb --- /dev/null +++ b/test/syscalls/linux/sigiret.cc @@ -0,0 +1,137 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <signal.h> +#include <sys/types.h> +#include <sys/ucontext.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "test/util/logging.h" +#include "test/util/signal_util.h" +#include "test/util/test_util.h" +#include "test/util/timer_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +constexpr uint64_t kOrigRcx = 0xdeadbeeffacefeed; +constexpr uint64_t kOrigR11 = 0xfacefeedbaad1dea; + +volatile int gotvtalrm, ready; + +void sigvtalrm(int sig, siginfo_t* siginfo, void* _uc) { + ucontext_t* uc = reinterpret_cast<ucontext_t*>(_uc); + + // Verify that: + // - test is in the busy-wait loop waiting for signal. + // - %rcx and %r11 values in mcontext_t match kOrigRcx and kOrigR11. + if (ready && + static_cast<uint64_t>(uc->uc_mcontext.gregs[REG_RCX]) == kOrigRcx && + static_cast<uint64_t>(uc->uc_mcontext.gregs[REG_R11]) == kOrigR11) { + // Modify the values %rcx and %r11 in the ucontext. These are the + // values seen by the application after the signal handler returns. + uc->uc_mcontext.gregs[REG_RCX] = ~kOrigRcx; + uc->uc_mcontext.gregs[REG_R11] = ~kOrigR11; + gotvtalrm = 1; + } +} + +TEST(SigIretTest, CheckRcxR11) { + // Setup signal handler for SIGVTALRM. + struct sigaction sa = {}; + sigfillset(&sa.sa_mask); + sa.sa_sigaction = sigvtalrm; + sa.sa_flags = SA_SIGINFO; + auto const action_cleanup = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGVTALRM, sa)); + + auto const mask_cleanup = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, SIGVTALRM)); + + // Setup itimer to fire after 500 msecs. 
+ struct itimerval itimer = {}; + itimer.it_value.tv_usec = 500 * 1000; // 500 msecs. + auto const timer_cleanup = + ASSERT_NO_ERRNO_AND_VALUE(ScopedItimer(ITIMER_VIRTUAL, itimer)); + + // Initialize %rcx and %r11 and spin until the signal handler returns. + uint64_t rcx = kOrigRcx; + uint64_t r11 = kOrigR11; + asm volatile( + "movq %[rcx], %%rcx;" // %rcx = rcx + "movq %[r11], %%r11;" // %r11 = r11 + "movl $1, %[ready];" // ready = 1 + "1: pause; cmpl $0, %[gotvtalrm]; je 1b;" // while (!gotvtalrm); + "movq %%rcx, %[rcx];" // rcx = %rcx + "movq %%r11, %[r11];" // r11 = %r11 + : [ready] "=m"(ready), [rcx] "+m"(rcx), [r11] "+m"(r11) + : [gotvtalrm] "m"(gotvtalrm) + : "cc", "memory", "rcx", "r11"); + + // If sigreturn(2) returns via 'sysret' then %rcx and %r11 will be + // clobbered and set to 'ptregs->rip' and 'ptregs->rflags' respectively. + // + // The following check verifies that %rcx and %r11 were not clobbered + // when returning from the signal handler (via sigreturn(2)). + EXPECT_EQ(rcx, ~kOrigRcx); + EXPECT_EQ(r11, ~kOrigR11); +} + +constexpr uint64_t kNonCanonicalRip = 0xCCCC000000000000; + +// Test that a non-canonical signal handler faults as expected. +TEST(SigIretTest, BadHandler) { + struct sigaction sa = {}; + sa.sa_sigaction = + reinterpret_cast<void (*)(int, siginfo_t*, void*)>(kNonCanonicalRip); + auto const cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGUSR1, sa)); + + pid_t pid = fork(); + if (pid == 0) { + // Child, wait for signal. + while (1) { + pause(); + } + } + ASSERT_THAT(pid, SyscallSucceeds()); + + EXPECT_THAT(kill(pid, SIGUSR1), SyscallSucceeds()); + + int status; + EXPECT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid)); + EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGSEGV) + << "status = " << status; +} + +} // namespace + +} // namespace testing +} // namespace gvisor + +int main(int argc, char** argv) { + // SigIretTest.CheckRcxR11 depends on delivering SIGVTALRM to the main thread. 
+ // Block SIGVTALRM so that any other threads created by TestInit will also + // have SIGVTALRM blocked. + sigset_t set; + sigemptyset(&set); + sigaddset(&set, SIGVTALRM); + TEST_PCHECK(sigprocmask(SIG_BLOCK, &set, nullptr) == 0); + + gvisor::testing::TestInit(&argc, &argv); + + return RUN_ALL_TESTS(); +} diff --git a/test/syscalls/linux/sigprocmask.cc b/test/syscalls/linux/sigprocmask.cc new file mode 100644 index 000000000..d8b918446 --- /dev/null +++ b/test/syscalls/linux/sigprocmask.cc @@ -0,0 +1,272 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <signal.h> +#include <stddef.h> +#include <sys/syscall.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "test/util/signal_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// Signals numbers used for testing. 
+static constexpr int kTestSignal1 = SIGUSR1; +static constexpr int kTestSignal2 = SIGUSR2; + +static int raw_sigprocmask(int how, const sigset_t* set, sigset_t* oldset) { + return syscall(SYS_rt_sigprocmask, how, set, oldset, _NSIG / 8); +} + +// count of the number of signals received +int signal_count[kMaxSignal + 1]; + +// signal handler increments the signal counter +void SigHandler(int sig, siginfo_t* info, void* context) { + TEST_CHECK(sig > 0 && sig <= kMaxSignal); + signal_count[sig] += 1; +} + +// The test fixture saves and restores the signal mask and +// sets up handlers for kTestSignal1 and kTestSignal2. +class SigProcMaskTest : public ::testing::Test { + protected: + void SetUp() override { + // Save the current signal mask. + EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, nullptr, &mask_), + SyscallSucceeds()); + + // Setup signal handlers for kTestSignal1 and kTestSignal2. + struct sigaction sa; + sa.sa_sigaction = SigHandler; + sigfillset(&sa.sa_mask); + sa.sa_flags = SA_SIGINFO; + EXPECT_THAT(sigaction(kTestSignal1, &sa, &sa_test_sig_1_), + SyscallSucceeds()); + EXPECT_THAT(sigaction(kTestSignal2, &sa, &sa_test_sig_2_), + SyscallSucceeds()); + + // Clear the signal counters. + memset(signal_count, 0, sizeof(signal_count)); + } + + void TearDown() override { + // Restore the signal mask. + EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, &mask_, nullptr), + SyscallSucceeds()); + + // Restore the signal handlers for kTestSignal1 and kTestSignal2. + EXPECT_THAT(sigaction(kTestSignal1, &sa_test_sig_1_, nullptr), + SyscallSucceeds()); + EXPECT_THAT(sigaction(kTestSignal2, &sa_test_sig_2_, nullptr), + SyscallSucceeds()); + } + + private: + sigset_t mask_; + struct sigaction sa_test_sig_1_; + struct sigaction sa_test_sig_2_; +}; + +// Both sigsets nullptr should succeed and do nothing. 
+TEST_F(SigProcMaskTest, NullAddress) { + EXPECT_THAT(raw_sigprocmask(SIG_BLOCK, nullptr, NULL), SyscallSucceeds()); + EXPECT_THAT(raw_sigprocmask(SIG_UNBLOCK, nullptr, NULL), SyscallSucceeds()); + EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, nullptr, NULL), SyscallSucceeds()); +} + +// Bad address for either sigset should fail with EFAULT. +TEST_F(SigProcMaskTest, BadAddress) { + sigset_t* bad_addr = reinterpret_cast<sigset_t*>(-1); + + EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, bad_addr, nullptr), + SyscallFailsWithErrno(EFAULT)); + + EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, nullptr, bad_addr), + SyscallFailsWithErrno(EFAULT)); +} + +// Bad value of the "how" parameter should fail with EINVAL. +TEST_F(SigProcMaskTest, BadParameter) { + int bad_param_1 = -1; + int bad_param_2 = 42; + + sigset_t set1; + sigemptyset(&set1); + + EXPECT_THAT(raw_sigprocmask(bad_param_1, &set1, nullptr), + SyscallFailsWithErrno(EINVAL)); + + EXPECT_THAT(raw_sigprocmask(bad_param_2, &set1, nullptr), + SyscallFailsWithErrno(EINVAL)); +} + +// Check that we can get the current signal mask. +TEST_F(SigProcMaskTest, GetMask) { + sigset_t set1; + sigset_t set2; + + sigemptyset(&set1); + sigfillset(&set2); + EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, nullptr, &set1), SyscallSucceeds()); + EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, nullptr, &set2), SyscallSucceeds()); + EXPECT_THAT(set1, EqualsSigset(set2)); +} + +// Check that we can set the signal mask. +TEST_F(SigProcMaskTest, SetMask) { + sigset_t actual; + sigset_t expected; + + // Try to mask all signals + sigfillset(&expected); + EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, &expected, nullptr), + SyscallSucceeds()); + EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, nullptr, &actual), + SyscallSucceeds()); + // sigprocmask() should have silently ignored SIGKILL and SIGSTOP. 
+ sigdelset(&expected, SIGSTOP); + sigdelset(&expected, SIGKILL); + EXPECT_THAT(actual, EqualsSigset(expected)); + + // Try to clear the signal mask + sigemptyset(&expected); + EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, &expected, nullptr), + SyscallSucceeds()); + EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, nullptr, &actual), + SyscallSucceeds()); + EXPECT_THAT(actual, EqualsSigset(expected)); + + // Try to set a mask with one signal. + sigemptyset(&expected); + sigaddset(&expected, kTestSignal1); + EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, &expected, nullptr), + SyscallSucceeds()); + EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, nullptr, &actual), + SyscallSucceeds()); + EXPECT_THAT(actual, EqualsSigset(expected)); +} + +// Check that we can add and remove signals. +TEST_F(SigProcMaskTest, BlockUnblock) { + sigset_t actual; + sigset_t expected; + + // Try to set a mask with one signal. + sigemptyset(&expected); + sigaddset(&expected, kTestSignal1); + EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, &expected, nullptr), + SyscallSucceeds()); + EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, nullptr, &actual), + SyscallSucceeds()); + EXPECT_THAT(actual, EqualsSigset(expected)); + + // Try to add another signal. + sigset_t block; + sigemptyset(&block); + sigaddset(&block, kTestSignal2); + EXPECT_THAT(raw_sigprocmask(SIG_BLOCK, &block, nullptr), SyscallSucceeds()); + EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, nullptr, &actual), + SyscallSucceeds()); + sigaddset(&expected, kTestSignal2); + EXPECT_THAT(actual, EqualsSigset(expected)); + + // Try to remove a signal. + sigset_t unblock; + sigemptyset(&unblock); + sigaddset(&unblock, kTestSignal1); + EXPECT_THAT(raw_sigprocmask(SIG_UNBLOCK, &unblock, nullptr), + SyscallSucceeds()); + EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, nullptr, &actual), + SyscallSucceeds()); + sigdelset(&expected, kTestSignal1); + EXPECT_THAT(actual, EqualsSigset(expected)); +} + +// Test that the signal mask actually blocks signals. 
+TEST_F(SigProcMaskTest, SignalHandler) { + sigset_t mask; + + // clear the signal mask + sigemptyset(&mask); + EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, &mask, nullptr), SyscallSucceeds()); + + // Check the initial signal counts. + EXPECT_EQ(0, signal_count[kTestSignal1]); + EXPECT_EQ(0, signal_count[kTestSignal2]); + + // Check that both kTestSignal1 and kTestSignal2 are not blocked. + raise(kTestSignal1); + raise(kTestSignal2); + EXPECT_EQ(1, signal_count[kTestSignal1]); + EXPECT_EQ(1, signal_count[kTestSignal2]); + + // Block kTestSignal1. + sigaddset(&mask, kTestSignal1); + EXPECT_THAT(raw_sigprocmask(SIG_BLOCK, &mask, nullptr), SyscallSucceeds()); + + // Check that kTestSignal1 is blocked. + raise(kTestSignal1); + raise(kTestSignal2); + EXPECT_EQ(1, signal_count[kTestSignal1]); + EXPECT_EQ(2, signal_count[kTestSignal2]); + + // Unblock kTestSignal1. + sigaddset(&mask, kTestSignal1); + EXPECT_THAT(raw_sigprocmask(SIG_UNBLOCK, &mask, nullptr), SyscallSucceeds()); + + // Check that the unblocked kTestSignal1 has been delivered. + // TODO: gvisor currently drops masked signals on the floor. + if (!IsRunningOnGvisor()) { + EXPECT_EQ(2, signal_count[kTestSignal1]); + } + EXPECT_EQ(2, signal_count[kTestSignal2]); +} + +// Check that sigprocmask correctly handles aliasing of the set and oldset +// pointers. +TEST_F(SigProcMaskTest, AliasedSets) { + sigset_t mask; + + // Set a mask in which only kTestSignal1 is blocked. + sigset_t mask1; + sigemptyset(&mask1); + sigaddset(&mask1, kTestSignal1); + mask = mask1; + ASSERT_THAT(raw_sigprocmask(SIG_SETMASK, &mask, nullptr), SyscallSucceeds()); + + // Exchange it with a mask in which only kTestSignal2 is blocked. + sigset_t mask2; + sigemptyset(&mask2); + sigaddset(&mask2, kTestSignal2); + mask = mask2; + ASSERT_THAT(raw_sigprocmask(SIG_SETMASK, &mask, &mask), SyscallSucceeds()); + + // Check that the exchange succeeeded: + // mask should now contain the previously-set mask blocking only kTestSignal1. 
+ EXPECT_THAT(mask, EqualsSigset(mask1)); + // The current mask should block only kTestSignal2. + ASSERT_THAT(raw_sigprocmask(0, nullptr, &mask), SyscallSucceeds()); + EXPECT_THAT(mask, EqualsSigset(mask2)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/sigstop.cc b/test/syscalls/linux/sigstop.cc new file mode 100644 index 000000000..e21d23d51 --- /dev/null +++ b/test/syscalls/linux/sigstop.cc @@ -0,0 +1,150 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <signal.h> +#include <stdlib.h> +#include <sys/select.h> + +#include "gtest/gtest.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/util/multiprocess_util.h" +#include "test/util/posix_error.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +DEFINE_bool(sigstop_test_child, false, + "If true, run the SigstopTest child workload."); + +namespace gvisor { +namespace testing { + +namespace { + +constexpr absl::Duration kChildStartupDelay = absl::Seconds(5); +constexpr absl::Duration kChildMainThreadDelay = absl::Seconds(10); +constexpr absl::Duration kChildExtraThreadDelay = absl::Seconds(15); +constexpr absl::Duration kPostSIGSTOPDelay = absl::Seconds(20); + +// Comparisons on absl::Duration aren't yet constexpr (2017-07-14), so we +// can't just use static_assert. 
+TEST(SigstopTest, TimesAreRelativelyConsistent) { + EXPECT_LT(kChildStartupDelay, kChildMainThreadDelay) + << "Child process will exit before the parent process attempts to stop " + "it"; + EXPECT_LT(kChildMainThreadDelay, kChildExtraThreadDelay) + << "Secondary thread in child process will exit before main thread, " + "causing it to exit with the wrong code"; + EXPECT_LT(kChildExtraThreadDelay, kPostSIGSTOPDelay) + << "Parent process stops waiting before child process may exit if " + "improperly stopped, rendering the test ineffective"; +} + +// Exit codes communicated from the child workload to the parent test process. +constexpr int kChildMainThreadExitCode = 10; +constexpr int kChildExtraThreadExitCode = 11; + +TEST(SigstopTest, Correctness) { + pid_t child_pid = -1; + int execve_errno = 0; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec("/proc/self/exe", {"/proc/self/exe", "--sigstop_test_child"}, + {}, nullptr, &child_pid, &execve_errno)); + + ASSERT_GT(child_pid, 0); + ASSERT_EQ(execve_errno, 0); + + // Wait for the child subprocess to start the second thread before stopping + // it. + absl::SleepFor(kChildStartupDelay); + ASSERT_THAT(kill(child_pid, SIGSTOP), SyscallSucceeds()); + int status; + EXPECT_THAT(RetryEINTR(waitpid)(child_pid, &status, WUNTRACED), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFSTOPPED(status)); + EXPECT_EQ(SIGSTOP, WSTOPSIG(status)); + + // Sleep for longer than either of the sleeps in the child subprocess, + // expecting the child to stay alive because it's stopped. + absl::SleepFor(kPostSIGSTOPDelay); + ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, WNOHANG), + SyscallSucceedsWithValue(0)); + + // Resume the child. + ASSERT_THAT(kill(child_pid, SIGCONT), SyscallSucceeds()); + + EXPECT_THAT(RetryEINTR(waitpid)(child_pid, &status, WCONTINUED), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFCONTINUED(status)); + + // Expect it to die. 
+ ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds()); + ASSERT_TRUE(WIFEXITED(status)); + ASSERT_EQ(WEXITSTATUS(status), kChildMainThreadExitCode); +} + +// Like base:SleepFor, but tries to avoid counting time spent stopped due to a +// stop signal toward the sleep. +// +// This is required due to an inconsistency in how nanosleep(2) and stop signals +// interact on Linux. When nanosleep is interrupted, it writes the remaining +// time back to its second timespec argument, so that if nanosleep is +// interrupted by a signal handler then userspace can immediately call nanosleep +// again with that timespec. However, if nanosleep is automatically restarted +// (because it's interrupted by a signal that is not delivered to a handler, +// such as a stop signal), it's restarted based on the timer's former *absolute* +// expiration time (via ERESTART_RESTARTBLOCK => SYS_restart_syscall => +// hrtimer_nanosleep_restart). This means that time spent stopped is effectively +// counted as time spent sleeping, resulting in less time spent sleeping than +// expected. +// +// Dividing the sleep into multiple smaller sleeps limits the impact of this +// effect to the length of each sleep during which a stop occurs; for example, +// if a sleeping process is only stopped once, SleepIgnoreStopped can +// under-sleep by at most 100ms. +void SleepIgnoreStopped(absl::Duration d) { + absl::Duration const max_sleep = absl::Milliseconds(100); + while (d > absl::ZeroDuration()) { + absl::Duration to_sleep = std::min(d, max_sleep); + absl::SleepFor(to_sleep); + d -= to_sleep; + } +} + +void RunChild() { + // Start another thread that attempts to call exit_group with a different + // error code, in order to verify that SIGSTOP stops this thread as well. 
+ ScopedThread t([] { + SleepIgnoreStopped(kChildExtraThreadDelay); + exit(kChildExtraThreadExitCode); + }); + SleepIgnoreStopped(kChildMainThreadDelay); + exit(kChildMainThreadExitCode); +} + +} // namespace + +} // namespace testing +} // namespace gvisor + +int main(int argc, char** argv) { + gvisor::testing::TestInit(&argc, &argv); + + if (FLAGS_sigstop_test_child) { + gvisor::testing::RunChild(); + return 1; + } + + return RUN_ALL_TESTS(); +} diff --git a/test/syscalls/linux/sigtimedwait.cc b/test/syscalls/linux/sigtimedwait.cc new file mode 100644 index 000000000..3a350fc28 --- /dev/null +++ b/test/syscalls/linux/sigtimedwait.cc @@ -0,0 +1,248 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <sys/wait.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/util/logging.h" +#include "test/util/signal_util.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" +#include "test/util/timer_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// N.B. main() blocks SIGALRM and SIGCHLD on all threads. 
+ +constexpr int kAlarmSecs = 12; + +void NoopHandler(int sig, siginfo_t* info, void* context) {} + +TEST(SigtimedwaitTest, InvalidTimeout) { + sigset_t mask; + sigemptyset(&mask); + struct timespec timeout = {0, 1000000001}; + EXPECT_THAT(sigtimedwait(&mask, nullptr, &timeout), + SyscallFailsWithErrno(EINVAL)); + timeout = {-1, 0}; + EXPECT_THAT(sigtimedwait(&mask, nullptr, &timeout), + SyscallFailsWithErrno(EINVAL)); + timeout = {0, -1}; + EXPECT_THAT(sigtimedwait(&mask, nullptr, &timeout), + SyscallFailsWithErrno(EINVAL)); +} + +// No random save as the test relies on alarm timing. Cooperative save tests +// already cover the save between alarm and wait. +TEST(SigtimedwaitTest, AlarmReturnsAlarm_NoRandomSave) { + struct itimerval itv = {}; + itv.it_value.tv_sec = kAlarmSecs; + const auto itimer_cleanup = + ASSERT_NO_ERRNO_AND_VALUE(ScopedItimer(ITIMER_REAL, itv)); + + sigset_t mask; + sigemptyset(&mask); + sigaddset(&mask, SIGALRM); + siginfo_t info = {}; + EXPECT_THAT(RetryEINTR(sigtimedwait)(&mask, &info, nullptr), + SyscallSucceedsWithValue(SIGALRM)); + EXPECT_EQ(SIGALRM, info.si_signo); +} + +// No random save as the test relies on alarm timing. Cooperative save tests +// already cover the save between alarm and wait. 
+TEST(SigtimedwaitTest, NullTimeoutReturnsEINTR_NoRandomSave) { + struct sigaction sa; + sa.sa_sigaction = NoopHandler; + sigfillset(&sa.sa_mask); + sa.sa_flags = SA_SIGINFO; + const auto action_cleanup = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGALRM, sa)); + + const auto mask_cleanup = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, SIGALRM)); + + struct itimerval itv = {}; + itv.it_value.tv_sec = kAlarmSecs; + const auto itimer_cleanup = + ASSERT_NO_ERRNO_AND_VALUE(ScopedItimer(ITIMER_REAL, itv)); + + sigset_t mask; + sigemptyset(&mask); + EXPECT_THAT(sigtimedwait(&mask, nullptr, nullptr), + SyscallFailsWithErrno(EINTR)); +} + +TEST(SigtimedwaitTest, LegitTimeoutReturnsEAGAIN) { + sigset_t mask; + sigemptyset(&mask); + struct timespec timeout = {1, 0}; // 1 second + EXPECT_THAT(RetryEINTR(sigtimedwait)(&mask, nullptr, &timeout), + SyscallFailsWithErrno(EAGAIN)); +} + +TEST(SigtimedwaitTest, ZeroTimeoutReturnsEAGAIN) { + sigset_t mask; + sigemptyset(&mask); + struct timespec timeout = {0, 0}; // 0 second + EXPECT_THAT(sigtimedwait(&mask, nullptr, &timeout), + SyscallFailsWithErrno(EAGAIN)); +} + +TEST(SigtimedwaitTest, KillGeneratedSIGCHLD) { + EXPECT_THAT(kill(getpid(), SIGCHLD), SyscallSucceeds()); + + sigset_t mask; + sigemptyset(&mask); + sigaddset(&mask, SIGCHLD); + struct timespec ts = {5, 0}; + EXPECT_THAT(RetryEINTR(sigtimedwait)(&mask, nullptr, &ts), + SyscallSucceedsWithValue(SIGCHLD)); +} + +TEST(SigtimedwaitTest, ChildExitGeneratedSIGCHLD) { + pid_t pid = fork(); + if (pid == 0) { + _exit(0); + } + ASSERT_THAT(pid, SyscallSucceeds()); + + int status; + EXPECT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid)); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) << status; + + sigset_t mask; + sigemptyset(&mask); + sigaddset(&mask, SIGCHLD); + struct timespec ts = {5, 0}; + EXPECT_THAT(RetryEINTR(sigtimedwait)(&mask, nullptr, &ts), + SyscallSucceedsWithValue(SIGCHLD)); +} + +TEST(SigtimedwaitTest, 
ChildExitGeneratedSIGCHLDWithHandler) { + // Setup handler for SIGCHLD, but don't unblock it. + struct sigaction sa; + sa.sa_sigaction = NoopHandler; + sigfillset(&sa.sa_mask); + sa.sa_flags = SA_SIGINFO; + const auto action_cleanup = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGCHLD, sa)); + + pid_t pid = fork(); + if (pid == 0) { + _exit(0); + } + ASSERT_THAT(pid, SyscallSucceeds()); + + sigset_t mask; + sigemptyset(&mask); + sigaddset(&mask, SIGCHLD); + struct timespec ts = {5, 0}; + EXPECT_THAT(RetryEINTR(sigtimedwait)(&mask, nullptr, &ts), + SyscallSucceedsWithValue(SIGCHLD)); + + int status; + EXPECT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid)); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) << status; +} + +TEST(SigtimedwaitTest, IgnoredUnmaskedSignal) { + constexpr int kSigno = SIGUSR1; + constexpr auto kSigtimedwaitSetupTime = absl::Seconds(2); + constexpr auto kSigtimedwaitTimeout = absl::Seconds(5); + ASSERT_GT(kSigtimedwaitTimeout, kSigtimedwaitSetupTime); + + // Ensure that kSigno is ignored, and unmasked on this thread. + struct sigaction sa = {}; + sa.sa_handler = SIG_IGN; + const auto scoped_sigaction = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(kSigno, sa)); + sigset_t mask; + sigemptyset(&mask); + sigaddset(&mask, kSigno); + auto scoped_sigmask = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, mask)); + + // Create a thread which will send us kSigno while we are blocked in + // sigtimedwait. + pid_t tid = gettid(); + ScopedThread sigthread([&] { + absl::SleepFor(kSigtimedwaitSetupTime); + EXPECT_THAT(tgkill(getpid(), tid, kSigno), SyscallSucceeds()); + }); + + // sigtimedwait should not observe kSigno since it is ignored and already + // unmasked, causing it to be dropped before it is enqueued. 
+ struct timespec timeout_ts = absl::ToTimespec(kSigtimedwaitTimeout); + EXPECT_THAT(RetryEINTR(sigtimedwait)(&mask, nullptr, &timeout_ts), + SyscallFailsWithErrno(EAGAIN)); +} + +TEST(SigtimedwaitTest, IgnoredMaskedSignal) { + constexpr int kSigno = SIGUSR1; + constexpr auto kSigtimedwaitSetupTime = absl::Seconds(2); + constexpr auto kSigtimedwaitTimeout = absl::Seconds(5); + ASSERT_GT(kSigtimedwaitTimeout, kSigtimedwaitSetupTime); + + // Ensure that kSigno is ignored, and masked on this thread. + struct sigaction sa = {}; + sa.sa_handler = SIG_IGN; + const auto scoped_sigaction = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(kSigno, sa)); + sigset_t mask; + sigemptyset(&mask); + sigaddset(&mask, kSigno); + auto scoped_sigmask = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_BLOCK, mask)); + + // Create a thread which will send us kSigno while we are blocked in + // sigtimedwait. + pid_t tid = gettid(); + ScopedThread sigthread([&] { + absl::SleepFor(kSigtimedwaitSetupTime); + EXPECT_THAT(tgkill(getpid(), tid, kSigno), SyscallSucceeds()); + }); + + // sigtimedwait should observe kSigno since it is normally masked, causing it + // to be enqueued despite being ignored. + struct timespec timeout_ts = absl::ToTimespec(kSigtimedwaitTimeout); + EXPECT_THAT(RetryEINTR(sigtimedwait)(&mask, nullptr, &timeout_ts), + SyscallSucceedsWithValue(kSigno)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor + +int main(int argc, char** argv) { + // These tests depend on delivering SIGALRM/SIGCHLD to the main thread or in + // sigtimedwait. Block them so that any other threads created by TestInit will + // also have them blocked. 
+ sigset_t set; + sigemptyset(&set); + sigaddset(&set, SIGALRM); + sigaddset(&set, SIGCHLD); + TEST_PCHECK(sigprocmask(SIG_BLOCK, &set, nullptr) == 0); + + gvisor::testing::TestInit(&argc, &argv); + + return RUN_ALL_TESTS(); +} diff --git a/test/syscalls/linux/socket_abstract.cc b/test/syscalls/linux/socket_abstract.cc new file mode 100644 index 000000000..7b111a2dd --- /dev/null +++ b/test/syscalls/linux/socket_abstract.cc @@ -0,0 +1,43 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <vector> + +#include "test/syscalls/linux/socket_generic.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/socket_unix.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +std::vector<SocketPairKind> GetSocketPairs() { + return VecCat<SocketPairKind>(ApplyVec<SocketPairKind>( + AbstractBoundUnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_STREAM, SOCK_DGRAM, SOCK_SEQPACKET}, + List<int>{0, SOCK_NONBLOCK}, + List<int>{0, SOCK_CLOEXEC}))); +} + +INSTANTIATE_TEST_CASE_P( + AllUnixDomainSockets, AllSocketPairTest, + ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); + +INSTANTIATE_TEST_CASE_P( + AllUnixDomainSockets, UnixSocketPairTest, + ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_filesystem.cc b/test/syscalls/linux/socket_filesystem.cc new file mode 100644 index 000000000..eea6f2810 --- /dev/null +++ b/test/syscalls/linux/socket_filesystem.cc @@ -0,0 +1,43 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <vector> + +#include "test/syscalls/linux/socket_generic.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/socket_unix.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +std::vector<SocketPairKind> GetSocketPairs() { + return VecCat<SocketPairKind>(ApplyVec<SocketPairKind>( + FilesystemBoundUnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_STREAM, SOCK_DGRAM, SOCK_SEQPACKET}, + List<int>{0, SOCK_NONBLOCK}, + List<int>{0, SOCK_CLOEXEC}))); +} + +INSTANTIATE_TEST_CASE_P( + AllUnixDomainSockets, AllSocketPairTest, + ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); + +INSTANTIATE_TEST_CASE_P( + AllUnixDomainSockets, UnixSocketPairTest, + ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_generic.cc b/test/syscalls/linux/socket_generic.cc new file mode 100644 index 000000000..fbc3bebed --- /dev/null +++ b/test/syscalls/linux/socket_generic.cc @@ -0,0 +1,403 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "test/syscalls/linux/socket_generic.h" + +#include <stdio.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/un.h> + +#include <string> +#include <vector> + +#include "gtest/gtest.h" +#include "absl/strings/string_view.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" + +// This file is a generic socket test file. It must be built with another file +// that provides the test types. + +namespace gvisor { +namespace testing { + +// Writes 3 bytes to the first socket and expects to read the same 3 bytes +// from the second. +TEST_P(AllSocketPairTest, BasicReadWrite) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + char buf[20]; + const std::string data = "abc"; + ASSERT_THAT(WriteFd(sockets->first_fd(), data.c_str(), 3), + SyscallSucceedsWithValue(3)); + ASSERT_THAT(ReadFd(sockets->second_fd(), buf, 3), + SyscallSucceedsWithValue(3)); + EXPECT_EQ(data, absl::string_view(buf, 3)); +} + +// Sends a 512-byte buffer filled by RandomizeBuffer with send(2) and expects +// recv(2) on the peer to return it intact. +TEST_P(AllSocketPairTest, BasicSendRecv) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + char sent_data[512]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + ASSERT_THAT( + RetryEINTR(send)(sockets->first_fd(), sent_data, sizeof(sent_data), 0), + SyscallSucceedsWithValue(sizeof(sent_data))); + char received_data[sizeof(sent_data)]; + ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data, + sizeof(received_data), 0), + SyscallSucceedsWithValue(sizeof(received_data))); + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); +} + +// Sends 200 bytes as ten 20-byte messages in one sendmmsg(2) call, checks +// each msg_len, then reads the chunks back one at a time. +TEST_P(AllSocketPairTest, BasicSendmmsg) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + char sent_data[200]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + std::vector<struct mmsghdr> msgs(10); + std::vector<struct iovec> iovs(msgs.size()); + const int chunk_size = sizeof(sent_data) / msgs.size(); + for (size_t i = 0; i < msgs.size(); i++) { + iovs[i].iov_len = chunk_size; + iovs[i].iov_base = &sent_data[i * chunk_size]; + msgs[i].msg_hdr.msg_iov = &iovs[i]; + 
msgs[i].msg_hdr.msg_iovlen = 1; + } + + ASSERT_THAT( + RetryEINTR(sendmmsg)(sockets->first_fd(), &msgs[0], msgs.size(), 0), + SyscallSucceedsWithValue(msgs.size())); + + for (const struct mmsghdr& msg : msgs) { + EXPECT_EQ(chunk_size, msg.msg_len); + } + + char received_data[sizeof(sent_data)]; + for (size_t i = 0; i < msgs.size(); i++) { + ASSERT_THAT(ReadFd(sockets->second_fd(), &received_data[i * chunk_size], + chunk_size), + SyscallSucceedsWithValue(chunk_size)); + } + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); +} + +// Writes ten 20-byte chunks, then receives all of them with a single +// recvmmsg(2) call and checks the data and each msg_len. +TEST_P(AllSocketPairTest, BasicRecvmmsg) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + char sent_data[200]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + char received_data[sizeof(sent_data)]; + std::vector<struct mmsghdr> msgs(10); + std::vector<struct iovec> iovs(msgs.size()); + const int chunk_size = sizeof(sent_data) / msgs.size(); + for (size_t i = 0; i < msgs.size(); i++) { + iovs[i].iov_len = chunk_size; + iovs[i].iov_base = &received_data[i * chunk_size]; + msgs[i].msg_hdr.msg_iov = &iovs[i]; + msgs[i].msg_hdr.msg_iovlen = 1; + } + + for (size_t i = 0; i < msgs.size(); i++) { + ASSERT_THAT( + WriteFd(sockets->first_fd(), &sent_data[i * chunk_size], chunk_size), + SyscallSucceedsWithValue(chunk_size)); + } + + ASSERT_THAT(RetryEINTR(recvmmsg)(sockets->second_fd(), &msgs[0], msgs.size(), + 0, nullptr), + SyscallSucceedsWithValue(msgs.size())); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); + + for (const struct mmsghdr& msg : msgs) { + EXPECT_EQ(chunk_size, msg.msg_len); + } +} + +// Round-trips a 10 KiB payload through the SendNullCmsg / RecvNoCmsg helpers. +TEST_P(AllSocketPairTest, SendmsgRecvmsg10KB) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + std::vector<char> sent_data(10 * 1024); + RandomizeBuffer(sent_data.data(), sent_data.size()); + ASSERT_NO_FATAL_FAILURE( + SendNullCmsg(sockets->first_fd(), sent_data.data(), sent_data.size())); + + std::vector<char> received_data(sent_data.size()); + 
ASSERT_NO_FATAL_FAILURE(RecvNoCmsg(sockets->second_fd(), received_data.data(), + received_data.size())); + + EXPECT_EQ(0, + memcmp(sent_data.data(), received_data.data(), sent_data.size())); +} + +// This test validates that a sendmsg/recvmsg w/ MSG_CTRUNC is a no-op on +// input flags. +TEST_P(AllSocketPairTest, SendmsgRecvmsgMsgCtruncNoop) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + std::vector<char> sent_data(10 * 1024); + RandomizeBuffer(sent_data.data(), sent_data.size()); + ASSERT_NO_FATAL_FAILURE( + SendNullCmsg(sockets->first_fd(), sent_data.data(), sent_data.size())); + + std::vector<char> received_data(sent_data.size()); + struct msghdr msg = {}; + char control[CMSG_SPACE(sizeof(int)) + CMSG_SPACE(sizeof(struct ucred))]; + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + struct iovec iov; + iov.iov_base = &received_data[0]; + iov.iov_len = received_data.size(); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + // MSG_CTRUNC should be a no-op. 
+ ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, MSG_CTRUNC), + SyscallSucceedsWithValue(received_data.size())); + struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg); + EXPECT_EQ(cmsg, nullptr); + EXPECT_EQ(msg.msg_controllen, 0); + EXPECT_EQ(0, + memcmp(sent_data.data(), received_data.data(), sent_data.size())); +} + +// Same round trip as SendmsgRecvmsg10KB, with a 16 KiB payload. +TEST_P(AllSocketPairTest, SendmsgRecvmsg16KB) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + std::vector<char> sent_data(16 * 1024); + RandomizeBuffer(sent_data.data(), sent_data.size()); + ASSERT_NO_FATAL_FAILURE( + SendNullCmsg(sockets->first_fd(), sent_data.data(), sent_data.size())); + + std::vector<char> received_data(sent_data.size()); + ASSERT_NO_FATAL_FAILURE(RecvNoCmsg(sockets->second_fd(), received_data.data(), + received_data.size())); + + EXPECT_EQ(0, + memcmp(sent_data.data(), received_data.data(), sent_data.size())); +} + +// recvmmsg(2) with a negative timeout ({-1, -1}) must fail with EINVAL. +TEST_P(AllSocketPairTest, RecvmmsgInvalidTimeout) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + char buf[10]; + struct mmsghdr msg = {}; + struct iovec iov = {}; + iov.iov_len = sizeof(buf); + iov.iov_base = buf; + msg.msg_hdr.msg_iov = &iov; + msg.msg_hdr.msg_iovlen = 1; + struct timespec timeout = {-1, -1}; + ASSERT_THAT(RetryEINTR(recvmmsg)(sockets->first_fd(), &msg, 1, 0, &timeout), + SyscallFailsWithErrno(EINVAL)); +} + +// recvmmsg(2) with a zero timeout and nothing queued must fail with EAGAIN. +TEST_P(AllSocketPairTest, RecvmmsgTimeoutBeforeRecv) { + // There is a known bug in the Linux recvmmsg(2) causing it to block forever + // if the timeout expires while blocking for the first message. 
+ SKIP_IF(!IsRunningOnGvisor()); + + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + char buf[10]; + struct mmsghdr msg = {}; + struct iovec iov = {}; + iov.iov_len = sizeof(buf); + iov.iov_base = buf; + msg.msg_hdr.msg_iov = &iov; + msg.msg_hdr.msg_iovlen = 1; + struct timespec timeout = {}; + ASSERT_THAT(RetryEINTR(recvmmsg)(sockets->first_fd(), &msg, 1, 0, &timeout), + SyscallFailsWithErrno(EAGAIN)); +} + +// Data read with MSG_PEEK stays queued: three peeks and one ordinary recv(2) +// all return the same 50 bytes. +TEST_P(AllSocketPairTest, MsgPeek) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + char sent_data[50]; + memset(&sent_data, 0, sizeof(sent_data)); + ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data, sizeof(sent_data)), + SyscallSucceedsWithValue(sizeof(sent_data))); + + char received_data[sizeof(sent_data)]; + for (int i = 0; i < 3; i++) { + memset(received_data, 0, sizeof(received_data)); + EXPECT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data, + sizeof(received_data), MSG_PEEK), + SyscallSucceedsWithValue(sizeof(received_data))); + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(received_data))); + } + + ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data, + sizeof(received_data), 0), + SyscallSucceedsWithValue(sizeof(received_data))); + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(received_data))); +} + +// SO_LINGER reads back as an all-zero struct linger on a new socket pair. +TEST_P(AllSocketPairTest, LingerSocketOption) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + struct linger got_linger = {-1, -1}; + socklen_t length = sizeof(struct linger); + EXPECT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, SO_LINGER, + &got_linger, &length), + SyscallSucceedsWithValue(0)); + struct linger want_linger = {}; + EXPECT_EQ(0, memcmp(&want_linger, &got_linger, sizeof(struct linger))); + EXPECT_EQ(sizeof(struct linger), length); +} + +// SO_KEEPALIVE reads back as 0 on a new socket pair. +TEST_P(AllSocketPairTest, KeepAliveSocketOption) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + int keepalive = -1; + socklen_t length = sizeof(int); + EXPECT_THAT(getsockopt(sockets->first_fd(), 
SOL_SOCKET, SO_KEEPALIVE, + &keepalive, &length), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(0, keepalive); + EXPECT_EQ(sizeof(int), length); +} + +// SO_RCVBUF can be read and is positive. +TEST_P(AllSocketPairTest, RcvBufSucceeds) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + int size = 0; + socklen_t size_size = sizeof(size); + EXPECT_THAT( + getsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVBUF, &size, &size_size), + SyscallSucceeds()); + EXPECT_GT(size, 0); +} + +// SO_SNDBUF can be read and is positive. +TEST_P(AllSocketPairTest, SndBufSucceeds) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + int size = 0; + socklen_t size_size = sizeof(size); + EXPECT_THAT( + getsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDBUF, &size, &size_size), + SyscallSucceeds()); + EXPECT_GT(size, 0); +} + +// With a 10 microsecond SO_RCVTIMEO and nothing to read, recv(2) fails with +// EAGAIN. +TEST_P(AllSocketPairTest, RecvTimeoutSucceeds) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + struct timeval tv { + .tv_sec = 0, .tv_usec = 10 + }; + EXPECT_THAT( + setsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)), + SyscallSucceeds()); + + char buf[20] = {}; + EXPECT_THAT(RetryEINTR(recv)(sockets->first_fd(), buf, sizeof(buf), 0), + SyscallFailsWithErrno(EAGAIN)); +} + +// Same check as RecvTimeoutSucceeds, with a one second timeout. +TEST_P(AllSocketPairTest, RecvTimeoutOneSecondSucceeds) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + struct timeval tv { + .tv_sec = 1, .tv_usec = 0 + }; + EXPECT_THAT( + setsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)), + SyscallSucceeds()); + + char buf[20] = {}; + EXPECT_THAT(RetryEINTR(recv)(sockets->first_fd(), buf, sizeof(buf), 0), + SyscallFailsWithErrno(EAGAIN)); +} + +// With a 10 microsecond SO_RCVTIMEO and nothing to read, recvmsg(2) fails +// with EAGAIN. +TEST_P(AllSocketPairTest, RecvmsgTimeoutSucceeds) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + struct timeval tv { + .tv_sec = 0, .tv_usec = 10 + }; + EXPECT_THAT( + setsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)), + SyscallSucceeds()); + + struct msghdr msg = {}; + char buf[20] = {}; + struct iovec iov; + iov.iov_base = buf; + iov.iov_len = 
sizeof(buf); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + EXPECT_THAT(RetryEINTR(recvmsg)(sockets->first_fd(), &msg, 0), + SyscallFailsWithErrno(EAGAIN)); +} + +// Setting SO_RCVTIMEO with a struct timeval succeeds. +TEST_P(AllSocketPairTest, SoRcvTimeoIsSet) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + struct timeval tv { + .tv_sec = 0, .tv_usec = 35 + }; + EXPECT_THAT( + setsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)), + SyscallSucceeds()); +} + +// Setting SO_RCVTIMEO succeeds when optlen is larger than +// sizeof(struct timeval). +TEST_P(AllSocketPairTest, SoRcvTimeoIsSetLargerArg) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + struct timeval_with_extra { + struct timeval tv; + int64_t extra_data; + } ABSL_ATTRIBUTE_PACKED; + + // Zero-initialize so the trailing bytes handed to setsockopt(2) are defined. + timeval_with_extra tv_extra = {}; + tv_extra.tv.tv_sec = 0; + tv_extra.tv.tv_usec = 25; + + EXPECT_THAT(setsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVTIMEO, + &tv_extra, sizeof(tv_extra)), + SyscallSucceeds()); +} + +// Same check as RecvmsgTimeoutSucceeds, with a one second timeout. +TEST_P(AllSocketPairTest, RecvmsgTimeoutOneSecondSucceeds) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + struct timeval tv { + .tv_sec = 1, .tv_usec = 0 + }; + EXPECT_THAT( + setsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)), + SyscallSucceeds()); + + struct msghdr msg = {}; + char buf[20] = {}; + struct iovec iov; + iov.iov_base = buf; + iov.iov_len = sizeof(buf); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + EXPECT_THAT(RetryEINTR(recvmsg)(sockets->first_fd(), &msg, 0), + SyscallFailsWithErrno(EAGAIN)); +} + +// recv(2) with MSG_WAITALL returns the whole 100-byte buffer that was written. +TEST_P(AllSocketPairTest, RecvWaitAll) { + SKIP_IF(IsRunningOnGvisor()); // FIXME: Support MSG_WAITALL. 
+ + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[100]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + ASSERT_THAT(write(sockets->first_fd(), sent_data, sizeof(sent_data)), + SyscallSucceedsWithValue(sizeof(sent_data))); + + char received_data[sizeof(sent_data)] = {}; + ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data, + sizeof(received_data), MSG_WAITALL), + SyscallSucceedsWithValue(sizeof(sent_data))); + + // Check the payload as well as the byte count. + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); +} + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_generic.h b/test/syscalls/linux/socket_generic.h new file mode 100644 index 000000000..cd826abcf --- /dev/null +++ b/test/syscalls/linux/socket_generic.h @@ -0,0 +1,30 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_GENERIC_H_ +#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_GENERIC_H_ + +#include "test/syscalls/linux/socket_test_util.h" + +namespace gvisor { +namespace testing { + +// Test fixture for tests that apply to pairs of blocking and non-blocking +// connected sockets, regardless of socket type. 
+using AllSocketPairTest = SocketPairTest; + +} // namespace testing +} // namespace gvisor + +#endif // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_GENERIC_H_ diff --git a/test/syscalls/linux/socket_inet_loopback.cc b/test/syscalls/linux/socket_inet_loopback.cc new file mode 100644 index 000000000..7bdbd7797 --- /dev/null +++ b/test/syscalls/linux/socket_inet_loopback.cc @@ -0,0 +1,812 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <arpa/inet.h> +#include <netinet/in.h> +#include <string.h> +#include <sys/socket.h> + +#include <string> +#include <tuple> +#include <utility> +#include <vector> + +#include "gtest/gtest.h" +#include "absl/strings/str_cat.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/posix_error.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +/* Returns the port field of addr (sin_port for AF_INET, sin6_port for AF_INET6) exactly as stored, with no byte-order conversion. Any other family yields PosixError(EINVAL). */ PosixErrorOr<uint16_t> AddrPort(int family, sockaddr_storage const& addr) { + switch (family) { + case AF_INET: + return static_cast<uint16_t>( + reinterpret_cast<sockaddr_in const*>(&addr)->sin_port); + case AF_INET6: + return static_cast<uint16_t>( + reinterpret_cast<sockaddr_in6 const*>(&addr)->sin6_port); + default: + return PosixError(EINVAL, + absl::StrCat("unknown socket family: ", family)); + } +} + +/* Stores port into the port field of *addr (sin_port for AF_INET, sin6_port for AF_INET6) unchanged, with no byte-order conversion. Any other family yields PosixError(EINVAL). */ PosixError SetAddrPort(int family, sockaddr_storage* addr, uint16_t port) { + switch (family) { + case AF_INET: + 
reinterpret_cast<sockaddr_in*>(addr)->sin_port = port; + return NoError(); + case AF_INET6: + reinterpret_cast<sockaddr_in6*>(addr)->sin6_port = port; + return NoError(); + default: + return PosixError(EINVAL, + absl::StrCat("unknown socket family: ", family)); + } +} + +/* A socket address, its length, and a description string that DescribeTestParam uses to build test names. */ struct TestAddress { + std::string description; + sockaddr_storage addr; + socklen_t addr_len; + + int family() const { return addr.ss_family; } + /* addr and addr_len are value-initialized here, so every field the factories below do not set (including the port) starts as zero. */ explicit TestAddress(std::string description = "") + : description(std::move(description)), addr(), addr_len() {} +}; + +/* IPv4 wildcard address (INADDR_ANY). */ TestAddress V4Any() { + TestAddress t("V4Any"); + t.addr.ss_family = AF_INET; + t.addr_len = sizeof(sockaddr_in); + reinterpret_cast<sockaddr_in*>(&t.addr)->sin_addr.s_addr = htonl(INADDR_ANY); + return t; +} + +/* IPv4 loopback address (INADDR_LOOPBACK). */ TestAddress V4Loopback() { + TestAddress t("V4Loopback"); + t.addr.ss_family = AF_INET; + t.addr_len = sizeof(sockaddr_in); + reinterpret_cast<sockaddr_in*>(&t.addr)->sin_addr.s_addr = + htonl(INADDR_LOOPBACK); + return t; +} + +/* AF_INET6 address holding the IPv4-mapped wildcard ::ffff:0.0.0.0. The inet_pton result is not checked. */ TestAddress V4MappedAny() { + TestAddress t("V4MappedAny"); + t.addr.ss_family = AF_INET6; + t.addr_len = sizeof(sockaddr_in6); + inet_pton(AF_INET6, "::ffff:0.0.0.0", + reinterpret_cast<sockaddr_in6*>(&t.addr)->sin6_addr.s6_addr); + return t; +} + +/* AF_INET6 address holding the IPv4-mapped loopback ::ffff:127.0.0.1. The inet_pton result is not checked. */ TestAddress V4MappedLoopback() { + TestAddress t("V4MappedLoopback"); + t.addr.ss_family = AF_INET6; + t.addr_len = sizeof(sockaddr_in6); + inet_pton(AF_INET6, "::ffff:127.0.0.1", + reinterpret_cast<sockaddr_in6*>(&t.addr)->sin6_addr.s6_addr); + return t; +} + +/* IPv6 wildcard address (in6addr_any). */ TestAddress V6Any() { + TestAddress t("V6Any"); + t.addr.ss_family = AF_INET6; + t.addr_len = sizeof(sockaddr_in6); + reinterpret_cast<sockaddr_in6*>(&t.addr)->sin6_addr = in6addr_any; + return t; +} + +/* IPv6 loopback address (in6addr_loopback). */ TestAddress V6Loopback() { + TestAddress t("V6Loopback"); + t.addr.ss_family = AF_INET6; + t.addr_len = sizeof(sockaddr_in6); + reinterpret_cast<sockaddr_in6*>(&t.addr)->sin6_addr = in6addr_loopback; + return t; +} + +/* One SocketInetLoopbackTest case: the address the listening socket binds to and the address the connecting socket dials. */ struct TestParam { + TestAddress listener; + TestAddress connector; +}; + 
+std::string DescribeTestParam(::testing::TestParamInfo<TestParam> const& info) { + return absl::StrCat("Listen", info.param.listener.description, "_Connect", + info.param.connector.description); +} + +using SocketInetLoopbackTest = ::testing::TestWithParam<TestParam>; + +TEST(BadSocketPairArgs, ValidateErrForBadCallsToSocketPair) { + int fd[2] = {}; + + // An AF that is valid but not supported by socketpair(2) returns + // ESOCKTNOSUPPORT. + ASSERT_THAT(socketpair(AF_INET, 0, 0, fd), + SyscallFailsWithErrno(ESOCKTNOSUPPORT)); + ASSERT_THAT(socketpair(AF_INET6, 0, 0, fd), + SyscallFailsWithErrno(ESOCKTNOSUPPORT)); + + // An invalid AF returns EAFNOSUPPORT. + ASSERT_THAT(socketpair(AF_MAX, 0, 0, fd), + SyscallFailsWithErrno(EAFNOSUPPORT)); + ASSERT_THAT(socketpair(8675309, 0, 0, fd), + SyscallFailsWithErrno(EAFNOSUPPORT)); +} + +TEST_P(SocketInetLoopbackTest, TCP) { + auto const& param = GetParam(); + + TestAddress const& listener = param.listener; + TestAddress const& connector = param.connector; + + // Create the listening socket. + const FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE( + Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP)); + sockaddr_storage listen_addr = listener.addr; + ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast<sockaddr*>(&listen_addr), + listener.addr_len), + SyscallSucceeds()); + ASSERT_THAT(listen(listen_fd.get(), SOMAXCONN), SyscallSucceeds()); + + // Get the port bound by the listening socket. + socklen_t addrlen = listener.addr_len; + ASSERT_THAT(getsockname(listen_fd.get(), + reinterpret_cast<sockaddr*>(&listen_addr), &addrlen), + SyscallSucceeds()); + uint16_t const port = + ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr)); + + // Connect to the listening socket. 
+ const FileDescriptor conn_fd = ASSERT_NO_ERRNO_AND_VALUE( + Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP)); + sockaddr_storage conn_addr = connector.addr; + ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port)); + ASSERT_THAT(RetryEINTR(connect)(conn_fd.get(), + reinterpret_cast<sockaddr*>(&conn_addr), + connector.addr_len), + SyscallSucceeds()); + + // Accept the connection. + ASSERT_NO_ERRNO_AND_VALUE(Accept(listen_fd.get(), nullptr, nullptr)); + + ASSERT_THAT(shutdown(listen_fd.get(), SHUT_RDWR), SyscallSucceeds()); + + ASSERT_THAT(shutdown(conn_fd.get(), SHUT_RDWR), SyscallSucceeds()); +} + +INSTANTIATE_TEST_CASE_P( + All, SocketInetLoopbackTest, + ::testing::Values( + // Listeners bound to IPv4 addresses refuse connections using IPv6 + // addresses. + TestParam{V4Any(), V4Any()}, TestParam{V4Any(), V4Loopback()}, + TestParam{V4Any(), V4MappedAny()}, + TestParam{V4Any(), V4MappedLoopback()}, + TestParam{V4Loopback(), V4Any()}, TestParam{V4Loopback(), V4Loopback()}, + TestParam{V4Loopback(), V4MappedLoopback()}, + TestParam{V4MappedAny(), V4Any()}, + TestParam{V4MappedAny(), V4Loopback()}, + TestParam{V4MappedAny(), V4MappedAny()}, + TestParam{V4MappedAny(), V4MappedLoopback()}, + TestParam{V4MappedLoopback(), V4Any()}, + TestParam{V4MappedLoopback(), V4Loopback()}, + TestParam{V4MappedLoopback(), V4MappedLoopback()}, + + // Listeners bound to IN6ADDR_ANY accept all connections. + TestParam{V6Any(), V4Any()}, TestParam{V6Any(), V4Loopback()}, + TestParam{V6Any(), V4MappedAny()}, + TestParam{V6Any(), V4MappedLoopback()}, TestParam{V6Any(), V6Any()}, + TestParam{V6Any(), V6Loopback()}, + + // Listeners bound to IN6ADDR_LOOPBACK refuse connections using IPv4 + // addresses. 
+ TestParam{V6Loopback(), V6Any()}, + TestParam{V6Loopback(), V6Loopback()}), + DescribeTestParam); + +struct ProtocolTestParam { + std::string description; + int type; +}; + +std::string DescribeProtocolTestParam( + ::testing::TestParamInfo<ProtocolTestParam> const& info) { + return info.param.description; +} + +using SocketMultiProtocolInetLoopbackTest = + ::testing::TestWithParam<ProtocolTestParam>; + +TEST_P(SocketMultiProtocolInetLoopbackTest, V4MappedLoopbackOnlyReservesV4) { + auto const& param = GetParam(); + + for (int i = 0; true; i++) { + // Bind the v4 loopback on a dual stack socket. + TestAddress const& test_addr_dual = V4MappedLoopback(); + sockaddr_storage addr_dual = test_addr_dual.addr; + const FileDescriptor fd_dual = ASSERT_NO_ERRNO_AND_VALUE( + Socket(test_addr_dual.family(), param.type, 0)); + ASSERT_THAT(bind(fd_dual.get(), reinterpret_cast<sockaddr*>(&addr_dual), + test_addr_dual.addr_len), + SyscallSucceeds()); + + // Get the port that we bound. + socklen_t addrlen = test_addr_dual.addr_len; + ASSERT_THAT(getsockname(fd_dual.get(), + reinterpret_cast<sockaddr*>(&addr_dual), &addrlen), + SyscallSucceeds()); + uint16_t const port = + ASSERT_NO_ERRNO_AND_VALUE(AddrPort(test_addr_dual.family(), addr_dual)); + + // Verify that we can still bind the v6 loopback on the same port. + TestAddress const& test_addr_v6 = V6Loopback(); + sockaddr_storage addr_v6 = test_addr_v6.addr; + ASSERT_NO_ERRNO(SetAddrPort(test_addr_v6.family(), &addr_v6, port)); + const FileDescriptor fd_v6 = + ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr_v6.family(), param.type, 0)); + int ret = bind(fd_v6.get(), reinterpret_cast<sockaddr*>(&addr_v6), + test_addr_v6.addr_len); + if (ret == -1 && errno == EADDRINUSE) { + // Port may have been in use. + ASSERT_LT(i, 100); // Give up after 100 tries. + continue; + } + ASSERT_THAT(ret, SyscallSucceeds()); + + // Verify that binding the v4 loopback with the same port on a v4 socket + // fails. 
+ TestAddress const& test_addr_v4 = V4Loopback(); + sockaddr_storage addr_v4 = test_addr_v4.addr; + ASSERT_NO_ERRNO(SetAddrPort(test_addr_v4.family(), &addr_v4, port)); + const FileDescriptor fd_v4 = + ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr_v4.family(), param.type, 0)); + ASSERT_THAT(bind(fd_v4.get(), reinterpret_cast<sockaddr*>(&addr_v4), + test_addr_v4.addr_len), + SyscallFailsWithErrno(EADDRINUSE)); + + // No need to try again. + break; + } +} + +TEST_P(SocketMultiProtocolInetLoopbackTest, V4MappedAnyOnlyReservesV4) { + auto const& param = GetParam(); + + for (int i = 0; true; i++) { + // Bind the v4 any on a dual stack socket. + TestAddress const& test_addr_dual = V4MappedAny(); + sockaddr_storage addr_dual = test_addr_dual.addr; + const FileDescriptor fd_dual = ASSERT_NO_ERRNO_AND_VALUE( + Socket(test_addr_dual.family(), param.type, 0)); + ASSERT_THAT(bind(fd_dual.get(), reinterpret_cast<sockaddr*>(&addr_dual), + test_addr_dual.addr_len), + SyscallSucceeds()); + + // Get the port that we bound. + socklen_t addrlen = test_addr_dual.addr_len; + ASSERT_THAT(getsockname(fd_dual.get(), + reinterpret_cast<sockaddr*>(&addr_dual), &addrlen), + SyscallSucceeds()); + uint16_t const port = + ASSERT_NO_ERRNO_AND_VALUE(AddrPort(test_addr_dual.family(), addr_dual)); + + // Verify that we can still bind the v6 loopback on the same port. + TestAddress const& test_addr_v6 = V6Loopback(); + sockaddr_storage addr_v6 = test_addr_v6.addr; + ASSERT_NO_ERRNO(SetAddrPort(test_addr_v6.family(), &addr_v6, port)); + const FileDescriptor fd_v6 = + ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr_v6.family(), param.type, 0)); + int ret = bind(fd_v6.get(), reinterpret_cast<sockaddr*>(&addr_v6), + test_addr_v6.addr_len); + if (ret == -1 && errno == EADDRINUSE) { + // Port may have been in use. + ASSERT_LT(i, 100); // Give up after 100 tries. + continue; + } + ASSERT_THAT(ret, SyscallSucceeds()); + + // Verify that binding the v4 loopback with the same port on a v4 socket + // fails. 
+ TestAddress const& test_addr_v4 = V4Loopback(); + sockaddr_storage addr_v4 = test_addr_v4.addr; + ASSERT_NO_ERRNO(SetAddrPort(test_addr_v4.family(), &addr_v4, port)); + const FileDescriptor fd_v4 = + ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr_v4.family(), param.type, 0)); + ASSERT_THAT(bind(fd_v4.get(), reinterpret_cast<sockaddr*>(&addr_v4), + test_addr_v4.addr_len), + SyscallFailsWithErrno(EADDRINUSE)); + + // No need to try again. + break; + } +} + +TEST_P(SocketMultiProtocolInetLoopbackTest, DualStackV6AnyReservesEverything) { + auto const& param = GetParam(); + + // Bind the v6 any on a dual stack socket. + TestAddress const& test_addr_dual = V6Any(); + sockaddr_storage addr_dual = test_addr_dual.addr; + const FileDescriptor fd_dual = + ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr_dual.family(), param.type, 0)); + ASSERT_THAT(bind(fd_dual.get(), reinterpret_cast<sockaddr*>(&addr_dual), + test_addr_dual.addr_len), + SyscallSucceeds()); + + // Get the port that we bound. + socklen_t addrlen = test_addr_dual.addr_len; + ASSERT_THAT(getsockname(fd_dual.get(), + reinterpret_cast<sockaddr*>(&addr_dual), &addrlen), + SyscallSucceeds()); + uint16_t const port = + ASSERT_NO_ERRNO_AND_VALUE(AddrPort(test_addr_dual.family(), addr_dual)); + + // Verify that binding the v6 loopback with the same port fails. + TestAddress const& test_addr_v6 = V6Loopback(); + sockaddr_storage addr_v6 = test_addr_v6.addr; + ASSERT_NO_ERRNO(SetAddrPort(test_addr_v6.family(), &addr_v6, port)); + const FileDescriptor fd_v6 = + ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr_v6.family(), param.type, 0)); + ASSERT_THAT(bind(fd_v6.get(), reinterpret_cast<sockaddr*>(&addr_v6), + test_addr_v6.addr_len), + SyscallFailsWithErrno(EADDRINUSE)); + + // Verify that binding the v4 loopback on the same port with a v6 socket + // fails. 
+ TestAddress const& test_addr_v4_mapped = V4MappedLoopback(); + sockaddr_storage addr_v4_mapped = test_addr_v4_mapped.addr; + ASSERT_NO_ERRNO( + SetAddrPort(test_addr_v4_mapped.family(), &addr_v4_mapped, port)); + const FileDescriptor fd_v4_mapped = ASSERT_NO_ERRNO_AND_VALUE( + Socket(test_addr_v4_mapped.family(), param.type, 0)); + ASSERT_THAT( + bind(fd_v4_mapped.get(), reinterpret_cast<sockaddr*>(&addr_v4_mapped), + test_addr_v4_mapped.addr_len), + SyscallFailsWithErrno(EADDRINUSE)); + + // Verify that binding the v4 loopback on the same port with a v4 socket + // fails. + TestAddress const& test_addr_v4 = V4Loopback(); + sockaddr_storage addr_v4 = test_addr_v4.addr; + ASSERT_NO_ERRNO(SetAddrPort(test_addr_v4.family(), &addr_v4, port)); + const FileDescriptor fd_v4 = + ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr_v4.family(), param.type, 0)); + ASSERT_THAT(bind(fd_v4.get(), reinterpret_cast<sockaddr*>(&addr_v4), + test_addr_v4.addr_len), + SyscallFailsWithErrno(EADDRINUSE)); +} + +TEST_P(SocketMultiProtocolInetLoopbackTest, V6OnlyV6AnyReservesV6) { + auto const& param = GetParam(); + + for (int i = 0; true; i++) { + // Bind the v6 any on a v6-only socket. + TestAddress const& test_addr_dual = V6Any(); + sockaddr_storage addr_dual = test_addr_dual.addr; + const FileDescriptor fd_dual = ASSERT_NO_ERRNO_AND_VALUE( + Socket(test_addr_dual.family(), param.type, 0)); + int one = 1; + EXPECT_THAT( + setsockopt(fd_dual.get(), IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one)), + SyscallSucceeds()); + ASSERT_THAT(bind(fd_dual.get(), reinterpret_cast<sockaddr*>(&addr_dual), + test_addr_dual.addr_len), + SyscallSucceeds()); + + // Get the port that we bound. 
+ socklen_t addrlen = test_addr_dual.addr_len; + ASSERT_THAT(getsockname(fd_dual.get(), + reinterpret_cast<sockaddr*>(&addr_dual), &addrlen), + SyscallSucceeds()); + uint16_t const port = + ASSERT_NO_ERRNO_AND_VALUE(AddrPort(test_addr_dual.family(), addr_dual)); + + // Verify that binding the v6 loopback with the same port fails. + TestAddress const& test_addr_v6 = V6Loopback(); + sockaddr_storage addr_v6 = test_addr_v6.addr; + ASSERT_NO_ERRNO(SetAddrPort(test_addr_v6.family(), &addr_v6, port)); + const FileDescriptor fd_v6 = + ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr_v6.family(), param.type, 0)); + ASSERT_THAT(bind(fd_v6.get(), reinterpret_cast<sockaddr*>(&addr_v6), + test_addr_v6.addr_len), + SyscallFailsWithErrno(EADDRINUSE)); + + // Verify that we can still bind the v4 loopback on the same port. + TestAddress const& test_addr_v4_mapped = V4MappedLoopback(); + sockaddr_storage addr_v4_mapped = test_addr_v4_mapped.addr; + ASSERT_NO_ERRNO( + SetAddrPort(test_addr_v4_mapped.family(), &addr_v4_mapped, port)); + const FileDescriptor fd_v4_mapped = ASSERT_NO_ERRNO_AND_VALUE( + Socket(test_addr_v4_mapped.family(), param.type, 0)); + int ret = + bind(fd_v4_mapped.get(), reinterpret_cast<sockaddr*>(&addr_v4_mapped), + test_addr_v4_mapped.addr_len); + if (ret == -1 && errno == EADDRINUSE) { + // Port may have been in use. + ASSERT_LT(i, 100); // Give up after 100 tries. + continue; + } + ASSERT_THAT(ret, SyscallSucceeds()); + + // No need to try again. + break; + } +} + +TEST_P(SocketMultiProtocolInetLoopbackTest, V6EphemeralPortReserved) { + auto const& param = GetParam(); + + // FIXME + SKIP_IF(IsRunningOnGvisor() && param.type == SOCK_STREAM); + + for (int i = 0; true; i++) { + // Bind the v6 loopback on a dual stack socket. 
+ TestAddress const& test_addr = V6Loopback(); + sockaddr_storage bound_addr = test_addr.addr; + const FileDescriptor bound_fd = + ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0)); + ASSERT_THAT(bind(bound_fd.get(), reinterpret_cast<sockaddr*>(&bound_addr), + test_addr.addr_len), + SyscallSucceeds()); + + // Listen iff TCP. + if (param.type == SOCK_STREAM) { + ASSERT_THAT(listen(bound_fd.get(), SOMAXCONN), SyscallSucceeds()); + } + + // Get the port that we bound. + socklen_t bound_addr_len = test_addr.addr_len; + ASSERT_THAT( + getsockname(bound_fd.get(), reinterpret_cast<sockaddr*>(&bound_addr), + &bound_addr_len), + SyscallSucceeds()); + + // Connect to bind an ephemeral port. + const FileDescriptor connected_fd = + ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0)); + ASSERT_THAT( + connect(connected_fd.get(), reinterpret_cast<sockaddr*>(&bound_addr), + bound_addr_len), + SyscallSucceeds()); + + // Get the ephemeral port. + sockaddr_storage connected_addr = {}; + socklen_t connected_addr_len = sizeof(connected_addr); + ASSERT_THAT(getsockname(connected_fd.get(), + reinterpret_cast<sockaddr*>(&connected_addr), + &connected_addr_len), + SyscallSucceeds()); + uint16_t const ephemeral_port = + ASSERT_NO_ERRNO_AND_VALUE(AddrPort(test_addr.family(), connected_addr)); + + // Verify that we actually got an ephemeral port. + ASSERT_NE(ephemeral_port, 0); + + // Verify that the ephemeral port is reserved. + const FileDescriptor checking_fd = + ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0)); + EXPECT_THAT( + bind(checking_fd.get(), reinterpret_cast<sockaddr*>(&connected_addr), + connected_addr_len), + SyscallFailsWithErrno(EADDRINUSE)); + + // Verify that binding the v6 loopback with the same port fails. 
+ TestAddress const& test_addr_v6 = V6Loopback(); + sockaddr_storage addr_v6 = test_addr_v6.addr; + ASSERT_NO_ERRNO( + SetAddrPort(test_addr_v6.family(), &addr_v6, ephemeral_port)); + const FileDescriptor fd_v6 = + ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr_v6.family(), param.type, 0)); + ASSERT_THAT(bind(fd_v6.get(), reinterpret_cast<sockaddr*>(&addr_v6), + test_addr_v6.addr_len), + SyscallFailsWithErrno(EADDRINUSE)); + + // Verify that binding the v4 any with the same port fails. + TestAddress const& test_addr_v4_any = V4Any(); + sockaddr_storage addr_v4_any = test_addr_v4_any.addr; + ASSERT_NO_ERRNO( + SetAddrPort(test_addr_v4_any.family(), &addr_v4_any, ephemeral_port)); + const FileDescriptor fd_v4_any = ASSERT_NO_ERRNO_AND_VALUE( + Socket(test_addr_v4_any.family(), param.type, 0)); + ASSERT_THAT(bind(fd_v4_any.get(), reinterpret_cast<sockaddr*>(&addr_v4_any), + test_addr_v4_any.addr_len), + SyscallFailsWithErrno(EADDRINUSE)); + + // Verify that we can still bind the v4 loopback on the same port. + TestAddress const& test_addr_v4_mapped = V4MappedLoopback(); + sockaddr_storage addr_v4_mapped = test_addr_v4_mapped.addr; + ASSERT_NO_ERRNO(SetAddrPort(test_addr_v4_mapped.family(), &addr_v4_mapped, + ephemeral_port)); + const FileDescriptor fd_v4_mapped = ASSERT_NO_ERRNO_AND_VALUE( + Socket(test_addr_v4_mapped.family(), param.type, 0)); + int ret = + bind(fd_v4_mapped.get(), reinterpret_cast<sockaddr*>(&addr_v4_mapped), + test_addr_v4_mapped.addr_len); + if (ret == -1 && errno == EADDRINUSE) { + // Port may have been in use. + ASSERT_LT(i, 100); // Give up after 100 tries. + continue; + } + EXPECT_THAT(ret, SyscallSucceeds()); + + // No need to try again. + break; + } +} + +TEST_P(SocketMultiProtocolInetLoopbackTest, V4MappedEphemeralPortReserved) { + auto const& param = GetParam(); + + // FIXME + SKIP_IF(IsRunningOnGvisor() && param.type == SOCK_STREAM); + + for (int i = 0; true; i++) { + // Bind the v4 loopback on a dual stack socket. 
+ TestAddress const& test_addr = V4MappedLoopback(); + sockaddr_storage bound_addr = test_addr.addr; + const FileDescriptor bound_fd = + ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0)); + ASSERT_THAT(bind(bound_fd.get(), reinterpret_cast<sockaddr*>(&bound_addr), + test_addr.addr_len), + SyscallSucceeds()); + + // Listen iff TCP. + if (param.type == SOCK_STREAM) { + ASSERT_THAT(listen(bound_fd.get(), SOMAXCONN), SyscallSucceeds()); + } + + // Get the port that we bound. + socklen_t bound_addr_len = test_addr.addr_len; + ASSERT_THAT( + getsockname(bound_fd.get(), reinterpret_cast<sockaddr*>(&bound_addr), + &bound_addr_len), + SyscallSucceeds()); + + // Connect to bind an ephemeral port. + const FileDescriptor connected_fd = + ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0)); + ASSERT_THAT( + connect(connected_fd.get(), reinterpret_cast<sockaddr*>(&bound_addr), + bound_addr_len), + SyscallSucceeds()); + + // Get the ephemeral port. + sockaddr_storage connected_addr = {}; + socklen_t connected_addr_len = sizeof(connected_addr); + ASSERT_THAT(getsockname(connected_fd.get(), + reinterpret_cast<sockaddr*>(&connected_addr), + &connected_addr_len), + SyscallSucceeds()); + uint16_t const ephemeral_port = + ASSERT_NO_ERRNO_AND_VALUE(AddrPort(test_addr.family(), connected_addr)); + + // Verify that we actually got an ephemeral port. + ASSERT_NE(ephemeral_port, 0); + + // Verify that the ephemeral port is reserved. + const FileDescriptor checking_fd = + ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0)); + EXPECT_THAT( + bind(checking_fd.get(), reinterpret_cast<sockaddr*>(&connected_addr), + connected_addr_len), + SyscallFailsWithErrno(EADDRINUSE)); + + // Verify that binding the v4 loopback on the same port with a v4 socket + // fails. 
+ TestAddress const& test_addr_v4 = V4Loopback(); + sockaddr_storage addr_v4 = test_addr_v4.addr; + ASSERT_NO_ERRNO( + SetAddrPort(test_addr_v4.family(), &addr_v4, ephemeral_port)); + const FileDescriptor fd_v4 = + ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr_v4.family(), param.type, 0)); + EXPECT_THAT(bind(fd_v4.get(), reinterpret_cast<sockaddr*>(&addr_v4), + test_addr_v4.addr_len), + SyscallFailsWithErrno(EADDRINUSE)); + + // Verify that binding the v6 any on the same port with a dual-stack socket + // fails. + TestAddress const& test_addr_v6_any = V6Any(); + sockaddr_storage addr_v6_any = test_addr_v6_any.addr; + ASSERT_NO_ERRNO( + SetAddrPort(test_addr_v6_any.family(), &addr_v6_any, ephemeral_port)); + const FileDescriptor fd_v6_any = ASSERT_NO_ERRNO_AND_VALUE( + Socket(test_addr_v6_any.family(), param.type, 0)); + ASSERT_THAT(bind(fd_v6_any.get(), reinterpret_cast<sockaddr*>(&addr_v6_any), + test_addr_v6_any.addr_len), + SyscallFailsWithErrno(EADDRINUSE)); + + // For some reason, binding the TCP v6-only any is flaky on Linux. Maybe we + // tend to run out of ephemeral ports? Regardless, binding the v6 loopback + // seems pretty reliable. Only try to bind the v6-only any on UDP and + // gVisor. + + int ret = -1; + + if (!IsRunningOnGvisor() && param.type == SOCK_STREAM) { + // Verify that we can still bind the v6 loopback on the same port. + TestAddress const& test_addr_v6 = V6Loopback(); + sockaddr_storage addr_v6 = test_addr_v6.addr; + ASSERT_NO_ERRNO( + SetAddrPort(test_addr_v6.family(), &addr_v6, ephemeral_port)); + const FileDescriptor fd_v6 = ASSERT_NO_ERRNO_AND_VALUE( + Socket(test_addr_v6.family(), param.type, 0)); + ret = bind(fd_v6.get(), reinterpret_cast<sockaddr*>(&addr_v6), + test_addr_v6.addr_len); + } else { + // Verify that we can still bind the v6 any on the same port with a + // v6-only socket. 
+ const FileDescriptor fd_v6_only_any = ASSERT_NO_ERRNO_AND_VALUE( + Socket(test_addr_v6_any.family(), param.type, 0)); + int one = 1; + EXPECT_THAT(setsockopt(fd_v6_only_any.get(), IPPROTO_IPV6, IPV6_V6ONLY, + &one, sizeof(one)), + SyscallSucceeds()); + ret = + bind(fd_v6_only_any.get(), reinterpret_cast<sockaddr*>(&addr_v6_any), + test_addr_v6_any.addr_len); + } + + if (ret == -1 && errno == EADDRINUSE) { + // Port may have been in use. + ASSERT_LT(i, 100); // Give up after 100 tries. + continue; + } + EXPECT_THAT(ret, SyscallSucceeds()); + + // No need to try again. + break; + } +} + +TEST_P(SocketMultiProtocolInetLoopbackTest, V4EphemeralPortReserved) { + auto const& param = GetParam(); + + // FIXME + SKIP_IF(IsRunningOnGvisor() && param.type == SOCK_STREAM); + + for (int i = 0; true; i++) { + // Bind the v4 loopback on a v4 socket. + TestAddress const& test_addr = V4Loopback(); + sockaddr_storage bound_addr = test_addr.addr; + const FileDescriptor bound_fd = + ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0)); + ASSERT_THAT(bind(bound_fd.get(), reinterpret_cast<sockaddr*>(&bound_addr), + test_addr.addr_len), + SyscallSucceeds()); + + // Listen iff TCP. + if (param.type == SOCK_STREAM) { + ASSERT_THAT(listen(bound_fd.get(), SOMAXCONN), SyscallSucceeds()); + } + + // Get the port that we bound. + socklen_t bound_addr_len = test_addr.addr_len; + ASSERT_THAT( + getsockname(bound_fd.get(), reinterpret_cast<sockaddr*>(&bound_addr), + &bound_addr_len), + SyscallSucceeds()); + + // Connect to bind an ephemeral port. + const FileDescriptor connected_fd = + ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0)); + ASSERT_THAT( + connect(connected_fd.get(), reinterpret_cast<sockaddr*>(&bound_addr), + bound_addr_len), + SyscallSucceeds()); + + // Get the ephemeral port. 
+ sockaddr_storage connected_addr = {}; + socklen_t connected_addr_len = sizeof(connected_addr); + ASSERT_THAT(getsockname(connected_fd.get(), + reinterpret_cast<sockaddr*>(&connected_addr), + &connected_addr_len), + SyscallSucceeds()); + uint16_t const ephemeral_port = + ASSERT_NO_ERRNO_AND_VALUE(AddrPort(test_addr.family(), connected_addr)); + + // Verify that we actually got an ephemeral port. + ASSERT_NE(ephemeral_port, 0); + + // Verify that the ephemeral port is reserved. + const FileDescriptor checking_fd = + ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0)); + EXPECT_THAT( + bind(checking_fd.get(), reinterpret_cast<sockaddr*>(&connected_addr), + connected_addr_len), + SyscallFailsWithErrno(EADDRINUSE)); + + // Verify that binding the v4 loopback on the same port with a v6 socket + // fails. + TestAddress const& test_addr_v4_mapped = V4MappedLoopback(); + sockaddr_storage addr_v4_mapped = test_addr_v4_mapped.addr; + ASSERT_NO_ERRNO(SetAddrPort(test_addr_v4_mapped.family(), &addr_v4_mapped, + ephemeral_port)); + const FileDescriptor fd_v4_mapped = ASSERT_NO_ERRNO_AND_VALUE( + Socket(test_addr_v4_mapped.family(), param.type, 0)); + EXPECT_THAT( + bind(fd_v4_mapped.get(), reinterpret_cast<sockaddr*>(&addr_v4_mapped), + test_addr_v4_mapped.addr_len), + SyscallFailsWithErrno(EADDRINUSE)); + + // Verify that binding the v6 any on the same port with a dual-stack socket + // fails. + TestAddress const& test_addr_v6_any = V6Any(); + sockaddr_storage addr_v6_any = test_addr_v6_any.addr; + ASSERT_NO_ERRNO( + SetAddrPort(test_addr_v6_any.family(), &addr_v6_any, ephemeral_port)); + const FileDescriptor fd_v6_any = ASSERT_NO_ERRNO_AND_VALUE( + Socket(test_addr_v6_any.family(), param.type, 0)); + ASSERT_THAT(bind(fd_v6_any.get(), reinterpret_cast<sockaddr*>(&addr_v6_any), + test_addr_v6_any.addr_len), + SyscallFailsWithErrno(EADDRINUSE)); + + // For some reason, binding the TCP v6-only any is flaky on Linux. 
Maybe we + // tend to run out of ephemeral ports? Regardless, binding the v6 loopback + // seems pretty reliable. Only try to bind the v6-only any on UDP and + // gVisor. + + int ret = -1; + + if (!IsRunningOnGvisor() && param.type == SOCK_STREAM) { + // Verify that we can still bind the v6 loopback on the same port. + TestAddress const& test_addr_v6 = V6Loopback(); + sockaddr_storage addr_v6 = test_addr_v6.addr; + ASSERT_NO_ERRNO( + SetAddrPort(test_addr_v6.family(), &addr_v6, ephemeral_port)); + const FileDescriptor fd_v6 = ASSERT_NO_ERRNO_AND_VALUE( + Socket(test_addr_v6.family(), param.type, 0)); + ret = bind(fd_v6.get(), reinterpret_cast<sockaddr*>(&addr_v6), + test_addr_v6.addr_len); + } else { + // Verify that we can still bind the v6 any on the same port with a + // v6-only socket. + const FileDescriptor fd_v6_only_any = ASSERT_NO_ERRNO_AND_VALUE( + Socket(test_addr_v6_any.family(), param.type, 0)); + int one = 1; + EXPECT_THAT(setsockopt(fd_v6_only_any.get(), IPPROTO_IPV6, IPV6_V6ONLY, + &one, sizeof(one)), + SyscallSucceeds()); + ret = + bind(fd_v6_only_any.get(), reinterpret_cast<sockaddr*>(&addr_v6_any), + test_addr_v6_any.addr_len); + } + + if (ret == -1 && errno == EADDRINUSE) { + // Port may have been in use. + ASSERT_LT(i, 100); // Give up after 100 tries. + continue; + } + EXPECT_THAT(ret, SyscallSucceeds()); + + // No need to try again. 
+ break; + } +} + +INSTANTIATE_TEST_CASE_P(AllFamlies, SocketMultiProtocolInetLoopbackTest, + ::testing::Values(ProtocolTestParam{"TCP", SOCK_STREAM}, + ProtocolTestParam{"UDP", SOCK_DGRAM}), + DescribeProtocolTestParam); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_ip_tcp_generic.cc b/test/syscalls/linux/socket_ip_tcp_generic.cc new file mode 100644 index 000000000..bb5a83c9a --- /dev/null +++ b/test/syscalls/linux/socket_ip_tcp_generic.cc @@ -0,0 +1,392 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "test/syscalls/linux/socket_ip_tcp_generic.h" + +#include <netinet/in.h> +#include <netinet/tcp.h> +#include <stdio.h> +#include <sys/ioctl.h> +#include <sys/poll.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <sys/un.h> + +#include "gtest/gtest.h" +#include "gtest/gtest.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +TEST_P(TCPSocketPairTest, TcpInfoSucceedes) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + struct tcp_info opt = {}; + socklen_t optLen = sizeof(opt); + EXPECT_THAT(getsockopt(sockets->first_fd(), SOL_TCP, TCP_INFO, &opt, &optLen), + SyscallSucceeds()); +} + +TEST_P(TCPSocketPairTest, ShortTcpInfoSucceedes) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + struct tcp_info opt = {}; + socklen_t optLen = 1; + EXPECT_THAT(getsockopt(sockets->first_fd(), SOL_TCP, TCP_INFO, &opt, &optLen), + SyscallSucceeds()); +} + +TEST_P(TCPSocketPairTest, ZeroTcpInfoSucceedes) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + struct tcp_info opt = {}; + socklen_t optLen = 0; + EXPECT_THAT(getsockopt(sockets->first_fd(), SOL_TCP, TCP_INFO, &opt, &optLen), + SyscallSucceeds()); +} + +// This test validates that an RST is sent instead of a FIN when data is +// unread on calls to close(2). +TEST_P(TCPSocketPairTest, RSTSentOnCloseWithUnreadData) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char buf[10] = {}; + ASSERT_THAT(RetryEINTR(write)(sockets->first_fd(), buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + // Wait until the second socket sees the data but don't read it. + struct pollfd poll_fd = {sockets->second_fd(), POLLIN | POLLHUP, 0}; + constexpr int kPollTimeoutMs = 20000; // Wait up to 20 seconds for the data. + ASSERT_THAT(RetryEINTR(poll)(&poll_fd, 1, kPollTimeoutMs), + SyscallSucceedsWithValue(1)); + + // Now close the second socket without reading the data.
+ ASSERT_THAT(close(sockets->release_second_fd()), SyscallSucceeds()); + + // Wait for the other end to receive the RST (up to 20 seconds). + struct pollfd poll_fd2 = {sockets->first_fd(), POLLIN | POLLHUP, 0}; + ASSERT_THAT(RetryEINTR(poll)(&poll_fd2, 1, kPollTimeoutMs), + SyscallSucceedsWithValue(1)); + + // A shutdown with unread data will cause a RST to be sent instead + // of a FIN, per RFC 2525 section 2.17; this is also what Linux does. + ASSERT_THAT(RetryEINTR(read)(sockets->first_fd(), buf, sizeof(buf)), + SyscallFailsWithErrno(ECONNRESET)); +} + +// This test will validate that a RST will cause POLLHUP to trigger. +TEST_P(TCPSocketPairTest, RSTCausesPollHUP) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char buf[10] = {}; + ASSERT_THAT(RetryEINTR(write)(sockets->first_fd(), buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + // Wait until second sees the data on its side but don't read it. + struct pollfd poll_fd = {sockets->second_fd(), POLLIN, 0}; + constexpr int kPollTimeoutMs = 20000; // Wait up to 20 seconds for the data. + ASSERT_THAT(RetryEINTR(poll)(&poll_fd, 1, kPollTimeoutMs), + SyscallSucceedsWithValue(1)); + EXPECT_EQ(poll_fd.revents & POLLIN, POLLIN); + + // Confirm we at least have one unread byte. + int bytes_available = 0; + ASSERT_THAT( + RetryEINTR(ioctl)(sockets->second_fd(), FIONREAD, &bytes_available), + SyscallSucceeds()); + EXPECT_GT(bytes_available, 0); + + // Now close the connected socket without reading the data from the second, + // this will cause a RST and we should see that with POLLHUP. + ASSERT_THAT(close(sockets->release_second_fd()), SyscallSucceeds()); + + // Wait for the other end to receive the RST (up to 20 seconds). 
+ struct pollfd poll_fd3 = {sockets->first_fd(), POLLHUP, 0}; + ASSERT_THAT(RetryEINTR(poll)(&poll_fd3, 1, kPollTimeoutMs), + SyscallSucceedsWithValue(1)); + ASSERT_NE(poll_fd3.revents & POLLHUP, 0); // The RST must show up as POLLHUP. +} + +// This test validates that even if a RST is sent the other end will not +// get an ECONNRESET until it's read all data. +TEST_P(TCPSocketPairTest, RSTSentOnCloseWithUnreadDataAllowsReadBuffered) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char buf[10] = {}; + ASSERT_THAT(RetryEINTR(write)(sockets->first_fd(), buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + ASSERT_THAT(RetryEINTR(write)(sockets->second_fd(), buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + // Wait until second sees the data on its side but don't read it. + struct pollfd poll_fd = {sockets->second_fd(), POLLIN, 0}; + constexpr int kPollTimeoutMs = 30000; // Wait up to 30 seconds for the data. + ASSERT_THAT(RetryEINTR(poll)(&poll_fd, 1, kPollTimeoutMs), + SyscallSucceedsWithValue(1)); + + // Wait until first sees the data on its side but don't read it. + struct pollfd poll_fd2 = {sockets->first_fd(), POLLIN, 0}; + ASSERT_THAT(RetryEINTR(poll)(&poll_fd2, 1, kPollTimeoutMs), + SyscallSucceedsWithValue(1)); + + // Now close the connected socket without reading the data from the second. + ASSERT_THAT(close(sockets->release_second_fd()), SyscallSucceeds()); + + // Wait for the other end to receive the RST (up to 30 seconds). + struct pollfd poll_fd3 = {sockets->first_fd(), POLLHUP, 0}; + ASSERT_THAT(RetryEINTR(poll)(&poll_fd3, 1, kPollTimeoutMs), + SyscallSucceedsWithValue(1)); + + // Since we also have data buffered we should be able to read it before + // the syscall will fail with ECONNRESET.
+ ASSERT_THAT(RetryEINTR(read)(sockets->first_fd(), buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + // A close with unread data will cause a RST to be sent instead + // of a FIN, per RFC 2525 section 2.17; this is also what Linux does. + ASSERT_THAT(RetryEINTR(read)(sockets->first_fd(), buf, sizeof(buf)), + SyscallFailsWithErrno(ECONNRESET)); +} + +// This test will verify that a clean shutdown (FIN) is performed when there +// is unread data but only the write side is closed. +TEST_P(TCPSocketPairTest, FINSentOnShutdownWrWithUnreadData) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char buf[10] = {}; + ASSERT_THAT(RetryEINTR(write)(sockets->first_fd(), buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + // Wait until the second socket sees the data but don't read it. + struct pollfd poll_fd = {sockets->second_fd(), POLLIN | POLLHUP, 0}; + constexpr int kPollTimeoutMs = 20000; // Wait up to 20 seconds for the data. + ASSERT_THAT(RetryEINTR(poll)(&poll_fd, 1, kPollTimeoutMs), + SyscallSucceedsWithValue(1)); + + // Now shutdown the write end leaving the read end open. + ASSERT_THAT(shutdown(sockets->second_fd(), SHUT_WR), SyscallSucceeds()); + + // Wait for the other end to receive the FIN (up to 20 seconds). + struct pollfd poll_fd2 = {sockets->first_fd(), POLLIN | POLLHUP, 0}; + ASSERT_THAT(RetryEINTR(poll)(&poll_fd2, 1, kPollTimeoutMs), + SyscallSucceedsWithValue(1)); + + // Since we didn't shutdown the read end this will be a clean close. + ASSERT_THAT(RetryEINTR(read)(sockets->first_fd(), buf, sizeof(buf)), + SyscallSucceedsWithValue(0)); +} + +// This test will verify that when data is received by a socket, even if it's +// not read SHUT_RD will not cause any packets to be generated and data will +// remain in the buffer and can be read later.
+TEST_P(TCPSocketPairTest, ShutdownRdShouldCauseNoPacketsWithUnreadData) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char buf[10] = {}; + ASSERT_THAT(RetryEINTR(write)(sockets->first_fd(), buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + // Wait until the second socket sees the data but don't read it. + struct pollfd poll_fd = {sockets->second_fd(), POLLIN | POLLHUP, 0}; + constexpr int kPollTimeoutMs = 20000; // Wait up to 20 seconds for the data. + ASSERT_THAT(RetryEINTR(poll)(&poll_fd, 1, kPollTimeoutMs), + SyscallSucceedsWithValue(1)); + + // Now shutdown the read end, this will generate no packets to the other end. + ASSERT_THAT(shutdown(sockets->second_fd(), SHUT_RD), SyscallSucceeds()); + + // We should not receive any events on the other side of the socket. + struct pollfd poll_fd2 = {sockets->first_fd(), POLLIN | POLLHUP, 0}; + constexpr int kPollNoResponseTimeoutMs = 3000; + ASSERT_THAT(RetryEINTR(poll)(&poll_fd2, 1, kPollNoResponseTimeoutMs), + SyscallSucceedsWithValue(0)); // Timeout. + + // Even though we did a SHUT_RD on the read end we can still read the data. + ASSERT_THAT(RetryEINTR(read)(sockets->second_fd(), buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); +} + +TEST_P(TCPSocketPairTest, ClosedReadNonBlockingSocket) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + // Set the read end to O_NONBLOCK. + int opts = 0; + ASSERT_THAT(opts = fcntl(sockets->second_fd(), F_GETFL), SyscallSucceeds()); + ASSERT_THAT(fcntl(sockets->second_fd(), F_SETFL, opts | O_NONBLOCK), + SyscallSucceeds()); + + char buf[10] = {}; + ASSERT_THAT(RetryEINTR(send)(sockets->first_fd(), buf, sizeof(buf), 0), + SyscallSucceedsWithValue(sizeof(buf))); + + // Wait until second_fd sees the data and then recv it. + struct pollfd poll_fd = {sockets->second_fd(), POLLIN, 0}; + constexpr int kPollTimeoutMs = 2000; // Wait up to 2 seconds for the data.
+ ASSERT_THAT(RetryEINTR(poll)(&poll_fd, 1, kPollTimeoutMs), + SyscallSucceedsWithValue(1)); + + ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), buf, sizeof(buf), 0), + SyscallSucceedsWithValue(sizeof(buf))); + + // Now close the sending socket, leaving the receiving socket open. + ASSERT_THAT(close(sockets->release_first_fd()), SyscallSucceeds()); + + // Wait for close notification and recv again. + struct pollfd poll_fd2 = {sockets->second_fd(), POLLIN, 0}; + ASSERT_THAT(RetryEINTR(poll)(&poll_fd2, 1, kPollTimeoutMs), + SyscallSucceedsWithValue(1)); + + ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), buf, sizeof(buf), 0), + SyscallSucceedsWithValue(0)); +} + +TEST_P(TCPSocketPairTest, + ShutdownRdUnreadDataShouldCauseNoPacketsUnlessClosed) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char buf[10] = {}; + ASSERT_THAT(RetryEINTR(write)(sockets->first_fd(), buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + // Wait until the second socket sees the data but don't read it. + struct pollfd poll_fd = {sockets->second_fd(), POLLIN | POLLHUP, 0}; + constexpr int kPollTimeoutMs = 20000; // Wait up to 20 seconds for the data. + ASSERT_THAT(RetryEINTR(poll)(&poll_fd, 1, kPollTimeoutMs), + SyscallSucceedsWithValue(1)); + + // Now shutdown the read end, this will generate no packets to the other end. + ASSERT_THAT(shutdown(sockets->second_fd(), SHUT_RD), SyscallSucceeds()); + + // We should not receive any events on the other side of the socket. + struct pollfd poll_fd2 = {sockets->first_fd(), POLLIN | POLLHUP, 0}; + constexpr int kPollNoResponseTimeoutMs = 3000; + ASSERT_THAT(RetryEINTR(poll)(&poll_fd2, 1, kPollNoResponseTimeoutMs), + SyscallSucceedsWithValue(0)); // Timeout. + + // Now fully close the second socket; this will generate a RST. + ASSERT_THAT(close(sockets->release_second_fd()), SyscallSucceeds()); + ASSERT_THAT(RetryEINTR(poll)(&poll_fd2, 1, kPollTimeoutMs), + SyscallSucceedsWithValue(1)); // The other end has closed.
+ + // A shutdown with unread data will cause a RST to be sent instead + // of a FIN, per RFC 2525 section 2.17; this is also what Linux does. + ASSERT_THAT(RetryEINTR(read)(sockets->first_fd(), buf, sizeof(buf)), + SyscallFailsWithErrno(ECONNRESET)); +} + +TEST_P(TCPSocketPairTest, TCPCorkDefault) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + int get = -1; + socklen_t get_len = sizeof(get); + EXPECT_THAT( + getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_CORK, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, 0); +} + +TEST_P(TCPSocketPairTest, SetTCPCork) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_CORK, + &kSockOptOn, sizeof(kSockOptOn)), + SyscallSucceeds()); + + int get = -1; + socklen_t get_len = sizeof(get); + EXPECT_THAT( + getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_CORK, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kSockOptOn); + + ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_CORK, + &kSockOptOff, sizeof(kSockOptOff)), + SyscallSucceeds()); + + EXPECT_THAT( + getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_CORK, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kSockOptOff); +} + +TEST_P(TCPSocketPairTest, TCPCork) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_CORK, + &kSockOptOn, sizeof(kSockOptOn)), + SyscallSucceeds()); + + constexpr char kData[] = "abc"; + ASSERT_THAT(WriteFd(sockets->first_fd(), kData, sizeof(kData)), + SyscallSucceedsWithValue(sizeof(kData))); + + ASSERT_NO_FATAL_FAILURE(RecvNoData(sockets->second_fd())); + + EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_CORK, + &kSockOptOff, sizeof(kSockOptOff)), + SyscallSucceeds()); + + // Create a receive buffer larger 
than kData. + char buf[(sizeof(kData) + 1) * 2] = {}; + ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), buf, sizeof(buf), 0), + SyscallSucceedsWithValue(sizeof(kData))); + EXPECT_EQ(absl::string_view(kData, sizeof(kData)), + absl::string_view(buf, sizeof(kData))); +} + +TEST_P(TCPSocketPairTest, TCPQuickAckDefault) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + int get = -1; + socklen_t get_len = sizeof(get); + EXPECT_THAT(getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_QUICKACK, &get, + &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kSockOptOn); +} + +TEST_P(TCPSocketPairTest, SetTCPQuickAck) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_QUICKACK, + &kSockOptOff, sizeof(kSockOptOff)), + SyscallSucceeds()); + + int get = -1; + socklen_t get_len = sizeof(get); + EXPECT_THAT(getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_QUICKACK, &get, + &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kSockOptOff); + + ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_QUICKACK, + &kSockOptOn, sizeof(kSockOptOn)), + SyscallSucceeds()); + + EXPECT_THAT(getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_QUICKACK, &get, + &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kSockOptOn); +} + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_ip_tcp_generic.h b/test/syscalls/linux/socket_ip_tcp_generic.h new file mode 100644 index 000000000..f38500d14 --- /dev/null +++ b/test/syscalls/linux/socket_ip_tcp_generic.h @@ -0,0 +1,29 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IP_TCP_GENERIC_H_ +#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IP_TCP_GENERIC_H_ + +#include "test/syscalls/linux/socket_test_util.h" + +namespace gvisor { +namespace testing { + +// Test fixture for tests that apply to pairs of connected TCP sockets. +using TCPSocketPairTest = SocketPairTest; + +} // namespace testing +} // namespace gvisor + +#endif // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IP_TCP_GENERIC_H_ diff --git a/test/syscalls/linux/socket_ip_tcp_generic_loopback.cc b/test/syscalls/linux/socket_ip_tcp_generic_loopback.cc new file mode 100644 index 000000000..9e10dea30 --- /dev/null +++ b/test/syscalls/linux/socket_ip_tcp_generic_loopback.cc @@ -0,0 +1,47 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <netinet/tcp.h> +#include <vector> + +#include "test/syscalls/linux/ip_socket_test_util.h" +#include "test/syscalls/linux/socket_ip_tcp_generic.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +std::vector<SocketPairKind> GetSocketPairs() { + return ApplyVecToVec<SocketPairKind>( + std::vector<Middleware>{ + NoOp, SetSockOpt(IPPROTO_TCP, TCP_NODELAY, &kSockOptOn)}, + VecCat<SocketPairKind>( + ApplyVec<SocketPairKind>( + IPv6TCPAcceptBindSocketPair, + AllBitwiseCombinations(List<int>{0, SOCK_CLOEXEC})), + ApplyVec<SocketPairKind>( + IPv4TCPAcceptBindSocketPair, + AllBitwiseCombinations(List<int>{0, SOCK_CLOEXEC})), + ApplyVec<SocketPairKind>( + DualStackTCPAcceptBindSocketPair, + AllBitwiseCombinations(List<int>{0, SOCK_CLOEXEC})))); +} + +INSTANTIATE_TEST_CASE_P( + AllUnixDomainSockets, TCPSocketPairTest, + ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_ip_tcp_loopback.cc b/test/syscalls/linux/socket_ip_tcp_loopback.cc new file mode 100644 index 000000000..f95061506 --- /dev/null +++ b/test/syscalls/linux/socket_ip_tcp_loopback.cc @@ -0,0 +1,43 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <vector> + +#include "test/syscalls/linux/ip_socket_test_util.h" +#include "test/syscalls/linux/socket_generic.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +std::vector<SocketPairKind> GetSocketPairs() { + return VecCat<SocketPairKind>( + ApplyVec<SocketPairKind>( + IPv6TCPAcceptBindSocketPair, + AllBitwiseCombinations(List<int>{0, SOCK_CLOEXEC})), + ApplyVec<SocketPairKind>( + IPv4TCPAcceptBindSocketPair, + AllBitwiseCombinations(List<int>{0, SOCK_CLOEXEC})), + ApplyVec<SocketPairKind>( + DualStackTCPAcceptBindSocketPair, + AllBitwiseCombinations(List<int>{0, SOCK_CLOEXEC}))); +} + +INSTANTIATE_TEST_CASE_P( + AllUnixDomainSockets, AllSocketPairTest, + ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_ip_tcp_loopback_blocking.cc b/test/syscalls/linux/socket_ip_tcp_loopback_blocking.cc new file mode 100644 index 000000000..bb419e3a8 --- /dev/null +++ b/test/syscalls/linux/socket_ip_tcp_loopback_blocking.cc @@ -0,0 +1,44 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#include <netinet/tcp.h>
+#include <vector>
+
+#include "test/syscalls/linux/ip_socket_test_util.h"
+#include "test/syscalls/linux/socket_stream_blocking.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// Returns the socket pair kinds that BlockingStreamSocketPairTest is
+// instantiated with: IPv6 and IPv4 TCP accept/bind pairs, one per socket-type
+// flag combination, run through the middleware list below.
+std::vector<SocketPairKind> GetSocketPairs() {
+  // The same flag combinations are applied to both address families.
+  const auto type_flags = AllBitwiseCombinations(List<int>{0, SOCK_CLOEXEC});
+
+  const std::vector<SocketPairKind> tcp_pairs = VecCat<SocketPairKind>(
+      ApplyVec<SocketPairKind>(IPv6TCPAcceptBindSocketPair, type_flags),
+      ApplyVec<SocketPairKind>(IPv4TCPAcceptBindSocketPair, type_flags));
+
+  // Middleware handed to ApplyVecToVec: a no-op and one that turns on
+  // TCP_NODELAY.
+  const std::vector<Middleware> middleware{
+      NoOp, SetSockOpt(IPPROTO_TCP, TCP_NODELAY, &kSockOptOn)};
+
+  return ApplyVecToVec<SocketPairKind>(middleware, tcp_pairs);
+}
+
+// NOTE(review): the instantiation prefix says "AllUnixDomainSockets" although
+// these are TCP pairs. It is kept because renaming changes the generated test
+// names.
+INSTANTIATE_TEST_CASE_P(
+    AllUnixDomainSockets, BlockingStreamSocketPairTest,
+    ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_ip_tcp_loopback_nonblock.cc b/test/syscalls/linux/socket_ip_tcp_loopback_nonblock.cc
new file mode 100644
index 000000000..af6fd635e
--- /dev/null
+++ b/test/syscalls/linux/socket_ip_tcp_loopback_nonblock.cc
@@ -0,0 +1,46 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <netinet/tcp.h>
+#include <vector>
+
+#include "test/syscalls/linux/ip_socket_test_util.h"
+#include "test/syscalls/linux/socket_non_blocking.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// Returns the socket pair kinds that NonBlockingSocketPairTest is instantiated
+// with: IPv6 and IPv4 TCP accept/bind pairs whose type flags are built from
+// SOCK_NONBLOCK combined with {0, SOCK_CLOEXEC}, run through the middleware
+// list below.
+std::vector<SocketPairKind> GetSocketPairs() {
+  // The same flag combinations are applied to both address families.
+  const auto type_flags = AllBitwiseCombinations(List<int>{SOCK_NONBLOCK},
+                                                 List<int>{0, SOCK_CLOEXEC});
+
+  const std::vector<SocketPairKind> tcp_pairs = VecCat<SocketPairKind>(
+      ApplyVec<SocketPairKind>(IPv6TCPAcceptBindSocketPair, type_flags),
+      ApplyVec<SocketPairKind>(IPv4TCPAcceptBindSocketPair, type_flags));
+
+  // Middleware handed to ApplyVecToVec: a no-op and one that turns on
+  // TCP_NODELAY.
+  const std::vector<Middleware> middleware{
+      NoOp, SetSockOpt(IPPROTO_TCP, TCP_NODELAY, &kSockOptOn)};
+
+  return ApplyVecToVec<SocketPairKind>(middleware, tcp_pairs);
+}
+
+// NOTE(review): the instantiation prefix says "AllUnixDomainSockets" although
+// these are TCP pairs. It is kept because renaming changes the generated test
+// names.
+INSTANTIATE_TEST_CASE_P(
+    AllUnixDomainSockets, NonBlockingSocketPairTest,
+    ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_ip_tcp_udp_generic.cc b/test/syscalls/linux/socket_ip_tcp_udp_generic.cc
new file mode 100644
index 000000000..91d029985
--- /dev/null
+++ b/test/syscalls/linux/socket_ip_tcp_udp_generic.cc
@@ -0,0 +1,78 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <stdio.h>
+#include <sys/ioctl.h>
+#include <sys/poll.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+
+#include "gtest/gtest.h"
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/ip_socket_test_util.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// Test fixture for tests that apply to pairs of TCP and UDP sockets.
+using TcpUdpSocketPairTest = SocketPairTest;
+
+// After shutdown(SHUT_WR) on the first socket, a send() on that same socket
+// must fail with EPIPE.
+TEST_P(TcpUdpSocketPairTest, ShutdownWrFollowedBySendIsError) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  const int writer_fd = sockets->first_fd();
+
+  // Close the write half of the first socket.
+  ASSERT_THAT(shutdown(writer_fd, SHUT_WR), SyscallSucceeds());
+
+  char payload[10] = {};
+  ASSERT_THAT(RetryEINTR(send)(writer_fd, payload, sizeof(payload), 0),
+              SyscallFailsWithErrno(EPIPE));
+}
+
+// Returns the UDP bidirectional-bind and TCP accept/bind pair kinds (IPv6,
+// IPv4 and dual-stack for each), one per socket-type flag combination.
+std::vector<SocketPairKind> GetSocketPairs() {
+  // The same flag combinations are applied to every pair constructor.
+  const auto type_flags = AllBitwiseCombinations(List<int>{0, SOCK_NONBLOCK});
+
+  return VecCat<SocketPairKind>(
+      ApplyVec<SocketPairKind>(IPv6UDPBidirectionalBindSocketPair, type_flags),
+      ApplyVec<SocketPairKind>(IPv4UDPBidirectionalBindSocketPair, type_flags),
+      ApplyVec<SocketPairKind>(DualStackUDPBidirectionalBindSocketPair,
+                               type_flags),
+      ApplyVec<SocketPairKind>(IPv6TCPAcceptBindSocketPair, type_flags),
+      ApplyVec<SocketPairKind>(IPv4TCPAcceptBindSocketPair, type_flags),
+      ApplyVec<SocketPairKind>(DualStackTCPAcceptBindSocketPair, type_flags));
+}
+
+// NOTE(review): the instantiation prefix says "AllTCPSockets" although UDP
+// pairs are included too. It is kept because renaming changes the generated
+// test names.
+INSTANTIATE_TEST_CASE_P(
+    AllTCPSockets, TcpUdpSocketPairTest,
+    ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git
a/test/syscalls/linux/socket_ip_udp_loopback.cc b/test/syscalls/linux/socket_ip_udp_loopback.cc
new file mode 100644
index 000000000..8a98fa8df
--- /dev/null
+++ b/test/syscalls/linux/socket_ip_udp_loopback.cc
@@ -0,0 +1,48 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <vector>
+
+#include "test/syscalls/linux/ip_socket_test_util.h"
+#include "test/syscalls/linux/socket_generic.h"
+#include "test/syscalls/linux/socket_non_stream.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// Returns the socket pair kinds shared by both instantiations below: IPv6,
+// IPv4 and dual-stack UDP bidirectional-bind pairs, one per socket-type flag
+// combination.
+std::vector<SocketPairKind> GetSocketPairs() {
+  // The same flag combinations are applied to every address family.
+  const auto type_flags = AllBitwiseCombinations(List<int>{0, SOCK_CLOEXEC});
+
+  return VecCat<SocketPairKind>(
+      ApplyVec<SocketPairKind>(IPv6UDPBidirectionalBindSocketPair, type_flags),
+      ApplyVec<SocketPairKind>(IPv4UDPBidirectionalBindSocketPair, type_flags),
+      ApplyVec<SocketPairKind>(DualStackUDPBidirectionalBindSocketPair,
+                               type_flags));
+}
+
+// NOTE(review): the instantiation prefix says "AllUnixDomainSockets" although
+// these are UDP pairs. It is kept because renaming changes the generated test
+// names.
+INSTANTIATE_TEST_CASE_P(
+    AllUnixDomainSockets, AllSocketPairTest,
+    ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+INSTANTIATE_TEST_CASE_P(
+    AllUnixDomainSockets, NonStreamSocketPairTest,
+    ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_ip_udp_loopback_blocking.cc
b/test/syscalls/linux/socket_ip_udp_loopback_blocking.cc
new file mode 100644
index 000000000..08ff3e656
--- /dev/null
+++ b/test/syscalls/linux/socket_ip_udp_loopback_blocking.cc
@@ -0,0 +1,40 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <vector>
+
+#include "test/syscalls/linux/ip_socket_test_util.h"
+#include "test/syscalls/linux/socket_non_stream_blocking.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// Returns the socket pair kinds that BlockingNonStreamSocketPairTest is
+// instantiated with: IPv6 and IPv4 UDP bidirectional-bind pairs, one per
+// socket-type flag combination.
+std::vector<SocketPairKind> GetSocketPairs() {
+  // The same flag combinations are applied to both address families.
+  const auto type_flags = AllBitwiseCombinations(List<int>{0, SOCK_CLOEXEC});
+
+  return VecCat<SocketPairKind>(
+      ApplyVec<SocketPairKind>(IPv6UDPBidirectionalBindSocketPair, type_flags),
+      ApplyVec<SocketPairKind>(IPv4UDPBidirectionalBindSocketPair, type_flags));
+}
+
+// NOTE(review): the instantiation prefix says "AllUnixDomainSockets" although
+// these are UDP pairs. It is kept because renaming changes the generated test
+// names.
+INSTANTIATE_TEST_CASE_P(
+    AllUnixDomainSockets, BlockingNonStreamSocketPairTest,
+    ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_ip_udp_loopback_nonblock.cc b/test/syscalls/linux/socket_ip_udp_loopback_nonblock.cc
new file mode 100644
index 000000000..256bcfccf
--- /dev/null
+++ b/test/syscalls/linux/socket_ip_udp_loopback_nonblock.cc
@@ -0,0 +1,42 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance
with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <vector>
+
+#include "test/syscalls/linux/ip_socket_test_util.h"
+#include "test/syscalls/linux/socket_non_blocking.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// Returns the socket pair kinds that NonBlockingSocketPairTest is instantiated
+// with: IPv6 and IPv4 UDP bidirectional-bind pairs whose type flags are built
+// from SOCK_NONBLOCK combined with {0, SOCK_CLOEXEC}.
+std::vector<SocketPairKind> GetSocketPairs() {
+  // The same flag combinations are applied to both address families.
+  const auto type_flags = AllBitwiseCombinations(List<int>{SOCK_NONBLOCK},
+                                                 List<int>{0, SOCK_CLOEXEC});
+
+  return VecCat<SocketPairKind>(
+      ApplyVec<SocketPairKind>(IPv6UDPBidirectionalBindSocketPair, type_flags),
+      ApplyVec<SocketPairKind>(IPv4UDPBidirectionalBindSocketPair, type_flags));
+}
+
+// NOTE(review): the instantiation prefix says "AllUnixDomainSockets" although
+// these are UDP pairs. It is kept because renaming changes the generated test
+// names.
+INSTANTIATE_TEST_CASE_P(
+    AllUnixDomainSockets, NonBlockingSocketPairTest,
+    ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_netdevice.cc b/test/syscalls/linux/socket_netdevice.cc
new file mode 100644
index 000000000..7bfb62a6f
--- /dev/null
+++ b/test/syscalls/linux/socket_netdevice.cc
@@ -0,0 +1,182 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <linux/sockios.h>
+#include <net/if.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+
+#include "gtest/gtest.h"
+#include "absl/base/internal/endian.h"
+#include "test/syscalls/linux/socket_netlink_util.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/test_util.h"
+
+// Tests for netdevice queries.
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+using ::testing::AnyOf;
+using ::testing::Eq;
+
+// The loopback device has a non-zero interface index and an all-zero hardware
+// address.
+TEST(NetdeviceTest, Loopback) {
+  FileDescriptor sock =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, 0));
+
+  // Prepare the request. The struct is zeroed so no uninitialized bytes are
+  // handed to the kernel.
+  struct ifreq ifr = {};
+  snprintf(ifr.ifr_name, IFNAMSIZ, "lo");
+
+  // Check for a non-zero interface index.
+  ASSERT_THAT(ioctl(sock.get(), SIOCGIFINDEX, &ifr), SyscallSucceeds());
+  EXPECT_NE(ifr.ifr_ifindex, 0);
+
+  // Check that the loopback is zero hardware address.
+  ASSERT_THAT(ioctl(sock.get(), SIOCGIFHWADDR, &ifr), SyscallSucceeds());
+  EXPECT_EQ(ifr.ifr_hwaddr.sa_data[0], 0);
+  EXPECT_EQ(ifr.ifr_hwaddr.sa_data[1], 0);
+  EXPECT_EQ(ifr.ifr_hwaddr.sa_data[2], 0);
+  EXPECT_EQ(ifr.ifr_hwaddr.sa_data[3], 0);
+  EXPECT_EQ(ifr.ifr_hwaddr.sa_data[4], 0);
+  EXPECT_EQ(ifr.ifr_hwaddr.sa_data[5], 0);
+}
+
+// The netmask reported by SIOCGIFNETMASK for "lo" matches the IPv4 prefix
+// length reported for the same interface by an RTM_GETADDR netlink dump.
+TEST(NetdeviceTest, Netmask) {
+  // We need an interface index to identify the loopback device.
+  FileDescriptor sock =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, 0));
+  struct ifreq ifr = {};
+  snprintf(ifr.ifr_name, IFNAMSIZ, "lo");
+  ASSERT_THAT(ioctl(sock.get(), SIOCGIFINDEX, &ifr), SyscallSucceeds());
+  EXPECT_NE(ifr.ifr_ifindex, 0);
+
+  // Use a netlink socket to get the netmask, which we'll then compare to the
+  // netmask obtained via ioctl.
+  FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket());
+  uint32_t port = ASSERT_NO_ERRNO_AND_VALUE(NetlinkPortID(fd.get()));
+
+  struct request {
+    struct nlmsghdr hdr;
+    struct rtgenmsg rgm;
+  };
+
+  constexpr uint32_t kSeq = 12345;
+
+  // Zero-initialize so nlmsg_pid and any padding are not sent as garbage.
+  struct request req = {};
+  req.hdr.nlmsg_len = sizeof(req);
+  req.hdr.nlmsg_type = RTM_GETADDR;
+  req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
+  req.hdr.nlmsg_seq = kSeq;
+  req.rgm.rtgen_family = AF_UNSPEC;
+
+  // Iterate through messages until we find the one containing the prefix length
+  // (i.e. netmask) for the loopback device.
+  int prefixlen = -1;
+  ASSERT_NO_ERRNO(NetlinkRequestResponse(
+      fd, &req, sizeof(req), [&](const struct nlmsghdr *hdr) {
+        EXPECT_THAT(hdr->nlmsg_type, AnyOf(Eq(RTM_NEWADDR), Eq(NLMSG_DONE)));
+
+        EXPECT_TRUE((hdr->nlmsg_flags & NLM_F_MULTI) == NLM_F_MULTI)
+            << std::hex << hdr->nlmsg_flags;
+
+        EXPECT_EQ(hdr->nlmsg_seq, kSeq);
+        EXPECT_EQ(hdr->nlmsg_pid, port);
+
+        if (hdr->nlmsg_type != RTM_NEWADDR) {
+          return;
+        }
+
+        // RTM_NEWADDR contains at least the header and ifaddrmsg.
+        EXPECT_GE(hdr->nlmsg_len, sizeof(*hdr) + sizeof(struct ifaddrmsg));
+
+        // The payload is only read, so keep it const like the header.
+        const struct ifaddrmsg *ifaddrmsg =
+            reinterpret_cast<const struct ifaddrmsg *>(NLMSG_DATA(hdr));
+        if (ifaddrmsg->ifa_index == static_cast<uint32_t>(ifr.ifr_ifindex) &&
+            ifaddrmsg->ifa_family == AF_INET) {
+          prefixlen = ifaddrmsg->ifa_prefixlen;
+        }
+      }));
+
+  // An IPv4 prefix length must lie in [0, 32]; the shift below relies on it.
+  ASSERT_GE(prefixlen, 0);
+  ASSERT_LE(prefixlen, 32);
+
+  // Netmask is stored big endian in struct sockaddr_in, so we do the same for
+  // comparison. A /0 prefix is special-cased because shifting a 32-bit value
+  // by 32 bits is undefined behavior.
+  uint32_t mask = 0;
+  if (prefixlen > 0) {
+    mask = 0xffffffffu << (32 - prefixlen);
+  }
+  mask = absl::gbswap_32(mask);
+
+  // Check that the loopback interface has the correct subnet mask.
+  snprintf(ifr.ifr_name, IFNAMSIZ, "lo");
+  ASSERT_THAT(ioctl(sock.get(), SIOCGIFNETMASK, &ifr), SyscallSucceeds());
+  EXPECT_EQ(ifr.ifr_netmask.sa_family, AF_INET);
+  struct sockaddr_in *sin =
+      reinterpret_cast<struct sockaddr_in *>(&ifr.ifr_netmask);
+  EXPECT_EQ(sin->sin_addr.s_addr, mask);
+}
+
+// SIOCGIFNAME maps the loopback interface index back to the name "lo".
+TEST(NetdeviceTest, InterfaceName) {
+  FileDescriptor sock =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, 0));
+
+  // Prepare the request.
+  struct ifreq ifr = {};
+  snprintf(ifr.ifr_name, IFNAMSIZ, "lo");
+
+  // Check for a non-zero interface index.
+  ASSERT_THAT(ioctl(sock.get(), SIOCGIFINDEX, &ifr), SyscallSucceeds());
+  EXPECT_NE(ifr.ifr_ifindex, 0);
+
+  // Check that SIOCGIFNAME finds the loopback interface. The name is
+  // overwritten first so a stale "lo" cannot make the check pass.
+  snprintf(ifr.ifr_name, IFNAMSIZ, "foo");
+  ASSERT_THAT(ioctl(sock.get(), SIOCGIFNAME, &ifr), SyscallSucceeds());
+  EXPECT_STREQ(ifr.ifr_name, "lo");
+}
+
+// SIOCGIFFLAGS reports the loopback interface as up and running.
+TEST(NetdeviceTest, InterfaceFlags) {
+  FileDescriptor sock =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, 0));
+
+  // Prepare the request.
+  struct ifreq ifr = {};
+  snprintf(ifr.ifr_name, IFNAMSIZ, "lo");
+
+  // Check that SIOCGIFFLAGS marks the interface with IFF_UP and IFF_RUNNING.
+  // NOTE(review): IFF_LOOPBACK is not asserted here; confirm whether that
+  // omission is intentional before adding a check.
+  ASSERT_THAT(ioctl(sock.get(), SIOCGIFFLAGS, &ifr), SyscallSucceeds());
+  EXPECT_EQ(ifr.ifr_flags & IFF_UP, IFF_UP);
+  EXPECT_EQ(ifr.ifr_flags & IFF_RUNNING, IFF_RUNNING);
+}
+
+// SIOCGIFMTU reports a positive MTU for the loopback interface.
+TEST(NetdeviceTest, InterfaceMTU) {
+  FileDescriptor sock =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, 0));
+
+  // Prepare the request.
+  struct ifreq ifr = {};
+  snprintf(ifr.ifr_name, IFNAMSIZ, "lo");
+
+  // Check that SIOCGIFMTU returns a nonzero MTU.
+  ASSERT_THAT(ioctl(sock.get(), SIOCGIFMTU, &ifr), SyscallSucceeds());
+  EXPECT_GT(ifr.ifr_mtu, 0);
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_netlink_route.cc b/test/syscalls/linux/socket_netlink_route.cc
new file mode 100644
index 000000000..9fc695460
--- /dev/null
+++ b/test/syscalls/linux/socket_netlink_route.cc
@@ -0,0 +1,314 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <ifaddrs.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/socket_netlink_util.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/cleanup.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/test_util.h"
+
+// Tests for NETLINK_ROUTE sockets.
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+using ::testing::AnyOf;
+using ::testing::Eq;
+
+// Netlink sockets must be SOCK_DGRAM or SOCK_RAW.
+TEST(NetlinkRouteTest, Types) {
+  // Every socket type other than SOCK_DGRAM and SOCK_RAW is rejected.
+  constexpr int kRejectedTypes[] = {SOCK_STREAM, SOCK_SEQPACKET, SOCK_RDM,
+                                    SOCK_DCCP, SOCK_PACKET};
+  for (const int type : kRejectedTypes) {
+    EXPECT_THAT(socket(AF_NETLINK, type, NETLINK_ROUTE),
+                SyscallFailsWithErrno(ESOCKTNOSUPPORT))
+        << "socket type " << type;
+  }
+
+  // The two supported types can be created and closed again.
+  constexpr int kAcceptedTypes[] = {SOCK_DGRAM, SOCK_RAW};
+  for (const int type : kAcceptedTypes) {
+    int fd;
+    EXPECT_THAT(fd = socket(AF_NETLINK, type, NETLINK_ROUTE), SyscallSucceeds())
+        << "socket type " << type;
+    EXPECT_THAT(close(fd), SyscallSucceeds());
+  }
+}
+
+// Binding with a zero nl_pid makes the kernel pick the port ID.
+TEST(NetlinkRouteTest, AutomaticPort) {
+  FileDescriptor sock =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE));
+
+  // nl_pid stays zero because the address is zero-initialized.
+  struct sockaddr_nl nl_addr = {};
+  nl_addr.nl_family = AF_NETLINK;
+  auto* const sa = reinterpret_cast<struct sockaddr*>(&nl_addr);
+
+  EXPECT_THAT(bind(sock.get(), sa, sizeof(nl_addr)), SyscallSucceeds());
+
+  socklen_t got_len = sizeof(nl_addr);
+  EXPECT_THAT(getsockname(sock.get(), sa, &got_len), SyscallSucceeds());
+  EXPECT_EQ(got_len, sizeof(nl_addr));
+  // This is the only netlink socket in the process, so it should get the PID as
+  // the port id.
+  //
+  // N.B. Another process could theoretically have explicitly reserved our pid
+  // as a port ID, but that is very unlikely.
+  EXPECT_EQ(nl_addr.nl_pid, getpid());
+}
+
+// Calling connect automatically binds to an automatic port.
+TEST(NetlinkRouteTest, ConnectBinds) {
+  FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE));
+
+  struct sockaddr_nl addr = {};
+  addr.nl_family = AF_NETLINK;
+
+  EXPECT_THAT(connect(fd.get(), reinterpret_cast<struct sockaddr*>(&addr),
+                      sizeof(addr)),
+              SyscallSucceeds());
+
+  socklen_t addrlen = sizeof(addr);
+  EXPECT_THAT(getsockname(fd.get(), reinterpret_cast<struct sockaddr*>(&addr),
+                          &addrlen),
+              SyscallSucceeds());
+  EXPECT_EQ(addrlen, sizeof(addr));
+  // This is the only netlink socket in the process, so it should get the PID as
+  // the port id.
+  //
+  // N.B. Another process could theoretically have explicitly reserved our pid
+  // as a port ID, but that is very unlikely.
+  EXPECT_EQ(addr.nl_pid, getpid());
+
+  memset(&addr, 0, sizeof(addr));
+  addr.nl_family = AF_NETLINK;
+
+  // Connecting again is allowed, but keeps the same port.
+  EXPECT_THAT(connect(fd.get(), reinterpret_cast<struct sockaddr*>(&addr),
+                      sizeof(addr)),
+              SyscallSucceeds());
+
+  addrlen = sizeof(addr);
+  EXPECT_THAT(getsockname(fd.get(), reinterpret_cast<struct sockaddr*>(&addr),
+                          &addrlen),
+              SyscallSucceeds());
+  EXPECT_EQ(addrlen, sizeof(addr));
+  EXPECT_EQ(addr.nl_pid, getpid());
+}
+
+// getpeername on an unconnected netlink socket reports AF_NETLINK with port 0.
+TEST(NetlinkRouteTest, GetPeerName) {
+  FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE));
+
+  struct sockaddr_nl addr = {};
+  socklen_t addrlen = sizeof(addr);
+
+  EXPECT_THAT(getpeername(fd.get(), reinterpret_cast<struct sockaddr*>(&addr),
+                          &addrlen),
+              SyscallSucceeds());
+
+  EXPECT_EQ(addrlen, sizeof(addr));
+  EXPECT_EQ(addr.nl_family, AF_NETLINK);
+  // Peer is the kernel if we didn't connect elsewhere.
+  EXPECT_EQ(addr.nl_pid, 0);
+}
+
+// Fixture parameterized on a SOL_SOCKET option name whose value is an int.
+using IntSockOptTest = ::testing::TestWithParam<int>;
+
+// The option can be read and holds a positive value.
+TEST_P(IntSockOptTest, GetSockOpt) {
+  FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE));
+
+  int res;
+  socklen_t len = sizeof(res);
+
+  EXPECT_THAT(getsockopt(fd.get(), SOL_SOCKET, GetParam(), &res, &len),
+              SyscallSucceeds());
+
+  EXPECT_EQ(len, sizeof(res));
+  EXPECT_GT(res, 0);
+}
+
+INSTANTIATE_TEST_CASE_P(NetlinkRouteTest, IntSockOptTest,
+                        ::testing::Values(SO_SNDBUF, SO_RCVBUF));
+
+// Validates the responses to RTM_GETLINK + NLM_F_DUMP.
+//
+// `seq` and `port` are the sequence number and port ID every response header
+// must carry. They are uint32_t to match nlmsg_seq/nlmsg_pid and the values the
+// callers pass, which avoids signed/unsigned comparisons.
+void CheckGetLinkResponse(const struct nlmsghdr* hdr, uint32_t seq,
+                          uint32_t port) {
+  EXPECT_THAT(hdr->nlmsg_type, AnyOf(Eq(RTM_NEWLINK), Eq(NLMSG_DONE)));
+
+  EXPECT_TRUE((hdr->nlmsg_flags & NLM_F_MULTI) == NLM_F_MULTI)
+      << std::hex << hdr->nlmsg_flags;
+
+  EXPECT_EQ(hdr->nlmsg_seq, seq);
+  EXPECT_EQ(hdr->nlmsg_pid, port);
+
+  if (hdr->nlmsg_type != RTM_NEWLINK) {
+    return;
+  }
+
+  // RTM_NEWLINK contains at least the header and ifinfomsg.
+  EXPECT_GE(hdr->nlmsg_len, NLMSG_SPACE(sizeof(struct ifinfomsg)));
+
+  // TODO: Check ifinfomsg contents and following attrs.
+}
+
+// An RTM_GETLINK dump yields well-formed responses that include a loopback
+// device.
+TEST(NetlinkRouteTest, GetLinkDump) {
+  FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket());
+  uint32_t port = ASSERT_NO_ERRNO_AND_VALUE(NetlinkPortID(fd.get()));
+
+  struct request {
+    struct nlmsghdr hdr;
+    struct ifinfomsg ifm;
+  };
+
+  constexpr uint32_t kSeq = 12345;
+
+  struct request req = {};
+  req.hdr.nlmsg_len = sizeof(req);
+  req.hdr.nlmsg_type = RTM_GETLINK;
+  req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
+  req.hdr.nlmsg_seq = kSeq;
+  req.ifm.ifi_family = AF_UNSPEC;
+
+  // Loopback is common among all tests, check that it's found.
+  bool loopbackFound = false;
+  ASSERT_NO_ERRNO(NetlinkRequestResponse(
+      fd, &req, sizeof(req), [&](const struct nlmsghdr* hdr) {
+        CheckGetLinkResponse(hdr, kSeq, port);
+        if (hdr->nlmsg_type != RTM_NEWLINK) {
+          return;
+        }
+        ASSERT_GE(hdr->nlmsg_len, NLMSG_SPACE(sizeof(struct ifinfomsg)));
+        const struct ifinfomsg* msg =
+            reinterpret_cast<const struct ifinfomsg*>(NLMSG_DATA(hdr));
+        LOG(INFO) << "Found interface idx=" << msg->ifi_index
+                  << ", type=" << std::hex << msg->ifi_type;
+        if (msg->ifi_type == ARPHRD_LOOPBACK) {
+          loopbackFound = true;
+          EXPECT_NE(msg->ifi_flags & IFF_LOOPBACK, 0);
+        }
+      }));
+  EXPECT_TRUE(loopbackFound);
+}
+
+// A leading NLMSG_DONE control message in the request does not prevent the
+// RTM_GETLINK that follows it from being answered.
+TEST(NetlinkRouteTest, ControlMessageIgnored) {
+  FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket());
+  uint32_t port = ASSERT_NO_ERRNO_AND_VALUE(NetlinkPortID(fd.get()));
+
+  struct request {
+    struct nlmsghdr control_hdr;
+    struct nlmsghdr message_hdr;
+    struct ifinfomsg ifm;
+  };
+
+  constexpr uint32_t kSeq = 12345;
+
+  struct request req = {};
+
+  // This control message is ignored. We still receive a response for the
+  // following RTM_GETLINK.
+  req.control_hdr.nlmsg_len = sizeof(req.control_hdr);
+  req.control_hdr.nlmsg_type = NLMSG_DONE;
+  req.control_hdr.nlmsg_seq = kSeq;
+
+  req.message_hdr.nlmsg_len = sizeof(req.message_hdr) + sizeof(req.ifm);
+  req.message_hdr.nlmsg_type = RTM_GETLINK;
+  req.message_hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
+  req.message_hdr.nlmsg_seq = kSeq;
+
+  req.ifm.ifi_family = AF_UNSPEC;
+
+  ASSERT_NO_ERRNO(NetlinkRequestResponse(
+      fd, &req, sizeof(req), [&](const struct nlmsghdr* hdr) {
+        CheckGetLinkResponse(hdr, kSeq, port);
+      }));
+}
+
+// An RTM_GETADDR dump yields well-formed RTM_NEWADDR responses.
+TEST(NetlinkRouteTest, GetAddrDump) {
+  FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket());
+  uint32_t port = ASSERT_NO_ERRNO_AND_VALUE(NetlinkPortID(fd.get()));
+
+  struct request {
+    struct nlmsghdr hdr;
+    struct rtgenmsg rgm;
+  };
+
+  constexpr uint32_t kSeq = 12345;
+
+  // Zero-initialize, as the other dump tests do, so nlmsg_pid and any padding
+  // are not sent to the kernel as garbage.
+  struct request req = {};
+  req.hdr.nlmsg_len = sizeof(req);
+  req.hdr.nlmsg_type = RTM_GETADDR;
+  req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
+  req.hdr.nlmsg_seq = kSeq;
+  req.rgm.rtgen_family = AF_UNSPEC;
+
+  ASSERT_NO_ERRNO(NetlinkRequestResponse(
+      fd, &req, sizeof(req), [&](const struct nlmsghdr* hdr) {
+        EXPECT_THAT(hdr->nlmsg_type, AnyOf(Eq(RTM_NEWADDR), Eq(NLMSG_DONE)));
+
+        EXPECT_TRUE((hdr->nlmsg_flags & NLM_F_MULTI) == NLM_F_MULTI)
+            << std::hex << hdr->nlmsg_flags;
+
+        EXPECT_EQ(hdr->nlmsg_seq, kSeq);
+        EXPECT_EQ(hdr->nlmsg_pid, port);
+
+        if (hdr->nlmsg_type != RTM_NEWADDR) {
+          return;
+        }
+
+        // RTM_NEWADDR contains at least the header and ifaddrmsg.
+        EXPECT_GE(hdr->nlmsg_len, sizeof(*hdr) + sizeof(struct ifaddrmsg));
+
+        // TODO: Check ifaddrmsg contents and following attrs.
+      }));
+}
+
+// getifaddrs() returns at least one IPv4 or IPv6 address.
+TEST(NetlinkRouteTest, LookupAll) {
+  struct ifaddrs* if_addr_list = nullptr;
+  auto cleanup = Cleanup([&if_addr_list]() { freeifaddrs(if_addr_list); });
+
+  // Not a syscall but we can use the syscall matcher as glibc sets errno.
+  ASSERT_THAT(getifaddrs(&if_addr_list), SyscallSucceeds());
+
+  int count = 0;
+  for (struct ifaddrs* i = if_addr_list; i; i = i->ifa_next) {
+    if (!i->ifa_addr || (i->ifa_addr->sa_family != AF_INET &&
+                         i->ifa_addr->sa_family != AF_INET6)) {
+      continue;
+    }
+    count++;
+  }
+  ASSERT_GT(count, 0);
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_netlink_util.cc b/test/syscalls/linux/socket_netlink_util.cc
new file mode 100644
index 000000000..ee0e03966
--- /dev/null
+++ b/test/syscalls/linux/socket_netlink_util.cc
@@ -0,0 +1,100 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <linux/if_arp.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+
+#include <sys/socket.h>
+#include <vector>
+
+#include "absl/strings/str_cat.h"
+#include "test/syscalls/linux/socket_netlink_util.h"
+#include "test/syscalls/linux/socket_test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// Creates a NETLINK_ROUTE raw socket and binds it with nl_pid left at zero.
+// Returns the socket, or the error from socket()/bind().
+PosixErrorOr<FileDescriptor> NetlinkBoundSocket() {
+  FileDescriptor fd;
+  ASSIGN_OR_RETURN_ERRNO(fd, Socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE));
+
+  struct sockaddr_nl addr = {};
+  addr.nl_family = AF_NETLINK;
+
+  RETURN_ERROR_IF_SYSCALL_FAIL(
+      bind(fd.get(), reinterpret_cast<struct sockaddr*>(&addr), sizeof(addr)));
+  MaybeSave();
+
+  return std::move(fd);
+}
+
+// Returns the nl_pid that getsockname() reports for `fd`, or the error from
+// getsockname().
+PosixErrorOr<uint32_t> NetlinkPortID(int fd) {
+  // Zeroed so a short write by getsockname() cannot leave garbage in nl_pid.
+  struct sockaddr_nl addr = {};
+  socklen_t addrlen = sizeof(addr);
+
+  RETURN_ERROR_IF_SYSCALL_FAIL(
+      getsockname(fd, reinterpret_cast<struct sockaddr*>(&addr), &addrlen));
+  MaybeSave();
+
+  return static_cast<uint32_t>(addr.nl_pid);
+}
+
+// Sends the `len` bytes at `request` on `fd` as one message, then calls `fn`
+// for every netlink message header received until NLMSG_DONE or NLMSG_ERROR is
+// seen. Returns an error if sendmsg()/recvmsg() fails or a response is
+// truncated; a final message other than NLMSG_DONE is reported through
+// EXPECT_EQ rather than the return value.
+PosixError NetlinkRequestResponse(
+    const FileDescriptor& fd, void* request, size_t len,
+    const std::function<void(const struct nlmsghdr* hdr)>& fn) {
+  struct iovec iov = {};
+  iov.iov_base = request;
+  iov.iov_len = len;
+
+  struct msghdr msg = {};
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+  // No destination required; it defaults to pid 0, the kernel.
+
+  RETURN_ERROR_IF_SYSCALL_FAIL(RetryEINTR(sendmsg)(fd.get(), &msg, 0));
+
+  constexpr size_t kBufferSize = 4096;
+  std::vector<char> buf(kBufferSize);
+  iov.iov_base = buf.data();
+  iov.iov_len = buf.size();
+
+  // Response is a series of NLM_F_MULTI messages, ending with a NLMSG_DONE
+  // message.
+  int type = -1;
+  do {
+    // Bytes still to be parsed in this datagram. This is a separate name so it
+    // does not shadow the `len` parameter. It must be a mutable int because
+    // NLMSG_NEXT decrements it.
+    int received;
+    RETURN_ERROR_IF_SYSCALL_FAIL(received =
+                                     RetryEINTR(recvmsg)(fd.get(), &msg, 0));
+
+    // We don't bother with the complexity of dealing with truncated messages.
+    // We must allocate a large enough buffer up front.
+    if ((msg.msg_flags & MSG_TRUNC) == MSG_TRUNC) {
+      return PosixError(EIO,
+                        absl::StrCat("Received truncated message with flags: ",
+                                     msg.msg_flags));
+    }
+
+    for (struct nlmsghdr* hdr = reinterpret_cast<struct nlmsghdr*>(buf.data());
+         NLMSG_OK(hdr, received); hdr = NLMSG_NEXT(hdr, received)) {
+      fn(hdr);
+      type = hdr->nlmsg_type;
+    }
+  } while (type != NLMSG_DONE && type != NLMSG_ERROR);
+
+  EXPECT_EQ(type, NLMSG_DONE);
+  return NoError();
+}
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_netlink_util.h b/test/syscalls/linux/socket_netlink_util.h
new file mode 100644
index 000000000..44b1f148c
--- /dev/null
+++ b/test/syscalls/linux/socket_netlink_util.h
@@ -0,0 +1,42 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_SYSCALLS_SOCKET_NETLINK_UTIL_H_
+#define GVISOR_TEST_SYSCALLS_SOCKET_NETLINK_UTIL_H_
+
+#include <linux/if_arp.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+
+#include <functional>
+
+#include "test/util/file_descriptor.h"
+#include "test/util/posix_error.h"
+
+namespace gvisor {
+namespace testing {
+
+// Returns a bound NETLINK_ROUTE socket.
+PosixErrorOr<FileDescriptor> NetlinkBoundSocket();
+
+// Returns the port ID of the passed socket.
+PosixErrorOr<uint32_t> NetlinkPortID(int fd);
+
+// Send the passed request and call fn with all response netlink messages.
+//
+// `request` points to `len` bytes that are sent as a single message. `fn` is
+// invoked once per received netlink message header until an NLMSG_DONE or
+// NLMSG_ERROR message is seen. An error is returned if sending or receiving
+// fails, or if a response is truncated.
+PosixError NetlinkRequestResponse(
+    const FileDescriptor& fd, void* request, size_t len,
+    const std::function<void(const struct nlmsghdr* hdr)>& fn);
+
+}  // namespace testing
+}  // namespace gvisor
+
+#endif  // GVISOR_TEST_SYSCALLS_SOCKET_NETLINK_UTIL_H_
diff --git a/test/syscalls/linux/socket_non_blocking.cc b/test/syscalls/linux/socket_non_blocking.cc
new file mode 100644
index 000000000..1bcc6fb7f
--- /dev/null
+++ b/test/syscalls/linux/socket_non_blocking.cc
@@ -0,0 +1,63 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test/syscalls/linux/socket_non_blocking.h"
+
+#include <stdio.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+// ReadFd on a freshly created pair, with nothing sent, must fail with EAGAIN.
+TEST_P(NonBlockingSocketPairTest, ReadNothingAvailable) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  char buf[20] = {};
+  ASSERT_THAT(ReadFd(sockets->first_fd(), buf, sizeof(buf)),
+              SyscallFailsWithErrno(EAGAIN));
+}
+
+// recv on a freshly created pair, with nothing sent, must fail with EAGAIN.
+TEST_P(NonBlockingSocketPairTest, RecvNothingAvailable) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  char buf[20] = {};
+  ASSERT_THAT(RetryEINTR(recv)(sockets->first_fd(), buf, sizeof(buf), 0),
+              SyscallFailsWithErrno(EAGAIN));
+}
+
+// recvmsg with a single-entry iovec on a freshly created pair, with nothing
+// sent, must fail with EAGAIN.
+TEST_P(NonBlockingSocketPairTest, RecvMsgNothingAvailable) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  struct iovec iov;
+  char buf[20] = {};
+  iov.iov_base = buf;
+  iov.iov_len = sizeof(buf);
+  struct msghdr msg = {};
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+
+  ASSERT_THAT(RetryEINTR(recvmsg)(sockets->first_fd(), &msg, 0),
+              SyscallFailsWithErrno(EAGAIN));
+}
+
+}  // namespace
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_non_blocking.h b/test/syscalls/linux/socket_non_blocking.h
new file mode 100644
index 000000000..287e096bb
--- /dev/null
+++ b/test/syscalls/linux/socket_non_blocking.h
@@ -0,0 +1,29 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_NON_BLOCKING_H_ +#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_NON_BLOCKING_H_ + +#include "test/syscalls/linux/socket_test_util.h" + +namespace gvisor { +namespace testing { + +// Test fixture for tests that apply to pairs of connected non-blocking sockets. +using NonBlockingSocketPairTest = SocketPairTest; + +} // namespace testing +} // namespace gvisor + +#endif // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_NON_BLOCKING_H_ diff --git a/test/syscalls/linux/socket_non_stream.cc b/test/syscalls/linux/socket_non_stream.cc new file mode 100644 index 000000000..d49aab363 --- /dev/null +++ b/test/syscalls/linux/socket_non_stream.cc @@ -0,0 +1,174 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#include "test/syscalls/linux/socket_non_stream.h"
+
+#include <stdio.h>
+#include <sys/un.h>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/ip_socket_test_util.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// A message three times the size reported by SO_SNDBUF is expected to be
+// rejected with EMSGSIZE.
+TEST_P(NonStreamSocketPairTest, SendMsgTooLarge) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  int sndbuf;
+  socklen_t length = sizeof(sndbuf);
+  ASSERT_THAT(
+      getsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDBUF, &sndbuf, &length),
+      SyscallSucceeds());
+
+  // Make the call too large to fit in the send buffer.
+  const int buffer_size = 3 * sndbuf;
+
+  EXPECT_THAT(SendLargeSendMsg(sockets, buffer_size, false /* reader */),
+              SyscallFailsWithErrno(EMSGSIZE));
+}
+
+// Stream sockets allow data sent with a single (e.g. write, sendmsg) syscall
+// to be read in pieces with multiple (e.g. read, recvmsg) syscalls. Non-stream
+// sockets do not.
+//
+// SplitRecv checks that a recv shorter than the message consumes the whole
+// message: the first half is returned and a following non-blocking recv fails
+// with EWOULDBLOCK.
+TEST_P(NonStreamSocketPairTest, SplitRecv) {
+  constexpr size_t kMessageSize = 512;
+  constexpr size_t kHalfSize = kMessageSize / 2;
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  // Send one message of random bytes.
+  char message[kMessageSize];
+  RandomizeBuffer(message, kMessageSize);
+  ASSERT_THAT(RetryEINTR(send)(sockets->first_fd(), message, kMessageSize, 0),
+              SyscallSucceedsWithValue(kMessageSize));
+
+  // Read only the first half; it must match what was sent.
+  char half[kHalfSize];
+  ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), half, kHalfSize, 0),
+              SyscallSucceedsWithValue(kHalfSize));
+  EXPECT_EQ(0, memcmp(message, half, kHalfSize));
+
+  // A second, non-blocking read finds nothing.
+  ASSERT_THAT(
+      RetryEINTR(recv)(sockets->second_fd(), half, kHalfSize, MSG_DONTWAIT),
+      SyscallFailsWithErrno(EWOULDBLOCK));
+}
+
+// Stream sockets allow data sent with multiple sends to be read in a single
+// recv. Datagram sockets do not.
+//
+// SingleRecv checks that only a single message is readable in a single recv.
+TEST_P(NonStreamSocketPairTest, SingleRecv) {
+  constexpr size_t kMessageSize = 20;
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char first[kMessageSize];
+  RandomizeBuffer(first, sizeof(first));
+  ASSERT_THAT(RetryEINTR(send)(sockets->first_fd(), first, sizeof(first), 0),
+              SyscallSucceedsWithValue(sizeof(first)));
+
+  char second[kMessageSize];
+  RandomizeBuffer(second, sizeof(second));
+  ASSERT_THAT(
+      RetryEINTR(send)(sockets->first_fd(), second, sizeof(second), 0),
+      SyscallSucceedsWithValue(sizeof(second)));
+
+  // The buffer has room for both messages, but only the first is returned.
+  char received[sizeof(first) + sizeof(second)];
+  ASSERT_THAT(
+      RetryEINTR(recv)(sockets->second_fd(), received, sizeof(received), 0),
+      SyscallSucceedsWithValue(sizeof(first)));
+  EXPECT_EQ(0, memcmp(first, received, sizeof(first)));
+}
+
+// Stream sockets allow data sent with multiple sends to be peeked at in a
+// single recv. Datagram sockets (except for unix sockets) do not.
+//
+// SinglePeek checks that only a single message is peekable in a single recv.
+TEST_P(NonStreamSocketPairTest, SinglePeek) {
+  constexpr int kPeekCount = 3;
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char first[20];
+  RandomizeBuffer(first, sizeof(first));
+  ASSERT_THAT(RetryEINTR(send)(sockets->first_fd(), first, sizeof(first), 0),
+              SyscallSucceedsWithValue(sizeof(first)));
+
+  char second[20];
+  RandomizeBuffer(second, sizeof(second));
+  ASSERT_THAT(
+      RetryEINTR(send)(sockets->first_fd(), second, sizeof(second), 0),
+      SyscallSucceedsWithValue(sizeof(second)));
+
+  // Every peek must return the first message only, however often it is
+  // repeated.
+  char received[sizeof(first) + sizeof(second)];
+  for (int peek = 0; peek < kPeekCount; ++peek) {
+    memset(received, 0, sizeof(received));
+    ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received,
+                                 sizeof(received), MSG_PEEK),
+                SyscallSucceedsWithValue(sizeof(first)));
+    EXPECT_EQ(0, memcmp(first, received, sizeof(first)));
+  }
+
+  // Ordinary reads then return both messages, in the order they were sent.
+  ASSERT_THAT(
+      RetryEINTR(recv)(sockets->second_fd(), received, sizeof(first), 0),
+      SyscallSucceedsWithValue(sizeof(first)));
+  EXPECT_EQ(0, memcmp(first, received, sizeof(first)));
+  ASSERT_THAT(
+      RetryEINTR(recv)(sockets->second_fd(), received, sizeof(second), 0),
+      SyscallSucceedsWithValue(sizeof(second)));
+  EXPECT_EQ(0, memcmp(second, received, sizeof(second)));
+}
+
+// With MSG_TRUNC and a buffer half the message size, recv reports the full
+// message length but fills only the space that was offered.
+TEST_P(NonStreamSocketPairTest, MsgTruncTruncation) {
+  constexpr size_t kMessageSize = 512;
+  constexpr size_t kHalfSize = kMessageSize / 2;
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char message[kMessageSize];
+  RandomizeBuffer(message, sizeof(message));
+  ASSERT_THAT(
+      RetryEINTR(send)(sockets->first_fd(), message, sizeof(message), 0),
+      SyscallSucceedsWithValue(sizeof(message)));
+
+  // The buffer starts zeroed so untouched bytes can be told apart below.
+  char received[kMessageSize] = {};
+  ASSERT_THAT(
+      RetryEINTR(recv)(sockets->second_fd(), received, kHalfSize, MSG_TRUNC),
+      SyscallSucceedsWithValue(kMessageSize));
+  EXPECT_EQ(0, memcmp(message, received, kHalfSize));
+
+  // Check that we didn't get any extra data.
+  EXPECT_NE(0, memcmp(message + kHalfSize, received + kHalfSize, kHalfSize));
+}
+
+// With MSG_TRUNC and a buffer exactly the message size, the whole message is
+// returned.
+TEST_P(NonStreamSocketPairTest, MsgTruncSameSize) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char message[512];
+  RandomizeBuffer(message, sizeof(message));
+  ASSERT_THAT(
+      RetryEINTR(send)(sockets->first_fd(), message, sizeof(message), 0),
+      SyscallSucceedsWithValue(sizeof(message)));
+
+  char received[sizeof(message)];
+  ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received,
+                               sizeof(received), MSG_TRUNC),
+              SyscallSucceedsWithValue(sizeof(received)));
+  EXPECT_EQ(0, memcmp(message, received, sizeof(message)));
+}
+
+// With MSG_TRUNC and a buffer twice the message size, recv returns the
+// message length rather than the buffer length.
+TEST_P(NonStreamSocketPairTest, MsgTruncNotFull) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char message[512];
+  RandomizeBuffer(message, sizeof(message));
+  ASSERT_THAT(
+      RetryEINTR(send)(sockets->first_fd(), message, sizeof(message), 0),
+      SyscallSucceedsWithValue(sizeof(message)));
+
+  char received[2 * sizeof(message)];
+  ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received,
+                               sizeof(received), MSG_TRUNC),
+              SyscallSucceedsWithValue(sizeof(message)));
+  EXPECT_EQ(0, memcmp(message, received, sizeof(message)));
+}
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_non_stream.h b/test/syscalls/linux/socket_non_stream.h
new file mode 100644
index 000000000..02dd2a958
--- /dev/null
+++ b/test/syscalls/linux/socket_non_stream.h
@@ -0,0 +1,29 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_NON_STREAM_H_ +#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_NON_STREAM_H_ + +#include "test/syscalls/linux/socket_test_util.h" + +namespace gvisor { +namespace testing { + +// Test fixture for tests that apply to pairs of connected non-stream sockets. +using NonStreamSocketPairTest = SocketPairTest; + +} // namespace testing +} // namespace gvisor + +#endif // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_NON_STREAM_H_ diff --git a/test/syscalls/linux/socket_non_stream_blocking.cc b/test/syscalls/linux/socket_non_stream_blocking.cc new file mode 100644 index 000000000..d64b181c9 --- /dev/null +++ b/test/syscalls/linux/socket_non_stream_blocking.cc @@ -0,0 +1,51 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#include "test/syscalls/linux/socket_non_stream_blocking.h"
+
+#include <stdio.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// With MSG_WAITALL and a buffer twice the size of the single message written,
+// recv is expected to return that message's length rather than wait for the
+// buffer to fill.
+TEST_P(BlockingNonStreamSocketPairTest, RecvLessThanBufferWaitAll) {
+  SKIP_IF(IsRunningOnGvisor());  // FIXME: Support MSG_WAITALL.
+
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[100];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  ASSERT_THAT(write(sockets->first_fd(), sent_data, sizeof(sent_data)),
+              SyscallSucceedsWithValue(sizeof(sent_data)));
+
+  char received_data[sizeof(sent_data) * 2] = {};
+  ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data,
+                               sizeof(received_data), MSG_WAITALL),
+              SyscallSucceedsWithValue(sizeof(sent_data)));
+}
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_non_stream_blocking.h b/test/syscalls/linux/socket_non_stream_blocking.h
new file mode 100644
index 000000000..bde355452
--- /dev/null
+++ b/test/syscalls/linux/socket_non_stream_blocking.h
@@ -0,0 +1,30 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_NON_STREAM_BLOCKING_H_ +#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_NON_STREAM_BLOCKING_H_ + +#include "test/syscalls/linux/socket_test_util.h" + +namespace gvisor { +namespace testing { + +// Test fixture for tests that apply to pairs of blocking connected non-stream +// sockets. +using BlockingNonStreamSocketPairTest = SocketPairTest; + +} // namespace testing +} // namespace gvisor + +#endif // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_NON_STREAM_BLOCKING_H_ diff --git a/test/syscalls/linux/socket_stream.cc b/test/syscalls/linux/socket_stream.cc new file mode 100644 index 000000000..32e9d958b --- /dev/null +++ b/test/syscalls/linux/socket_stream.cc @@ -0,0 +1,99 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#include "test/syscalls/linux/socket_stream.h"
+
+#include <stdio.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/time/clock.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// Data sent with one send can be read back in two halves: each recv returns
+// half of the bytes, in order.
+TEST_P(StreamSocketPairTest, SplitRecv) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  char sent_data[512];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+  ASSERT_THAT(
+      RetryEINTR(send)(sockets->first_fd(), sent_data, sizeof(sent_data), 0),
+      SyscallSucceedsWithValue(sizeof(sent_data)));
+  char received_data[sizeof(sent_data) / 2];
+  ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data,
+                               sizeof(received_data), 0),
+              SyscallSucceedsWithValue(sizeof(received_data)));
+  EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(received_data)));
+  // The second recv picks up where the first one stopped.
+  ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data,
+                               sizeof(received_data), 0),
+              SyscallSucceedsWithValue(sizeof(received_data)));
+  EXPECT_EQ(0, memcmp(sent_data + sizeof(received_data), received_data,
+                      sizeof(received_data)));
+}
+
+// Stream sockets allow data sent with multiple sends to be read in a single
+// recv.
+//
+// CoalescedRecv checks that multiple messages are readable in a single recv.
+TEST_P(StreamSocketPairTest, CoalescedRecv) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char first[20];
+  RandomizeBuffer(first, sizeof(first));
+  ASSERT_THAT(RetryEINTR(send)(sockets->first_fd(), first, sizeof(first), 0),
+              SyscallSucceedsWithValue(sizeof(first)));
+
+  char second[20];
+  RandomizeBuffer(second, sizeof(second));
+  ASSERT_THAT(
+      RetryEINTR(send)(sockets->first_fd(), second, sizeof(second), 0),
+      SyscallSucceedsWithValue(sizeof(second)));
+
+  // One recv returns both sends back to back.
+  char received[sizeof(first) + sizeof(second)];
+  ASSERT_THAT(
+      RetryEINTR(recv)(sockets->second_fd(), received, sizeof(received), 0),
+      SyscallSucceedsWithValue(sizeof(received)));
+  EXPECT_EQ(0, memcmp(first, received, sizeof(first)));
+  EXPECT_EQ(0, memcmp(second, received + sizeof(first), sizeof(second)));
+}
+
+// Writing to a socket whose peer has been closed fails with EPIPE.
+TEST_P(StreamSocketPairTest, WriteOneSideClosed) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  ASSERT_THAT(close(sockets->release_first_fd()), SyscallSucceeds());
+
+  constexpr char kPayload[] = "abc";
+  ASSERT_THAT(write(sockets->second_fd(), kPayload, 3),
+              SyscallFailsWithErrno(EPIPE));
+}
+
+// With MSG_TRUNC and a buffer half the size of the data sent, recv returns
+// only the number of bytes actually read.
+TEST_P(StreamSocketPairTest, MsgTrunc) {
+  constexpr size_t kDataSize = 512;
+  constexpr size_t kHalfSize = kDataSize / 2;
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char data[kDataSize];
+  RandomizeBuffer(data, sizeof(data));
+  ASSERT_THAT(RetryEINTR(send)(sockets->first_fd(), data, sizeof(data), 0),
+              SyscallSucceedsWithValue(sizeof(data)));
+
+  char received[kDataSize];
+  ASSERT_THAT(
+      RetryEINTR(recv)(sockets->second_fd(), received, kHalfSize, MSG_TRUNC),
+      SyscallSucceedsWithValue(kHalfSize));
+  EXPECT_EQ(0, memcmp(data, received, kHalfSize));
+}
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_stream.h b/test/syscalls/linux/socket_stream.h
new file mode 100644
index 000000000..35e591e17
---
/dev/null +++ b/test/syscalls/linux/socket_stream.h @@ -0,0 +1,30 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_STREAM_H_ +#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_STREAM_H_ + +#include "test/syscalls/linux/socket_test_util.h" + +namespace gvisor { +namespace testing { + +// Test fixture for tests that apply to pairs of blocking and non-blocking +// connected stream sockets. +using StreamSocketPairTest = SocketPairTest; + +} // namespace testing +} // namespace gvisor + +#endif // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_STREAM_H_ diff --git a/test/syscalls/linux/socket_stream_blocking.cc b/test/syscalls/linux/socket_stream_blocking.cc new file mode 100644 index 000000000..dd209c67c --- /dev/null +++ b/test/syscalls/linux/socket_stream_blocking.cc @@ -0,0 +1,131 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#include "test/syscalls/linux/socket_stream_blocking.h"
+
+#include <stdio.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+#include "test/util/timer_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// A write larger than the send buffer is started on a helper thread; closing
+// the peer must complete it as a partial write, and the next write must fail.
+TEST_P(BlockingStreamSocketPairTest, BlockPartialWriteClosed) {
+  // FIXME: gVisor doesn't support SO_SNDBUF on UDS, nor does it
+  // enforce any limit; it will write arbitrary amounts of data without
+  // blocking.
+  SKIP_IF(IsRunningOnGvisor());
+
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  int buffer_size;
+  socklen_t length = sizeof(buffer_size);
+  ASSERT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDBUF,
+                         &buffer_size, &length),
+              SyscallSucceeds());
+
+  int wfd = sockets->first_fd();
+  ScopedThread t([wfd, buffer_size]() {
+    std::vector<char> buf(2 * buffer_size);
+    // Write more than fits in the buffer. Blocks then returns partial write
+    // when the other end is closed. The next call returns EPIPE.
+    //
+    // N.B. writes occur in chunks, so we may see less than buffer_size from
+    // the first call.
+    ASSERT_THAT(write(wfd, buf.data(), buf.size()),
+                SyscallSucceedsWithValue(::testing::Gt(0)));
+    ASSERT_THAT(write(wfd, buf.data(), buf.size()),
+                ::testing::AnyOf(SyscallFailsWithErrno(EPIPE),
+                                 SyscallFailsWithErrno(ECONNRESET)));
+  });
+
+  // Leave time for write to become blocked.
+  absl::SleepFor(absl::Seconds(1.0));
+
+  ASSERT_THAT(close(sockets->release_second_fd()), SyscallSucceeds());
+}
+
+// With a reader draining the other end, a message three times the size
+// reported by SO_SNDBUF is sent in full.
+TEST_P(BlockingStreamSocketPairTest, SendMsgTooLarge) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  int sndbuf;
+  socklen_t length = sizeof(sndbuf);
+  ASSERT_THAT(
+      getsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDBUF, &sndbuf, &length),
+      SyscallSucceeds());
+
+  // Make the call too large to fit in the send buffer.
+  const int buffer_size = 3 * sndbuf;
+
+  EXPECT_THAT(SendLargeSendMsg(sockets, buffer_size, true /* reader */),
+              SyscallSucceedsWithValue(buffer_size));
+}
+
+// A recv into a buffer larger than the data written returns only the bytes
+// that were written.
+TEST_P(BlockingStreamSocketPairTest, RecvLessThanBuffer) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[100];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  ASSERT_THAT(write(sockets->first_fd(), sent_data, sizeof(sent_data)),
+              SyscallSucceedsWithValue(sizeof(sent_data)));
+
+  char received_data[200] = {};
+  ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data,
+                               sizeof(received_data), 0),
+              SyscallSucceedsWithValue(sizeof(sent_data)));
+}
+
+// With MSG_WAITALL, recv must wait for a delayed second write and return only
+// once the whole buffer has been filled.
+TEST_P(BlockingStreamSocketPairTest, RecvLessThanBufferWaitAll) {
+  SKIP_IF(IsRunningOnGvisor());  // FIXME: Support MSG_WAITALL.
+
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[100];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  ASSERT_THAT(write(sockets->first_fd(), sent_data, sizeof(sent_data)),
+              SyscallSucceedsWithValue(sizeof(sent_data)));
+
+  constexpr auto kDuration = absl::Milliseconds(200);
+  auto before = Now(CLOCK_MONOTONIC);
+
+  // Supply the second half of the data only after kDuration has elapsed.
+  const ScopedThread t([&]() {
+    absl::SleepFor(kDuration);
+    ASSERT_THAT(write(sockets->first_fd(), sent_data, sizeof(sent_data)),
+                SyscallSucceedsWithValue(sizeof(sent_data)));
+  });
+
+  char received_data[sizeof(sent_data) * 2] = {};
+  ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data,
+                               sizeof(received_data), MSG_WAITALL),
+              SyscallSucceedsWithValue(sizeof(received_data)));
+
+  // recv cannot have returned before the delayed write happened.
+  auto after = Now(CLOCK_MONOTONIC);
+  EXPECT_GE(after - before, kDuration);
+}
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_stream_blocking.h b/test/syscalls/linux/socket_stream_blocking.h
new file mode 100644
index 000000000..06113ad03
--- /dev/null
+++ b/test/syscalls/linux/socket_stream_blocking.h
@@ -0,0 +1,30 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_STREAM_BLOCKING_H_ +#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_STREAM_BLOCKING_H_ + +#include "test/syscalls/linux/socket_test_util.h" + +namespace gvisor { +namespace testing { + +// Test fixture for tests that apply to pairs of blocking connected stream +// sockets. +using BlockingStreamSocketPairTest = SocketPairTest; + +} // namespace testing +} // namespace gvisor + +#endif // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_STREAM_BLOCKING_H_ diff --git a/test/syscalls/linux/socket_stream_nonblock.cc b/test/syscalls/linux/socket_stream_nonblock.cc new file mode 100644 index 000000000..a3202ffe4 --- /dev/null +++ b/test/syscalls/linux/socket_stream_nonblock.cc @@ -0,0 +1,50 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#include "test/syscalls/linux/socket_stream_nonblock.h"
+
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <sys/un.h>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+using ::testing::Le;
+
+// With no reader, sending a message three times the size reported by
+// SO_SNDBUF must still succeed, returning at most the requested size.
+TEST_P(NonBlockingStreamSocketPairTest, SendMsgTooLarge) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  int sndbuf;
+  socklen_t length = sizeof(sndbuf);
+  ASSERT_THAT(
+      getsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDBUF, &sndbuf, &length),
+      SyscallSucceeds());
+
+  // Make the call too large to fit in the send buffer.
+  const int buffer_size = 3 * sndbuf;
+
+  EXPECT_THAT(SendLargeSendMsg(sockets, buffer_size, false /* reader */),
+              SyscallSucceedsWithValue(Le(buffer_size)));
+}
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_stream_nonblock.h b/test/syscalls/linux/socket_stream_nonblock.h
new file mode 100644
index 000000000..491f53848
--- /dev/null
+++ b/test/syscalls/linux/socket_stream_nonblock.h
@@ -0,0 +1,30 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_STREAM_NONBLOCK_H_ +#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_STREAM_NONBLOCK_H_ + +#include "test/syscalls/linux/socket_test_util.h" + +namespace gvisor { +namespace testing { + +// Test fixture for tests that apply to pairs of non-blocking connected stream +// sockets. +using NonBlockingStreamSocketPairTest = SocketPairTest; + +} // namespace testing +} // namespace gvisor + +#endif // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_STREAM_NONBLOCK_H_ diff --git a/test/syscalls/linux/socket_test_util.cc b/test/syscalls/linux/socket_test_util.cc new file mode 100644 index 000000000..80a59df7e --- /dev/null +++ b/test/syscalls/linux/socket_test_util.cc @@ -0,0 +1,660 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "test/syscalls/linux/socket_test_util.h" + +#include <arpa/inet.h> +#include <poll.h> +#include <sys/socket.h> + +#include "gtest/gtest.h" +#include "absl/memory/memory.h" +#include "absl/strings/str_cat.h" +#include "absl/time/clock.h" +#include "test/util/posix_error.h" +#include "test/util/temp_path.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +Creator<SocketPair> SyscallSocketPairCreator(int domain, int type, + int protocol) { + return [=]() -> PosixErrorOr<std::unique_ptr<FDSocketPair>> { + int pair[2]; + RETURN_ERROR_IF_SYSCALL_FAIL(socketpair(domain, type, protocol, pair)); + MaybeSave(); // Save on successful creation. 
+ return absl::make_unique<FDSocketPair>(pair[0], pair[1]); + }; +} + +Creator<FileDescriptor> SyscallSocketCreator(int domain, int type, + int protocol) { + return [=]() -> PosixErrorOr<std::unique_ptr<FileDescriptor>> { + int fd = 0; + RETURN_ERROR_IF_SYSCALL_FAIL(fd = socket(domain, type, protocol)); + MaybeSave(); // Save on successful creation. + return absl::make_unique<FileDescriptor>(fd); + }; +} + +PosixErrorOr<struct sockaddr_un> UniqueUnixAddr(bool abstract, int domain) { + struct sockaddr_un addr = {}; + std::string path = NewTempAbsPathInDir("/tmp"); + if (path.size() >= sizeof(addr.sun_path)) { + return PosixError(EINVAL, + "Unable to generate a temp path of appropriate length"); + } + + if (abstract) { + // Indicate that the path is in the abstract namespace. + path[0] = 0; + } + memcpy(addr.sun_path, path.c_str(), path.length()); + addr.sun_family = domain; + return addr; +} + +Creator<SocketPair> AcceptBindSocketPairCreator(bool abstract, int domain, + int type, int protocol) { + return [=]() -> PosixErrorOr<std::unique_ptr<AddrFDSocketPair>> { + ASSIGN_OR_RETURN_ERRNO(struct sockaddr_un bind_addr, + UniqueUnixAddr(abstract, domain)); + ASSIGN_OR_RETURN_ERRNO(struct sockaddr_un extra_addr, + UniqueUnixAddr(abstract, domain)); + + int bound; + RETURN_ERROR_IF_SYSCALL_FAIL(bound = socket(domain, type, protocol)); + MaybeSave(); // Successful socket creation. + RETURN_ERROR_IF_SYSCALL_FAIL( + bind(bound, reinterpret_cast<struct sockaddr*>(&bind_addr), + sizeof(bind_addr))); + MaybeSave(); // Successful bind. + RETURN_ERROR_IF_SYSCALL_FAIL(listen(bound, /* backlog = */ 5)); + MaybeSave(); // Successful listen. + + int connected; + RETURN_ERROR_IF_SYSCALL_FAIL(connected = socket(domain, type, protocol)); + MaybeSave(); // Successful socket creation. + RETURN_ERROR_IF_SYSCALL_FAIL( + connect(connected, reinterpret_cast<struct sockaddr*>(&bind_addr), + sizeof(bind_addr))); + MaybeSave(); // Successful connect. 
+ + int accepted; + RETURN_ERROR_IF_SYSCALL_FAIL( + accepted = accept4(bound, nullptr, nullptr, + type & (SOCK_NONBLOCK | SOCK_CLOEXEC))); + MaybeSave(); // Successful connect. + + // Cleanup no longer needed resources. + RETURN_ERROR_IF_SYSCALL_FAIL(close(bound)); + MaybeSave(); // Dropped original socket. + + // Only unlink if path is not in abstract namespace. + if (bind_addr.sun_path[0] != 0) { + RETURN_ERROR_IF_SYSCALL_FAIL(unlink(bind_addr.sun_path)); + MaybeSave(); // Unlinked path. + } + + return absl::make_unique<AddrFDSocketPair>(connected, accepted, bind_addr, + extra_addr); + }; +} + +Creator<SocketPair> FilesystemAcceptBindSocketPairCreator(int domain, int type, + int protocol) { + return AcceptBindSocketPairCreator(/* abstract= */ false, domain, type, + protocol); +} + +Creator<SocketPair> AbstractAcceptBindSocketPairCreator(int domain, int type, + int protocol) { + return AcceptBindSocketPairCreator(/* abstract= */ true, domain, type, + protocol); +} + +Creator<SocketPair> BidirectionalBindSocketPairCreator(bool abstract, + int domain, int type, + int protocol) { + return [=]() -> PosixErrorOr<std::unique_ptr<FDSocketPair>> { + ASSIGN_OR_RETURN_ERRNO(struct sockaddr_un addr1, + UniqueUnixAddr(abstract, domain)); + ASSIGN_OR_RETURN_ERRNO(struct sockaddr_un addr2, + UniqueUnixAddr(abstract, domain)); + + int sock1; + RETURN_ERROR_IF_SYSCALL_FAIL(sock1 = socket(domain, type, protocol)); + MaybeSave(); // Successful socket creation. + RETURN_ERROR_IF_SYSCALL_FAIL( + bind(sock1, reinterpret_cast<struct sockaddr*>(&addr1), sizeof(addr1))); + MaybeSave(); // Successful bind. + + int sock2; + RETURN_ERROR_IF_SYSCALL_FAIL(sock2 = socket(domain, type, protocol)); + MaybeSave(); // Successful socket creation. + RETURN_ERROR_IF_SYSCALL_FAIL( + bind(sock2, reinterpret_cast<struct sockaddr*>(&addr2), sizeof(addr2))); + MaybeSave(); // Successful bind. 
+ + RETURN_ERROR_IF_SYSCALL_FAIL(connect( + sock1, reinterpret_cast<struct sockaddr*>(&addr2), sizeof(addr2))); + MaybeSave(); // Successful connect. + + RETURN_ERROR_IF_SYSCALL_FAIL(connect( + sock2, reinterpret_cast<struct sockaddr*>(&addr1), sizeof(addr1))); + MaybeSave(); // Successful connect. + + // Cleanup no longer needed resources. + + // Only unlink if path is not in abstract namespace. + if (addr1.sun_path[0] != 0) { + RETURN_ERROR_IF_SYSCALL_FAIL(unlink(addr1.sun_path)); + MaybeSave(); // Successful unlink. + } + + // Only unlink if path is not in abstract namespace. + if (addr2.sun_path[0] != 0) { + RETURN_ERROR_IF_SYSCALL_FAIL(unlink(addr2.sun_path)); + MaybeSave(); // Successful unlink. + } + + return absl::make_unique<FDSocketPair>(sock1, sock2); + }; +} + +Creator<SocketPair> FilesystemBidirectionalBindSocketPairCreator(int domain, + int type, + int protocol) { + return BidirectionalBindSocketPairCreator(/* abstract= */ false, domain, type, + protocol); +} + +Creator<SocketPair> AbstractBidirectionalBindSocketPairCreator(int domain, + int type, + int protocol) { + return BidirectionalBindSocketPairCreator(/* abstract= */ true, domain, type, + protocol); +} + +Creator<SocketPair> SocketpairGoferSocketPairCreator(int domain, int type, + int protocol) { + return [=]() -> PosixErrorOr<std::unique_ptr<FDSocketPair>> { + struct sockaddr_un addr = {}; + constexpr char kSocketGoferPath[] = "/socket"; + memcpy(addr.sun_path, kSocketGoferPath, sizeof(kSocketGoferPath)); + addr.sun_family = domain; + + int sock1; + RETURN_ERROR_IF_SYSCALL_FAIL(sock1 = socket(domain, type, protocol)); + MaybeSave(); // Successful socket creation. + RETURN_ERROR_IF_SYSCALL_FAIL(connect( + sock1, reinterpret_cast<struct sockaddr*>(&addr), sizeof(addr))); + MaybeSave(); // Successful connect. + + int sock2; + RETURN_ERROR_IF_SYSCALL_FAIL(sock2 = socket(domain, type, protocol)); + MaybeSave(); // Successful socket creation. 
+ RETURN_ERROR_IF_SYSCALL_FAIL(connect( + sock2, reinterpret_cast<struct sockaddr*>(&addr), sizeof(addr))); + MaybeSave(); // Successful connect. + + // Make and close another socketpair to ensure that the duped ends of the + // first socketpair get closed. + // + // The problem is that there is no way to atomically send and close an FD. + // The closest that we can do is send and then immediately close the FD, + // which is what we do in the gofer. The gofer won't respond to another + // request until the reply is sent and the FD is closed, so forcing the + // gofer to handle another request will ensure that this has happened. + for (int i = 0; i < 2; i++) { + int sock; + RETURN_ERROR_IF_SYSCALL_FAIL(sock = socket(domain, type, protocol)); + RETURN_ERROR_IF_SYSCALL_FAIL(connect( + sock, reinterpret_cast<struct sockaddr*>(&addr), sizeof(addr))); + RETURN_ERROR_IF_SYSCALL_FAIL(close(sock)); + } + + return absl::make_unique<FDSocketPair>(sock1, sock2); + }; +} + +Creator<SocketPair> SocketpairGoferFileSocketPairCreator(int flags) { + return [=]() -> PosixErrorOr<std::unique_ptr<FDSocketPair>> { + constexpr char kSocketGoferPath[] = "/socket"; + + int sock1; + RETURN_ERROR_IF_SYSCALL_FAIL(sock1 = + open(kSocketGoferPath, O_RDWR | flags)); + MaybeSave(); // Successful socket creation. + + int sock2; + RETURN_ERROR_IF_SYSCALL_FAIL(sock2 = + open(kSocketGoferPath, O_RDWR | flags)); + MaybeSave(); // Successful socket creation. + + return absl::make_unique<FDSocketPair>(sock1, sock2); + }; +} + +Creator<SocketPair> UnboundSocketPairCreator(bool abstract, int domain, + int type, int protocol) { + return [=]() -> PosixErrorOr<std::unique_ptr<AddrFDSocketPair>> { + ASSIGN_OR_RETURN_ERRNO(struct sockaddr_un addr1, + UniqueUnixAddr(abstract, domain)); + ASSIGN_OR_RETURN_ERRNO(struct sockaddr_un addr2, + UniqueUnixAddr(abstract, domain)); + + int sock1; + RETURN_ERROR_IF_SYSCALL_FAIL(sock1 = socket(domain, type, protocol)); + MaybeSave(); // Successful socket creation. 
+ int sock2; + RETURN_ERROR_IF_SYSCALL_FAIL(sock2 = socket(domain, type, protocol)); + MaybeSave(); // Successful socket creation. + return absl::make_unique<AddrFDSocketPair>(sock1, sock2, addr1, addr2); + }; +} + +Creator<SocketPair> FilesystemUnboundSocketPairCreator(int domain, int type, + int protocol) { + return UnboundSocketPairCreator(/* abstract= */ false, domain, type, + protocol); +} + +Creator<SocketPair> AbstractUnboundSocketPairCreator(int domain, int type, + int protocol) { + return UnboundSocketPairCreator(/* abstract= */ true, domain, type, protocol); +} + +void LocalhostAddr(struct sockaddr_in* addr, bool dual_stack) { + addr->sin_family = AF_INET; + addr->sin_port = htons(0); + inet_pton(AF_INET, "127.0.0.1", + reinterpret_cast<void*>(&addr->sin_addr.s_addr)); +} + +void LocalhostAddr(struct sockaddr_in6* addr, bool dual_stack) { + addr->sin6_family = AF_INET6; + addr->sin6_port = htons(0); + if (dual_stack) { + inet_pton(AF_INET6, "::ffff:127.0.0.1", + reinterpret_cast<void*>(&addr->sin6_addr.s6_addr)); + } else { + inet_pton(AF_INET6, "::1", + reinterpret_cast<void*>(&addr->sin6_addr.s6_addr)); + } + addr->sin6_scope_id = 0; +} + +template <typename T> +PosixErrorOr<T> BindIP(int fd, bool dual_stack) { + T addr = {}; + LocalhostAddr(&addr, dual_stack); + RETURN_ERROR_IF_SYSCALL_FAIL( + bind(fd, reinterpret_cast<struct sockaddr*>(&addr), sizeof(addr))); + socklen_t addrlen = sizeof(addr); + RETURN_ERROR_IF_SYSCALL_FAIL( + getsockname(fd, reinterpret_cast<struct sockaddr*>(&addr), &addrlen)); + return addr; +} + +template <typename T> +PosixErrorOr<std::unique_ptr<AddrFDSocketPair>> CreateTCPAcceptBindSocketPair( + int bound, int connected, int type, bool dual_stack) { + ASSIGN_OR_RETURN_ERRNO(T bind_addr, BindIP<T>(bound, dual_stack)); + RETURN_ERROR_IF_SYSCALL_FAIL(listen(bound, /* backlog = */ 5)); + + int connect_result = 0; + RETURN_ERROR_IF_SYSCALL_FAIL( + (connect_result = RetryEINTR(connect)( + connected, reinterpret_cast<struct 
sockaddr*>(&bind_addr), + sizeof(bind_addr))) == -1 && + errno == EINPROGRESS + ? 0 + : connect_result); + MaybeSave(); // Successful connect. + + if (connect_result == -1) { + struct pollfd connect_poll = {connected, POLLOUT | POLLERR | POLLHUP, 0}; + RETURN_ERROR_IF_SYSCALL_FAIL(RetryEINTR(poll)(&connect_poll, 1, 0)); + int error = 0; + socklen_t errorlen = sizeof(error); + RETURN_ERROR_IF_SYSCALL_FAIL( + getsockopt(connected, SOL_SOCKET, SO_ERROR, &error, &errorlen)); + errno = error; + RETURN_ERROR_IF_SYSCALL_FAIL( + /* connect */ error == 0 ? 0 : -1); + } + + int accepted = -1; + struct pollfd accept_poll = {bound, POLLIN, 0}; + while (accepted == -1) { + RETURN_ERROR_IF_SYSCALL_FAIL(RetryEINTR(poll)(&accept_poll, 1, 0)); + + RETURN_ERROR_IF_SYSCALL_FAIL( + (accepted = RetryEINTR(accept4)( + bound, nullptr, nullptr, type & (SOCK_NONBLOCK | SOCK_CLOEXEC))) == + -1 && + errno == EAGAIN + ? 0 + : accepted); + } + MaybeSave(); // Successful accept. + + // FIXME + if (connect_result == -1) { + absl::SleepFor(absl::Seconds(1)); + } + + // Cleanup no longer needed resources. + RETURN_ERROR_IF_SYSCALL_FAIL(close(bound)); + MaybeSave(); // Successful close. + + T extra_addr = {}; + LocalhostAddr(&extra_addr, dual_stack); + return absl::make_unique<AddrFDSocketPair>(connected, accepted, bind_addr, + extra_addr); +} + +Creator<SocketPair> TCPAcceptBindSocketPairCreator(int domain, int type, + int protocol, + bool dual_stack) { + return [=]() -> PosixErrorOr<std::unique_ptr<AddrFDSocketPair>> { + int bound; + RETURN_ERROR_IF_SYSCALL_FAIL(bound = socket(domain, type, protocol)); + MaybeSave(); // Successful socket creation. + + int connected; + RETURN_ERROR_IF_SYSCALL_FAIL(connected = socket(domain, type, protocol)); + MaybeSave(); // Successful socket creation. 
+ + if (domain == AF_INET) { + return CreateTCPAcceptBindSocketPair<sockaddr_in>(bound, connected, type, + dual_stack); + } + return CreateTCPAcceptBindSocketPair<sockaddr_in6>(bound, connected, type, + dual_stack); + }; +} + +template <typename T> +PosixErrorOr<std::unique_ptr<AddrFDSocketPair>> +CreateUDPBidirectionalBindSocketPair(int sock1, int sock2, int type, + bool dual_stack) { + ASSIGN_OR_RETURN_ERRNO(T addr1, BindIP<T>(sock1, dual_stack)); + ASSIGN_OR_RETURN_ERRNO(T addr2, BindIP<T>(sock2, dual_stack)); + + // Connect sock1 to sock2. + RETURN_ERROR_IF_SYSCALL_FAIL(connect( + sock1, reinterpret_cast<struct sockaddr*>(&addr2), sizeof(addr2))); + MaybeSave(); // Successful connection. + + // Connect sock2 to sock1. + RETURN_ERROR_IF_SYSCALL_FAIL(connect( + sock2, reinterpret_cast<struct sockaddr*>(&addr1), sizeof(addr1))); + MaybeSave(); // Successful connection. + + return absl::make_unique<AddrFDSocketPair>(sock1, sock2, addr1, addr2); +} + +Creator<SocketPair> UDPBidirectionalBindSocketPairCreator(int domain, int type, + int protocol, + bool dual_stack) { + return [=]() -> PosixErrorOr<std::unique_ptr<AddrFDSocketPair>> { + int sock1; + RETURN_ERROR_IF_SYSCALL_FAIL(sock1 = socket(domain, type, protocol)); + MaybeSave(); // Successful socket creation. + + int sock2; + RETURN_ERROR_IF_SYSCALL_FAIL(sock2 = socket(domain, type, protocol)); + MaybeSave(); // Successful socket creation. 
+ + if (domain == AF_INET) { + return CreateUDPBidirectionalBindSocketPair<sockaddr_in>( + sock1, sock2, type, dual_stack); + } + return CreateUDPBidirectionalBindSocketPair<sockaddr_in6>(sock1, sock2, + type, dual_stack); + }; +} + +SocketPairKind Reversed(SocketPairKind const& base) { + auto const& creator = base.creator; + return SocketPairKind{ + absl::StrCat("reversed ", base.description), + [creator]() -> PosixErrorOr<std::unique_ptr<ReversedSocketPair>> { + ASSIGN_OR_RETURN_ERRNO(auto creator_value, creator()); + return absl::make_unique<ReversedSocketPair>(std::move(creator_value)); + }}; +} + +std::vector<SocketPairKind> IncludeReversals(std::vector<SocketPairKind> vec) { + return ApplyVecToVec<SocketPairKind>(std::vector<Middleware>{NoOp, Reversed}, + vec); +} + +SocketPairKind NoOp(SocketPairKind const& base) { return base; } + +void TransferTest(int fd1, int fd2) { + char buf1[20]; + RandomizeBuffer(buf1, sizeof(buf1)); + ASSERT_THAT(WriteFd(fd1, buf1, sizeof(buf1)), + SyscallSucceedsWithValue(sizeof(buf1))); + + char buf2[20]; + ASSERT_THAT(ReadFd(fd2, buf2, sizeof(buf2)), + SyscallSucceedsWithValue(sizeof(buf2))); + + EXPECT_EQ(0, memcmp(buf1, buf2, sizeof(buf1))); + + RandomizeBuffer(buf1, sizeof(buf1)); + ASSERT_THAT(WriteFd(fd2, buf1, sizeof(buf1)), + SyscallSucceedsWithValue(sizeof(buf1))); + + ASSERT_THAT(ReadFd(fd1, buf2, sizeof(buf2)), + SyscallSucceedsWithValue(sizeof(buf2))); + + EXPECT_EQ(0, memcmp(buf1, buf2, sizeof(buf1))); +} + +// Initializes the given buffer with random data. +void RandomizeBuffer(char* ptr, size_t len) { + uint32_t seed = time(nullptr); + for (size_t i = 0; i < len; ++i) { + ptr[i] = static_cast<char>(rand_r(&seed)); + } +} + +size_t CalculateUnixSockAddrLen(const char* sun_path) { + // Abstract addresses always return the full length. + if (sun_path[0] == 0) { + return sizeof(sockaddr_un); + } + // Filesystem addresses use the address length plus the 2 byte sun_family and + // null terminator. 
+ return strlen(sun_path) + 3; +} + +struct sockaddr_storage AddrFDSocketPair::to_storage(const sockaddr_un& addr) { + struct sockaddr_storage addr_storage = {}; + memcpy(&addr_storage, &addr, sizeof(addr)); + return addr_storage; +} + +struct sockaddr_storage AddrFDSocketPair::to_storage(const sockaddr_in& addr) { + struct sockaddr_storage addr_storage = {}; + memcpy(&addr_storage, &addr, sizeof(addr)); + return addr_storage; +} + +struct sockaddr_storage AddrFDSocketPair::to_storage(const sockaddr_in6& addr) { + struct sockaddr_storage addr_storage = {}; + memcpy(&addr_storage, &addr, sizeof(addr)); + return addr_storage; +} + +SocketKind SimpleSocket(int fam, int type, int proto) { + return SocketKind{ + absl::StrCat("Family ", fam, ", type ", type, ", proto ", proto), + SyscallSocketCreator(fam, type, proto)}; +} + +ssize_t SendLargeSendMsg(const std::unique_ptr<SocketPair>& sockets, + size_t size, bool reader) { + const int rfd = sockets->second_fd(); + ScopedThread t([rfd, size, reader] { + if (!reader) { + return; + } + + // Potentially too many syscalls in the loop. + const DisableSave ds; + + std::vector<char> buf(size); + size_t total = 0; + + while (total < size) { + int ret = read(rfd, buf.data(), buf.size()); + if (ret == -1 && errno == EAGAIN) { + continue; + } + if (ret > 0) { + total += ret; + } + + // Assert to return on first failure. + ASSERT_THAT(ret, SyscallSucceeds()); + } + }); + + std::vector<char> buf(size); + + struct iovec iov = {}; + iov.iov_base = buf.data(); + iov.iov_len = buf.size(); + + struct msghdr msg = {}; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + return RetryEINTR(sendmsg)(sockets->first_fd(), &msg, 0); +} + +PosixErrorOr<int> PortAvailable(int port, AddressFamily family, SocketType type, + bool reuse_addr) { + if (port < 0) { + return PosixError(EINVAL, "Invalid port"); + } + + // Both Ipv6 and Dualstack are AF_INET6. + int sock_fam = (family == AddressFamily::kIpv4 ? 
AF_INET : AF_INET6); + int sock_type = (type == SocketType::kTcp ? SOCK_STREAM : SOCK_DGRAM); + ASSIGN_OR_RETURN_ERRNO(auto fd, Socket(sock_fam, sock_type, 0)); + + if (reuse_addr) { + int one = 1; + RETURN_ERROR_IF_SYSCALL_FAIL( + setsockopt(fd.get(), SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one))); + } + + // Try to bind. + sockaddr_storage storage = {}; + int storage_size = 0; + if (family == AddressFamily::kIpv4) { + sockaddr_in* addr = reinterpret_cast<sockaddr_in*>(&storage); + storage_size = sizeof(*addr); + addr->sin_family = AF_INET; + addr->sin_port = htons(port); + addr->sin_addr.s_addr = htonl(INADDR_ANY); + } else { + sockaddr_in6* addr = reinterpret_cast<sockaddr_in6*>(&storage); + storage_size = sizeof(*addr); + addr->sin6_family = AF_INET6; + addr->sin6_port = htons(port); + if (family == AddressFamily::kDualStack) { + inet_pton(AF_INET6, "::ffff:0.0.0.0", + reinterpret_cast<void*>(&addr->sin6_addr.s6_addr)); + } else { + addr->sin6_addr = in6addr_any; + } + } + + RETURN_ERROR_IF_SYSCALL_FAIL( + bind(fd.get(), reinterpret_cast<sockaddr*>(&storage), storage_size)); + + // If the user specified 0 as the port, we will return the port that the + // kernel gave us, otherwise we will validate that this socket bound to the + // requested port. 
+ sockaddr_storage bound_storage = {}; + socklen_t bound_storage_size = sizeof(bound_storage); + RETURN_ERROR_IF_SYSCALL_FAIL( + getsockname(fd.get(), reinterpret_cast<sockaddr*>(&bound_storage), + &bound_storage_size)); + + int available_port = -1; + if (bound_storage.ss_family == AF_INET) { + sockaddr_in* addr = reinterpret_cast<sockaddr_in*>(&bound_storage); + available_port = ntohs(addr->sin_port); + } else if (bound_storage.ss_family == AF_INET6) { + sockaddr_in6* addr = reinterpret_cast<sockaddr_in6*>(&bound_storage); + available_port = ntohs(addr->sin6_port); + } else { + return PosixError(EPROTOTYPE, "Getsockname returned invalid family"); + } + + // If we requested a specific port make sure our bound port is that port. + if (port != 0 && available_port != port) { + return PosixError(EINVAL, + absl::StrCat("Bound port ", available_port, + " was not equal to requested port ", port)); + } + + // If we're trying to do a TCP socket, let's also try to listen. + if (type == SocketType::kTcp) { + RETURN_ERROR_IF_SYSCALL_FAIL(listen(fd.get(), 1)); + } + + return available_port; +} + +PosixError FreeAvailablePort(int port) { + return NoError(); +} + +PosixErrorOr<int> SendMsg(int sock, msghdr* msg, char buf[], int buf_size) { + struct iovec iov; + iov.iov_base = buf; + iov.iov_len = buf_size; + msg->msg_iov = &iov; + msg->msg_iovlen = 1; + + int ret; + RETURN_ERROR_IF_SYSCALL_FAIL(ret = RetryEINTR(sendmsg)(sock, msg, 0)); + return ret; +} + +void RecvNoData(int sock) { + char data = 0; + struct iovec iov; + iov.iov_base = &data; + iov.iov_len = 1; + struct msghdr msg = {}; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + ASSERT_THAT(RetryEINTR(recvmsg)(sock, &msg, MSG_DONTWAIT), + SyscallFailsWithErrno(EAGAIN)); +} + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_test_util.h b/test/syscalls/linux/socket_test_util.h new file mode 100644 index 000000000..e3e741478 --- /dev/null +++ b/test/syscalls/linux/socket_test_util.h @@ -0,0 
+1,449 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GVISOR_TEST_SYSCALLS_SOCKET_TEST_UTIL_H_ +#define GVISOR_TEST_SYSCALLS_SOCKET_TEST_UTIL_H_ + +#include <errno.h> +#include <netinet/ip.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <sys/un.h> +#include <functional> +#include <memory> +#include <string> +#include <utility> +#include <vector> + +#include "gtest/gtest.h" +#include "gtest/gtest.h" +#include "absl/strings/str_format.h" +#include "test/util/file_descriptor.h" +#include "test/util/posix_error.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +// Wrapper for socket(2) that returns a FileDescriptor. +inline PosixErrorOr<FileDescriptor> Socket(int family, int type, int protocol) { + int fd = socket(family, type, protocol); + MaybeSave(); + if (fd < 0) { + return PosixError( + errno, absl::StrFormat("socket(%d, %d, %d)", family, type, protocol)); + } + return FileDescriptor(fd); +} + +// Wrapper for accept(2) that returns a FileDescriptor. +inline PosixErrorOr<FileDescriptor> Accept(int sockfd, sockaddr* addr, + socklen_t* addrlen) { + int fd = RetryEINTR(accept)(sockfd, addr, addrlen); + MaybeSave(); + if (fd < 0) { + return PosixError( + errno, absl::StrFormat("accept(%d, %p, %p)", sockfd, addr, addrlen)); + } + return FileDescriptor(fd); +} + +// Wrapper for accept4(2) that returns a FileDescriptor. 
+inline PosixErrorOr<FileDescriptor> Accept4(int sockfd, sockaddr* addr, + socklen_t* addrlen, int flags) { + int fd = RetryEINTR(accept4)(sockfd, addr, addrlen, flags); + MaybeSave(); + if (fd < 0) { + return PosixError(errno, absl::StrFormat("accept4(%d, %p, %p, %#x)", sockfd, + addr, addrlen, flags)); + } + return FileDescriptor(fd); +} + +inline ssize_t SendFd(int fd, void* buf, size_t count, int flags) { + return internal::ApplyFileIoSyscall( + [&](size_t completed) { + return sendto(fd, static_cast<char*>(buf) + completed, + count - completed, flags, nullptr, 0); + }, + count); +} + +// A Creator<T> is a function that attempts to create and return a new T. (This +// is copy/pasted from cloud/gvisor/api/sandbox_util.h and is just duplicated +// here for clarity.) +template <typename T> +using Creator = std::function<PosixErrorOr<std::unique_ptr<T>>()>; + +// A SocketPair represents a pair of socket file descriptors owned by the +// SocketPair. +class SocketPair { + public: + virtual ~SocketPair() = default; + + virtual int first_fd() const = 0; + virtual int second_fd() const = 0; + virtual int release_first_fd() = 0; + virtual int release_second_fd() = 0; + virtual const struct sockaddr* first_addr() const = 0; + virtual const struct sockaddr* second_addr() const = 0; + virtual size_t first_addr_size() const = 0; + virtual size_t second_addr_size() const = 0; + virtual size_t first_addr_len() const = 0; + virtual size_t second_addr_len() const = 0; +}; + +// A FDSocketPair is a SocketPair that consists of only a pair of file +// descriptors. 
+class FDSocketPair : public SocketPair { + public: + FDSocketPair(int first_fd, int second_fd) + : first_(first_fd), second_(second_fd) {} + + int first_fd() const override { return first_.get(); } + int second_fd() const override { return second_.get(); } + int release_first_fd() override { return first_.release(); } + int release_second_fd() override { return second_.release(); } + const struct sockaddr* first_addr() const override { return nullptr; } + const struct sockaddr* second_addr() const override { return nullptr; } + size_t first_addr_size() const override { return 0; } + size_t second_addr_size() const override { return 0; } + size_t first_addr_len() const override { return 0; } + size_t second_addr_len() const override { return 0; } + + private: + FileDescriptor first_; + FileDescriptor second_; +}; + +// CalculateUnixSockAddrLen calculates the length returned by recvfrom(2) and +// recvmsg(2) for Unix sockets. +size_t CalculateUnixSockAddrLen(const char* sun_path); + +// A AddrFDSocketPair is a SocketPair that consists of a pair of file +// descriptors in addition to a pair of socket addresses. 
+class AddrFDSocketPair : public SocketPair { + public: + AddrFDSocketPair(int first_fd, int second_fd, + const struct sockaddr_un& first_address, + const struct sockaddr_un& second_address) + : first_(first_fd), + second_(second_fd), + first_addr_(to_storage(first_address)), + second_addr_(to_storage(second_address)), + first_len_(CalculateUnixSockAddrLen(first_address.sun_path)), + second_len_(CalculateUnixSockAddrLen(second_address.sun_path)), + first_size_(sizeof(first_address)), + second_size_(sizeof(second_address)) {} + + AddrFDSocketPair(int first_fd, int second_fd, + const struct sockaddr_in& first_address, + const struct sockaddr_in& second_address) + : first_(first_fd), + second_(second_fd), + first_addr_(to_storage(first_address)), + second_addr_(to_storage(second_address)), + first_len_(sizeof(first_address)), + second_len_(sizeof(second_address)), + first_size_(sizeof(first_address)), + second_size_(sizeof(second_address)) {} + + AddrFDSocketPair(int first_fd, int second_fd, + const struct sockaddr_in6& first_address, + const struct sockaddr_in6& second_address) + : first_(first_fd), + second_(second_fd), + first_addr_(to_storage(first_address)), + second_addr_(to_storage(second_address)), + first_len_(sizeof(first_address)), + second_len_(sizeof(second_address)), + first_size_(sizeof(first_address)), + second_size_(sizeof(second_address)) {} + + int first_fd() const override { return first_.get(); } + int second_fd() const override { return second_.get(); } + int release_first_fd() override { return first_.release(); } + int release_second_fd() override { return second_.release(); } + const struct sockaddr* first_addr() const override { + return reinterpret_cast<const struct sockaddr*>(&first_addr_); + } + const struct sockaddr* second_addr() const override { + return reinterpret_cast<const struct sockaddr*>(&second_addr_); + } + size_t first_addr_size() const override { return first_size_; } + size_t second_addr_size() const override { return 
second_size_; } + size_t first_addr_len() const override { return first_len_; } + size_t second_addr_len() const override { return second_len_; } + + private: + // to_storage coverts a sockaddr_* to a sockaddr_storage. + static struct sockaddr_storage to_storage(const sockaddr_un& addr); + static struct sockaddr_storage to_storage(const sockaddr_in& addr); + static struct sockaddr_storage to_storage(const sockaddr_in6& addr); + + FileDescriptor first_; + FileDescriptor second_; + const struct sockaddr_storage first_addr_; + const struct sockaddr_storage second_addr_; + const size_t first_len_; + const size_t second_len_; + const size_t first_size_; + const size_t second_size_; +}; + +// SyscallSocketPairCreator returns a Creator<SocketPair> that obtains file +// descriptors by invoking the socketpair() syscall. +Creator<SocketPair> SyscallSocketPairCreator(int domain, int type, + int protocol); + +// SyscallSocketCreator returns a Creator<FileDescriptor> that obtains a file +// descriptor by invoking the socket() syscall. +Creator<FileDescriptor> SyscallSocketCreator(int domain, int type, + int protocol); + +// FilesystemBidirectionalBindSocketPairCreator returns a Creator<SocketPair> +// that obtains file descriptors by invoking the bind() and connect() syscalls +// on filesystem paths. Only works for DGRAM sockets. +Creator<SocketPair> FilesystemBidirectionalBindSocketPairCreator(int domain, + int type, + int protocol); + +// AbstractBidirectionalBindSocketPairCreator returns a Creator<SocketPair> that +// obtains file descriptors by invoking the bind() and connect() syscalls on +// abstract namespace paths. Only works for DGRAM sockets. +Creator<SocketPair> AbstractBidirectionalBindSocketPairCreator(int domain, + int type, + int protocol); + +// SocketpairGoferSocketPairCreator returns a Creator<SocketPair> that +// obtains file descriptors by connect() syscalls on two sockets with socketpair +// gofer paths. 
+Creator<SocketPair> SocketpairGoferSocketPairCreator(int domain, int type, + int protocol); + +// SocketpairGoferFileSocketPairCreator returns a Creator<SocketPair> that +// obtains file descriptors by open() syscalls on socketpair gofer paths. +Creator<SocketPair> SocketpairGoferFileSocketPairCreator(int flags); + +// FilesystemAcceptBindSocketPairCreator returns a Creator<SocketPair> that +// obtains file descriptors by invoking the accept() and bind() syscalls on +// a filesystem path. Only works for STREAM and SEQPACKET sockets. +Creator<SocketPair> FilesystemAcceptBindSocketPairCreator(int domain, int type, + int protocol); + +// AbstractAcceptBindSocketPairCreator returns a Creator<SocketPair> that +// obtains file descriptors by invoking the accept() and bind() syscalls on a +// abstract namespace path. Only works for STREAM and SEQPACKET sockets. +Creator<SocketPair> AbstractAcceptBindSocketPairCreator(int domain, int type, + int protocol); + +// FilesystemUnboundSocketPairCreator returns a Creator<SocketPair> that obtains +// file descriptors by invoking the socket() syscall and generates a filesystem +// path for binding. +Creator<SocketPair> FilesystemUnboundSocketPairCreator(int domain, int type, + int protocol); + +// AbstractUnboundSocketPairCreator returns a Creator<SocketPair> that obtains +// file descriptors by invoking the socket() syscall and generates an abstract +// path for binding. +Creator<SocketPair> AbstractUnboundSocketPairCreator(int domain, int type, + int protocol); + +// TCPAcceptBindSocketPairCreator returns a Creator<SocketPair> that obtains +// file descriptors by invoking the accept() and bind() syscalls on TCP sockets. +Creator<SocketPair> TCPAcceptBindSocketPairCreator(int domain, int type, + int protocol, + bool dual_stack); + +// UDPBidirectionalBindSocketPairCreator returns a Creator<SocketPair> that +// obtains file descriptors by invoking the bind() and connect() syscalls on UDP +// sockets. 
+Creator<SocketPair> UDPBidirectionalBindSocketPairCreator(int domain, int type, + int protocol, + bool dual_stack); + +// A SocketPairKind couples a human-readable description of a socket pair with +// a function that creates such a socket pair. +struct SocketPairKind { + std::string description; + Creator<SocketPair> creator; + + // Create creates a socket pair of this kind. + PosixErrorOr<std::unique_ptr<SocketPair>> Create() const { return creator(); } +}; + +// A SocketKind couples a human-readable description of a socket with +// a function that creates such a socket. +struct SocketKind { + std::string description; + Creator<FileDescriptor> creator; + + // Create creates a socket pair of this kind. + PosixErrorOr<std::unique_ptr<FileDescriptor>> Create() const { + return creator(); + } +}; + +// A ReversedSocketPair wraps another SocketPair but flips the first and second +// file descriptors. ReversedSocketPair is used to test socket pairs that +// should be symmetric. +class ReversedSocketPair : public SocketPair { + public: + explicit ReversedSocketPair(std::unique_ptr<SocketPair> base) + : base_(std::move(base)) {} + + int first_fd() const override { return base_->second_fd(); } + int second_fd() const override { return base_->first_fd(); } + int release_first_fd() override { return base_->release_second_fd(); } + int release_second_fd() override { return base_->release_first_fd(); } + const struct sockaddr* first_addr() const override { + return base_->second_addr(); + } + const struct sockaddr* second_addr() const override { + return base_->first_addr(); + } + size_t first_addr_size() const override { return base_->second_addr_size(); } + size_t second_addr_size() const override { return base_->first_addr_size(); } + size_t first_addr_len() const override { return base_->second_addr_len(); } + size_t second_addr_len() const override { return base_->first_addr_len(); } + + private: + std::unique_ptr<SocketPair> base_; +}; + +// Reversed returns a 
SocketPairKind that represents SocketPairs created by +// flipping the file descriptors provided by another SocketPair. +SocketPairKind Reversed(SocketPairKind const& base); + +// IncludeReversals returns a vector<SocketPairKind> that returns all +// SocketPairKinds in `vec` as well as all SocketPairKinds obtained by flipping +// the file descriptors provided by the kinds in `vec`. +std::vector<SocketPairKind> IncludeReversals(std::vector<SocketPairKind> vec); + +// A Middleware is a function wraps a SocketPairKind. +using Middleware = std::function<SocketPairKind(SocketPairKind)>; + +// Reversed returns a SocketPairKind that represents SocketPairs created by +// flipping the file descriptors provided by another SocketPair. +template <typename T> +Middleware SetSockOpt(int level, int optname, T* value) { + return [=](SocketPairKind const& base) { + auto const& creator = base.creator; + return SocketPairKind{ + absl::StrCat("setsockopt(", level, ", ", optname, ", ", *value, ") ", + base.description), + [creator, level, optname, + value]() -> PosixErrorOr<std::unique_ptr<SocketPair>> { + ASSIGN_OR_RETURN_ERRNO(auto creator_value, creator()); + if (creator_value->first_fd() >= 0) { + RETURN_ERROR_IF_SYSCALL_FAIL(setsockopt( + creator_value->first_fd(), level, optname, value, sizeof(T))); + } + if (creator_value->second_fd() >= 0) { + RETURN_ERROR_IF_SYSCALL_FAIL(setsockopt( + creator_value->second_fd(), level, optname, value, sizeof(T))); + } + return creator_value; + }}; + }; +} + +constexpr int kSockOptOn = 1; +constexpr int kSockOptOff = 0; + +// NoOp returns the same SocketPairKind that it is passed. +SocketPairKind NoOp(SocketPairKind const& base); + +// TransferTest tests that data can be send back and fourth between two +// specified FDs. Note that calls to this function should be wrapped in +// ASSERT_NO_FATAL_FAILURE(). +void TransferTest(int fd1, int fd2); + +// Fills [buf, buf+len) with random bytes. 
+void RandomizeBuffer(char* buf, size_t len); + +// Base test fixture for tests that operate on pairs of connected sockets. The +// constructor prints the description of the SocketPairKind test parameter. +class SocketPairTest : public ::testing::TestWithParam<SocketPairKind> { + protected: + SocketPairTest() { + // gUnit uses printf, so so will we. + printf("Testing with %s\n", GetParam().description.c_str()); + } + + // NewSocketPair creates a socket pair of the kind given by the test parameter. + PosixErrorOr<std::unique_ptr<SocketPair>> NewSocketPair() const { + return GetParam().Create(); + } +}; + +// Base test fixture for tests that operate on simple Sockets. The constructor +// prints the description of the SocketKind test parameter. +class SimpleSocketTest : public ::testing::TestWithParam<SocketKind> { + protected: + SimpleSocketTest() { + // gUnit uses printf, so so will we. + printf("Testing with %s\n", GetParam().description.c_str()); + } + + // NewSocket creates a socket of the kind given by the test parameter. + PosixErrorOr<std::unique_ptr<FileDescriptor>> NewSocket() const { + return GetParam().Create(); + } +}; + +SocketKind SimpleSocket(int fam, int type, int proto); + +// Send a buffer of size 'size' to sockets->first_fd(), returning the result of +// sendmsg. +// +// If reader is true, read from second_fd() until size bytes have been read. +ssize_t SendLargeSendMsg(const std::unique_ptr<SocketPair>& sockets, + size_t size, bool reader); + +// Initializes the given buffer with random data. This repeats the +// RandomizeBuffer declaration above. +void RandomizeBuffer(char* ptr, size_t len); + +enum class AddressFamily { kIpv4 = 1, kIpv6 = 2, kDualStack = 3 }; +enum class SocketType { kUdp = 1, kTcp = 2 }; + +// Returns a PosixError or a port that is available. If 0 is specified as the +// port it will bind port 0 (and allow the kernel to select any free port). +// Otherwise, it will try to bind the specified port and validate that it can be +// used for the requested family and socket type. The final option is +// reuse_addr. This specifies whether SO_REUSEADDR should be applied before a +// bind(2) attempt. SO_REUSEADDR means that sockets in TIME_WAIT states or other +// bound UDP sockets would not cause an error on bind(2). 
This option should be +// set if subsequent calls to bind on the returned port will also use +// SO_REUSEADDR. +// +// Note that this helper will attempt to bind the ANY address for the respective +// protocol. +PosixErrorOr<int> PortAvailable(int port, AddressFamily family, SocketType type, + bool reuse_addr); + +// FreeAvailablePort is used to return a port that was obtained by using +// the PortAvailable helper with port 0. +PosixError FreeAvailablePort(int port); + +// SendMsg converts a buffer to an iovec and adds it to msg before sending it. +PosixErrorOr<int> SendMsg(int sock, msghdr* msg, char buf[], int buf_size); + +// RecvNoData checks that no data is receivable on sock. +void RecvNoData(int sock); + +// Base test fixture for tests that apply to all kinds of pairs of connected +// sockets. +using AllSocketPairTest = SocketPairTest; + +} // namespace testing +} // namespace gvisor + +#endif // GVISOR_TEST_SYSCALLS_SOCKET_TEST_UTIL_H_ diff --git a/test/syscalls/linux/socket_unix.cc b/test/syscalls/linux/socket_unix.cc new file mode 100644 index 000000000..c60a965ae --- /dev/null +++ b/test/syscalls/linux/socket_unix.cc @@ -0,0 +1,1181 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "test/syscalls/linux/socket_unix.h" + +#include <net/if.h> +#include <stdio.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/un.h> +#include <vector> + +#include "gtest/gtest.h" +#include "gtest/gtest.h" +#include "absl/strings/string_view.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +// This file is a generic socket test file. It must be built with another file +// that provides the test types. + +namespace gvisor { +namespace testing { + +namespace { + +TEST_P(UnixSocketPairTest, BasicFDPass) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + auto pair = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(), + sent_data, sizeof(sent_data))); + + char received_data[20]; + int fd = -1; + ASSERT_NO_FATAL_FAILURE(RecvSingleFD(sockets->second_fd(), &fd, received_data, + sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); + + ASSERT_NO_FATAL_FAILURE(TransferTest(fd, pair->first_fd())); +} + +TEST_P(UnixSocketPairTest, BasicTwoFDPass) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + auto pair1 = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + auto pair2 = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + int sent_fds[] = {pair1->second_fd(), pair2->second_fd()}; + + ASSERT_NO_FATAL_FAILURE( + SendFDs(sockets->first_fd(), sent_fds, 2, sent_data, sizeof(sent_data))); + + char received_data[20]; + int received_fds[] = {-1, -1}; + + ASSERT_NO_FATAL_FAILURE(RecvFDs(sockets->second_fd(), received_fds, 2, + received_data, 
sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); + + ASSERT_NO_FATAL_FAILURE(TransferTest(received_fds[0], pair1->first_fd())); + ASSERT_NO_FATAL_FAILURE(TransferTest(received_fds[1], pair2->first_fd())); +} + +TEST_P(UnixSocketPairTest, BasicThreeFDPass) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + auto pair1 = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + auto pair2 = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + auto pair3 = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + int sent_fds[] = {pair1->second_fd(), pair2->second_fd(), pair3->second_fd()}; + + ASSERT_NO_FATAL_FAILURE( + SendFDs(sockets->first_fd(), sent_fds, 3, sent_data, sizeof(sent_data))); + + char received_data[20]; + int received_fds[] = {-1, -1, -1}; + + ASSERT_NO_FATAL_FAILURE(RecvFDs(sockets->second_fd(), received_fds, 3, + received_data, sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); + + ASSERT_NO_FATAL_FAILURE(TransferTest(received_fds[0], pair1->first_fd())); + ASSERT_NO_FATAL_FAILURE(TransferTest(received_fds[1], pair2->first_fd())); + ASSERT_NO_FATAL_FAILURE(TransferTest(received_fds[2], pair3->first_fd())); +} + +TEST_P(UnixSocketPairTest, BadFDPass) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + int sent_fd = -1; + + struct msghdr msg = {}; + char control[CMSG_SPACE(sizeof(sent_fd))]; + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_len = CMSG_LEN(sizeof(sent_fd)); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + memcpy(CMSG_DATA(cmsg), &sent_fd, sizeof(sent_fd)); + + struct iovec iov; + iov.iov_base = sent_data; + 
iov.iov_len = sizeof(sent_data); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + ASSERT_THAT(RetryEINTR(sendmsg)(sockets->first_fd(), &msg, 0), + SyscallFailsWithErrno(EBADF)); +} + +// BasicFDPassNoSpace starts off by sending a single FD just like BasicFDPass. +// The difference is that when calling recvmsg, no space for FDs is provided, +// only space for the cmsg header. +TEST_P(UnixSocketPairTest, BasicFDPassNoSpace) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + auto pair = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(), + sent_data, sizeof(sent_data))); + + char received_data[20]; + + struct msghdr msg = {}; + std::vector<char> control(CMSG_SPACE(0)); + msg.msg_control = &control[0]; + msg.msg_controllen = control.size(); + + struct iovec iov; + iov.iov_base = received_data; + iov.iov_len = sizeof(received_data); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, 0), + SyscallSucceedsWithValue(sizeof(received_data))); + + EXPECT_EQ(msg.msg_controllen, 0); + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); +} + +// BasicFDPassUnalignedRecv starts off by sending a single FD just like +// BasicFDPass. The difference is that when calling recvmsg, the length of the +// receive data is only aligned on a 4 byte boundary instead of the normal 8. 
+TEST_P(UnixSocketPairTest, BasicFDPassUnalignedRecv) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[20];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  // A second, independent pair; its second FD is the one passed over `sockets`.
+  auto pair =
+      ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+  ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(),
+                                       sent_data, sizeof(sent_data)));
+
+  char received_data[20];
+  int fd = -1;
+  ASSERT_NO_FATAL_FAILURE(RecvSingleFDUnaligned(
+      sockets->second_fd(), &fd, received_data, sizeof(received_data)));
+
+  EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+
+  // Data must flow between the received FD and the other end of `pair`.
+  ASSERT_NO_FATAL_FAILURE(TransferTest(fd, pair->first_fd()));
+}
+
+// ConcurrentBasicFDPass sends an FD over `sockets`, then lets recv_func (run
+// by a ScopedThread) receive it and echo one message on it while this thread
+// writes to and reads from the other end of the passed pair.
+TEST_P(UnixSocketPairTest, ConcurrentBasicFDPass) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[20];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  int sockfd1 = sockets->first_fd();
+  // sent_data is captured by copy, so re-randomizing it below does not change
+  // what recv_func compares the received payload against.
+  auto recv_func = [sockfd1, sent_data]() {
+    char received_data[20];
+    int fd = -1;
+    // Stop here if the helper fails fatally; otherwise fd would still be -1
+    // and the ReadFd/WriteFd calls below would only add misleading failures.
+    ASSERT_NO_FATAL_FAILURE(
+        RecvSingleFD(sockfd1, &fd, received_data, sizeof(received_data)));
+    ASSERT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+    // Echo one message back over the received FD.
+    char buf[20];
+    ASSERT_THAT(ReadFd(fd, buf, sizeof(buf)),
+                SyscallSucceedsWithValue(sizeof(buf)));
+    ASSERT_THAT(WriteFd(fd, buf, sizeof(buf)),
+                SyscallSucceedsWithValue(sizeof(buf)));
+  };
+
+  auto pair =
+      ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+  // Sent on second_fd(), so recv_func receives it on first_fd() (sockfd1).
+  ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->second_fd(), pair->second_fd(),
+                                       sent_data, sizeof(sent_data)));
+
+  ScopedThread t(recv_func);
+
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+  ASSERT_THAT(WriteFd(pair->first_fd(), sent_data, sizeof(sent_data)),
+              SyscallSucceedsWithValue(sizeof(sent_data)));
+
+  char received_data[20];
+  ASSERT_THAT(ReadFd(pair->first_fd(), received_data, sizeof(received_data)),
+              SyscallSucceedsWithValue(sizeof(received_data)));
+
+  t.Join();
+
+  // The echoed bytes must match what this thread wrote.
+  EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+}
+
+// FDPassNoRecv checks that the control message can be safely ignored by using +// read(2) instead of recvmsg(2). +TEST_P(UnixSocketPairTest, FDPassNoRecv) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + auto pair = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(), + sent_data, sizeof(sent_data))); + + // Read while ignoring the passed FD. + char received_data[20]; + ASSERT_THAT( + ReadFd(sockets->second_fd(), received_data, sizeof(received_data)), + SyscallSucceedsWithValue(sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); + + // Check that the socket still works for reads and writes. + ASSERT_NO_FATAL_FAILURE( + TransferTest(sockets->first_fd(), sockets->second_fd())); +} + +// FDPassInterspersed1 checks that sent control messages cannot be read before +// their associated data has been read. +TEST_P(UnixSocketPairTest, FDPassInterspersed1) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char written_data[20]; + RandomizeBuffer(written_data, sizeof(written_data)); + + ASSERT_THAT(WriteFd(sockets->first_fd(), written_data, sizeof(written_data)), + SyscallSucceedsWithValue(sizeof(written_data))); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + auto pair = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(), + sent_data, sizeof(sent_data))); + + // Check that we don't get a control message, but do get the data. 
+ char received_data[20]; + RecvNoCmsg(sockets->second_fd(), received_data, sizeof(received_data)); + EXPECT_EQ(0, memcmp(written_data, received_data, sizeof(written_data))); +} + +// FDPassInterspersed2 checks that sent control messages cannot be read after +// their associated data has been read while ignoring the control message by +// using read(2) instead of recvmsg(2). +TEST_P(UnixSocketPairTest, FDPassInterspersed2) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + auto pair = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(), + sent_data, sizeof(sent_data))); + + char written_data[20]; + RandomizeBuffer(written_data, sizeof(written_data)); + ASSERT_THAT(WriteFd(sockets->first_fd(), written_data, sizeof(written_data)), + SyscallSucceedsWithValue(sizeof(written_data))); + + char received_data[20]; + ASSERT_THAT( + ReadFd(sockets->second_fd(), received_data, sizeof(received_data)), + SyscallSucceedsWithValue(sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); + + ASSERT_NO_FATAL_FAILURE( + RecvNoCmsg(sockets->second_fd(), received_data, sizeof(received_data))); + EXPECT_EQ(0, memcmp(written_data, received_data, sizeof(written_data))); +} + +TEST_P(UnixSocketPairTest, FDPassNotCoalesced) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data1[20]; + RandomizeBuffer(sent_data1, sizeof(sent_data1)); + + auto pair1 = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair1->second_fd(), + sent_data1, sizeof(sent_data1))); + + char sent_data2[20]; + RandomizeBuffer(sent_data2, sizeof(sent_data2)); + + auto pair2 = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + 
ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair2->second_fd(), + sent_data2, sizeof(sent_data2))); + + char received_data1[sizeof(sent_data1) + sizeof(sent_data2)]; + int received_fd1 = -1; + + RecvSingleFD(sockets->second_fd(), &received_fd1, received_data1, + sizeof(received_data1), sizeof(sent_data1)); + + EXPECT_EQ(0, memcmp(sent_data1, received_data1, sizeof(sent_data1))); + TransferTest(pair1->first_fd(), pair1->second_fd()); + + char received_data2[sizeof(sent_data1) + sizeof(sent_data2)]; + int received_fd2 = -1; + + RecvSingleFD(sockets->second_fd(), &received_fd2, received_data2, + sizeof(received_data2), sizeof(sent_data2)); + + EXPECT_EQ(0, memcmp(sent_data2, received_data2, sizeof(sent_data2))); + TransferTest(pair2->first_fd(), pair2->second_fd()); +} + +TEST_P(UnixSocketPairTest, FDPassPeek) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + auto pair = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(), + sent_data, sizeof(sent_data))); + + char peek_data[20]; + int peek_fd = -1; + PeekSingleFD(sockets->second_fd(), &peek_fd, peek_data, sizeof(peek_data)); + EXPECT_EQ(0, memcmp(sent_data, peek_data, sizeof(sent_data))); + TransferTest(peek_fd, pair->first_fd()); + EXPECT_THAT(close(peek_fd), SyscallSucceeds()); + + char received_data[20]; + int received_fd = -1; + RecvSingleFD(sockets->second_fd(), &received_fd, received_data, + sizeof(received_data)); + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); + TransferTest(received_fd, pair->first_fd()); + EXPECT_THAT(close(received_fd), SyscallSucceeds()); +} + +TEST_P(UnixSocketPairTest, BasicCredPass) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + struct ucred sent_creds; + + 
ASSERT_THAT(sent_creds.pid = getpid(), SyscallSucceeds()); + ASSERT_THAT(sent_creds.uid = getuid(), SyscallSucceeds()); + ASSERT_THAT(sent_creds.gid = getgid(), SyscallSucceeds()); + + ASSERT_NO_FATAL_FAILURE( + SendCreds(sockets->first_fd(), sent_creds, sent_data, sizeof(sent_data))); + + SetSoPassCred(sockets->second_fd()); + + char received_data[20]; + struct ucred received_creds; + ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds, + received_data, sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); + EXPECT_EQ(sent_creds.pid, received_creds.pid); + EXPECT_EQ(sent_creds.uid, received_creds.uid); + EXPECT_EQ(sent_creds.gid, received_creds.gid); +} + +TEST_P(UnixSocketPairTest, SendNullCredsBeforeSoPassCredRecvEnd) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + ASSERT_NO_FATAL_FAILURE( + SendNullCmsg(sockets->first_fd(), sent_data, sizeof(sent_data))); + + SetSoPassCred(sockets->second_fd()); + + char received_data[20]; + struct ucred received_creds; + ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds, + received_data, sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); + + struct ucred want_creds { + 0, 65534, 65534 + }; + + EXPECT_EQ(want_creds.pid, received_creds.pid); + EXPECT_EQ(want_creds.uid, received_creds.uid); + EXPECT_EQ(want_creds.gid, received_creds.gid); +} + +TEST_P(UnixSocketPairTest, SendNullCredsAfterSoPassCredRecvEnd) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + SetSoPassCred(sockets->second_fd()); + + ASSERT_NO_FATAL_FAILURE( + SendNullCmsg(sockets->first_fd(), sent_data, sizeof(sent_data))); + + char received_data[20]; + struct ucred received_creds; + ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds, + 
received_data, sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); + + struct ucred want_creds; + ASSERT_THAT(want_creds.pid = getpid(), SyscallSucceeds()); + ASSERT_THAT(want_creds.uid = getuid(), SyscallSucceeds()); + ASSERT_THAT(want_creds.gid = getgid(), SyscallSucceeds()); + + EXPECT_EQ(want_creds.pid, received_creds.pid); + EXPECT_EQ(want_creds.uid, received_creds.uid); + EXPECT_EQ(want_creds.gid, received_creds.gid); +} + +TEST_P(UnixSocketPairTest, SendNullCredsBeforeSoPassCredSendEnd) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + ASSERT_NO_FATAL_FAILURE( + SendNullCmsg(sockets->first_fd(), sent_data, sizeof(sent_data))); + + SetSoPassCred(sockets->first_fd()); + + char received_data[20]; + ASSERT_NO_FATAL_FAILURE( + RecvNoCmsg(sockets->second_fd(), received_data, sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); +} + +TEST_P(UnixSocketPairTest, SendNullCredsAfterSoPassCredSendEnd) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + SetSoPassCred(sockets->first_fd()); + + ASSERT_NO_FATAL_FAILURE( + SendNullCmsg(sockets->first_fd(), sent_data, sizeof(sent_data))); + + char received_data[20]; + ASSERT_NO_FATAL_FAILURE( + RecvNoCmsg(sockets->second_fd(), received_data, sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); +} + +TEST_P(UnixSocketPairTest, SendNullCredsBeforeSoPassCredRecvEndAfterSendEnd) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + SetSoPassCred(sockets->first_fd()); + + ASSERT_NO_FATAL_FAILURE( + SendNullCmsg(sockets->first_fd(), sent_data, sizeof(sent_data))); + + SetSoPassCred(sockets->second_fd()); + + char received_data[20]; + struct 
ucred received_creds; + ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds, + received_data, sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); + + struct ucred want_creds; + ASSERT_THAT(want_creds.pid = getpid(), SyscallSucceeds()); + ASSERT_THAT(want_creds.uid = getuid(), SyscallSucceeds()); + ASSERT_THAT(want_creds.gid = getgid(), SyscallSucceeds()); + + EXPECT_EQ(want_creds.pid, received_creds.pid); + EXPECT_EQ(want_creds.uid, received_creds.uid); + EXPECT_EQ(want_creds.gid, received_creds.gid); +} + +TEST_P(UnixSocketPairTest, WriteBeforeSoPassCredRecvEnd) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data, sizeof(sent_data)), + SyscallSucceedsWithValue(sizeof(sent_data))); + + SetSoPassCred(sockets->second_fd()); + + char received_data[20]; + + struct ucred received_creds; + ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds, + received_data, sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); + + struct ucred want_creds { + 0, 65534, 65534 + }; + + EXPECT_EQ(want_creds.pid, received_creds.pid); + EXPECT_EQ(want_creds.uid, received_creds.uid); + EXPECT_EQ(want_creds.gid, received_creds.gid); +} + +TEST_P(UnixSocketPairTest, WriteAfterSoPassCredRecvEnd) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + SetSoPassCred(sockets->second_fd()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data, sizeof(sent_data)), + SyscallSucceedsWithValue(sizeof(sent_data))); + + char received_data[20]; + + struct ucred received_creds; + ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds, + received_data, sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); + + 
struct ucred want_creds; + ASSERT_THAT(want_creds.pid = getpid(), SyscallSucceeds()); + ASSERT_THAT(want_creds.uid = getuid(), SyscallSucceeds()); + ASSERT_THAT(want_creds.gid = getgid(), SyscallSucceeds()); + + EXPECT_EQ(want_creds.pid, received_creds.pid); + EXPECT_EQ(want_creds.uid, received_creds.uid); + EXPECT_EQ(want_creds.gid, received_creds.gid); +} + +TEST_P(UnixSocketPairTest, WriteBeforeSoPassCredSendEnd) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data, sizeof(sent_data)), + SyscallSucceedsWithValue(sizeof(sent_data))); + + SetSoPassCred(sockets->first_fd()); + + char received_data[20]; + ASSERT_NO_FATAL_FAILURE( + RecvNoCmsg(sockets->second_fd(), received_data, sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); +} + +TEST_P(UnixSocketPairTest, WriteAfterSoPassCredSendEnd) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + SetSoPassCred(sockets->first_fd()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data, sizeof(sent_data)), + SyscallSucceedsWithValue(sizeof(sent_data))); + + char received_data[20]; + ASSERT_NO_FATAL_FAILURE( + RecvNoCmsg(sockets->second_fd(), received_data, sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); +} + +TEST_P(UnixSocketPairTest, WriteBeforeSoPassCredRecvEndAfterSendEnd) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + SetSoPassCred(sockets->first_fd()); + + ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data, sizeof(sent_data)), + SyscallSucceedsWithValue(sizeof(sent_data))); + + SetSoPassCred(sockets->second_fd()); + + char received_data[20]; + + struct ucred received_creds; + 
ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds, + received_data, sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); + + struct ucred want_creds; + ASSERT_THAT(want_creds.pid = getpid(), SyscallSucceeds()); + ASSERT_THAT(want_creds.uid = getuid(), SyscallSucceeds()); + ASSERT_THAT(want_creds.gid = getgid(), SyscallSucceeds()); + + EXPECT_EQ(want_creds.pid, received_creds.pid); + EXPECT_EQ(want_creds.uid, received_creds.uid); + EXPECT_EQ(want_creds.gid, received_creds.gid); +} + +TEST_P(UnixSocketPairTest, SoPassCred) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + int opt; + socklen_t optLen = sizeof(opt); + EXPECT_THAT( + getsockopt(sockets->first_fd(), SOL_SOCKET, SO_PASSCRED, &opt, &optLen), + SyscallSucceeds()); + EXPECT_FALSE(opt); + + optLen = sizeof(opt); + EXPECT_THAT( + getsockopt(sockets->second_fd(), SOL_SOCKET, SO_PASSCRED, &opt, &optLen), + SyscallSucceeds()); + EXPECT_FALSE(opt); + + SetSoPassCred(sockets->first_fd()); + + optLen = sizeof(opt); + EXPECT_THAT( + getsockopt(sockets->first_fd(), SOL_SOCKET, SO_PASSCRED, &opt, &optLen), + SyscallSucceeds()); + EXPECT_TRUE(opt); + + optLen = sizeof(opt); + EXPECT_THAT( + getsockopt(sockets->second_fd(), SOL_SOCKET, SO_PASSCRED, &opt, &optLen), + SyscallSucceeds()); + EXPECT_FALSE(opt); + + int zero = 0; + EXPECT_THAT(setsockopt(sockets->first_fd(), SOL_SOCKET, SO_PASSCRED, &zero, + sizeof(zero)), + SyscallSucceeds()); + + optLen = sizeof(opt); + EXPECT_THAT( + getsockopt(sockets->first_fd(), SOL_SOCKET, SO_PASSCRED, &opt, &optLen), + SyscallSucceeds()); + EXPECT_FALSE(opt); + + optLen = sizeof(opt); + EXPECT_THAT( + getsockopt(sockets->second_fd(), SOL_SOCKET, SO_PASSCRED, &opt, &optLen), + SyscallSucceeds()); + EXPECT_FALSE(opt); +} + +TEST_P(UnixSocketPairTest, NoDataCredPass) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + 
struct msghdr msg = {}; + + struct iovec iov; + iov.iov_base = sent_data; + iov.iov_len = sizeof(sent_data); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + char control[CMSG_SPACE(0)]; + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_CREDENTIALS; + cmsg->cmsg_len = CMSG_LEN(0); + + ASSERT_THAT(RetryEINTR(sendmsg)(sockets->first_fd(), &msg, 0), + SyscallFailsWithErrno(EINVAL)); +} + +TEST_P(UnixSocketPairTest, NoPassCred) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + struct ucred sent_creds; + + ASSERT_THAT(sent_creds.pid = getpid(), SyscallSucceeds()); + ASSERT_THAT(sent_creds.uid = getuid(), SyscallSucceeds()); + ASSERT_THAT(sent_creds.gid = getgid(), SyscallSucceeds()); + + ASSERT_NO_FATAL_FAILURE( + SendCreds(sockets->first_fd(), sent_creds, sent_data, sizeof(sent_data))); + + char received_data[20]; + + ASSERT_NO_FATAL_FAILURE( + RecvNoCmsg(sockets->second_fd(), received_data, sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); +} + +TEST_P(UnixSocketPairTest, CredAndFDPass) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + struct ucred sent_creds; + + ASSERT_THAT(sent_creds.pid = getpid(), SyscallSucceeds()); + ASSERT_THAT(sent_creds.uid = getuid(), SyscallSucceeds()); + ASSERT_THAT(sent_creds.gid = getgid(), SyscallSucceeds()); + + auto pair = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + ASSERT_NO_FATAL_FAILURE(SendCredsAndFD(sockets->first_fd(), sent_creds, + pair->second_fd(), sent_data, + sizeof(sent_data))); + + SetSoPassCred(sockets->second_fd()); + + char received_data[20]; + struct ucred received_creds; + int fd = -1; + 
ASSERT_NO_FATAL_FAILURE(RecvCredsAndFD(sockets->second_fd(), &received_creds, + &fd, received_data, + sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); + + EXPECT_EQ(sent_creds.pid, received_creds.pid); + EXPECT_EQ(sent_creds.uid, received_creds.uid); + EXPECT_EQ(sent_creds.gid, received_creds.gid); + + ASSERT_NO_FATAL_FAILURE(TransferTest(fd, pair->first_fd())); +} + +TEST_P(UnixSocketPairTest, FDPassBeforeSoPassCred) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + auto pair = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(), + sent_data, sizeof(sent_data))); + + SetSoPassCred(sockets->second_fd()); + + char received_data[20]; + struct ucred received_creds; + int fd = -1; + ASSERT_NO_FATAL_FAILURE(RecvCredsAndFD(sockets->second_fd(), &received_creds, + &fd, received_data, + sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); + + struct ucred want_creds { + 0, 65534, 65534 + }; + + EXPECT_EQ(want_creds.pid, received_creds.pid); + EXPECT_EQ(want_creds.uid, received_creds.uid); + EXPECT_EQ(want_creds.gid, received_creds.gid); + + ASSERT_NO_FATAL_FAILURE(TransferTest(fd, pair->first_fd())); +} + +TEST_P(UnixSocketPairTest, FDPassAfterSoPassCred) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + auto pair = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + SetSoPassCred(sockets->second_fd()); + + ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(), + sent_data, sizeof(sent_data))); + + char received_data[20]; + struct ucred received_creds; + int fd = -1; + ASSERT_NO_FATAL_FAILURE(RecvCredsAndFD(sockets->second_fd(), &received_creds, + &fd, 
received_data, + sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); + + struct ucred want_creds; + ASSERT_THAT(want_creds.pid = getpid(), SyscallSucceeds()); + ASSERT_THAT(want_creds.uid = getuid(), SyscallSucceeds()); + ASSERT_THAT(want_creds.gid = getgid(), SyscallSucceeds()); + + EXPECT_EQ(want_creds.pid, received_creds.pid); + EXPECT_EQ(want_creds.uid, received_creds.uid); + EXPECT_EQ(want_creds.gid, received_creds.gid); + + ASSERT_NO_FATAL_FAILURE(TransferTest(fd, pair->first_fd())); +} + +TEST_P(UnixSocketPairTest, CloexecDroppedWhenFDPassed) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + auto pair = ASSERT_NO_ERRNO_AND_VALUE( + UnixDomainSocketPair(SOCK_SEQPACKET | SOCK_CLOEXEC).Create()); + + ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(), + sent_data, sizeof(sent_data))); + + char received_data[20]; + int fd = -1; + ASSERT_NO_FATAL_FAILURE(RecvSingleFD(sockets->second_fd(), &fd, received_data, + sizeof(received_data))); + + EXPECT_THAT(fcntl(fd, F_GETFD), SyscallSucceedsWithValue(0)); +} + +TEST_P(UnixSocketPairTest, CloexecRecvFDPass) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + auto pair = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(), + sent_data, sizeof(sent_data))); + + struct msghdr msg = {}; + char control[CMSG_SPACE(sizeof(int))]; + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + struct iovec iov; + char received_data[20]; + iov.iov_base = received_data; + iov.iov_len = sizeof(received_data); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, MSG_CMSG_CLOEXEC), + 
SyscallSucceedsWithValue(sizeof(received_data))); + struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg); + ASSERT_NE(cmsg, nullptr); + ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(int))); + ASSERT_EQ(cmsg->cmsg_level, SOL_SOCKET); + ASSERT_EQ(cmsg->cmsg_type, SCM_RIGHTS); + + int fd = -1; + memcpy(&fd, CMSG_DATA(cmsg), sizeof(int)); + + EXPECT_THAT(fcntl(fd, F_GETFD), SyscallSucceedsWithValue(FD_CLOEXEC)); +} + +TEST_P(UnixSocketPairTest, FDPassAfterSoPassCredWithoutCredSpace) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + auto pair = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + SetSoPassCred(sockets->second_fd()); + + ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(), + sent_data, sizeof(sent_data))); + + struct msghdr msg = {}; + char control[CMSG_LEN(0)]; + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + char received_data[20]; + struct iovec iov; + iov.iov_base = received_data; + iov.iov_len = sizeof(received_data); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, 0), + SyscallSucceedsWithValue(sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); + + EXPECT_EQ(msg.msg_controllen, sizeof(control)); + + struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg); + ASSERT_NE(cmsg, nullptr); + EXPECT_EQ(cmsg->cmsg_len, sizeof(control)); + EXPECT_EQ(cmsg->cmsg_level, SOL_SOCKET); + EXPECT_EQ(cmsg->cmsg_type, SCM_CREDENTIALS); +} + +// This test will validate that MSG_CTRUNC as an input flag to recvmsg will +// not appear as an output flag on the control message when truncation doesn't +// happen. 
+TEST_P(UnixSocketPairTest, MsgCtruncInputIsNoop) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + auto pair = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(), + sent_data, sizeof(sent_data))); + + struct msghdr msg = {}; + char control[CMSG_SPACE(sizeof(int)) /* we're passing a single fd */]; + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + struct iovec iov; + char received_data[20]; + iov.iov_base = received_data; + iov.iov_len = sizeof(received_data); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, MSG_CTRUNC), + SyscallSucceedsWithValue(sizeof(received_data))); + struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg); + ASSERT_NE(cmsg, nullptr); + ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(int))); + ASSERT_EQ(cmsg->cmsg_level, SOL_SOCKET); + ASSERT_EQ(cmsg->cmsg_type, SCM_RIGHTS); + + // Now we should verify that MSG_CTRUNC wasn't set as an output flag. 
+ EXPECT_EQ(msg.msg_flags & MSG_CTRUNC, 0); +} + +TEST_P(UnixSocketPairTest, FDPassAfterSoPassCredWithoutCredHeaderSpace) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + auto pair = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + SetSoPassCred(sockets->second_fd()); + + ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(), + sent_data, sizeof(sent_data))); + + struct msghdr msg = {}; + char control[CMSG_LEN(0) / 2]; + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + char received_data[20]; + struct iovec iov; + iov.iov_base = received_data; + iov.iov_len = sizeof(received_data); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, 0), + SyscallSucceedsWithValue(sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); + EXPECT_EQ(msg.msg_controllen, 0); +} + +TEST_P(UnixSocketPairTest, InvalidGetSockOpt) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + int opt; + socklen_t optlen = sizeof(opt); + EXPECT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, -1, &opt, &optlen), + SyscallFailsWithErrno(ENOPROTOOPT)); +} + +TEST_P(UnixSocketPairTest, BindToBadName) { + auto pair = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + constexpr char kBadName[] = "/some/path/that/does/not/exist"; + sockaddr_un sockaddr; + sockaddr.sun_family = AF_LOCAL; + memcpy(sockaddr.sun_path, kBadName, sizeof(kBadName)); + + EXPECT_THAT( + bind(pair->first_fd(), reinterpret_cast<struct sockaddr*>(&sockaddr), + sizeof(sockaddr)), + SyscallFailsWithErrno(ENOENT)); +} + +TEST_P(UnixSocketPairTest, RecvmmsgTimeoutAfterRecv) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + char sent_data[10]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + char 
received_data[sizeof(sent_data) * 2]; + std::vector<struct mmsghdr> msgs(2); + std::vector<struct iovec> iovs(msgs.size()); + const int chunk_size = sizeof(received_data) / msgs.size(); + for (size_t i = 0; i < msgs.size(); i++) { + iovs[i].iov_len = chunk_size; + iovs[i].iov_base = &received_data[i * chunk_size]; + msgs[i].msg_hdr.msg_iov = &iovs[i]; + msgs[i].msg_hdr.msg_iovlen = 1; + } + + ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data, sizeof(sent_data)), + SyscallSucceedsWithValue(sizeof(sent_data))); + + struct timespec timeout = {0, 1}; + ASSERT_THAT(RetryEINTR(recvmmsg)(sockets->second_fd(), &msgs[0], msgs.size(), + 0, &timeout), + SyscallSucceedsWithValue(1)); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); + + EXPECT_EQ(chunk_size, msgs[0].msg_len); +} + +TEST_P(UnixSocketPairTest, TIOCINQSucceeds) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + int size = -1; + EXPECT_THAT(ioctl(sockets->first_fd(), TIOCINQ, &size), SyscallSucceeds()); + EXPECT_EQ(size, 0); + + const char some_data[] = "dangerzone"; + ASSERT_THAT( + RetryEINTR(send)(sockets->second_fd(), &some_data, sizeof(some_data), 0), + SyscallSucceeds()); + EXPECT_THAT(ioctl(sockets->first_fd(), TIOCINQ, &size), SyscallSucceeds()); + EXPECT_EQ(size, sizeof(some_data)); + + // Linux only reports the first message's size, which is wrong. We test for + // the behavior described in the man page. 
+ SKIP_IF(!IsRunningOnGvisor()); + + ASSERT_THAT( + RetryEINTR(send)(sockets->second_fd(), &some_data, sizeof(some_data), 0), + SyscallSucceeds()); + EXPECT_THAT(ioctl(sockets->first_fd(), TIOCINQ, &size), SyscallSucceeds()); + EXPECT_EQ(size, sizeof(some_data) * 2); +} + +TEST_P(UnixSocketPairTest, TIOCOUTQSucceeds) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + int size = -1; + EXPECT_THAT(ioctl(sockets->second_fd(), TIOCOUTQ, &size), SyscallSucceeds()); + EXPECT_EQ(size, 0); + + // Linux reports bogus numbers which are related to its internal allocations. + // We test for the behavior described in the man page. + SKIP_IF(!IsRunningOnGvisor()); + + const char some_data[] = "dangerzone"; + ASSERT_THAT( + RetryEINTR(send)(sockets->second_fd(), &some_data, sizeof(some_data), 0), + SyscallSucceeds()); + EXPECT_THAT(ioctl(sockets->second_fd(), TIOCOUTQ, &size), SyscallSucceeds()); + EXPECT_EQ(size, sizeof(some_data)); + + ASSERT_THAT( + RetryEINTR(send)(sockets->second_fd(), &some_data, sizeof(some_data), 0), + SyscallSucceeds()); + EXPECT_THAT(ioctl(sockets->second_fd(), TIOCOUTQ, &size), SyscallSucceeds()); + EXPECT_EQ(size, sizeof(some_data) * 2); +} + +TEST_P(UnixSocketPairTest, NetdeviceIoctlsSucceed) { + FileDescriptor sock = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_UNIX, SOCK_DGRAM, 0)); + + // Prepare the request. + struct ifreq ifr; + snprintf(ifr.ifr_name, IFNAMSIZ, "lo"); + + // Check that the ioctl either succeeds or fails with ENODEV. 
+ int err = ioctl(sock.get(), SIOCGIFINDEX, &ifr); + if (err < 0) { + ASSERT_EQ(errno, ENODEV); + } +} + +TEST_P(UnixSocketPairTest, SocketShutdown) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + char buf[20]; + const std::string data = "abc"; + ASSERT_THAT(WriteFd(sockets->first_fd(), data.c_str(), 3), + SyscallSucceedsWithValue(3)); + ASSERT_THAT(shutdown(sockets->first_fd(), SHUT_RDWR), SyscallSucceeds()); + ASSERT_THAT(shutdown(sockets->second_fd(), SHUT_RDWR), SyscallSucceeds()); + + // Shutting down a socket does not clear the buffer. + ASSERT_THAT(ReadFd(sockets->second_fd(), buf, 3), + SyscallSucceedsWithValue(3)); + EXPECT_EQ(data, absl::string_view(buf, 3)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_unix.h b/test/syscalls/linux/socket_unix.h new file mode 100644 index 000000000..d2a16afb2 --- /dev/null +++ b/test/syscalls/linux/socket_unix.h @@ -0,0 +1,29 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_H_ +#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_H_ + +#include "test/syscalls/linux/socket_test_util.h" + +namespace gvisor { +namespace testing { + +// Test fixture for tests that apply to pairs of connected unix sockets. 
+using UnixSocketPairTest = SocketPairTest; + +} // namespace testing +} // namespace gvisor + +#endif // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_H_ diff --git a/test/syscalls/linux/socket_unix_abstract.cc b/test/syscalls/linux/socket_unix_abstract.cc new file mode 100644 index 000000000..0878f63ff --- /dev/null +++ b/test/syscalls/linux/socket_unix_abstract.cc @@ -0,0 +1,38 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <vector> + +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/socket_unix.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +std::vector<SocketPairKind> GetSocketPairs() { + return VecCat<SocketPairKind>(ApplyVec<SocketPairKind>( + AbstractBoundUnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_STREAM, SOCK_DGRAM, SOCK_SEQPACKET}, + List<int>{0, SOCK_NONBLOCK}, + List<int>{0, SOCK_CLOEXEC}))); +} + +INSTANTIATE_TEST_CASE_P( + AllUnixDomainSockets, UnixSocketPairTest, + ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_unix_abstract_nonblock.cc b/test/syscalls/linux/socket_unix_abstract_nonblock.cc new file mode 100644 index 000000000..93fb33832 --- /dev/null +++ b/test/syscalls/linux/socket_unix_abstract_nonblock.cc @@ -0,0 +1,38 @@ +// Copyright 2018 Google LLC +// 
+// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <vector> + +#include "test/syscalls/linux/socket_non_blocking.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +std::vector<SocketPairKind> GetSocketPairs() { + return VecCat<SocketPairKind>(ApplyVec<SocketPairKind>( + AbstractBoundUnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_STREAM, SOCK_DGRAM, SOCK_SEQPACKET}, + List<int>{SOCK_NONBLOCK}, + List<int>{0, SOCK_CLOEXEC}))); +} + +INSTANTIATE_TEST_CASE_P( + AllUnixDomainSockets, NonBlockingSocketPairTest, + ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_unix_dgram.cc b/test/syscalls/linux/socket_unix_dgram.cc new file mode 100644 index 000000000..c17d3990f --- /dev/null +++ b/test/syscalls/linux/socket_unix_dgram.cc @@ -0,0 +1,45 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "test/syscalls/linux/socket_unix_dgram.h" + +#include <stdio.h> +#include <sys/un.h> +#include "gtest/gtest.h" +#include "gtest/gtest.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +TEST_P(DgramUnixSocketPairTest, WriteOneSideClosed) { + // FIXME: gVisor datagram sockets return EPIPE instead of + // ECONNREFUSED. + SKIP_IF(IsRunningOnGvisor()); + + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + ASSERT_THAT(close(sockets->release_first_fd()), SyscallSucceeds()); + constexpr char kStr[] = "abc"; + ASSERT_THAT(write(sockets->second_fd(), kStr, 3), + SyscallFailsWithErrno(ECONNREFUSED)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_unix_dgram.h b/test/syscalls/linux/socket_unix_dgram.h new file mode 100644 index 000000000..722a3d8e6 --- /dev/null +++ b/test/syscalls/linux/socket_unix_dgram.h @@ -0,0 +1,29 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_DGRAM_H_ +#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_DGRAM_H_ + +#include "test/syscalls/linux/socket_test_util.h" + +namespace gvisor { +namespace testing { + +// Test fixture for tests that apply to pairs of connected dgram unix sockets. +using DgramUnixSocketPairTest = SocketPairTest; + +} // namespace testing +} // namespace gvisor + +#endif // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_DGRAM_H_ diff --git a/test/syscalls/linux/socket_unix_dgram_local.cc b/test/syscalls/linux/socket_unix_dgram_local.cc new file mode 100644 index 000000000..b2fa72b5e --- /dev/null +++ b/test/syscalls/linux/socket_unix_dgram_local.cc @@ -0,0 +1,59 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <vector> + +#include "test/syscalls/linux/socket_non_stream.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/socket_unix_dgram.h" +#include "test/syscalls/linux/socket_unix_non_stream.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +std::vector<SocketPairKind> GetSocketPairs() { + return VecCat<SocketPairKind>(VecCat<SocketPairKind>( + ApplyVec<SocketPairKind>( + UnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_DGRAM}, + List<int>{0, SOCK_NONBLOCK}, + List<int>{0, SOCK_CLOEXEC})), + ApplyVec<SocketPairKind>( + FilesystemBoundUnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_DGRAM}, + List<int>{0, SOCK_NONBLOCK}, + List<int>{0, SOCK_CLOEXEC})), + ApplyVec<SocketPairKind>( + AbstractBoundUnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_DGRAM}, + List<int>{0, SOCK_NONBLOCK}, + List<int>{0, SOCK_CLOEXEC})))); +} + +INSTANTIATE_TEST_CASE_P( + AllUnixDomainSockets, DgramUnixSocketPairTest, + ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); + +INSTANTIATE_TEST_CASE_P( + AllUnixDomainSockets, UnixNonStreamSocketPairTest, + ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); + +INSTANTIATE_TEST_CASE_P( + AllUnixDomainSockets, NonStreamSocketPairTest, + ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_unix_dgram_non_blocking.cc b/test/syscalls/linux/socket_unix_dgram_non_blocking.cc new file mode 100644 index 000000000..9152c229c --- /dev/null +++ b/test/syscalls/linux/socket_unix_dgram_non_blocking.cc @@ -0,0 +1,68 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <stdio.h> +#include <sys/un.h> +#include "gtest/gtest.h" +#include "gtest/gtest.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// Test fixture for tests that apply to pairs of connected non-blocking dgram +// unix sockets. +using NonBlockingDgramUnixSocketPairTest = SocketPairTest; + +TEST_P(NonBlockingDgramUnixSocketPairTest, ReadOneSideClosed) { + if (IsRunningOnGvisor()) { + // FIXME: gVisor datagram sockets return 0 instead of + // EAGAIN. 
+ return; + } + + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + ASSERT_THAT(close(sockets->release_first_fd()), SyscallSucceeds()); + char data[10] = {}; + ASSERT_THAT(read(sockets->second_fd(), data, sizeof(data)), + SyscallFailsWithErrno(EAGAIN)); +} + +INSTANTIATE_TEST_CASE_P( + AllUnixDomainSockets, NonBlockingDgramUnixSocketPairTest, + ::testing::ValuesIn(IncludeReversals(VecCat<SocketPairKind>( + ApplyVec<SocketPairKind>( + UnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_DGRAM}, + List<int>{SOCK_NONBLOCK}, + List<int>{0, SOCK_CLOEXEC})), + ApplyVec<SocketPairKind>( + FilesystemBoundUnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_DGRAM}, + List<int>{SOCK_NONBLOCK}, + List<int>{0, SOCK_CLOEXEC})), + ApplyVec<SocketPairKind>( + AbstractBoundUnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_DGRAM}, + List<int>{SOCK_NONBLOCK}, + List<int>{0, SOCK_CLOEXEC})))))); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_unix_domain.cc b/test/syscalls/linux/socket_unix_domain.cc new file mode 100644 index 000000000..f8f0d01eb --- /dev/null +++ b/test/syscalls/linux/socket_unix_domain.cc @@ -0,0 +1,38 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <vector> + +#include "test/syscalls/linux/socket_generic.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +std::vector<SocketPairKind> GetSocketPairs() { + return VecCat<SocketPairKind>(ApplyVec<SocketPairKind>( + UnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_STREAM, SOCK_DGRAM, SOCK_SEQPACKET}, + List<int>{0, SOCK_NONBLOCK}, + List<int>{0, SOCK_CLOEXEC}))); +} + +INSTANTIATE_TEST_CASE_P( + AllUnixDomainSockets, AllSocketPairTest, + ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_unix_filesystem.cc b/test/syscalls/linux/socket_unix_filesystem.cc new file mode 100644 index 000000000..be873edcb --- /dev/null +++ b/test/syscalls/linux/socket_unix_filesystem.cc @@ -0,0 +1,38 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <vector> + +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/socket_unix.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +std::vector<SocketPairKind> GetSocketPairs() { + return VecCat<SocketPairKind>(ApplyVec<SocketPairKind>( + FilesystemBoundUnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_STREAM, SOCK_DGRAM, SOCK_SEQPACKET}, + List<int>{0, SOCK_NONBLOCK}, + List<int>{0, SOCK_CLOEXEC}))); +} + +INSTANTIATE_TEST_CASE_P( + AllUnixDomainSockets, UnixSocketPairTest, + ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_unix_filesystem_nonblock.cc b/test/syscalls/linux/socket_unix_filesystem_nonblock.cc new file mode 100644 index 000000000..63e85ac11 --- /dev/null +++ b/test/syscalls/linux/socket_unix_filesystem_nonblock.cc @@ -0,0 +1,38 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <vector> + +#include "test/syscalls/linux/socket_non_blocking.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +std::vector<SocketPairKind> GetSocketPairs() { + return VecCat<SocketPairKind>(ApplyVec<SocketPairKind>( + FilesystemBoundUnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_STREAM, SOCK_DGRAM, SOCK_SEQPACKET}, + List<int>{SOCK_NONBLOCK}, + List<int>{0, SOCK_CLOEXEC}))); +} + +INSTANTIATE_TEST_CASE_P( + AllUnixDomainSockets, NonBlockingSocketPairTest, + ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_unix_non_stream.cc b/test/syscalls/linux/socket_unix_non_stream.cc new file mode 100644 index 000000000..620397746 --- /dev/null +++ b/test/syscalls/linux/socket_unix_non_stream.cc @@ -0,0 +1,229 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "test/syscalls/linux/socket_unix_non_stream.h" + +#include <stdio.h> +#include <sys/mman.h> +#include <sys/un.h> + +#include "gtest/gtest.h" +#include "gtest/gtest.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/memory_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +TEST_P(UnixNonStreamSocketPairTest, RecvMsgTooLarge) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + int rcvbuf; + socklen_t length = sizeof(rcvbuf); + ASSERT_THAT( + getsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVBUF, &rcvbuf, &length), + SyscallSucceeds()); + + // Make the call larger than the receive buffer. + const int recv_size = 3 * rcvbuf; + + // Write a message that does fit in the receive buffer. + const int write_size = rcvbuf - kPageSize; + + std::vector<char> write_buf(write_size, 'a'); + const int ret = RetryEINTR(write)(sockets->second_fd(), write_buf.data(), + write_buf.size()); + if (ret < 0 && errno == ENOBUFS) { + // NOTE: Linux may stall the write for a long time and + // ultimately return ENOBUFS. Allow this error, since a retry will likely + // result in the same error. + return; + } + ASSERT_THAT(ret, SyscallSucceeds()); + + std::vector<char> recv_buf(recv_size); + + ASSERT_NO_FATAL_FAILURE(RecvNoCmsg(sockets->first_fd(), recv_buf.data(), + recv_buf.size(), write_size)); + + recv_buf.resize(write_size); + EXPECT_EQ(recv_buf, write_buf); +} + +// Create a region of anonymous memory of size 'size', which is fragmented in +// FileMem. +// +// ptr contains the start address of the region. The returned vector contains +// all of the mappings to be unmapped when done. 
+PosixErrorOr<std::vector<Mapping>> CreateFragmentedRegion(const int size,
+                                                          void** ptr) {
+  // Reserve `size` bytes of address space with no access rights; every page of
+  // it is replaced below by its own MAP_FIXED read/write mapping.
+  Mapping region;
+  ASSIGN_OR_RETURN_ERRNO(region, Mmap(nullptr, size, PROT_NONE,
+                                      MAP_ANONYMOUS | MAP_PRIVATE, -1, 0));
+
+  // Report the start of the region through the out-parameter.
+  *ptr = region.ptr();
+
+  // Don't save hundreds of times for all of these mmaps.
+  DisableSave ds;
+
+  // Holds both the in-region pages and the unrelated pages, interleaved.
+  std::vector<Mapping> pages;
+
+  // Map and commit a single page at a time, mapping and committing an unrelated
+  // page between each call to force FileMem fragmentation.
+  for (uintptr_t addr = region.addr(); addr < region.endaddr();
+       addr += kPageSize) {
+    Mapping page;
+    ASSIGN_OR_RETURN_ERRNO(
+        page,
+        Mmap(reinterpret_cast<void*>(addr), kPageSize, PROT_READ | PROT_WRITE,
+             MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0));
+    // Write one byte so the page is committed, not merely mapped.
+    *reinterpret_cast<volatile char*>(page.ptr()) = 42;
+
+    pages.emplace_back(std::move(page));
+
+    // Unrelated page elsewhere.
+    ASSIGN_OR_RETURN_ERRNO(page,
+                           Mmap(nullptr, kPageSize, PROT_READ | PROT_WRITE,
+                                MAP_ANONYMOUS | MAP_PRIVATE, -1, 0));
+    *reinterpret_cast<volatile char*>(page.ptr()) = 42;
+
+    pages.emplace_back(std::move(page));
+  }
+
+  // The mappings above have taken ownership of the region.
+  // NOTE(review): on an early error return from the loop, `region` has not
+  // been released while `pages` already covers part of the same range --
+  // confirm that unmapping the overlap twice is benign.
+  region.release();
+
+  return pages;
+}
+
+// A contiguous iov that is heavily fragmented in FileMem can still be sent
+// successfully.
+TEST_P(UnixNonStreamSocketPairTest, FragmentedSendMsg) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  const int buffer_size = UIO_MAXIOV * kPageSize;
+  // Extra page for message header overhead.
+  const int sndbuf = buffer_size + kPageSize;
+  // N.B. setsockopt(SO_SNDBUF) doubles the passed value.
+  const int set_sndbuf = sndbuf / 2;
+
+  EXPECT_THAT(setsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDBUF,
+                         &set_sndbuf, sizeof(set_sndbuf)),
+              SyscallSucceeds());
+
+  // Read the value back: the test only proceeds if the socket really has the
+  // send buffer size requested above.
+  int actual_sndbuf = 0;
+  socklen_t length = sizeof(actual_sndbuf);
+  ASSERT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDBUF,
+                         &actual_sndbuf, &length),
+              SyscallSucceeds());
+
+  if (actual_sndbuf != sndbuf) {
+    // Unable to get the sndbuf we want.
+    //
+    // N.B. At minimum, the socketpair gofer should provide a socket that is
+    // already the correct size.
+    //
+    // TODO: When internal UDS support SO_SNDBUF, we can assert that
+    // we always get the right SO_SNDBUF on gVisor.
+    LOG(INFO) << "SO_SNDBUF = " << actual_sndbuf << ", want " << sndbuf
+              << ". Skipping test";
+    return;
+  }
+
+  // Create a contiguous region of buffer_size (UIO_MAXIOV * kPageSize) bytes
+  // whose pages are fragmented in FileMem. We'll call sendmsg with a single
+  // iov, but the goal is to get the sentry to split this into many iovs when
+  // calling the kernel.
+  // NOTE(review): this comment used to say "2*UIO_MAXIOV*PAGE_SIZE" and
+  // "> UIO_MAXIOV iovs"; a region of exactly UIO_MAXIOV pages cannot exceed
+  // UIO_MAXIOV page-sized pieces -- confirm which size was intended.
+  void* ptr;
+  std::vector<Mapping> pages =
+      ASSERT_NO_ERRNO_AND_VALUE(CreateFragmentedRegion(buffer_size, &ptr));
+
+  struct iovec iov = {};
+  iov.iov_base = ptr;
+  iov.iov_len = buffer_size;
+
+  struct msghdr msg = {};
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+
+  // NOTE: Linux has poor behavior in the presence of
+  // physical memory fragmentation. As a result, this may stall for a long time
+  // and ultimately return ENOBUFS. Allow this error, since it means that we
+  // made it to the host kernel and started the sendmsg.
+  EXPECT_THAT(RetryEINTR(sendmsg)(sockets->first_fd(), &msg, 0),
+              AnyOf(SyscallSucceedsWithValue(buffer_size),
+                    SyscallFailsWithErrno(ENOBUFS)));
+}
+
+// A contiguous iov that is heavily fragmented in FileMem can still be received
+// into successfully.
+TEST_P(UnixNonStreamSocketPairTest, FragmentedRecvMsg) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  const int buffer_size = UIO_MAXIOV * kPageSize;
+  // Extra page for message header overhead.
+  const int sndbuf = buffer_size + kPageSize;
+  // N.B. setsockopt(SO_SNDBUF) doubles the passed value.
+  const int set_sndbuf = sndbuf / 2;
+
+  EXPECT_THAT(setsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDBUF,
+                         &set_sndbuf, sizeof(set_sndbuf)),
+              SyscallSucceeds());
+
+  // Read the value back: the test only proceeds if the socket really has the
+  // send buffer size requested above.
+  int actual_sndbuf = 0;
+  socklen_t length = sizeof(actual_sndbuf);
+  ASSERT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDBUF,
+                         &actual_sndbuf, &length),
+              SyscallSucceeds());
+
+  if (actual_sndbuf != sndbuf) {
+    // Unable to get the sndbuf we want.
+    //
+    // N.B. At minimum, the socketpair gofer should provide a socket that is
+    // already the correct size.
+    //
+    // TODO: When internal UDS support SO_SNDBUF, we can assert that
+    // we always get the right SO_SNDBUF on gVisor.
+    LOG(INFO) << "SO_SNDBUF = " << actual_sndbuf << ", want " << sndbuf
+              << ". Skipping test";
+    return;
+  }
+
+  // Send the payload from an ordinary heap buffer; only the receive side uses
+  // the fragmented region.
+  std::vector<char> write_buf(buffer_size, 'a');
+  const int ret = RetryEINTR(write)(sockets->first_fd(), write_buf.data(),
+                                    write_buf.size());
+  if (ret < 0 && errno == ENOBUFS) {
+    // NOTE: Linux may stall the write for a long time and
+    // ultimately return ENOBUFS. Allow this error, since a retry will likely
+    // result in the same error.
+    return;
+  }
+  ASSERT_THAT(ret, SyscallSucceeds());
+
+  // Create a contiguous region of buffer_size (UIO_MAXIOV * kPageSize) bytes
+  // whose pages are fragmented in FileMem. We'll receive into it as a single
+  // buffer, but the goal is to get the sentry to split this into many iovs
+  // when calling the kernel.
+  // NOTE(review): this comment used to say "sendmsg", "2*UIO_MAXIOV*PAGE_SIZE"
+  // and "> UIO_MAXIOV iovs", none of which matches the code below -- confirm
+  // the intended region size.
+  void* ptr;
+  std::vector<Mapping> pages =
+      ASSERT_NO_ERRNO_AND_VALUE(CreateFragmentedRegion(buffer_size, &ptr));
+
+  ASSERT_NO_FATAL_FAILURE(RecvNoCmsg(
+      sockets->second_fd(), reinterpret_cast<char*>(ptr), buffer_size));
+
+  // The fragmented region must now hold exactly the bytes that were written.
+  EXPECT_EQ(0, memcmp(write_buf.data(), ptr, buffer_size));
+}
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_unix_non_stream.h b/test/syscalls/linux/socket_unix_non_stream.h
new file mode 100644
index 000000000..e4214d949
--- /dev/null
+++ b/test/syscalls/linux/socket_unix_non_stream.h
@@ -0,0 +1,30 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_NON_STREAM_H_
+#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_NON_STREAM_H_
+
+#include "test/syscalls/linux/socket_test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// Test fixture for tests that apply to pairs of connected non-stream
+// unix-domain sockets.
+using UnixNonStreamSocketPairTest = SocketPairTest;  // Plain alias; adds no members of its own.
+
+}  // namespace testing
+}  // namespace gvisor
+
+#endif  // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_NON_STREAM_H_
diff --git a/test/syscalls/linux/socket_unix_non_stream_blocking_local.cc b/test/syscalls/linux/socket_unix_non_stream_blocking_local.cc
new file mode 100644
index 000000000..c5d525dde
--- /dev/null
+++ b/test/syscalls/linux/socket_unix_non_stream_blocking_local.cc
@@ -0,0 +1,47 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +#include "test/syscalls/linux/socket_non_stream_blocking.h" + +#include <vector> + +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +std::vector<SocketPairKind> GetSocketPairs() { + return VecCat<SocketPairKind>( + ApplyVec<SocketPairKind>( + UnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_DGRAM, SOCK_SEQPACKET}, + List<int>{0, SOCK_CLOEXEC})), + ApplyVec<SocketPairKind>( + FilesystemBoundUnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_DGRAM, SOCK_SEQPACKET}, + List<int>{0, SOCK_CLOEXEC})), + ApplyVec<SocketPairKind>( + AbstractBoundUnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_DGRAM, SOCK_SEQPACKET}, + List<int>{0, SOCK_CLOEXEC}))); +} + +INSTANTIATE_TEST_CASE_P( + AllUnixDomainSockets, BlockingNonStreamSocketPairTest, + ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_unix_pair.cc b/test/syscalls/linux/socket_unix_pair.cc new file mode 100644 index 000000000..85dd3711b --- /dev/null +++ b/test/syscalls/linux/socket_unix_pair.cc @@ -0,0 +1,38 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <vector> + +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/socket_unix.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +std::vector<SocketPairKind> GetSocketPairs() { + return VecCat<SocketPairKind>(ApplyVec<SocketPairKind>( + UnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_STREAM, SOCK_DGRAM, SOCK_SEQPACKET}, + List<int>{0, SOCK_NONBLOCK}, + List<int>{0, SOCK_CLOEXEC}))); +} + +INSTANTIATE_TEST_CASE_P( + AllUnixDomainSockets, UnixSocketPairTest, + ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_unix_pair_nonblock.cc b/test/syscalls/linux/socket_unix_pair_nonblock.cc new file mode 100644 index 000000000..6a40fe68c --- /dev/null +++ b/test/syscalls/linux/socket_unix_pair_nonblock.cc @@ -0,0 +1,38 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <vector> + +#include "test/syscalls/linux/socket_non_blocking.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +std::vector<SocketPairKind> GetSocketPairs() { + return VecCat<SocketPairKind>(ApplyVec<SocketPairKind>( + UnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_STREAM, SOCK_DGRAM, SOCK_SEQPACKET}, + List<int>{SOCK_NONBLOCK}, + List<int>{0, SOCK_CLOEXEC}))); +} + +INSTANTIATE_TEST_CASE_P( + AllUnixDomainSockets, NonBlockingSocketPairTest, + ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_unix_seqpacket.cc b/test/syscalls/linux/socket_unix_seqpacket.cc new file mode 100644 index 000000000..ad0af77e9 --- /dev/null +++ b/test/syscalls/linux/socket_unix_seqpacket.cc @@ -0,0 +1,49 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "test/syscalls/linux/socket_unix_seqpacket.h" + +#include <stdio.h> +#include <sys/un.h> +#include "gtest/gtest.h" +#include "gtest/gtest.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +TEST_P(SeqpacketUnixSocketPairTest, WriteOneSideClosed) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + ASSERT_THAT(close(sockets->release_first_fd()), SyscallSucceeds()); + constexpr char kStr[] = "abc"; + ASSERT_THAT(write(sockets->second_fd(), kStr, 3), + SyscallFailsWithErrno(EPIPE)); +} + +TEST_P(SeqpacketUnixSocketPairTest, ReadOneSideClosed) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + ASSERT_THAT(close(sockets->release_first_fd()), SyscallSucceeds()); + char data[10] = {}; + ASSERT_THAT(read(sockets->second_fd(), data, sizeof(data)), + SyscallSucceedsWithValue(0)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_unix_seqpacket.h b/test/syscalls/linux/socket_unix_seqpacket.h new file mode 100644 index 000000000..da8eb2b2b --- /dev/null +++ b/test/syscalls/linux/socket_unix_seqpacket.h @@ -0,0 +1,30 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_SEQPACKET_H_
+#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_SEQPACKET_H_
+
+#include "test/syscalls/linux/socket_test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// Test fixture for tests that apply to pairs of connected seqpacket unix
+// sockets.
+//
+// This is a plain alias of SocketPairTest: it adds no members of its own and
+// gives the seqpacket-only tests a fixture name that can be instantiated
+// separately.
+using SeqpacketUnixSocketPairTest = SocketPairTest;
+
+}  // namespace testing
+}  // namespace gvisor
+
+#endif  // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_SEQPACKET_H_
diff --git a/test/syscalls/linux/socket_unix_seqpacket_local.cc b/test/syscalls/linux/socket_unix_seqpacket_local.cc
new file mode 100644
index 000000000..f9139a754
--- /dev/null
+++ b/test/syscalls/linux/socket_unix_seqpacket_local.cc
@@ -0,0 +1,59 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +#include <vector> + +#include "test/syscalls/linux/socket_non_stream.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/socket_unix_non_stream.h" +#include "test/syscalls/linux/socket_unix_seqpacket.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +std::vector<SocketPairKind> GetSocketPairs() { + return VecCat<SocketPairKind>(VecCat<SocketPairKind>( + ApplyVec<SocketPairKind>( + UnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_SEQPACKET}, + List<int>{0, SOCK_NONBLOCK}, + List<int>{0, SOCK_CLOEXEC})), + ApplyVec<SocketPairKind>( + FilesystemBoundUnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_SEQPACKET}, + List<int>{0, SOCK_NONBLOCK}, + List<int>{0, SOCK_CLOEXEC})), + ApplyVec<SocketPairKind>( + AbstractBoundUnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_SEQPACKET}, + List<int>{0, SOCK_NONBLOCK}, + List<int>{0, SOCK_CLOEXEC})))); +} + +INSTANTIATE_TEST_CASE_P( + AllUnixDomainSockets, NonStreamSocketPairTest, + ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); + +INSTANTIATE_TEST_CASE_P( + AllUnixDomainSockets, SeqpacketUnixSocketPairTest, + ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); + +INSTANTIATE_TEST_CASE_P( + AllUnixDomainSockets, UnixNonStreamSocketPairTest, + ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_unix_stream.cc b/test/syscalls/linux/socket_unix_stream.cc new file mode 100644 index 000000000..8232c9e35 --- /dev/null +++ b/test/syscalls/linux/socket_unix_stream.cc @@ -0,0 +1,69 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <stdio.h> +#include <sys/un.h> +#include "gtest/gtest.h" +#include "gtest/gtest.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// Test fixture for tests that apply to pairs of connected stream unix sockets. +using StreamUnixSocketPairTest = SocketPairTest; + +TEST_P(StreamUnixSocketPairTest, WriteOneSideClosed) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + ASSERT_THAT(close(sockets->release_first_fd()), SyscallSucceeds()); + constexpr char kStr[] = "abc"; + ASSERT_THAT(write(sockets->second_fd(), kStr, 3), + SyscallFailsWithErrno(EPIPE)); +} + +TEST_P(StreamUnixSocketPairTest, ReadOneSideClosed) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + ASSERT_THAT(close(sockets->release_first_fd()), SyscallSucceeds()); + char data[10] = {}; + ASSERT_THAT(read(sockets->second_fd(), data, sizeof(data)), + SyscallSucceedsWithValue(0)); +} + +INSTANTIATE_TEST_CASE_P( + AllUnixDomainSockets, StreamUnixSocketPairTest, + ::testing::ValuesIn(IncludeReversals(VecCat<SocketPairKind>( + ApplyVec<SocketPairKind>( + UnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_STREAM}, + List<int>{0, SOCK_NONBLOCK}, + List<int>{0, SOCK_CLOEXEC})), + ApplyVec<SocketPairKind>( + FilesystemBoundUnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_STREAM}, + List<int>{0, SOCK_NONBLOCK}, + List<int>{0, SOCK_CLOEXEC})), + ApplyVec<SocketPairKind>( + 
AbstractBoundUnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_STREAM}, + List<int>{0, SOCK_NONBLOCK}, + List<int>{0, SOCK_CLOEXEC})))))); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_unix_stream_blocking_local.cc b/test/syscalls/linux/socket_unix_stream_blocking_local.cc new file mode 100644 index 000000000..1cdeadd27 --- /dev/null +++ b/test/syscalls/linux/socket_unix_stream_blocking_local.cc @@ -0,0 +1,47 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "test/syscalls/linux/socket_stream_blocking.h" + +#include <vector> + +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +std::vector<SocketPairKind> GetSocketPairs() { + return VecCat<SocketPairKind>( + ApplyVec<SocketPairKind>( + UnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_STREAM}, + List<int>{0, SOCK_CLOEXEC})), + ApplyVec<SocketPairKind>( + FilesystemBoundUnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_STREAM}, + List<int>{0, SOCK_CLOEXEC})), + ApplyVec<SocketPairKind>( + AbstractBoundUnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_STREAM}, + List<int>{0, SOCK_CLOEXEC}))); +} + +INSTANTIATE_TEST_CASE_P( + AllUnixDomainSockets, BlockingStreamSocketPairTest, + ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_unix_stream_local.cc b/test/syscalls/linux/socket_unix_stream_local.cc new file mode 100644 index 000000000..9f11e2d49 --- /dev/null +++ b/test/syscalls/linux/socket_unix_stream_local.cc @@ -0,0 +1,49 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <vector> + +#include "test/syscalls/linux/socket_stream.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +std::vector<SocketPairKind> GetSocketPairs() { + return VecCat<SocketPairKind>( + ApplyVec<SocketPairKind>( + UnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_STREAM}, + List<int>{0, SOCK_NONBLOCK}, + List<int>{0, SOCK_CLOEXEC})), + ApplyVec<SocketPairKind>( + FilesystemBoundUnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_STREAM}, + List<int>{0, SOCK_NONBLOCK}, + List<int>{0, SOCK_CLOEXEC})), + ApplyVec<SocketPairKind>( + AbstractBoundUnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_STREAM}, + List<int>{0, SOCK_NONBLOCK}, + List<int>{0, SOCK_CLOEXEC}))); +} + +INSTANTIATE_TEST_CASE_P( + AllUnixDomainSockets, StreamSocketPairTest, + ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_unix_stream_nonblock_local.cc b/test/syscalls/linux/socket_unix_stream_nonblock_local.cc new file mode 100644 index 000000000..4c3d3a187 --- /dev/null +++ b/test/syscalls/linux/socket_unix_stream_nonblock_local.cc @@ -0,0 +1,49 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#include "test/syscalls/linux/socket_stream_nonblock.h" + +#include <vector> + +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +std::vector<SocketPairKind> GetSocketPairs() { + return VecCat<SocketPairKind>( + ApplyVec<SocketPairKind>( + UnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_STREAM}, + List<int>{SOCK_NONBLOCK}, + List<int>{0, SOCK_CLOEXEC})), + ApplyVec<SocketPairKind>( + FilesystemBoundUnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_STREAM}, + List<int>{SOCK_NONBLOCK}, + List<int>{0, SOCK_CLOEXEC})), + ApplyVec<SocketPairKind>( + AbstractBoundUnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_STREAM}, + List<int>{SOCK_NONBLOCK}, + List<int>{0, SOCK_CLOEXEC}))); +} + +INSTANTIATE_TEST_CASE_P( + AllUnixDomainSockets, NonBlockingStreamSocketPairTest, + ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_unix_unbound_abstract.cc b/test/syscalls/linux/socket_unix_unbound_abstract.cc new file mode 100644 index 000000000..a35b3b9bd --- /dev/null +++ b/test/syscalls/linux/socket_unix_unbound_abstract.cc @@ -0,0 +1,116 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <stdio.h> +#include <sys/un.h> +#include "gtest/gtest.h" +#include "gtest/gtest.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// Test fixture for tests that apply to pairs of unbound abstract unix sockets. +using UnboundAbstractUnixSocketPairTest = SocketPairTest; + +TEST_P(UnboundAbstractUnixSocketPairTest, AddressAfterNull) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + struct sockaddr_un addr = + *reinterpret_cast<const struct sockaddr_un*>(sockets->first_addr()); + ASSERT_EQ(addr.sun_path[sizeof(addr.sun_path) - 1], 0); + SKIP_IF(addr.sun_path[sizeof(addr.sun_path) - 2] != 0 || + addr.sun_path[sizeof(addr.sun_path) - 3] != 0); + + addr.sun_path[sizeof(addr.sun_path) - 2] = 'a'; + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(bind(sockets->second_fd(), + reinterpret_cast<struct sockaddr*>(&addr), sizeof(addr)), + SyscallSucceeds()); +} + +TEST_P(UnboundAbstractUnixSocketPairTest, ShortAddressNotExtended) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + struct sockaddr_un addr = + *reinterpret_cast<const struct sockaddr_un*>(sockets->first_addr()); + ASSERT_EQ(addr.sun_path[sizeof(addr.sun_path) - 1], 0); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size() - 1), + SyscallSucceeds()); + + ASSERT_THAT(bind(sockets->second_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); +} + +TEST_P(UnboundAbstractUnixSocketPairTest, BindNothing) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + struct sockaddr_un addr = {.sun_family = AF_UNIX}; + ASSERT_THAT(bind(sockets->first_fd(), + reinterpret_cast<struct sockaddr*>(&addr), sizeof(addr)), + SyscallSucceeds()); +} + 
+TEST_P(UnboundAbstractUnixSocketPairTest, GetSockNameFullLength) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + sockaddr_storage addr = {}; + socklen_t addr_len = sizeof(addr); + ASSERT_THAT(getsockname(sockets->first_fd(), + reinterpret_cast<struct sockaddr*>(&addr), &addr_len), + SyscallSucceeds()); + EXPECT_EQ(addr_len, sockets->first_addr_size()); +} + +TEST_P(UnboundAbstractUnixSocketPairTest, GetSockNamePartialLength) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size() - 1), + SyscallSucceeds()); + + sockaddr_storage addr = {}; + socklen_t addr_len = sizeof(addr); + ASSERT_THAT(getsockname(sockets->first_fd(), + reinterpret_cast<struct sockaddr*>(&addr), &addr_len), + SyscallSucceeds()); + EXPECT_EQ(addr_len, sockets->first_addr_size() - 1); +} + +INSTANTIATE_TEST_CASE_P( + AllUnixDomainSockets, UnboundAbstractUnixSocketPairTest, + ::testing::ValuesIn(ApplyVec<SocketPairKind>( + AbstractUnboundUnixDomainSocketPair, + AllBitwiseCombinations( + List<int>{SOCK_STREAM, SOCK_SEQPACKET, SOCK_DGRAM}, + List<int>{0, SOCK_NONBLOCK}, List<int>{0, SOCK_CLOEXEC})))); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_unix_unbound_dgram.cc b/test/syscalls/linux/socket_unix_unbound_dgram.cc new file mode 100644 index 000000000..a01b7f644 --- /dev/null +++ b/test/syscalls/linux/socket_unix_unbound_dgram.cc @@ -0,0 +1,162 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <stdio.h> +#include <sys/un.h> +#include "gtest/gtest.h" +#include "gtest/gtest.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// Test fixture for tests that apply to pairs of unbound dgram unix sockets. +using UnboundDgramUnixSocketPairTest = SocketPairTest; + +TEST_P(UnboundDgramUnixSocketPairTest, BindConnect) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); +} + +TEST_P(UnboundDgramUnixSocketPairTest, SelfConnect) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + ASSERT_THAT(connect(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); +} + +TEST_P(UnboundDgramUnixSocketPairTest, DoubleConnect) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(), + 
sockets->first_addr_size()), + SyscallSucceeds()); +} + +TEST_P(UnboundDgramUnixSocketPairTest, GetRemoteAddress) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + socklen_t addressLength = sockets->first_addr_size(); + struct sockaddr_storage address = {}; + ASSERT_THAT(getpeername(sockets->second_fd(), (struct sockaddr*)(&address), + &addressLength), + SyscallSucceeds()); + EXPECT_EQ( + 0, memcmp(&address, sockets->first_addr(), sockets->first_addr_size())); +} + +TEST_P(UnboundDgramUnixSocketPairTest, Sendto) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + ASSERT_THAT(sendto(sockets->second_fd(), sent_data, sizeof(sent_data), 0, + sockets->first_addr(), sockets->first_addr_size()), + SyscallSucceedsWithValue(sizeof(sent_data))); + + char received_data[sizeof(sent_data)]; + ASSERT_THAT(ReadFd(sockets->first_fd(), received_data, sizeof(received_data)), + SyscallSucceedsWithValue(sizeof(received_data))); + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(received_data))); +} + +TEST_P(UnboundDgramUnixSocketPairTest, ZeroWriteAllowed) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + char sent_data[3]; + // Send a zero length packet. + ASSERT_THAT(write(sockets->second_fd(), sent_data, 0), + SyscallSucceedsWithValue(0)); + // Receive the packet. 
+ char received_data[sizeof(sent_data)]; + ASSERT_THAT(read(sockets->first_fd(), received_data, sizeof(received_data)), + SyscallSucceedsWithValue(0)); +} + +TEST_P(UnboundDgramUnixSocketPairTest, Listen) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + ASSERT_THAT(listen(sockets->first_fd(), 0), SyscallFailsWithErrno(ENOTSUP)); +} + +TEST_P(UnboundDgramUnixSocketPairTest, Accept) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + ASSERT_THAT(accept(sockets->first_fd(), nullptr, nullptr), + SyscallFailsWithErrno(ENOTSUP)); +} + +TEST_P(UnboundDgramUnixSocketPairTest, SendtoWithoutConnect) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + char data = 'a'; + ASSERT_THAT( + RetryEINTR(sendto)(sockets->second_fd(), &data, sizeof(data), 0, + sockets->first_addr(), sockets->first_addr_size()), + SyscallSucceedsWithValue(sizeof(data))); +} + +INSTANTIATE_TEST_CASE_P( + AllUnixDomainSockets, UnboundDgramUnixSocketPairTest, + ::testing::ValuesIn(VecCat<SocketPairKind>( + ApplyVec<SocketPairKind>( + FilesystemUnboundUnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_DGRAM}, + List<int>{0, SOCK_NONBLOCK}, + List<int>{0, SOCK_CLOEXEC})), + ApplyVec<SocketPairKind>( + AbstractUnboundUnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_DGRAM}, + List<int>{0, SOCK_NONBLOCK}, + List<int>{0, SOCK_CLOEXEC}))))); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_unix_unbound_filesystem.cc b/test/syscalls/linux/socket_unix_unbound_filesystem.cc new file mode 100644 index 000000000..56d882643 --- /dev/null +++ b/test/syscalls/linux/socket_unix_unbound_filesystem.cc @@ -0,0 +1,84 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the 
License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <stdio.h> +#include <sys/un.h> +#include "gtest/gtest.h" +#include "gtest/gtest.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// Test fixture for tests that apply to pairs of unbound filesystem unix +// sockets. +using UnboundFilesystemUnixSocketPairTest = SocketPairTest; + +TEST_P(UnboundFilesystemUnixSocketPairTest, AddressAfterNull) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + struct sockaddr_un addr = + *reinterpret_cast<const struct sockaddr_un*>(sockets->first_addr()); + ASSERT_EQ(addr.sun_path[sizeof(addr.sun_path) - 1], 0); + SKIP_IF(addr.sun_path[sizeof(addr.sun_path) - 2] != 0 || + addr.sun_path[sizeof(addr.sun_path) - 3] != 0); + + addr.sun_path[sizeof(addr.sun_path) - 2] = 'a'; + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(bind(sockets->second_fd(), + reinterpret_cast<struct sockaddr*>(&addr), sizeof(addr)), + SyscallFailsWithErrno(EADDRINUSE)); +} + +TEST_P(UnboundFilesystemUnixSocketPairTest, GetSockNameLength) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + sockaddr_storage got_addr = {}; + socklen_t got_addr_len = sizeof(got_addr); + ASSERT_THAT( + getsockname(sockets->first_fd(), + reinterpret_cast<struct 
sockaddr*>(&got_addr), &got_addr_len), + SyscallSucceeds()); + + sockaddr_un want_addr = + *reinterpret_cast<const struct sockaddr_un*>(sockets->first_addr()); + + EXPECT_EQ(got_addr_len, + strlen(want_addr.sun_path) + 1 + sizeof(want_addr.sun_family)); +} + +INSTANTIATE_TEST_CASE_P( + AllUnixDomainSockets, UnboundFilesystemUnixSocketPairTest, + ::testing::ValuesIn(ApplyVec<SocketPairKind>( + FilesystemUnboundUnixDomainSocketPair, + AllBitwiseCombinations( + List<int>{SOCK_STREAM, SOCK_SEQPACKET, SOCK_DGRAM}, + List<int>{0, SOCK_NONBLOCK}, List<int>{0, SOCK_CLOEXEC})))); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_unix_unbound_seqpacket.cc b/test/syscalls/linux/socket_unix_unbound_seqpacket.cc new file mode 100644 index 000000000..fa3b99490 --- /dev/null +++ b/test/syscalls/linux/socket_unix_unbound_seqpacket.cc @@ -0,0 +1,91 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <stdio.h> +#include <sys/un.h> +#include "gtest/gtest.h" +#include "gtest/gtest.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// Test fixture for tests that apply to pairs of unbound seqpacket unix sockets. 
+using UnboundUnixSeqpacketSocketPairTest = SocketPairTest; + +TEST_P(UnboundUnixSeqpacketSocketPairTest, SendtoWithoutConnect) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + char data = 'a'; + ASSERT_THAT(sendto(sockets->second_fd(), &data, sizeof(data), 0, + sockets->first_addr(), sockets->first_addr_size()), + SyscallFailsWithErrno(ENOTCONN)); +} + +TEST_P(UnboundUnixSeqpacketSocketPairTest, SendtoWithoutConnectIgnoresAddr) { + // FIXME: gVisor tries to find /foo/bar and thus returns ENOENT. + if (IsRunningOnGvisor()) { + return; + } + + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + // Even a bogus address is completely ignored. + constexpr char kPath[] = "/foo/bar"; + + // Sanity check that kPath doesn't exist. + struct stat s; + ASSERT_THAT(stat(kPath, &s), SyscallFailsWithErrno(ENOENT)); + + struct sockaddr_un addr = {}; + addr.sun_family = AF_UNIX; + memcpy(addr.sun_path, kPath, sizeof(kPath)); + + char data = 'a'; + ASSERT_THAT( + sendto(sockets->second_fd(), &data, sizeof(data), 0, + reinterpret_cast<const struct sockaddr*>(&addr), sizeof(addr)), + SyscallFailsWithErrno(ENOTCONN)); +} + +INSTANTIATE_TEST_CASE_P( + AllUnixDomainSockets, UnboundUnixSeqpacketSocketPairTest, + ::testing::ValuesIn(IncludeReversals(VecCat<SocketPairKind>( + ApplyVec<SocketPairKind>( + FilesystemUnboundUnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_SEQPACKET}, + List<int>{0, SOCK_NONBLOCK}, + List<int>{0, SOCK_CLOEXEC})), + ApplyVec<SocketPairKind>( + AbstractUnboundUnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_SEQPACKET}, + List<int>{0, SOCK_NONBLOCK}, + List<int>{0, SOCK_CLOEXEC})))))); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git 
a/test/syscalls/linux/socket_unix_unbound_stream.cc b/test/syscalls/linux/socket_unix_unbound_stream.cc new file mode 100644 index 000000000..99636b221 --- /dev/null +++ b/test/syscalls/linux/socket_unix_unbound_stream.cc @@ -0,0 +1,738 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <stdio.h> +#include <sys/un.h> +#include "gtest/gtest.h" +#include "gtest/gtest.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// Test fixture for tests that apply to pairs of connected unix stream sockets. +using UnixStreamSocketPairTest = SocketPairTest; + +// FDPassPartialRead checks that sent control messages cannot be read after +// any of their associated data has been read while ignoring the control message +// by using read(2) instead of recvmsg(2). 
+TEST_P(UnixStreamSocketPairTest, FDPassPartialRead) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + auto pair = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(), + sent_data, sizeof(sent_data))); + + char received_data[sizeof(sent_data) / 2]; + ASSERT_THAT( + ReadFd(sockets->second_fd(), received_data, sizeof(received_data)), + SyscallSucceedsWithValue(sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(received_data))); + + RecvNoCmsg(sockets->second_fd(), received_data, sizeof(received_data)); + EXPECT_EQ(0, memcmp(sent_data + sizeof(received_data), received_data, + sizeof(received_data))); +} + +TEST_P(UnixStreamSocketPairTest, FDPassCoalescedRead) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data1[20]; + RandomizeBuffer(sent_data1, sizeof(sent_data1)); + + auto pair1 = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair1->second_fd(), + sent_data1, sizeof(sent_data1))); + + char sent_data2[20]; + RandomizeBuffer(sent_data2, sizeof(sent_data2)); + + auto pair2 = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair2->second_fd(), + sent_data2, sizeof(sent_data2))); + + char received_data[sizeof(sent_data1) + sizeof(sent_data2)]; + ASSERT_THAT( + ReadFd(sockets->second_fd(), received_data, sizeof(received_data)), + SyscallSucceedsWithValue(sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data1, received_data, sizeof(sent_data1))); + EXPECT_EQ(0, memcmp(sent_data2, received_data + sizeof(sent_data1), + sizeof(sent_data2))); +} + +// ZeroLengthMessageFDDiscarded checks that control messages associated with +// zero length 
messages are discarded. +TEST_P(UnixStreamSocketPairTest, ZeroLengthMessageFDDiscarded) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + // Zero length arrays are invalid in ISO C++, so allocate one of size 1 and + // send a length of 0. + char sent_data1[1] = {}; + + auto pair = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + ASSERT_NO_FATAL_FAILURE( + SendSingleFD(sockets->first_fd(), pair->second_fd(), sent_data1, 0)); + + char sent_data2[20]; + RandomizeBuffer(sent_data2, sizeof(sent_data2)); + + ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data2, sizeof(sent_data2)), + SyscallSucceedsWithValue(sizeof(sent_data2))); + + char received_data[sizeof(sent_data2)] = {}; + + RecvNoCmsg(sockets->second_fd(), received_data, sizeof(received_data)); + EXPECT_EQ(0, memcmp(sent_data2, received_data, sizeof(received_data))); +} + +// FDPassCoalescedRecv checks that control messages not in the first message are +// preserved in a coalesced recv. +TEST_P(UnixStreamSocketPairTest, FDPassCoalescedRecv) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data, sizeof(sent_data) / 2), + SyscallSucceedsWithValue(sizeof(sent_data) / 2)); + + auto pair = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(), + sent_data + sizeof(sent_data) / 2, + sizeof(sent_data) / 2)); + + char received_data[sizeof(sent_data)]; + + int fd = -1; + ASSERT_NO_FATAL_FAILURE(RecvSingleFD(sockets->second_fd(), &fd, received_data, + sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); + + ASSERT_NO_FATAL_FAILURE(TransferTest(fd, pair->first_fd())); +} + +// ReadsNotCoalescedAfterFDPass checks that messages after a message containing +// an FD control message are not coalesced. 
+TEST_P(UnixStreamSocketPairTest, ReadsNotCoalescedAfterFDPass) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + auto pair = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(), + sent_data, sizeof(sent_data) / 2)); + + ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data + sizeof(sent_data) / 2, + sizeof(sent_data) / 2), + SyscallSucceedsWithValue(sizeof(sent_data) / 2)); + + char received_data[sizeof(sent_data)]; + + int fd = -1; + ASSERT_NO_FATAL_FAILURE(RecvSingleFD(sockets->second_fd(), &fd, received_data, + sizeof(received_data), + sizeof(sent_data) / 2)); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data) / 2)); + + ASSERT_NO_FATAL_FAILURE(TransferTest(fd, pair->first_fd())); + EXPECT_THAT(close(fd), SyscallSucceeds()); + + ASSERT_NO_FATAL_FAILURE( + RecvNoCmsg(sockets->second_fd(), received_data, sizeof(sent_data) / 2)); + + EXPECT_EQ(0, memcmp(sent_data + sizeof(sent_data) / 2, received_data, + sizeof(sent_data) / 2)); +} + +// FDPassNotCombined checks that FD control messages are not combined in a +// coalesced read. 
+TEST_P(UnixStreamSocketPairTest, FDPassNotCombined) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + auto pair1 = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair1->second_fd(), + sent_data, sizeof(sent_data) / 2)); + + auto pair2 = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair2->second_fd(), + sent_data + sizeof(sent_data) / 2, + sizeof(sent_data) / 2)); + + char received_data[sizeof(sent_data)]; + + int fd = -1; + ASSERT_NO_FATAL_FAILURE(RecvSingleFD(sockets->second_fd(), &fd, received_data, + sizeof(received_data), + sizeof(sent_data) / 2)); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data) / 2)); + + ASSERT_NO_FATAL_FAILURE(TransferTest(fd, pair1->first_fd())); + + EXPECT_THAT(close(fd), SyscallSucceeds()); + fd = -1; + + ASSERT_NO_FATAL_FAILURE(RecvSingleFD(sockets->second_fd(), &fd, received_data, + sizeof(received_data), + sizeof(sent_data) / 2)); + + EXPECT_EQ(0, memcmp(sent_data + sizeof(sent_data) / 2, received_data, + sizeof(sent_data) / 2)); + + ASSERT_NO_FATAL_FAILURE(TransferTest(fd, pair2->first_fd())); + EXPECT_THAT(close(fd), SyscallSucceeds()); +} + +TEST_P(UnixStreamSocketPairTest, CredPassPartialRead) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + struct ucred sent_creds; + + ASSERT_THAT(sent_creds.pid = getpid(), SyscallSucceeds()); + ASSERT_THAT(sent_creds.uid = getuid(), SyscallSucceeds()); + ASSERT_THAT(sent_creds.gid = getgid(), SyscallSucceeds()); + + ASSERT_NO_FATAL_FAILURE( + SendCreds(sockets->first_fd(), sent_creds, sent_data, sizeof(sent_data))); + + int one = 1; + ASSERT_THAT(setsockopt(sockets->second_fd(), SOL_SOCKET, SO_PASSCRED, &one, + 
sizeof(one)), + SyscallSucceeds()); + + for (int i = 0; i < 2; i++) { + char received_data[10]; + struct ucred received_creds; + ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds, + received_data, sizeof(received_data), + sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data + i * sizeof(received_data), received_data, + sizeof(received_data))); + EXPECT_EQ(sent_creds.pid, received_creds.pid); + EXPECT_EQ(sent_creds.uid, received_creds.uid); + EXPECT_EQ(sent_creds.gid, received_creds.gid); + } +} + +// Unix stream sockets peek in the same way as datagram sockets. +// +// SinglePeek checks that only a single message is peekable in a single recv. +TEST_P(UnixStreamSocketPairTest, SinglePeek) { + if (!IsRunningOnGvisor()) { + // Don't run this test on linux kernels newer than 4.3.x Linux kernel commit + // 9f389e35674f5b086edd70ed524ca0f287259725 which changes this behavior. We + // used to target 3.11 compatibility, so disable this test on newer kernels. + // + // NOTE: Bring this up to Linux 4.4 compatibility. 
+ auto version = ASSERT_NO_ERRNO_AND_VALUE(GetKernelVersion()); + SKIP_IF(version.major > 4 || (version.major == 4 && version.minor >= 3)); + } + + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + char sent_data[40]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + ASSERT_THAT(RetryEINTR(send)(sockets->first_fd(), sent_data, + sizeof(sent_data) / 2, 0), + SyscallSucceedsWithValue(sizeof(sent_data) / 2)); + ASSERT_THAT( + RetryEINTR(send)(sockets->first_fd(), sent_data + sizeof(sent_data) / 2, + sizeof(sent_data) / 2, 0), + SyscallSucceedsWithValue(sizeof(sent_data) / 2)); + char received_data[sizeof(sent_data)]; + for (int i = 0; i < 3; i++) { + memset(received_data, 0, sizeof(received_data)); + ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data, + sizeof(received_data), MSG_PEEK), + SyscallSucceedsWithValue(sizeof(sent_data) / 2)); + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data) / 2)); + } + memset(received_data, 0, sizeof(received_data)); + ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data, + sizeof(sent_data) / 2, 0), + SyscallSucceedsWithValue(sizeof(sent_data) / 2)); + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data) / 2)); + memset(received_data, 0, sizeof(received_data)); + ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data, + sizeof(sent_data) / 2, 0), + SyscallSucceedsWithValue(sizeof(sent_data) / 2)); + EXPECT_EQ(0, memcmp(sent_data + sizeof(sent_data) / 2, received_data, + sizeof(sent_data) / 2)); +} + +TEST_P(UnixStreamSocketPairTest, CredsNotCoalescedUp) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data1[20]; + RandomizeBuffer(sent_data1, sizeof(sent_data1)); + + ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data1, sizeof(sent_data1)), + SyscallSucceedsWithValue(sizeof(sent_data1))); + + SetSoPassCred(sockets->second_fd()); + + char sent_data2[20]; + RandomizeBuffer(sent_data2, sizeof(sent_data2)); + + 
ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data2, sizeof(sent_data2)), + SyscallSucceedsWithValue(sizeof(sent_data2))); + + char received_data[sizeof(sent_data1) + sizeof(sent_data2)]; + + struct ucred received_creds; + ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds, + received_data, sizeof(received_data), + sizeof(sent_data1))); + + EXPECT_EQ(0, memcmp(sent_data1, received_data, sizeof(sent_data1))); + + struct ucred want_creds { + 0, 65534, 65534 + }; + + EXPECT_EQ(want_creds.pid, received_creds.pid); + EXPECT_EQ(want_creds.uid, received_creds.uid); + EXPECT_EQ(want_creds.gid, received_creds.gid); + + ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds, + received_data, sizeof(received_data), + sizeof(sent_data2))); + + EXPECT_EQ(0, memcmp(sent_data2, received_data, sizeof(sent_data2))); + + ASSERT_THAT(want_creds.pid = getpid(), SyscallSucceeds()); + ASSERT_THAT(want_creds.uid = getuid(), SyscallSucceeds()); + ASSERT_THAT(want_creds.gid = getgid(), SyscallSucceeds()); + + EXPECT_EQ(want_creds.pid, received_creds.pid); + EXPECT_EQ(want_creds.uid, received_creds.uid); + EXPECT_EQ(want_creds.gid, received_creds.gid); +} + +TEST_P(UnixStreamSocketPairTest, CredsNotCoalescedDown) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + SetSoPassCred(sockets->second_fd()); + + char sent_data1[20]; + RandomizeBuffer(sent_data1, sizeof(sent_data1)); + + ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data1, sizeof(sent_data1)), + SyscallSucceedsWithValue(sizeof(sent_data1))); + + UnsetSoPassCred(sockets->second_fd()); + + char sent_data2[20]; + RandomizeBuffer(sent_data2, sizeof(sent_data2)); + + ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data2, sizeof(sent_data2)), + SyscallSucceedsWithValue(sizeof(sent_data2))); + + SetSoPassCred(sockets->second_fd()); + + char received_data[sizeof(sent_data1) + sizeof(sent_data2)]; + struct ucred received_creds; + + ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), 
&received_creds, + received_data, sizeof(received_data), + sizeof(sent_data1))); + + EXPECT_EQ(0, memcmp(sent_data1, received_data, sizeof(sent_data1))); + + struct ucred want_creds; + ASSERT_THAT(want_creds.pid = getpid(), SyscallSucceeds()); + ASSERT_THAT(want_creds.uid = getuid(), SyscallSucceeds()); + ASSERT_THAT(want_creds.gid = getgid(), SyscallSucceeds()); + + EXPECT_EQ(want_creds.pid, received_creds.pid); + EXPECT_EQ(want_creds.uid, received_creds.uid); + EXPECT_EQ(want_creds.gid, received_creds.gid); + + ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds, + received_data, sizeof(received_data), + sizeof(sent_data2))); + + EXPECT_EQ(0, memcmp(sent_data2, received_data, sizeof(sent_data2))); + + want_creds = {0, 65534, 65534}; + + EXPECT_EQ(want_creds.pid, received_creds.pid); + EXPECT_EQ(want_creds.uid, received_creds.uid); + EXPECT_EQ(want_creds.gid, received_creds.gid); +} + +TEST_P(UnixStreamSocketPairTest, CoalescedCredsNoPasscred) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + SetSoPassCred(sockets->second_fd()); + + char sent_data1[20]; + RandomizeBuffer(sent_data1, sizeof(sent_data1)); + + ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data1, sizeof(sent_data1)), + SyscallSucceedsWithValue(sizeof(sent_data1))); + + UnsetSoPassCred(sockets->second_fd()); + + char sent_data2[20]; + RandomizeBuffer(sent_data2, sizeof(sent_data2)); + + ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data2, sizeof(sent_data2)), + SyscallSucceedsWithValue(sizeof(sent_data2))); + + char received_data[sizeof(sent_data1) + sizeof(sent_data2)]; + + ASSERT_NO_FATAL_FAILURE( + RecvNoCmsg(sockets->second_fd(), received_data, sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data1, received_data, sizeof(sent_data1))); + EXPECT_EQ(0, memcmp(sent_data2, received_data + sizeof(sent_data1), + sizeof(sent_data2))); +} + +TEST_P(UnixStreamSocketPairTest, CoalescedCreds1) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char 
sent_data1[20]; + RandomizeBuffer(sent_data1, sizeof(sent_data1)); + + ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data1, sizeof(sent_data1)), + SyscallSucceedsWithValue(sizeof(sent_data1))); + + char sent_data2[20]; + RandomizeBuffer(sent_data2, sizeof(sent_data2)); + + ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data2, sizeof(sent_data2)), + SyscallSucceedsWithValue(sizeof(sent_data2))); + + SetSoPassCred(sockets->second_fd()); + + char received_data[sizeof(sent_data1) + sizeof(sent_data2)]; + struct ucred received_creds; + + ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds, + received_data, sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data1, received_data, sizeof(sent_data1))); + EXPECT_EQ(0, memcmp(sent_data2, received_data + sizeof(sent_data1), + sizeof(sent_data2))); + + struct ucred want_creds { + 0, 65534, 65534 + }; + + EXPECT_EQ(want_creds.pid, received_creds.pid); + EXPECT_EQ(want_creds.uid, received_creds.uid); + EXPECT_EQ(want_creds.gid, received_creds.gid); +} + +TEST_P(UnixStreamSocketPairTest, CoalescedCreds2) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + SetSoPassCred(sockets->second_fd()); + + char sent_data1[20]; + RandomizeBuffer(sent_data1, sizeof(sent_data1)); + + ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data1, sizeof(sent_data1)), + SyscallSucceedsWithValue(sizeof(sent_data1))); + + char sent_data2[20]; + RandomizeBuffer(sent_data2, sizeof(sent_data2)); + + ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data2, sizeof(sent_data2)), + SyscallSucceedsWithValue(sizeof(sent_data2))); + + char received_data[sizeof(sent_data1) + sizeof(sent_data2)]; + struct ucred received_creds; + + ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds, + received_data, sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data1, received_data, sizeof(sent_data1))); + EXPECT_EQ(0, memcmp(sent_data2, received_data + sizeof(sent_data1), + sizeof(sent_data2))); + + struct ucred want_creds; 
+ ASSERT_THAT(want_creds.pid = getpid(), SyscallSucceeds()); + ASSERT_THAT(want_creds.uid = getuid(), SyscallSucceeds()); + ASSERT_THAT(want_creds.gid = getgid(), SyscallSucceeds()); + + EXPECT_EQ(want_creds.pid, received_creds.pid); + EXPECT_EQ(want_creds.uid, received_creds.uid); + EXPECT_EQ(want_creds.gid, received_creds.gid); +} + +TEST_P(UnixStreamSocketPairTest, NonCoalescedDifferingCreds1) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data1[20]; + RandomizeBuffer(sent_data1, sizeof(sent_data1)); + + ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data1, sizeof(sent_data1)), + SyscallSucceedsWithValue(sizeof(sent_data1))); + + SetSoPassCred(sockets->second_fd()); + + char sent_data2[20]; + RandomizeBuffer(sent_data2, sizeof(sent_data2)); + + ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data2, sizeof(sent_data2)), + SyscallSucceedsWithValue(sizeof(sent_data2))); + + char received_data1[sizeof(sent_data1) + sizeof(sent_data2)]; + struct ucred received_creds1; + + ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds1, + received_data1, sizeof(sent_data1))); + + EXPECT_EQ(0, memcmp(sent_data1, received_data1, sizeof(sent_data1))); + + struct ucred want_creds1 { + 0, 65534, 65534 + }; + + EXPECT_EQ(want_creds1.pid, received_creds1.pid); + EXPECT_EQ(want_creds1.uid, received_creds1.uid); + EXPECT_EQ(want_creds1.gid, received_creds1.gid); + + char received_data2[sizeof(sent_data1) + sizeof(sent_data2)]; + struct ucred received_creds2; + + ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds2, + received_data2, sizeof(sent_data2))); + + EXPECT_EQ(0, memcmp(sent_data2, received_data2, sizeof(sent_data2))); + + struct ucred want_creds2; + ASSERT_THAT(want_creds2.pid = getpid(), SyscallSucceeds()); + ASSERT_THAT(want_creds2.uid = getuid(), SyscallSucceeds()); + ASSERT_THAT(want_creds2.gid = getgid(), SyscallSucceeds()); + + EXPECT_EQ(want_creds2.pid, received_creds2.pid); + EXPECT_EQ(want_creds2.uid, 
received_creds2.uid); + EXPECT_EQ(want_creds2.gid, received_creds2.gid); +} + +TEST_P(UnixStreamSocketPairTest, NonCoalescedDifferingCreds2) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + SetSoPassCred(sockets->second_fd()); + + char sent_data1[20]; + RandomizeBuffer(sent_data1, sizeof(sent_data1)); + + ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data1, sizeof(sent_data1)), + SyscallSucceedsWithValue(sizeof(sent_data1))); + + UnsetSoPassCred(sockets->second_fd()); + + char sent_data2[20]; + RandomizeBuffer(sent_data2, sizeof(sent_data2)); + + ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data2, sizeof(sent_data2)), + SyscallSucceedsWithValue(sizeof(sent_data2))); + + SetSoPassCred(sockets->second_fd()); + + char received_data1[sizeof(sent_data1) + sizeof(sent_data2)]; + struct ucred received_creds1; + + ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds1, + received_data1, sizeof(sent_data1))); + + EXPECT_EQ(0, memcmp(sent_data1, received_data1, sizeof(sent_data1))); + + struct ucred want_creds1; + ASSERT_THAT(want_creds1.pid = getpid(), SyscallSucceeds()); + ASSERT_THAT(want_creds1.uid = getuid(), SyscallSucceeds()); + ASSERT_THAT(want_creds1.gid = getgid(), SyscallSucceeds()); + + EXPECT_EQ(want_creds1.pid, received_creds1.pid); + EXPECT_EQ(want_creds1.uid, received_creds1.uid); + EXPECT_EQ(want_creds1.gid, received_creds1.gid); + + char received_data2[sizeof(sent_data1) + sizeof(sent_data2)]; + struct ucred received_creds2; + + ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds2, + received_data2, sizeof(sent_data2))); + + EXPECT_EQ(0, memcmp(sent_data2, received_data2, sizeof(sent_data2))); + + struct ucred want_creds2 { + 0, 65534, 65534 + }; + + EXPECT_EQ(want_creds2.pid, received_creds2.pid); + EXPECT_EQ(want_creds2.uid, received_creds2.uid); + EXPECT_EQ(want_creds2.gid, received_creds2.gid); +} + +TEST_P(UnixStreamSocketPairTest, CoalescedDifferingCreds) { + auto sockets = 
ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + SetSoPassCred(sockets->second_fd()); + + char sent_data1[20]; + RandomizeBuffer(sent_data1, sizeof(sent_data1)); + + ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data1, sizeof(sent_data1)), + SyscallSucceedsWithValue(sizeof(sent_data1))); + + char sent_data2[20]; + RandomizeBuffer(sent_data2, sizeof(sent_data2)); + + ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data2, sizeof(sent_data2)), + SyscallSucceedsWithValue(sizeof(sent_data2))); + + UnsetSoPassCred(sockets->second_fd()); + + char sent_data3[20]; + RandomizeBuffer(sent_data3, sizeof(sent_data3)); + + ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data3, sizeof(sent_data3)), + SyscallSucceedsWithValue(sizeof(sent_data3))); + + char received_data[sizeof(sent_data1) + sizeof(sent_data2) + + sizeof(sent_data3)]; + + ASSERT_NO_FATAL_FAILURE( + RecvNoCmsg(sockets->second_fd(), received_data, sizeof(received_data))); + + EXPECT_EQ(0, memcmp(sent_data1, received_data, sizeof(sent_data1))); + EXPECT_EQ(0, memcmp(sent_data2, received_data + sizeof(sent_data1), + sizeof(sent_data2))); + EXPECT_EQ(0, memcmp(sent_data3, + received_data + sizeof(sent_data1) + sizeof(sent_data2), + sizeof(sent_data3))); +} + +INSTANTIATE_TEST_CASE_P( + AllUnixDomainSockets, UnixStreamSocketPairTest, + ::testing::ValuesIn(IncludeReversals(VecCat<SocketPairKind>( + ApplyVec<SocketPairKind>( + UnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_STREAM}, + List<int>{0, SOCK_NONBLOCK}, + List<int>{0, SOCK_CLOEXEC})), + ApplyVec<SocketPairKind>( + FilesystemBoundUnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_STREAM}, + List<int>{0, SOCK_NONBLOCK}, + List<int>{0, SOCK_CLOEXEC})), + ApplyVec<SocketPairKind>( + AbstractBoundUnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_STREAM}, + List<int>{0, SOCK_NONBLOCK}, + List<int>{0, SOCK_CLOEXEC})))))); + +// Test fixture for tests that apply to pairs of unbound unix stream sockets. 
+using UnboundUnixStreamSocketPairTest = SocketPairTest; + +TEST_P(UnboundUnixStreamSocketPairTest, SendtoWithoutConnect) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + char data = 'a'; + ASSERT_THAT(sendto(sockets->second_fd(), &data, sizeof(data), 0, + sockets->first_addr(), sockets->first_addr_size()), + SyscallFailsWithErrno(EOPNOTSUPP)); +} + +TEST_P(UnboundUnixStreamSocketPairTest, SendtoWithoutConnectIgnoresAddr) { + // FIXME: gVisor tries to find /foo/bar and thus returns ENOENT instead of + // the EOPNOTSUPP asserted at the end of this test, so skip the test there. + if (IsRunningOnGvisor()) { + return; + } + + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + // Even a bogus address is completely ignored: the sendto() below is + // expected to fail with EOPNOTSUPP, not with an address-lookup error. + constexpr char kPath[] = "/foo/bar"; + + // Sanity check that kPath doesn't exist. + struct stat s; + ASSERT_THAT(stat(kPath, &s), SyscallFailsWithErrno(ENOENT)); + + struct sockaddr_un addr = {}; + addr.sun_family = AF_UNIX; + memcpy(addr.sun_path, kPath, sizeof(kPath)); + + char data = 'a'; + ASSERT_THAT( + sendto(sockets->second_fd(), &data, sizeof(data), 0, + reinterpret_cast<const struct sockaddr*>(&addr), sizeof(addr)), + SyscallFailsWithErrno(EOPNOTSUPP)); +} + +// Instantiate the tests above for filesystem and abstract unbound SOCK_STREAM +// pairs, combined with the SOCK_NONBLOCK and SOCK_CLOEXEC flags. +// NOTE(review): IncludeReversals presumably also adds each pair with its two +// ends swapped -- confirm against its definition. +INSTANTIATE_TEST_CASE_P( + AllUnixDomainSockets, UnboundUnixStreamSocketPairTest, + ::testing::ValuesIn(IncludeReversals(VecCat<SocketPairKind>( + ApplyVec<SocketPairKind>( + FilesystemUnboundUnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_STREAM}, + List<int>{0, SOCK_NONBLOCK}, + List<int>{0, SOCK_CLOEXEC})), + ApplyVec<SocketPairKind>( + AbstractUnboundUnixDomainSocketPair, + AllBitwiseCombinations(List<int>{SOCK_STREAM}, + List<int>{0, SOCK_NONBLOCK}, + List<int>{0, SOCK_CLOEXEC})))))); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/stat.cc 
b/test/syscalls/linux/stat.cc new file mode 100644 index 000000000..aea19dbff --- /dev/null +++ b/test/syscalls/linux/stat.cc @@ -0,0 +1,410 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <errno.h> +#include <fcntl.h> +#include <sys/stat.h> +#include <sys/statfs.h> +#include <unistd.h> +#include <string> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "gtest/gtest.h" +#include "absl/strings/match.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" +#include "test/syscalls/linux/file_base.h" +#include "test/util/cleanup.h" +#include "test/util/file_descriptor.h" +#include "test/util/fs_util.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +class StatTest : public FileTest {}; + +TEST_F(StatTest, FstatatAbs) { + struct stat st; + + // Check that the stat works. + EXPECT_THAT(fstatat(AT_FDCWD, test_file_name_.c_str(), &st, 0), + SyscallSucceeds()); + EXPECT_TRUE(S_ISREG(st.st_mode)); +} + +TEST_F(StatTest, FstatatEmptyPath) { + struct stat st; + const auto fd = ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDONLY)); + + // Check that the stat works. 
+ EXPECT_THAT(fstatat(fd.get(), "", &st, AT_EMPTY_PATH), SyscallSucceeds()); + EXPECT_TRUE(S_ISREG(st.st_mode)); +} + +TEST_F(StatTest, FstatatRel) { + struct stat st; + int dirfd; + auto filename = std::string(Basename(test_file_name_)); + + // Open the temporary directory read-only. + ASSERT_THAT(dirfd = open(GetAbsoluteTestTmpdir().c_str(), O_RDONLY), + SyscallSucceeds()); + + // Check that the stat works. + EXPECT_THAT(fstatat(dirfd, filename.c_str(), &st, 0), SyscallSucceeds()); + EXPECT_TRUE(S_ISREG(st.st_mode)); + close(dirfd); +} + +TEST_F(StatTest, FstatatSymlink) { + struct stat st; + + // Check that the link is followed. + EXPECT_THAT(fstatat(AT_FDCWD, "/proc/self", &st, 0), SyscallSucceeds()); + EXPECT_TRUE(S_ISDIR(st.st_mode)); + EXPECT_FALSE(S_ISLNK(st.st_mode)); + + // Check that the flag works. + EXPECT_THAT(fstatat(AT_FDCWD, "/proc/self", &st, AT_SYMLINK_NOFOLLOW), + SyscallSucceeds()); + EXPECT_TRUE(S_ISLNK(st.st_mode)); + EXPECT_FALSE(S_ISDIR(st.st_mode)); +} + +TEST_F(StatTest, Nlinks) { + TempPath basedir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + + // Directory is initially empty; it should contain 2 links (one from its + // parent directory, one from its own "." entry). + EXPECT_THAT(Links(basedir.path()), IsPosixErrorOkAndHolds(2)); + + // Create a file in the test directory. Files shouldn't increase the link + // count on the base directory. + TempPath file1 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(basedir.path())); + EXPECT_THAT(Links(basedir.path()), IsPosixErrorOkAndHolds(2)); + + // Create subdirectories. This should increase the link count by 1 per + // subdirectory. + TempPath dir1 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(basedir.path())); + EXPECT_THAT(Links(basedir.path()), IsPosixErrorOkAndHolds(3)); + TempPath dir2 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(basedir.path())); + EXPECT_THAT(Links(basedir.path()), IsPosixErrorOkAndHolds(4)); + + // Removing directories should reduce the link count. 
+ dir1.reset(); + EXPECT_THAT(Links(basedir.path()), IsPosixErrorOkAndHolds(3)); + dir2.reset(); + EXPECT_THAT(Links(basedir.path()), IsPosixErrorOkAndHolds(2)); + + // Removing files should have no effect on link count. + file1.reset(); + EXPECT_THAT(Links(basedir.path()), IsPosixErrorOkAndHolds(2)); +} + +TEST_F(StatTest, BlocksIncreaseOnWrite) { + struct stat st; + + // Stat the empty file. + ASSERT_THAT(fstat(test_file_fd_.get(), &st), SyscallSucceeds()); + + const int initial_blocks = st.st_blocks; + + // Write to the file, making sure to exceed the block size. + std::vector<char> buf(2 * st.st_blksize, 'a'); + ASSERT_THAT(write(test_file_fd_.get(), buf.data(), buf.size()), + SyscallSucceedsWithValue(buf.size())); + + // Stat the file again, and verify that number of allocated blocks has + // increased. + ASSERT_THAT(fstat(test_file_fd_.get(), &st), SyscallSucceeds()); + EXPECT_GT(st.st_blocks, initial_blocks); +} + +TEST_F(StatTest, PathNotCleaned) { + TempPath basedir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + + // Create a file in the basedir. + TempPath file = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(basedir.path())); + + // Stating the file directly should succeed. + struct stat buf; + EXPECT_THAT(lstat(file.path().c_str(), &buf), SyscallSucceeds()); + + // Try to stat the file using a directory that does not exist followed by + // "..". If the path is cleaned prior to stating (which it should not be) + // then this will succeed. + const std::string bad_path = JoinPath("/does_not_exist/..", file.path()); + EXPECT_THAT(lstat(bad_path.c_str(), &buf), SyscallFailsWithErrno(ENOENT)); +} + +TEST_F(StatTest, PathCanContainDotDot) { + TempPath basedir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + TempPath subdir = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(basedir.path())); + const std::string subdir_name = std::string(Basename(subdir.path())); + + // Create a file in the subdir. 
+ TempPath file = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(subdir.path())); + const std::string file_name = std::string(Basename(file.path())); + + // Stat the file through a path that includes '..' and '.' but still resolves + // to the file. + const std::string good_path = + JoinPath(basedir.path(), subdir_name, "..", subdir_name, ".", file_name); + struct stat buf; + EXPECT_THAT(lstat(good_path.c_str(), &buf), SyscallSucceeds()); +} + +TEST_F(StatTest, PathCanContainEmptyComponent) { + TempPath basedir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + + // Create a file in the basedir. + TempPath file = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(basedir.path())); + const std::string file_name = std::string(Basename(file.path())); + + // Stat the file through a path that includes an empty component. We have to + // build this ourselves because JoinPath automatically removes empty + // components. + const std::string good_path = absl::StrCat(basedir.path(), "//", file_name); + struct stat buf; + EXPECT_THAT(lstat(good_path.c_str(), &buf), SyscallSucceeds()); +} + +TEST_F(StatTest, TrailingSlashNotCleanedReturnsENOTDIR) { + TempPath basedir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + + // Create a file in the basedir. + TempPath file = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(basedir.path())); + + // Stat the file with an extra "/" on the end of it. Since file is not a + // directory, this should return ENOTDIR. + const std::string bad_path = absl::StrCat(file.path(), "/"); + struct stat buf; + EXPECT_THAT(lstat(bad_path.c_str(), &buf), SyscallFailsWithErrno(ENOTDIR)); +} + +TEST_F(StatTest, LeadingDoubleSlash) { + // Create a file, and make sure we can stat it. + TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + struct stat st; + ASSERT_THAT(lstat(file.path().c_str(), &st), SyscallSucceeds()); + + // Now add an extra leading slash. 
+ const std::string double_slash_path = absl::StrCat("/", file.path()); + ASSERT_TRUE(absl::StartsWith(double_slash_path, "//")); + + // We should be able to stat the new path, and it should resolve to the same + // file (same device and inode). + struct stat double_slash_st; + ASSERT_THAT(lstat(double_slash_path.c_str(), &double_slash_st), + SyscallSucceeds()); + EXPECT_EQ(st.st_dev, double_slash_st.st_dev); + EXPECT_EQ(st.st_ino, double_slash_st.st_ino); +} + +// Test that a rename doesn't change the underlying file. +TEST_F(StatTest, StatDoesntChangeAfterRename) { + const TempPath old_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const TempPath new_path(NewTempAbsPath()); + + struct stat st_old = {}; + struct stat st_new = {}; + + ASSERT_THAT(stat(old_dir.path().c_str(), &st_old), SyscallSucceeds()); + ASSERT_THAT(rename(old_dir.path().c_str(), new_path.path().c_str()), + SyscallSucceeds()); + ASSERT_THAT(stat(new_path.path().c_str(), &st_new), SyscallSucceeds()); + + EXPECT_EQ(st_old.st_nlink, st_new.st_nlink); + EXPECT_EQ(st_old.st_dev, st_new.st_dev); + EXPECT_EQ(st_old.st_ino, st_new.st_ino); + EXPECT_EQ(st_old.st_mode, st_new.st_mode); + EXPECT_EQ(st_old.st_uid, st_new.st_uid); + EXPECT_EQ(st_old.st_gid, st_new.st_gid); + EXPECT_EQ(st_old.st_size, st_new.st_size); +} + +// Test link counts with a regular file as the child. +TEST_F(StatTest, LinkCountsWithRegularFileChild) { + const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + + struct stat st_parent_before = {}; + ASSERT_THAT(stat(dir.path().c_str(), &st_parent_before), SyscallSucceeds()); + EXPECT_EQ(st_parent_before.st_nlink, 2); + + // Adding a regular file doesn't adjust the parent's link count. 
+ const TempPath child = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir.path())); + + struct stat st_parent_after = {}; + ASSERT_THAT(stat(dir.path().c_str(), &st_parent_after), SyscallSucceeds()); + EXPECT_EQ(st_parent_after.st_nlink, 2); + + // The child should have a single link from the parent. + struct stat st_child = {}; + ASSERT_THAT(stat(child.path().c_str(), &st_child), SyscallSucceeds()); + EXPECT_TRUE(S_ISREG(st_child.st_mode)); + EXPECT_EQ(st_child.st_nlink, 1); + + // Finally unlinking the child should not affect the parent's link count. + ASSERT_THAT(unlink(child.path().c_str()), SyscallSucceeds()); + ASSERT_THAT(stat(dir.path().c_str(), &st_parent_after), SyscallSucceeds()); + EXPECT_EQ(st_parent_after.st_nlink, 2); +} + +// This test verifies that inodes remain around when there is an open fd +// after link count hits 0. +TEST_F(StatTest, ZeroLinksOpenFdRegularFileChild) { + // Setting the environment variable GVISOR_GOFER_UNCACHED to any value + // will prevent this test from running; see the tmpfs lifecycle. + // + // We need to support this because when a file is unlinked and we forward + // the stat to the gofer it would return ENOENT. + const char* uncached_gofer = getenv("GVISOR_GOFER_UNCACHED"); + SKIP_IF(uncached_gofer != nullptr); + + // We don't support saving unlinked files. + const DisableSave ds; + + const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const TempPath child = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + dir.path(), "hello", TempPath::kDefaultFileMode)); + + // The child should have a single link from the parent. + struct stat st_child_before = {}; + ASSERT_THAT(stat(child.path().c_str(), &st_child_before), SyscallSucceeds()); + EXPECT_TRUE(S_ISREG(st_child_before.st_mode)); + EXPECT_EQ(st_child_before.st_nlink, 1); + EXPECT_EQ(st_child_before.st_size, 5); // Hello is 5 bytes. + + // Open the file so we can fstat after unlinking. 
+ const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(child.path(), O_RDONLY)); + + // Now a stat should return ENOENT but we should still be able to stat + // via the open fd and fstat. + ASSERT_THAT(unlink(child.path().c_str()), SyscallSucceeds()); + + // Since the file has no more links stat should fail. + struct stat st_child_after = {}; + ASSERT_THAT(stat(child.path().c_str(), &st_child_after), + SyscallFailsWithErrno(ENOENT)); + + // Fstat should still allow us to access the same file via the fd. + struct stat st_child_fd = {}; + ASSERT_THAT(fstat(fd.get(), &st_child_fd), SyscallSucceeds()); + EXPECT_EQ(st_child_before.st_dev, st_child_fd.st_dev); + EXPECT_EQ(st_child_before.st_ino, st_child_fd.st_ino); + EXPECT_EQ(st_child_before.st_mode, st_child_fd.st_mode); + EXPECT_EQ(st_child_before.st_uid, st_child_fd.st_uid); + EXPECT_EQ(st_child_before.st_gid, st_child_fd.st_gid); + EXPECT_EQ(st_child_before.st_size, st_child_fd.st_size); + + // TODO: This isn't ideal but since fstatfs(2) will always return + // OVERLAYFS_SUPER_MAGIC we have no way to know if this fs is backed by a + // gofer which doesn't support links. + EXPECT_TRUE(st_child_fd.st_nlink == 0 || st_child_fd.st_nlink == 1); +} + +// Test link counts with a directory as the child. +TEST_F(StatTest, LinkCountsWithDirChild) { + const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + + // Before a child is added the two links are "." and the link from the parent. + struct stat st_parent_before = {}; + ASSERT_THAT(stat(dir.path().c_str(), &st_parent_before), SyscallSucceeds()); + EXPECT_EQ(st_parent_before.st_nlink, 2); + + // Create a subdirectory and stat for the parent link counts. + const TempPath sub_dir = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(dir.path())); + + // The three links are ".", the link from the parent, and the link from + // the child as "..". 
+ struct stat st_parent_after = {}; + ASSERT_THAT(stat(dir.path().c_str(), &st_parent_after), SyscallSucceeds()); + EXPECT_EQ(st_parent_after.st_nlink, 3); + + // The child will have 1 link from the parent and 1 link which represents ".". + struct stat st_child = {}; + ASSERT_THAT(stat(sub_dir.path().c_str(), &st_child), SyscallSucceeds()); + EXPECT_TRUE(S_ISDIR(st_child.st_mode)); + EXPECT_EQ(st_child.st_nlink, 2); + + // Finally delete the child dir and the parent link count should return to 2. + ASSERT_THAT(rmdir(sub_dir.path().c_str()), SyscallSucceeds()); + ASSERT_THAT(stat(dir.path().c_str(), &st_parent_after), SyscallSucceeds()); + + // Now we should only have links from the parent and "." since the subdir + // has been removed. + EXPECT_EQ(st_parent_after.st_nlink, 2); +} + +// Test statting a child of a non-directory. +TEST_F(StatTest, ChildOfNonDir) { + // Create a path that has a child of a regular file. + const std::string filename = JoinPath(test_file_name_, "child"); + + // Statting the path should return ENOTDIR. + struct stat st; + EXPECT_THAT(lstat(filename.c_str(), &st), SyscallFailsWithErrno(ENOTDIR)); +} + +// Verify that we get an ELOOP from too many symbolic links even when there +// are directories in the middle. +TEST_F(StatTest, LstatELOOPPath) { + const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + std::string subdir_base = "subdir"; + ASSERT_THAT(mkdir(JoinPath(dir.path(), subdir_base).c_str(), 0755), + SyscallSucceeds()); + + std::string target = JoinPath(dir.path(), subdir_base, subdir_base); + std::string dst = JoinPath("..", subdir_base); + ASSERT_THAT(symlink(dst.c_str(), target.c_str()), SyscallSucceeds()); + auto cleanup = Cleanup( + [&target]() { EXPECT_THAT(unlink(target.c_str()), SyscallSucceeds()); }); + + // Now build a path which is /subdir/subdir/... repeated many times so that + // we can build a path that is shorter than PATH_MAX but can still cause + // too many symbolic links. 
Note: Every other subdir is actually a directory + // so we're not in a situation where it's a -> b -> a -> b, where a and b + // are symbolic links. + std::string path = dir.path(); + std::string subdir_append = absl::StrCat("/", subdir_base); + do { + absl::StrAppend(&path, subdir_append); + // Keep appending /subdir until we would overflow PATH_MAX. + } while ((path.size() + subdir_append.size()) < PATH_MAX); + + struct stat s = {}; + ASSERT_THAT(lstat(path.c_str(), &s), SyscallFailsWithErrno(ELOOP)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/stat_times.cc b/test/syscalls/linux/stat_times.cc new file mode 100644 index 000000000..442957c65 --- /dev/null +++ b/test/syscalls/linux/stat_times.cc @@ -0,0 +1,220 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <fcntl.h> +#include <sys/stat.h> + +#include <tuple> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/util/file_descriptor.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +using ::testing::IsEmpty; +using ::testing::Not; + +class StatTimesTest : public ::testing::Test { + protected: + std::tuple<absl::Time, absl::Time, absl::Time> GetTime(const TempPath& file) { + struct stat statbuf = {}; + EXPECT_THAT(stat(file.path().c_str(), &statbuf), SyscallSucceeds()); + + const auto atime = absl::TimeFromTimespec(statbuf.st_atim); + const auto mtime = absl::TimeFromTimespec(statbuf.st_mtim); + const auto ctime = absl::TimeFromTimespec(statbuf.st_ctim); + return std::make_tuple(atime, mtime, ctime); + } +}; + +TEST_F(StatTimesTest, FileCreationTimes) { + const DisableSave ds; // Timing-related test. + + // Get a time for when the file is created. + const absl::Time before = absl::Now() - absl::Seconds(1); + const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const absl::Time after = absl::Now() + absl::Seconds(1); + + absl::Time atime, mtime, ctime; + std::tie(atime, mtime, ctime) = GetTime(file); + + EXPECT_LE(before, atime); + EXPECT_LE(before, mtime); + EXPECT_LE(before, ctime); + EXPECT_GE(after, atime); + EXPECT_GE(after, mtime); + EXPECT_GE(after, ctime); +} + +TEST_F(StatTimesTest, FileCtimeChanges) { + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + + MaybeSave(); // FIXME: ctime is inconsistent. + + absl::Time atime, mtime, ctime; + std::tie(atime, mtime, ctime) = GetTime(file); + + absl::SleepFor(absl::Seconds(1)); + + // Chmod should only change ctime. 
+ EXPECT_THAT(chmod(file.path().c_str(), 0666), SyscallSucceeds()); + + absl::Time atime2, mtime2, ctime2; + std::tie(atime2, mtime2, ctime2) = GetTime(file); + EXPECT_EQ(atime2, atime); + EXPECT_EQ(mtime2, mtime); + EXPECT_GT(ctime2, ctime); + + absl::SleepFor(absl::Seconds(1)); + + // Rename should only change ctime. + const auto newpath = NewTempAbsPath(); + EXPECT_THAT(rename(file.path().c_str(), newpath.c_str()), SyscallSucceeds()); + file.reset(newpath); + + std::tie(atime, mtime, ctime) = GetTime(file); + EXPECT_EQ(atime, atime2); + EXPECT_EQ(mtime, mtime2); + EXPECT_GT(ctime, ctime2); + + absl::SleepFor(absl::Seconds(1)); + + // Utimes should only change ctime and the time that we ask to change (atime + // to now in this case). + const absl::Time before = absl::Now() - absl::Seconds(1); + const struct timespec ts[2] = {{0, UTIME_NOW}, {0, UTIME_OMIT}}; + ASSERT_THAT(utimensat(AT_FDCWD, file.path().c_str(), ts, 0), + SyscallSucceeds()); + const absl::Time after = absl::Now() + absl::Seconds(1); + + std::tie(atime2, mtime2, ctime2) = GetTime(file); + EXPECT_LE(before, atime2); + EXPECT_GE(after, atime2); + EXPECT_EQ(mtime2, mtime); + EXPECT_GT(ctime2, ctime); +} + +TEST_F(StatTimesTest, FileMtimeChanges) { + const auto file = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateFileWith(GetAbsoluteTestTmpdir(), "yaaass", 0666)); + + absl::Time atime, mtime, ctime; + std::tie(atime, mtime, ctime) = GetTime(file); + + absl::SleepFor(absl::Seconds(1)); + + // Truncate should only change mtime and ctime. + EXPECT_THAT(truncate(file.path().c_str(), 0), SyscallSucceeds()); + + absl::Time atime2, mtime2, ctime2; + std::tie(atime2, mtime2, ctime2) = GetTime(file); + EXPECT_EQ(atime2, atime); + EXPECT_GT(mtime2, mtime); + EXPECT_GT(ctime2, ctime); + + absl::SleepFor(absl::Seconds(1)); + + // Write should only change mtime and ctime. 
+ const auto fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0)); + const std::string contents = "all the single dollars"; + EXPECT_THAT(write(fd.get(), contents.data(), contents.size()), + SyscallSucceeds()); + + std::tie(atime, mtime, ctime) = GetTime(file); + EXPECT_EQ(atime, atime2); + EXPECT_GT(mtime, mtime2); + EXPECT_GT(ctime, ctime2); +} + +TEST_F(StatTimesTest, FileAtimeChanges) { + const std::string contents = "bills bills bills"; + const auto file = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateFileWith(GetAbsoluteTestTmpdir(), contents, 0666)); + + MaybeSave(); // FIXME: ctime is inconsistent. + + absl::Time atime, mtime, ctime; + std::tie(atime, mtime, ctime) = GetTime(file); + + absl::SleepFor(absl::Seconds(1)); + + const auto fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY, 0)); + + // Read should only change atime. + char buf[20]; + const absl::Time before = absl::Now() - absl::Seconds(1); + int read_result; + ASSERT_THAT(read_result = read(fd.get(), buf, sizeof(buf)), + SyscallSucceeds()); + const absl::Time after = absl::Now() + absl::Seconds(1); + + EXPECT_EQ(std::string(buf, read_result), contents); + + absl::Time atime2, mtime2, ctime2; + std::tie(atime2, mtime2, ctime2) = GetTime(file); + + EXPECT_LE(before, atime2); + EXPECT_GE(after, atime2); + EXPECT_GT(atime2, atime); + EXPECT_EQ(mtime2, mtime); + EXPECT_EQ(ctime2, ctime); +} + +TEST_F(StatTimesTest, DirAtimeChanges) { + const auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const auto file = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir.path())); + + MaybeSave(); // FIXME: ctime is inconsistent. + + absl::Time atime, mtime, ctime; + std::tie(atime, mtime, ctime) = GetTime(dir); + + absl::SleepFor(absl::Seconds(1)); + + const absl::Time before = absl::Now() - absl::Seconds(1); + + // NOTE: Keep an fd open. 
This ensures that the inode backing the + // directory won't be destroyed before the final GetTime to avoid writing out + // timestamps and causing side effects. + const auto fd = ASSERT_NO_ERRNO_AND_VALUE(Open(dir.path(), O_RDONLY, 0)); + + // Listing the directory contents should only change atime. + auto contents = ASSERT_NO_ERRNO_AND_VALUE(ListDir(dir.path(), false)); + EXPECT_THAT(contents, Not(IsEmpty())); + + const absl::Time after = absl::Now() + absl::Seconds(1); + + absl::Time atime2, mtime2, ctime2; + std::tie(atime2, mtime2, ctime2) = GetTime(dir); + + EXPECT_LE(before, atime2); + EXPECT_GE(after, atime2); + EXPECT_GT(atime2, atime); + EXPECT_EQ(mtime2, mtime); + EXPECT_EQ(ctime2, ctime); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/statfs.cc b/test/syscalls/linux/statfs.cc new file mode 100644 index 000000000..1fc9758c9 --- /dev/null +++ b/test/syscalls/linux/statfs.cc @@ -0,0 +1,81 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <fcntl.h> +#include <limits.h> +#include <sys/statfs.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "test/util/file_descriptor.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// statfs(2) on a generated /tmp path that this test never creates is expected +// to fail with ENOENT. +TEST(StatfsTest, CannotStatBadPath) { + auto temp_file = NewTempAbsPathInDir("/tmp"); + + struct statfs st; + EXPECT_THAT(statfs(temp_file.c_str(), &st), SyscallFailsWithErrno(ENOENT)); +} + +// statfs(2) succeeds on a freshly created temporary file. +TEST(StatfsTest, InternalTmpfs) { + auto temp_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + + struct statfs st; + EXPECT_THAT(statfs(temp_file.path().c_str(), &st), SyscallSucceeds()); +} + +// statfs(2) succeeds on /dev/shm. +TEST(StatfsTest, InternalDevShm) { + struct statfs st; + EXPECT_THAT(statfs("/dev/shm", &st), SyscallSucceeds()); +} + +// The filesystem mounted at /dev/shm reports NAME_MAX as its maximum file name +// length. +TEST(StatfsTest, NameLen) { + struct statfs st; + // ASSERT (not EXPECT) so that st is never read uninitialized when statfs + // fails. + ASSERT_THAT(statfs("/dev/shm", &st), SyscallSucceeds()); + + EXPECT_EQ(st.f_namelen, NAME_MAX); +} + +// fstatfs(2) on an invalid file descriptor fails with EBADF. +TEST(FstatfsTest, CannotStatBadFd) { + struct statfs st; + EXPECT_THAT(fstatfs(-1, &st), SyscallFailsWithErrno(EBADF)); +} + +// fstatfs(2) succeeds on a descriptor for a freshly created temporary file. +TEST(FstatfsTest, InternalTmpfs) { + auto temp_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(temp_file.path(), O_RDONLY)); + + struct statfs st; + EXPECT_THAT(fstatfs(fd.get(), &st), SyscallSucceeds()); +} + +// fstatfs(2) succeeds on a descriptor for /dev/shm. +TEST(FstatfsTest, InternalDevShm) { + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/shm", O_RDONLY)); + + struct statfs st; + EXPECT_THAT(fstatfs(fd.get(), &st), SyscallSucceeds()); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/sticky.cc b/test/syscalls/linux/sticky.cc new file mode 100644 index 000000000..563763d10 --- /dev/null +++ b/test/syscalls/linux/sticky.cc @@ -0,0 +1,116 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use 
this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <fcntl.h> +#include <grp.h> +#include <sys/prctl.h> +#include <sys/types.h> +#include <unistd.h> +#include <vector> + +#include "gtest/gtest.h" +#include "test/util/capability_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/fs_util.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +DEFINE_int32(scratch_uid1, 65534, "first scratch UID"); +DEFINE_int32(scratch_gid, 65534, "first scratch GID"); + +namespace gvisor { +namespace testing { + +namespace { + +TEST(StickyTest, StickyBitPermDenied) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SETUID))); + + auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + EXPECT_THAT(chmod(dir.path().c_str(), 0777 | S_ISVTX), SyscallSucceeds()); + std::string path = JoinPath(dir.path(), "NewDir"); + ASSERT_THAT(mkdir(path.c_str(), 0755), SyscallSucceeds()); + + // Drop privileges and change IDs only in child thread, or else this parent + // thread won't be able to open some log files after the test ends. + ScopedThread([&] { + // Drop privileges. + if (HaveCapability(CAP_FOWNER).ValueOrDie()) { + EXPECT_NO_ERRNO(SetCapability(CAP_FOWNER, false)); + } + + // Change EUID and EGID. 
+ EXPECT_THAT(syscall(SYS_setresgid, -1, FLAGS_scratch_gid, -1), + SyscallSucceeds()); + EXPECT_THAT(syscall(SYS_setresuid, -1, FLAGS_scratch_uid1, -1), + SyscallSucceeds()); + + EXPECT_THAT(rmdir(path.c_str()), SyscallFailsWithErrno(EPERM)); + }); +} + +TEST(StickyTest, StickyBitSameUID) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SETUID))); + + auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + EXPECT_THAT(chmod(dir.path().c_str(), 0777 | S_ISVTX), SyscallSucceeds()); + std::string path = JoinPath(dir.path(), "NewDir"); + ASSERT_THAT(mkdir(path.c_str(), 0755), SyscallSucceeds()); + + // Drop privileges and change IDs only in child thread, or else this parent + // thread won't be able to open some log files after the test ends. + ScopedThread([&] { + // Drop privileges. + if (HaveCapability(CAP_FOWNER).ValueOrDie()) { + EXPECT_NO_ERRNO(SetCapability(CAP_FOWNER, false)); + } + + // Change EGID. + EXPECT_THAT(syscall(SYS_setresgid, -1, FLAGS_scratch_gid, -1), + SyscallSucceeds()); + + // We still have the same EUID. + EXPECT_THAT(rmdir(path.c_str()), SyscallSucceeds()); + }); +} + +TEST(StickyTest, StickyBitCapFOWNER) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SETUID))); + + auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + EXPECT_THAT(chmod(dir.path().c_str(), 0777 | S_ISVTX), SyscallSucceeds()); + std::string path = JoinPath(dir.path(), "NewDir"); + ASSERT_THAT(mkdir(path.c_str(), 0755), SyscallSucceeds()); + + // Drop privileges and change IDs only in child thread, or else this parent + // thread won't be able to open some log files after the test ends. + ScopedThread([&] { + // Set PR_SET_KEEPCAPS. + EXPECT_THAT(prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0), SyscallSucceeds()); + + // Change EUID and EGID. 
+ EXPECT_THAT(syscall(SYS_setresgid, -1, FLAGS_scratch_gid, -1), + SyscallSucceeds()); + EXPECT_THAT(syscall(SYS_setresuid, -1, FLAGS_scratch_uid1, -1), + SyscallSucceeds()); + + EXPECT_NO_ERRNO(SetCapability(CAP_FOWNER, true)); + EXPECT_THAT(rmdir(path.c_str()), SyscallSucceeds()); + }); +} +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/symlink.cc b/test/syscalls/linux/symlink.cc new file mode 100644 index 000000000..cfc87bc8f --- /dev/null +++ b/test/syscalls/linux/symlink.cc @@ -0,0 +1,288 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <errno.h> +#include <fcntl.h> +#include <string.h> +#include <unistd.h> + +#include <string> + +#include "gtest/gtest.h" +#include "test/util/capability_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/fs_util.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +mode_t FilePermission(const std::string& path) { + struct stat buf = {0}; + TEST_CHECK(lstat(path.c_str(), &buf) == 0); + return buf.st_mode & 0777; +} + +// Test that name collisions are checked on the new link path, not the source +// path. 
+TEST(SymlinkTest, CanCreateSymlinkWithCachedSourceDirent) { + const std::string srcname = NewTempAbsPath(); + const std::string newname = NewTempAbsPath(); + const std::string basedir = std::string(Dirname(srcname)); + ASSERT_EQ(basedir, Dirname(newname)); + + ASSERT_THAT(chdir(basedir.c_str()), SyscallSucceeds()); + + // Open the source node to cause the underlying dirent to be cached. It will + // remain cached while we have the file open. + int fd; + ASSERT_THAT(fd = open(srcname.c_str(), O_CREAT | O_RDWR, 0666), + SyscallSucceeds()); + FileDescriptor fd_closer(fd); + + // Attempt to create a symlink. If the bug exists, this will fail since the + // dirent link creation code will check for a name collision on the source + // link name. + EXPECT_THAT(symlink(std::string(Basename(srcname)).c_str(), + std::string(Basename(newname)).c_str()), + SyscallSucceeds()); +} + +TEST(SymlinkTest, CanCreateSymlinkFile) { + const std::string oldname = NewTempAbsPath(); + const std::string newname = NewTempAbsPath(); + + int fd; + ASSERT_THAT(fd = open(oldname.c_str(), O_CREAT | O_RDWR, 0666), + SyscallSucceeds()); + EXPECT_THAT(close(fd), SyscallSucceeds()); + + EXPECT_THAT(symlink(oldname.c_str(), newname.c_str()), SyscallSucceeds()); + EXPECT_EQ(FilePermission(newname), 0777); + + auto link = ASSERT_NO_ERRNO_AND_VALUE(ReadLink(newname)); + EXPECT_EQ(oldname, link); + + EXPECT_THAT(unlink(newname.c_str()), SyscallSucceeds()); + EXPECT_THAT(unlink(oldname.c_str()), SyscallSucceeds()); +} + +TEST(SymlinkTest, CanCreateSymlinkDir) { + const std::string olddir = NewTempAbsPath(); + const std::string newdir = NewTempAbsPath(); + + EXPECT_THAT(mkdir(olddir.c_str(), 0777), SyscallSucceeds()); + EXPECT_THAT(symlink(olddir.c_str(), newdir.c_str()), SyscallSucceeds()); + EXPECT_EQ(FilePermission(newdir), 0777); + + auto link = ASSERT_NO_ERRNO_AND_VALUE(ReadLink(newdir)); + EXPECT_EQ(olddir, link); + + EXPECT_THAT(unlink(newdir.c_str()), SyscallSucceeds()); + + 
ASSERT_THAT(rmdir(olddir.c_str()), SyscallSucceeds()); +} + +TEST(SymlinkTest, CannotCreateSymlinkInReadOnlyDir) { + // Drop capabilities that allow us to override file and directory permissions. + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false)); + + const std::string olddir = NewTempAbsPath(); + ASSERT_THAT(mkdir(olddir.c_str(), 0444), SyscallSucceeds()); + + const std::string newdir = NewTempAbsPathInDir(olddir); + EXPECT_THAT(symlink(olddir.c_str(), newdir.c_str()), + SyscallFailsWithErrno(EACCES)); + + ASSERT_THAT(rmdir(olddir.c_str()), SyscallSucceeds()); +} + +TEST(SymlinkTest, CannotSymlinkOverExistingFile) { + const std::string oldname = NewTempAbsPath(); + const std::string newname = NewTempAbsPath(); + + int oldfd; + int newfd; + ASSERT_THAT(oldfd = open(oldname.c_str(), O_CREAT | O_RDWR, 0666), + SyscallSucceeds()); + EXPECT_THAT(close(oldfd), SyscallSucceeds()); + ASSERT_THAT(newfd = open(newname.c_str(), O_CREAT | O_RDWR, 0666), + SyscallSucceeds()); + EXPECT_THAT(close(newfd), SyscallSucceeds()); + + EXPECT_THAT(symlink(oldname.c_str(), newname.c_str()), + SyscallFailsWithErrno(EEXIST)); + + EXPECT_THAT(unlink(oldname.c_str()), SyscallSucceeds()); + EXPECT_THAT(unlink(newname.c_str()), SyscallSucceeds()); +} + +TEST(SymlinkTest, OldnameIsEmpty) { + const std::string newname = NewTempAbsPath(); + EXPECT_THAT(symlink("", newname.c_str()), SyscallFailsWithErrno(ENOENT)); +} + +TEST(SymlinkTest, OldnameIsDangling) { + const std::string newname = NewTempAbsPath(); + EXPECT_THAT(symlink("/dangling", newname.c_str()), SyscallSucceeds()); + + // This is required for S/R random save tests, which pre-run this test + // in the same TEST_TMPDIR, which means that we need to clean it for any + // operations exclusively creating files, like symlink above. 
+ EXPECT_THAT(unlink(newname.c_str()), SyscallSucceeds()); +} + +TEST(SymlinkTest, NewnameCannotExist) { + const std::string newname = + JoinPath(GetAbsoluteTestTmpdir(), "thisdoesnotexist", "foo"); + EXPECT_THAT(symlink("/thisdoesnotmatter", newname.c_str()), + SyscallFailsWithErrno(ENOENT)); +} + +TEST(SymlinkTest, CanEvaluateLink) { + const std::string oldname = NewTempAbsPath(); + const std::string newname = NewTempAbsPath(); + + int fd; + ASSERT_THAT(fd = open(oldname.c_str(), O_CREAT | O_RDWR, 0666), + SyscallSucceeds()); + struct stat old; + EXPECT_THAT(fstat(fd, &old), SyscallSucceeds()); + EXPECT_THAT(close(fd), SyscallSucceeds()); + + EXPECT_THAT(symlink(oldname.c_str(), newname.c_str()), SyscallSucceeds()); + EXPECT_EQ(FilePermission(newname), 0777); + + EXPECT_THAT(fd = open(newname.c_str(), O_RDWR, 0666), SyscallSucceeds()); + struct stat old_linked; + EXPECT_THAT(fstat(fd, &old_linked), SyscallSucceeds()); + EXPECT_THAT(close(fd), SyscallSucceeds()); + + // Check that in fact newname points to the file we expect. + // FIXME: use only inodes here once they are consistent, + // but this is better than nothing. + EXPECT_EQ(old.st_dev, old_linked.st_dev); + EXPECT_EQ(old.st_mode, old_linked.st_mode); + EXPECT_EQ(old.st_size, old_linked.st_size); + + EXPECT_THAT(unlink(newname.c_str()), SyscallSucceeds()); + EXPECT_THAT(unlink(oldname.c_str()), SyscallSucceeds()); +} + +TEST(SymlinkTest, TargetIsNotMapped) { + const std::string oldname = NewTempAbsPath(); + const std::string newname = NewTempAbsPath(); + + int fd; + // Create the target so that when we read the link, it exists. + ASSERT_THAT(fd = open(oldname.c_str(), O_CREAT | O_RDWR, 0666), + SyscallSucceeds()); + EXPECT_THAT(close(fd), SyscallSucceeds()); + + // Create a symlink called newname that points to oldname. 
+ EXPECT_THAT(symlink(oldname.c_str(), newname.c_str()), SyscallSucceeds()); + + std::vector<char> buf(1024); + int linksize; + // Read the link and assert that the oldname is still the same. + EXPECT_THAT(linksize = readlink(newname.c_str(), buf.data(), 1024), + SyscallSucceeds()); + EXPECT_EQ(0, strncmp(oldname.c_str(), buf.data(), linksize)); + + EXPECT_THAT(unlink(newname.c_str()), SyscallSucceeds()); + EXPECT_THAT(unlink(oldname.c_str()), SyscallSucceeds()); +} + +TEST(SymlinkTest, PreadFromSymlink) { + std::string name = NewTempAbsPath(); + int fd; + ASSERT_THAT(fd = open(name.c_str(), O_CREAT, 0644), SyscallSucceeds()); + ASSERT_THAT(close(fd), SyscallSucceeds()); + + std::string linkname = NewTempAbsPath(); + ASSERT_THAT(symlink(name.c_str(), linkname.c_str()), SyscallSucceeds()); + + ASSERT_THAT(fd = open(linkname.c_str(), O_RDONLY), SyscallSucceeds()); + + char buf[1024]; + EXPECT_THAT(pread64(fd, buf, 1024, 0), SyscallSucceeds()); + EXPECT_THAT(close(fd), SyscallSucceeds()); + + EXPECT_THAT(unlink(name.c_str()), SyscallSucceeds()); + EXPECT_THAT(unlink(linkname.c_str()), SyscallSucceeds()); +} + +TEST(SymlinkTest, SymlinkAtDegradedPermissions_NoRandomSave) { + // Drop capabilities that allow us to override file and directory permissions. + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false)); + + auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir.path())); + + int dirfd; + ASSERT_THAT(dirfd = open(dir.path().c_str(), O_DIRECTORY, 0), + SyscallSucceeds()); + + const DisableSave ds; // Permissions are dropped. 
+ EXPECT_THAT(fchmod(dirfd, 0), SyscallSucceeds()); + + std::string basename = std::string(Basename(file.path())); + EXPECT_THAT(symlinkat("/dangling", dirfd, basename.c_str()), + SyscallFailsWithErrno(EACCES)); + EXPECT_THAT(close(dirfd), SyscallSucceeds()); +} + +TEST(SymlinkTest, ReadlinkAtDegradedPermissions_NoRandomSave) { + // Drop capabilities that allow us to override file and directory permissions. + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false)); + + auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const std::string oldpath = NewTempAbsPathInDir(dir.path()); + const std::string oldbase = std::string(Basename(oldpath)); + ASSERT_THAT(symlink("/dangling", oldpath.c_str()), SyscallSucceeds()); + + int dirfd; + EXPECT_THAT(dirfd = open(dir.path().c_str(), O_DIRECTORY, 0), + SyscallSucceeds()); + + const DisableSave ds; // Permissions are dropped. + EXPECT_THAT(fchmod(dirfd, 0), SyscallSucceeds()); + + char buf[1024]; + int linksize; + EXPECT_THAT(linksize = readlinkat(dirfd, oldbase.c_str(), buf, 1024), + SyscallFailsWithErrno(EACCES)); + EXPECT_THAT(close(dirfd), SyscallSucceeds()); +} + +TEST(SymlinkTest, ChmodSymlink) { + auto target = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const std::string newpath = NewTempAbsPath(); + ASSERT_THAT(symlink(target.path().c_str(), newpath.c_str()), + SyscallSucceeds()); + EXPECT_EQ(FilePermission(newpath), 0777); + EXPECT_THAT(chmod(newpath.c_str(), 0666), SyscallSucceeds()); + EXPECT_EQ(FilePermission(newpath), 0777); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/sync.cc b/test/syscalls/linux/sync.cc new file mode 100644 index 000000000..5b777b6eb --- /dev/null +++ b/test/syscalls/linux/sync.cc @@ -0,0 +1,60 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the 
License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <fcntl.h> +#include <stdio.h> +#include <unistd.h> + +#include <sys/syscall.h> +#include <unistd.h> +#include <string> + +#include "gtest/gtest.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +TEST(SyncTest, SyncEverything) { + ASSERT_THAT(syscall(SYS_sync), SyscallSucceeds()); +} + +TEST(SyncTest, SyncFileSytem) { + int fd; + auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + ASSERT_THAT(fd = open(f.path().c_str(), O_RDONLY), SyscallSucceeds()); + EXPECT_THAT(syncfs(fd), SyscallSucceeds()); + EXPECT_THAT(close(fd), SyscallSucceeds()); +} + +TEST(SyncTest, SyncFromPipe) { + int pipes[2]; + EXPECT_THAT(pipe(pipes), SyscallSucceeds()); + EXPECT_THAT(syncfs(pipes[0]), SyscallSucceeds()); + EXPECT_THAT(syncfs(pipes[1]), SyscallSucceeds()); + EXPECT_THAT(close(pipes[0]), SyscallSucceeds()); + EXPECT_THAT(close(pipes[1]), SyscallSucceeds()); +} + +TEST(SyncTest, CannotSyncFileSytemAtBadFd) { + EXPECT_THAT(syncfs(-1), SyscallFailsWithErrno(EBADF)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/sync_file_range.cc b/test/syscalls/linux/sync_file_range.cc new file mode 100644 index 000000000..ebe4ca171 --- /dev/null +++ b/test/syscalls/linux/sync_file_range.cc @@ -0,0 +1,111 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <fcntl.h> +#include <stdio.h> +#include <unistd.h> + +#include <string> + +#include "gtest/gtest.h" +#include "test/util/file_descriptor.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +TEST(SyncFileRangeTest, TempFileSucceeds) { + auto tmpfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + auto f = ASSERT_NO_ERRNO_AND_VALUE(Open(tmpfile.path(), O_RDWR)); + constexpr char data[] = "some data to sync"; + int fd = f.get(); + + EXPECT_THAT(write(fd, data, sizeof(data)), + SyscallSucceedsWithValue(sizeof(data))); + EXPECT_THAT(sync_file_range(fd, 0, 0, SYNC_FILE_RANGE_WRITE), + SyscallSucceeds()); + EXPECT_THAT(sync_file_range(fd, 0, 0, 0), SyscallSucceeds()); + EXPECT_THAT( + sync_file_range(fd, 0, 0, + SYNC_FILE_RANGE_WRITE | SYNC_FILE_RANGE_WAIT_AFTER | + SYNC_FILE_RANGE_WAIT_BEFORE), + SyscallSucceeds()); + EXPECT_THAT(sync_file_range( + fd, 0, 1, SYNC_FILE_RANGE_WRITE | SYNC_FILE_RANGE_WAIT_AFTER), + SyscallSucceeds()); + EXPECT_THAT(sync_file_range( + fd, 1, 0, SYNC_FILE_RANGE_WRITE | SYNC_FILE_RANGE_WAIT_AFTER), + SyscallSucceeds()); +} + +TEST(SyncFileRangeTest, CannotSyncFileRangeOnUnopenedFd) { + auto tmpfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + auto f = ASSERT_NO_ERRNO_AND_VALUE(Open(tmpfile.path(), O_RDWR)); + constexpr char data[] = "some data to sync"; + int fd = f.get(); + + EXPECT_THAT(write(fd, data, sizeof(data)), + SyscallSucceedsWithValue(sizeof(data))); + + pid_t pid = fork(); + if (pid == 0) { + f.reset(); + + 
// fd is now invalid. + TEST_CHECK(sync_file_range(fd, 0, 0, SYNC_FILE_RANGE_WRITE) == -1); + TEST_PCHECK(errno == EBADF); + _exit(0); + } + + int status = 0; + ASSERT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid)); + EXPECT_TRUE(WIFEXITED(status)); + EXPECT_EQ(WEXITSTATUS(status), 0); +} + +TEST(SyncFileRangeTest, BadArgs) { + auto tmpfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + auto f = ASSERT_NO_ERRNO_AND_VALUE(Open(tmpfile.path(), O_RDWR)); + int fd = f.get(); + + EXPECT_THAT(sync_file_range(fd, -1, 0, 0), SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(sync_file_range(fd, 0, -1, 0), SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(sync_file_range(fd, 8912, INT64_MAX - 4096, 0), + SyscallFailsWithErrno(EINVAL)); +} + +TEST(SyncFileRangeTest, CannotSyncFileRangeWithWaitBefore) { + auto tmpfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + auto f = ASSERT_NO_ERRNO_AND_VALUE(Open(tmpfile.path(), O_RDWR)); + constexpr char data[] = "some data to sync"; + int fd = f.get(); + + EXPECT_THAT(write(fd, data, sizeof(data)), + SyscallSucceedsWithValue(sizeof(data))); + if (IsRunningOnGvisor()) { + EXPECT_THAT(sync_file_range(fd, 0, 0, SYNC_FILE_RANGE_WAIT_BEFORE), + SyscallFailsWithErrno(ENOSYS)); + EXPECT_THAT( + sync_file_range(fd, 0, 0, + SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE), + SyscallFailsWithErrno(ENOSYS)); + } +} + +} // namespace +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/sysinfo.cc b/test/syscalls/linux/sysinfo.cc new file mode 100644 index 000000000..a0dd82640 --- /dev/null +++ b/test/syscalls/linux/sysinfo.cc @@ -0,0 +1,86 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This is a very simple sanity test to validate that the sysinfo syscall is +// supported by gvisor and returns sane values. +#include <sys/syscall.h> +#include <sys/sysinfo.h> +#include <sys/types.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +TEST(SysinfoTest, SysinfoIsCallable) { + struct sysinfo ignored = {}; + EXPECT_THAT(syscall(SYS_sysinfo, &ignored), SyscallSucceedsWithValue(0)); +} + +TEST(SysinfoTest, EfaultProducedOnBadAddress) { + // Validate that we return EFAULT when a bad address is provided. 
+ // specified by man 2 sysinfo + EXPECT_THAT(syscall(SYS_sysinfo, nullptr), SyscallFailsWithErrno(EFAULT)); +} + +TEST(SysinfoTest, TotalRamSaneValue) { + struct sysinfo s = {}; + EXPECT_THAT(sysinfo(&s), SyscallSucceedsWithValue(0)); + EXPECT_GT(s.totalram, 0); +} + +TEST(SysinfoTest, MemunitSet) { + struct sysinfo s = {}; + EXPECT_THAT(sysinfo(&s), SyscallSucceedsWithValue(0)); + EXPECT_GE(s.mem_unit, 1); +} + +TEST(SysinfoTest, UptimeSaneValue) { + struct sysinfo s = {}; + EXPECT_THAT(sysinfo(&s), SyscallSucceedsWithValue(0)); + EXPECT_GE(s.uptime, 0); +} + +TEST(SysinfoTest, UptimeIncreasingValue) { + struct sysinfo s = {}; + EXPECT_THAT(sysinfo(&s), SyscallSucceedsWithValue(0)); + absl::SleepFor(absl::Seconds(2)); + struct sysinfo s2 = {}; + EXPECT_THAT(sysinfo(&s2), SyscallSucceedsWithValue(0)); + EXPECT_LT(s.uptime, s2.uptime); +} + +TEST(SysinfoTest, FreeRamSaneValue) { + struct sysinfo s = {}; + EXPECT_THAT(sysinfo(&s), SyscallSucceedsWithValue(0)); + EXPECT_GT(s.freeram, 0); + EXPECT_LT(s.freeram, s.totalram); +} + +TEST(SysinfoTest, NumProcsSaneValue) { + struct sysinfo s = {}; + EXPECT_THAT(sysinfo(&s), SyscallSucceedsWithValue(0)); + EXPECT_GT(s.procs, 0); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/syslog.cc b/test/syscalls/linux/syslog.cc new file mode 100644 index 000000000..5bd0d1cc3 --- /dev/null +++ b/test/syscalls/linux/syslog.cc @@ -0,0 +1,51 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include <sys/klog.h> +#include <sys/syscall.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +constexpr int SYSLOG_ACTION_READ_ALL = 3; +constexpr int SYSLOG_ACTION_SIZE_BUFFER = 10; + +int Syslog(int type, char* buf, int len) { + return syscall(__NR_syslog, type, buf, len); +} + +// Only SYSLOG_ACTION_SIZE_BUFFER and SYSLOG_ACTION_READ_ALL are implemented in +// gVisor. + +TEST(Syslog, Size) { + EXPECT_THAT(Syslog(SYSLOG_ACTION_SIZE_BUFFER, nullptr, 0), SyscallSucceeds()); +} + +TEST(Syslog, ReadAll) { + // There might not be anything to read, so we can't check the write count. + char buf[100]; + EXPECT_THAT(Syslog(SYSLOG_ACTION_READ_ALL, buf, sizeof(buf)), + SyscallSucceeds()); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/sysret.cc b/test/syscalls/linux/sysret.cc new file mode 100644 index 000000000..8e10220eb --- /dev/null +++ b/test/syscalls/linux/sysret.cc @@ -0,0 +1,113 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Tests to verify that the behavior of linux and gvisor matches when +// 'sysret' returns to bad (aka non-canonical) %rip or %rsp. 
+#include <sys/ptrace.h> +#include <sys/user.h> + +#include "gtest/gtest.h" +#include "test/util/logging.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +constexpr uint64_t kNonCanonicalRip = 0xCCCC000000000000; +constexpr uint64_t kNonCanonicalRsp = 0xFFFF000000000000; + +class SysretTest : public ::testing::Test { + protected: + struct user_regs_struct regs_; + pid_t child_; + + void SetUp() override { + pid_t pid = fork(); + + // Child. + if (pid == 0) { + TEST_PCHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) == 0); + MaybeSave(); + TEST_PCHECK(raise(SIGSTOP) == 0); + MaybeSave(); + _exit(0); + } + + // Parent. + int status; + ASSERT_THAT(pid, SyscallSucceeds()); // Might still be < 0. + ASSERT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid)); + EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP); + ASSERT_THAT(ptrace(PTRACE_GETREGS, pid, 0, &regs_), SyscallSucceeds()); + + child_ = pid; + } + + void Detach() { + ASSERT_THAT(ptrace(PTRACE_DETACH, child_, 0, 0), SyscallSucceeds()); + } + + void SetRip(uint64_t newrip) { + regs_.rip = newrip; + ASSERT_THAT(ptrace(PTRACE_SETREGS, child_, 0, &regs_), SyscallSucceeds()); + } + + void SetRsp(uint64_t newrsp) { + regs_.rsp = newrsp; + ASSERT_THAT(ptrace(PTRACE_SETREGS, child_, 0, &regs_), SyscallSucceeds()); + } + + // Wait waits for the child pid and returns the exit status. 
+ int Wait() { + int status; + while (true) { + int rval = wait4(child_, &status, 0, NULL); + if (rval < 0) { + return rval; + } + if (rval == child_) { + return status; + } + } + } +}; + +TEST_F(SysretTest, JustDetach) { + Detach(); + int status = Wait(); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << "status = " << status; +} + +TEST_F(SysretTest, BadRip) { + SetRip(kNonCanonicalRip); + Detach(); + int status = Wait(); + EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGSEGV) + << "status = " << status; +} + +TEST_F(SysretTest, BadRsp) { + SetRsp(kNonCanonicalRsp); + Detach(); + int status = Wait(); + EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGBUS) + << "status = " << status; +} +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/tcp_socket.cc b/test/syscalls/linux/tcp_socket.cc new file mode 100644 index 000000000..e6fe84ded --- /dev/null +++ b/test/syscalls/linux/tcp_socket.cc @@ -0,0 +1,759 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <fcntl.h> +#include <netinet/in.h> +#include <netinet/tcp.h> +#include <sys/poll.h> +#include <sys/socket.h> +#include <unistd.h> +#include <limits> +#include <vector> + +#include "gtest/gtest.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/posix_error.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +PosixErrorOr<sockaddr_storage> InetLoopbackAddr(int family) { + struct sockaddr_storage addr; + memset(&addr, 0, sizeof(addr)); + addr.ss_family = family; + switch (family) { + case AF_INET: + reinterpret_cast<struct sockaddr_in*>(&addr)->sin_addr.s_addr = + htonl(INADDR_LOOPBACK); + break; + case AF_INET6: + reinterpret_cast<struct sockaddr_in6*>(&addr)->sin6_addr = + in6addr_loopback; + break; + default: + return PosixError(EINVAL, + absl::StrCat("unknown socket family: ", family)); + } + return addr; +} + +// Fixture for tests parameterized by the address family to use (AF_INET and +// AF_INET6) when creating sockets. +class TcpSocketTest : public ::testing::TestWithParam<int> { + protected: + // Creates three sockets that will be used by test cases -- a listener, one + // that connects, and the accepted one. + void SetUp() override; + + // Closes the sockets created by SetUp(). + void TearDown() override; + + // Listening socket. + int listener_ = -1; + + // Socket connected via connect(). + int s_ = -1; + + // Socket connected via accept(). + int t_ = -1; + + // Initial size of the send buffer. + int sendbuf_size_ = -1; +}; + +void TcpSocketTest::SetUp() { + ASSERT_THAT(listener_ = socket(GetParam(), SOCK_STREAM, IPPROTO_TCP), + SyscallSucceeds()); + + ASSERT_THAT(s_ = socket(GetParam(), SOCK_STREAM, IPPROTO_TCP), + SyscallSucceeds()); + + // Initialize address to the loopback one. 
+ sockaddr_storage addr = + ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(GetParam())); + socklen_t addrlen = sizeof(addr); + + // Bind to some port then start listening. + ASSERT_THAT( + bind(listener_, reinterpret_cast<struct sockaddr*>(&addr), addrlen), + SyscallSucceeds()); + + ASSERT_THAT(listen(listener_, SOMAXCONN), SyscallSucceeds()); + + // Get the address we're listening on, then connect to it. We need to do this + // because we're allowing the stack to pick a port for us. + ASSERT_THAT(getsockname(listener_, reinterpret_cast<struct sockaddr*>(&addr), + &addrlen), + SyscallSucceeds()); + + ASSERT_THAT(RetryEINTR(connect)(s_, reinterpret_cast<struct sockaddr*>(&addr), + addrlen), + SyscallSucceeds()); + + // Get the initial send buffer size. + socklen_t optlen = sizeof(sendbuf_size_); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &sendbuf_size_, &optlen), + SyscallSucceeds()); + + // Accept the connection. + ASSERT_THAT(t_ = RetryEINTR(accept)(listener_, nullptr, nullptr), + SyscallSucceeds()); +} + +void TcpSocketTest::TearDown() { + EXPECT_THAT(close(listener_), SyscallSucceeds()); + if (s_ >= 0) { + EXPECT_THAT(close(s_), SyscallSucceeds()); + } + if (t_ >= 0) { + EXPECT_THAT(close(t_), SyscallSucceeds()); + } +} + +TEST_P(TcpSocketTest, DataCoalesced) { + char buf[10]; + + // Write in two steps. + ASSERT_THAT(RetryEINTR(write)(s_, buf, sizeof(buf) / 2), + SyscallSucceedsWithValue(sizeof(buf) / 2)); + ASSERT_THAT(RetryEINTR(write)(s_, buf, sizeof(buf) / 2), + SyscallSucceedsWithValue(sizeof(buf) / 2)); + + // Allow stack to process both packets. + absl::SleepFor(absl::Seconds(1)); + + // Read in one shot. 
+ EXPECT_THAT(RetryEINTR(recv)(t_, buf, sizeof(buf), 0), + SyscallSucceedsWithValue(sizeof(buf))); +} + +TEST_P(TcpSocketTest, SenderAddressIgnored) { + char buf[3]; + ASSERT_THAT(RetryEINTR(write)(s_, buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + struct sockaddr_storage addr; + socklen_t addrlen = sizeof(addr); + memset(&addr, 0, sizeof(addr)); + + ASSERT_THAT( + RetryEINTR(recvfrom)(t_, buf, sizeof(buf), 0, + reinterpret_cast<struct sockaddr*>(&addr), &addrlen), + SyscallSucceedsWithValue(3)); + + // Check that addr remains zeroed-out. + const char* ptr = reinterpret_cast<char*>(&addr); + for (size_t i = 0; i < sizeof(addr); i++) { + EXPECT_EQ(ptr[i], 0); + } +} + +TEST_P(TcpSocketTest, SenderAddressIgnoredOnPeek) { + char buf[3]; + ASSERT_THAT(RetryEINTR(write)(s_, buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + struct sockaddr_storage addr; + socklen_t addrlen = sizeof(addr); + memset(&addr, 0, sizeof(addr)); + + ASSERT_THAT( + RetryEINTR(recvfrom)(t_, buf, sizeof(buf), MSG_PEEK, + reinterpret_cast<struct sockaddr*>(&addr), &addrlen), + SyscallSucceedsWithValue(3)); + + // Check that addr remains zeroed-out. + const char* ptr = reinterpret_cast<char*>(&addr); + for (size_t i = 0; i < sizeof(addr); i++) { + EXPECT_EQ(ptr[i], 0); + } +} + +TEST_P(TcpSocketTest, SendtoAddressIgnored) { + struct sockaddr_storage addr; + memset(&addr, 0, sizeof(addr)); + addr.ss_family = GetParam(); // FIXME + + char data = '\0'; + EXPECT_THAT( + RetryEINTR(sendto)(s_, &data, sizeof(data), 0, + reinterpret_cast<sockaddr*>(&addr), sizeof(addr)), + SyscallSucceedsWithValue(1)); +} + +TEST_P(TcpSocketTest, WritevZeroIovec) { + // 2 bytes just to be safe and have vecs[1] not point to something random + // (even though length is 0). + char buf[2]; + char recv_buf[1]; + + // Construct a vec where the final vector is of length 0. 
+ iovec vecs[2] = {}; + vecs[0].iov_base = buf; + vecs[0].iov_len = 1; + vecs[1].iov_base = buf + 1; + vecs[1].iov_len = 0; + + EXPECT_THAT(RetryEINTR(writev)(s_, vecs, 2), SyscallSucceedsWithValue(1)); + + EXPECT_THAT(RetryEINTR(recv)(t_, recv_buf, 1, 0), + SyscallSucceedsWithValue(1)); + EXPECT_EQ(memcmp(recv_buf, buf, 1), 0); +} + +TEST_P(TcpSocketTest, ZeroWriteAllowed) { + char buf[3]; + // Send a zero length packet. + ASSERT_THAT(RetryEINTR(write)(s_, buf, 0), SyscallSucceedsWithValue(0)); + // Verify that there is no packet available. + EXPECT_THAT(RetryEINTR(recv)(t_, buf, sizeof(buf), MSG_DONTWAIT), + SyscallFailsWithErrno(EAGAIN)); +} + +// Test that a non-blocking write with a buffer that is larger than the send +// buffer size will not actually write the whole thing at once. +TEST_P(TcpSocketTest, NonblockingLargeWrite) { + // Set the FD to O_NONBLOCK. + int opts; + ASSERT_THAT(opts = fcntl(s_, F_GETFL), SyscallSucceeds()); + opts |= O_NONBLOCK; + ASSERT_THAT(fcntl(s_, F_SETFL, opts), SyscallSucceeds()); + + // Allocate a buffer three times the size of the send buffer. We do this with + // a vector to avoid allocating on the stack. + int size = 3 * sendbuf_size_; + std::vector<char> buf(size); + + // Try to write the whole thing. + int n; + ASSERT_THAT(n = RetryEINTR(write)(s_, buf.data(), size), SyscallSucceeds()); + + // We should have written something, but not the whole thing. + EXPECT_GT(n, 0); + EXPECT_LT(n, size); +} + +// Test that a blocking write with a buffer that is larger than the send buffer +// will block until the entire buffer is sent. +TEST_P(TcpSocketTest, BlockingLargeWrite_NoRandomSave) { + // Allocate a buffer three times the size of the send buffer on the heap. We + // do this as a vector to avoid allocating on the stack. + int size = 3 * sendbuf_size_; + std::vector<char> writebuf(size); + + // Start reading the response in a loop. 
+ int read_bytes = 0; + ScopedThread t([this, &read_bytes]() { + // Avoid interrupting the blocking write in main thread. + const DisableSave ds; + char readbuf[2500] = {}; + int n = -1; + while (n != 0) { + EXPECT_THAT(n = RetryEINTR(read)(t_, &readbuf, sizeof(readbuf)), + SyscallSucceeds()); + read_bytes += n; + } + }); + + // Try to write the whole thing. + int n; + ASSERT_THAT(n = WriteFd(s_, writebuf.data(), size), SyscallSucceeds()); + + // We should have written the whole thing. + EXPECT_EQ(n, size); + EXPECT_THAT(close(s_), SyscallSucceedsWithValue(0)); + s_ = -1; + t.Join(); + + // We should have read the whole thing. + EXPECT_EQ(read_bytes, size); +} + +// Test that a send with MSG_DONTWAIT flag and buffer that is larger than the send +// buffer size will not write the whole thing. +TEST_P(TcpSocketTest, LargeSendDontWait) { + // Allocate a buffer three times the size of the send buffer. We do this + // with a vector to avoid allocating on the stack. + int size = 3 * sendbuf_size_; + std::vector<char> buf(size); + + // Try to write the whole thing with MSG_DONTWAIT flag, which can + // return a partial write. + int n; + ASSERT_THAT(n = RetryEINTR(send)(s_, buf.data(), size, MSG_DONTWAIT), + SyscallSucceeds()); + + // We should have written something, but not the whole thing. + EXPECT_GT(n, 0); + EXPECT_LT(n, size); +} + +// Test that a send on a non-blocking socket with a buffer that is larger than the +// send buffer will not write the whole thing at once. +TEST_P(TcpSocketTest, NonblockingLargeSend) { + // Set the FD to O_NONBLOCK. + int opts; + ASSERT_THAT(opts = fcntl(s_, F_GETFL), SyscallSucceeds()); + opts |= O_NONBLOCK; + ASSERT_THAT(fcntl(s_, F_SETFL, opts), SyscallSucceeds()); + + // Allocate a buffer three times the size of the send buffer. We do this + // with a vector to avoid allocating on the stack. + int size = 3 * sendbuf_size_; + std::vector<char> buf(size); + + // Try to write the whole thing. 
+ int n; + ASSERT_THAT(n = RetryEINTR(send)(s_, buf.data(), size, 0), SyscallSucceeds()); + + // We should have written something, but not the whole thing. + EXPECT_GT(n, 0); + EXPECT_LT(n, size); +} + +// Same test as above, but calls send instead of write. +TEST_P(TcpSocketTest, BlockingLargeSend_NoRandomSave) { + // Allocate a buffer three times the size of the send buffer. We do this on + // with a vector to avoid allocating on the stack. + int size = 3 * sendbuf_size_; + std::vector<char> writebuf(size); + + // Start reading the response in a loop. + int read_bytes = 0; + ScopedThread t([this, &read_bytes]() { + // Avoid interrupting the blocking write in main thread. + const DisableSave ds; + char readbuf[2500] = {}; + int n = -1; + while (n != 0) { + EXPECT_THAT(n = RetryEINTR(read)(t_, &readbuf, sizeof(readbuf)), + SyscallSucceeds()); + read_bytes += n; + } + }); + + // Try to send the whole thing. + int n; + ASSERT_THAT(n = SendFd(s_, writebuf.data(), size, 0), SyscallSucceeds()); + + // We should have written the whole thing. + EXPECT_EQ(n, size); + EXPECT_THAT(close(s_), SyscallSucceedsWithValue(0)); + s_ = -1; + t.Join(); + + // We should have read the whole thing. + EXPECT_EQ(read_bytes, size); +} + +// Test that polling on a socket with a full send buffer will block. +TEST_P(TcpSocketTest, PollWithFullBufferBlocks) { + // Set the FD to O_NONBLOCK. + int opts; + ASSERT_THAT(opts = fcntl(s_, F_GETFL), SyscallSucceeds()); + opts |= O_NONBLOCK; + ASSERT_THAT(fcntl(s_, F_SETFL, opts), SyscallSucceeds()); + + // Set TCP_NODELAY, which will cause linux to fill the receive buffer from the + // send buffer as quickly as possibly. This way we can fill up both buffers + // faster. + constexpr int tcp_nodelay_flag = 1; + ASSERT_THAT(setsockopt(s_, IPPROTO_TCP, TCP_NODELAY, &tcp_nodelay_flag, + sizeof(tcp_nodelay_flag)), + SyscallSucceeds()); + + // Create a large buffer that will be used for sending. 
+ std::vector<char> buf(5 * sendbuf_size_); + + // Write until we receive an error. + while (RetryEINTR(send)(s_, buf.data(), buf.size(), 0) != -1) { + // Sleep to give linux a chance to move data from the send buffer to the + // receive buffer. + usleep(10000); // 10ms. + } + // The last error should have been EWOULDBLOCK. + ASSERT_EQ(errno, EWOULDBLOCK); +} + +TEST_P(TcpSocketTest, MsgTrunc) { + char sent_data[512]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + ASSERT_THAT(RetryEINTR(send)(s_, sent_data, sizeof(sent_data), 0), + SyscallSucceedsWithValue(sizeof(sent_data))); + char received_data[sizeof(sent_data)] = {}; + ASSERT_THAT( + RetryEINTR(recv)(t_, received_data, sizeof(received_data) / 2, MSG_TRUNC), + SyscallSucceedsWithValue(sizeof(sent_data) / 2)); + + // Check that we didn't get anything. + char zeros[sizeof(received_data)] = {}; + EXPECT_EQ(0, memcmp(zeros, received_data, sizeof(received_data))); +} + +// MSG_CTRUNC is a return flag but linux allows it to be set on input flags +// without returning an error. +TEST_P(TcpSocketTest, MsgTruncWithCtrunc) { + char sent_data[512]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + ASSERT_THAT(RetryEINTR(send)(s_, sent_data, sizeof(sent_data), 0), + SyscallSucceedsWithValue(sizeof(sent_data))); + char received_data[sizeof(sent_data)] = {}; + ASSERT_THAT(RetryEINTR(recv)(t_, received_data, sizeof(received_data) / 2, + MSG_TRUNC | MSG_CTRUNC), + SyscallSucceedsWithValue(sizeof(sent_data) / 2)); + + // Check that we didn't get anything. + char zeros[sizeof(received_data)] = {}; + EXPECT_EQ(0, memcmp(zeros, received_data, sizeof(received_data))); +} + +// This test will verify that MSG_CTRUNC doesn't do anything when specified +// on input. 
+TEST_P(TcpSocketTest, MsgTruncWithCtruncOnly) {
+  // NOTE(review): s_ and t_ are fixture members declared outside this chunk;
+  // presumably the sending and receiving ends of a connected TCP pair --
+  // confirm against the fixture.
+  char sent_data[512];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+  ASSERT_THAT(RetryEINTR(send)(s_, sent_data, sizeof(sent_data), 0),
+              SyscallSucceedsWithValue(sizeof(sent_data)));
+  char received_data[sizeof(sent_data)] = {};
+  // Ask for only half of the pending bytes, passing MSG_CTRUNC alone.
+  ASSERT_THAT(RetryEINTR(recv)(t_, received_data, sizeof(received_data) / 2,
+                               MSG_CTRUNC),
+              SyscallSucceedsWithValue(sizeof(sent_data) / 2));
+
+  // Since MSG_CTRUNC here had no effect, it should not behave like MSG_TRUNC:
+  // the requested bytes must actually have been copied into the buffer.
+  EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data) / 2));
+}
+
+// Test that recv with MSG_TRUNC and a buffer twice the size of the pending
+// data returns the number of pending bytes while leaving the buffer untouched.
+TEST_P(TcpSocketTest, MsgTruncLargeSize) {
+  char sent_data[512];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+  ASSERT_THAT(RetryEINTR(send)(s_, sent_data, sizeof(sent_data), 0),
+              SyscallSucceedsWithValue(sizeof(sent_data)));
+  char received_data[sizeof(sent_data) * 2] = {};
+  ASSERT_THAT(
+      RetryEINTR(recv)(t_, received_data, sizeof(received_data), MSG_TRUNC),
+      SyscallSucceedsWithValue(sizeof(sent_data)));
+
+  // Check that we didn't get anything: the buffer must still be all zeros.
+  char zeros[sizeof(received_data)] = {};
+  EXPECT_EQ(0, memcmp(zeros, received_data, sizeof(received_data)));
+}
+
+// Test that MSG_TRUNC | MSG_PEEK returns the requested length without copying
+// any data, and that the data is still queued for a later plain recv.
+TEST_P(TcpSocketTest, MsgTruncPeek) {
+  char sent_data[512];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+  ASSERT_THAT(RetryEINTR(send)(s_, sent_data, sizeof(sent_data), 0),
+              SyscallSucceedsWithValue(sizeof(sent_data)));
+  char received_data[sizeof(sent_data)] = {};
+  ASSERT_THAT(RetryEINTR(recv)(t_, received_data, sizeof(received_data) / 2,
+                               MSG_TRUNC | MSG_PEEK),
+              SyscallSucceedsWithValue(sizeof(sent_data) / 2));
+
+  // Check that we didn't get anything: the buffer must still be all zeros.
+  char zeros[sizeof(received_data)] = {};
+  EXPECT_EQ(0, memcmp(zeros, received_data, sizeof(received_data)));
+
+  // Check that we can still get all of the data.
+  ASSERT_THAT(RetryEINTR(recv)(t_, received_data, sizeof(received_data), 0),
+              SyscallSucceedsWithValue(sizeof(sent_data)));
+  EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+}
+
+// Test that TCP_NODELAY reads back as off (kSockOptOff) when this test has not
+// changed it, and that the option value has the size of an int.
+TEST_P(TcpSocketTest, NoDelayDefault) {
+  int get = -1;
+  socklen_t get_len = sizeof(get);
+  EXPECT_THAT(getsockopt(s_, IPPROTO_TCP, TCP_NODELAY, &get, &get_len),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, kSockOptOff);
+}
+
+// Test that TCP_NODELAY can be switched on and then off again, and that
+// getsockopt reflects each change.
+TEST_P(TcpSocketTest, SetNoDelay) {
+  // Turn the option on and read it back.
+  ASSERT_THAT(
+      setsockopt(s_, IPPROTO_TCP, TCP_NODELAY, &kSockOptOn, sizeof(kSockOptOn)),
+      SyscallSucceeds());
+
+  int get = -1;
+  socklen_t get_len = sizeof(get);
+  EXPECT_THAT(getsockopt(s_, IPPROTO_TCP, TCP_NODELAY, &get, &get_len),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, kSockOptOn);
+
+  // Turn the option off again and read it back.
+  ASSERT_THAT(setsockopt(s_, IPPROTO_TCP, TCP_NODELAY, &kSockOptOff,
+                         sizeof(kSockOptOff)),
+              SyscallSucceeds());
+
+  EXPECT_THAT(getsockopt(s_, IPPROTO_TCP, TCP_NODELAY, &get, &get_len),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, kSockOptOff);
+}
+
+// Run every TcpSocketTest once for AF_INET and once for AF_INET6.
+INSTANTIATE_TEST_CASE_P(AllInetTests, TcpSocketTest,
+                        ::testing::Values(AF_INET, AF_INET6));
+
+// Fixture for tests parameterized by address family that don't want the fixture
+// to do things.
+using SimpleTcpSocketTest = ::testing::TestWithParam<int>; + +TEST_P(SimpleTcpSocketTest, SendUnconnected) { + int fd; + ASSERT_THAT(fd = socket(GetParam(), SOCK_STREAM, IPPROTO_TCP), + SyscallSucceeds()); + FileDescriptor sock_fd(fd); + + char data = '\0'; + EXPECT_THAT(RetryEINTR(send)(fd, &data, sizeof(data), 0), + SyscallFailsWithErrno(EPIPE)); +} + +TEST_P(SimpleTcpSocketTest, SendtoWithoutAddressUnconnected) { + int fd; + ASSERT_THAT(fd = socket(GetParam(), SOCK_STREAM, IPPROTO_TCP), + SyscallSucceeds()); + FileDescriptor sock_fd(fd); + + char data = '\0'; + EXPECT_THAT(RetryEINTR(sendto)(fd, &data, sizeof(data), 0, nullptr, 0), + SyscallFailsWithErrno(EPIPE)); +} + +TEST_P(SimpleTcpSocketTest, SendtoWithAddressUnconnected) { + int fd; + ASSERT_THAT(fd = socket(GetParam(), SOCK_STREAM, IPPROTO_TCP), + SyscallSucceeds()); + FileDescriptor sock_fd(fd); + + sockaddr_storage addr = + ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(GetParam())); + char data = '\0'; + EXPECT_THAT( + RetryEINTR(sendto)(fd, &data, sizeof(data), 0, + reinterpret_cast<sockaddr*>(&addr), sizeof(addr)), + SyscallFailsWithErrno(EPIPE)); +} + +TEST_P(SimpleTcpSocketTest, GetPeerNameUnconnected) { + int fd; + ASSERT_THAT(fd = socket(GetParam(), SOCK_STREAM, IPPROTO_TCP), + SyscallSucceeds()); + FileDescriptor sock_fd(fd); + + sockaddr_storage addr; + socklen_t addrlen = sizeof(addr); + EXPECT_THAT(getpeername(fd, reinterpret_cast<sockaddr*>(&addr), &addrlen), + SyscallFailsWithErrno(ENOTCONN)); +} + +TEST_P(TcpSocketTest, FullBuffer) { + // Set both FDs to be blocking. + int flags = 0; + ASSERT_THAT(flags = fcntl(s_, F_GETFL), SyscallSucceeds()); + EXPECT_THAT(fcntl(s_, F_SETFL, flags & ~O_NONBLOCK), SyscallSucceeds()); + flags = 0; + ASSERT_THAT(flags = fcntl(t_, F_GETFL), SyscallSucceeds()); + EXPECT_THAT(fcntl(t_, F_SETFL, flags & ~O_NONBLOCK), SyscallSucceeds()); + + // 2500 was chosen as a small value that can be set on Linux. 
+ int set_snd = 2500; + EXPECT_THAT(setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &set_snd, sizeof(set_snd)), + SyscallSucceedsWithValue(0)); + int get_snd = -1; + socklen_t get_snd_len = sizeof(get_snd); + EXPECT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &get_snd, &get_snd_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_snd_len, sizeof(get_snd)); + EXPECT_GT(get_snd, 0); + + // 2500 was chosen as a small value that can be set on Linux and gVisor. + int set_rcv = 2500; + EXPECT_THAT(setsockopt(t_, SOL_SOCKET, SO_RCVBUF, &set_rcv, sizeof(set_rcv)), + SyscallSucceedsWithValue(0)); + int get_rcv = -1; + socklen_t get_rcv_len = sizeof(get_rcv); + EXPECT_THAT(getsockopt(t_, SOL_SOCKET, SO_RCVBUF, &get_rcv, &get_rcv_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_rcv_len, sizeof(get_rcv)); + EXPECT_GE(get_rcv, 2500); + + // Quick sanity test. + EXPECT_LT(get_snd + get_rcv, 2500 * IOV_MAX); + + char data[2500] = {}; + std::vector<struct iovec> iovecs; + for (int i = 0; i < IOV_MAX; i++) { + struct iovec iov = {}; + iov.iov_base = data; + iov.iov_len = sizeof(data); + iovecs.push_back(iov); + } + ScopedThread t([this, &iovecs]() { + int result = -1; + EXPECT_THAT(result = RetryEINTR(writev)(s_, iovecs.data(), iovecs.size()), + SyscallSucceeds()); + EXPECT_GT(result, 1); + EXPECT_LT(result, sizeof(data) * iovecs.size()); + }); + + char recv = 0; + EXPECT_THAT(RetryEINTR(read)(t_, &recv, 1), SyscallSucceedsWithValue(1)); + EXPECT_THAT(close(t_), SyscallSucceedsWithValue(0)); + t_ = -1; +} + +TEST_P(SimpleTcpSocketTest, NonBlockingConnectNoListener) { + // Initialize address to the loopback one. + sockaddr_storage addr = + ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(GetParam())); + socklen_t addrlen = sizeof(addr); + + const FileDescriptor s = + ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); + + // Set the FD to O_NONBLOCK. 
+ int opts; + ASSERT_THAT(opts = fcntl(s.get(), F_GETFL), SyscallSucceeds()); + opts |= O_NONBLOCK; + ASSERT_THAT(fcntl(s.get(), F_SETFL, opts), SyscallSucceeds()); + + ASSERT_THAT(RetryEINTR(connect)( + s.get(), reinterpret_cast<struct sockaddr*>(&addr), addrlen), + SyscallFailsWithErrno(EINPROGRESS)); + + // Now polling on the FD with a timeout should return 0 corresponding to no + // FDs ready. + struct pollfd poll_fd = {s.get(), POLLOUT, 0}; + EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, 10000), + SyscallSucceedsWithValue(1)); + + int err; + socklen_t optlen = sizeof(err); + ASSERT_THAT(getsockopt(s.get(), SOL_SOCKET, SO_ERROR, &err, &optlen), + SyscallSucceeds()); + + EXPECT_EQ(err, ECONNREFUSED); +} + +TEST_P(SimpleTcpSocketTest, NonBlockingConnect) { + const FileDescriptor listener = + ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); + + // Initialize address to the loopback one. + sockaddr_storage addr = + ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(GetParam())); + socklen_t addrlen = sizeof(addr); + + // Bind to some port then start listening. + ASSERT_THAT( + bind(listener.get(), reinterpret_cast<struct sockaddr*>(&addr), addrlen), + SyscallSucceeds()); + + ASSERT_THAT(listen(listener.get(), SOMAXCONN), SyscallSucceeds()); + + FileDescriptor s = + ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); + + // Set the FD to O_NONBLOCK. 
+ int opts; + ASSERT_THAT(opts = fcntl(s.get(), F_GETFL), SyscallSucceeds()); + opts |= O_NONBLOCK; + ASSERT_THAT(fcntl(s.get(), F_SETFL, opts), SyscallSucceeds()); + + ASSERT_THAT(getsockname(listener.get(), + reinterpret_cast<struct sockaddr*>(&addr), &addrlen), + SyscallSucceeds()); + + ASSERT_THAT(RetryEINTR(connect)( + s.get(), reinterpret_cast<struct sockaddr*>(&addr), addrlen), + SyscallFailsWithErrno(EINPROGRESS)); + + int t; + ASSERT_THAT(t = RetryEINTR(accept)(listener.get(), nullptr, nullptr), + SyscallSucceeds()); + + // Now polling on the FD with a timeout should return 0 corresponding to no + // FDs ready. + struct pollfd poll_fd = {s.get(), POLLOUT, 0}; + EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, 10000), + SyscallSucceedsWithValue(1)); + + int err; + socklen_t optlen = sizeof(err); + ASSERT_THAT(getsockopt(s.get(), SOL_SOCKET, SO_ERROR, &err, &optlen), + SyscallSucceeds()); + + EXPECT_EQ(err, 0); + + EXPECT_THAT(close(t), SyscallSucceeds()); +} + +// Test that we get an ECONNREFUSED with a blocking socket when no one is +// listening on the other end. +TEST_P(SimpleTcpSocketTest, BlockingConnectRefused) { + FileDescriptor s = + ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); + + // Initialize address to the loopback one. + sockaddr_storage addr = + ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(GetParam())); + socklen_t addrlen = sizeof(addr); + + ASSERT_THAT(RetryEINTR(connect)( + s.get(), reinterpret_cast<struct sockaddr*>(&addr), addrlen), + SyscallFailsWithErrno(ECONNREFUSED)); + + // Avoiding triggering save in destructor of s. + EXPECT_THAT(close(s.release()), SyscallSucceeds()); +} + +// Test that we get an ECONNREFUSED with a nonblocking socket. +TEST_P(SimpleTcpSocketTest, NonBlockingConnectRefused) { + FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( + Socket(GetParam(), SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP)); + + // Initialize address to the loopback one. 
+ sockaddr_storage addr = + ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(GetParam())); + socklen_t addrlen = sizeof(addr); + + ASSERT_THAT(RetryEINTR(connect)( + s.get(), reinterpret_cast<struct sockaddr*>(&addr), addrlen), + SyscallFailsWithErrno(EINPROGRESS)); + + // We don't need to specify any events to get POLLHUP or POLLERR as these + // are added before the poll. + struct pollfd poll_fd = {s.get(), /*events=*/0, 0}; + EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, 1000), SyscallSucceedsWithValue(1)); + + // The ECONNREFUSED should cause us to be woken up with POLLHUP. + EXPECT_NE(poll_fd.revents & (POLLHUP | POLLERR), 0); + + // Avoiding triggering save in destructor of s. + EXPECT_THAT(close(s.release()), SyscallSucceeds()); +} + +INSTANTIATE_TEST_CASE_P(AllInetTests, SimpleTcpSocketTest, + ::testing::Values(AF_INET, AF_INET6)); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/temp_umask.h b/test/syscalls/linux/temp_umask.h new file mode 100644 index 000000000..f202dfa59 --- /dev/null +++ b/test/syscalls/linux/temp_umask.h @@ -0,0 +1,39 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GVISOR_TEST_SYSCALLS_TEMP_UMASK_H_ +#define GVISOR_TEST_SYSCALLS_TEMP_UMASK_H_ + +#include <sys/stat.h> +#include <sys/types.h> + +namespace gvisor { +namespace testing { + +class TempUmask { + public: + // Sets the process umask to `mask`. 
+ explicit TempUmask(mode_t mask) : old_mask_(umask(mask)) {} + + // Sets the process umask to its previous value. + ~TempUmask() { umask(old_mask_); } + + private: + mode_t old_mask_; +}; + +} // namespace testing +} // namespace gvisor + +#endif // GVISOR_TEST_SYSCALLS_TEMP_UMASK_H_ diff --git a/test/syscalls/linux/tgkill.cc b/test/syscalls/linux/tgkill.cc new file mode 100644 index 000000000..2d258ef11 --- /dev/null +++ b/test/syscalls/linux/tgkill.cc @@ -0,0 +1,48 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#include <errno.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "test/util/signal_util.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// All tests below pass 0 as the signal number. NOTE(review): tgkill and gettid
+// are not declared in this file; presumably they are wrappers provided by one
+// of the test/util headers included above -- confirm.
+
+// Test that tgkill rejects a negative or zero thread ID with EINVAL.
+TEST(TgkillTest, InvalidTID) {
+  EXPECT_THAT(tgkill(getpid(), -1, 0), SyscallFailsWithErrno(EINVAL));
+  EXPECT_THAT(tgkill(getpid(), 0, 0), SyscallFailsWithErrno(EINVAL));
+}
+
+// Test that tgkill rejects a negative or zero thread group ID with EINVAL.
+TEST(TgkillTest, InvalidTGID) {
+  EXPECT_THAT(tgkill(-1, gettid(), 0), SyscallFailsWithErrno(EINVAL));
+  EXPECT_THAT(tgkill(0, gettid(), 0), SyscallFailsWithErrno(EINVAL));
+}
+
+// Test that tgkill succeeds when given the caller's own process and thread IDs.
+TEST(TgkillTest, ValidInput) {
+  EXPECT_THAT(tgkill(getpid(), gettid(), 0), SyscallSucceeds());
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/time.cc b/test/syscalls/linux/time.cc
new file mode 100644
index 000000000..3abcd8098
--- /dev/null
+++ b/test/syscalls/linux/time.cc
@@ -0,0 +1,103 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <time.h>
+
+#include "gtest/gtest.h"
+#include "test/util/proc_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// Largest difference, in seconds, tolerated between two clock readings that
+// are taken back to back.
+constexpr long kFudgeSeconds = 5;
+
+// Mimics the time(2) wrapper from glibc prior to 2.15.
+time_t vsyscall_time(time_t* t) {
+  // Call the time entry point directly at its fixed address.
+  constexpr uint64_t kVsyscallTimeEntry = 0xffffffffff600400;
+  return reinterpret_cast<time_t (*)(time_t*)>(kVsyscallTimeEntry)(t);
+}
+
+// Test that the vsyscall time entry returns a value that is not earlier than,
+// and at most kFudgeSeconds later than, a preceding time(2) call.
+TEST(TimeTest, VsyscallTime_Succeeds) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsVsyscallEnabled()));
+
+  time_t t1, t2;
+
+  {
+    const DisableSave ds;  // Timing assertions.
+    EXPECT_THAT(time(&t1), SyscallSucceeds());
+    EXPECT_THAT(vsyscall_time(&t2), SyscallSucceeds());
+  }
+
+  // Time should be monotonic.
+  EXPECT_LE(static_cast<long>(t1), static_cast<long>(t2));
+
+  // Check that it's within kFudgeSeconds.
+  EXPECT_LE(static_cast<long>(t2), static_cast<long>(t1) + kFudgeSeconds);
+
+  // Redo with save. Only ordering is checked here, not the kFudgeSeconds bound.
+  EXPECT_THAT(time(&t1), SyscallSucceeds());
+  EXPECT_THAT(vsyscall_time(&t2), SyscallSucceeds());
+
+  // Time should be monotonic.
+  EXPECT_LE(static_cast<long>(t1), static_cast<long>(t2));
+}
+
+// Test that handing the vsyscall time entry an invalid pointer kills the
+// process with SIGSEGV.
+// NOTE(review): unlike VsyscallGettimeofday_InvalidAddressSIGSEGV, this test is
+// not gated on IsVsyscallEnabled() -- confirm that is intentional.
+TEST(TimeTest, VsyscallTime_InvalidAddressSIGSEGV) {
+  EXPECT_EXIT(vsyscall_time(reinterpret_cast<time_t*>(0x1)),
+              ::testing::KilledBySignal(SIGSEGV), "");
+}
+
+// Calls the vsyscall gettimeofday entry point directly at its fixed address.
+int vsyscall_gettimeofday(struct timeval* tv, struct timezone* tz) {
+  constexpr uint64_t kVsyscallGettimeofdayEntry = 0xffffffffff600000;
+  return reinterpret_cast<int (*)(struct timeval*, struct timezone*)>(
+      kVsyscallGettimeofdayEntry)(tv, tz);
+}
+
+// Test that the vsyscall gettimeofday entry returns a seconds value that is not
+// earlier than, and at most kFudgeSeconds later than, a preceding
+// gettimeofday(2) call.
+TEST(TimeTest, VsyscallGettimeofday_Succeeds) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsVsyscallEnabled()));
+
+  struct timeval tv1, tv2;
+  struct timezone tz1, tz2;
+
+  {
+    const DisableSave ds;  // Timing assertions.
+    EXPECT_THAT(gettimeofday(&tv1, &tz1), SyscallSucceeds());
+    EXPECT_THAT(vsyscall_gettimeofday(&tv2, &tz2), SyscallSucceeds());
+  }
+
+  // As in VsyscallTime_Succeeds: the seconds must not go backwards and must
+  // stay within kFudgeSeconds.
+  EXPECT_LE(static_cast<long>(tv1.tv_sec), static_cast<long>(tv2.tv_sec));
+  EXPECT_LE(static_cast<long>(tv2.tv_sec),
+            static_cast<long>(tv1.tv_sec) + kFudgeSeconds);
+
+  // Redo with save. Only success of the calls is checked here.
+  EXPECT_THAT(gettimeofday(&tv1, &tz1), SyscallSucceeds());
+  EXPECT_THAT(vsyscall_gettimeofday(&tv2, &tz2), SyscallSucceeds());
+}
+
+// Test that handing the vsyscall gettimeofday entry invalid pointers kills the
+// process with SIGSEGV.
+TEST(TimeTest, VsyscallGettimeofday_InvalidAddressSIGSEGV) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsVsyscallEnabled()));
+
+  EXPECT_EXIT(vsyscall_gettimeofday(reinterpret_cast<struct timeval*>(0x1),
+                                    reinterpret_cast<struct timezone*>(0x1)),
+              ::testing::KilledBySignal(SIGSEGV), "");
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/timerfd.cc b/test/syscalls/linux/timerfd.cc
new file mode 100644
index 000000000..b85321795
--- /dev/null
+++ b/test/syscalls/linux/timerfd.cc
@@ -0,0 +1,238 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <poll.h>
+#include <sys/timerfd.h>
+#include <time.h>
+
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/posix_error.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// Wrapper around timerfd_create(2) that returns a FileDescriptor.
+PosixErrorOr<FileDescriptor> TimerfdCreate(int clockid, int flags) {
+  int fd = timerfd_create(clockid, flags);
+  // NOTE(review): errno is read after MaybeSave(), which is defined elsewhere;
+  // this presumably relies on MaybeSave() preserving errno -- confirm.
+  MaybeSave();
+  if (fd < 0) {
+    return PosixError(errno, "timerfd_create failed");
+  }
+  return FileDescriptor(fd);
+}
+
+// In tests that race a timerfd with a sleep, some slack is required because:
+//
+// - Timerfd expirations are asynchronous with respect to nanosleeps.
+//
+// - Because clock_gettime(CLOCK_MONOTONIC) is implemented through the VDSO,
+//   it technically uses a closely-related, but distinct, time domain from the
+//   CLOCK_MONOTONIC used to trigger timerfd expirations.
+absl::Duration TimerSlack() { return absl::Milliseconds(500); }
+
+// Test that a newly created timerfd reports a zero it_value from
+// timerfd_gettime.
+TEST(TimerfdTest, IsInitiallyStopped) {
+  auto const tfd = ASSERT_NO_ERRNO_AND_VALUE(TimerfdCreate(CLOCK_MONOTONIC, 0));
+  struct itimerspec its = {};
+  ASSERT_THAT(timerfd_gettime(tfd.get(), &its), SyscallSucceeds());
+  EXPECT_EQ(0, its.it_value.tv_sec);
+  EXPECT_EQ(0, its.it_value.tv_nsec);
+}
+
+// Test that a timer armed with only it_value set reports exactly one
+// expiration when read after the delay has passed.
+TEST(TimerfdTest, SingleShot) {
+  constexpr absl::Duration kDelay = absl::Seconds(1);
+
+  auto const tfd = ASSERT_NO_ERRNO_AND_VALUE(TimerfdCreate(CLOCK_MONOTONIC, 0));
+  struct itimerspec its = {};
+  its.it_value = absl::ToTimespec(kDelay);
+  ASSERT_THAT(timerfd_settime(tfd.get(), /* flags = */ 0, &its, nullptr),
+              SyscallSucceeds());
+
+  // The timer should fire exactly once since the interval is zero.
+  absl::SleepFor(kDelay + TimerSlack());
+  uint64_t val = 0;
+  ASSERT_THAT(ReadFd(tfd.get(), &val, sizeof(uint64_t)),
+              SyscallSucceedsWithValue(sizeof(uint64_t)));
+  EXPECT_EQ(1, val);
+}
+
+// Test that a timer armed with both it_value and it_interval reports at least
+// kPeriods expirations after kPeriods intervals have passed.
+TEST(TimerfdTest, Periodic) {
+  constexpr absl::Duration kDelay = absl::Seconds(1);
+  constexpr int kPeriods = 3;
+
+  auto const tfd = ASSERT_NO_ERRNO_AND_VALUE(TimerfdCreate(CLOCK_MONOTONIC, 0));
+  struct itimerspec its = {};
+  its.it_value = absl::ToTimespec(kDelay);
+  its.it_interval = absl::ToTimespec(kDelay);
+  ASSERT_THAT(timerfd_settime(tfd.get(), /* flags = */ 0, &its, nullptr),
+              SyscallSucceeds());
+
+  // Expect to see at least kPeriods expirations. More may occur due to the
+  // timer slack, or due to delays from scheduling or save/restore.
+  absl::SleepFor(kPeriods * kDelay + TimerSlack());
+  uint64_t val = 0;
+  ASSERT_THAT(ReadFd(tfd.get(), &val, sizeof(uint64_t)),
+              SyscallSucceedsWithValue(sizeof(uint64_t)));
+  EXPECT_GE(val, kPeriods);
+}
+
+// Test that read on a blocking timerfd does not return until roughly kDelay
+// has elapsed, and then reports one expiration.
+TEST(TimerfdTest, BlockingRead) {
+  constexpr absl::Duration kDelay = absl::Seconds(3);
+
+  auto const tfd = ASSERT_NO_ERRNO_AND_VALUE(TimerfdCreate(CLOCK_MONOTONIC, 0));
+  struct itimerspec its = {};
+  its.it_value.tv_sec = absl::ToInt64Seconds(kDelay);
+  auto const start_time = absl::Now();
+  ASSERT_THAT(timerfd_settime(tfd.get(), /* flags = */ 0, &its, nullptr),
+              SyscallSucceeds());
+
+  // read should block until the timer fires.
+  uint64_t val = 0;
+  ASSERT_THAT(ReadFd(tfd.get(), &val, sizeof(uint64_t)),
+              SyscallSucceedsWithValue(sizeof(uint64_t)));
+  auto const end_time = absl::Now();
+  EXPECT_EQ(1, val);
+  // Allow the measured wait to fall short of kDelay by at most TimerSlack().
+  EXPECT_GE((end_time - start_time) + TimerSlack(), kDelay);
+}
+
+// Test that read on a TFD_NONBLOCK timerfd fails with EAGAIN whenever no
+// expiration is pending: before arming, after arming but before firing, and
+// after a successful read has consumed the expiration.
+TEST(TimerfdTest, NonblockingRead_NoRandomSave) {
+  constexpr absl::Duration kDelay = absl::Seconds(5);
+
+  auto const tfd =
+      ASSERT_NO_ERRNO_AND_VALUE(TimerfdCreate(CLOCK_MONOTONIC, TFD_NONBLOCK));
+
+  // Since the timer is initially disabled and has never fired, read should
+  // return EAGAIN.
+  uint64_t val = 0;
+  ASSERT_THAT(ReadFd(tfd.get(), &val, sizeof(uint64_t)),
+              SyscallFailsWithErrno(EAGAIN));
+
+  DisableSave ds;  // Timing-sensitive.
+
+  // Arm the timer.
+  struct itimerspec its = {};
+  its.it_value.tv_sec = absl::ToInt64Seconds(kDelay);
+  ASSERT_THAT(timerfd_settime(tfd.get(), /* flags = */ 0, &its, nullptr),
+              SyscallSucceeds());
+
+  // Since the timer has not yet fired, read should return EAGAIN.
+  ASSERT_THAT(ReadFd(tfd.get(), &val, sizeof(uint64_t)),
+              SyscallFailsWithErrno(EAGAIN));
+
+  ds.reset();  // No longer timing-sensitive.
+
+  // After the timer fires, read should indicate 1 expiration.
+  absl::SleepFor(kDelay + TimerSlack());
+  ASSERT_THAT(ReadFd(tfd.get(), &val, sizeof(uint64_t)),
+              SyscallSucceedsWithValue(sizeof(uint64_t)));
+  EXPECT_EQ(1, val);
+
+  // The successful read should have reset the number of expirations.
+  ASSERT_THAT(ReadFd(tfd.get(), &val, sizeof(uint64_t)),
+              SyscallFailsWithErrno(EAGAIN));
+}
+
+// Test that poll reports POLLIN once the timer fires, and that calling
+// timerfd_settime with a zero value afterwards discards the pending expiration.
+TEST(TimerfdTest, BlockingPoll_SetTimeResetsExpirations) {
+  constexpr absl::Duration kDelay = absl::Seconds(3);
+
+  auto const tfd =
+      ASSERT_NO_ERRNO_AND_VALUE(TimerfdCreate(CLOCK_MONOTONIC, TFD_NONBLOCK));
+  struct itimerspec its = {};
+  its.it_value.tv_sec = absl::ToInt64Seconds(kDelay);
+  auto const start_time = absl::Now();
+  ASSERT_THAT(timerfd_settime(tfd.get(), /* flags = */ 0, &its, nullptr),
+              SyscallSucceeds());
+
+  // poll should block until the timer fires. The poll timeout is twice kDelay,
+  // expressed in milliseconds.
+  struct pollfd pfd = {};
+  pfd.fd = tfd.get();
+  pfd.events = POLLIN;
+  ASSERT_THAT(poll(&pfd, /* nfds = */ 1,
+                   /* timeout = */ 2 * absl::ToInt64Seconds(kDelay) * 1000),
+              SyscallSucceedsWithValue(1));
+  auto const end_time = absl::Now();
+  EXPECT_EQ(POLLIN, pfd.revents);
+  EXPECT_GE((end_time - start_time) + TimerSlack(), kDelay);
+
+  // Call timerfd_settime again with a value of 0. This should reset the number
+  // of expirations to 0, causing read to return EAGAIN since the timerfd is
+  // non-blocking.
+  its.it_value.tv_sec = 0;
+  ASSERT_THAT(timerfd_settime(tfd.get(), /* flags = */ 0, &its, nullptr),
+              SyscallSucceeds());
+  uint64_t val = 0;
+  ASSERT_THAT(ReadFd(tfd.get(), &val, sizeof(uint64_t)),
+              SyscallFailsWithErrno(EAGAIN));
+}
+
+// Test that a timer armed with TFD_TIMER_ABSTIME at "now + kDelay" on
+// CLOCK_MONOTONIC has expired exactly once after sleeping for kDelay plus
+// slack.
+TEST(TimerfdTest, SetAbsoluteTime) {
+  constexpr absl::Duration kDelay = absl::Seconds(3);
+
+  // Use a non-blocking timerfd so that if TFD_TIMER_ABSTIME is incorrectly
+  // non-functional, we get EAGAIN rather than a test timeout.
+  auto const tfd =
+      ASSERT_NO_ERRNO_AND_VALUE(TimerfdCreate(CLOCK_MONOTONIC, TFD_NONBLOCK));
+  struct itimerspec its = {};
+  // Start from the current CLOCK_MONOTONIC reading and add the delay to it.
+  ASSERT_THAT(clock_gettime(CLOCK_MONOTONIC, &its.it_value), SyscallSucceeds());
+  its.it_value.tv_sec += absl::ToInt64Seconds(kDelay);
+  ASSERT_THAT(timerfd_settime(tfd.get(), TFD_TIMER_ABSTIME, &its, nullptr),
+              SyscallSucceeds());
+
+  absl::SleepFor(kDelay + TimerSlack());
+  uint64_t val = 0;
+  ASSERT_THAT(ReadFd(tfd.get(), &val, sizeof(uint64_t)),
+              SyscallSucceedsWithValue(sizeof(uint64_t)));
+  EXPECT_EQ(1, val);
+}
+
+TEST(TimerfdTest, ClockRealtime) {
+  // Since CLOCK_REALTIME can, by definition, change, we can't make any
+  // non-flaky assertions about the amount of time it takes for a
+  // CLOCK_REALTIME-based timer to expire. Just check that it expires at all,
+  // and hope it happens before the test times out.
+  constexpr int kDelaySecs = 1;
+
+  auto const tfd = ASSERT_NO_ERRNO_AND_VALUE(TimerfdCreate(CLOCK_REALTIME, 0));
+  struct itimerspec its = {};
+  its.it_value.tv_sec = kDelaySecs;
+  ASSERT_THAT(timerfd_settime(tfd.get(), /* flags = */ 0, &its, nullptr),
+              SyscallSucceeds());
+
+  // Blocking read: returns once the timer has expired.
+  uint64_t val = 0;
+  ASSERT_THAT(ReadFd(tfd.get(), &val, sizeof(uint64_t)),
+              SyscallSucceedsWithValue(sizeof(uint64_t)));
+  EXPECT_EQ(1, val);
+}
+
+// Test the errors expected from unsupported I/O on a timerfd: positional reads
+// and writes fail with ESPIPE, and a plain write fails with EINVAL.
+TEST(TimerfdTest, IllegalReadWrite) {
+  auto const tfd =
+      ASSERT_NO_ERRNO_AND_VALUE(TimerfdCreate(CLOCK_MONOTONIC, TFD_NONBLOCK));
+  uint64_t val = 0;
+  EXPECT_THAT(PreadFd(tfd.get(), &val, sizeof(val), 0),
+              SyscallFailsWithErrno(ESPIPE));
+  EXPECT_THAT(WriteFd(tfd.get(), &val, sizeof(val)),
+              SyscallFailsWithErrno(EINVAL));
+  EXPECT_THAT(PwriteFd(tfd.get(), &val, sizeof(val), 0),
+              SyscallFailsWithErrno(ESPIPE));
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/timers.cc b/test/syscalls/linux/timers.cc
new file mode 100644
index 000000000..dfe231575
--- /dev/null
+++ b/test/syscalls/linux/timers.cc
@@ -0,0 +1,642 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +#include <errno.h> +#include <signal.h> +#include <sys/resource.h> +#include <sys/time.h> +#include <syscall.h> +#include <time.h> +#include <unistd.h> + +#include <atomic> + +#include "gtest/gtest.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/util/cleanup.h" +#include "test/util/logging.h" +#include "test/util/multiprocess_util.h" +#include "test/util/posix_error.h" +#include "test/util/signal_util.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +DEFINE_bool(timers_test_sleep, false, + "If true, sleep forever instead of running tests."); + +using ::testing::_; +using ::testing::AnyOf; + +namespace gvisor { +namespace testing { +namespace { + +#ifndef CPUCLOCK_PROF +#define CPUCLOCK_PROF 0 +#endif // CPUCLOCK_PROF + +PosixErrorOr<absl::Duration> ProcessCPUTime(pid_t pid) { + // Use pid-specific CPUCLOCK_PROF, which is the clock used to enforce + // RLIMIT_CPU. + clockid_t clockid = (~static_cast<clockid_t>(pid) << 3) | CPUCLOCK_PROF; + + struct timespec ts; + int ret = clock_gettime(clockid, &ts); + if (ret < 0) { + return PosixError(errno, "clock_gettime failed"); + } + + return absl::DurationFromTimespec(ts); +} + +void NoopSignalHandler(int signo) { + TEST_CHECK_MSG(SIGXCPU == signo, + "NoopSigHandler did not receive expected signal"); +} + +void UninstallingSignalHandler(int signo) { + TEST_CHECK_MSG(SIGXCPU == signo, + "UninstallingSignalHandler did not receive expected signal"); + struct sigaction rev_action; + rev_action.sa_handler = SIG_DFL; + rev_action.sa_flags = 0; + sigemptyset(&rev_action.sa_mask); + sigaction(SIGXCPU, &rev_action, nullptr); +} + +TEST(TimerTest, ProcessKilledOnCPUSoftLimit) { + constexpr absl::Duration kSoftLimit = absl::Seconds(1); + constexpr absl::Duration kHardLimit = absl::Seconds(3); + + struct rlimit cpu_limits; + cpu_limits.rlim_cur = absl::ToInt64Seconds(kSoftLimit); + cpu_limits.rlim_max = absl::ToInt64Seconds(kHardLimit); + + int pid = fork(); + MaybeSave(); + if 
(pid == 0) { + TEST_PCHECK(setrlimit(RLIMIT_CPU, &cpu_limits) == 0); + MaybeSave(); + for (;;) { + } + } + auto c = Cleanup([pid] { + int status; + EXPECT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid)); + EXPECT_TRUE(WIFSIGNALED(status)); + EXPECT_EQ(WTERMSIG(status), SIGXCPU); + }); + + // Wait for the child to exit, but do not reap it. This will allow us to check + // its CPU usage while it is zombied. + EXPECT_THAT(waitid(P_PID, pid, nullptr, WEXITED | WNOWAIT), + SyscallSucceeds()); + + // Assert that the child spent 1s of CPU before getting killed. + // + // We must be careful to use CPUCLOCK_PROF, the same clock used for RLIMIT_CPU + // enforcement, to get correct results. Note that this is slightly different + // from rusage-reported CPU usage: + // + // RLIMIT_CPU, CPUCLOCK_PROF use kernel/sched/cputime.c:thread_group_cputime. + // rusage uses kernel/sched/cputime.c:thread_group_cputime_adjusted. + absl::Duration cpu = ASSERT_NO_ERRNO_AND_VALUE(ProcessCPUTime(pid)); + EXPECT_GE(cpu, kSoftLimit); + + // Child did not make it to the hard limit. + // + // Linux sends SIGXCPU synchronously with CPU tick updates. See + // kernel/time/timer.c:update_process_times: + // => account_process_tick // update task CPU usage. + // => run_posix_cpu_timers // enforce RLIMIT_CPU, sending signal. + // + // Thus, only chance for this to flake is if the system time required to + // deliver the signal exceeds 2s. 
+ EXPECT_LT(cpu, kHardLimit); +} + +TEST(TimerTest, ProcessPingedRepeatedlyAfterCPUSoftLimit) { + struct sigaction new_action; + new_action.sa_handler = UninstallingSignalHandler; + new_action.sa_flags = 0; + sigemptyset(&new_action.sa_mask); + + constexpr absl::Duration kSoftLimit = absl::Seconds(1); + constexpr absl::Duration kHardLimit = absl::Seconds(10); + + struct rlimit cpu_limits; + cpu_limits.rlim_cur = absl::ToInt64Seconds(kSoftLimit); + cpu_limits.rlim_max = absl::ToInt64Seconds(kHardLimit); + + int pid = fork(); + MaybeSave(); + if (pid == 0) { + TEST_PCHECK(sigaction(SIGXCPU, &new_action, nullptr) == 0); + MaybeSave(); + TEST_PCHECK(setrlimit(RLIMIT_CPU, &cpu_limits) == 0); + MaybeSave(); + for (;;) { + } + } + auto c = Cleanup([pid] { + int status; + EXPECT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid)); + EXPECT_TRUE(WIFSIGNALED(status)); + EXPECT_EQ(WTERMSIG(status), SIGXCPU); + }); + + // Wait for the child to exit, but do not reap it. This will allow us to check + // its CPU usage while it is zombied. + EXPECT_THAT(waitid(P_PID, pid, nullptr, WEXITED | WNOWAIT), + SyscallSucceeds()); + + absl::Duration cpu = ASSERT_NO_ERRNO_AND_VALUE(ProcessCPUTime(pid)); + // Following signals come every CPU second. + EXPECT_GE(cpu, kSoftLimit + absl::Seconds(1)); + + // Child did not make it to the hard limit. + // + // As above, should not flake. 
+ EXPECT_LT(cpu, kHardLimit); +} + +TEST(TimerTest, ProcessKilledOnCPUHardLimit) { + struct sigaction new_action; + new_action.sa_handler = NoopSignalHandler; + new_action.sa_flags = 0; + sigemptyset(&new_action.sa_mask); + + constexpr absl::Duration kSoftLimit = absl::Seconds(1); + constexpr absl::Duration kHardLimit = absl::Seconds(3); + + struct rlimit cpu_limits; + cpu_limits.rlim_cur = absl::ToInt64Seconds(kSoftLimit); + cpu_limits.rlim_max = absl::ToInt64Seconds(kHardLimit); + + int pid = fork(); + MaybeSave(); + if (pid == 0) { + TEST_PCHECK(sigaction(SIGXCPU, &new_action, nullptr) == 0); + MaybeSave(); + TEST_PCHECK(setrlimit(RLIMIT_CPU, &cpu_limits) == 0); + MaybeSave(); + for (;;) { + } + } + auto c = Cleanup([pid] { + int status; + EXPECT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid)); + EXPECT_TRUE(WIFSIGNALED(status)); + EXPECT_EQ(WTERMSIG(status), SIGKILL); + }); + + // Wait for the child to exit, but do not reap it. This will allow us to check + // its CPU usage while it is zombied. + EXPECT_THAT(waitid(P_PID, pid, nullptr, WEXITED | WNOWAIT), + SyscallSucceeds()); + + absl::Duration cpu = ASSERT_NO_ERRNO_AND_VALUE(ProcessCPUTime(pid)); + EXPECT_GE(cpu, kHardLimit); +} + +// RAII type for a kernel "POSIX" interval timer. (The kernel provides system +// calls such as timer_create that behave very similarly, but not identically, +// to those described by timer_create(2); in particular, the kernel does not +// implement SIGEV_THREAD. glibc builds POSIX-compliant interval timers based on +// these kernel interval timers.) +// +// Compare implementation to FileDescriptor. 
+class IntervalTimer { + public: + IntervalTimer() = default; + + explicit IntervalTimer(int id) { set_id(id); } + + IntervalTimer(IntervalTimer&& orig) : id_(orig.release()) {} + + IntervalTimer& operator=(IntervalTimer&& orig) { + if (this == &orig) return *this; + reset(orig.release()); + return *this; + } + + IntervalTimer(const IntervalTimer& other) = delete; + IntervalTimer& operator=(const IntervalTimer& other) = delete; + + ~IntervalTimer() { reset(); } + + int get() const { return id_; } + + int release() { + int const id = id_; + id_ = -1; + return id; + } + + void reset() { reset(-1); } + + void reset(int id) { + if (id_ >= 0) { + TEST_PCHECK(syscall(SYS_timer_delete, id_) == 0); + MaybeSave(); + } + set_id(id); + } + + PosixErrorOr<struct itimerspec> Set( + int flags, const struct itimerspec& new_value) const { + struct itimerspec old_value = {}; + if (syscall(SYS_timer_settime, id_, flags, &new_value, &old_value) < 0) { + return PosixError(errno, "timer_settime"); + } + MaybeSave(); + return old_value; + } + + PosixErrorOr<struct itimerspec> Get() const { + struct itimerspec curr_value = {}; + if (syscall(SYS_timer_gettime, id_, &curr_value) < 0) { + return PosixError(errno, "timer_gettime"); + } + MaybeSave(); + return curr_value; + } + + PosixErrorOr<int> Overruns() const { + int rv = syscall(SYS_timer_getoverrun, id_); + if (rv < 0) { + return PosixError(errno, "timer_getoverrun"); + } + MaybeSave(); + return rv; + } + + private: + void set_id(int id) { id_ = std::max(id, -1); } + + // Kernel timer_t is int; glibc timer_t is void*. + int id_ = -1; +}; + +PosixErrorOr<IntervalTimer> TimerCreate(clockid_t clockid, + const struct sigevent& sev) { + int timerid; + if (syscall(SYS_timer_create, clockid, &sev, &timerid) < 0) { + return PosixError(errno, "timer_create"); + } + MaybeSave(); + return IntervalTimer(timerid); +} + +// See timerfd.cc:TimerSlack() for rationale. 
+constexpr absl::Duration kTimerSlack = absl::Milliseconds(500); + +TEST(IntervalTimerTest, IsInitiallyStopped) { + struct sigevent sev = {}; + sev.sigev_notify = SIGEV_NONE; + const auto timer = + ASSERT_NO_ERRNO_AND_VALUE(TimerCreate(CLOCK_MONOTONIC, sev)); + const struct itimerspec its = ASSERT_NO_ERRNO_AND_VALUE(timer.Get()); + EXPECT_EQ(0, its.it_value.tv_sec); + EXPECT_EQ(0, its.it_value.tv_nsec); +} + +TEST(IntervalTimerTest, SingleShotSilent) { + struct sigevent sev = {}; + sev.sigev_notify = SIGEV_NONE; + const auto timer = + ASSERT_NO_ERRNO_AND_VALUE(TimerCreate(CLOCK_MONOTONIC, sev)); + + constexpr absl::Duration kDelay = absl::Seconds(1); + struct itimerspec its = {}; + its.it_value = absl::ToTimespec(kDelay); + ASSERT_NO_ERRNO(timer.Set(0, its)); + + // The timer should count down to 0 and stop since the interval is zero. No + // overruns should be counted. + absl::SleepFor(kDelay + kTimerSlack); + its = ASSERT_NO_ERRNO_AND_VALUE(timer.Get()); + EXPECT_EQ(0, its.it_value.tv_sec); + EXPECT_EQ(0, its.it_value.tv_nsec); + EXPECT_THAT(timer.Overruns(), IsPosixErrorOkAndHolds(0)); +} + +TEST(IntervalTimerTest, PeriodicSilent) { + struct sigevent sev = {}; + sev.sigev_notify = SIGEV_NONE; + const auto timer = + ASSERT_NO_ERRNO_AND_VALUE(TimerCreate(CLOCK_MONOTONIC, sev)); + + constexpr absl::Duration kPeriod = absl::Seconds(1); + struct itimerspec its = {}; + its.it_value = its.it_interval = absl::ToTimespec(kPeriod); + ASSERT_NO_ERRNO(timer.Set(0, its)); + + absl::SleepFor(kPeriod * 3 + kTimerSlack); + + // The timer should still be running. + its = ASSERT_NO_ERRNO_AND_VALUE(timer.Get()); + EXPECT_TRUE(its.it_value.tv_nsec != 0 || its.it_value.tv_sec != 0); + + // Timer expirations are not counted as overruns under SIGEV_NONE. 
+ EXPECT_THAT(timer.Overruns(), IsPosixErrorOkAndHolds(0)); +} + +std::atomic<int> counted_signals; + +void IntervalTimerCountingSignalHandler(int sig, siginfo_t* info, + void* ucontext) { + counted_signals.fetch_add(1 + info->si_overrun); +} + +TEST(IntervalTimerTest, PeriodicGroupDirectedSignal) { + constexpr int kSigno = SIGUSR1; + constexpr int kSigvalue = 42; + + // Install our signal handler. + counted_signals.store(0); + struct sigaction sa = {}; + sa.sa_sigaction = IntervalTimerCountingSignalHandler; + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_SIGINFO; + const auto scoped_sigaction = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(kSigno, sa)); + + // Ensure that kSigno is unblocked on at least one thread. + const auto scoped_sigmask = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, kSigno)); + + struct sigevent sev = {}; + sev.sigev_notify = SIGEV_SIGNAL; + sev.sigev_signo = kSigno; + sev.sigev_value.sival_int = kSigvalue; + auto timer = ASSERT_NO_ERRNO_AND_VALUE(TimerCreate(CLOCK_MONOTONIC, sev)); + + constexpr absl::Duration kPeriod = absl::Seconds(1); + constexpr int kCycles = 3; + struct itimerspec its = {}; + its.it_value = its.it_interval = absl::ToTimespec(kPeriod); + ASSERT_NO_ERRNO(timer.Set(0, its)); + + absl::SleepFor(kPeriod * kCycles + kTimerSlack); + EXPECT_GE(counted_signals.load(), kCycles); +} + +// From Linux's include/uapi/asm-generic/siginfo.h. +#ifndef sigev_notify_thread_id +#define sigev_notify_thread_id _sigev_un._tid +#endif + +TEST(IntervalTimerTest, PeriodicThreadDirectedSignal) { + constexpr int kSigno = SIGUSR1; + constexpr int kSigvalue = 42; + + // Block kSigno so that we can accumulate overruns. 
+ sigset_t mask; + sigemptyset(&mask); + sigaddset(&mask, kSigno); + const auto scoped_sigmask = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_BLOCK, mask)); + + struct sigevent sev = {}; + sev.sigev_notify = SIGEV_THREAD_ID; + sev.sigev_signo = kSigno; + sev.sigev_value.sival_int = kSigvalue; + sev.sigev_notify_thread_id = gettid(); + auto timer = ASSERT_NO_ERRNO_AND_VALUE(TimerCreate(CLOCK_MONOTONIC, sev)); + + constexpr absl::Duration kPeriod = absl::Seconds(1); + constexpr int kCycles = 3; + struct itimerspec its = {}; + its.it_value = its.it_interval = absl::ToTimespec(kPeriod); + ASSERT_NO_ERRNO(timer.Set(0, its)); + absl::SleepFor(kPeriod * kCycles + kTimerSlack); + + // At least kCycles expirations should have occurred, resulting in kCycles-1 + // overruns (the first expiration sent the signal successfully). + siginfo_t si; + struct timespec zero_ts = absl::ToTimespec(absl::ZeroDuration()); + ASSERT_THAT(sigtimedwait(&mask, &si, &zero_ts), + SyscallSucceedsWithValue(kSigno)); + EXPECT_EQ(si.si_signo, kSigno); + EXPECT_EQ(si.si_code, SI_TIMER); + EXPECT_EQ(si.si_timerid, timer.get()); + EXPECT_GE(si.si_overrun, kCycles - 1); + EXPECT_EQ(si.si_int, kSigvalue); + + // Kill the timer, then drain any additional signal it may have enqueued. We + // can't do this before the preceding sigtimedwait because stopping or + // deleting the timer resets si_overrun to 0. + timer.reset(); + sigtimedwait(&mask, &si, &zero_ts); +} + +TEST(IntervalTimerTest, OtherThreadGroup) { + constexpr int kSigno = SIGUSR1; + + // Create a subprocess that does nothing until killed. + pid_t child_pid; + const auto sp = ASSERT_NO_ERRNO_AND_VALUE(ForkAndExec( + "/proc/self/exe", ExecveArray({"timers", "--timers_test_sleep"}), + ExecveArray(), &child_pid, nullptr)); + + // Verify that we can't create a timer that would send signals to it. 
+ struct sigevent sev = {}; + sev.sigev_notify = SIGEV_THREAD_ID; + sev.sigev_signo = kSigno; + sev.sigev_notify_thread_id = child_pid; + EXPECT_THAT(TimerCreate(CLOCK_MONOTONIC, sev), PosixErrorIs(EINVAL, _)); +} + +TEST(IntervalTimerTest, RealTimeSignalsAreNotDuplicated) { + const int kSigno = SIGRTMIN; + constexpr int kSigvalue = 42; + + // Block signo so that we can accumulate overruns. + sigset_t mask; + sigemptyset(&mask); + sigaddset(&mask, kSigno); + const auto scoped_sigmask = ScopedSignalMask(SIG_BLOCK, mask); + + struct sigevent sev = {}; + sev.sigev_notify = SIGEV_THREAD_ID; + sev.sigev_signo = kSigno; + sev.sigev_value.sival_int = kSigvalue; + sev.sigev_notify_thread_id = gettid(); + const auto timer = + ASSERT_NO_ERRNO_AND_VALUE(TimerCreate(CLOCK_MONOTONIC, sev)); + + constexpr absl::Duration kPeriod = absl::Seconds(1); + constexpr int kCycles = 3; + struct itimerspec its = {}; + its.it_value = its.it_interval = absl::ToTimespec(kPeriod); + ASSERT_NO_ERRNO(timer.Set(0, its)); + absl::SleepFor(kPeriod * kCycles + kTimerSlack); + + // Stop the timer so that no further signals are enqueued after sigtimedwait. + struct timespec zero_ts = absl::ToTimespec(absl::ZeroDuration()); + its.it_value = its.it_interval = zero_ts; + ASSERT_NO_ERRNO(timer.Set(0, its)); + + // The timer should have sent only a single signal, even though the kernel + // supports enqueueing of multiple RT signals. + siginfo_t si; + ASSERT_THAT(sigtimedwait(&mask, &si, &zero_ts), + SyscallSucceedsWithValue(kSigno)); + EXPECT_EQ(si.si_signo, kSigno); + EXPECT_EQ(si.si_code, SI_TIMER); + EXPECT_EQ(si.si_timerid, timer.get()); + // si_overrun was reset by timer_settime. + EXPECT_EQ(si.si_overrun, 0); + EXPECT_EQ(si.si_int, kSigvalue); + EXPECT_THAT(sigtimedwait(&mask, &si, &zero_ts), + SyscallFailsWithErrno(EAGAIN)); +} + +TEST(IntervalTimerTest, AlreadyPendingSignal) { + constexpr int kSigno = SIGUSR1; + constexpr int kSigvalue = 42; + + // Block kSigno so that we can accumulate overruns. 
+ sigset_t mask; + sigemptyset(&mask); + sigaddset(&mask, kSigno); + const auto scoped_sigmask = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_BLOCK, mask)); + + // Send ourselves a signal, preventing the timer from enqueuing. + ASSERT_THAT(tgkill(getpid(), gettid(), kSigno), SyscallSucceeds()); + + struct sigevent sev = {}; + sev.sigev_notify = SIGEV_THREAD_ID; + sev.sigev_signo = kSigno; + sev.sigev_value.sival_int = kSigvalue; + sev.sigev_notify_thread_id = gettid(); + auto timer = ASSERT_NO_ERRNO_AND_VALUE(TimerCreate(CLOCK_MONOTONIC, sev)); + + constexpr absl::Duration kPeriod = absl::Seconds(1); + constexpr int kCycles = 3; + struct itimerspec its = {}; + its.it_value = its.it_interval = absl::ToTimespec(kPeriod); + ASSERT_NO_ERRNO(timer.Set(0, its)); + + // End the sleep one cycle short; we will sleep for one more cycle below. + absl::SleepFor(kPeriod * (kCycles - 1)); + + // Dequeue the first signal, which we sent to ourselves with tgkill. + siginfo_t si; + struct timespec zero_ts = absl::ToTimespec(absl::ZeroDuration()); + ASSERT_THAT(sigtimedwait(&mask, &si, &zero_ts), + SyscallSucceedsWithValue(kSigno)); + EXPECT_EQ(si.si_signo, kSigno); + // glibc sigtimedwait silently replaces SI_TKILL with SI_USER: + // sysdeps/unix/sysv/linux/sigtimedwait.c:__sigtimedwait(). This isn't + // documented, so we don't depend on it. + EXPECT_THAT(si.si_code, AnyOf(SI_USER, SI_TKILL)); + + // Sleep for 1 more cycle to give the timer time to send a signal. + absl::SleepFor(kPeriod + kTimerSlack); + + // At least kCycles expirations should have occurred, resulting in kCycles-1 + // overruns (the last expiration sent the signal successfully). 
+ ASSERT_THAT(sigtimedwait(&mask, &si, &zero_ts), + SyscallSucceedsWithValue(kSigno)); + EXPECT_EQ(si.si_signo, kSigno); + EXPECT_EQ(si.si_code, SI_TIMER); + EXPECT_EQ(si.si_timerid, timer.get()); + EXPECT_GE(si.si_overrun, kCycles - 1); + EXPECT_EQ(si.si_int, kSigvalue); + + // Kill the timer, then drain any additional signal it may have enqueued. We + // can't do this before the preceding sigtimedwait because stopping or + // deleting the timer resets si_overrun to 0. + timer.reset(); + sigtimedwait(&mask, &si, &zero_ts); +} + +TEST(IntervalTimerTest, IgnoredSignalCountsAsOverrun) { + constexpr int kSigno = SIGUSR1; + constexpr int kSigvalue = 42; + + // Ignore kSigno. + struct sigaction sa = {}; + sa.sa_handler = SIG_IGN; + const auto scoped_sigaction = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(kSigno, sa)); + + // Unblock kSigno so that ignored signals will be discarded. + sigset_t mask; + sigemptyset(&mask); + sigaddset(&mask, kSigno); + auto scoped_sigmask = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, mask)); + + struct sigevent sev = {}; + sev.sigev_notify = SIGEV_THREAD_ID; + sev.sigev_signo = kSigno; + sev.sigev_value.sival_int = kSigvalue; + sev.sigev_notify_thread_id = gettid(); + auto timer = ASSERT_NO_ERRNO_AND_VALUE(TimerCreate(CLOCK_MONOTONIC, sev)); + + constexpr absl::Duration kPeriod = absl::Seconds(1); + constexpr int kCycles = 3; + struct itimerspec its = {}; + its.it_value = its.it_interval = absl::ToTimespec(kPeriod); + ASSERT_NO_ERRNO(timer.Set(0, its)); + + // End the sleep one cycle short; we will sleep for one more cycle below. + absl::SleepFor(kPeriod * (kCycles - 1)); + + // Block kSigno so that ignored signals will be enqueued. + scoped_sigmask.Release()(); + scoped_sigmask = ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_BLOCK, mask)); + + // Sleep for 1 more cycle to give the timer time to send a signal. 
+ absl::SleepFor(kPeriod + kTimerSlack); + + // At least kCycles expirations should have occurred, resulting in kCycles-1 + // overruns (the last expiration sent the signal successfully). + siginfo_t si; + struct timespec zero_ts = absl::ToTimespec(absl::ZeroDuration()); + ASSERT_THAT(sigtimedwait(&mask, &si, &zero_ts), + SyscallSucceedsWithValue(kSigno)); + EXPECT_EQ(si.si_signo, kSigno); + EXPECT_EQ(si.si_code, SI_TIMER); + EXPECT_EQ(si.si_timerid, timer.get()); + EXPECT_GE(si.si_overrun, kCycles - 1); + EXPECT_EQ(si.si_int, kSigvalue); + + // Kill the timer, then drain any additional signal it may have enqueued. We + // can't do this before the preceding sigtimedwait because stopping or + // deleting the timer resets si_overrun to 0. + timer.reset(); + sigtimedwait(&mask, &si, &zero_ts); +} + +} // namespace +} // namespace testing +} // namespace gvisor + +int main(int argc, char** argv) { + gvisor::testing::TestInit(&argc, &argv); + + if (FLAGS_timers_test_sleep) { + while (true) { + absl::SleepFor(absl::Seconds(10)); + } + } + + return RUN_ALL_TESTS(); +} diff --git a/test/syscalls/linux/tkill.cc b/test/syscalls/linux/tkill.cc new file mode 100644 index 000000000..9842ccc9b --- /dev/null +++ b/test/syscalls/linux/tkill.cc @@ -0,0 +1,75 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <sys/syscall.h> +#include <sys/types.h> +#include <unistd.h> + +#include <cerrno> +#include <csignal> + +#include "gtest/gtest.h" +#include "test/util/logging.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +static int tkill(pid_t tid, int sig) { + int ret; + do { + // NOTE: tkill(2) could return EAGAIN for RT signals. + ret = syscall(SYS_tkill, tid, sig); + } while (ret == -1 && errno == EAGAIN); + return ret; +} + +TEST(TkillTest, InvalidTID) { + EXPECT_THAT(tkill(-1, 0), SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(tkill(0, 0), SyscallFailsWithErrno(EINVAL)); +} + +TEST(TkillTest, ValidTID) { + EXPECT_THAT(tkill(gettid(), 0), SyscallSucceeds()); +} + +void SigHandler(int sig, siginfo_t* info, void* context) { + TEST_CHECK(sig == SIGRTMAX); + TEST_CHECK(info->si_pid == getpid()); + TEST_CHECK(info->si_uid == getuid()); + TEST_CHECK(info->si_code == SI_TKILL); +} + +// Test with a real signal. +TEST(TkillTest, ValidTIDAndRealSignal) { + struct sigaction sa; + sa.sa_sigaction = SigHandler; + sigfillset(&sa.sa_mask); + sa.sa_flags = SA_SIGINFO; + ASSERT_THAT(sigaction(SIGRTMAX, &sa, nullptr), SyscallSucceeds()); + // InitGoogle blocks all RT signals, so we need undo it. + sigset_t unblock; + sigemptyset(&unblock); + sigaddset(&unblock, SIGRTMAX); + ASSERT_THAT(sigprocmask(SIG_UNBLOCK, &unblock, nullptr), SyscallSucceeds()); + EXPECT_THAT(tkill(gettid(), SIGRTMAX), SyscallSucceeds()); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/truncate.cc b/test/syscalls/linux/truncate.cc new file mode 100644 index 000000000..2616a9147 --- /dev/null +++ b/test/syscalls/linux/truncate.cc @@ -0,0 +1,217 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <errno.h> +#include <signal.h> +#include <sys/resource.h> +#include <sys/stat.h> +#include <sys/vfs.h> +#include <time.h> +#include <unistd.h> +#include <iostream> +#include <string> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/strings/string_view.h" +#include "test/syscalls/linux/file_base.h" +#include "test/util/capability_util.h" +#include "test/util/cleanup.h" +#include "test/util/file_descriptor.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +class FixtureTruncateTest : public FileTest { + void SetUp() override { FileTest::SetUp(); } +}; + +TEST_F(FixtureTruncateTest, Truncate) { + // Get the current rlimit and restore after test run. + struct rlimit initial_lim; + ASSERT_THAT(getrlimit(RLIMIT_FSIZE, &initial_lim), SyscallSucceeds()); + auto cleanup = Cleanup([&initial_lim] { + EXPECT_THAT(setrlimit(RLIMIT_FSIZE, &initial_lim), SyscallSucceeds()); + }); + + // Check that it starts at size zero. + struct stat buf; + ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds()); + EXPECT_EQ(buf.st_size, 0); + + // Stay at size zero. + EXPECT_THAT(truncate(test_file_name_.c_str(), 0), SyscallSucceeds()); + ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds()); + EXPECT_EQ(buf.st_size, 0); + + // Grow to ten bytes. 
+ EXPECT_THAT(truncate(test_file_name_.c_str(), 10), SyscallSucceeds()); + ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds()); + EXPECT_EQ(buf.st_size, 10); + + // Can't be truncated to a negative number. + EXPECT_THAT(truncate(test_file_name_.c_str(), -1), + SyscallFailsWithErrno(EINVAL)); + + // Try growing past the file size limit. + sigset_t new_mask; + sigemptyset(&new_mask); + sigaddset(&new_mask, SIGXFSZ); + sigprocmask(SIG_BLOCK, &new_mask, nullptr); + struct timespec timelimit; + timelimit.tv_sec = 10; + timelimit.tv_nsec = 0; + + struct rlimit setlim; + setlim.rlim_cur = 1024; + setlim.rlim_max = RLIM_INFINITY; + ASSERT_THAT(setrlimit(RLIMIT_FSIZE, &setlim), SyscallSucceeds()); + EXPECT_THAT(truncate(test_file_name_.c_str(), 1025), + SyscallFailsWithErrno(EFBIG)); + EXPECT_EQ(sigtimedwait(&new_mask, nullptr, &timelimit), SIGXFSZ); + ASSERT_THAT(sigprocmask(SIG_UNBLOCK, &new_mask, nullptr), SyscallSucceeds()); + + // Shrink back down to zero. + EXPECT_THAT(truncate(test_file_name_.c_str(), 0), SyscallSucceeds()); + ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds()); + EXPECT_EQ(buf.st_size, 0); +} + +TEST_F(FixtureTruncateTest, Ftruncate) { + // Get the current rlimit and restore after test run. + struct rlimit initial_lim; + ASSERT_THAT(getrlimit(RLIMIT_FSIZE, &initial_lim), SyscallSucceeds()); + auto cleanup = Cleanup([&initial_lim] { + EXPECT_THAT(setrlimit(RLIMIT_FSIZE, &initial_lim), SyscallSucceeds()); + }); + + // Check that it starts at size zero. + struct stat buf; + ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds()); + EXPECT_EQ(buf.st_size, 0); + + // Stay at size zero. + EXPECT_THAT(ftruncate(test_file_fd_.get(), 0), SyscallSucceeds()); + ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds()); + EXPECT_EQ(buf.st_size, 0); + + // Grow to ten bytes. 
+ EXPECT_THAT(ftruncate(test_file_fd_.get(), 10), SyscallSucceeds()); + ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds()); + EXPECT_EQ(buf.st_size, 10); + + // Can't be truncated to a negative number. + EXPECT_THAT(ftruncate(test_file_fd_.get(), -1), + SyscallFailsWithErrno(EINVAL)); + + // Try growing past the file size limit. + sigset_t new_mask; + sigemptyset(&new_mask); + sigaddset(&new_mask, SIGXFSZ); + sigprocmask(SIG_BLOCK, &new_mask, nullptr); + struct timespec timelimit; + timelimit.tv_sec = 10; + timelimit.tv_nsec = 0; + + struct rlimit setlim; + setlim.rlim_cur = 1024; + setlim.rlim_max = RLIM_INFINITY; + ASSERT_THAT(setrlimit(RLIMIT_FSIZE, &setlim), SyscallSucceeds()); + EXPECT_THAT(ftruncate(test_file_fd_.get(), 1025), + SyscallFailsWithErrno(EFBIG)); + EXPECT_EQ(sigtimedwait(&new_mask, nullptr, &timelimit), SIGXFSZ); + ASSERT_THAT(sigprocmask(SIG_UNBLOCK, &new_mask, nullptr), SyscallSucceeds()); + + // Shrink back down to zero. + EXPECT_THAT(ftruncate(test_file_fd_.get(), 0), SyscallSucceeds()); + ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds()); + EXPECT_EQ(buf.st_size, 0); +} + +// Truncating a file down clears that portion of the file. +TEST_F(FixtureTruncateTest, FtruncateShrinkGrow) { + std::vector<char> buf(10, 'a'); + EXPECT_THAT(WriteFd(test_file_fd_.get(), buf.data(), buf.size()), + SyscallSucceedsWithValue(buf.size())); + + // Shrink then regrow the file. This should clear the second half of the file. 
+ EXPECT_THAT(ftruncate(test_file_fd_.get(), 5), SyscallSucceeds()); + EXPECT_THAT(ftruncate(test_file_fd_.get(), 10), SyscallSucceeds()); + + EXPECT_THAT(lseek(test_file_fd_.get(), 0, SEEK_SET), SyscallSucceeds()); + + std::vector<char> buf2(10); + EXPECT_THAT(ReadFd(test_file_fd_.get(), buf2.data(), buf2.size()), + SyscallSucceedsWithValue(buf2.size())); + + std::vector<char> expect = {'a', 'a', 'a', 'a', 'a', + '\0', '\0', '\0', '\0', '\0'}; + EXPECT_EQ(expect, buf2); +} + +TEST(TruncateTest, TruncateDir) { + auto temp_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + EXPECT_THAT(truncate(temp_dir.path().c_str(), 0), + SyscallFailsWithErrno(EISDIR)); +} + +TEST(TruncateTest, FtruncateDir) { + auto temp_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(temp_dir.path(), O_DIRECTORY | O_RDONLY)); + EXPECT_THAT(ftruncate(fd.get(), 0), SyscallFailsWithErrno(EINVAL)); +} + +TEST(TruncateTest, TruncateNonWriteable) { + // Make sure we don't have CAP_DAC_OVERRIDE, since that allows the user to + // always override write permissions. 
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + auto temp_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), absl::string_view(), 0555 /* mode */)); + EXPECT_THAT(truncate(temp_file.path().c_str(), 0), + SyscallFailsWithErrno(EACCES)); +} + +TEST(TruncateTest, FtruncateNonWriteable) { + auto temp_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), absl::string_view(), 0555 /* mode */)); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(temp_file.path(), O_RDONLY)); + EXPECT_THAT(ftruncate(fd.get(), 0), SyscallFailsWithErrno(EINVAL)); +} + +TEST(TruncateTest, TruncateNonExist) { + EXPECT_THAT(truncate("/foo/bar", 0), SyscallFailsWithErrno(ENOENT)); +} + +TEST(TruncateTest, FtruncateVirtualTmp_NoRandomSave) { + auto temp_file = NewTempAbsPathInDir("/dev/shm"); + const DisableSave ds; // Incompatible permissions. + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(temp_file, O_RDWR | O_CREAT | O_EXCL, 0)); + EXPECT_THAT(ftruncate(fd.get(), 100), SyscallSucceeds()); +} + +// NOTE: There are additional truncate(2)/ftruncate(2) tests in mknod.cc +// which are there to avoid running the tests on a number of different +// filesystems which may not support mknod. + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/udp_bind.cc b/test/syscalls/linux/udp_bind.cc new file mode 100644 index 000000000..419aaac76 --- /dev/null +++ b/test/syscalls/linux/udp_bind.cc @@ -0,0 +1,316 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <arpa/inet.h> +#include <sys/socket.h> +#include <sys/types.h> + +#include "gtest/gtest.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +struct sockaddr_in_common { + sa_family_t sin_family; + in_port_t sin_port; +}; + +struct SendtoTestParam { + // Human readable description of test parameter. + std::string description; + + // Test is broken in gVisor, skip. + bool skip_on_gvisor; + + // Domain for the socket that will do the sending. + int send_domain; + + // Address to bind for the socket that will do the sending. + struct sockaddr_storage send_addr; + socklen_t send_addr_len; // 0 for unbound. + + // Address to connect to for the socket that will do the sending. + struct sockaddr_storage connect_addr; + socklen_t connect_addr_len; // 0 for no connection. + + // Domain for the socket that will do the receiving. + int recv_domain; + + // Address to bind for the socket that will do the receiving. + struct sockaddr_storage recv_addr; + socklen_t recv_addr_len; + + // Address to send to. + struct sockaddr_storage sendto_addr; + socklen_t sendto_addr_len; + + // Expected errno for the sendto call. + std::vector<int> sendto_errnos; // empty on success. +}; + +class SendtoTest : public ::testing::TestWithParam<SendtoTestParam> { + protected: + SendtoTest() { + // gUnit uses printf, so so will we. 
+ printf("Testing with %s\n", GetParam().description.c_str()); + } +}; + +TEST_P(SendtoTest, Sendto) { + auto param = GetParam(); + + SKIP_IF(param.skip_on_gvisor && IsRunningOnGvisor()); + + const FileDescriptor s1 = + ASSERT_NO_ERRNO_AND_VALUE(Socket(param.send_domain, SOCK_DGRAM, 0)); + const FileDescriptor s2 = + ASSERT_NO_ERRNO_AND_VALUE(Socket(param.recv_domain, SOCK_DGRAM, 0)); + + if (param.send_addr_len > 0) { + ASSERT_THAT(bind(s1.get(), reinterpret_cast<sockaddr*>(¶m.send_addr), + param.send_addr_len), + SyscallSucceeds()); + } + + if (param.connect_addr_len > 0) { + ASSERT_THAT( + connect(s1.get(), reinterpret_cast<sockaddr*>(¶m.connect_addr), + param.connect_addr_len), + SyscallSucceeds()); + } + + ASSERT_THAT(bind(s2.get(), reinterpret_cast<sockaddr*>(¶m.recv_addr), + param.recv_addr_len), + SyscallSucceeds()); + + struct sockaddr_storage real_recv_addr = {}; + socklen_t real_recv_addr_len = param.recv_addr_len; + ASSERT_THAT( + getsockname(s2.get(), reinterpret_cast<sockaddr*>(&real_recv_addr), + &real_recv_addr_len), + SyscallSucceeds()); + + ASSERT_EQ(real_recv_addr_len, param.recv_addr_len); + + int recv_port = + reinterpret_cast<sockaddr_in_common*>(&real_recv_addr)->sin_port; + + struct sockaddr_storage sendto_addr = param.sendto_addr; + reinterpret_cast<sockaddr_in_common*>(&sendto_addr)->sin_port = recv_port; + + char buf[20] = {}; + if (!param.sendto_errnos.empty()) { + ASSERT_THAT(RetryEINTR(sendto)(s1.get(), buf, sizeof(buf), 0, + reinterpret_cast<sockaddr*>(&sendto_addr), + param.sendto_addr_len), + SyscallFailsWithErrno(ElementOf(param.sendto_errnos))); + return; + } + + ASSERT_THAT(RetryEINTR(sendto)(s1.get(), buf, sizeof(buf), 0, + reinterpret_cast<sockaddr*>(&sendto_addr), + param.sendto_addr_len), + SyscallSucceedsWithValue(sizeof(buf))); + + struct sockaddr_storage got_addr = {}; + socklen_t got_addr_len = sizeof(sockaddr_storage); + ASSERT_THAT(RetryEINTR(recvfrom)(s2.get(), buf, sizeof(buf), 0, + 
reinterpret_cast<sockaddr*>(&got_addr), + &got_addr_len), + SyscallSucceedsWithValue(sizeof(buf))); + + ASSERT_GT(got_addr_len, sizeof(sockaddr_in_common)); + int got_port = reinterpret_cast<sockaddr_in_common*>(&got_addr)->sin_port; + + struct sockaddr_storage sender_addr = {}; + socklen_t sender_addr_len = sizeof(sockaddr_storage); + ASSERT_THAT(getsockname(s1.get(), reinterpret_cast<sockaddr*>(&sender_addr), + &sender_addr_len), + SyscallSucceeds()); + + ASSERT_GT(sender_addr_len, sizeof(sockaddr_in_common)); + int sender_port = + reinterpret_cast<sockaddr_in_common*>(&sender_addr)->sin_port; + + EXPECT_EQ(got_port, sender_port); +} + +socklen_t Ipv4Addr(sockaddr_storage* addr, int port = 0) { + auto addr4 = reinterpret_cast<sockaddr_in*>(addr); + addr4->sin_family = AF_INET; + addr4->sin_port = port; + inet_pton(AF_INET, "127.0.0.1", &addr4->sin_addr.s_addr); + return sizeof(struct sockaddr_in); +} + +socklen_t Ipv6Addr(sockaddr_storage* addr, int port = 0) { + auto addr6 = reinterpret_cast<sockaddr_in6*>(addr); + addr6->sin6_family = AF_INET6; + addr6->sin6_port = port; + inet_pton(AF_INET6, "::1", &addr6->sin6_addr.s6_addr); + return sizeof(struct sockaddr_in6); +} + +socklen_t Ipv4MappedIpv6Addr(sockaddr_storage* addr, int port = 0) { + auto addr6 = reinterpret_cast<sockaddr_in6*>(addr); + addr6->sin6_family = AF_INET6; + addr6->sin6_port = port; + inet_pton(AF_INET6, "::ffff:127.0.0.1", &addr6->sin6_addr.s6_addr); + return sizeof(struct sockaddr_in6); +} + +INSTANTIATE_TEST_CASE_P( + UdpBindTest, SendtoTest, + ::testing::Values( + []() { + SendtoTestParam param = {}; + param.description = "IPv4 mapped IPv6 sendto IPv4 mapped IPv6"; + param.send_domain = AF_INET6; + param.send_addr_len = Ipv4MappedIpv6Addr(&param.send_addr); + param.recv_domain = AF_INET6; + param.recv_addr_len = Ipv4MappedIpv6Addr(&param.recv_addr); + param.sendto_addr_len = Ipv4MappedIpv6Addr(&param.sendto_addr); + return param; + }(), + []() { + SendtoTestParam param = {}; + param.description = "IPv6 
sendto IPv6"; + param.send_domain = AF_INET6; + param.send_addr_len = Ipv6Addr(&param.send_addr); + param.recv_domain = AF_INET6; + param.recv_addr_len = Ipv6Addr(&param.recv_addr); + param.sendto_addr_len = Ipv6Addr(&param.sendto_addr); + return param; + }(), + []() { + SendtoTestParam param = {}; + param.description = "IPv4 sendto IPv4"; + param.send_domain = AF_INET; + param.send_addr_len = Ipv4Addr(&param.send_addr); + param.recv_domain = AF_INET; + param.recv_addr_len = Ipv4Addr(&param.recv_addr); + param.sendto_addr_len = Ipv4Addr(&param.sendto_addr); + return param; + }(), + []() { + SendtoTestParam param = {}; + param.description = "IPv4 mapped IPv6 sendto IPv4"; + param.send_domain = AF_INET6; + param.send_addr_len = Ipv4MappedIpv6Addr(&param.send_addr); + param.recv_domain = AF_INET; + param.recv_addr_len = Ipv4Addr(&param.recv_addr); + param.sendto_addr_len = Ipv4MappedIpv6Addr(&param.sendto_addr); + return param; + }(), + []() { + SendtoTestParam param = {}; + param.description = "IPv4 sendto IPv4 mapped IPv6"; + param.send_domain = AF_INET; + param.send_addr_len = Ipv4Addr(&param.send_addr); + param.recv_domain = AF_INET6; + param.recv_addr_len = Ipv4MappedIpv6Addr(&param.recv_addr); + param.sendto_addr_len = Ipv4Addr(&param.sendto_addr); + return param; + }(), + []() { + SendtoTestParam param = {}; + param.description = "unbound IPv6 sendto IPv4 mapped IPv6"; + param.send_domain = AF_INET6; + param.recv_domain = AF_INET6; + param.recv_addr_len = Ipv4MappedIpv6Addr(&param.recv_addr); + param.sendto_addr_len = Ipv4MappedIpv6Addr(&param.sendto_addr); + return param; + }(), + []() { + SendtoTestParam param = {}; + param.description = "unbound IPv6 sendto IPv4"; + param.send_domain = AF_INET6; + param.recv_domain = AF_INET; + param.recv_addr_len = Ipv4Addr(&param.recv_addr); + param.sendto_addr_len = Ipv4MappedIpv6Addr(&param.sendto_addr); + return param; + }(), + []() { + SendtoTestParam param = {}; + param.description = "IPv6 sendto IPv4"; + param.send_domain = AF_INET6; + param.send_addr_len = 
Ipv6Addr(&param.send_addr); + param.recv_domain = AF_INET; + param.recv_addr_len = Ipv4Addr(&param.recv_addr); + param.sendto_addr_len = Ipv4MappedIpv6Addr(&param.sendto_addr); + param.sendto_errnos = {ENETUNREACH}; + return param; + }(), + []() { + SendtoTestParam param = {}; + param.description = "IPv4 mapped IPv6 sendto IPv6"; + param.send_domain = AF_INET6; + param.send_addr_len = Ipv4MappedIpv6Addr(&param.send_addr); + param.recv_domain = AF_INET6; + param.recv_addr_len = Ipv6Addr(&param.recv_addr); + param.sendto_addr_len = Ipv6Addr(&param.sendto_addr); + param.sendto_errnos = {EAFNOSUPPORT}; + // The errno returned changed in Linux commit c8e6ad0829a723. + param.sendto_errnos = {EINVAL, EAFNOSUPPORT}; + return param; + }(), + []() { + SendtoTestParam param = {}; + param.description = "connected IPv4 mapped IPv6 sendto IPv6"; + param.send_domain = AF_INET6; + param.connect_addr_len = + Ipv4MappedIpv6Addr(&param.connect_addr, 5000); + param.recv_domain = AF_INET6; + param.recv_addr_len = Ipv6Addr(&param.recv_addr); + param.sendto_addr_len = Ipv6Addr(&param.sendto_addr); + // The errno returned changed in Linux commit c8e6ad0829a723. + param.sendto_errnos = {EINVAL, EAFNOSUPPORT}; + return param; + }(), + []() { + SendtoTestParam param = {}; + param.description = "connected IPv6 sendto IPv4 mapped IPv6"; + // TODO: Determine if this inconsistent behavior is worth + // implementing. + param.skip_on_gvisor = true; + param.send_domain = AF_INET6; + param.connect_addr_len = Ipv6Addr(&param.connect_addr, 5000); + param.recv_domain = AF_INET6; + param.recv_addr_len = Ipv4MappedIpv6Addr(&param.recv_addr); + param.sendto_addr_len = Ipv4MappedIpv6Addr(&param.sendto_addr); + return param; + }(), + []() { + SendtoTestParam param = {}; + param.description = "connected IPv6 sendto IPv4"; + // TODO: Determine if this inconsistent behavior is worth + // implementing. 
+ param.skip_on_gvisor = true; + param.send_domain = AF_INET6; + param.connect_addr_len = Ipv6Addr(&param.connect_addr, 5000); + param.recv_domain = AF_INET; + param.recv_addr_len = Ipv4Addr(&param.recv_addr); + param.sendto_addr_len = Ipv4MappedIpv6Addr(&param.sendto_addr); + return param; + }())); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/udp_socket.cc b/test/syscalls/linux/udp_socket.cc new file mode 100644 index 000000000..a02b418a3 --- /dev/null +++ b/test/syscalls/linux/udp_socket.cc @@ -0,0 +1,941 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <fcntl.h> +#include <linux/errqueue.h> +#include <netinet/in.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/types.h> + +#include "gtest/gtest.h" +#include "absl/base/macros.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// The initial port to be used on gvisor. +constexpr int TestPort = 40000; + +// Fixture for tests parameterized by the address family to use (AF_INET and +// AF_INET6) when creating sockets. +class UdpSocketTest : public ::testing::TestWithParam<int> { + protected: + // Creates two sockets that will be used by test cases. 
+ void SetUp() override; + + // Closes the sockets created by SetUp(). + void TearDown() override { + EXPECT_THAT(close(s_), SyscallSucceeds()); + EXPECT_THAT(close(t_), SyscallSucceeds()); + + for (size_t i = 0; i < ABSL_ARRAYSIZE(ports_); ++i) { + ASSERT_NO_ERRNO(FreeAvailablePort(ports_[i])); + } + } + + // First UDP socket. + int s_; + + // Second UDP socket. + int t_; + + // The length of the socket address. + socklen_t addrlen_; + + // Initialized address pointing to loopback and port TestPort+i. + struct sockaddr* addr_[3]; + + // Initialize "any" address. + struct sockaddr* anyaddr_; + + // Used ports. + int ports_[3]; + + private: + // Storage for the loopback addresses. + struct sockaddr_storage addr_storage_[3]; + + // Storage for the "any" address. + struct sockaddr_storage anyaddr_storage_; +}; + +// Gets a pointer to the port component of the given address. +uint16_t* Port(struct sockaddr_storage* addr) { + switch (addr->ss_family) { + case AF_INET: { + auto sin = reinterpret_cast<struct sockaddr_in*>(addr); + return &sin->sin_port; + } + case AF_INET6: { + auto sin6 = reinterpret_cast<struct sockaddr_in6*>(addr); + return &sin6->sin6_port; + } + } + + return nullptr; +} + +void UdpSocketTest::SetUp() { + ASSERT_THAT(s_ = socket(GetParam(), SOCK_DGRAM, IPPROTO_UDP), + SyscallSucceeds()); + + ASSERT_THAT(t_ = socket(GetParam(), SOCK_DGRAM, IPPROTO_UDP), + SyscallSucceeds()); + + memset(&anyaddr_storage_, 0, sizeof(anyaddr_storage_)); + anyaddr_ = reinterpret_cast<struct sockaddr*>(&anyaddr_storage_); + anyaddr_->sa_family = GetParam(); + + // Initialize address-family-specific values. 
+ switch (GetParam()) { + case AF_INET: { + auto sin = reinterpret_cast<struct sockaddr_in*>(&anyaddr_storage_); + addrlen_ = sizeof(*sin); + sin->sin_addr.s_addr = htonl(INADDR_ANY); + break; + } + case AF_INET6: { + auto sin6 = reinterpret_cast<struct sockaddr_in6*>(&anyaddr_storage_); + addrlen_ = sizeof(*sin6); + sin6->sin6_addr = in6addr_any; + break; + } + } + + if (gvisor::testing::IsRunningOnGvisor()) { + for (size_t i = 0; i < ABSL_ARRAYSIZE(ports_); ++i) { + ports_[i] = TestPort + i; + } + } else { + // When not under gvisor, use utility function to pick port. Keep retrying + // until all ports are different. + for (size_t i = 0; i < ABSL_ARRAYSIZE(ports_); ++i) { + // Find an unused port, we specify port 0 to allow the kernel to provide + // the port. + bool unique; + do { + // Reset on every attempt: a stale `false` left over from an earlier + // collision would otherwise keep this loop spinning forever. + unique = true; + ports_[i] = ASSERT_NO_ERRNO_AND_VALUE(PortAvailable( + 0, AddressFamily::kDualStack, SocketType::kUdp, false)); + ASSERT_GT(ports_[i], 0); + for (size_t j = 0; j < i; ++j) { + if (ports_[j] == ports_[i]) { + unique = false; + break; + } + } + } while (!unique); + } + } + + // Initialize the sockaddrs. 
+ for (size_t i = 0; i < ABSL_ARRAYSIZE(addr_); ++i) { + memset(&addr_storage_[i], 0, sizeof(addr_storage_[i])); + + addr_[i] = reinterpret_cast<struct sockaddr*>(&addr_storage_[i]); + addr_[i]->sa_family = GetParam(); + + switch (GetParam()) { + case AF_INET: { + auto sin = reinterpret_cast<struct sockaddr_in*>(addr_[i]); + sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK); + sin->sin_port = htons(ports_[i]); + break; + } + case AF_INET6: { + auto sin6 = reinterpret_cast<struct sockaddr_in6*>(addr_[i]); + sin6->sin6_addr = in6addr_loopback; + sin6->sin6_port = htons(ports_[i]); + break; + } + } + } +} + +TEST_P(UdpSocketTest, Creation) { + int s_; + + ASSERT_THAT(s_ = socket(GetParam(), SOCK_DGRAM, IPPROTO_UDP), + SyscallSucceeds()); + EXPECT_THAT(close(s_), SyscallSucceeds()); + + ASSERT_THAT(s_ = socket(GetParam(), SOCK_DGRAM, 0), SyscallSucceeds()); + EXPECT_THAT(close(s_), SyscallSucceeds()); + + ASSERT_THAT(s_ = socket(GetParam(), SOCK_STREAM, IPPROTO_UDP), + SyscallFails()); +} + +TEST_P(UdpSocketTest, Getsockname) { + // Check that we're not bound. + struct sockaddr_storage addr; + socklen_t addrlen = sizeof(addr); + EXPECT_THAT(getsockname(s_, reinterpret_cast<sockaddr*>(&addr), &addrlen), + SyscallSucceeds()); + EXPECT_EQ(addrlen, addrlen_); + EXPECT_EQ(memcmp(&addr, anyaddr_, addrlen_), 0); + + // Bind, then check that we get the right address. + ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); + + addrlen = sizeof(addr); + EXPECT_THAT(getsockname(s_, reinterpret_cast<sockaddr*>(&addr), &addrlen), + SyscallSucceeds()); + EXPECT_EQ(addrlen, addrlen_); + EXPECT_EQ(memcmp(&addr, addr_[0], addrlen_), 0); +} + +TEST_P(UdpSocketTest, Getpeername) { + // Check that we're not connected. + struct sockaddr_storage addr; + socklen_t addrlen = sizeof(addr); + EXPECT_THAT(getpeername(s_, reinterpret_cast<sockaddr*>(&addr), &addrlen), + SyscallFailsWithErrno(ENOTCONN)); + + // Connect, then check that we get the right address. 
+ ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); + + addrlen = sizeof(addr); + EXPECT_THAT(getpeername(s_, reinterpret_cast<sockaddr*>(&addr), &addrlen), + SyscallSucceeds()); + EXPECT_EQ(addrlen, addrlen_); + EXPECT_EQ(memcmp(&addr, addr_[0], addrlen_), 0); +} + +TEST_P(UdpSocketTest, SendNotConnected) { + // Do send & write, they must fail. + char buf[512]; + EXPECT_THAT(send(s_, buf, sizeof(buf), 0), + SyscallFailsWithErrno(EDESTADDRREQ)); + + EXPECT_THAT(write(s_, buf, sizeof(buf)), SyscallFailsWithErrno(EDESTADDRREQ)); + + // Use sendto. + ASSERT_THAT(sendto(s_, buf, sizeof(buf), 0, addr_[0], addrlen_), + SyscallSucceedsWithValue(sizeof(buf))); + + // Check that we're bound now. + struct sockaddr_storage addr; + socklen_t addrlen = sizeof(addr); + EXPECT_THAT(getsockname(s_, reinterpret_cast<sockaddr*>(&addr), &addrlen), + SyscallSucceeds()); + EXPECT_EQ(addrlen, addrlen_); + EXPECT_NE(*Port(&addr), 0); +} + +TEST_P(UdpSocketTest, ConnectBinds) { + // Connect the socket. + ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); + + // Check that we're bound now. + struct sockaddr_storage addr; + socklen_t addrlen = sizeof(addr); + EXPECT_THAT(getsockname(s_, reinterpret_cast<sockaddr*>(&addr), &addrlen), + SyscallSucceeds()); + EXPECT_EQ(addrlen, addrlen_); + EXPECT_NE(*Port(&addr), 0); +} + +TEST_P(UdpSocketTest, ReceiveNotBound) { + char buf[512]; + EXPECT_THAT(recv(s_, buf, sizeof(buf), MSG_DONTWAIT), + SyscallFailsWithErrno(EWOULDBLOCK)); +} + +TEST_P(UdpSocketTest, Bind) { + ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); + + // Try to bind again. + EXPECT_THAT(bind(s_, addr_[1], addrlen_), SyscallFailsWithErrno(EINVAL)); + + // Check that we're still bound to the original address. 
+ struct sockaddr_storage addr; + socklen_t addrlen = sizeof(addr); + EXPECT_THAT(getsockname(s_, reinterpret_cast<sockaddr*>(&addr), &addrlen), + SyscallSucceeds()); + EXPECT_EQ(addrlen, addrlen_); + EXPECT_EQ(memcmp(&addr, addr_[0], addrlen_), 0); +} + +TEST_P(UdpSocketTest, BindInUse) { + ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); + + // Try to bind again. + EXPECT_THAT(bind(t_, addr_[0], addrlen_), SyscallFailsWithErrno(EADDRINUSE)); +} + +TEST_P(UdpSocketTest, ReceiveAfterConnect) { + // Connect s_ to loopback:TestPort, and bind t_ to loopback:TestPort. + ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); + ASSERT_THAT(bind(t_, addr_[0], addrlen_), SyscallSucceeds()); + + // Get the address s_ was bound to during connect. + struct sockaddr_storage addr; + socklen_t addrlen = sizeof(addr); + EXPECT_THAT(getsockname(s_, reinterpret_cast<sockaddr*>(&addr), &addrlen), + SyscallSucceeds()); + EXPECT_EQ(addrlen, addrlen_); + + // Send from t_ to s_. + char buf[512]; + RandomizeBuffer(buf, sizeof(buf)); + ASSERT_THAT(sendto(t_, buf, sizeof(buf), 0, + reinterpret_cast<sockaddr*>(&addr), addrlen), + SyscallSucceedsWithValue(sizeof(buf))); + + // Receive the data. + char received[512]; + EXPECT_THAT(recv(s_, received, sizeof(received), 0), + SyscallSucceedsWithValue(sizeof(received))); + EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0); +} + +TEST_P(UdpSocketTest, Connect) { + ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); + + // Check that we're connected to the right peer. + struct sockaddr_storage peer; + socklen_t peerlen = sizeof(peer); + EXPECT_THAT(getpeername(s_, reinterpret_cast<sockaddr*>(&peer), &peerlen), + SyscallSucceeds()); + EXPECT_EQ(peerlen, addrlen_); + EXPECT_EQ(memcmp(&peer, addr_[0], addrlen_), 0); + + // Try to bind after connect. + EXPECT_THAT(bind(s_, addr_[1], addrlen_), SyscallFailsWithErrno(EINVAL)); + + // Try to connect again. 
+ EXPECT_THAT(connect(s_, addr_[2], addrlen_), SyscallSucceeds()); + + // Check that peer name changed. + peerlen = sizeof(peer); + EXPECT_THAT(getpeername(s_, reinterpret_cast<sockaddr*>(&peer), &peerlen), + SyscallSucceeds()); + EXPECT_EQ(peerlen, addrlen_); + EXPECT_EQ(memcmp(&peer, addr_[2], addrlen_), 0); +} + +TEST_P(UdpSocketTest, SendToAddressOtherThanConnected) { + ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); + + // Send to a different destination than we're connected to. + char buf[512]; + EXPECT_THAT(sendto(s_, buf, sizeof(buf), 0, addr_[1], addrlen_), + SyscallSucceedsWithValue(sizeof(buf))); +} + +TEST_P(UdpSocketTest, ZerolengthWriteAllowed) { + // Bind s_ to loopback:TestPort, and connect to loopback:TestPort+1. + ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); + ASSERT_THAT(connect(s_, addr_[1], addrlen_), SyscallSucceeds()); + + // Bind t_ to loopback:TestPort+1. + ASSERT_THAT(bind(t_, addr_[1], addrlen_), SyscallSucceeds()); + + char buf[3]; + // Send zero length packet from s_ to t_. + ASSERT_THAT(write(s_, buf, 0), SyscallSucceedsWithValue(0)); + // Receive the packet. + char received[3]; + EXPECT_THAT(read(t_, received, sizeof(received)), + SyscallSucceedsWithValue(0)); +} + +TEST_P(UdpSocketTest, ZerolengthWriteAllowedNonBlockRead) { + // Bind s_ to loopback:TestPort, and connect to loopback:TestPort+1. + ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); + ASSERT_THAT(connect(s_, addr_[1], addrlen_), SyscallSucceeds()); + + // Bind t_ to loopback:TestPort+1. + ASSERT_THAT(bind(t_, addr_[1], addrlen_), SyscallSucceeds()); + + // Set t_ to non-blocking. + int opts = 0; + ASSERT_THAT(opts = fcntl(t_, F_GETFL), SyscallSucceeds()); + ASSERT_THAT(fcntl(t_, F_SETFL, opts | O_NONBLOCK), SyscallSucceeds()); + + char buf[3]; + // Send zero length packet from s_ to t_. + ASSERT_THAT(write(s_, buf, 0), SyscallSucceedsWithValue(0)); + // Receive the packet. 
+ char received[3]; + EXPECT_THAT(read(t_, received, sizeof(received)), + SyscallSucceedsWithValue(0)); + EXPECT_THAT(read(t_, received, sizeof(received)), + SyscallFailsWithErrno(EAGAIN)); +} + +TEST_P(UdpSocketTest, SendAndReceiveNotConnected) { + // Bind s_ to loopback. + ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); + + // Send some data to s_. + char buf[512]; + RandomizeBuffer(buf, sizeof(buf)); + + ASSERT_THAT(sendto(t_, buf, sizeof(buf), 0, addr_[0], addrlen_), + SyscallSucceedsWithValue(sizeof(buf))); + + // Receive the data. + char received[512]; + EXPECT_THAT(recv(s_, received, sizeof(received), 0), + SyscallSucceedsWithValue(sizeof(received))); + EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0); +} + +TEST_P(UdpSocketTest, SendAndReceiveConnected) { + // Bind s_ to loopback:TestPort, and connect to loopback:TestPort+1. + ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); + ASSERT_THAT(connect(s_, addr_[1], addrlen_), SyscallSucceeds()); + + // Bind t_ to loopback:TestPort+1. + ASSERT_THAT(bind(t_, addr_[1], addrlen_), SyscallSucceeds()); + + // Send some data from t_ to s_. + char buf[512]; + RandomizeBuffer(buf, sizeof(buf)); + + ASSERT_THAT(sendto(t_, buf, sizeof(buf), 0, addr_[0], addrlen_), + SyscallSucceedsWithValue(sizeof(buf))); + + // Receive the data. + char received[512]; + EXPECT_THAT(recv(s_, received, sizeof(received), 0), + SyscallSucceedsWithValue(sizeof(received))); + EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0); +} + +TEST_P(UdpSocketTest, ReceiveFromNotConnected) { + // Bind s_ to loopback:TestPort, and connect to loopback:TestPort+1. + ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); + ASSERT_THAT(connect(s_, addr_[1], addrlen_), SyscallSucceeds()); + + // Bind t_ to loopback:TestPort+2. + ASSERT_THAT(bind(t_, addr_[2], addrlen_), SyscallSucceeds()); + + // Send some data from t_ to s_. 
+ char buf[512]; + ASSERT_THAT(sendto(t_, buf, sizeof(buf), 0, addr_[0], addrlen_), + SyscallSucceedsWithValue(sizeof(buf))); + + // Check that the data isn't received because it was sent from a different + // address than the one we're connected to. + EXPECT_THAT(recv(s_, buf, sizeof(buf), MSG_DONTWAIT), + SyscallFailsWithErrno(EWOULDBLOCK)); +} + +TEST_P(UdpSocketTest, ReceiveBeforeConnect) { + // Bind s_ to loopback:TestPort. + ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); + + // Bind t_ to loopback:TestPort+2. + ASSERT_THAT(bind(t_, addr_[2], addrlen_), SyscallSucceeds()); + + // Send some data from t_ to s_. + char buf[512]; + RandomizeBuffer(buf, sizeof(buf)); + + ASSERT_THAT(sendto(t_, buf, sizeof(buf), 0, addr_[0], addrlen_), + SyscallSucceedsWithValue(sizeof(buf))); + + // Connect to loopback:TestPort+1. + ASSERT_THAT(connect(s_, addr_[1], addrlen_), SyscallSucceeds()); + + // Receive the data. It works because it was sent before the connect. + char received[512]; + EXPECT_THAT(recv(s_, received, sizeof(received), 0), + SyscallSucceedsWithValue(sizeof(received))); + EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0); + + // Send again. This time it should not be received. + ASSERT_THAT(sendto(t_, buf, sizeof(buf), 0, addr_[0], addrlen_), + SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(recv(s_, buf, sizeof(buf), MSG_DONTWAIT), + SyscallFailsWithErrno(EWOULDBLOCK)); +} + +TEST_P(UdpSocketTest, ReceiveFrom) { + // Bind s_ to loopback:TestPort, and connect to loopback:TestPort+1. + ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); + ASSERT_THAT(connect(s_, addr_[1], addrlen_), SyscallSucceeds()); + + // Bind t_ to loopback:TestPort+1. + ASSERT_THAT(bind(t_, addr_[1], addrlen_), SyscallSucceeds()); + + // Send some data from t_ to s_. 
+ char buf[512]; + RandomizeBuffer(buf, sizeof(buf)); + + ASSERT_THAT(sendto(t_, buf, sizeof(buf), 0, addr_[0], addrlen_), + SyscallSucceedsWithValue(sizeof(buf))); + + // Receive the data and sender address. + char received[512]; + struct sockaddr_storage addr; + socklen_t addrlen = sizeof(addr); + EXPECT_THAT(recvfrom(s_, received, sizeof(received), 0, + reinterpret_cast<sockaddr*>(&addr), &addrlen), + SyscallSucceedsWithValue(sizeof(received))); + EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0); + EXPECT_EQ(addrlen, addrlen_); + EXPECT_EQ(memcmp(&addr, addr_[1], addrlen_), 0); +} + +TEST_P(UdpSocketTest, Listen) { + ASSERT_THAT(listen(s_, SOMAXCONN), SyscallFailsWithErrno(EOPNOTSUPP)); +} + +TEST_P(UdpSocketTest, Accept) { + ASSERT_THAT(accept(s_, nullptr, nullptr), SyscallFailsWithErrno(EOPNOTSUPP)); +} + +// This test validates that a read shutdown with pending data allows the read +// to proceed with the data before returning EAGAIN. +TEST_P(UdpSocketTest, ReadShutdownNonblockPendingData) { + char received[512]; + + // Bind t_ to loopback:TestPort+2. + ASSERT_THAT(bind(t_, addr_[2], addrlen_), SyscallSucceeds()); + ASSERT_THAT(connect(t_, addr_[1], addrlen_), SyscallSucceeds()); + + // Connect the socket, then try to shutdown again. + ASSERT_THAT(bind(s_, addr_[1], addrlen_), SyscallSucceeds()); + ASSERT_THAT(connect(s_, addr_[2], addrlen_), SyscallSucceeds()); + + // Verify that we get EWOULDBLOCK when there is nothing to read. 
+ EXPECT_THAT(recv(s_, received, sizeof(received), MSG_DONTWAIT), + SyscallFailsWithErrno(EWOULDBLOCK)); + + const char* buf = "abc"; + EXPECT_THAT(write(t_, buf, 3), SyscallSucceedsWithValue(3)); + + int opts = 0; + ASSERT_THAT(opts = fcntl(s_, F_GETFL), SyscallSucceeds()); + ASSERT_THAT(fcntl(s_, F_SETFL, opts | O_NONBLOCK), SyscallSucceeds()); + ASSERT_THAT(opts = fcntl(s_, F_GETFL), SyscallSucceeds()); + ASSERT_NE(opts & O_NONBLOCK, 0); + + EXPECT_THAT(shutdown(s_, SHUT_RD), SyscallSucceeds()); + + // We should get the data even though read has been shutdown. + EXPECT_THAT(recv(s_, received, 2, 0), SyscallSucceedsWithValue(2)); + + // Because we read less than the entire packet length, since it's a packet + // based socket any subsequent reads should return EWOULDBLOCK. + EXPECT_THAT(recv(s_, received, 1, 0), SyscallFailsWithErrno(EWOULDBLOCK)); +} + +// This test is validating that even after a socket is shutdown if it's +// reconnected it will reset the shutdown state. +TEST_P(UdpSocketTest, ReadShutdownSameSocketResetsShutdownState) { + char received[512]; + EXPECT_THAT(recv(s_, received, sizeof(received), MSG_DONTWAIT), + SyscallFailsWithErrno(EWOULDBLOCK)); + + EXPECT_THAT(shutdown(s_, SHUT_RD), SyscallFailsWithErrno(ENOTCONN)); + + EXPECT_THAT(recv(s_, received, sizeof(received), MSG_DONTWAIT), + SyscallFailsWithErrno(EWOULDBLOCK)); + + // Connect the socket, then try to shutdown again. 
+ ASSERT_THAT(bind(s_, addr_[1], addrlen_), SyscallSucceeds()); + ASSERT_THAT(connect(s_, addr_[2], addrlen_), SyscallSucceeds()); + + EXPECT_THAT(recv(s_, received, sizeof(received), MSG_DONTWAIT), + SyscallFailsWithErrno(EWOULDBLOCK)); +} + +TEST_P(UdpSocketTest, ReadShutdown) { + char received[512]; + EXPECT_THAT(recv(s_, received, sizeof(received), MSG_DONTWAIT), + SyscallFailsWithErrno(EWOULDBLOCK)); + + EXPECT_THAT(shutdown(s_, SHUT_RD), SyscallFailsWithErrno(ENOTCONN)); + + EXPECT_THAT(recv(s_, received, sizeof(received), MSG_DONTWAIT), + SyscallFailsWithErrno(EWOULDBLOCK)); + + // Connect the socket, then try to shutdown again. + ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); + + EXPECT_THAT(recv(s_, received, sizeof(received), MSG_DONTWAIT), + SyscallFailsWithErrno(EWOULDBLOCK)); + + EXPECT_THAT(shutdown(s_, SHUT_RD), SyscallSucceeds()); + + EXPECT_THAT(recv(s_, received, sizeof(received), 0), + SyscallSucceedsWithValue(0)); +} + +TEST_P(UdpSocketTest, ReadShutdownDifferentThread) { + char received[512]; + EXPECT_THAT(recv(s_, received, sizeof(received), MSG_DONTWAIT), + SyscallFailsWithErrno(EWOULDBLOCK)); + + // Connect the socket, then shutdown from another thread. 
+ ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); + + EXPECT_THAT(recv(s_, received, sizeof(received), MSG_DONTWAIT), + SyscallFailsWithErrno(EWOULDBLOCK)); + + ScopedThread t([&] { + absl::SleepFor(absl::Milliseconds(200)); + EXPECT_THAT(shutdown(this->s_, SHUT_RD), SyscallSucceeds()); + }); + EXPECT_THAT(RetryEINTR(recv)(s_, received, sizeof(received), 0), + SyscallSucceedsWithValue(0)); + t.Join(); + + EXPECT_THAT(RetryEINTR(recv)(s_, received, sizeof(received), 0), + SyscallSucceedsWithValue(0)); +} + +TEST_P(UdpSocketTest, WriteShutdown) { + EXPECT_THAT(shutdown(s_, SHUT_WR), SyscallFailsWithErrno(ENOTCONN)); + ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); + EXPECT_THAT(shutdown(s_, SHUT_WR), SyscallSucceeds()); +} + +TEST_P(UdpSocketTest, SynchronousReceive) { + // Bind s_ to loopback. + ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); + + // Send some data to s_ from another thread. + char buf[512]; + RandomizeBuffer(buf, sizeof(buf)); + + // Receive the data prior to actually starting the other thread. + char received[512]; + EXPECT_THAT(RetryEINTR(recv)(s_, received, sizeof(received), MSG_DONTWAIT), + SyscallFailsWithErrno(EWOULDBLOCK)); + + // Start the thread. + ScopedThread t([&] { + absl::SleepFor(absl::Milliseconds(200)); + ASSERT_THAT( + sendto(this->t_, buf, sizeof(buf), 0, this->addr_[0], this->addrlen_), + SyscallSucceedsWithValue(sizeof(buf))); + }); + + EXPECT_THAT(RetryEINTR(recv)(s_, received, sizeof(received), 0), + SyscallSucceedsWithValue(512)); + EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0); +} + +TEST_P(UdpSocketTest, BoundaryPreserved_SendRecv) { + // Bind s_ to loopback:TestPort. + ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); + + // Send 3 packets from t_ to s_. 
+ constexpr int psize = 100; + char buf[3 * psize]; + RandomizeBuffer(buf, sizeof(buf)); + + for (int i = 0; i < 3; ++i) { + ASSERT_THAT(sendto(t_, buf + i * psize, psize, 0, addr_[0], addrlen_), + SyscallSucceedsWithValue(psize)); + } + + // Receive the data as 3 separate packets. + char received[6 * psize]; + for (int i = 0; i < 3; ++i) { + EXPECT_THAT(recv(s_, received + i * psize, 3 * psize, 0), + SyscallSucceedsWithValue(psize)); + } + EXPECT_EQ(memcmp(buf, received, 3 * psize), 0); +} + +TEST_P(UdpSocketTest, BoundaryPreserved_WritevReadv) { + // Bind s_ to loopback:TestPort. + ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); + + // Direct writes from t_ to s_. + ASSERT_THAT(connect(t_, addr_[0], addrlen_), SyscallSucceeds()); + + // Send 2 packets from t_ to s_, where each packet's data consists of 2 + // discontiguous iovecs. + constexpr size_t kPieceSize = 100; + char buf[4 * kPieceSize]; + RandomizeBuffer(buf, sizeof(buf)); + + for (int i = 0; i < 2; i++) { + struct iovec iov[2]; + for (int j = 0; j < 2; j++) { + iov[j].iov_base = reinterpret_cast<void*>( + reinterpret_cast<uintptr_t>(buf) + (i + 2 * j) * kPieceSize); + iov[j].iov_len = kPieceSize; + } + ASSERT_THAT(writev(t_, iov, 2), SyscallSucceedsWithValue(2 * kPieceSize)); + } + + // Receive the data as 2 separate packets. + char received[6 * kPieceSize]; + for (int i = 0; i < 2; i++) { + struct iovec iov[3]; + for (int j = 0; j < 3; j++) { + iov[j].iov_base = reinterpret_cast<void*>( + reinterpret_cast<uintptr_t>(received) + (i + 2 * j) * kPieceSize); + iov[j].iov_len = kPieceSize; + } + ASSERT_THAT(readv(s_, iov, 3), SyscallSucceedsWithValue(2 * kPieceSize)); + } + EXPECT_EQ(memcmp(buf, received, 4 * kPieceSize), 0); +} + +TEST_P(UdpSocketTest, BoundaryPreserved_SendMsgRecvMsg) { + // Bind s_ to loopback:TestPort. + ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); + + // Send 2 packets from t_ to s_, where each packet's data consists of 2 + // discontiguous iovecs. 
+ constexpr size_t kPieceSize = 100; + char buf[4 * kPieceSize]; + RandomizeBuffer(buf, sizeof(buf)); + + for (int i = 0; i < 2; i++) { + struct iovec iov[2]; + for (int j = 0; j < 2; j++) { + iov[j].iov_base = reinterpret_cast<void*>( + reinterpret_cast<uintptr_t>(buf) + (i + 2 * j) * kPieceSize); + iov[j].iov_len = kPieceSize; + } + struct msghdr msg = {}; + msg.msg_name = addr_[0]; + msg.msg_namelen = addrlen_; + msg.msg_iov = iov; + msg.msg_iovlen = 2; + ASSERT_THAT(sendmsg(t_, &msg, 0), SyscallSucceedsWithValue(2 * kPieceSize)); + } + + // Receive the data as 2 separate packets. + char received[6 * kPieceSize]; + for (int i = 0; i < 2; i++) { + struct iovec iov[3]; + for (int j = 0; j < 3; j++) { + iov[j].iov_base = reinterpret_cast<void*>( + reinterpret_cast<uintptr_t>(received) + (i + 2 * j) * kPieceSize); + iov[j].iov_len = kPieceSize; + } + struct msghdr msg = {}; + msg.msg_iov = iov; + msg.msg_iovlen = 3; + ASSERT_THAT(recvmsg(s_, &msg, 0), SyscallSucceedsWithValue(2 * kPieceSize)); + } + EXPECT_EQ(memcmp(buf, received, 4 * kPieceSize), 0); +} + +TEST_P(UdpSocketTest, FIONREADShutdown) { + int n = -1; + EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_EQ(n, 0); + + // A UDP socket must be connected before it can be shutdown. + ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); + + n = -1; + EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_EQ(n, 0); + + EXPECT_THAT(shutdown(s_, SHUT_RD), SyscallSucceeds()); + + n = -1; + EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_EQ(n, 0); +} + +TEST_P(UdpSocketTest, FIONREADWriteShutdown) { + int n = -1; + EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_EQ(n, 0); + + // Bind s_ to loopback:TestPort. + ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); + + // A UDP socket must be connected before it can be shutdown. 
+ ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); + + n = -1; + EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_EQ(n, 0); + + const char str[] = "abc"; + ASSERT_THAT(send(s_, str, sizeof(str), 0), + SyscallSucceedsWithValue(sizeof(str))); + + n = -1; + EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_EQ(n, sizeof(str)); + + EXPECT_THAT(shutdown(s_, SHUT_RD), SyscallSucceeds()); + + n = -1; + EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_EQ(n, sizeof(str)); +} + +TEST_P(UdpSocketTest, FIONREAD) { + // Bind s_ to loopback:TestPort. + ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); + + // Check that the bound socket with an empty buffer reports an empty first + // packet. + int n = -1; + EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_EQ(n, 0); + + // Send 3 packets from t_ to s_. + constexpr int psize = 100; + char buf[3 * psize]; + RandomizeBuffer(buf, sizeof(buf)); + + for (int i = 0; i < 3; ++i) { + ASSERT_THAT(sendto(t_, buf + i * psize, psize, 0, addr_[0], addrlen_), + SyscallSucceedsWithValue(psize)); + + // Check that regardless of how many packets are in the queue, the size + // reported is that of a single packet. + n = -1; + EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_EQ(n, psize); + } +} + +TEST_P(UdpSocketTest, FIONREADZeroLengthPacket) { + // Bind s_ to loopback:TestPort. + ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); + + // Check that the bound socket with an empty buffer reports an empty first + // packet. + int n = -1; + EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_EQ(n, 0); + + // Send 3 packets from t_ to s_. 
+ constexpr int psize = 100; + char buf[3 * psize]; + RandomizeBuffer(buf, sizeof(buf)); + + for (int i = 0; i < 3; ++i) { + ASSERT_THAT(sendto(t_, buf + i * psize, 0, 0, addr_[0], addrlen_), + SyscallSucceedsWithValue(0)); + + // Check that regardless of how many packets are in the queue, the size + // reported is that of a single packet. + n = -1; + EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_EQ(n, 0); + } +} + +TEST_P(UdpSocketTest, FIONREADZeroLengthWriteShutdown) { + int n = -1; + EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_EQ(n, 0); + + // Bind s_ to loopback:TestPort. + ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); + + // A UDP socket must be connected before it can be shutdown. + ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); + + n = -1; + EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_EQ(n, 0); + + const char str[] = "abc"; + ASSERT_THAT(send(s_, str, 0, 0), SyscallSucceedsWithValue(0)); + + n = -1; + EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_EQ(n, 0); + + EXPECT_THAT(shutdown(s_, SHUT_RD), SyscallSucceeds()); + + n = -1; + EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_EQ(n, 0); +} + +TEST_P(UdpSocketTest, ErrorQueue) { + char cmsgbuf[CMSG_SPACE(sizeof(sock_extended_err))]; + msghdr msg; + memset(&msg, 0, sizeof(msg)); + iovec iov; + memset(&iov, 0, sizeof(iov)); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = cmsgbuf; + msg.msg_controllen = sizeof(cmsgbuf); + + // recv*(MSG_ERRQUEUE) never blocks, even without MSG_DONTWAIT. 
+ EXPECT_THAT(RetryEINTR(recvmsg)(s_, &msg, MSG_ERRQUEUE), + SyscallFailsWithErrno(EAGAIN)); +} + +TEST_P(UdpSocketTest, SoTimestamp) { + ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); + ASSERT_THAT(connect(t_, addr_[0], addrlen_), SyscallSucceeds()); + + int v = 1; + EXPECT_THAT(setsockopt(s_, SOL_SOCKET, SO_TIMESTAMP, &v, sizeof(v)), + SyscallSucceeds()); + + char buf[3]; + // Send zero length packet from t_ to s_. + ASSERT_THAT(RetryEINTR(write)(t_, buf, 0), SyscallSucceedsWithValue(0)); + + char cmsgbuf[CMSG_SPACE(sizeof(struct timeval))]; + msghdr msg; + memset(&msg, 0, sizeof(msg)); + iovec iov; + memset(&iov, 0, sizeof(iov)); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = cmsgbuf; + msg.msg_controllen = sizeof(cmsgbuf); + + ASSERT_THAT(RetryEINTR(recvmsg)(s_, &msg, 0), SyscallSucceedsWithValue(0)); + + struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg); + ASSERT_NE(cmsg, nullptr); + ASSERT_EQ(cmsg->cmsg_level, SOL_SOCKET); + ASSERT_EQ(cmsg->cmsg_type, SO_TIMESTAMP); + ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(struct timeval))); + + struct timeval tv = {}; + memcpy(&tv, CMSG_DATA(cmsg), sizeof(struct timeval)); + + ASSERT_TRUE(tv.tv_sec != 0 || tv.tv_usec != 0); +} + +TEST_P(UdpSocketTest, WriteShutdownNotConnected) { + EXPECT_THAT(shutdown(s_, SHUT_WR), SyscallFailsWithErrno(ENOTCONN)); +} + +INSTANTIATE_TEST_CASE_P(AllInetTests, UdpSocketTest, + ::testing::Values(AF_INET, AF_INET6)); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/uidgid.cc b/test/syscalls/linux/uidgid.cc new file mode 100644 index 000000000..c0c1f2960 --- /dev/null +++ b/test/syscalls/linux/uidgid.cc @@ -0,0 +1,277 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <errno.h> +#include <grp.h> +#include <sys/types.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_join.h" +#include "test/util/capability_util.h" +#include "test/util/posix_error.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +DEFINE_int32(scratch_uid1, 65534, "first scratch UID"); +DEFINE_int32(scratch_uid2, 65533, "second scratch UID"); +DEFINE_int32(scratch_gid1, 65534, "first scratch GID"); +DEFINE_int32(scratch_gid2, 65533, "second scratch GID"); + +using ::testing::UnorderedElementsAreArray; + +namespace gvisor { +namespace testing { + +namespace { + +TEST(UidGidTest, Getuid) { + uid_t ruid, euid, suid; + EXPECT_THAT(getresuid(&ruid, &euid, &suid), SyscallSucceeds()); + EXPECT_THAT(getuid(), SyscallSucceedsWithValue(ruid)); + EXPECT_THAT(geteuid(), SyscallSucceedsWithValue(euid)); +} + +TEST(UidGidTest, Getgid) { + gid_t rgid, egid, sgid; + EXPECT_THAT(getresgid(&rgid, &egid, &sgid), SyscallSucceeds()); + EXPECT_THAT(getgid(), SyscallSucceedsWithValue(rgid)); + EXPECT_THAT(getegid(), SyscallSucceedsWithValue(egid)); +} + +TEST(UidGidTest, Getgroups) { + // "If size is zero, list is not modified, but the total number of + // supplementary group IDs for the process is returned." 
- getgroups(2) + int nr_groups; + ASSERT_THAT(nr_groups = getgroups(0, nullptr), SyscallSucceeds()); + std::vector<gid_t> list(nr_groups); + EXPECT_THAT(getgroups(list.size(), list.data()), SyscallSucceeds()); + + // "EINVAL: size is less than the number of supplementary group IDs, but is + // not zero." + EXPECT_THAT(getgroups(-1, nullptr), SyscallFailsWithErrno(EINVAL)); + + // Testing for EFAULT requires actually having groups, which isn't guaranteed + // here; see the setgroups test below. +} + +// If the caller's real/effective/saved user/group IDs are all 0, IsRoot returns +// true. Otherwise IsRoot logs an explanatory message and returns false. +PosixErrorOr<bool> IsRoot() { + uid_t ruid, euid, suid; + int rc = getresuid(&ruid, &euid, &suid); + MaybeSave(); + if (rc < 0) { + return PosixError(errno, "getresuid"); + } + if (ruid != 0 || euid != 0 || suid != 0) { + return false; + } + gid_t rgid, egid, sgid; + rc = getresgid(&rgid, &egid, &sgid); + MaybeSave(); + if (rc < 0) { + return PosixError(errno, "getresgid"); + } + if (rgid != 0 || egid != 0 || sgid != 0) { + return false; + } + return true; +} + +// Checks that the calling process' real/effective/saved user IDs are +// ruid/euid/suid respectively. 
+PosixError CheckUIDs(uid_t ruid, uid_t euid, uid_t suid) { + uid_t actual_ruid, actual_euid, actual_suid; + int rc = getresuid(&actual_ruid, &actual_euid, &actual_suid); + MaybeSave(); + if (rc < 0) { + return PosixError(errno, "getresuid"); + } + if (ruid != actual_ruid || euid != actual_euid || suid != actual_suid) { + // Report the mismatch as EPERM, listing both the actual and expected IDs. + return PosixError( + EPERM, absl::StrCat( + "incorrect user IDs: got (", + absl::StrJoin({actual_ruid, actual_euid, actual_suid}, ", "), + "), wanted (", absl::StrJoin({ruid, euid, suid}, ", "), ")")); + } + return NoError(); +} + +// Checks that the calling process' real/effective/saved group IDs are +// rgid/egid/sgid respectively. +PosixError CheckGIDs(gid_t rgid, gid_t egid, gid_t sgid) { + gid_t actual_rgid, actual_egid, actual_sgid; + int rc = getresgid(&actual_rgid, &actual_egid, &actual_sgid); + MaybeSave(); + if (rc < 0) { + return PosixError(errno, "getresgid"); + } + if (rgid != actual_rgid || egid != actual_egid || sgid != actual_sgid) { + // Report the mismatch as EPERM, listing both the actual and expected IDs. + return PosixError( + EPERM, absl::StrCat( + "incorrect group IDs: got (", + absl::StrJoin({actual_rgid, actual_egid, actual_sgid}, ", "), + "), wanted (", absl::StrJoin({rgid, egid, sgid}, ", "), ")")); + } + return NoError(); +} + +// N.B. These tests may break horribly unless run via a gVisor test runner, +// because changing UID in one test may forfeit permissions required by other +// tests. (The test runner runs each test in a separate process.) + +TEST(UidGidRootTest, Setuid) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsRoot())); + + // Do setuid in a separate thread so that after finishing this test, the + // process can still open files the test harness created before starting this + // test. Otherwise, the files are created by root (UID before the test), but + // cannot be opened by the `uid` set below after the test. After calling + // setuid(non-zero-UID), there is no way to get root privileges back. + ScopedThread([&] { + // Use syscall instead of glibc setuid wrapper because we want this setuid + // call to only apply to this task.
POSIX threads, however, require that all + // threads have the same UIDs, so using the setuid wrapper sets all threads' + // real UID. + EXPECT_THAT(syscall(SYS_setuid, -1), SyscallFailsWithErrno(EINVAL)); + + const uid_t uid = FLAGS_scratch_uid1; + EXPECT_THAT(syscall(SYS_setuid, uid), SyscallSucceeds()); + // "If the effective UID of the caller is root (more precisely: if the + // caller has the CAP_SETUID capability), the real UID and saved set-user-ID + // are also set." - setuid(2) + EXPECT_NO_ERRNO(CheckUIDs(uid, uid, uid)); + }); +} + +TEST(UidGidRootTest, Setgid) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsRoot())); + + EXPECT_THAT(setgid(-1), SyscallFailsWithErrno(EINVAL)); + + const gid_t gid = FLAGS_scratch_gid1; + ASSERT_THAT(setgid(gid), SyscallSucceeds()); + EXPECT_NO_ERRNO(CheckGIDs(gid, gid, gid)); +} + +TEST(UidGidRootTest, SetgidNotFromThreadGroupLeader) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsRoot())); + + const gid_t gid = FLAGS_scratch_gid1; + // NOTE: Do setgid in a separate thread so that we can test if + // info.si_pid is set correctly. + ScopedThread([gid] { ASSERT_THAT(setgid(gid), SyscallSucceeds()); }); + EXPECT_NO_ERRNO(CheckGIDs(gid, gid, gid)); +} + +TEST(UidGidRootTest, Setreuid) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsRoot())); + + // "Supplying a value of -1 for either the real or effective user ID forces + // the system to leave that ID unchanged." - setreuid(2) + EXPECT_THAT(setreuid(-1, -1), SyscallSucceeds()); + EXPECT_NO_ERRNO(CheckUIDs(0, 0, 0)); + + // Do setuid in a separate thread so that after finishing this test, the + // process can still open files the test harness created before starting this + // test. Otherwise, the files are created by root (UID before the test), but + // cannot be opened by the `uid` set below after the test. After calling + // setuid(non-zero-UID), there is no way to get root privileges back. 
+ ScopedThread([&] { + const uid_t ruid = FLAGS_scratch_uid1; + const uid_t euid = FLAGS_scratch_uid2; + + // Use syscall instead of glibc setuid wrapper because we want this setuid + // call to only apply to this task. posix threads, however, require that all + // threads have the same UIDs, so using the setuid wrapper sets all threads' + // real UID. + EXPECT_THAT(syscall(SYS_setreuid, ruid, euid), SyscallSucceeds()); + + // "If the real user ID is set or the effective user ID is set to a value + // not equal to the previous real user ID, the saved set-user-ID will be set + // to the new effective user ID." - setreuid(2) + EXPECT_NO_ERRNO(CheckUIDs(ruid, euid, euid)); + }); +} + +TEST(UidGidRootTest, Setregid) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsRoot())); + + EXPECT_THAT(setregid(-1, -1), SyscallSucceeds()); + EXPECT_NO_ERRNO(CheckGIDs(0, 0, 0)); + + const gid_t rgid = FLAGS_scratch_gid1; + const gid_t egid = FLAGS_scratch_gid2; + ASSERT_THAT(setregid(rgid, egid), SyscallSucceeds()); + EXPECT_NO_ERRNO(CheckGIDs(rgid, egid, egid)); +} + +TEST(UidGidRootTest, Setresuid) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsRoot())); + + // "If one of the arguments equals -1, the corresponding value is not + // changed." - setresuid(2) + EXPECT_THAT(setresuid(-1, -1, -1), SyscallSucceeds()); + EXPECT_NO_ERRNO(CheckUIDs(0, 0, 0)); + + // Do setuid in a separate thread so that after finishing this test, the + // process can still open files the test harness created before starting this + // test. Otherwise, the files are created by root (UID before the test), but + // cannot be opened by the `uid` set below after the test. After calling + // setuid(non-zero-UID), there is no way to get root privileges back. + ScopedThread([&] { + const uid_t ruid = 12345; + const uid_t euid = 23456; + const uid_t suid = 34567; + + // Use syscall instead of glibc setuid wrapper because we want this setuid + // call to only apply to this task. 
posix threads, however, require that all + // threads have the same UIDs, so using the setuid wrapper sets all threads' + // real UID. + EXPECT_THAT(syscall(SYS_setresuid, ruid, euid, suid), SyscallSucceeds()); + EXPECT_NO_ERRNO(CheckUIDs(ruid, euid, suid)); + }); +} + +TEST(UidGidRootTest, Setresgid) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsRoot())); + + EXPECT_THAT(setresgid(-1, -1, -1), SyscallSucceeds()); + EXPECT_NO_ERRNO(CheckGIDs(0, 0, 0)); + + const gid_t rgid = 12345; + const gid_t egid = 23456; + const gid_t sgid = 34567; + ASSERT_THAT(setresgid(rgid, egid, sgid), SyscallSucceeds()); + EXPECT_NO_ERRNO(CheckGIDs(rgid, egid, sgid)); +} + +TEST(UidGidRootTest, Setgroups) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsRoot())); + + std::vector<gid_t> list = {123, 500}; + ASSERT_THAT(setgroups(list.size(), list.data()), SyscallSucceeds()); + std::vector<gid_t> list2(list.size()); + ASSERT_THAT(getgroups(list2.size(), list2.data()), SyscallSucceeds()); + EXPECT_THAT(list, UnorderedElementsAreArray(list2)); + + // "EFAULT: list has an invalid address." + EXPECT_THAT(getgroups(100, reinterpret_cast<gid_t*>(-1)), + SyscallFailsWithErrno(EFAULT)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/uname.cc b/test/syscalls/linux/uname.cc new file mode 100644 index 000000000..d22a34bd7 --- /dev/null +++ b/test/syscalls/linux/uname.cc @@ -0,0 +1,99 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include <sched.h> +#include <sys/utsname.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "absl/strings/string_view.h" +#include "test/util/capability_util.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +TEST(UnameTest, Sanity) { + struct utsname buf; + ASSERT_THAT(uname(&buf), SyscallSucceeds()); + EXPECT_NE(strlen(buf.release), 0); + EXPECT_NE(strlen(buf.version), 0); + EXPECT_NE(strlen(buf.machine), 0); + EXPECT_NE(strlen(buf.sysname), 0); + EXPECT_NE(strlen(buf.nodename), 0); + EXPECT_NE(strlen(buf.domainname), 0); +} + +TEST(UnameTest, SetNames) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))); + + constexpr char kHostname[] = "wubbalubba"; + ASSERT_THAT(sethostname(kHostname, sizeof(kHostname)), SyscallSucceeds()); + + constexpr char kDomainname[] = "dubdub.com"; + ASSERT_THAT(setdomainname(kDomainname, sizeof(kDomainname)), + SyscallSucceeds()); + + struct utsname buf; + EXPECT_THAT(uname(&buf), SyscallSucceeds()); + EXPECT_EQ(absl::string_view(buf.nodename), kHostname); + EXPECT_EQ(absl::string_view(buf.domainname), kDomainname); + + // These should just be glibc wrappers that also call uname(2). 
+ char hostname[65]; + EXPECT_THAT(gethostname(hostname, sizeof(hostname)), SyscallSucceeds()); + EXPECT_EQ(absl::string_view(hostname), kHostname); + + char domainname[65]; + EXPECT_THAT(getdomainname(domainname, sizeof(domainname)), SyscallSucceeds()); + EXPECT_EQ(absl::string_view(domainname), kDomainname); +} + +TEST(UnameTest, UnprivilegedSetNames) { + if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))) { + EXPECT_NO_ERRNO(SetCapability(CAP_SYS_ADMIN, false)); + } + + EXPECT_THAT(sethostname("", 0), SyscallFailsWithErrno(EPERM)); + EXPECT_THAT(setdomainname("", 0), SyscallFailsWithErrno(EPERM)); +} + +TEST(UnameTest, UnshareUTS) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))); + + struct utsname init; + ASSERT_THAT(uname(&init), SyscallSucceeds()); + + ScopedThread([&]() { + EXPECT_THAT(unshare(CLONE_NEWUTS), SyscallSucceeds()); + + constexpr char kHostname[] = "wubbalubba"; + EXPECT_THAT(sethostname(kHostname, sizeof(kHostname)), SyscallSucceeds()); + + char hostname[65]; + EXPECT_THAT(gethostname(hostname, sizeof(hostname)), SyscallSucceeds()); + }); + + struct utsname after; + EXPECT_THAT(uname(&after), SyscallSucceeds()); + EXPECT_EQ(absl::string_view(after.nodename), init.nodename); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/unix_domain_socket_test_util.cc b/test/syscalls/linux/unix_domain_socket_test_util.cc new file mode 100644 index 000000000..2d7a530b9 --- /dev/null +++ b/test/syscalls/linux/unix_domain_socket_test_util.cc @@ -0,0 +1,346 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "test/syscalls/linux/unix_domain_socket_test_util.h" + +#include <sys/un.h> +#include <vector> + +#include "gtest/gtest.h" +#include "absl/strings/str_cat.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +std::string DescribeUnixDomainSocketType(int type) { + const char* type_str = nullptr; + switch (type & ~(SOCK_NONBLOCK | SOCK_CLOEXEC)) { + case SOCK_STREAM: + type_str = "SOCK_STREAM"; + break; + case SOCK_DGRAM: + type_str = "SOCK_DGRAM"; + break; + case SOCK_SEQPACKET: + type_str = "SOCK_SEQPACKET"; + break; + } + if (!type_str) { + return absl::StrCat("Unix domain socket with unknown type ", type); + } else { + return absl::StrCat(((type & SOCK_NONBLOCK) != 0) ? "non-blocking " : "", + ((type & SOCK_CLOEXEC) != 0) ? 
"close-on-exec " : "", + type_str, " Unix domain socket"); + } +} + +SocketPairKind UnixDomainSocketPair(int type) { + return SocketPairKind{DescribeUnixDomainSocketType(type), + SyscallSocketPairCreator(AF_UNIX, type, 0)}; +} + +SocketPairKind FilesystemBoundUnixDomainSocketPair(int type) { + std::string description = absl::StrCat(DescribeUnixDomainSocketType(type), + " created with filesystem binding"); + if ((type & SOCK_DGRAM) == SOCK_DGRAM) { + return SocketPairKind{ + description, + FilesystemBidirectionalBindSocketPairCreator(AF_UNIX, type, 0)}; + } + return SocketPairKind{ + description, FilesystemAcceptBindSocketPairCreator(AF_UNIX, type, 0)}; +} + +SocketPairKind AbstractBoundUnixDomainSocketPair(int type) { + std::string description = absl::StrCat(DescribeUnixDomainSocketType(type), + " created with abstract namespace binding"); + if ((type & SOCK_DGRAM) == SOCK_DGRAM) { + return SocketPairKind{ + description, + AbstractBidirectionalBindSocketPairCreator(AF_UNIX, type, 0)}; + } + return SocketPairKind{description, + AbstractAcceptBindSocketPairCreator(AF_UNIX, type, 0)}; +} + +SocketPairKind SocketpairGoferUnixDomainSocketPair(int type) { + std::string description = absl::StrCat(DescribeUnixDomainSocketType(type), + " created with the socketpair gofer"); + return SocketPairKind{description, + SocketpairGoferSocketPairCreator(AF_UNIX, type, 0)}; +} + +SocketPairKind SocketpairGoferFileSocketPair(int type) { + std::string description = + absl::StrCat(((type & O_NONBLOCK) != 0) ? "non-blocking " : "", + ((type & O_CLOEXEC) != 0) ? 
"close-on-exec " : "", + "file socket created with the socketpair gofer"); + return SocketPairKind{description, + SocketpairGoferFileSocketPairCreator(type)}; +} + +SocketPairKind FilesystemUnboundUnixDomainSocketPair(int type) { + return SocketPairKind{absl::StrCat(DescribeUnixDomainSocketType(type), + " unbound with a filesystem address"), + FilesystemUnboundSocketPairCreator(AF_UNIX, type, 0)}; +} + +SocketPairKind AbstractUnboundUnixDomainSocketPair(int type) { + return SocketPairKind{ + absl::StrCat(DescribeUnixDomainSocketType(type), + " unbound with an abstract namespace address"), + AbstractUnboundSocketPairCreator(AF_UNIX, type, 0)}; +} + +void SendSingleFD(int sock, int fd, char buf[], int buf_size) { + ASSERT_NO_FATAL_FAILURE(SendFDs(sock, &fd, 1, buf, buf_size)); +} + +void SendFDs(int sock, int fds[], int fds_size, char buf[], int buf_size) { + struct msghdr msg = {}; + std::vector<char> control(CMSG_SPACE(fds_size * sizeof(int))); + msg.msg_control = &control[0]; + msg.msg_controllen = control.size(); + + struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_len = CMSG_LEN(fds_size * sizeof(int)); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + for (int i = 0; i < fds_size; i++) { + memcpy(CMSG_DATA(cmsg) + i * sizeof(int), &fds[i], sizeof(int)); + } + + ASSERT_THAT(SendMsg(sock, &msg, buf, buf_size), + IsPosixErrorOkAndHolds(buf_size)); +} + +void RecvSingleFD(int sock, int* fd, char buf[], int buf_size) { + ASSERT_NO_FATAL_FAILURE(RecvFDs(sock, fd, 1, buf, buf_size, buf_size)); +} + +void RecvSingleFD(int sock, int* fd, char buf[], int buf_size, + int expected_size) { + ASSERT_NO_FATAL_FAILURE(RecvFDs(sock, fd, 1, buf, buf_size, expected_size)); +} + +void RecvFDs(int sock, int fds[], int fds_size, char buf[], int buf_size) { + ASSERT_NO_FATAL_FAILURE( + RecvFDs(sock, fds, fds_size, buf, buf_size, buf_size)); +} + +void RecvFDs(int sock, int fds[], int fds_size, char buf[], int buf_size, + int expected_size, bool peek) { + 
struct msghdr msg = {}; + std::vector<char> control(CMSG_SPACE(fds_size * sizeof(int))); + msg.msg_control = &control[0]; + msg.msg_controllen = control.size(); + + struct iovec iov; + iov.iov_base = buf; + iov.iov_len = buf_size; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + int flags = 0; + if (peek) { + flags |= MSG_PEEK; + } + + ASSERT_THAT(RetryEINTR(recvmsg)(sock, &msg, flags), + SyscallSucceedsWithValue(expected_size)); + struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg); + ASSERT_NE(cmsg, nullptr); + ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(fds_size * sizeof(int))); + ASSERT_EQ(cmsg->cmsg_level, SOL_SOCKET); + ASSERT_EQ(cmsg->cmsg_type, SCM_RIGHTS); + + for (int i = 0; i < fds_size; i++) { + memcpy(&fds[i], CMSG_DATA(cmsg) + i * sizeof(int), sizeof(int)); + } +} + +void RecvFDs(int sock, int fds[], int fds_size, char buf[], int buf_size, + int expected_size) { + ASSERT_NO_FATAL_FAILURE( + RecvFDs(sock, fds, fds_size, buf, buf_size, expected_size, false)); +} + +void PeekSingleFD(int sock, int* fd, char buf[], int buf_size) { + ASSERT_NO_FATAL_FAILURE(RecvFDs(sock, fd, 1, buf, buf_size, buf_size, true)); +} + +void RecvNoCmsg(int sock, char buf[], int buf_size, int expected_size) { + struct msghdr msg = {}; + char control[CMSG_SPACE(sizeof(int)) + CMSG_SPACE(sizeof(struct ucred))]; + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + struct iovec iov; + iov.iov_base = buf; + iov.iov_len = buf_size; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + ASSERT_THAT(RetryEINTR(recvmsg)(sock, &msg, 0), + SyscallSucceedsWithValue(expected_size)); + struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg); + EXPECT_EQ(cmsg, nullptr); +} + +void SendNullCmsg(int sock, char buf[], int buf_size) { + struct msghdr msg = {}; + msg.msg_control = nullptr; + msg.msg_controllen = 0; + + ASSERT_THAT(SendMsg(sock, &msg, buf, buf_size), + IsPosixErrorOkAndHolds(buf_size)); +} + +void SendCreds(int sock, ucred creds, char buf[], int buf_size) { + struct msghdr msg = {}; + + char 
control[CMSG_SPACE(sizeof(struct ucred))]; + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_CREDENTIALS; + cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred)); + memcpy(CMSG_DATA(cmsg), &creds, sizeof(struct ucred)); + + ASSERT_THAT(SendMsg(sock, &msg, buf, buf_size), + IsPosixErrorOkAndHolds(buf_size)); +} + +void SendCredsAndFD(int sock, ucred creds, int fd, char buf[], int buf_size) { + struct msghdr msg = {}; + + char control[CMSG_SPACE(sizeof(struct ucred)) + CMSG_SPACE(sizeof(int))] = {}; + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + struct cmsghdr* cmsg1 = CMSG_FIRSTHDR(&msg); + cmsg1->cmsg_level = SOL_SOCKET; + cmsg1->cmsg_type = SCM_CREDENTIALS; + cmsg1->cmsg_len = CMSG_LEN(sizeof(struct ucred)); + memcpy(CMSG_DATA(cmsg1), &creds, sizeof(struct ucred)); + + struct cmsghdr* cmsg2 = CMSG_NXTHDR(&msg, cmsg1); + cmsg2->cmsg_level = SOL_SOCKET; + cmsg2->cmsg_type = SCM_RIGHTS; + cmsg2->cmsg_len = CMSG_LEN(sizeof(int)); + memcpy(CMSG_DATA(cmsg2), &fd, sizeof(int)); + + ASSERT_THAT(SendMsg(sock, &msg, buf, buf_size), + IsPosixErrorOkAndHolds(buf_size)); +} + +void RecvCreds(int sock, ucred* creds, char buf[], int buf_size) { + ASSERT_NO_FATAL_FAILURE(RecvCreds(sock, creds, buf, buf_size, buf_size)); +} + +void RecvCreds(int sock, ucred* creds, char buf[], int buf_size, + int expected_size) { + struct msghdr msg = {}; + char control[CMSG_SPACE(sizeof(struct ucred))]; + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + struct iovec iov; + iov.iov_base = buf; + iov.iov_len = buf_size; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + ASSERT_THAT(RetryEINTR(recvmsg)(sock, &msg, 0), + SyscallSucceedsWithValue(expected_size)); + struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg); + ASSERT_NE(cmsg, nullptr); + ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(struct ucred))); + ASSERT_EQ(cmsg->cmsg_level, SOL_SOCKET); + 
ASSERT_EQ(cmsg->cmsg_type, SCM_CREDENTIALS); + + memcpy(creds, CMSG_DATA(cmsg), sizeof(struct ucred)); +} + +void RecvCredsAndFD(int sock, ucred* creds, int* fd, char buf[], int buf_size) { + struct msghdr msg = {}; + char control[CMSG_SPACE(sizeof(struct ucred)) + CMSG_SPACE(sizeof(int))]; + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + struct iovec iov; + iov.iov_base = buf; + iov.iov_len = buf_size; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + ASSERT_THAT(RetryEINTR(recvmsg)(sock, &msg, 0), + SyscallSucceedsWithValue(buf_size)); + + struct cmsghdr* cmsg1 = CMSG_FIRSTHDR(&msg); + ASSERT_NE(cmsg1, nullptr); + ASSERT_EQ(cmsg1->cmsg_len, CMSG_LEN(sizeof(struct ucred))); + ASSERT_EQ(cmsg1->cmsg_level, SOL_SOCKET); + ASSERT_EQ(cmsg1->cmsg_type, SCM_CREDENTIALS); + memcpy(creds, CMSG_DATA(cmsg1), sizeof(struct ucred)); + + struct cmsghdr* cmsg2 = CMSG_NXTHDR(&msg, cmsg1); + ASSERT_NE(cmsg2, nullptr); + ASSERT_EQ(cmsg2->cmsg_len, CMSG_LEN(sizeof(int))); + ASSERT_EQ(cmsg2->cmsg_level, SOL_SOCKET); + ASSERT_EQ(cmsg2->cmsg_type, SCM_RIGHTS); + memcpy(fd, CMSG_DATA(cmsg2), sizeof(int)); +} + +void RecvSingleFDUnaligned(int sock, int* fd, char buf[], int buf_size) { + struct msghdr msg = {}; + char control[CMSG_SPACE(sizeof(int)) - sizeof(int)]; + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + struct iovec iov; + iov.iov_base = buf; + iov.iov_len = buf_size; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + ASSERT_THAT(RetryEINTR(recvmsg)(sock, &msg, 0), + SyscallSucceedsWithValue(buf_size)); + + struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg); + ASSERT_NE(cmsg, nullptr); + ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(int))); + ASSERT_EQ(cmsg->cmsg_level, SOL_SOCKET); + ASSERT_EQ(cmsg->cmsg_type, SCM_RIGHTS); + + memcpy(fd, CMSG_DATA(cmsg), sizeof(int)); +} + +void SetSoPassCred(int sock) { + int one = 1; + EXPECT_THAT(setsockopt(sock, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one)), + SyscallSucceeds()); +} + +void UnsetSoPassCred(int 
sock) { + int zero = 0; + EXPECT_THAT(setsockopt(sock, SOL_SOCKET, SO_PASSCRED, &zero, sizeof(zero)), + SyscallSucceeds()); +} + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/unix_domain_socket_test_util.h b/test/syscalls/linux/unix_domain_socket_test_util.h new file mode 100644 index 000000000..1b09aeae7 --- /dev/null +++ b/test/syscalls/linux/unix_domain_socket_test_util.h @@ -0,0 +1,161 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GVISOR_TEST_SYSCALLS_UNIX_DOMAIN_SOCKET_TEST_UTIL_H_ +#define GVISOR_TEST_SYSCALLS_UNIX_DOMAIN_SOCKET_TEST_UTIL_H_ + +#include <string> +#include "test/syscalls/linux/socket_test_util.h" + +namespace gvisor { +namespace testing { + +// DescribeUnixDomainSocketType returns a human-readable std::string explaining the +// given Unix domain socket type. +std::string DescribeUnixDomainSocketType(int type); + +// UnixDomainSocketPair returns a SocketPairKind that represents SocketPairs +// created by invoking the socketpair() syscall with AF_UNIX and the given type. +SocketPairKind UnixDomainSocketPair(int type); + +// FilesystemBoundUnixDomainSocketPair returns a SocketPairKind that represents +// SocketPairs created with bind() and accept() syscalls with a temp file path, +// AF_UNIX and the given type. 
+SocketPairKind FilesystemBoundUnixDomainSocketPair(int type); + +// AbstractBoundUnixDomainSocketPair returns a SocketPairKind that represents +// SocketPairs created with bind() and accept() syscalls with a temp abstract +// path, AF_UNIX and the given type. +SocketPairKind AbstractBoundUnixDomainSocketPair(int type); + +// SocketpairGoferUnixDomainSocketPair returns a SocketPairKind that was created +// with two sockets connected to the socketpair gofer. +SocketPairKind SocketpairGoferUnixDomainSocketPair(int type); + +// SocketpairGoferFileSocketPair returns a SocketPairKind that was created with +// two open() calls on paths backed by the socketpair gofer. +SocketPairKind SocketpairGoferFileSocketPair(int type); + +// FilesystemUnboundUnixDomainSocketPair returns a SocketPairKind that +// represents two unbound sockets and a filesystem path for binding. +SocketPairKind FilesystemUnboundUnixDomainSocketPair(int type); + +// AbstractUnboundUnixDomainSocketPair returns a SocketPairKind that represents +// two unbound sockets and an abstract namespace path for binding. +SocketPairKind AbstractUnboundUnixDomainSocketPair(int type); + +// SendSingleFD sends both a single FD and some data over a unix domain socket +// specified by an FD. Note that calls to this function must be wrapped in +// ASSERT_NO_FATAL_FAILURE for internal assertions to halt the test. +void SendSingleFD(int sock, int fd, char buf[], int buf_size); + +// SendFDs sends an arbitrary number of FDs and some data over a unix domain +// socket specified by an FD. Note that calls to this function must be wrapped +// in ASSERT_NO_FATAL_FAILURE for internal assertions to halt the test. +void SendFDs(int sock, int fds[], int fds_size, char buf[], int buf_size); + +// RecvSingleFD receives both a single FD and some data over a unix domain +// socket specified by an FD. Note that calls to this function must be wrapped +// in ASSERT_NO_FATAL_FAILURE for internal assertions to halt the test.
+void RecvSingleFD(int sock, int* fd, char buf[], int buf_size); + +// RecvSingleFD receives both a single FD and some data over a unix domain +// socket specified by an FD. This version allows the expected amount of data +// received to be different than the buffer size. Note that calls to this +// function must be wrapped in ASSERT_NO_FATAL_FAILURE for internal assertions +// to halt the test. +void RecvSingleFD(int sock, int* fd, char buf[], int buf_size, + int expected_size); + +// PeekSingleFD peeks at both a single FD and some data over a unix domain +// socket specified by an FD. Note that calls to this function must be wrapped +// in ASSERT_NO_FATAL_FAILURE for internal assertions to halt the test. +void PeekSingleFD(int sock, int* fd, char buf[], int buf_size); + +// RecvFDs receives both an arbitrary number of FDs and some data over a unix +// domain socket specified by an FD. Note that calls to this function must be +// wrapped in ASSERT_NO_FATAL_FAILURE for internal assertions to halt the test. +void RecvFDs(int sock, int fds[], int fds_size, char buf[], int buf_size); + +// RecvFDs receives both an arbitrary number of FDs and some data over a unix +// domain socket specified by an FD. This version allows the expected amount of +// data received to be different than the buffer size. Note that calls to this +// function must be wrapped in ASSERT_NO_FATAL_FAILURE for internal assertions +// to halt the test. +void RecvFDs(int sock, int fds[], int fds_size, char buf[], int buf_size, + int expected_size); + +// RecvNoCmsg receives some data over a unix domain socket specified by an FD +// and asserts that no control messages are available for receiving. Note that +// calls to this function must be wrapped in ASSERT_NO_FATAL_FAILURE for +// internal assertions to halt the test. 
+void RecvNoCmsg(int sock, char buf[], int buf_size, int expected_size); + +inline void RecvNoCmsg(int sock, char buf[], int buf_size) { + RecvNoCmsg(sock, buf, buf_size, buf_size); +} + +// SendCreds sends the credentials of the current process and some data over a +// unix domain socket specified by an FD. Note that calls to this function must +// be wrapped in ASSERT_NO_FATAL_FAILURE for internal assertions to halt the +// test. +void SendCreds(int sock, ucred creds, char buf[], int buf_size); + +// SendCredsAndFD sends the credentials of the current process, a single FD, and +// some data over a unix domain socket specified by an FD. Note that calls to +// this function must be wrapped in ASSERT_NO_FATAL_FAILURE for internal +// assertions to halt the test. +void SendCredsAndFD(int sock, ucred creds, int fd, char buf[], int buf_size); + +// RecvCreds receives some credentials and some data over a unix domain socket +// specified by an FD. Note that calls to this function must be wrapped in +// ASSERT_NO_FATAL_FAILURE for internal assertions to halt the test. +void RecvCreds(int sock, ucred* creds, char buf[], int buf_size); + +// RecvCreds receives some credentials and some data over a unix domain socket +// specified by an FD. This version allows the expected amount of data received +// to be different than the buffer size. Note that calls to this function must +// be wrapped in ASSERT_NO_FATAL_FAILURE for internal assertions to halt the +// test. +void RecvCreds(int sock, ucred* creds, char buf[], int buf_size, + int expected_size); + +// RecvCredsAndFD receives some credentials, a single FD, and some data over a +// unix domain socket specified by an FD. Note that calls to this function must +// be wrapped in ASSERT_NO_FATAL_FAILURE for internal assertions to halt the +// test. 
+void RecvCredsAndFD(int sock, ucred* creds, int* fd, char buf[], int buf_size); + +// SendNullCmsg sends a null control message and some data over a unix domain +// socket specified by an FD. Note that calls to this function must be wrapped +// in ASSERT_NO_FATAL_FAILURE for internal assertions to halt the test. +void SendNullCmsg(int sock, char buf[], int buf_size); + +// RecvSingleFDUnaligned receives both a single FD and some data over a unix domain +// socket specified by an FD. This function does not obey the spec, but Linux +// allows it and the apphosting code depends on this quirk. Note that calls to +// this function must be wrapped in ASSERT_NO_FATAL_FAILURE for internal +// assertions to halt the test. +void RecvSingleFDUnaligned(int sock, int* fd, char buf[], int buf_size); + +// SetSoPassCred sets the SO_PASSCRED option on the specified socket. +void SetSoPassCred(int sock); + +// UnsetSoPassCred clears the SO_PASSCRED option on the specified socket. +void UnsetSoPassCred(int sock); + +} // namespace testing +} // namespace gvisor + +#endif // GVISOR_TEST_SYSCALLS_UNIX_DOMAIN_SOCKET_TEST_UTIL_H_ diff --git a/test/syscalls/linux/unlink.cc b/test/syscalls/linux/unlink.cc new file mode 100644 index 000000000..4d5e0c6b6 --- /dev/null +++ b/test/syscalls/linux/unlink.cc @@ -0,0 +1,211 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <errno.h> +#include <fcntl.h> +#include <unistd.h> + +#include "gtest/gtest.h" +#include "absl/strings/str_cat.h" +#include "test/util/capability_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/fs_util.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +TEST(UnlinkTest, IsDir) { + auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + + EXPECT_THAT(unlink(dir.path().c_str()), SyscallFailsWithErrno(EISDIR)); +} + +TEST(UnlinkTest, DirNotEmpty) { + auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + + int fd; + std::string path = JoinPath(dir.path(), "ExistingFile"); + EXPECT_THAT(fd = open(path.c_str(), O_RDWR | O_CREAT, 0666), + SyscallSucceeds()); + EXPECT_THAT(close(fd), SyscallSucceeds()); + EXPECT_THAT(rmdir(dir.path().c_str()), SyscallFailsWithErrno(ENOTEMPTY)); +} + +TEST(UnlinkTest, Rmdir) { + std::string path = JoinPath(GetAbsoluteTestTmpdir(), "NewDir"); + ASSERT_THAT(mkdir(path.c_str(), 0755), SyscallSucceeds()); + EXPECT_THAT(rmdir(path.c_str()), SyscallSucceeds()); +} + +TEST(UnlinkTest, AtDir) { + int dirfd; + EXPECT_THAT(dirfd = open(GetAbsoluteTestTmpdir().c_str(), O_DIRECTORY, 0), + SyscallSucceeds()); + + std::string path = JoinPath(GetAbsoluteTestTmpdir(), "NewDir"); + EXPECT_THAT(mkdir(path.c_str(), 0755), SyscallSucceeds()); + EXPECT_THAT(unlinkat(dirfd, "NewDir", AT_REMOVEDIR), SyscallSucceeds()); + ASSERT_THAT(close(dirfd), SyscallSucceeds()); +} + +TEST(UnlinkTest, AtDirDegradedPermissions_NoRandomSave) { + // Drop capabilities that allow us to override file and directory permissions. 
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false)); + + auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + + int dirfd; + ASSERT_THAT(dirfd = open(dir.path().c_str(), O_DIRECTORY, 0), + SyscallSucceeds()); + + std::string sub_dir = JoinPath(dir.path(), "NewDir"); + EXPECT_THAT(mkdir(sub_dir.c_str(), 0755), SyscallSucceeds()); + EXPECT_THAT(fchmod(dirfd, 0444), SyscallSucceeds()); + EXPECT_THAT(unlinkat(dirfd, "NewDir", AT_REMOVEDIR), + SyscallFailsWithErrno(EACCES)); + ASSERT_THAT(close(dirfd), SyscallSucceeds()); +} + +// Files cannot be unlinked if the parent is not writable and executable. +TEST(UnlinkTest, ParentDegradedPermissions) { + // Drop capabilities that allow us to override file and directory permissions. + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false)); + + auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir.path())); + + ASSERT_THAT(chmod(dir.path().c_str(), 0000), SyscallSucceeds()); + + struct stat st; + ASSERT_THAT(stat(file.path().c_str(), &st), SyscallFailsWithErrno(EACCES)); + ASSERT_THAT(unlinkat(AT_FDCWD, file.path().c_str(), 0), + SyscallFailsWithErrno(EACCES)); + + // Non-existent files also return EACCES. + const std::string nonexist = JoinPath(dir.path(), "doesnotexist"); + ASSERT_THAT(stat(nonexist.c_str(), &st), SyscallFailsWithErrno(EACCES)); + ASSERT_THAT(unlinkat(AT_FDCWD, nonexist.c_str(), 0), + SyscallFailsWithErrno(EACCES)); +} + +TEST(UnlinkTest, AtBad) { + int dirfd; + EXPECT_THAT(dirfd = open(GetAbsoluteTestTmpdir().c_str(), O_DIRECTORY, 0), + SyscallSucceeds()); + + // Try removing a directory as a file. 
+ std::string path = JoinPath(GetAbsoluteTestTmpdir(), "NewDir"); + EXPECT_THAT(mkdir(path.c_str(), 0755), SyscallSucceeds()); + EXPECT_THAT(unlinkat(dirfd, "NewDir", 0), SyscallFailsWithErrno(EISDIR)); + EXPECT_THAT(unlinkat(dirfd, "NewDir", AT_REMOVEDIR), SyscallSucceeds()); + + // Try removing a file as a directory. + int fd; + EXPECT_THAT(fd = openat(dirfd, "UnlinkAtFile", O_RDWR | O_CREAT, 0666), + SyscallSucceeds()); + EXPECT_THAT(unlinkat(dirfd, "UnlinkAtFile", AT_REMOVEDIR), + SyscallFailsWithErrno(ENOTDIR)); + ASSERT_THAT(close(fd), SyscallSucceeds()); + EXPECT_THAT(unlinkat(dirfd, "UnlinkAtFile", 0), SyscallSucceeds()); + + // Cleanup. + ASSERT_THAT(close(dirfd), SyscallSucceeds()); +} + +TEST(UnlinkTest, AbsTmpFile) { + int fd; + std::string path = JoinPath(GetAbsoluteTestTmpdir(), "ExistingFile"); + EXPECT_THAT(fd = open(path.c_str(), O_RDWR | O_CREAT, 0666), + SyscallSucceeds()); + EXPECT_THAT(close(fd), SyscallSucceeds()); + EXPECT_THAT(unlink(path.c_str()), SyscallSucceeds()); +} + +TEST(UnlinkTest, TooLongName) { + EXPECT_THAT(unlink(std::vector<char>(16384, '0').data()), + SyscallFailsWithErrno(ENAMETOOLONG)); +} + +TEST(UnlinkTest, BadNamePtr) { + EXPECT_THAT(unlink(reinterpret_cast<char*>(1)), + SyscallFailsWithErrno(EFAULT)); +} + +TEST(UnlinkTest, AtFile) { + int dirfd; + EXPECT_THAT(dirfd = open(GetAbsoluteTestTmpdir().c_str(), O_DIRECTORY, 0666), + SyscallSucceeds()); + int fd; + EXPECT_THAT(fd = openat(dirfd, "UnlinkAtFile", O_RDWR | O_CREAT, 0666), + SyscallSucceeds()); + EXPECT_THAT(close(fd), SyscallSucceeds()); + EXPECT_THAT(unlinkat(dirfd, "UnlinkAtFile", 0), SyscallSucceeds()); +} + +TEST(UnlinkTest, OpenFile) { + // We can't save unlinked file unless they are on tmpfs. 
+ const DisableSave ds; + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + int fd; + EXPECT_THAT(fd = open(file.path().c_str(), O_RDWR, 0666), SyscallSucceeds()); + EXPECT_THAT(unlink(file.path().c_str()), SyscallSucceeds()); + EXPECT_THAT(close(fd), SyscallSucceeds()); +} + +TEST(UnlinkTest, CannotRemoveDots) { + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const std::string self = JoinPath(file.path(), "."); + ASSERT_THAT(unlink(self.c_str()), SyscallFailsWithErrno(ENOTDIR)); + const std::string parent = JoinPath(file.path(), ".."); + ASSERT_THAT(unlink(parent.c_str()), SyscallFailsWithErrno(ENOTDIR)); +} + +TEST(UnlinkTest, CannotRemoveRoot) { + ASSERT_THAT(unlinkat(-1, "/", AT_REMOVEDIR), SyscallFailsWithErrno(EBUSY)); +} + +TEST(UnlinkTest, CannotRemoveRootWithAtDir) { + const FileDescriptor dirfd = ASSERT_NO_ERRNO_AND_VALUE( + Open(GetAbsoluteTestTmpdir(), O_DIRECTORY, 0666)); + ASSERT_THAT(unlinkat(dirfd.get(), "/", AT_REMOVEDIR), + SyscallFailsWithErrno(EBUSY)); +} + +TEST(RmdirTest, CannotRemoveDots) { + auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const std::string self = JoinPath(dir.path(), "."); + ASSERT_THAT(rmdir(self.c_str()), SyscallFailsWithErrno(EINVAL)); + const std::string parent = JoinPath(dir.path(), ".."); + ASSERT_THAT(rmdir(parent.c_str()), SyscallFailsWithErrno(ENOTEMPTY)); +} + +TEST(RmdirTest, CanRemoveWithTrailingSlashes) { + auto dir1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const std::string slash = absl::StrCat(dir1.path(), "/"); + ASSERT_THAT(rmdir(slash.c_str()), SyscallSucceeds()); + auto dir2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const std::string slashslash = absl::StrCat(dir2.path(), "//"); + ASSERT_THAT(rmdir(slashslash.c_str()), SyscallSucceeds()); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/unshare.cc b/test/syscalls/linux/unshare.cc new file mode 100644 index 000000000..9dd6ec4b6 --- 
/dev/null +++ b/test/syscalls/linux/unshare.cc @@ -0,0 +1,50 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <errno.h> +#include <sched.h> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/synchronization/mutex.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +TEST(UnshareTest, AllowsZeroFlags) { + ASSERT_THAT(unshare(0), SyscallSucceeds()); +} + +TEST(UnshareTest, ThreadFlagFailsIfMultithreaded) { + absl::Mutex mu; + bool finished = false; + ScopedThread t([&] { + mu.Lock(); + mu.Await(absl::Condition(&finished)); + mu.Unlock(); + }); + ASSERT_THAT(unshare(CLONE_THREAD), SyscallFailsWithErrno(EINVAL)); + mu.Lock(); + finished = true; + mu.Unlock(); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/utimes.cc b/test/syscalls/linux/utimes.cc new file mode 100644 index 000000000..d95ee74ec --- /dev/null +++ b/test/syscalls/linux/utimes.cc @@ -0,0 +1,330 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <fcntl.h> +#include <sys/stat.h> +#include <sys/syscall.h> +#include <sys/time.h> +#include <sys/types.h> +#include <time.h> +#include <unistd.h> +#include <utime.h> +#include <string> + +#include "absl/time/time.h" +#include "test/util/file_descriptor.h" +#include "test/util/fs_util.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// TODO: utimes(nullptr) does not pick the "now" time in the +// application's time domain, so when asserting that times are within a window, +// we expand the window to allow for differences between the time domains. +constexpr absl::Duration kClockSlack = absl::Milliseconds(100); + +// TimeBoxed runs fn, setting before and after to (coarse realtime) times +// guaranteed* to come before and after fn started and completed, respectively. +// +// fn may be called more than once if the clock is adjusted. +// +// * See the comment on kClockSlack. gVisor breaks this guarantee. +void TimeBoxed(absl::Time* before, absl::Time* after, + std::function<void()> const& fn) { + do { + // N.B. utimes and friends use CLOCK_REALTIME_COARSE for setting time (i.e., + // current_kernel_time()). See fs/attr.c:notify_change. + // + // notify_change truncates the time to a multiple of s_time_gran, but most + // filesystems set it to 1, so we don't do any truncation. 
+ struct timespec ts; + EXPECT_THAT(clock_gettime(CLOCK_REALTIME_COARSE, &ts), SyscallSucceeds()); + *before = absl::TimeFromTimespec(ts); + + fn(); + + EXPECT_THAT(clock_gettime(CLOCK_REALTIME_COARSE, &ts), SyscallSucceeds()); + *after = absl::TimeFromTimespec(ts); + + if (*after < *before) { + // Clock jumped backwards; retry. + // + // Technically this misses jumps small enough to keep after > before, + // which could lead to test failures, but that is very unlikely to happen. + continue; + } + + if (IsRunningOnGvisor()) { + // See comment on kClockSlack. + *before -= kClockSlack; + *after += kClockSlack; + } + } while (*after < *before); +} + +void TestUtimesOnPath(std::string const& path) { + struct stat statbuf; + + struct timeval times[2] = {{1, 0}, {2, 0}}; + EXPECT_THAT(utimes(path.c_str(), times), SyscallSucceeds()); + EXPECT_THAT(stat(path.c_str(), &statbuf), SyscallSucceeds()); + EXPECT_EQ(1, statbuf.st_atime); + EXPECT_EQ(2, statbuf.st_mtime); + + absl::Time before; + absl::Time after; + TimeBoxed(&before, &after, [&] { + EXPECT_THAT(utimes(path.c_str(), nullptr), SyscallSucceeds()); + }); + + EXPECT_THAT(stat(path.c_str(), &statbuf), SyscallSucceeds()); + + absl::Time atime = absl::TimeFromTimespec(statbuf.st_atim); + EXPECT_GE(atime, before); + EXPECT_LE(atime, after); + + absl::Time mtime = absl::TimeFromTimespec(statbuf.st_mtim); + EXPECT_GE(mtime, before); + EXPECT_LE(mtime, after); +} + +TEST(UtimesTest, OnFile) { + auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + TestUtimesOnPath(f.path()); +} + +TEST(UtimesTest, OnDir) { + auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + TestUtimesOnPath(dir.path()); +} + +TEST(UtimesTest, MissingPath) { + auto path = NewTempAbsPath(); + struct timeval times[2] = {{1, 0}, {2, 0}}; + EXPECT_THAT(utimes(path.c_str(), times), SyscallFailsWithErrno(ENOENT)); +} + +void TestFutimesat(int dirFd, std::string const& path) { + struct stat statbuf; + + struct timeval times[2] = {{1, 0}, {2, 
0}}; + EXPECT_THAT(futimesat(dirFd, path.c_str(), times), SyscallSucceeds()); + EXPECT_THAT(fstatat(dirFd, path.c_str(), &statbuf, 0), SyscallSucceeds()); + EXPECT_EQ(1, statbuf.st_atime); + EXPECT_EQ(2, statbuf.st_mtime); + + absl::Time before; + absl::Time after; + TimeBoxed(&before, &after, [&] { + EXPECT_THAT(futimesat(dirFd, path.c_str(), nullptr), SyscallSucceeds()); + }); + + EXPECT_THAT(fstatat(dirFd, path.c_str(), &statbuf, 0), SyscallSucceeds()); + + absl::Time atime = absl::TimeFromTimespec(statbuf.st_atim); + EXPECT_GE(atime, before); + EXPECT_LE(atime, after); + + absl::Time mtime = absl::TimeFromTimespec(statbuf.st_mtim); + EXPECT_GE(mtime, before); + EXPECT_LE(mtime, after); +} + +TEST(FutimesatTest, OnAbsPath) { + auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + TestFutimesat(0, f.path()); +} + +TEST(FutimesatTest, OnRelPath) { + auto d = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(d.path())); + auto basename = std::string(Basename(f.path())); + const FileDescriptor dirFd = + ASSERT_NO_ERRNO_AND_VALUE(Open(d.path(), O_RDONLY | O_DIRECTORY)); + TestFutimesat(dirFd.get(), basename); +} + +TEST(FutimesatTest, InvalidNsec) { + auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + struct timeval times[4][2] = {{ + {0, 1}, // Valid + {1, static_cast<int64_t>(1e7)} // Invalid + }, + { + {1, static_cast<int64_t>(1e7)}, // Invalid + {0, 1} // Valid + }, + { + {0, 1}, // Valid + {1, -1} // Invalid + }, + { + {1, -1}, // Invalid + {0, 1} // Valid + }}; + + for (unsigned int i = 0; i < sizeof(times) / sizeof(times[0]); i++) { + std::cout << "test:" << i << "\n"; + EXPECT_THAT(futimesat(0, f.path().c_str(), times[i]), + SyscallFailsWithErrno(EINVAL)); + } +} + +void TestUtimensat(int dirFd, std::string const& path) { + struct stat statbuf; + const struct timespec times[2] = {{1, 0}, {2, 0}}; + EXPECT_THAT(utimensat(dirFd, path.c_str(), times, 0), SyscallSucceeds()); + 
EXPECT_THAT(fstatat(dirFd, path.c_str(), &statbuf, 0), SyscallSucceeds()); + EXPECT_EQ(1, statbuf.st_atime); + EXPECT_EQ(2, statbuf.st_mtime); + + // Test setting with UTIME_NOW and UTIME_OMIT. + struct stat statbuf2; + const struct timespec times2[2] = { + {0, UTIME_NOW}, // Should set atime to now. + {0, UTIME_OMIT} // Should not change mtime. + }; + + absl::Time before; + absl::Time after; + TimeBoxed(&before, &after, [&] { + EXPECT_THAT(utimensat(dirFd, path.c_str(), times2, 0), SyscallSucceeds()); + }); + + EXPECT_THAT(fstatat(dirFd, path.c_str(), &statbuf2, 0), SyscallSucceeds()); + + absl::Time atime2 = absl::TimeFromTimespec(statbuf2.st_atim); + EXPECT_GE(atime2, before); + EXPECT_LE(atime2, after); + + absl::Time mtime = absl::TimeFromTimespec(statbuf.st_mtim); + absl::Time mtime2 = absl::TimeFromTimespec(statbuf2.st_mtim); + // mtime should not be changed. + EXPECT_EQ(mtime, mtime2); + + // Test setting with times = NULL. Should set both atime and mtime to the + // current system time. + struct stat statbuf3; + TimeBoxed(&before, &after, [&] { + EXPECT_THAT(utimensat(dirFd, path.c_str(), nullptr, 0), SyscallSucceeds()); + }); + + EXPECT_THAT(fstatat(dirFd, path.c_str(), &statbuf3, 0), SyscallSucceeds()); + + absl::Time atime3 = absl::TimeFromTimespec(statbuf3.st_atim); + EXPECT_GE(atime3, before); + EXPECT_LE(atime3, after); + + absl::Time mtime3 = absl::TimeFromTimespec(statbuf3.st_mtim); + EXPECT_GE(mtime3, before); + EXPECT_LE(mtime3, after); + + if (!IsRunningOnGvisor()) { + // FIXME: Gofers set atime and mtime to different "now" times. 
+ EXPECT_EQ(atime3, mtime3); + } +} + +TEST(UtimensatTest, OnAbsPath) { + auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + TestUtimensat(0, f.path()); +} + +TEST(UtimensatTest, OnRelPath) { + auto d = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(d.path())); + auto basename = std::string(Basename(f.path())); + const FileDescriptor dirFd = + ASSERT_NO_ERRNO_AND_VALUE(Open(d.path(), O_RDONLY | O_DIRECTORY)); + TestUtimensat(dirFd.get(), basename); +} + +TEST(UtimensatTest, OmitNoop) { + // Setting both timespecs to UTIME_OMIT on a nonexistant path should succeed. + auto path = NewTempAbsPath(); + const struct timespec times[2] = {{0, UTIME_OMIT}, {0, UTIME_OMIT}}; + EXPECT_THAT(utimensat(0, path.c_str(), times, 0), SyscallSucceeds()); +} + +// Verify that we can actually set atime and mtime to 0. +TEST(UtimeTest, ZeroAtimeandMtime) { + const auto tmp_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const auto tmp_file = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(tmp_dir.path())); + + // Stat the file before and after updating atime and mtime. + struct stat stat_before = {}; + EXPECT_THAT(stat(tmp_file.path().c_str(), &stat_before), SyscallSucceeds()); + + ASSERT_NE(stat_before.st_atime, 0); + ASSERT_NE(stat_before.st_mtime, 0); + + const struct utimbuf times = {}; // Zero for both atime and mtime. + EXPECT_THAT(utime(tmp_file.path().c_str(), ×), SyscallSucceeds()); + + struct stat stat_after = {}; + EXPECT_THAT(stat(tmp_file.path().c_str(), &stat_after), SyscallSucceeds()); + + // We should see the atime and mtime changed when we set them to 0. 
+ ASSERT_EQ(stat_after.st_atime, 0); + ASSERT_EQ(stat_after.st_mtime, 0); +} + +TEST(UtimensatTest, InvalidNsec) { + auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + struct timespec times[2][2] = {{ + {0, UTIME_OMIT}, // Valid + {2, static_cast<int64_t>(1e10)} // Invalid + }, + { + {2, static_cast<int64_t>(1e10)}, // Invalid + {0, UTIME_OMIT} // Valid + }}; + + for (unsigned int i = 0; i < sizeof(times) / sizeof(times[0]); i++) { + std::cout << "test:" << i << "\n"; + EXPECT_THAT(utimensat(0, f.path().c_str(), times[i], 0), + SyscallFailsWithErrno(EINVAL)); + } +} + +TEST(Utimensat, NullPath) { + // From man utimensat(2): + // "the Linux utimensat() system call implements a nonstandard feature: if + // pathname is NULL, then the call modifies the timestamps of the file + // referred to by the file descriptor dirfd (which may refer to any type of + // file). + // Note, however, that the glibc wrapper for utimensat() disallows + // passing NULL as the value for file: the wrapper function returns the error + // EINVAL in this case." + auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDWR)); + struct stat statbuf; + const struct timespec times[2] = {{1, 0}, {2, 0}}; + // Call syscall directly. + EXPECT_THAT(syscall(SYS_utimensat, fd.get(), NULL, times, 0), + SyscallSucceeds()); + EXPECT_THAT(fstatat(0, f.path().c_str(), &statbuf, 0), SyscallSucceeds()); + EXPECT_EQ(1, statbuf.st_atime); + EXPECT_EQ(2, statbuf.st_mtime); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/vdso.cc b/test/syscalls/linux/vdso.cc new file mode 100644 index 000000000..0f6e1c7c6 --- /dev/null +++ b/test/syscalls/linux/vdso.cc @@ -0,0 +1,48 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <string.h> +#include <sys/mman.h> + +#include <algorithm> + +#include "gtest/gtest.h" +#include "test/util/fs_util.h" +#include "test/util/posix_error.h" +#include "test/util/proc_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// Ensure that the vvar page cannot be made writable. +TEST(VvarTest, WriteVvar) { + auto contents = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps")); + auto maps = ASSERT_NO_ERRNO_AND_VALUE(ParseProcMaps(contents)); + auto it = std::find_if(maps.begin(), maps.end(), [](const ProcMapsEntry& e) { + return e.filename == "[vvar]"; + }); + + SKIP_IF(it == maps.end()); + EXPECT_THAT(mprotect(reinterpret_cast<void*>(it->start), kPageSize, + PROT_READ | PROT_WRITE), + SyscallFailsWithErrno(EACCES)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/vdso_clock_gettime.cc b/test/syscalls/linux/vdso_clock_gettime.cc new file mode 100644 index 000000000..59dd78833 --- /dev/null +++ b/test/syscalls/linux/vdso_clock_gettime.cc @@ -0,0 +1,104 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <stdint.h> +#include <sys/time.h> +#include <syscall.h> +#include <time.h> +#include <unistd.h> +#include <map> +#include <string> +#include <utility> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/strings/numbers.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +std::string PrintClockId(::testing::TestParamInfo<clockid_t> info) { + switch (info.param) { + case CLOCK_MONOTONIC: + return "CLOCK_MONOTONIC"; + case CLOCK_REALTIME: + return "CLOCK_REALTIME"; + default: + return absl::StrCat(info.param); + } +} + +class CorrectVDSOClockTest : public ::testing::TestWithParam<clockid_t> {}; + +TEST_P(CorrectVDSOClockTest, IsCorrect) { + struct timespec tvdso, tsys; + absl::Time vdso_time, sys_time; + uint64_t total_calls = 0; + + // It is expected that 82.5% of clock_gettime calls will be less than 100us + // skewed from the system time. + // Unfortunately this is not only influenced by the VDSO clock skew, but also + // by arbitrary scheduling delays and the like. The test is therefore + // regularly disabled. 
+ std::map<absl::Duration, std::tuple<double, uint64_t, uint64_t>> confidence = + { + {absl::Microseconds(100), std::make_tuple(0.825, 0, 0)}, + {absl::Microseconds(250), std::make_tuple(0.94, 0, 0)}, + {absl::Milliseconds(1), std::make_tuple(0.999, 0, 0)}, + }; + + absl::Time start = absl::Now(); + while (absl::Now() < start + absl::Seconds(30)) { + EXPECT_THAT(clock_gettime(GetParam(), &tvdso), SyscallSucceeds()); + EXPECT_THAT(syscall(__NR_clock_gettime, GetParam(), &tsys), + SyscallSucceeds()); + + vdso_time = absl::TimeFromTimespec(tvdso); + + for (auto const& conf : confidence) { + std::get<1>(confidence[conf.first]) += + (sys_time - vdso_time) < conf.first; + } + + sys_time = absl::TimeFromTimespec(tsys); + + for (auto const& conf : confidence) { + std::get<2>(confidence[conf.first]) += + (vdso_time - sys_time) < conf.first; + } + + ++total_calls; + } + + for (auto const& conf : confidence) { + EXPECT_GE(std::get<1>(conf.second) / static_cast<double>(total_calls), + std::get<0>(conf.second)); + EXPECT_GE(std::get<2>(conf.second) / static_cast<double>(total_calls), + std::get<0>(conf.second)); + } +} + +INSTANTIATE_TEST_CASE_P(ClockGettime, CorrectVDSOClockTest, + ::testing::Values(CLOCK_MONOTONIC, CLOCK_REALTIME), + PrintClockId); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/vfork.cc b/test/syscalls/linux/vfork.cc new file mode 100644 index 000000000..9999a909e --- /dev/null +++ b/test/syscalls/linux/vfork.cc @@ -0,0 +1,193 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <errno.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#include <string> +#include <utility> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/time/time.h" +#include "test/util/logging.h" +#include "test/util/multiprocess_util.h" +#include "test/util/test_util.h" + +DEFINE_bool(vfork_test_child, false, + "If true, run the VforkTest child workload."); + +namespace gvisor { +namespace testing { + +namespace { + +// We don't test with raw CLONE_VFORK to avoid interacting with glibc's use of +// TLS. +// +// Even with vfork(2), we must be careful to do little more in the child than +// call execve(2). We use the simplest sleep function possible, though this is +// still precarious, as we're officially only allowed to call execve(2) and +// _exit(2). +constexpr absl::Duration kChildDelay = absl::Seconds(10); + +// Exit code for successful child subprocesses. We don't want to use 0 since +// it's too common, and an execve(2) failure causes the child to exit with the +// errno, so kChildExitCode is chosen to be an unlikely errno: +constexpr int kChildExitCode = 118; // ENOTNAM: Not a XENIX named type file + +int64_t MonotonicNow() { + struct timespec now; + TEST_PCHECK(clock_gettime(CLOCK_MONOTONIC, &now) == 0); + return now.tv_sec * 1000000000ll + now.tv_nsec; +} + +TEST(VforkTest, ParentStopsUntilChildExits) { + const auto test = [] { + // N.B. Run the test in a single-threaded subprocess because + // vfork is not safe in a multi-threaded process. 
+ + const int64_t start = MonotonicNow(); + + pid_t pid = vfork(); + if (pid == 0) { + SleepSafe(kChildDelay); + _exit(kChildExitCode); + } + TEST_PCHECK_MSG(pid > 0, "vfork failed"); + MaybeSave(); + + const int64_t end = MonotonicNow(); + + absl::Duration dur = absl::Nanoseconds(end - start); + + TEST_CHECK(dur >= kChildDelay); + + int status = 0; + TEST_PCHECK(RetryEINTR(waitpid)(pid, &status, 0)); + TEST_CHECK(WIFEXITED(status)); + TEST_CHECK(WEXITSTATUS(status) == kChildExitCode); + }; + + EXPECT_THAT(InForkedProcess(test), IsPosixErrorOkAndHolds(0)); +} + +TEST(VforkTest, ParentStopsUntilChildExecves_NoRandomSave) { + ExecveArray const owned_child_argv = {"/proc/self/exe", "--vfork_test_child"}; + char* const* const child_argv = owned_child_argv.get(); + + const auto test = [&] { + const int64_t start = MonotonicNow(); + + pid_t pid = vfork(); + if (pid == 0) { + SleepSafe(kChildDelay); + execve(child_argv[0], child_argv, /* envp = */ nullptr); + _exit(errno); + } + // Don't attempt save/restore until after recording end_time, + // since the test expects an upper bound on the time spent + // stopped. + int saved_errno = errno; + const int64_t end = MonotonicNow(); + errno = saved_errno; + TEST_PCHECK_MSG(pid > 0, "vfork failed"); + MaybeSave(); + + absl::Duration dur = absl::Nanoseconds(end - start); + + // The parent should resume execution after execve, but before + // the post-execve test child exits. + TEST_CHECK(dur >= kChildDelay); + TEST_CHECK(dur <= 2 * kChildDelay); + + int status = 0; + TEST_PCHECK(RetryEINTR(waitpid)(pid, &status, 0)); + TEST_CHECK(WIFEXITED(status)); + TEST_CHECK(WEXITSTATUS(status) == kChildExitCode); + }; + + EXPECT_THAT(InForkedProcess(test), IsPosixErrorOkAndHolds(0)); +} + +// A vfork child does not unstop the parent a second time when it exits after +// exec. 
+TEST(VforkTest, ExecedChildExitDoesntUnstopParent_NoRandomSave) { + ExecveArray const owned_child_argv = {"/proc/self/exe", "--vfork_test_child"}; + char* const* const child_argv = owned_child_argv.get(); + + const auto test = [&] { + pid_t pid1 = vfork(); + if (pid1 == 0) { + execve(child_argv[0], child_argv, /* envp = */ nullptr); + _exit(errno); + } + TEST_PCHECK_MSG(pid1 > 0, "vfork failed"); + MaybeSave(); + + // pid1 exec'd and is now sleeping. + SleepSafe(kChildDelay / 2); + + const int64_t start = MonotonicNow(); + + pid_t pid2 = vfork(); + if (pid2 == 0) { + SleepSafe(kChildDelay); + _exit(kChildExitCode); + } + TEST_PCHECK_MSG(pid2 > 0, "vfork failed"); + MaybeSave(); + + const int64_t end = MonotonicNow(); + + absl::Duration dur = absl::Nanoseconds(end - start); + + // The parent should resume execution only after pid2 exits, not + // when pid1 exits. + TEST_CHECK(dur >= kChildDelay); + + int status = 0; + TEST_PCHECK(RetryEINTR(waitpid)(pid1, &status, 0)); + TEST_CHECK(WIFEXITED(status)); + TEST_CHECK(WEXITSTATUS(status) == kChildExitCode); + + TEST_PCHECK(RetryEINTR(waitpid)(pid2, &status, 0)); + TEST_CHECK(WIFEXITED(status)); + TEST_CHECK(WEXITSTATUS(status) == kChildExitCode); + }; + + EXPECT_THAT(InForkedProcess(test), IsPosixErrorOkAndHolds(0)); +} + +int RunChild() { + SleepSafe(kChildDelay); + return kChildExitCode; +} + +} // namespace + +} // namespace testing +} // namespace gvisor + +int main(int argc, char** argv) { + gvisor::testing::TestInit(&argc, &argv); + + if (FLAGS_vfork_test_child) { + return gvisor::testing::RunChild(); + } + + return RUN_ALL_TESTS(); +} diff --git a/test/syscalls/linux/vsyscall.cc b/test/syscalls/linux/vsyscall.cc new file mode 100644 index 000000000..cb6840cc6 --- /dev/null +++ b/test/syscalls/linux/vsyscall.cc @@ -0,0 +1,44 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <errno.h> +#include <time.h> + +#include "gtest/gtest.h" +#include "test/util/proc_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +time_t vsyscall_time(time_t* t) { + constexpr uint64_t kVsyscallTimeEntry = 0xffffffffff600400; + return reinterpret_cast<time_t (*)(time_t*)>(kVsyscallTimeEntry)(t); +} + +TEST(VsyscallTest, VsyscallAlwaysAvailableOnGvisor) { + SKIP_IF(!IsRunningOnGvisor()); + // Vsyscall is always advertised by gvisor. + EXPECT_TRUE(ASSERT_NO_ERRNO_AND_VALUE(IsVsyscallEnabled())); + // Vsyscall should always works on gvisor. + time_t t; + EXPECT_THAT(vsyscall_time(&t), SyscallSucceeds()); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/wait.cc b/test/syscalls/linux/wait.cc new file mode 100644 index 000000000..0a4ec7c6a --- /dev/null +++ b/test/syscalls/linux/wait.cc @@ -0,0 +1,748 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <signal.h> +#include <sys/mman.h> +#include <sys/resource.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#include <functional> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/strings/str_cat.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/util/cleanup.h" +#include "test/util/logging.h" +#include "test/util/multiprocess_util.h" +#include "test/util/posix_error.h" +#include "test/util/signal_util.h" +#include "test/util/test_util.h" + +using ::testing::UnorderedElementsAre; + +// These unit tests focus on the wait4(2) system call, but include a basic +// checks for the i386 waitpid(2) syscall, which is a subset of wait4(2). +// +// NOTE: Some functionality is not tested as +// it is not currently supported by gVisor: +// * UID in waitid(2) siginfo. +// * Process groups. +// * Core dump status (WCOREDUMP). +// * Linux only option __WNOTHREAD. +// +// Tests for waiting on stopped/continued children are in sigstop.cc. + +namespace gvisor { +namespace testing { + +namespace { + +// The CloneChild function seems to need more than one page of stack space. +static const size_t kStackSize = 2 * kPageSize; + +// The child thread created in CloneAndExit runs this function. +// This child does not have the TLS setup, so it must not use glibc functions. +int CloneChild(void* priv) { + int64_t sleep = reinterpret_cast<int64_t>(priv); + SleepSafe(absl::Seconds(sleep)); + + // glibc's _exit(2) function wrapper will helpfully call exit_group(2), + // exiting the entire process. + syscall(__NR_exit, 0); + return 1; +} + +// ForkAndExit forks a child process which exits with exit_code, after +// sleeping for the specified duration (seconds). 
+pid_t ForkAndExit(int exit_code, int64_t sleep) { + pid_t child = fork(); + if (child == 0) { + SleepSafe(absl::Seconds(sleep)); + _exit(exit_code); + } + return child; +} + +int64_t clock_gettime_nsecs(clockid_t id) { + struct timespec ts; + TEST_PCHECK(clock_gettime(id, &ts) == 0); + return (ts.tv_sec * 1000000000 + ts.tv_nsec); +} + +void spin(int64_t sec) { + int64_t ns = sec * 1000000000; + int64_t start = clock_gettime_nsecs(CLOCK_THREAD_CPUTIME_ID); + int64_t end = start + ns; + + do { + constexpr int kLoopCount = 1000000; // large and arbitrary + // volatile to prevent the compiler from skipping this loop. + for (volatile int i = 0; i < kLoopCount; i++) { + } + } while (clock_gettime_nsecs(CLOCK_THREAD_CPUTIME_ID) < end); +} + +// ForkSpinAndExit forks a child process which exits with exit_code, after +// spinning for the specified duration (seconds). +pid_t ForkSpinAndExit(int exit_code, int64_t spintime) { + pid_t child = fork(); + if (child == 0) { + spin(spintime); + _exit(exit_code); + } + return child; +} + +absl::Duration RusageCpuTime(const struct rusage& ru) { + return absl::DurationFromTimeval(ru.ru_utime) + + absl::DurationFromTimeval(ru.ru_stime); +} + +// Returns the address of the top of the stack. +// Free with FreeStack. +uintptr_t AllocStack() { + void* addr = mmap(nullptr, kStackSize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + + if (addr == MAP_FAILED) { + return reinterpret_cast<uintptr_t>(MAP_FAILED); + } + + return reinterpret_cast<uintptr_t>(addr) + kStackSize; +} + +// Frees a stack page allocated with AllocStack. +int FreeStack(uintptr_t addr) { + addr -= kStackSize; + return munmap(reinterpret_cast<void*>(addr), kPageSize); +} + +// CloneAndExit clones a child thread, which exits with 0 after sleeping for +// the specified duration (must be in seconds). extra_flags are ORed against +// the standard clone(2) flags. 
+int CloneAndExit(int64_t sleep, uintptr_t stack, int extra_flags) { + return clone(CloneChild, reinterpret_cast<void*>(stack), + CLONE_FILES | CLONE_FS | CLONE_SIGHAND | CLONE_VM | extra_flags, + reinterpret_cast<void*>(sleep)); +} + +// Simple wrappers around wait4(2) and waitid(2) that ignore interrupts. +constexpr auto Wait4 = RetryEINTR(wait4); +constexpr auto Waitid = RetryEINTR(waitid); + +// Fixture for tests parameterized by a function that waits for any child to +// exit with the given options, checks that it exited with the given code, and +// then returns its PID. +// +// N.B. These tests run in a multi-threaded environment. We assume that +// background threads do not create child processes and are not themselves +// created with clone(... | SIGCHLD). Either may cause these tests to +// erroneously wait on child processes/threads. +class WaitAnyChildTest : public ::testing::TestWithParam< + std::function<PosixErrorOr<pid_t>(int, int)>> { + protected: + PosixErrorOr<pid_t> WaitAny(int code) { return WaitAnyWithOptions(code, 0); } + + PosixErrorOr<pid_t> WaitAnyWithOptions(int code, int options) { + return GetParam()(code, options); + } +}; + +// Wait for any child to exit. +TEST_P(WaitAnyChildTest, Fork) { + pid_t child; + ASSERT_THAT(child = ForkAndExit(0, 0), SyscallSucceeds()); + + EXPECT_THAT(WaitAny(0), IsPosixErrorOkAndHolds(child)); +} + +// Call wait4 for any process after the child has already exited. +TEST_P(WaitAnyChildTest, AfterExit) { + pid_t child; + ASSERT_THAT(child = ForkAndExit(0, 0), SyscallSucceeds()); + + absl::SleepFor(absl::Seconds(5)); + + EXPECT_THAT(WaitAny(0), IsPosixErrorOkAndHolds(child)); +} + +// Wait for multiple children to exit, waiting for either at a time. 
+TEST_P(WaitAnyChildTest, MultipleFork) { + pid_t child1, child2; + ASSERT_THAT(child1 = ForkAndExit(0, 0), SyscallSucceeds()); + ASSERT_THAT(child2 = ForkAndExit(0, 0), SyscallSucceeds()); + + std::vector<pid_t> pids; + pids.push_back(ASSERT_NO_ERRNO_AND_VALUE(WaitAny(0))); + pids.push_back(ASSERT_NO_ERRNO_AND_VALUE(WaitAny(0))); + EXPECT_THAT(pids, UnorderedElementsAre(child1, child2)); +} + +// Wait for any child to exit. +// A non-CLONE_THREAD child which sends SIGCHLD upon exit behaves much like +// a forked process. +TEST_P(WaitAnyChildTest, CloneSIGCHLD) { + uintptr_t stack; + ASSERT_THAT(stack = AllocStack(), SyscallSucceeds()); + auto free = Cleanup( + [this, stack] { ASSERT_THAT(FreeStack(stack), SyscallSucceeds()); }); + + int child; + ASSERT_THAT(child = CloneAndExit(0, stack, SIGCHLD), SyscallSucceeds()); + + EXPECT_THAT(WaitAny(0), IsPosixErrorOkAndHolds(child)); +} + +// Wait for a child thread and process. +TEST_P(WaitAnyChildTest, ForkAndClone) { + pid_t process; + ASSERT_THAT(process = ForkAndExit(0, 0), SyscallSucceeds()); + + uintptr_t stack; + ASSERT_THAT(stack = AllocStack(), SyscallSucceeds()); + auto free = Cleanup( + [this, stack] { ASSERT_THAT(FreeStack(stack), SyscallSucceeds()); }); + + int thread; + // Send SIGCHLD for normal wait semantics. + ASSERT_THAT(thread = CloneAndExit(0, stack, SIGCHLD), SyscallSucceeds()); + + std::vector<pid_t> pids; + pids.push_back(ASSERT_NO_ERRNO_AND_VALUE(WaitAny(0))); + pids.push_back(ASSERT_NO_ERRNO_AND_VALUE(WaitAny(0))); + EXPECT_THAT(pids, UnorderedElementsAre(process, thread)); +} + +// Return immediately if no child has exited. 
+TEST_P(WaitAnyChildTest, WaitWNOHANG) { + EXPECT_THAT( + WaitAnyWithOptions(0, WNOHANG), + PosixErrorIs(ECHILD, ::testing::AnyOf(::testing::StrEq("waitid"), + ::testing::StrEq("wait4")))); +} + +// Bad options passed +TEST_P(WaitAnyChildTest, BadOption) { + EXPECT_THAT( + WaitAnyWithOptions(0, 123456), + PosixErrorIs(EINVAL, ::testing::AnyOf(::testing::StrEq("waitid"), + ::testing::StrEq("wait4")))); +} + +TEST_P(WaitAnyChildTest, WaitedChildRusage) { + struct rusage before; + ASSERT_THAT(getrusage(RUSAGE_CHILDREN, &before), SyscallSucceeds()); + + pid_t child; + constexpr absl::Duration kSpin = absl::Seconds(3); + ASSERT_THAT(child = ForkSpinAndExit(0, absl::ToInt64Seconds(kSpin)), + SyscallSucceeds()); + ASSERT_THAT(WaitAny(0), IsPosixErrorOkAndHolds(child)); + + struct rusage after; + ASSERT_THAT(getrusage(RUSAGE_CHILDREN, &after), SyscallSucceeds()); + + EXPECT_GE(RusageCpuTime(after) - RusageCpuTime(before), kSpin); +} + +TEST_P(WaitAnyChildTest, IgnoredChildRusage) { + // "POSIX.1-2001 specifies that if the disposition of SIGCHLD is + // set to SIG_IGN or the SA_NOCLDWAIT flag is set for SIGCHLD (see + // sigaction(2)), then children that terminate do not become zombies and a + // call to wait() or waitpid() will block until all children have terminated, + // and then fail with errno set to ECHILD." - waitpid(2) + // + // "RUSAGE_CHILDREN: Return resource usage statistics for all children of the + // calling process that have terminated *and been waited for*." 
- + // getrusage(2), emphasis added + + struct sigaction sa; + sa.sa_handler = SIG_IGN; + const auto cleanup_sigact = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGCHLD, sa)); + + struct rusage before; + ASSERT_THAT(getrusage(RUSAGE_CHILDREN, &before), SyscallSucceeds()); + + const absl::Duration start = + absl::Nanoseconds(clock_gettime_nsecs(CLOCK_MONOTONIC)); + + constexpr absl::Duration kSpin = absl::Seconds(3); + + // ForkAndSpin uses CLOCK_THREAD_CPUTIME_ID, which is lower resolution than, + // and may diverge from, CLOCK_MONOTONIC, so we allow a small grace period but + // still check that we blocked for a while. + constexpr absl::Duration kSpinGrace = absl::Milliseconds(100); + + pid_t child; + ASSERT_THAT(child = ForkSpinAndExit(0, absl::ToInt64Seconds(kSpin)), + SyscallSucceeds()); + ASSERT_THAT(WaitAny(0), PosixErrorIs(ECHILD, ::testing::AnyOf( + ::testing::StrEq("waitid"), + ::testing::StrEq("wait4")))); + const absl::Duration end = + absl::Nanoseconds(clock_gettime_nsecs(CLOCK_MONOTONIC)); + EXPECT_GE(end - start, kSpin - kSpinGrace); + + struct rusage after; + ASSERT_THAT(getrusage(RUSAGE_CHILDREN, &after), SyscallSucceeds()); + EXPECT_EQ(before.ru_utime.tv_sec, after.ru_utime.tv_sec); + EXPECT_EQ(before.ru_utime.tv_usec, after.ru_utime.tv_usec); + EXPECT_EQ(before.ru_stime.tv_sec, after.ru_stime.tv_sec); + EXPECT_EQ(before.ru_stime.tv_usec, after.ru_stime.tv_usec); +} + +INSTANTIATE_TEST_CASE_P( + Waiters, WaitAnyChildTest, + ::testing::Values( + [](int code, int options) -> PosixErrorOr<pid_t> { + int status; + auto const pid = Wait4(-1, &status, options, nullptr); + MaybeSave(); + if (pid < 0) { + return PosixError(errno, "wait4"); + } + if (!WIFEXITED(status) || WEXITSTATUS(status) != code) { + return PosixError( + EINVAL, absl::StrCat("unexpected wait status: got ", status, + ", wanted ", code)); + } + return static_cast<pid_t>(pid); + }, + [](int code, int options) -> PosixErrorOr<pid_t> { + siginfo_t si; + auto const rv = Waitid(P_ALL, 0, &si, 
WEXITED | options); + MaybeSave(); + if (rv < 0) { + return PosixError(errno, "waitid"); + } + if (si.si_signo != SIGCHLD) { + return PosixError( + EINVAL, absl::StrCat("unexpected signo: got ", si.si_signo, + ", wanted ", SIGCHLD)); + } + if (si.si_status != code) { + return PosixError( + EINVAL, absl::StrCat("unexpected status: got ", si.si_status, + ", wanted ", code)); + } + if (si.si_code != CLD_EXITED) { + return PosixError(EINVAL, + absl::StrCat("unexpected code: got ", si.si_code, + ", wanted ", CLD_EXITED)); + } + auto const uid = getuid(); + if (si.si_uid != uid) { + return PosixError(EINVAL, + absl::StrCat("unexpected uid: got ", si.si_uid, + ", wanted ", uid)); + } + return static_cast<pid_t>(si.si_pid); + })); + +// Fixture for tests parameterized by a function that takes the PID of a +// specific child to wait for, waits for it to exit, and checks that it exits +// with the given code. +class WaitSpecificChildTest + : public ::testing::TestWithParam<std::function<PosixError(pid_t, int)>> { + protected: + PosixError WaitFor(pid_t pid, int code) { return GetParam()(pid, code); } +}; + +// Wait for specific child to exit. +TEST_P(WaitSpecificChildTest, Fork) { + pid_t child; + ASSERT_THAT(child = ForkAndExit(0, 0), SyscallSucceeds()); + + EXPECT_NO_ERRNO(WaitFor(child, 0)); +} + +// Non-zero exit codes are correctly propagated. +TEST_P(WaitSpecificChildTest, NormalExit) { + pid_t child; + ASSERT_THAT(child = ForkAndExit(42, 0), SyscallSucceeds()); + + EXPECT_NO_ERRNO(WaitFor(child, 42)); +} + +// Wait for multiple children to exit. +TEST_P(WaitSpecificChildTest, MultipleFork) { + pid_t child1, child2; + ASSERT_THAT(child1 = ForkAndExit(0, 0), SyscallSucceeds()); + ASSERT_THAT(child2 = ForkAndExit(0, 0), SyscallSucceeds()); + + EXPECT_NO_ERRNO(WaitFor(child1, 0)); + EXPECT_NO_ERRNO(WaitFor(child2, 0)); +} + +// Wait for multiple children to exit, out of the order they were created. 
+TEST_P(WaitSpecificChildTest, MultipleForkOutOfOrder) { + pid_t child1, child2; + ASSERT_THAT(child1 = ForkAndExit(0, 0), SyscallSucceeds()); + ASSERT_THAT(child2 = ForkAndExit(0, 0), SyscallSucceeds()); + + EXPECT_NO_ERRNO(WaitFor(child2, 0)); + EXPECT_NO_ERRNO(WaitFor(child1, 0)); +} + +// Wait for specific child to exit, entering wait4 before the exit occurs. +TEST_P(WaitSpecificChildTest, ForkSleep) { + pid_t child; + ASSERT_THAT(child = ForkAndExit(0, 5), SyscallSucceeds()); + + EXPECT_NO_ERRNO(WaitFor(child, 0)); +} + +// Wait should block until the child exits. +TEST_P(WaitSpecificChildTest, ForkBlock) { + pid_t child; + + auto start = absl::Now(); + ASSERT_THAT(child = ForkAndExit(0, 5), SyscallSucceeds()); + + EXPECT_NO_ERRNO(WaitFor(child, 0)); + + EXPECT_GE(absl::Now() - start, absl::Seconds(5)); +} + +// Waiting after the child has already exited returns immediately. +TEST_P(WaitSpecificChildTest, AfterExit) { + pid_t child; + ASSERT_THAT(child = ForkAndExit(0, 0), SyscallSucceeds()); + + absl::SleepFor(absl::Seconds(5)); + + EXPECT_NO_ERRNO(WaitFor(child, 0)); +} + +// Wait for specific child to exit. +// A non-CLONE_THREAD child which sends SIGCHLD upon exit behaves much like +// a forked process. +TEST_P(WaitSpecificChildTest, CloneSIGCHLD) { + uintptr_t stack; + ASSERT_THAT(stack = AllocStack(), SyscallSucceeds()); + auto free = Cleanup( + [this, stack] { ASSERT_THAT(FreeStack(stack), SyscallSucceeds()); }); + + int child; + ASSERT_THAT(child = CloneAndExit(0, stack, SIGCHLD), SyscallSucceeds()); + + EXPECT_NO_ERRNO(WaitFor(child, 0)); +} + +// Wait for specific child to exit. +// A non-CLONE_THREAD child which does not send SIGCHLD upon exit can be waited +// on, but returns ECHILD. 
+TEST_P(WaitSpecificChildTest, CloneNoSIGCHLD) { + uintptr_t stack; + ASSERT_THAT(stack = AllocStack(), SyscallSucceeds()); + auto free = Cleanup( + [this, stack] { ASSERT_THAT(FreeStack(stack), SyscallSucceeds()); }); + + int child; + ASSERT_THAT(child = CloneAndExit(0, stack, 0), SyscallSucceeds()); + + EXPECT_THAT( + WaitFor(child, 0), + PosixErrorIs(ECHILD, ::testing::AnyOf(::testing::StrEq("waitid"), + ::testing::StrEq("wait4")))); +} + +// Waiting after the child has already exited returns immediately. +TEST_P(WaitSpecificChildTest, CloneAfterExit) { + uintptr_t stack; + ASSERT_THAT(stack = AllocStack(), SyscallSucceeds()); + auto free = Cleanup( + [this, stack] { ASSERT_THAT(FreeStack(stack), SyscallSucceeds()); }); + + int child; + // Send SIGCHLD for normal wait semantics. + ASSERT_THAT(child = CloneAndExit(0, stack, SIGCHLD), SyscallSucceeds()); + + absl::SleepFor(absl::Seconds(5)); + + EXPECT_NO_ERRNO(WaitFor(child, 0)); +} + +// A CLONE_THREAD child cannot be waited on. +TEST_P(WaitSpecificChildTest, CloneThread) { + uintptr_t stack; + ASSERT_THAT(stack = AllocStack(), SyscallSucceeds()); + auto free = Cleanup( + [this, stack] { ASSERT_THAT(FreeStack(stack), SyscallSucceeds()); }); + + int child; + ASSERT_THAT(child = CloneAndExit(15, stack, CLONE_THREAD), SyscallSucceeds()); + auto start = absl::Now(); + + EXPECT_THAT( + WaitFor(child, 0), + PosixErrorIs(ECHILD, ::testing::AnyOf(::testing::StrEq("waitid"), + ::testing::StrEq("wait4")))); + + // Ensure wait4 didn't block. + EXPECT_LE(absl::Now() - start, absl::Seconds(10)); + + // Since we can't wait on the child, we sleep to try to avoid freeing its + // stack before it exits. + absl::SleepFor(absl::Seconds(5)); +} + +// Return ECHILD for bad child. 
+TEST_P(WaitSpecificChildTest, BadChild) { + EXPECT_THAT( + WaitFor(42, 0), + PosixErrorIs(ECHILD, ::testing::AnyOf(::testing::StrEq("waitid"), + ::testing::StrEq("wait4")))); +} + +// Wait for a child process that only exits after calling execve(2) from a +// non-leader thread. +TEST_P(WaitSpecificChildTest, AfterChildExecve) { + ExecveArray const owned_child_argv = {"/bin/true"}; + char* const* const child_argv = owned_child_argv.get(); + + uintptr_t stack; + ASSERT_THAT(stack = AllocStack(), SyscallSucceeds()); + auto free = Cleanup( + [this, stack] { ASSERT_THAT(FreeStack(stack), SyscallSucceeds()); }); + + pid_t const child = fork(); + if (child == 0) { + // Give the parent some time to start waiting. + SleepSafe(absl::Seconds(5)); + // Pass CLONE_VFORK to block the original thread in the child process until + // the clone thread calls execve, annihilating them both. (This means that + // if clone returns at all, something went wrong.) + // + // N.B. clone(2) is not officially async-signal-safe, but at minimum glibc's + // x86_64 implementation is safe. See glibc + // sysdeps/unix/sysv/linux/x86_64/clone.S. 
+ clone( + +[](void* arg) { + auto child_argv = static_cast<char* const*>(arg); + execve(child_argv[0], child_argv, /* envp = */ nullptr); + return errno; + }, + reinterpret_cast<void*>(stack), + CLONE_FILES | CLONE_FS | CLONE_SIGHAND | CLONE_THREAD | CLONE_VM | + CLONE_VFORK, + const_cast<char**>(child_argv)); + _exit(errno); + } + EXPECT_NO_ERRNO(WaitFor(child, 0)); +} + +INSTANTIATE_TEST_CASE_P( + Waiters, WaitSpecificChildTest, + ::testing::Values( + [](pid_t pid, int code) -> PosixError { + int status; + auto const rv = Wait4(pid, &status, 0, nullptr); + MaybeSave(); + if (rv < 0) { + return PosixError(errno, "wait4"); + } else if (rv != pid) { + return PosixError(EINVAL, absl::StrCat("unexpected pid: got ", rv, + ", wanted ", pid)); + } + if (!WIFEXITED(status) || WEXITSTATUS(status) != code) { + return PosixError( + EINVAL, absl::StrCat("unexpected wait status: got ", status, + ", wanted ", code)); + } + return NoError(); + }, + [](pid_t pid, int code) -> PosixError { + siginfo_t si; + auto const rv = Waitid(P_PID, pid, &si, WEXITED); + MaybeSave(); + if (rv < 0) { + return PosixError(errno, "waitid"); + } + if (si.si_pid != pid) { + return PosixError(EINVAL, + absl::StrCat("unexpected pid: got ", si.si_pid, + ", wanted ", pid)); + } + if (si.si_signo != SIGCHLD) { + return PosixError( + EINVAL, absl::StrCat("unexpected signo: got ", si.si_signo, + ", wanted ", SIGCHLD)); + } + if (si.si_status != code) { + return PosixError( + EINVAL, absl::StrCat("unexpected status: got ", si.si_status, + ", wanted ", code)); + } + if (si.si_code != CLD_EXITED) { + return PosixError(EINVAL, + absl::StrCat("unexpected code: got ", si.si_code, + ", wanted ", CLD_EXITED)); + } + return NoError(); + })); + +// WIFEXITED, WIFSIGNALED, WTERMSIG indicate signal exit. 
+TEST(WaitTest, SignalExit) { + pid_t child; + ASSERT_THAT(child = ForkAndExit(0, 10), SyscallSucceeds()); + + EXPECT_THAT(kill(child, SIGKILL), SyscallSucceeds()); + + int status; + EXPECT_THAT(Wait4(child, &status, 0, nullptr), + SyscallSucceedsWithValue(child)); + + EXPECT_FALSE(WIFEXITED(status)); + EXPECT_TRUE(WIFSIGNALED(status)); + EXPECT_EQ(SIGKILL, WTERMSIG(status)); +} + +// A child that does not send a SIGCHLD on exit may be waited on with +// the __WCLONE flag. +TEST(WaitTest, CloneWCLONE) { + uintptr_t stack; + ASSERT_THAT(stack = AllocStack(), SyscallSucceeds()); + auto free = Cleanup( + [this, stack] { ASSERT_THAT(FreeStack(stack), SyscallSucceeds()); }); + + int child; + ASSERT_THAT(child = CloneAndExit(0, stack, 0), SyscallSucceeds()); + + EXPECT_THAT(Wait4(child, nullptr, __WCLONE, nullptr), + SyscallSucceedsWithValue(child)); +} + +// waitid requires at least one option. +TEST(WaitTest, WaitidOptions) { + EXPECT_THAT(Waitid(P_ALL, 0, nullptr, 0), SyscallFailsWithErrno(EINVAL)); +} + +// waitid does not wait for a child to exit if not passed WEXITED. +TEST(WaitTest, WaitidNoWEXITED) { + pid_t child; + ASSERT_THAT(child = ForkAndExit(0, 0), SyscallSucceeds()); + EXPECT_THAT(Waitid(P_ALL, 0, nullptr, WSTOPPED), + SyscallFailsWithErrno(ECHILD)); + EXPECT_THAT(Waitid(P_ALL, 0, nullptr, WEXITED), SyscallSucceeds()); +} + +// WNOWAIT allows the same wait result to be returned again. 
+TEST(WaitTest, WaitidWNOWAIT) { + pid_t child; + ASSERT_THAT(child = ForkAndExit(42, 0), SyscallSucceeds()); + + siginfo_t info; + ASSERT_THAT(Waitid(P_PID, child, &info, WEXITED | WNOWAIT), + SyscallSucceeds()); + EXPECT_EQ(child, info.si_pid); + EXPECT_EQ(SIGCHLD, info.si_signo); + EXPECT_EQ(CLD_EXITED, info.si_code); + EXPECT_EQ(42, info.si_status); + + ASSERT_THAT(Waitid(P_PID, child, &info, WEXITED), SyscallSucceeds()); + EXPECT_EQ(child, info.si_pid); + EXPECT_EQ(SIGCHLD, info.si_signo); + EXPECT_EQ(CLD_EXITED, info.si_code); + EXPECT_EQ(42, info.si_status); + + EXPECT_THAT(Waitid(P_PID, child, &info, WEXITED), + SyscallFailsWithErrno(ECHILD)); +} + +// waitpid(pid, status, options) is equivalent to +// wait4(pid, status, options, nullptr). +// This is a dedicated syscall on i386, glibc maps it to wait4 on amd64. +TEST(WaitTest, WaitPid) { + pid_t child; + ASSERT_THAT(child = ForkAndExit(42, 0), SyscallSucceeds()); + + int status; + EXPECT_THAT(RetryEINTR(waitpid)(child, &status, 0), + SyscallSucceedsWithValue(child)); + + EXPECT_TRUE(WIFEXITED(status)); + EXPECT_EQ(42, WEXITSTATUS(status)); +} + +// Test that signaling a zombie succeeds. This is a signals test that is in this +// file for some reason. +TEST(WaitTest, KillZombie) { + pid_t child; + ASSERT_THAT(child = ForkAndExit(42, 0), SyscallSucceeds()); + + // Sleep for three seconds to ensure the child has exited. + absl::SleepFor(absl::Seconds(3)); + + // The child is now a zombie. Check that killing it returns 0. 
+ EXPECT_THAT(kill(child, SIGTERM), SyscallSucceeds()); + EXPECT_THAT(kill(child, 0), SyscallSucceeds()); + + EXPECT_THAT(Wait4(child, nullptr, 0, nullptr), + SyscallSucceedsWithValue(child)); +} + +TEST(WaitTest, Wait4Rusage) { + pid_t child; + constexpr absl::Duration kSpin = absl::Seconds(3); + ASSERT_THAT(child = ForkSpinAndExit(21, absl::ToInt64Seconds(kSpin)), + SyscallSucceeds()); + + int status; + struct rusage rusage = {}; + ASSERT_THAT(Wait4(child, &status, 0, &rusage), + SyscallSucceedsWithValue(child)); + + EXPECT_TRUE(WIFEXITED(status)); + EXPECT_EQ(21, WEXITSTATUS(status)); + + EXPECT_GE(RusageCpuTime(rusage), kSpin); +} + +TEST(WaitTest, WaitidRusage) { + pid_t child; + constexpr absl::Duration kSpin = absl::Seconds(3); + ASSERT_THAT(child = ForkSpinAndExit(27, absl::ToInt64Seconds(kSpin)), + SyscallSucceeds()); + + siginfo_t si = {}; + struct rusage rusage = {}; + + // From waitid(2): + // The raw waitid() system call takes a fifth argument, of type + // struct rusage *. If this argument is non-NULL, then it is used + // to return resource usage information about the child, in the + // same manner as wait4(2). + EXPECT_THAT( + RetryEINTR(syscall)(SYS_waitid, P_PID, child, &si, WEXITED, &rusage), + SyscallSucceeds()); + EXPECT_EQ(si.si_signo, SIGCHLD); + EXPECT_EQ(si.si_code, CLD_EXITED); + EXPECT_EQ(si.si_status, 27); + EXPECT_EQ(si.si_pid, child); + + EXPECT_GE(RusageCpuTime(rusage), kSpin); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/write.cc b/test/syscalls/linux/write.cc new file mode 100644 index 000000000..ca6aafd18 --- /dev/null +++ b/test/syscalls/linux/write.cc @@ -0,0 +1,134 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <errno.h> +#include <fcntl.h> +#include <signal.h> +#include <sys/resource.h> +#include <sys/stat.h> +#include <time.h> +#include <unistd.h> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "test/util/cleanup.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { +// This test is currently very rudimentary. +// +// TODO: +// * bad buffer states (EFAULT). +// * bad fds (wrong permission, wrong type of file, EBADF). +// * check offset is incremented. +// * check for EOF. +// * writing to pipes, symlinks, special files. +class WriteTest : public ::testing::Test { + public: + ssize_t WriteBytes(int fd, int bytes) { + std::vector<char> buf(bytes); + std::fill(buf.begin(), buf.end(), 'a'); + return WriteFd(fd, buf.data(), buf.size()); + } +}; + +TEST_F(WriteTest, WriteNoExceedsRLimit) { + // Get the current rlimit and restore after test run. 
+ struct rlimit initial_lim; + ASSERT_THAT(getrlimit(RLIMIT_FSIZE, &initial_lim), SyscallSucceeds()); + auto cleanup = Cleanup([&initial_lim] { + EXPECT_THAT(setrlimit(RLIMIT_FSIZE, &initial_lim), SyscallSucceeds()); + }); + + int fd; + struct rlimit setlim; + const int target_lim = 1024; + setlim.rlim_cur = target_lim; + setlim.rlim_max = RLIM_INFINITY; + const std::string pathname = NewTempAbsPath(); + ASSERT_THAT(fd = open(pathname.c_str(), O_WRONLY | O_CREAT, S_IRWXU), + SyscallSucceeds()); + ASSERT_THAT(setrlimit(RLIMIT_FSIZE, &setlim), SyscallSucceeds()); + + EXPECT_THAT(WriteBytes(fd, target_lim), SyscallSucceedsWithValue(target_lim)); + + std::vector<char> buf(target_lim + 1); + std::fill(buf.begin(), buf.end(), 'a'); + EXPECT_THAT(pwrite(fd, buf.data(), target_lim, 1), SyscallSucceeds()); + EXPECT_THAT(pwrite64(fd, buf.data(), target_lim, 1), SyscallSucceeds()); + + EXPECT_THAT(close(fd), SyscallSucceeds()); +} + +TEST_F(WriteTest, WriteExceedsRLimit) { + // Get the current rlimit and restore after test run. 
+ struct rlimit initial_lim; + ASSERT_THAT(getrlimit(RLIMIT_FSIZE, &initial_lim), SyscallSucceeds()); + auto cleanup = Cleanup([&initial_lim] { + EXPECT_THAT(setrlimit(RLIMIT_FSIZE, &initial_lim), SyscallSucceeds()); + }); + + int fd; + sigset_t filesize_mask; + sigemptyset(&filesize_mask); + sigaddset(&filesize_mask, SIGXFSZ); + + struct rlimit setlim; + const int target_lim = 1024; + setlim.rlim_cur = target_lim; + setlim.rlim_max = RLIM_INFINITY; + + const std::string pathname = NewTempAbsPath(); + ASSERT_THAT(fd = open(pathname.c_str(), O_WRONLY | O_CREAT, S_IRWXU), + SyscallSucceeds()); + ASSERT_THAT(setrlimit(RLIMIT_FSIZE, &setlim), SyscallSucceeds()); + ASSERT_THAT(sigprocmask(SIG_BLOCK, &filesize_mask, nullptr), + SyscallSucceeds()); + std::vector<char> buf(target_lim + 2); + std::fill(buf.begin(), buf.end(), 'a'); + + EXPECT_THAT(write(fd, buf.data(), target_lim + 1), + SyscallSucceedsWithValue(target_lim)); + EXPECT_THAT(write(fd, buf.data(), 1), SyscallFailsWithErrno(EFBIG)); + struct timespec timelimit = {0, 0}; + EXPECT_THAT(RetryEINTR(sigtimedwait)(&filesize_mask, nullptr, &timelimit), + SyscallSucceedsWithValue(SIGXFSZ)); + + EXPECT_THAT(pwrite(fd, buf.data(), target_lim + 1, 1), + SyscallSucceedsWithValue(target_lim - 1)); + EXPECT_THAT(pwrite(fd, buf.data(), 1, target_lim), + SyscallFailsWithErrno(EFBIG)); + EXPECT_THAT(RetryEINTR(sigtimedwait)(&filesize_mask, nullptr, &timelimit), + SyscallSucceedsWithValue(SIGXFSZ)); + + EXPECT_THAT(pwrite64(fd, buf.data(), target_lim + 1, 1), + SyscallSucceedsWithValue(target_lim - 1)); + EXPECT_THAT(pwrite64(fd, buf.data(), 1, target_lim), + SyscallFailsWithErrno(EFBIG)); + EXPECT_THAT(RetryEINTR(sigtimedwait)(&filesize_mask, nullptr, &timelimit), + SyscallSucceedsWithValue(SIGXFSZ)); + + ASSERT_THAT(sigprocmask(SIG_UNBLOCK, &filesize_mask, nullptr), + SyscallSucceeds()); + EXPECT_THAT(close(fd), SyscallSucceeds()); +} + +} // namespace + +} // namespace testing +} // namespace gvisor |