diff options
Diffstat (limited to 'test/syscalls/linux/proc.cc')
-rw-r--r-- | test/syscalls/linux/proc.cc | 2173 |
1 files changed, 2173 insertions, 0 deletions
diff --git a/test/syscalls/linux/proc.cc b/test/syscalls/linux/proc.cc new file mode 100644 index 000000000..d6b875dbf --- /dev/null +++ b/test/syscalls/linux/proc.cc @@ -0,0 +1,2173 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <elf.h> +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <sched.h> +#include <signal.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/prctl.h> +#include <sys/stat.h> +#include <sys/utsname.h> +#include <syscall.h> +#include <unistd.h> + +#include <algorithm> +#include <atomic> +#include <functional> +#include <iostream> +#include <map> +#include <memory> +#include <ostream> +#include <regex> +#include <string> +#include <unordered_set> +#include <utility> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/strings/ascii.h" +#include "absl/strings/match.h" +#include "absl/strings/numbers.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_split.h" +#include "absl/strings/string_view.h" +#include "absl/synchronization/mutex.h" +#include "absl/synchronization/notification.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/util/capability_util.h" +#include "test/util/cleanup.h" +#include "test/util/file_descriptor.h" +#include "test/util/fs_util.h" +#include "test/util/memory_util.h" +#include "test/util/posix_error.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" +#include "test/util/time_util.h" +#include "test/util/timer_util.h" + +// NOTE(magi): No, this isn't really a syscall but this is a really simple +// way to get it tested on both gVisor, PTrace and Linux. + +using ::testing::AllOf; +using ::testing::AnyOf; +using ::testing::ContainerEq; +using ::testing::Contains; +using ::testing::ContainsRegex; +using ::testing::Eq; +using ::testing::Gt; +using ::testing::HasSubstr; +using ::testing::IsSupersetOf; +using ::testing::Pair; +using ::testing::UnorderedElementsAre; +using ::testing::UnorderedElementsAreArray; + +// Exported by glibc. +extern char** environ; + +namespace gvisor { +namespace testing { +namespace { + +#ifndef SUID_DUMP_DISABLE +#define SUID_DUMP_DISABLE 0 +#endif /* SUID_DUMP_DISABLE */ +#ifndef SUID_DUMP_USER +#define SUID_DUMP_USER 1 +#endif /* SUID_DUMP_USER */ +#ifndef SUID_DUMP_ROOT +#define SUID_DUMP_ROOT 2 +#endif /* SUID_DUMP_ROOT */ + +#if defined(__x86_64__) || defined(__i386__) +// This list of "required" fields is taken from reading the file +// arch/x86/kernel/cpu/proc.c and seeing which fields will be unconditionally +// printed by the kernel. +static const char* required_fields[] = { + "processor", + "vendor_id", + "cpu family", + "model\t\t:", + "model name", + "stepping", + "cpu MHz", + "fpu\t\t:", + "fpu_exception", + "cpuid level", + "wp", + "bogomips", + "clflush size", + "cache_alignment", + "address sizes", + "power management", +}; +#elif __aarch64__ +// This list of "required" fields is taken from reading the file +// arch/arm64/kernel/cpuinfo.c and seeing which fields will be unconditionally +// printed by the kernel. +static const char* required_fields[] = { + "processor", "BogoMIPS", "Features", "CPU implementer", + "CPU architecture", "CPU variant", "CPU part", "CPU revision", +}; +#else +#error "Unknown architecture" +#endif + +// Takes the subprocess command line and pid. +// If it returns !OK, WithSubprocess returns immediately. +using SubprocessCallback = std::function<PosixError(int)>; + +std::vector<std::string> saved_argv; // NOLINT + +// Helper function to dump /proc/{pid}/status and check the +// state data. State should = "Z" for zombied or "RSD" for +// running, interruptible sleeping (S), or uninterruptible sleep +// (D). +void CompareProcessState(absl::string_view state, int pid) { + auto status_file = ASSERT_NO_ERRNO_AND_VALUE( + GetContents(absl::StrCat("/proc/", pid, "/status"))); + // N.B. POSIX extended regexes don't support shorthand character classes (\w) + // inside of brackets. + EXPECT_THAT(status_file, + ContainsRegex(absl::StrCat("State:.[", state, + R"EOL(]\s+\([a-zA-Z ]+\))EOL"))); +} + +// Run callbacks while a subprocess is running, zombied, and/or exited. +PosixError WithSubprocess(SubprocessCallback const& running, + SubprocessCallback const& zombied, + SubprocessCallback const& exited) { + int pipe_fds[2] = {}; + if (pipe(pipe_fds) < 0) { + return PosixError(errno, "pipe"); + } + + int child_pid = fork(); + if (child_pid < 0) { + return PosixError(errno, "fork"); + } + + if (child_pid == 0) { + close(pipe_fds[0]); // Close the read end. + const DisableSave ds; // Timing issues. + + // Write to the pipe to tell it we're ready. + char buf = 'a'; + int res = 0; + res = WriteFd(pipe_fds[1], &buf, sizeof(buf)); + TEST_CHECK_MSG(res == sizeof(buf), "Write failure in subprocess"); + + while (true) { + SleepSafe(absl::Milliseconds(100)); + } + } + + close(pipe_fds[1]); // Close the write end. + + int status = 0; + auto wait_cleanup = Cleanup([child_pid, &status] { + EXPECT_THAT(waitpid(child_pid, &status, 0), SyscallSucceeds()); + }); + auto kill_cleanup = Cleanup([child_pid] { + EXPECT_THAT(kill(child_pid, SIGKILL), SyscallSucceeds()); + }); + + // Wait for the child. + char buf = 0; + int res = ReadFd(pipe_fds[0], &buf, sizeof(buf)); + if (res < 0) { + return PosixError(errno, "Read from pipe"); + } else if (res == 0) { + return PosixError(EPIPE, "Unable to read from pipe: EOF"); + } + + if (running) { + // The first arg, RSD, refers to a "running process", or a process with a + // state of Running (R), Interruptable Sleep (S) or Uninterruptable + // Sleep (D). + CompareProcessState("RSD", child_pid); + RETURN_IF_ERRNO(running(child_pid)); + } + + // Kill the process. + kill_cleanup.Release()(); + siginfo_t info; + // Wait until the child process has exited (WEXITED flag) but don't + // reap the child (WNOWAIT flag). + EXPECT_THAT(waitid(P_PID, child_pid, &info, WNOWAIT | WEXITED), + SyscallSucceeds()); + + if (zombied) { + // Arg of "Z" refers to a Zombied Process. + CompareProcessState("Z", child_pid); + RETURN_IF_ERRNO(zombied(child_pid)); + } + + // Wait on the process. + wait_cleanup.Release()(); + // If the process is reaped, then then this should return + // with ECHILD. + EXPECT_THAT(waitpid(child_pid, &status, WNOHANG), + SyscallFailsWithErrno(ECHILD)); + + if (exited) { + RETURN_IF_ERRNO(exited(child_pid)); + } + + return NoError(); +} + +// Access the file returned by name when a subprocess is running. +PosixError AccessWhileRunning(std::function<std::string(int pid)> name, + int flags, std::function<void(int fd)> access) { + FileDescriptor fd; + return WithSubprocess( + [&](int pid) -> PosixError { + // Running. + ASSIGN_OR_RETURN_ERRNO(fd, Open(name(pid), flags)); + + access(fd.get()); + return NoError(); + }, + nullptr, nullptr); +} + +// Access the file returned by name when the a subprocess is zombied. +PosixError AccessWhileZombied(std::function<std::string(int pid)> name, + int flags, std::function<void(int fd)> access) { + FileDescriptor fd; + return WithSubprocess( + [&](int pid) -> PosixError { + // Running. + ASSIGN_OR_RETURN_ERRNO(fd, Open(name(pid), flags)); + return NoError(); + }, + [&](int pid) -> PosixError { + // Zombied. + access(fd.get()); + return NoError(); + }, + nullptr); +} + +// Access the file returned by name when the a subprocess is exited. +PosixError AccessWhileExited(std::function<std::string(int pid)> name, + int flags, std::function<void(int fd)> access) { + FileDescriptor fd; + return WithSubprocess( + [&](int pid) -> PosixError { + // Running. + ASSIGN_OR_RETURN_ERRNO(fd, Open(name(pid), flags)); + return NoError(); + }, + nullptr, + [&](int pid) -> PosixError { + // Exited. + access(fd.get()); + return NoError(); + }); +} + +// ReadFd(fd=/proc/PID/basename) while PID is running. +int ReadWhileRunning(std::string const& basename, void* buf, size_t count) { + int ret = 0; + int err = 0; + EXPECT_NO_ERRNO(AccessWhileRunning( + [&](int pid) -> std::string { + return absl::StrCat("/proc/", pid, "/", basename); + }, + O_RDONLY, + [&](int fd) { + ret = ReadFd(fd, buf, count); + err = errno; + })); + errno = err; + return ret; +} + +// ReadFd(fd=/proc/PID/basename) while PID is zombied. +int ReadWhileZombied(std::string const& basename, void* buf, size_t count) { + int ret = 0; + int err = 0; + EXPECT_NO_ERRNO(AccessWhileZombied( + [&](int pid) -> std::string { + return absl::StrCat("/proc/", pid, "/", basename); + }, + O_RDONLY, + [&](int fd) { + ret = ReadFd(fd, buf, count); + err = errno; + })); + errno = err; + return ret; +} + +// ReadFd(fd=/proc/PID/basename) while PID is exited. +int ReadWhileExited(std::string const& basename, void* buf, size_t count) { + int ret = 0; + int err = 0; + EXPECT_NO_ERRNO(AccessWhileExited( + [&](int pid) -> std::string { + return absl::StrCat("/proc/", pid, "/", basename); + }, + O_RDONLY, + [&](int fd) { + ret = ReadFd(fd, buf, count); + err = errno; + })); + errno = err; + return ret; +} + +// readlinkat(fd=/proc/PID/, basename) while PID is running. +int ReadlinkWhileRunning(std::string const& basename, char* buf, size_t count) { + int ret = 0; + int err = 0; + EXPECT_NO_ERRNO(AccessWhileRunning( + [&](int pid) -> std::string { return absl::StrCat("/proc/", pid, "/"); }, + O_DIRECTORY, + [&](int fd) { + ret = readlinkat(fd, basename.c_str(), buf, count); + err = errno; + })); + errno = err; + return ret; +} + +// readlinkat(fd=/proc/PID/, basename) while PID is zombied. +int ReadlinkWhileZombied(std::string const& basename, char* buf, size_t count) { + int ret = 0; + int err = 0; + EXPECT_NO_ERRNO(AccessWhileZombied( + [&](int pid) -> std::string { return absl::StrCat("/proc/", pid, "/"); }, + O_DIRECTORY, + [&](int fd) { + ret = readlinkat(fd, basename.c_str(), buf, count); + err = errno; + })); + errno = err; + return ret; +} + +// readlinkat(fd=/proc/PID/, basename) while PID is exited. +int ReadlinkWhileExited(std::string const& basename, char* buf, size_t count) { + int ret = 0; + int err = 0; + EXPECT_NO_ERRNO(AccessWhileExited( + [&](int pid) -> std::string { return absl::StrCat("/proc/", pid, "/"); }, + O_DIRECTORY, + [&](int fd) { + ret = readlinkat(fd, basename.c_str(), buf, count); + err = errno; + })); + errno = err; + return ret; +} + +TEST(ProcTest, NotFoundInRoot) { + struct stat s; + EXPECT_THAT(stat("/proc/foobar", &s), SyscallFailsWithErrno(ENOENT)); +} + +TEST(ProcSelfTest, IsThreadGroupLeader) { + ScopedThread([] { + const pid_t tgid = getpid(); + const pid_t tid = syscall(SYS_gettid); + EXPECT_NE(tgid, tid); + auto link = ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/self")); + EXPECT_EQ(link, absl::StrCat(tgid)); + }); +} + +TEST(ProcThreadSelfTest, Basic) { + const pid_t tgid = getpid(); + const pid_t tid = syscall(SYS_gettid); + EXPECT_EQ(tgid, tid); + auto link_threadself = + ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/thread-self")); + EXPECT_EQ(link_threadself, absl::StrCat(tgid, "/task/", tid)); + // Just read one file inside thread-self to ensure that the link is valid. + auto link_threadself_exe = + ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/thread-self/exe")); + auto link_procself_exe = + ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/self/exe")); + EXPECT_EQ(link_threadself_exe, link_procself_exe); +} + +TEST(ProcThreadSelfTest, Thread) { + ScopedThread([] { + const pid_t tgid = getpid(); + const pid_t tid = syscall(SYS_gettid); + EXPECT_NE(tgid, tid); + auto link_threadself = + ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/thread-self")); + + EXPECT_EQ(link_threadself, absl::StrCat(tgid, "/task/", tid)); + // Just read one file inside thread-self to ensure that the link is valid. + auto link_threadself_exe = + ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/thread-self/exe")); + auto link_procself_exe = + ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/self/exe")); + EXPECT_EQ(link_threadself_exe, link_procself_exe); + // A thread should not have "/proc/<tid>/task". + struct stat s; + EXPECT_THAT(stat("/proc/thread-self/task", &s), + SyscallFailsWithErrno(ENOENT)); + }); +} + +// Returns the /proc/PID/maps entry for the MAP_PRIVATE | MAP_ANONYMOUS mapping +// m with start address addr and length len. +std::string AnonymousMapsEntry(uintptr_t addr, size_t len, int prot) { + return absl::StrCat(absl::Hex(addr, absl::PadSpec::kZeroPad8), "-", + absl::Hex(addr + len, absl::PadSpec::kZeroPad8), " ", + prot & PROT_READ ? "r" : "-", + prot & PROT_WRITE ? "w" : "-", + prot & PROT_EXEC ? "x" : "-", "p 00000000 00:00 0 "); +} + +std::string AnonymousMapsEntryForMapping(const Mapping& m, int prot) { + return AnonymousMapsEntry(m.addr(), m.len(), prot); +} + +PosixErrorOr<std::map<uint64_t, uint64_t>> ReadProcSelfAuxv() { + std::string auxv_file; + RETURN_IF_ERRNO(GetContents("/proc/self/auxv", &auxv_file)); + const Elf64_auxv_t* auxv_data = + reinterpret_cast<const Elf64_auxv_t*>(auxv_file.data()); + std::map<uint64_t, uint64_t> auxv_entries; + for (int i = 0; auxv_data[i].a_type != AT_NULL; i++) { + auto a_type = auxv_data[i].a_type; + EXPECT_EQ(0, auxv_entries.count(a_type)) << "a_type: " << a_type; + auxv_entries.emplace(a_type, auxv_data[i].a_un.a_val); + } + return auxv_entries; +} + +TEST(ProcSelfAuxv, EntryPresence) { + auto auxv_entries = ASSERT_NO_ERRNO_AND_VALUE(ReadProcSelfAuxv()); + + EXPECT_EQ(auxv_entries.count(AT_ENTRY), 1); + EXPECT_EQ(auxv_entries.count(AT_PHDR), 1); + EXPECT_EQ(auxv_entries.count(AT_PHENT), 1); + EXPECT_EQ(auxv_entries.count(AT_PHNUM), 1); + EXPECT_EQ(auxv_entries.count(AT_BASE), 1); + EXPECT_EQ(auxv_entries.count(AT_UID), 1); + EXPECT_EQ(auxv_entries.count(AT_EUID), 1); + EXPECT_EQ(auxv_entries.count(AT_GID), 1); + EXPECT_EQ(auxv_entries.count(AT_EGID), 1); + EXPECT_EQ(auxv_entries.count(AT_SECURE), 1); + EXPECT_EQ(auxv_entries.count(AT_CLKTCK), 1); + EXPECT_EQ(auxv_entries.count(AT_RANDOM), 1); + EXPECT_EQ(auxv_entries.count(AT_EXECFN), 1); + EXPECT_EQ(auxv_entries.count(AT_PAGESZ), 1); + EXPECT_EQ(auxv_entries.count(AT_SYSINFO_EHDR), 1); +} + +TEST(ProcSelfAuxv, EntryValues) { + auto proc_auxv = ASSERT_NO_ERRNO_AND_VALUE(ReadProcSelfAuxv()); + + // We need to find the ELF auxiliary vector. The section of memory pointed to + // by envp contains some pointers to non-null pointers, followed by a single + // pointer to a null pointer, followed by the auxiliary vector. + char** envpi = environ; + while (*envpi) { + ++envpi; + } + + const Elf64_auxv_t* envp_auxv = + reinterpret_cast<const Elf64_auxv_t*>(envpi + 1); + int i; + for (i = 0; envp_auxv[i].a_type != AT_NULL; i++) { + auto a_type = envp_auxv[i].a_type; + EXPECT_EQ(proc_auxv.count(a_type), 1); + EXPECT_EQ(proc_auxv[a_type], envp_auxv[i].a_un.a_val) + << "a_type: " << a_type; + } + EXPECT_EQ(i, proc_auxv.size()); +} + +// Just open and read /proc/self/maps, check that we can find [stack] +TEST(ProcSelfMaps, Basic) { + auto proc_self_maps = + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps")); + + std::vector<std::string> strings = absl::StrSplit(proc_self_maps, '\n'); + std::vector<std::string> stacks; + // Make sure there's a stack in there. + for (const auto& str : strings) { + if (str.find("[stack]") != std::string::npos) { + stacks.push_back(str); + } + } + ASSERT_EQ(1, stacks.size()) << "[stack] not found in: " << proc_self_maps; + // Linux pads to 73 characters then we add 7. + EXPECT_EQ(80, stacks[0].length()); +} + +TEST(ProcSelfMaps, Map1) { + Mapping mapping = + ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_READ, MAP_PRIVATE)); + auto proc_self_maps = + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps")); + std::vector<std::string> strings = absl::StrSplit(proc_self_maps, '\n'); + std::vector<std::string> addrs; + // Make sure if is listed. + for (const auto& str : strings) { + if (str == AnonymousMapsEntryForMapping(mapping, PROT_READ)) { + addrs.push_back(str); + } + } + ASSERT_EQ(1, addrs.size()); +} + +TEST(ProcSelfMaps, Map2) { + // NOTE(magi): The permissions must be different or the pages will get merged. + Mapping map1 = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kPageSize, PROT_READ | PROT_EXEC, MAP_PRIVATE)); + Mapping map2 = + ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_WRITE, MAP_PRIVATE)); + + auto proc_self_maps = + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps")); + std::vector<std::string> strings = absl::StrSplit(proc_self_maps, '\n'); + std::vector<std::string> addrs; + // Make sure if is listed. + for (const auto& str : strings) { + if (str == AnonymousMapsEntryForMapping(map1, PROT_READ | PROT_EXEC)) { + addrs.push_back(str); + } + } + ASSERT_EQ(1, addrs.size()); + addrs.clear(); + for (const auto& str : strings) { + if (str == AnonymousMapsEntryForMapping(map2, PROT_WRITE)) { + addrs.push_back(str); + } + } + ASSERT_EQ(1, addrs.size()); +} + +TEST(ProcSelfMaps, MapUnmap) { + Mapping map1 = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kPageSize, PROT_READ | PROT_EXEC, MAP_PRIVATE)); + Mapping map2 = + ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_WRITE, MAP_PRIVATE)); + + auto proc_self_maps = + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps")); + std::vector<std::string> strings = absl::StrSplit(proc_self_maps, '\n'); + std::vector<std::string> addrs; + // Make sure if is listed. + for (const auto& str : strings) { + if (str == AnonymousMapsEntryForMapping(map1, PROT_READ | PROT_EXEC)) { + addrs.push_back(str); + } + } + ASSERT_EQ(1, addrs.size()) << proc_self_maps; + addrs.clear(); + for (const auto& str : strings) { + if (str == AnonymousMapsEntryForMapping(map2, PROT_WRITE)) { + addrs.push_back(str); + } + } + ASSERT_EQ(1, addrs.size()); + + map2.reset(); + + // Read it again. + proc_self_maps = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps")); + strings = absl::StrSplit(proc_self_maps, '\n'); + // First entry should be there. + addrs.clear(); + for (const auto& str : strings) { + if (str == AnonymousMapsEntryForMapping(map1, PROT_READ | PROT_EXEC)) { + addrs.push_back(str); + } + } + ASSERT_EQ(1, addrs.size()); + addrs.clear(); + // But not the second. + for (const auto& str : strings) { + if (str == AnonymousMapsEntryForMapping(map2, PROT_WRITE)) { + addrs.push_back(str); + } + } + ASSERT_EQ(0, addrs.size()); +} + +TEST(ProcSelfMaps, Mprotect) { + // FIXME(jamieliu): Linux's mprotect() sometimes fails to merge VMAs in this + // case. + SKIP_IF(!IsRunningOnGvisor()); + + // Reserve 5 pages of address space. + Mapping m = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(5 * kPageSize, PROT_NONE, MAP_PRIVATE)); + + // Change the permissions on the middle 3 pages. (The first and last pages may + // be merged with other vmas on either side, so they aren't tested directly; + // they just ensure that the middle 3 pages are bracketed by VMAs with + // incompatible permissions.) + ASSERT_THAT(mprotect(reinterpret_cast<void*>(m.addr() + kPageSize), + 3 * kPageSize, PROT_READ), + SyscallSucceeds()); + + // Check that the middle 3 pages make up a single VMA. + auto proc_self_maps = + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps")); + std::vector<std::string> strings = absl::StrSplit(proc_self_maps, '\n'); + EXPECT_THAT(strings, Contains(AnonymousMapsEntry(m.addr() + kPageSize, + 3 * kPageSize, PROT_READ))); + + // Change the permissions on the middle page only. + ASSERT_THAT(mprotect(reinterpret_cast<void*>(m.addr() + 2 * kPageSize), + kPageSize, PROT_READ | PROT_WRITE), + SyscallSucceeds()); + + // Check that the single VMA has been split into 3 VMAs. + proc_self_maps = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps")); + strings = absl::StrSplit(proc_self_maps, '\n'); + EXPECT_THAT( + strings, + IsSupersetOf( + {AnonymousMapsEntry(m.addr() + kPageSize, kPageSize, PROT_READ), + AnonymousMapsEntry(m.addr() + 2 * kPageSize, kPageSize, + PROT_READ | PROT_WRITE), + AnonymousMapsEntry(m.addr() + 3 * kPageSize, kPageSize, + PROT_READ)})); + + // Change the permissions on the middle page back. + ASSERT_THAT(mprotect(reinterpret_cast<void*>(m.addr() + 2 * kPageSize), + kPageSize, PROT_READ), + SyscallSucceeds()); + + // Check that the 3 VMAs have been merged back into a single VMA. + proc_self_maps = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps")); + strings = absl::StrSplit(proc_self_maps, '\n'); + EXPECT_THAT(strings, Contains(AnonymousMapsEntry(m.addr() + kPageSize, + 3 * kPageSize, PROT_READ))); +} + +TEST(ProcSelfFd, OpenFd) { + int pipe_fds[2]; + ASSERT_THAT(pipe2(pipe_fds, O_CLOEXEC), SyscallSucceeds()); + + // Reopen the write end. + const std::string path = absl::StrCat("/proc/self/fd/", pipe_fds[1]); + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(path, O_WRONLY)); + + // Ensure that a read/write works. + const std::string data = "hello"; + std::unique_ptr<char[]> buffer(new char[data.size()]); + EXPECT_THAT(write(fd.get(), data.c_str(), data.size()), + SyscallSucceedsWithValue(5)); + EXPECT_THAT(read(pipe_fds[0], buffer.get(), data.size()), + SyscallSucceedsWithValue(5)); + EXPECT_EQ(strncmp(buffer.get(), data.c_str(), data.size()), 0); + + // Cleanup. + ASSERT_THAT(close(pipe_fds[0]), SyscallSucceeds()); + ASSERT_THAT(close(pipe_fds[1]), SyscallSucceeds()); +} + +TEST(ProcSelfFdInfo, CorrectFds) { + // Make sure there is at least one open file. + auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDONLY)); + + // Get files in /proc/self/fd. + auto fd_files = ASSERT_NO_ERRNO_AND_VALUE(ListDir("/proc/self/fd", false)); + + // Get files in /proc/self/fdinfo. + auto fdinfo_files = + ASSERT_NO_ERRNO_AND_VALUE(ListDir("/proc/self/fdinfo", false)); + + // They should contain the same fds. + EXPECT_THAT(fd_files, UnorderedElementsAreArray(fdinfo_files)); + + // Both should contain fd. + auto fd_s = absl::StrCat(fd.get()); + EXPECT_THAT(fd_files, Contains(fd_s)); +} + +TEST(ProcSelfFdInfo, Flags) { + std::string path = NewTempAbsPath(); + + // Create file here with O_CREAT to test that O_CREAT does not appear in + // fdinfo flags. + int flags = O_CREAT | O_RDWR | O_APPEND | O_CLOEXEC; + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(path, flags, 0644)); + + // Automatically delete path. + TempPath temp_path(path); + + // O_CREAT does not appear in fdinfo flags. + flags &= ~O_CREAT; + + // O_LARGEFILE always appears (on x86_64). + flags |= kOLargeFile; + + auto fd_info = ASSERT_NO_ERRNO_AND_VALUE( + GetContents(absl::StrCat("/proc/self/fdinfo/", fd.get()))); + EXPECT_THAT(fd_info, HasSubstr(absl::StrFormat("flags:\t%#o", flags))); +} + +TEST(ProcSelfExe, Absolute) { + auto exe = ASSERT_NO_ERRNO_AND_VALUE( + ReadLink(absl::StrCat("/proc/", getpid(), "/exe"))); + EXPECT_EQ(exe[0], '/'); +} + +// Sanity check for /proc/cpuinfo fields that must be present. +TEST(ProcCpuinfo, RequiredFieldsArePresent) { + std::string proc_cpuinfo = + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/cpuinfo")); + ASSERT_FALSE(proc_cpuinfo.empty()); + std::vector<std::string> cpuinfo_fields = absl::StrSplit(proc_cpuinfo, '\n'); + + // Check that the usual fields are there. We don't really care about the + // contents. + for (const std::string& field : required_fields) { + EXPECT_THAT(proc_cpuinfo, HasSubstr(field)); + } +} + +TEST(ProcCpuinfo, DeniesWriteNonRoot) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_FOWNER))); + + // Do setuid in a separate thread so that after finishing this test, the + // process can still open files the test harness created before starting this + // test. Otherwise, the files are created by root (UID before the test), but + // cannot be opened by the `uid` set below after the test. After calling + // setuid(non-zero-UID), there is no way to get root privileges back. + ScopedThread([&] { + // Use syscall instead of glibc setuid wrapper because we want this setuid + // call to only apply to this task. POSIX threads, however, require that all + // threads have the same UIDs, so using the setuid wrapper sets all threads' + // real UID. + // Also drops capabilities. + constexpr int kNobody = 65534; + EXPECT_THAT(syscall(SYS_setuid, kNobody), SyscallSucceeds()); + EXPECT_THAT(open("/proc/cpuinfo", O_WRONLY), SyscallFailsWithErrno(EACCES)); + // TODO(gvisor.dev/issue/1193): Properly support setting size attributes in + // kernfs. + if (!IsRunningOnGvisor() || IsRunningWithVFS1()) { + EXPECT_THAT(truncate("/proc/cpuinfo", 123), + SyscallFailsWithErrno(EACCES)); + } + }); +} + +// With root privileges, it is possible to open /proc/cpuinfo with write mode, +// but all write operations will return EIO. +TEST(ProcCpuinfo, DeniesWriteRoot) { + // VFS1 does not behave differently for root/non-root. + SKIP_IF(IsRunningWithVFS1()); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_FOWNER))); + + int fd; + EXPECT_THAT(fd = open("/proc/cpuinfo", O_WRONLY), SyscallSucceeds()); + if (fd > 0) { + EXPECT_THAT(write(fd, "x", 1), SyscallFailsWithErrno(EIO)); + EXPECT_THAT(pwrite(fd, "x", 1, 123), SyscallFailsWithErrno(EIO)); + } + // TODO(gvisor.dev/issue/1193): Properly support setting size attributes in + // kernfs. + if (!IsRunningOnGvisor() || IsRunningWithVFS1()) { + if (fd > 0) { + EXPECT_THAT(ftruncate(fd, 123), SyscallFailsWithErrno(EIO)); + } + EXPECT_THAT(truncate("/proc/cpuinfo", 123), SyscallFailsWithErrno(EIO)); + } +} + +// Sanity checks that uptime is present. +TEST(ProcUptime, IsPresent) { + std::string proc_uptime = + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/uptime")); + ASSERT_FALSE(proc_uptime.empty()); + std::vector<std::string> uptime_parts = absl::StrSplit(proc_uptime, ' '); + + // Parse once. + double uptime0, uptime1, idletime0, idletime1; + ASSERT_TRUE(absl::SimpleAtod(uptime_parts[0], &uptime0)); + ASSERT_TRUE(absl::SimpleAtod(uptime_parts[1], &idletime0)); + + // Sleep for one second. + absl::SleepFor(absl::Seconds(1)); + + // Parse again. + proc_uptime = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/uptime")); + ASSERT_FALSE(proc_uptime.empty()); + uptime_parts = absl::StrSplit(proc_uptime, ' '); + ASSERT_TRUE(absl::SimpleAtod(uptime_parts[0], &uptime1)); + ASSERT_TRUE(absl::SimpleAtod(uptime_parts[1], &idletime1)); + + // Sanity check. + // + // We assert that between 0.99 and 59.99 seconds have passed. If more than a + // minute has passed, then we must be executing really, really slowly. + EXPECT_GE(uptime0, 0.0); + EXPECT_GE(idletime0, 0.0); + EXPECT_GT(uptime1, uptime0); + EXPECT_GE(uptime1, uptime0 + 0.99); + EXPECT_LE(uptime1, uptime0 + 59.99); + EXPECT_GE(idletime1, idletime0); +} + +TEST(ProcMeminfo, ContainsBasicFields) { + std::string proc_meminfo = + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/meminfo")); + EXPECT_THAT(proc_meminfo, AllOf(ContainsRegex(R"(MemTotal:\s+[0-9]+ kB)"), + ContainsRegex(R"(MemFree:\s+[0-9]+ kB)"))); +} + +TEST(ProcStat, ContainsBasicFields) { + std::string proc_stat = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/stat")); + + std::vector<std::string> names; + for (auto const& line : absl::StrSplit(proc_stat, '\n')) { + std::vector<std::string> fields = + absl::StrSplit(line, ' ', absl::SkipWhitespace()); + if (fields.empty()) { + continue; + } + names.push_back(fields[0]); + } + + EXPECT_THAT(names, + IsSupersetOf({"cpu", "intr", "ctxt", "btime", "processes", + "procs_running", "procs_blocked", "softirq"})); +} + +TEST(ProcStat, EndsWithNewline) { + std::string proc_stat = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/stat")); + EXPECT_EQ(proc_stat.back(), '\n'); +} + +TEST(ProcStat, Fields) { + std::string proc_stat = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/stat")); + + std::vector<std::string> names; + for (auto const& line : absl::StrSplit(proc_stat, '\n')) { + std::vector<std::string> fields = + absl::StrSplit(line, ' ', absl::SkipWhitespace()); + if (fields.empty()) { + continue; + } + + if (absl::StartsWith(fields[0], "cpu")) { + // As of Linux 3.11, each CPU entry has 10 fields, plus the name. + EXPECT_GE(fields.size(), 11) << proc_stat; + } else if (fields[0] == "ctxt") { + // Single field. + EXPECT_EQ(fields.size(), 2) << proc_stat; + } else if (fields[0] == "btime") { + // Single field. + EXPECT_EQ(fields.size(), 2) << proc_stat; + } else if (fields[0] == "itime") { + // Single field. + ASSERT_EQ(fields.size(), 2) << proc_stat; + // This is the only floating point field. + double val; + EXPECT_TRUE(absl::SimpleAtod(fields[1], &val)) << proc_stat; + continue; + } else if (fields[0] == "processes") { + // Single field. + EXPECT_EQ(fields.size(), 2) << proc_stat; + } else if (fields[0] == "procs_running") { + // Single field. + EXPECT_EQ(fields.size(), 2) << proc_stat; + } else if (fields[0] == "procs_blocked") { + // Single field. + EXPECT_EQ(fields.size(), 2) << proc_stat; + } else if (fields[0] == "softirq") { + // As of Linux 3.11, there are 10 softirqs. 12 fields for name + total. + EXPECT_GE(fields.size(), 12) << proc_stat; + } + + // All fields besides itime are valid base 10 numbers. + for (size_t i = 1; i < fields.size(); i++) { + uint64_t val; + EXPECT_TRUE(absl::SimpleAtoi(fields[i], &val)) << proc_stat; + } + } +} + +TEST(ProcLoadavg, EndsWithNewline) { + std::string proc_loadvg = + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/loadavg")); + EXPECT_EQ(proc_loadvg.back(), '\n'); +} + +TEST(ProcLoadavg, Fields) { + std::string proc_loadvg = + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/loadavg")); + std::vector<std::string> lines = absl::StrSplit(proc_loadvg, '\n'); + + // Single line. + EXPECT_EQ(lines.size(), 2) << proc_loadvg; + + std::vector<std::string> fields = + absl::StrSplit(lines[0], absl::ByAnyChar(" /"), absl::SkipWhitespace()); + + // Six fields. + EXPECT_EQ(fields.size(), 6) << proc_loadvg; + + double val; + uint64_t val2; + // First three fields are floating point numbers. + EXPECT_TRUE(absl::SimpleAtod(fields[0], &val)) << proc_loadvg; + EXPECT_TRUE(absl::SimpleAtod(fields[1], &val)) << proc_loadvg; + EXPECT_TRUE(absl::SimpleAtod(fields[2], &val)) << proc_loadvg; + // Rest of the fields are valid base 10 numbers. + EXPECT_TRUE(absl::SimpleAtoi(fields[3], &val2)) << proc_loadvg; + EXPECT_TRUE(absl::SimpleAtoi(fields[4], &val2)) << proc_loadvg; + EXPECT_TRUE(absl::SimpleAtoi(fields[5], &val2)) << proc_loadvg; +} + +// NOTE: Tests in priority.cc also check certain priority related fields in +// /proc/self/stat. + +class ProcPidStatTest : public ::testing::TestWithParam<std::string> {}; + +TEST_P(ProcPidStatTest, HasBasicFields) { + std::string proc_pid_stat = ASSERT_NO_ERRNO_AND_VALUE( + GetContents(absl::StrCat("/proc/", GetParam(), "/stat"))); + + ASSERT_FALSE(proc_pid_stat.empty()); + std::vector<std::string> fields = absl::StrSplit(proc_pid_stat, ' '); + ASSERT_GE(fields.size(), 24); + EXPECT_EQ(absl::StrCat(getpid()), fields[0]); + // fields[1] is the thread name. + EXPECT_EQ("R", fields[2]); // task state + EXPECT_EQ(absl::StrCat(getppid()), fields[3]); + + // If the test starts up quickly, then the process start time and the kernel + // boot time will be very close, and the proc starttime field (which is the + // delta of the two times) will be 0. For that unfortunate reason, we can + // only check that starttime >= 0, and not that it is strictly > 0. + uint64_t starttime; + ASSERT_TRUE(absl::SimpleAtoi(fields[21], &starttime)); + EXPECT_GE(starttime, 0); + + uint64_t vss; + ASSERT_TRUE(absl::SimpleAtoi(fields[22], &vss)); + EXPECT_GT(vss, 0); + + uint64_t rss; + ASSERT_TRUE(absl::SimpleAtoi(fields[23], &rss)); + EXPECT_GT(rss, 0); + + uint64_t rsslim; + ASSERT_TRUE(absl::SimpleAtoi(fields[24], &rsslim)); + EXPECT_GT(rsslim, 0); +} + +INSTANTIATE_TEST_SUITE_P(SelfAndNumericPid, ProcPidStatTest, + ::testing::Values("self", absl::StrCat(getpid()))); + +using ProcPidStatmTest = ::testing::TestWithParam<std::string>; + +TEST_P(ProcPidStatmTest, HasBasicFields) { + std::string proc_pid_statm = ASSERT_NO_ERRNO_AND_VALUE( + GetContents(absl::StrCat("/proc/", GetParam(), "/statm"))); + ASSERT_FALSE(proc_pid_statm.empty()); + std::vector<std::string> fields = absl::StrSplit(proc_pid_statm, ' '); + ASSERT_GE(fields.size(), 7); + + uint64_t vss; + ASSERT_TRUE(absl::SimpleAtoi(fields[0], &vss)); + EXPECT_GT(vss, 0); + + uint64_t rss; + ASSERT_TRUE(absl::SimpleAtoi(fields[1], &rss)); + EXPECT_GT(rss, 0); +} + +INSTANTIATE_TEST_SUITE_P(SelfAndNumericPid, ProcPidStatmTest, + ::testing::Values("self", absl::StrCat(getpid()))); + +PosixErrorOr<uint64_t> CurrentRSS() { + ASSIGN_OR_RETURN_ERRNO(auto proc_self_stat, GetContents("/proc/self/stat")); + if (proc_self_stat.empty()) { + return PosixError(EINVAL, "empty /proc/self/stat"); + } + + std::vector<std::string> fields = absl::StrSplit(proc_self_stat, ' '); + if (fields.size() < 24) { + return PosixError( + EINVAL, + absl::StrCat("/proc/self/stat has too few fields: ", proc_self_stat)); + } + + uint64_t rss; + if (!absl::SimpleAtoi(fields[23], &rss)) { + return PosixError( + EINVAL, absl::StrCat("/proc/self/stat RSS field is not a number: ", + fields[23])); + } + + // RSS is given in number of pages. + return rss * kPageSize; +} + +// The size of mapping created by MapPopulateRSS. +constexpr uint64_t kMappingSize = 100 << 20; + +// Tolerance on RSS comparisons to account for background thread mappings, +// reclaimed pages, newly faulted pages, etc. +constexpr uint64_t kRSSTolerance = 10 << 20; + +// Capture RSS before and after an anonymous mapping with passed prot. +void MapPopulateRSS(int prot, uint64_t* before, uint64_t* after) { + *before = ASSERT_NO_ERRNO_AND_VALUE(CurrentRSS()); + + // N.B. The kernel asynchronously accumulates per-task RSS counters into the + // mm RSS, which is exposed by /proc/PID/stat. Task exit is a synchronization + // point (kernel/exit.c:do_exit -> sync_mm_rss), so perform the mapping on + // another thread to ensure it is reflected in RSS after the thread exits. + Mapping mapping; + ScopedThread t([&mapping, prot] { + mapping = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kMappingSize, prot, MAP_PRIVATE | MAP_POPULATE)); + }); + t.Join(); + + *after = ASSERT_NO_ERRNO_AND_VALUE(CurrentRSS()); +} + +// TODO(b/73896574): Test for PROT_READ + MAP_POPULATE anonymous mappings. Their +// semantics are more subtle: +// +// Small pages -> Zero page mapped, not counted in RSS +// (mm/memory.c:do_anonymous_page). +// +// Huge pages (THP enabled, use_zero_page=0) -> Pages committed +// (mm/memory.c:__handle_mm_fault -> create_huge_pmd). +// +// Huge pages (THP enabled, use_zero_page=1) -> Zero page mapped, not counted in +// RSS (mm/huge_memory.c:do_huge_pmd_anonymous_page). + +// PROT_WRITE + MAP_POPULATE anonymous mappings are always committed. +TEST(ProcSelfStat, PopulateWriteRSS) { + uint64_t before, after; + MapPopulateRSS(PROT_READ | PROT_WRITE, &before, &after); + + // Mapping is committed. + EXPECT_NEAR(before + kMappingSize, after, kRSSTolerance); +} + +// PROT_NONE + MAP_POPULATE anonymous mappings are never committed. +TEST(ProcSelfStat, PopulateNoneRSS) { + uint64_t before, after; + MapPopulateRSS(PROT_NONE, &before, &after); + + // Mapping not committed. + EXPECT_NEAR(before, after, kRSSTolerance); +} + +// Returns the calling thread's name. +PosixErrorOr<std::string> ThreadName() { + // "The buffer should allow space for up to 16 bytes; the returned std::string + // will be null-terminated if it is shorter than that." - prctl(2). But we + // always want the thread name to be null-terminated. + char thread_name[17]; + int rc = prctl(PR_GET_NAME, thread_name, 0, 0, 0); + MaybeSave(); + if (rc < 0) { + return PosixError(errno, "prctl(PR_GET_NAME)"); + } + thread_name[16] = '\0'; + return std::string(thread_name); +} + +// Parses the contents of a /proc/[pid]/status file into a collection of +// key-value pairs. +PosixErrorOr<std::map<std::string, std::string>> ParseProcStatus( + absl::string_view status_str) { + std::map<std::string, std::string> fields; + for (absl::string_view const line : + absl::StrSplit(status_str, '\n', absl::SkipWhitespace())) { + const std::pair<absl::string_view, absl::string_view> kv = + absl::StrSplit(line, absl::MaxSplits(":\t", 1)); + if (kv.first.empty()) { + return PosixError( + EINVAL, absl::StrCat("failed to parse key in line \"", line, "\"")); + } + std::string key(kv.first); + if (fields.count(key)) { + return PosixError(EINVAL, + absl::StrCat("duplicate key \"", kv.first, "\"")); + } + std::string value(kv.second); + absl::StripLeadingAsciiWhitespace(&value); + fields.emplace(std::move(key), std::move(value)); + } + return fields; +} + +TEST(ParseProcStatusTest, ParsesSimpleStatusFileWithMixedWhitespaceCorrectly) { + EXPECT_THAT( + ParseProcStatus( + "Name:\tinit\nState:\tS (sleeping)\nCapEff:\t 0000001fffffffff\n"), + IsPosixErrorOkAndHolds(UnorderedElementsAre( + Pair("Name", "init"), Pair("State", "S (sleeping)"), + Pair("CapEff", "0000001fffffffff")))); +} + +TEST(ParseProcStatusTest, DetectsDuplicateKeys) { + auto proc_status_or = ParseProcStatus("Name:\tfoo\nName:\tfoo\n"); + EXPECT_THAT(proc_status_or, + PosixErrorIs(EINVAL, ::testing::StrEq("duplicate key \"Name\""))); +} + +TEST(ParseProcStatusTest, DetectsMissingTabs) { + EXPECT_THAT(ParseProcStatus("Name:foo\nPid: 1\n"), + IsPosixErrorOkAndHolds(UnorderedElementsAre(Pair("Name:foo", ""), + Pair("Pid: 1", "")))); +} + +TEST(ProcPidStatusTest, HasBasicFields) { + // Do this on a separate thread since we want tgid != tid. + ScopedThread([] { + const pid_t tgid = getpid(); + const pid_t tid = syscall(SYS_gettid); + EXPECT_NE(tgid, tid); + const auto thread_name = ASSERT_NO_ERRNO_AND_VALUE(ThreadName()); + + std::string status_str = ASSERT_NO_ERRNO_AND_VALUE( + GetContents(absl::StrCat("/proc/", tid, "/status"))); + + ASSERT_FALSE(status_str.empty()); + const auto status = ASSERT_NO_ERRNO_AND_VALUE(ParseProcStatus(status_str)); + EXPECT_THAT(status, IsSupersetOf({Pair("Name", thread_name), + Pair("Tgid", absl::StrCat(tgid)), + Pair("Pid", absl::StrCat(tid)), + Pair("PPid", absl::StrCat(getppid()))})); + }); +} + +TEST(ProcPidStatusTest, StateRunning) { + // Task must be running when reading the file. + const pid_t tid = syscall(SYS_gettid); + std::string status_str = ASSERT_NO_ERRNO_AND_VALUE( + GetContents(absl::StrCat("/proc/", tid, "/status"))); + + EXPECT_THAT(ParseProcStatus(status_str), + IsPosixErrorOkAndHolds(Contains(Pair("State", "R (running)")))); +} + +TEST(ProcPidStatusTest, StateSleeping_NoRandomSave) { + // Starts a child process that blocks and checks that State is sleeping. + auto res = WithSubprocess( + [&](int pid) -> PosixError { + // Because this test is timing based we will disable cooperative saving + // and the test itself also has random saving disabled. + const DisableSave ds; + // Try multiple times in case the child isn't sleeping when status file + // is read. + MonotonicTimer timer; + timer.Start(); + for (;;) { + ASSIGN_OR_RETURN_ERRNO( + std::string status_str, + GetContents(absl::StrCat("/proc/", pid, "/status"))); + ASSIGN_OR_RETURN_ERRNO(auto map, ParseProcStatus(status_str)); + if (map["State"] == std::string("S (sleeping)")) { + // Test passed! + return NoError(); + } + if (timer.Duration() > absl::Seconds(10)) { + return PosixError(ETIMEDOUT, "Timeout waiting for child to sleep"); + } + absl::SleepFor(absl::Milliseconds(10)); + } + }, + nullptr, nullptr); + ASSERT_NO_ERRNO(res); +} + +TEST(ProcPidStatusTest, ValuesAreTabDelimited) { + std::string status_str = + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/status")); + ASSERT_FALSE(status_str.empty()); + for (absl::string_view const line : + absl::StrSplit(status_str, '\n', absl::SkipWhitespace())) { + EXPECT_NE(std::string::npos, line.find(":\t")); + } +} + +// Threads properly counts running threads. +// +// TODO(mpratt): Test zombied threads while the thread group leader is still +// running with generalized fork and clone children from the wait test. +TEST(ProcPidStatusTest, Threads) { + char buf[4096] = {}; + EXPECT_THAT(ReadWhileRunning("status", buf, sizeof(buf) - 1), + SyscallSucceedsWithValue(Gt(0))); + + auto status = ASSERT_NO_ERRNO_AND_VALUE(ParseProcStatus(buf)); + auto it = status.find("Threads"); + ASSERT_NE(it, status.end()); + int threads = -1; + EXPECT_TRUE(absl::SimpleAtoi(it->second, &threads)) + << "Threads value " << it->second << " is not a number"; + // Don't make assumptions about the exact number of threads, as it may not be + // constant. + EXPECT_GE(threads, 1); + + memset(buf, 0, sizeof(buf)); + EXPECT_THAT(ReadWhileZombied("status", buf, sizeof(buf) - 1), + SyscallSucceedsWithValue(Gt(0))); + + status = ASSERT_NO_ERRNO_AND_VALUE(ParseProcStatus(buf)); + it = status.find("Threads"); + ASSERT_NE(it, status.end()); + threads = -1; + EXPECT_TRUE(absl::SimpleAtoi(it->second, &threads)) + << "Threads value " << it->second << " is not a number"; + // There must be only the thread group leader remaining, zombied. + EXPECT_EQ(threads, 1); +} + +// Returns true if all characters in s are digits. +bool IsDigits(absl::string_view s) { + return std::all_of(s.begin(), s.end(), absl::ascii_isdigit); +} + +TEST(ProcPidStatTest, VmStats) { + std::string status_str = + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/status")); + ASSERT_FALSE(status_str.empty()); + auto status = ASSERT_NO_ERRNO_AND_VALUE(ParseProcStatus(status_str)); + + const auto vss_it = status.find("VmSize"); + ASSERT_NE(vss_it, status.end()); + + absl::string_view vss_str(vss_it->second); + + // Room for the " kB" suffix plus at least one digit. + ASSERT_GT(vss_str.length(), 3); + EXPECT_TRUE(absl::EndsWith(vss_str, " kB")); + // Everything else is part of a number. + EXPECT_TRUE(IsDigits(vss_str.substr(0, vss_str.length() - 3))) << vss_str; + // ... which is not 0. + EXPECT_NE('0', vss_str[0]); + + const auto rss_it = status.find("VmRSS"); + ASSERT_NE(rss_it, status.end()); + + absl::string_view rss_str(rss_it->second); + + // Room for the " kB" suffix plus at least one digit. + ASSERT_GT(rss_str.length(), 3); + EXPECT_TRUE(absl::EndsWith(rss_str, " kB")); + // Everything else is part of a number. + EXPECT_TRUE(IsDigits(rss_str.substr(0, rss_str.length() - 3))) << rss_str; + // ... which is not 0. + EXPECT_NE('0', rss_str[0]); + + const auto data_it = status.find("VmData"); + ASSERT_NE(data_it, status.end()); + + absl::string_view data_str(data_it->second); + + // Room for the " kB" suffix plus at least one digit. + ASSERT_GT(data_str.length(), 3); + EXPECT_TRUE(absl::EndsWith(data_str, " kB")); + // Everything else is part of a number. + EXPECT_TRUE(IsDigits(data_str.substr(0, data_str.length() - 3))) << data_str; + // ... which is not 0. + EXPECT_NE('0', data_str[0]); +} + +// Parse an array of NUL-terminated char* arrays, returning a vector of +// strings. +std::vector<std::string> ParseNulTerminatedStrings(std::string contents) { + EXPECT_EQ('\0', contents.back()); + // The split will leave an empty string if the NUL-byte remains, so pop + // it. + contents.pop_back(); + + return absl::StrSplit(contents, '\0'); +} + +TEST(ProcPidCmdline, MatchesArgv) { + std::vector<std::string> proc_cmdline = ParseNulTerminatedStrings( + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/cmdline"))); + EXPECT_THAT(saved_argv, ContainerEq(proc_cmdline)); +} + +TEST(ProcPidEnviron, MatchesEnviron) { + std::vector<std::string> proc_environ = ParseNulTerminatedStrings( + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/environ"))); + // Get the environment from the environ variable, which we will compare with + // /proc/self/environ. + std::vector<std::string> env; + for (char** v = environ; *v; v++) { + env.push_back(*v); + } + EXPECT_THAT(env, ContainerEq(proc_environ)); +} + +TEST(ProcPidCmdline, SubprocessForkSameCmdline) { + std::vector<std::string> proc_cmdline_parent; + std::vector<std::string> proc_cmdline; + proc_cmdline_parent = ParseNulTerminatedStrings( + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/cmdline"))); + auto res = WithSubprocess( + [&](int pid) -> PosixError { + ASSIGN_OR_RETURN_ERRNO( + auto raw_cmdline, + GetContents(absl::StrCat("/proc/", pid, "/cmdline"))); + proc_cmdline = ParseNulTerminatedStrings(raw_cmdline); + return NoError(); + }, + nullptr, nullptr); + ASSERT_NO_ERRNO(res); + + for (size_t i = 0; i < proc_cmdline_parent.size(); i++) { + EXPECT_EQ(proc_cmdline_parent[i], proc_cmdline[i]); + } +} + +// Test whether /proc/PID/ symlinks can be read for a running process. +TEST(ProcPidSymlink, SubprocessRunning) { + char buf[1]; + + EXPECT_THAT(ReadlinkWhileRunning("exe", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(ReadlinkWhileRunning("ns/net", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(ReadlinkWhileRunning("ns/pid", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(ReadlinkWhileRunning("ns/user", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); +} + +TEST(ProcPidSymlink, SubprocessZombied) { + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false)); + + char buf[1]; + + int want = EACCES; + if (!IsRunningOnGvisor()) { + auto version = ASSERT_NO_ERRNO_AND_VALUE(GetKernelVersion()); + if (version.major > 4 || (version.major == 4 && version.minor > 3)) { + want = ENOENT; + } + } + + EXPECT_THAT(ReadlinkWhileZombied("exe", buf, sizeof(buf)), + SyscallFailsWithErrno(want)); + + if (!IsRunningOnGvisor()) { + EXPECT_THAT(ReadlinkWhileZombied("ns/net", buf, sizeof(buf)), + SyscallFailsWithErrno(want)); + } + + // FIXME(gvisor.dev/issue/164): Inconsistent behavior between linux on proc + // files. + // + // ~4.3: Syscall fails with EACCES. + // 4.17: Syscall succeeds and returns 1. + // + if (!IsRunningOnGvisor()) { + return; + } + + EXPECT_THAT(ReadlinkWhileZombied("ns/pid", buf, sizeof(buf)), + SyscallFailsWithErrno(want)); + + EXPECT_THAT(ReadlinkWhileZombied("ns/user", buf, sizeof(buf)), + SyscallFailsWithErrno(want)); +} + +// Test whether /proc/PID/ symlinks can be read for an exited process. +TEST(ProcPidSymlink, SubprocessExited) { + char buf[1]; + + EXPECT_THAT(ReadlinkWhileExited("exe", buf, sizeof(buf)), + SyscallFailsWithErrno(ESRCH)); + + EXPECT_THAT(ReadlinkWhileExited("ns/net", buf, sizeof(buf)), + SyscallFailsWithErrno(ESRCH)); + + EXPECT_THAT(ReadlinkWhileExited("ns/pid", buf, sizeof(buf)), + SyscallFailsWithErrno(ESRCH)); + + EXPECT_THAT(ReadlinkWhileExited("ns/user", buf, sizeof(buf)), + SyscallFailsWithErrno(ESRCH)); +} + +// /proc/PID/exe points to the correct binary. +TEST(ProcPidExe, Subprocess) { + auto link = ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/self/exe")); + auto expected_absolute_path = + ASSERT_NO_ERRNO_AND_VALUE(MakeAbsolute(link, "")); + + char actual[PATH_MAX + 1] = {}; + ASSERT_THAT(ReadlinkWhileRunning("exe", actual, sizeof(actual)), + SyscallSucceedsWithValue(Gt(0))); + EXPECT_EQ(actual, expected_absolute_path); +} + +// Test whether /proc/PID/ files can be read for a running process. +TEST(ProcPidFile, SubprocessRunning) { + char buf[1]; + + EXPECT_THAT(ReadWhileRunning("auxv", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(ReadWhileRunning("cmdline", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(ReadWhileRunning("comm", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(ReadWhileRunning("gid_map", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(ReadWhileRunning("io", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(ReadWhileRunning("maps", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(ReadWhileRunning("stat", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(ReadWhileRunning("status", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(ReadWhileRunning("uid_map", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(ReadWhileRunning("oom_score", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(ReadWhileRunning("oom_score_adj", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); +} + +// Test whether /proc/PID/ files can be read for a zombie process. +TEST(ProcPidFile, SubprocessZombie) { + char buf[1]; + + // FIXME(gvisor.dev/issue/164): Loosen requirement due to inconsistent + // behavior on different kernels. + // + // ~4.3: Succeds and returns 0. + // 4.17: Succeeds and returns 1. + // gVisor: Succeeds and returns 0. + EXPECT_THAT(ReadWhileZombied("auxv", buf, sizeof(buf)), SyscallSucceeds()); + + EXPECT_THAT(ReadWhileZombied("cmdline", buf, sizeof(buf)), + SyscallSucceedsWithValue(0)); + + EXPECT_THAT(ReadWhileZombied("comm", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(ReadWhileZombied("gid_map", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(ReadWhileZombied("maps", buf, sizeof(buf)), + SyscallSucceedsWithValue(0)); + + EXPECT_THAT(ReadWhileZombied("stat", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(ReadWhileZombied("status", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(ReadWhileZombied("uid_map", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(ReadWhileZombied("oom_score", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(ReadWhileZombied("oom_score_adj", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + // FIXME(gvisor.dev/issue/164): Inconsistent behavior between gVisor and linux + // on proc files. + // + // ~4.3: Fails and returns EACCES. + // gVisor & 4.17: Succeeds and returns 1. + // + // EXPECT_THAT(ReadWhileZombied("io", buf, sizeof(buf)), + // SyscallFailsWithErrno(EACCES)); +} + +// Test whether /proc/PID/ files can be read for an exited process. +TEST(ProcPidFile, SubprocessExited) { + char buf[1]; + + // FIXME(gvisor.dev/issue/164): Inconsistent behavior between kernels. + // + // ~4.3: Fails and returns ESRCH. + // gVisor: Fails with ESRCH. + // 4.17: Succeeds and returns 1. + // + // EXPECT_THAT(ReadWhileExited("auxv", buf, sizeof(buf)), + // SyscallFailsWithErrno(ESRCH)); + + EXPECT_THAT(ReadWhileExited("cmdline", buf, sizeof(buf)), + SyscallFailsWithErrno(ESRCH)); + + if (!IsRunningOnGvisor()) { + // FIXME(gvisor.dev/issue/164): Succeeds on gVisor. + EXPECT_THAT(ReadWhileExited("comm", buf, sizeof(buf)), + SyscallFailsWithErrno(ESRCH)); + } + + EXPECT_THAT(ReadWhileExited("gid_map", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + if (!IsRunningOnGvisor()) { + // FIXME(gvisor.dev/issue/164): Succeeds on gVisor. + EXPECT_THAT(ReadWhileExited("io", buf, sizeof(buf)), + SyscallFailsWithErrno(ESRCH)); + } + + if (!IsRunningOnGvisor()) { + // FIXME(gvisor.dev/issue/164): Returns EOF on gVisor. + EXPECT_THAT(ReadWhileExited("maps", buf, sizeof(buf)), + SyscallFailsWithErrno(ESRCH)); + } + + if (!IsRunningOnGvisor()) { + // FIXME(gvisor.dev/issue/164): Succeeds on gVisor. + EXPECT_THAT(ReadWhileExited("stat", buf, sizeof(buf)), + SyscallFailsWithErrno(ESRCH)); + } + + if (!IsRunningOnGvisor()) { + // FIXME(gvisor.dev/issue/164): Succeeds on gVisor. + EXPECT_THAT(ReadWhileExited("status", buf, sizeof(buf)), + SyscallFailsWithErrno(ESRCH)); + } + + EXPECT_THAT(ReadWhileExited("uid_map", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + if (!IsRunningOnGvisor()) { + // FIXME(gvisor.dev/issue/164): Succeeds on gVisor. + EXPECT_THAT(ReadWhileExited("oom_score", buf, sizeof(buf)), + SyscallFailsWithErrno(ESRCH)); + } + + EXPECT_THAT(ReadWhileExited("oom_score_adj", buf, sizeof(buf)), + SyscallFailsWithErrno(ESRCH)); +} + +PosixError DirContainsImpl(absl::string_view path, + const std::vector<std::string>& targets, + bool strict) { + ASSIGN_OR_RETURN_ERRNO(auto listing, ListDir(path, false)); + bool success = true; + + for (auto& expected_entry : targets) { + auto cursor = std::find(listing.begin(), listing.end(), expected_entry); + if (cursor == listing.end()) { + success = false; + } + } + + if (!success) { + return PosixError( + ENOENT, + absl::StrCat("Failed to find one or more paths in '", path, "'")); + } + + if (strict) { + if (targets.size() != listing.size()) { + return PosixError( + EINVAL, + absl::StrCat("Expected to find ", targets.size(), " elements in '", + path, "', but found ", listing.size())); + } + } + + return NoError(); +} + +PosixError DirContains(absl::string_view path, + const std::vector<std::string>& targets) { + return DirContainsImpl(path, targets, false); +} + +PosixError DirContainsExactly(absl::string_view path, + const std::vector<std::string>& targets) { + return DirContainsImpl(path, targets, true); +} + +PosixError EventuallyDirContainsExactly( + absl::string_view path, const std::vector<std::string>& targets) { + constexpr int kRetryCount = 100; + const absl::Duration kRetryDelay = absl::Milliseconds(100); + + for (int i = 0; i < kRetryCount; ++i) { + auto res = DirContainsExactly(path, targets); + if (res.ok()) { + return res; + } else if (i < kRetryCount - 1) { + // Sleep if this isn't the final iteration. + absl::SleepFor(kRetryDelay); + } + } + return PosixError(ETIMEDOUT, + "Timed out while waiting for directory to contain files "); +} + +TEST(ProcTask, Basic) { + EXPECT_NO_ERRNO( + DirContains("/proc/self/task", {".", "..", absl::StrCat(getpid())})); +} + +std::vector<std::string> TaskFiles( + const std::vector<std::string>& initial_contents, + const std::vector<pid_t>& pids) { + return VecCat<std::string>( + initial_contents, + ApplyVec<std::string>([](const pid_t p) { return absl::StrCat(p); }, + pids)); +} + +std::vector<std::string> TaskFiles(const std::vector<pid_t>& pids) { + return TaskFiles({".", "..", absl::StrCat(getpid())}, pids); +} + +// Helper class for creating a new task in the current thread group. +class BlockingChild { + public: + BlockingChild() : thread_([=] { Start(); }) {} + ~BlockingChild() { Join(); } + + pid_t Tid() const { + absl::MutexLock ml(&mu_); + mu_.Await(absl::Condition(&tid_ready_)); + return tid_; + } + + void Join() { Stop(); } + + private: + void Start() { + absl::MutexLock ml(&mu_); + tid_ = syscall(__NR_gettid); + tid_ready_ = true; + mu_.Await(absl::Condition(&stop_)); + } + + void Stop() { + absl::MutexLock ml(&mu_); + stop_ = true; + } + + mutable absl::Mutex mu_; + bool stop_ ABSL_GUARDED_BY(mu_) = false; + pid_t tid_; + bool tid_ready_ ABSL_GUARDED_BY(mu_) = false; + + // Must be last to ensure that the destructor for the thread is run before + // any other member of the object is destroyed. + ScopedThread thread_; +}; + +TEST(ProcTask, NewThreadAppears) { + auto initial = ASSERT_NO_ERRNO_AND_VALUE(ListDir("/proc/self/task", false)); + BlockingChild child1; + EXPECT_NO_ERRNO(DirContainsExactly("/proc/self/task", + TaskFiles(initial, {child1.Tid()}))); +} + +TEST(ProcTask, KilledThreadsDisappear) { + auto initial = ASSERT_NO_ERRNO_AND_VALUE(ListDir("/proc/self/task/", false)); + + BlockingChild child1; + EXPECT_NO_ERRNO(DirContainsExactly("/proc/self/task", + TaskFiles(initial, {child1.Tid()}))); + + // Stat child1's task file. Regression test for b/32097707. + struct stat statbuf; + const std::string child1_task_file = + absl::StrCat("/proc/self/task/", child1.Tid()); + EXPECT_THAT(stat(child1_task_file.c_str(), &statbuf), SyscallSucceeds()); + + BlockingChild child2; + EXPECT_NO_ERRNO(DirContainsExactly( + "/proc/self/task", TaskFiles(initial, {child1.Tid(), child2.Tid()}))); + + BlockingChild child3; + BlockingChild child4; + BlockingChild child5; + EXPECT_NO_ERRNO(DirContainsExactly( + "/proc/self/task", + TaskFiles(initial, {child1.Tid(), child2.Tid(), child3.Tid(), + child4.Tid(), child5.Tid()}))); + + child2.Join(); + EXPECT_NO_ERRNO(EventuallyDirContainsExactly( + "/proc/self/task", TaskFiles(initial, {child1.Tid(), child3.Tid(), + child4.Tid(), child5.Tid()}))); + + child1.Join(); + child4.Join(); + EXPECT_NO_ERRNO(EventuallyDirContainsExactly( + "/proc/self/task", TaskFiles(initial, {child3.Tid(), child5.Tid()}))); + + // Stat child1's task file again. This time it should fail. See b/32097707. + EXPECT_THAT(stat(child1_task_file.c_str(), &statbuf), + SyscallFailsWithErrno(ENOENT)); + + child3.Join(); + child5.Join(); + EXPECT_NO_ERRNO(EventuallyDirContainsExactly("/proc/self/task", initial)); +} + +TEST(ProcTask, ChildTaskDir) { + BlockingChild child1; + EXPECT_NO_ERRNO(DirContains("/proc/self/task", TaskFiles({child1.Tid()}))); + EXPECT_NO_ERRNO(DirContains(absl::StrCat("/proc/", child1.Tid(), "/task"), + TaskFiles({child1.Tid()}))); +} + +PosixError VerifyPidDir(std::string path) { + return DirContains(path, {"exe", "fd", "io", "maps", "ns", "stat", "status"}); +} + +TEST(ProcTask, VerifyTaskDir) { + EXPECT_NO_ERRNO(VerifyPidDir("/proc/self")); + + EXPECT_NO_ERRNO(VerifyPidDir(absl::StrCat("/proc/self/task/", getpid()))); + BlockingChild child1; + EXPECT_NO_ERRNO(VerifyPidDir(absl::StrCat("/proc/self/task/", child1.Tid()))); + + // Only the first level of task directories should contain the 'task' + // directory. That is: + // + // /proc/1234/task <- should exist + // /proc/1234/task/1234/task <- should not exist + // /proc/1234/task/1235/task <- should not exist (where 1235 is in the same + // thread group as 1234). + EXPECT_FALSE( + DirContains(absl::StrCat("/proc/self/task/", getpid()), {"task"}).ok()) + << "Found 'task' directory in an inner directory."; +} + +TEST(ProcTask, TaskDirCannotBeDeleted) { + // Drop capabilities that allow us to override file and directory permissions. + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + + EXPECT_THAT(rmdir("/proc/self/task"), SyscallFails()); + EXPECT_THAT(rmdir(absl::StrCat("/proc/self/task/", getpid()).c_str()), + SyscallFailsWithErrno(EACCES)); +} + +TEST(ProcTask, TaskDirHasCorrectMetadata) { + struct stat st; + EXPECT_THAT(stat("/proc/self/task", &st), SyscallSucceeds()); + EXPECT_TRUE(S_ISDIR(st.st_mode)); + + // Verify file is readable and executable by everyone. + mode_t expected_permissions = + S_IRUSR | S_IXUSR | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH; + mode_t permissions = st.st_mode & (S_IRWXU | S_IRWXG | S_IRWXO); + EXPECT_EQ(expected_permissions, permissions); +} + +TEST(ProcTask, TaskDirCanSeekToEnd) { + const FileDescriptor dirfd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/self/task", O_RDONLY)); + EXPECT_THAT(lseek(dirfd.get(), 0, SEEK_END), SyscallSucceeds()); +} + +TEST(ProcTask, VerifyTaskDirNlinks) { + // A task directory will have 3 links if the taskgroup has a single + // thread. For example, the following shows where the links to + // '/proc/12345/task comes' from for a single threaded process with pid 12345: + // + // /proc/12345/task <-- 1 link for the directory itself + // . <-- link from "." + // .. + // 12345 + // . + // .. <-- link from ".." to parent. + // <other contents of a task dir> + // + // We can't assert an absolute number of links since we don't control how many + // threads the test framework spawns. Instead, we'll ensure creating a new + // thread increases the number of links as expected. + + // Once we reach the test body, we can count on the thread count being stable + // unless we spawn a new one. + uint64_t initial_links = ASSERT_NO_ERRNO_AND_VALUE(Links("/proc/self/task")); + ASSERT_GE(initial_links, 3); + + // For each new subtask, we should gain a new link. + BlockingChild child1; + EXPECT_THAT(Links("/proc/self/task"), + IsPosixErrorOkAndHolds(initial_links + 1)); + BlockingChild child2; + EXPECT_THAT(Links("/proc/self/task"), + IsPosixErrorOkAndHolds(initial_links + 2)); +} + +TEST(ProcTask, CommContainsThreadNameAndTrailingNewline) { + constexpr char kThreadName[] = "TestThread12345"; + ASSERT_THAT(prctl(PR_SET_NAME, kThreadName), SyscallSucceeds()); + + auto thread_name = ASSERT_NO_ERRNO_AND_VALUE( + GetContents(JoinPath("/proc", absl::StrCat(getpid()), "task", + absl::StrCat(syscall(SYS_gettid)), "comm"))); + EXPECT_EQ(absl::StrCat(kThreadName, "\n"), thread_name); +} + +TEST(ProcTaskNs, NsDirExistsAndHasCorrectMetadata) { + EXPECT_NO_ERRNO(DirContains("/proc/self/ns", {"net", "pid", "user"})); + + // Let's just test the 'pid' entry, all of them are very similar. + struct stat st; + EXPECT_THAT(lstat("/proc/self/ns/pid", &st), SyscallSucceeds()); + EXPECT_TRUE(S_ISLNK(st.st_mode)); + + auto link = ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/self/ns/pid")); + EXPECT_THAT(link, ::testing::StartsWith("pid:[")); +} + +TEST(ProcTaskNs, AccessOnNsNodeSucceeds) { + EXPECT_THAT(access("/proc/self/ns/pid", F_OK), SyscallSucceeds()); +} + +TEST(ProcSysKernelHostname, Exists) { + EXPECT_THAT(open("/proc/sys/kernel/hostname", O_RDONLY), SyscallSucceeds()); +} + +TEST(ProcSysKernelHostname, MatchesUname) { + struct utsname buf; + EXPECT_THAT(uname(&buf), SyscallSucceeds()); + const std::string hostname = absl::StrCat(buf.nodename, "\n"); + auto procfs_hostname = + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/sys/kernel/hostname")); + EXPECT_EQ(procfs_hostname, hostname); +} + +TEST(ProcSysVmMmapMinAddr, HasNumericValue) { + const std::string mmap_min_addr_str = + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/sys/vm/mmap_min_addr")); + uintptr_t mmap_min_addr; + EXPECT_TRUE(absl::SimpleAtoi(mmap_min_addr_str, &mmap_min_addr)) + << "/proc/sys/vm/mmap_min_addr does not contain a numeric value: " + << mmap_min_addr_str; +} + +TEST(ProcSysVmOvercommitMemory, HasNumericValue) { + const std::string overcommit_memory_str = + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/sys/vm/overcommit_memory")); + uintptr_t overcommit_memory; + EXPECT_TRUE(absl::SimpleAtoi(overcommit_memory_str, &overcommit_memory)) + << "/proc/sys/vm/overcommit_memory does not contain a numeric value: " + << overcommit_memory; +} + +// Check that link for proc fd entries point the target node, not the +// symlink itself. Regression test for b/31155070. +TEST(ProcTaskFd, FstatatFollowsSymlink) { + const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY)); + + struct stat sproc = {}; + EXPECT_THAT( + fstatat(-1, absl::StrCat("/proc/self/fd/", fd.get()).c_str(), &sproc, 0), + SyscallSucceeds()); + + struct stat sfile = {}; + EXPECT_THAT(fstatat(-1, file.path().c_str(), &sfile, 0), SyscallSucceeds()); + + // If fstatat follows the fd symlink, the device and inode numbers should + // match at a minimum. + EXPECT_EQ(sproc.st_dev, sfile.st_dev); + EXPECT_EQ(sproc.st_ino, sfile.st_ino); + EXPECT_EQ(0, memcmp(&sfile, &sproc, sizeof(sfile))); +} + +TEST(ProcFilesystems, Bug65172365) { + std::string proc_filesystems = + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/filesystems")); + ASSERT_FALSE(proc_filesystems.empty()); +} + +TEST(ProcFilesystems, PresenceOfShmMaxMniAll) { + uint64_t shmmax = 0; + uint64_t shmall = 0; + uint64_t shmmni = 0; + std::string proc_file; + proc_file = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/sys/kernel/shmmax")); + ASSERT_FALSE(proc_file.empty()); + ASSERT_TRUE(absl::SimpleAtoi(proc_file, &shmmax)); + proc_file = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/sys/kernel/shmall")); + ASSERT_FALSE(proc_file.empty()); + ASSERT_TRUE(absl::SimpleAtoi(proc_file, &shmall)); + proc_file = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/sys/kernel/shmmni")); + ASSERT_FALSE(proc_file.empty()); + ASSERT_TRUE(absl::SimpleAtoi(proc_file, &shmmni)); + + ASSERT_GT(shmmax, 0); + ASSERT_GT(shmall, 0); + ASSERT_GT(shmmni, 0); + ASSERT_LE(shmall, shmmax); + + // These values should never be higher than this by default, for more + // information see uapi/linux/shm.h + ASSERT_LE(shmmax, ULONG_MAX - (1UL << 24)); + ASSERT_LE(shmall, ULONG_MAX - (1UL << 24)); +} + +// Check that /proc/mounts is a symlink to self/mounts. +TEST(ProcMounts, IsSymlink) { + auto link = ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/mounts")); + EXPECT_EQ(link, "self/mounts"); +} + +TEST(ProcSelfMountinfo, RequiredFieldsArePresent) { + auto mountinfo = + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/mountinfo")); + EXPECT_THAT( + mountinfo, + AllOf( + // Root mount. + ContainsRegex( + R"([0-9]+ [0-9]+ [0-9]+:[0-9]+ /\S* / (rw|ro).*- \S+ \S+ (rw|ro)\S*)"), + // Proc mount - always rw. + ContainsRegex( + R"([0-9]+ [0-9]+ [0-9]+:[0-9]+ / /proc rw.*- \S+ \S+ rw\S*)"))); +} + +// Check that /proc/self/mounts looks something like a real mounts file. +TEST(ProcSelfMounts, RequiredFieldsArePresent) { + auto mounts = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/mounts")); + EXPECT_THAT(mounts, + AllOf( + // Root mount. + ContainsRegex(R"(\S+ / \S+ (rw|ro)\S* [0-9]+ [0-9]+\s)"), + // Root mount. + ContainsRegex(R"(\S+ /proc \S+ rw\S* [0-9]+ [0-9]+\s)"))); +} + +void CheckDuplicatesRecursively(std::string path) { + std::vector<std::string> child_dirs; + + // There is the known issue of the linux procfs, that two consequent calls of + // readdir can return the same entry twice if between these calls one or more + // entries have been removed from this directory. + int max_attempts = 5; + for (int i = 0; i < max_attempts; i++) { + child_dirs.clear(); + errno = 0; + bool success = true; + DIR* dir = opendir(path.c_str()); + if (dir == nullptr) { + // Ignore any directories we can't read or missing directories as the + // directory could have been deleted/mutated from the time the parent + // directory contents were read. + return; + } + auto dir_closer = Cleanup([&dir]() { closedir(dir); }); + std::unordered_set<std::string> children; + while (true) { + // Readdir(3): If the end of the directory stream is reached, NULL is + // returned and errno is not changed. If an error occurs, NULL is + // returned and errno is set appropriately. To distinguish end of stream + // and from an error, set errno to zero before calling readdir() and then + // check the value of errno if NULL is returned. + errno = 0; + struct dirent* dp = readdir(dir); + if (dp == nullptr) { + // Linux will return EINVAL when calling getdents on a /proc/tid/net + // file corresponding to a zombie task. + // See fs/proc/proc_net.c:proc_tgid_net_readdir(). + // + // We just ignore the directory in this case. + if (errno == EINVAL && absl::StartsWith(path, "/proc/") && + absl::EndsWith(path, "/net")) { + break; + } + + // Otherwise, no errors are allowed. + ASSERT_EQ(errno, 0) << path; + break; // We're done. + } + + const std::string name = dp->d_name; + + if (name == "." || name == "..") { + continue; + } + + // Ignore a duplicate entry if it isn't the last attempt. + if (i == max_attempts - 1) { + ASSERT_EQ(children.find(name), children.end()) + << absl::StrCat(path, "/", name); + } else if (children.find(name) != children.end()) { + std::cerr << "Duplicate entry: " << i << ":" + << absl::StrCat(path, "/", name) << std::endl; + success = false; + break; + } + children.insert(name); + + if (dp->d_type == DT_DIR) { + child_dirs.push_back(name); + } + } + if (success) { + break; + } + } + for (auto dname = child_dirs.begin(); dname != child_dirs.end(); dname++) { + CheckDuplicatesRecursively(absl::StrCat(path, "/", *dname)); + } +} + +TEST(Proc, NoDuplicates) { CheckDuplicatesRecursively("/proc"); } + +// Most /proc/PID files are owned by the task user with SUID_DUMP_USER. +TEST(ProcPid, UserDumpableOwner) { + int before; + ASSERT_THAT(before = prctl(PR_GET_DUMPABLE), SyscallSucceeds()); + auto cleanup = Cleanup([before] { + ASSERT_THAT(prctl(PR_SET_DUMPABLE, before), SyscallSucceeds()); + }); + + EXPECT_THAT(prctl(PR_SET_DUMPABLE, SUID_DUMP_USER), SyscallSucceeds()); + + // This applies to the task directory itself and files inside. + struct stat st; + ASSERT_THAT(stat("/proc/self/", &st), SyscallSucceeds()); + EXPECT_EQ(st.st_uid, geteuid()); + EXPECT_EQ(st.st_gid, getegid()); + + ASSERT_THAT(stat("/proc/self/stat", &st), SyscallSucceeds()); + EXPECT_EQ(st.st_uid, geteuid()); + EXPECT_EQ(st.st_gid, getegid()); +} + +// /proc/PID files are owned by root with SUID_DUMP_DISABLE. +TEST(ProcPid, RootDumpableOwner) { + int before; + ASSERT_THAT(before = prctl(PR_GET_DUMPABLE), SyscallSucceeds()); + auto cleanup = Cleanup([before] { + ASSERT_THAT(prctl(PR_SET_DUMPABLE, before), SyscallSucceeds()); + }); + + EXPECT_THAT(prctl(PR_SET_DUMPABLE, SUID_DUMP_DISABLE), SyscallSucceeds()); + + // This *does not* applies to the task directory itself (or other 0555 + // directories), but does to files inside. + struct stat st; + ASSERT_THAT(stat("/proc/self/", &st), SyscallSucceeds()); + EXPECT_EQ(st.st_uid, geteuid()); + EXPECT_EQ(st.st_gid, getegid()); + + // This file is owned by root. Also allow nobody in case this test is running + // in a userns without root mapped. + ASSERT_THAT(stat("/proc/self/stat", &st), SyscallSucceeds()); + EXPECT_THAT(st.st_uid, AnyOf(Eq(0), Eq(65534))); + EXPECT_THAT(st.st_gid, AnyOf(Eq(0), Eq(65534))); +} + +TEST(Proc, GetdentsEnoent) { + FileDescriptor fd; + ASSERT_NO_ERRNO(WithSubprocess( + [&](int pid) -> PosixError { + // Running. + ASSIGN_OR_RETURN_ERRNO(fd, Open(absl::StrCat("/proc/", pid, "/task"), + O_RDONLY | O_DIRECTORY)); + + return NoError(); + }, + nullptr, nullptr)); + char buf[1024]; + ASSERT_THAT(syscall(SYS_getdents64, fd.get(), buf, sizeof(buf)), + SyscallFailsWithErrno(ENOENT)); +} + +void CheckSyscwFromIOFile(const std::string& path, const std::string& regex) { + std::string output; + ASSERT_NO_ERRNO(GetContents(path, &output)); + ASSERT_THAT(output, ContainsRegex(absl::StrCat("syscw:\\s+", regex, "\n"))); +} + +// Checks that there is variable accounting of IO between threads/tasks. +TEST(Proc, PidTidIOAccounting) { + absl::Notification notification; + + // Run a thread with a bunch of writes. Check that io account records exactly + // the number of write calls. File open/close is there to prevent buffering. + ScopedThread writer([¬ification] { + const int num_writes = 100; + for (int i = 0; i < num_writes; i++) { + auto path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + ASSERT_NO_ERRNO(SetContents(path.path(), "a")); + } + notification.Notify(); + const std::string& writer_dir = + absl::StrCat("/proc/", getpid(), "/task/", gettid(), "/io"); + + CheckSyscwFromIOFile(writer_dir, std::to_string(num_writes)); + }); + + // Run a thread and do no writes. Check that no writes are recorded. + ScopedThread noop([¬ification] { + notification.WaitForNotification(); + const std::string& noop_dir = + absl::StrCat("/proc/", getpid(), "/task/", gettid(), "/io"); + + CheckSyscwFromIOFile(noop_dir, "0"); + }); + + writer.Join(); + noop.Join(); +} + +} // namespace +} // namespace testing +} // namespace gvisor + +int main(int argc, char** argv) { + for (int i = 0; i < argc; ++i) { + gvisor::testing::saved_argv.emplace_back(std::string(argv[i])); + } + + gvisor::testing::TestInit(&argc, &argv); + return gvisor::testing::RunAllTests(); +} |