// Copyright 2018 The gVisor Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include <elf.h> #include <errno.h> #include <fcntl.h> #include <limits.h> #include <linux/magic.h> #include <linux/sem.h> #include <sched.h> #include <signal.h> #include <stddef.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <sys/mman.h> #include <sys/prctl.h> #include <sys/ptrace.h> #include <sys/stat.h> #include <sys/statfs.h> #include <sys/utsname.h> #include <syscall.h> #include <unistd.h> #include <algorithm> #include <atomic> #include <functional> #include <iostream> #include <map> #include <memory> #include <ostream> #include <regex> #include <string> #include <unordered_set> #include <utility> #include <vector> #include "gmock/gmock.h" #include "gtest/gtest.h" #include "absl/container/node_hash_set.h" #include "absl/strings/ascii.h" #include "absl/strings/match.h" #include "absl/strings/numbers.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_split.h" #include "absl/strings/string_view.h" #include "absl/synchronization/mutex.h" #include "absl/synchronization/notification.h" #include "absl/time/clock.h" #include "absl/time/time.h" #include "test/util/capability_util.h" #include "test/util/cleanup.h" #include "test/util/file_descriptor.h" #include "test/util/fs_util.h" #include "test/util/memory_util.h" #include "test/util/posix_error.h" #include "test/util/proc_util.h" #include "test/util/temp_path.h" #include 
"test/util/test_util.h" #include "test/util/thread_util.h" #include "test/util/time_util.h" #include "test/util/timer_util.h" // NOTE(magi): No, this isn't really a syscall but this is a really simple // way to get it tested on both gVisor, PTrace and Linux. using ::testing::AllOf; using ::testing::AnyOf; using ::testing::ContainerEq; using ::testing::Contains; using ::testing::ContainsRegex; using ::testing::Eq; using ::testing::Gt; using ::testing::HasSubstr; using ::testing::IsSupersetOf; using ::testing::Pair; using ::testing::UnorderedElementsAre; using ::testing::UnorderedElementsAreArray; // Exported by glibc. extern char** environ; namespace gvisor { namespace testing { namespace { #ifndef SUID_DUMP_DISABLE #define SUID_DUMP_DISABLE 0 #endif /* SUID_DUMP_DISABLE */ #ifndef SUID_DUMP_USER #define SUID_DUMP_USER 1 #endif /* SUID_DUMP_USER */ #ifndef SUID_DUMP_ROOT #define SUID_DUMP_ROOT 2 #endif /* SUID_DUMP_ROOT */ #if defined(__x86_64__) || defined(__i386__) // This list of "required" fields is taken from reading the file // arch/x86/kernel/cpu/proc.c and seeing which fields will be unconditionally // printed by the kernel. static const char* required_fields[] = { "processor", "vendor_id", "cpu family", "model\t\t:", "model name", "stepping", "cpu MHz", "fpu\t\t:", "fpu_exception", "cpuid level", "wp", "bogomips", "clflush size", "cache_alignment", "address sizes", "power management", }; #elif __aarch64__ // This list of "required" fields is taken from reading the file // arch/arm64/kernel/cpuinfo.c and seeing which fields will be unconditionally // printed by the kernel. static const char* required_fields[] = { "processor", "BogoMIPS", "Features", "CPU implementer", "CPU architecture", "CPU variant", "CPU part", "CPU revision", }; #else #error "Unknown architecture" #endif // Takes the subprocess command line and pid. // If it returns !OK, WithSubprocess returns immediately. 
using SubprocessCallback = std::function<PosixError(int)>; std::vector<std::string> saved_argv; // NOLINT // Helper function to dump /proc/{pid}/status and check the // state data. State should = "Z" for zombied or "RSD" for // running, interruptible sleeping (S), or uninterruptible sleep // (D). void CompareProcessState(absl::string_view state, int pid) { auto status_file = ASSERT_NO_ERRNO_AND_VALUE( GetContents(absl::StrCat("/proc/", pid, "/status"))); // N.B. POSIX extended regexes don't support shorthand character classes (\w) // inside of brackets. EXPECT_THAT(status_file, ContainsRegex(absl::StrCat("State:.[", state, R"EOL(]\s+\([a-zA-Z ]+\))EOL"))); } // Run callbacks while a subprocess is running, zombied, and/or exited. PosixError WithSubprocess(SubprocessCallback const& running, SubprocessCallback const& zombied, SubprocessCallback const& exited) { int pipe_fds[2] = {}; if (pipe(pipe_fds) < 0) { return PosixError(errno, "pipe"); } int child_pid = fork(); if (child_pid < 0) { return PosixError(errno, "fork"); } if (child_pid == 0) { close(pipe_fds[0]); // Close the read end. const DisableSave ds; // Timing issues. // Write to the pipe to tell it we're ready. char buf = 'a'; int res = 0; res = WriteFd(pipe_fds[1], &buf, sizeof(buf)); TEST_CHECK_MSG(res == sizeof(buf), "Write failure in subprocess"); while (true) { SleepSafe(absl::Milliseconds(100)); } } close(pipe_fds[1]); // Close the write end. int status = 0; auto wait_cleanup = Cleanup([child_pid, &status] { EXPECT_THAT(waitpid(child_pid, &status, 0), SyscallSucceeds()); }); auto kill_cleanup = Cleanup([child_pid] { EXPECT_THAT(kill(child_pid, SIGKILL), SyscallSucceeds()); }); // Wait for the child. 
char buf = 0; int res = ReadFd(pipe_fds[0], &buf, sizeof(buf)); if (res < 0) { return PosixError(errno, "Read from pipe"); } else if (res == 0) { return PosixError(EPIPE, "Unable to read from pipe: EOF"); } if (running) { // The first arg, RSD, refers to a "running process", or a process with a // state of Running (R), Interruptable Sleep (S) or Uninterruptable // Sleep (D). CompareProcessState("RSD", child_pid); RETURN_IF_ERRNO(running(child_pid)); } // Kill the process. kill_cleanup.Release()(); siginfo_t info; // Wait until the child process has exited (WEXITED flag) but don't // reap the child (WNOWAIT flag). EXPECT_THAT(waitid(P_PID, child_pid, &info, WNOWAIT | WEXITED), SyscallSucceeds()); if (zombied) { // Arg of "Z" refers to a Zombied Process. CompareProcessState("Z", child_pid); RETURN_IF_ERRNO(zombied(child_pid)); } // Wait on the process. wait_cleanup.Release()(); // If the process is reaped, then then this should return // with ECHILD. EXPECT_THAT(waitpid(child_pid, &status, WNOHANG), SyscallFailsWithErrno(ECHILD)); if (exited) { RETURN_IF_ERRNO(exited(child_pid)); } return NoError(); } // Access the file returned by name when a subprocess is running. PosixError AccessWhileRunning(std::function<std::string(int pid)> name, int flags, std::function<void(int fd)> access) { FileDescriptor fd; return WithSubprocess( [&](int pid) -> PosixError { // Running. ASSIGN_OR_RETURN_ERRNO(fd, Open(name(pid), flags)); access(fd.get()); return NoError(); }, nullptr, nullptr); } // Access the file returned by name when the a subprocess is zombied. PosixError AccessWhileZombied(std::function<std::string(int pid)> name, int flags, std::function<void(int fd)> access) { FileDescriptor fd; return WithSubprocess( [&](int pid) -> PosixError { // Running. ASSIGN_OR_RETURN_ERRNO(fd, Open(name(pid), flags)); return NoError(); }, [&](int pid) -> PosixError { // Zombied. 
access(fd.get()); return NoError(); }, nullptr); } // Access the file returned by name when the a subprocess is exited. PosixError AccessWhileExited(std::function<std::string(int pid)> name, int flags, std::function<void(int fd)> access) { FileDescriptor fd; return WithSubprocess( [&](int pid) -> PosixError { // Running. ASSIGN_OR_RETURN_ERRNO(fd, Open(name(pid), flags)); return NoError(); }, nullptr, [&](int pid) -> PosixError { // Exited. access(fd.get()); return NoError(); }); } // ReadFd(fd=/proc/PID/basename) while PID is running. int ReadWhileRunning(std::string const& basename, void* buf, size_t count) { int ret = 0; int err = 0; EXPECT_NO_ERRNO(AccessWhileRunning( [&](int pid) -> std::string { return absl::StrCat("/proc/", pid, "/", basename); }, O_RDONLY, [&](int fd) { ret = ReadFd(fd, buf, count); err = errno; })); errno = err; return ret; } // ReadFd(fd=/proc/PID/basename) while PID is zombied. int ReadWhileZombied(std::string const& basename, void* buf, size_t count) { int ret = 0; int err = 0; EXPECT_NO_ERRNO(AccessWhileZombied( [&](int pid) -> std::string { return absl::StrCat("/proc/", pid, "/", basename); }, O_RDONLY, [&](int fd) { ret = ReadFd(fd, buf, count); err = errno; })); errno = err; return ret; } // ReadFd(fd=/proc/PID/basename) while PID is exited. int ReadWhileExited(std::string const& basename, void* buf, size_t count) { int ret = 0; int err = 0; EXPECT_NO_ERRNO(AccessWhileExited( [&](int pid) -> std::string { return absl::StrCat("/proc/", pid, "/", basename); }, O_RDONLY, [&](int fd) { ret = ReadFd(fd, buf, count); err = errno; })); errno = err; return ret; } // readlinkat(fd=/proc/PID/, basename) while PID is running. 
int ReadlinkWhileRunning(std::string const& basename, char* buf, size_t count) { int ret = 0; int err = 0; EXPECT_NO_ERRNO(AccessWhileRunning( [&](int pid) -> std::string { return absl::StrCat("/proc/", pid, "/"); }, O_DIRECTORY, [&](int fd) { ret = readlinkat(fd, basename.c_str(), buf, count); err = errno; })); errno = err; return ret; } // readlinkat(fd=/proc/PID/, basename) while PID is zombied. int ReadlinkWhileZombied(std::string const& basename, char* buf, size_t count) { int ret = 0; int err = 0; EXPECT_NO_ERRNO(AccessWhileZombied( [&](int pid) -> std::string { return absl::StrCat("/proc/", pid, "/"); }, O_DIRECTORY, [&](int fd) { ret = readlinkat(fd, basename.c_str(), buf, count); err = errno; })); errno = err; return ret; } // readlinkat(fd=/proc/PID/, basename) while PID is exited. int ReadlinkWhileExited(std::string const& basename, char* buf, size_t count) { int ret = 0; int err = 0; EXPECT_NO_ERRNO(AccessWhileExited( [&](int pid) -> std::string { return absl::StrCat("/proc/", pid, "/"); }, O_DIRECTORY, [&](int fd) { ret = readlinkat(fd, basename.c_str(), buf, count); err = errno; })); errno = err; return ret; } TEST(ProcTest, NotFoundInRoot) { struct stat s; EXPECT_THAT(stat("/proc/foobar", &s), SyscallFailsWithErrno(ENOENT)); } TEST(ProcSelfTest, IsThreadGroupLeader) { ScopedThread([] { const pid_t tgid = getpid(); const pid_t tid = syscall(SYS_gettid); EXPECT_NE(tgid, tid); auto link = ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/self")); EXPECT_EQ(link, absl::StrCat(tgid)); }); } TEST(ProcThreadSelfTest, Basic) { const pid_t tgid = getpid(); const pid_t tid = syscall(SYS_gettid); EXPECT_EQ(tgid, tid); auto link_threadself = ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/thread-self")); EXPECT_EQ(link_threadself, absl::StrCat(tgid, "/task/", tid)); // Just read one file inside thread-self to ensure that the link is valid. 
auto link_threadself_exe = ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/thread-self/exe")); auto link_procself_exe = ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/self/exe")); EXPECT_EQ(link_threadself_exe, link_procself_exe); } TEST(ProcThreadSelfTest, Thread) { ScopedThread([] { const pid_t tgid = getpid(); const pid_t tid = syscall(SYS_gettid); EXPECT_NE(tgid, tid); auto link_threadself = ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/thread-self")); EXPECT_EQ(link_threadself, absl::StrCat(tgid, "/task/", tid)); // Just read one file inside thread-self to ensure that the link is valid. auto link_threadself_exe = ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/thread-self/exe")); auto link_procself_exe = ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/self/exe")); EXPECT_EQ(link_threadself_exe, link_procself_exe); // A thread should not have "/proc/<tid>/task". struct stat s; EXPECT_THAT(stat("/proc/thread-self/task", &s), SyscallFailsWithErrno(ENOENT)); }); } // Returns the /proc/PID/maps entry for the MAP_PRIVATE | MAP_ANONYMOUS mapping // m with start address addr and length len. std::string AnonymousMapsEntry(uintptr_t addr, size_t len, int prot) { return absl::StrCat(absl::Hex(addr, absl::PadSpec::kZeroPad8), "-", absl::Hex(addr + len, absl::PadSpec::kZeroPad8), " ", prot & PROT_READ ? "r" : "-", prot & PROT_WRITE ? "w" : "-", prot & PROT_EXEC ? 
"x" : "-", "p 00000000 00:00 0 "); } std::string AnonymousMapsEntryForMapping(const Mapping& m, int prot) { return AnonymousMapsEntry(m.addr(), m.len(), prot); } PosixErrorOr<std::map<uint64_t, uint64_t>> ReadProcSelfAuxv() { std::string auxv_file; RETURN_IF_ERRNO(GetContents("/proc/self/auxv", &auxv_file)); const Elf64_auxv_t* auxv_data = reinterpret_cast<const Elf64_auxv_t*>(auxv_file.data()); std::map<uint64_t, uint64_t> auxv_entries; for (int i = 0; auxv_data[i].a_type != AT_NULL; i++) { auto a_type = auxv_data[i].a_type; EXPECT_EQ(0, auxv_entries.count(a_type)) << "a_type: " << a_type; auxv_entries.emplace(a_type, auxv_data[i].a_un.a_val); } return auxv_entries; } TEST(ProcSelfAuxv, EntryPresence) { auto auxv_entries = ASSERT_NO_ERRNO_AND_VALUE(ReadProcSelfAuxv()); EXPECT_EQ(auxv_entries.count(AT_ENTRY), 1); EXPECT_EQ(auxv_entries.count(AT_PHDR), 1); EXPECT_EQ(auxv_entries.count(AT_PHENT), 1); EXPECT_EQ(auxv_entries.count(AT_PHNUM), 1); EXPECT_EQ(auxv_entries.count(AT_BASE), 1); EXPECT_EQ(auxv_entries.count(AT_UID), 1); EXPECT_EQ(auxv_entries.count(AT_EUID), 1); EXPECT_EQ(auxv_entries.count(AT_GID), 1); EXPECT_EQ(auxv_entries.count(AT_EGID), 1); EXPECT_EQ(auxv_entries.count(AT_SECURE), 1); EXPECT_EQ(auxv_entries.count(AT_CLKTCK), 1); EXPECT_EQ(auxv_entries.count(AT_RANDOM), 1); EXPECT_EQ(auxv_entries.count(AT_EXECFN), 1); EXPECT_EQ(auxv_entries.count(AT_PAGESZ), 1); EXPECT_EQ(auxv_entries.count(AT_SYSINFO_EHDR), 1); } TEST(ProcSelfAuxv, EntryValues) { auto proc_auxv = ASSERT_NO_ERRNO_AND_VALUE(ReadProcSelfAuxv()); // We need to find the ELF auxiliary vector. The section of memory pointed to // by envp contains some pointers to non-null pointers, followed by a single // pointer to a null pointer, followed by the auxiliary vector. 
char** envpi = environ; while (*envpi) { ++envpi; } const Elf64_auxv_t* envp_auxv = reinterpret_cast<const Elf64_auxv_t*>(envpi + 1); int i; for (i = 0; envp_auxv[i].a_type != AT_NULL; i++) { auto a_type = envp_auxv[i].a_type; EXPECT_EQ(proc_auxv.count(a_type), 1); EXPECT_EQ(proc_auxv[a_type], envp_auxv[i].a_un.a_val) << "a_type: " << a_type; } EXPECT_EQ(i, proc_auxv.size()); } // Just open and read a part of /proc/self/mem, check that we can read an item. TEST(ProcPidMem, Read) { auto memfd = ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/self/mem", O_RDONLY)); char input[] = "hello-world"; char output[sizeof(input)]; ASSERT_THAT(pread(memfd.get(), output, sizeof(output), reinterpret_cast<off_t>(input)), SyscallSucceedsWithValue(sizeof(input))); ASSERT_STREQ(input, output); } // Perform read on an unmapped region. TEST(ProcPidMem, Unmapped) { // Strategy: map then unmap, so we have a guaranteed unmapped region auto memfd = ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/self/mem", O_RDONLY)); Mapping mapping = ASSERT_NO_ERRNO_AND_VALUE( MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE)); // Fill it with things memset(mapping.ptr(), 'x', mapping.len()); char expected = 'x', output; ASSERT_THAT(pread(memfd.get(), &output, sizeof(output), reinterpret_cast<off_t>(mapping.ptr())), SyscallSucceedsWithValue(sizeof(output))); ASSERT_EQ(expected, output); // Unmap region again ASSERT_THAT(munmap(mapping.ptr(), mapping.len()), SyscallSucceeds()); // Now we want EIO error ASSERT_THAT(pread(memfd.get(), &output, sizeof(output), reinterpret_cast<off_t>(mapping.ptr())), SyscallFailsWithErrno(EIO)); } // Perform read repeatedly to verify offset change. 
TEST(ProcPidMem, RepeatedRead) {
  auto const num_reads = 3;
  char expected[] = "01234567890abcdefghijkl";
  char output[sizeof(expected) / num_reads];

  auto memfd = ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/self/mem", O_RDONLY));
  ASSERT_THAT(lseek(memfd.get(), reinterpret_cast<off_t>(&expected), SEEK_SET),
              SyscallSucceedsWithValue(reinterpret_cast<off_t>(&expected)));
  // Each read must continue where the previous one stopped.
  for (auto i = 0; i < num_reads; i++) {
    ASSERT_THAT(read(memfd.get(), &output, sizeof(output)),
                SyscallSucceedsWithValue(sizeof(output)));
    ASSERT_EQ(strncmp(&expected[i * sizeof(output)], output, sizeof(output)),
              0);
  }
}

// Perform seek operations repeatedly.
TEST(ProcPidMem, RepeatedSeek) {
  auto const num_reads = 3;
  char expected[] = "01234567890abcdefghijkl";
  char output[sizeof(expected) / num_reads];

  // Keep all offset arithmetic in the signed off_t domain. Negating a size_t
  // expression (as in `-3 * sizeof(output)`) only yields a negative offset by
  // way of unsigned wrap-around, which breaks when off_t is wider than size_t.
  const off_t base = reinterpret_cast<off_t>(&expected);
  const off_t chunk = static_cast<off_t>(sizeof(output));

  auto memfd = ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/self/mem", O_RDONLY));
  ASSERT_THAT(lseek(memfd.get(), base, SEEK_SET),
              SyscallSucceedsWithValue(base));
  // Read from start
  ASSERT_THAT(read(memfd.get(), &output, sizeof(output)),
              SyscallSucceedsWithValue(sizeof(output)));
  ASSERT_EQ(strncmp(&expected[0 * sizeof(output)], output, sizeof(output)), 0);
  // Skip ahead one read
  ASSERT_THAT(lseek(memfd.get(), chunk, SEEK_CUR),
              SyscallSucceedsWithValue(base + chunk * 2));
  // Do read again
  ASSERT_THAT(read(memfd.get(), &output, sizeof(output)),
              SyscallSucceedsWithValue(sizeof(output)));
  ASSERT_EQ(strncmp(&expected[2 * sizeof(output)], output, sizeof(output)), 0);
  // Skip back three reads
  ASSERT_THAT(lseek(memfd.get(), -3 * chunk, SEEK_CUR),
              SyscallSucceedsWithValue(base));
  // Do read again
  ASSERT_THAT(read(memfd.get(), &output, sizeof(output)),
              SyscallSucceedsWithValue(sizeof(output)));
  ASSERT_EQ(strncmp(&expected[0 * sizeof(output)], output, sizeof(output)), 0);
  // Check that SEEK_END does not work
  ASSERT_THAT(lseek(memfd.get(), 0, SEEK_END), SyscallFailsWithErrno(EINVAL));
}
// Perform read past an allocated memory region.
TEST(ProcPidMem, PartialRead) {
  // Strategy: map large region, then do unmap and remap smaller region
  auto memfd = ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/self/mem", O_RDONLY));

  Mapping mapping = ASSERT_NO_ERRNO_AND_VALUE(
      MmapAnon(2 * kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE));
  // Remember where the large region was, then give it up through the Mapping
  // object (rather than a raw munmap()) so that `mapping` does not keep
  // referring to pages that `smaller_mapping` takes over below.
  // NOTE(review): assumes Mapping::reset() unmaps and clears the mapping, as
  // its use in ProcSelfMaps.MapUnmap suggests - confirm in memory_util.h.
  void* const region_start = mapping.ptr();
  mapping.reset();
  Mapping smaller_mapping = ASSERT_NO_ERRNO_AND_VALUE(
      Mmap(region_start, kPageSize, PROT_READ | PROT_WRITE,
           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));

  // Fill it with things
  memset(smaller_mapping.ptr(), 'x', smaller_mapping.len());

  // Now we want no error
  char expected[] = {'x'};
  std::unique_ptr<char[]> output(new char[kPageSize]);
  off_t read_offset =
      reinterpret_cast<off_t>(smaller_mapping.ptr()) + kPageSize - 1;
  // Ask for the whole buffer. (sizeof(output.get()) would be the size of a
  // pointer, not of the buffer.) Only the final byte of the mapped page can
  // be read, so the read is expected to be short.
  ASSERT_THAT(pread(memfd.get(), output.get(), kPageSize, read_offset),
              SyscallSucceedsWithValue(sizeof(expected)));
  // Since output is larger, than expected we have to do manual compare
  ASSERT_EQ(expected[0], (output).get()[0]);
}

// Perform read on /proc/[pid]/mem after exit.
TEST(ProcPidMem, AfterExit) {
  int pfd1[2] = {};
  int pfd2[2] = {};

  // The child is forked after this array is initialised, so the same address
  // holds the same bytes in the child's address space.
  char expected[] = "hello-world";

  ASSERT_THAT(pipe(pfd1), SyscallSucceeds());
  ASSERT_THAT(pipe(pfd2), SyscallSucceeds());

  // Create child process
  pid_t const child_pid = fork();
  if (child_pid == 0) {
    // Close reading end of first pipe
    close(pfd1[0]);

    // Tell parent that the child is up
    char ok = 1;
    TEST_CHECK(WriteFd(pfd1[1], &ok, sizeof(ok)) == sizeof(ok));
    TEST_PCHECK(close(pfd1[1]) == 0);

    // Close writing end of second pipe
    TEST_PCHECK(close(pfd2[1]) == 0);

    // Await parent OK to die
    ok = 0;
    TEST_CHECK(ReadFd(pfd2[0], &ok, sizeof(ok)) == sizeof(ok));

    // Close rest pipes
    TEST_PCHECK(close(pfd2[0]) == 0);
    _exit(0);
  }

  // In parent process.
  ASSERT_THAT(child_pid, SyscallSucceeds());

  // Close writing end of first pipe
  EXPECT_THAT(close(pfd1[1]), SyscallSucceeds());

  // Wait for child to be alive and well
  char ok = 0;
  EXPECT_THAT(ReadFd(pfd1[0], &ok, sizeof(ok)),
              SyscallSucceedsWithValue(sizeof(ok)));
  // Close reading end of first pipe
  EXPECT_THAT(close(pfd1[0]), SyscallSucceeds());

  // Open /proc/pid/mem fd
  std::string mempath = absl::StrCat("/proc/", child_pid, "/mem");
  auto memfd = ASSERT_NO_ERRNO_AND_VALUE(Open(mempath, O_RDONLY));

  // Expect that we can read
  char output[sizeof(expected)];
  EXPECT_THAT(pread(memfd.get(), &output, sizeof(output),
                    reinterpret_cast<off_t>(&expected)),
              SyscallSucceedsWithValue(sizeof(output)));
  EXPECT_STREQ(expected, output);

  // Tell proc its ok to go
  EXPECT_THAT(close(pfd2[0]), SyscallSucceeds());
  ok = 1;
  EXPECT_THAT(WriteFd(pfd2[1], &ok, sizeof(ok)),
              SyscallSucceedsWithValue(sizeof(ok)));
  EXPECT_THAT(close(pfd2[1]), SyscallSucceeds());

  // Expect termination
  int status;
  ASSERT_THAT(waitpid(child_pid, &status, 0), SyscallSucceeds());

  // Expect that we can't read anymore
  EXPECT_THAT(pread(memfd.get(), &output, sizeof(output),
                    reinterpret_cast<off_t>(&expected)),
              SyscallSucceedsWithValue(0));
}

// Read from /proc/[pid]/mem with different UID/GID and attached state.
TEST(ProcPidMem, DifferentUserAttached) {
  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SETUID)));
  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_DAC_OVERRIDE)));
  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_PTRACE)));

  // pfd1: child -> parent (address of the child's buffer).
  // pfd2: parent -> child (permission to exit).
  int pfd1[2] = {};
  int pfd2[2] = {};

  ASSERT_THAT(pipe(pfd1), SyscallSucceeds());
  ASSERT_THAT(pipe(pfd2), SyscallSucceeds());

  // Create child process
  pid_t const child_pid = fork();
  if (child_pid == 0) {
    // Close reading end of first pipe
    close(pfd1[0]);

    // Tell parent about location of input
    char input[] = "hello-world";
    off_t input_location = reinterpret_cast<off_t>(input);
    TEST_CHECK(WriteFd(pfd1[1], &input_location, sizeof(input_location)) ==
               sizeof(input_location));
    TEST_PCHECK(close(pfd1[1]) == 0);

    // Close writing end of second pipe
    TEST_PCHECK(close(pfd2[1]) == 0);

    // Await parent OK to die
    char ok = 0;
    TEST_CHECK(ReadFd(pfd2[0], &ok, sizeof(ok)) == sizeof(ok));

    // Close rest pipes
    TEST_PCHECK(close(pfd2[0]) == 0);
    _exit(0);
  }

  // In parent process.
  ASSERT_THAT(child_pid, SyscallSucceeds());

  // Close writing end of first pipe
  EXPECT_THAT(close(pfd1[1]), SyscallSucceeds());

  // Read target location from child. Initialised so that a failed read
  // (already reported by the EXPECT below) does not leave an indeterminate
  // offset for the pread further down.
  off_t target_location = 0;
  EXPECT_THAT(ReadFd(pfd1[0], &target_location, sizeof(target_location)),
              SyscallSucceedsWithValue(sizeof(target_location)));
  // Close reading end of first pipe
  EXPECT_THAT(close(pfd1[0]), SyscallSucceeds());

  ScopedThread([&] {
    // Attach to child subprocess without stopping it. ptrace() is variadic,
    // so pass nullptr (converted to a null void*) rather than the NULL macro.
    EXPECT_THAT(ptrace(PTRACE_SEIZE, child_pid, nullptr, nullptr),
                SyscallSucceeds());

    // Keep capabilities after setuid
    EXPECT_THAT(prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0), SyscallSucceeds());
    constexpr int kNobody = 65534;
    EXPECT_THAT(syscall(SYS_setuid, kNobody), SyscallSucceeds());

    // Only restore CAP_SYS_PTRACE and CAP_DAC_OVERRIDE
    EXPECT_NO_ERRNO(SetCapability(CAP_SYS_PTRACE, true));
    EXPECT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, true));

    // Open /proc/pid/mem fd
    std::string mempath = absl::StrCat("/proc/", child_pid, "/mem");
    auto memfd = ASSERT_NO_ERRNO_AND_VALUE(Open(mempath, O_RDONLY));
    char expected[] = "hello-world";
    char output[sizeof(expected)];
    EXPECT_THAT(pread(memfd.get(), output, sizeof(output), target_location),
                SyscallSucceedsWithValue(sizeof(output)));
    EXPECT_STREQ(expected, output);

    // Tell proc its ok to go
    EXPECT_THAT(close(pfd2[0]), SyscallSucceeds());
    char ok = 1;
    EXPECT_THAT(WriteFd(pfd2[1], &ok, sizeof(ok)),
                SyscallSucceedsWithValue(sizeof(ok)));
    EXPECT_THAT(close(pfd2[1]), SyscallSucceeds());

    // Expect termination
    int status;
    ASSERT_THAT(waitpid(child_pid, &status, 0), SyscallSucceeds());
    EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
        << " status " << status;
  });
}

// Attempt to read from /proc/[pid]/mem with different UID/GID.
TEST(ProcPidMem, DifferentUser) {
  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SETUID)));

  // pfd1: child -> parent (address of the child's buffer).
  // pfd2: parent -> child (permission to exit).
  int pfd1[2] = {};
  int pfd2[2] = {};

  ASSERT_THAT(pipe(pfd1), SyscallSucceeds());
  ASSERT_THAT(pipe(pfd2), SyscallSucceeds());

  // Create child process
  pid_t const child_pid = fork();
  if (child_pid == 0) {
    // Close reading end of first pipe
    close(pfd1[0]);

    // Tell parent about location of input
    char input[] = "hello-world";
    off_t input_location = reinterpret_cast<off_t>(input);
    TEST_CHECK(WriteFd(pfd1[1], &input_location, sizeof(input_location)) ==
               sizeof(input_location));
    TEST_PCHECK(close(pfd1[1]) == 0);

    // Close writing end of second pipe
    TEST_PCHECK(close(pfd2[1]) == 0);

    // Await parent OK to die
    char ok = 0;
    TEST_CHECK(ReadFd(pfd2[0], &ok, sizeof(ok)) == sizeof(ok));

    // Close rest pipes
    TEST_PCHECK(close(pfd2[0]) == 0);
    _exit(0);
  }

  // In parent process.
  ASSERT_THAT(child_pid, SyscallSucceeds());

  // Close writing end of first pipe
  EXPECT_THAT(close(pfd1[1]), SyscallSucceeds());

  // Read target location from child. The value is not used afterwards in
  // this test; the read only synchronises with the child.
  off_t target_location = 0;
  EXPECT_THAT(ReadFd(pfd1[0], &target_location, sizeof(target_location)),
              SyscallSucceedsWithValue(sizeof(target_location)));
  // Close reading end of first pipe
  EXPECT_THAT(close(pfd1[0]), SyscallSucceeds());

  ScopedThread([&] {
    constexpr int kNobody = 65534;
    EXPECT_THAT(syscall(SYS_setuid, kNobody), SyscallSucceeds());

    // Attempt to open /proc/[child_pid]/mem
    std::string mempath = absl::StrCat("/proc/", child_pid, "/mem");
    const int memfd = open(mempath.c_str(), O_RDONLY);
    EXPECT_THAT(memfd, SyscallFailsWithErrno(EACCES));
    if (memfd >= 0) {
      // The open unexpectedly succeeded (reported above); don't leak the fd.
      close(memfd);
    }

    // Tell proc its ok to go
    EXPECT_THAT(close(pfd2[0]), SyscallSucceeds());
    char ok = 1;
    EXPECT_THAT(WriteFd(pfd2[1], &ok, sizeof(ok)),
                SyscallSucceedsWithValue(sizeof(ok)));
    EXPECT_THAT(close(pfd2[1]), SyscallSucceeds());

    // Expect termination
    int status;
    ASSERT_THAT(waitpid(child_pid, &status, 0), SyscallSucceeds());
  });
}

// Perform read on /proc/[pid]/mem with same UID/GID.
TEST(ProcPidMem, SameUser) { int pfd1[2] = {}; int pfd2[2] = {}; ASSERT_THAT(pipe(pfd1), SyscallSucceeds()); ASSERT_THAT(pipe(pfd2), SyscallSucceeds()); // Create child process pid_t const child_pid = fork(); if (child_pid == 0) { // Close reading end of first pipe close(pfd1[0]); // Tell parent about location of input char input[] = "hello-world"; off_t input_location = reinterpret_cast<off_t>(input); TEST_CHECK(WriteFd(pfd1[1], &input_location, sizeof(input_location)) == sizeof(input_location)); TEST_PCHECK(close(pfd1[1]) == 0); // Close writing end of second pipe TEST_PCHECK(close(pfd2[1]) == 0); // Await parent OK to die char ok = 0; TEST_CHECK(ReadFd(pfd2[0], &ok, sizeof(ok)) == sizeof(ok)); // Close rest pipes TEST_PCHECK(close(pfd2[0]) == 0); _exit(0); } // In parent process. ASSERT_THAT(child_pid, SyscallSucceeds()); // Close writing end of first pipe EXPECT_THAT(close(pfd1[1]), SyscallSucceeds()); // Read target location from child off_t target_location; EXPECT_THAT(ReadFd(pfd1[0], &target_location, sizeof(target_location)), SyscallSucceedsWithValue(sizeof(target_location))); // Close reading end of first pipe EXPECT_THAT(close(pfd1[0]), SyscallSucceeds()); // Open /proc/pid/mem fd std::string mempath = absl::StrCat("/proc/", child_pid, "/mem"); auto memfd = ASSERT_NO_ERRNO_AND_VALUE(Open(mempath, O_RDONLY)); char expected[] = "hello-world"; char output[sizeof(expected)]; EXPECT_THAT(pread(memfd.get(), output, sizeof(output), reinterpret_cast<off_t>(target_location)), SyscallSucceedsWithValue(sizeof(output))); EXPECT_STREQ(expected, output); // Tell proc its ok to go EXPECT_THAT(close(pfd2[0]), SyscallSucceeds()); char ok = 1; EXPECT_THAT(WriteFd(pfd2[1], &ok, sizeof(ok)), SyscallSucceedsWithValue(sizeof(ok))); EXPECT_THAT(close(pfd2[1]), SyscallSucceeds()); // Expect termination int status; ASSERT_THAT(waitpid(child_pid, &status, 0), SyscallSucceeds()); } // Just open and read /proc/self/maps, check that we can find [stack] TEST(ProcSelfMaps, Basic) { 
auto proc_self_maps = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps")); std::vector<std::string> strings = absl::StrSplit(proc_self_maps, '\n'); std::vector<std::string> stacks; // Make sure there's a stack in there. for (const auto& str : strings) { if (str.find("[stack]") != std::string::npos) { stacks.push_back(str); } } ASSERT_EQ(1, stacks.size()) << "[stack] not found in: " << proc_self_maps; // Linux pads to 73 characters then we add 7. EXPECT_EQ(80, stacks[0].length()); } TEST(ProcSelfMaps, Map1) { Mapping mapping = ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_READ, MAP_PRIVATE)); auto proc_self_maps = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps")); std::vector<std::string> strings = absl::StrSplit(proc_self_maps, '\n'); std::vector<std::string> addrs; // Make sure if is listed. for (const auto& str : strings) { if (str == AnonymousMapsEntryForMapping(mapping, PROT_READ)) { addrs.push_back(str); } } ASSERT_EQ(1, addrs.size()); } TEST(ProcSelfMaps, Map2) { // NOTE(magi): The permissions must be different or the pages will get merged. Mapping map1 = ASSERT_NO_ERRNO_AND_VALUE( MmapAnon(kPageSize, PROT_READ | PROT_EXEC, MAP_PRIVATE)); Mapping map2 = ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_WRITE, MAP_PRIVATE)); auto proc_self_maps = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps")); std::vector<std::string> strings = absl::StrSplit(proc_self_maps, '\n'); std::vector<std::string> addrs; // Make sure if is listed. 
for (const auto& str : strings) { if (str == AnonymousMapsEntryForMapping(map1, PROT_READ | PROT_EXEC)) { addrs.push_back(str); } } ASSERT_EQ(1, addrs.size()); addrs.clear(); for (const auto& str : strings) { if (str == AnonymousMapsEntryForMapping(map2, PROT_WRITE)) { addrs.push_back(str); } } ASSERT_EQ(1, addrs.size()); } TEST(ProcSelfMaps, MapUnmap) { Mapping map1 = ASSERT_NO_ERRNO_AND_VALUE( MmapAnon(kPageSize, PROT_READ | PROT_EXEC, MAP_PRIVATE)); Mapping map2 = ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_WRITE, MAP_PRIVATE)); auto proc_self_maps = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps")); std::vector<std::string> strings = absl::StrSplit(proc_self_maps, '\n'); std::vector<std::string> addrs; // Make sure if is listed. for (const auto& str : strings) { if (str == AnonymousMapsEntryForMapping(map1, PROT_READ | PROT_EXEC)) { addrs.push_back(str); } } ASSERT_EQ(1, addrs.size()) << proc_self_maps; addrs.clear(); for (const auto& str : strings) { if (str == AnonymousMapsEntryForMapping(map2, PROT_WRITE)) { addrs.push_back(str); } } ASSERT_EQ(1, addrs.size()); map2.reset(); // Read it again. proc_self_maps = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps")); strings = absl::StrSplit(proc_self_maps, '\n'); // First entry should be there. addrs.clear(); for (const auto& str : strings) { if (str == AnonymousMapsEntryForMapping(map1, PROT_READ | PROT_EXEC)) { addrs.push_back(str); } } ASSERT_EQ(1, addrs.size()); addrs.clear(); // But not the second. for (const auto& str : strings) { if (str == AnonymousMapsEntryForMapping(map2, PROT_WRITE)) { addrs.push_back(str); } } ASSERT_EQ(0, addrs.size()); } TEST(ProcSelfMaps, Mprotect) { // FIXME(jamieliu): Linux's mprotect() sometimes fails to merge VMAs in this // case. SKIP_IF(!IsRunningOnGvisor()); // Reserve 5 pages of address space. Mapping m = ASSERT_NO_ERRNO_AND_VALUE( MmapAnon(5 * kPageSize, PROT_NONE, MAP_PRIVATE)); // Change the permissions on the middle 3 pages. 
(The first and last pages may // be merged with other vmas on either side, so they aren't tested directly; // they just ensure that the middle 3 pages are bracketed by VMAs with // incompatible permissions.) ASSERT_THAT(mprotect(reinterpret_cast<void*>(m.addr() + kPageSize), 3 * kPageSize, PROT_READ), SyscallSucceeds()); // Check that the middle 3 pages make up a single VMA. auto proc_self_maps = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps")); std::vector<std::string> strings = absl::StrSplit(proc_self_maps, '\n'); EXPECT_THAT(strings, Contains(AnonymousMapsEntry(m.addr() + kPageSize, 3 * kPageSize, PROT_READ))); // Change the permissions on the middle page only. ASSERT_THAT(mprotect(reinterpret_cast<void*>(m.addr() + 2 * kPageSize), kPageSize, PROT_READ | PROT_WRITE), SyscallSucceeds()); // Check that the single VMA has been split into 3 VMAs. proc_self_maps = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps")); strings = absl::StrSplit(proc_self_maps, '\n'); EXPECT_THAT( strings, IsSupersetOf( {AnonymousMapsEntry(m.addr() + kPageSize, kPageSize, PROT_READ), AnonymousMapsEntry(m.addr() + 2 * kPageSize, kPageSize, PROT_READ | PROT_WRITE), AnonymousMapsEntry(m.addr() + 3 * kPageSize, kPageSize, PROT_READ)})); // Change the permissions on the middle page back. ASSERT_THAT(mprotect(reinterpret_cast<void*>(m.addr() + 2 * kPageSize), kPageSize, PROT_READ), SyscallSucceeds()); // Check that the 3 VMAs have been merged back into a single VMA. 
proc_self_maps = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps")); strings = absl::StrSplit(proc_self_maps, '\n'); EXPECT_THAT(strings, Contains(AnonymousMapsEntry(m.addr() + kPageSize, 3 * kPageSize, PROT_READ))); } TEST(ProcSelfMaps, SharedAnon) { const Mapping m = ASSERT_NO_ERRNO_AND_VALUE( MmapAnon(kPageSize, PROT_READ, MAP_SHARED | MAP_ANONYMOUS)); const auto proc_self_maps = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps")); for (const auto& line : absl::StrSplit(proc_self_maps, '\n')) { const auto entry = ASSERT_NO_ERRNO_AND_VALUE(ParseProcMapsLine(line)); if (entry.start <= m.addr() && m.addr() < entry.end) { // cf. proc(5), "/proc/[pid]/map_files/" EXPECT_EQ(entry.filename, "/dev/zero (deleted)"); return; } } FAIL() << "no maps entry containing mapping at " << m.ptr(); } TEST(ProcSelfFd, OpenFd) { int pipe_fds[2]; ASSERT_THAT(pipe2(pipe_fds, O_CLOEXEC), SyscallSucceeds()); // Reopen the write end. const std::string path = absl::StrCat("/proc/self/fd/", pipe_fds[1]); const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(path, O_WRONLY)); // Ensure that a read/write works. const std::string data = "hello"; std::unique_ptr<char[]> buffer(new char[data.size()]); EXPECT_THAT(write(fd.get(), data.c_str(), data.size()), SyscallSucceedsWithValue(5)); EXPECT_THAT(read(pipe_fds[0], buffer.get(), data.size()), SyscallSucceedsWithValue(5)); EXPECT_EQ(strncmp(buffer.get(), data.c_str(), data.size()), 0); // Cleanup. ASSERT_THAT(close(pipe_fds[0]), SyscallSucceeds()); ASSERT_THAT(close(pipe_fds[1]), SyscallSucceeds()); } static void CheckFdDirGetdentsDuplicates(const std::string& path) { const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(path.c_str(), O_RDONLY | O_DIRECTORY)); // Open a FD whose value is supposed to be much larger than // the number of FDs opened by current process. 
auto newfd = fcntl(fd.get(), F_DUPFD, 1024); EXPECT_GE(newfd, 1024); auto fd_closer = Cleanup([newfd]() { close(newfd); }); auto fd_files = ASSERT_NO_ERRNO_AND_VALUE(ListDir(path.c_str(), false)); absl::node_hash_set<std::string> fd_files_dedup(fd_files.begin(), fd_files.end()); EXPECT_EQ(fd_files.size(), fd_files_dedup.size()); } // This is a regression test for gvisor.dev/issues/3894 TEST(ProcSelfFd, GetdentsDuplicates) { CheckFdDirGetdentsDuplicates("/proc/self/fd"); } // This is a regression test for gvisor.dev/issues/3894 TEST(ProcSelfFdInfo, GetdentsDuplicates) { CheckFdDirGetdentsDuplicates("/proc/self/fdinfo"); } TEST(ProcSelfFdInfo, CorrectFds) { // Make sure there is at least one open file. auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDONLY)); // Get files in /proc/self/fd. auto fd_files = ASSERT_NO_ERRNO_AND_VALUE(ListDir("/proc/self/fd", false)); // Get files in /proc/self/fdinfo. auto fdinfo_files = ASSERT_NO_ERRNO_AND_VALUE(ListDir("/proc/self/fdinfo", false)); // They should contain the same fds. EXPECT_THAT(fd_files, UnorderedElementsAreArray(fdinfo_files)); // Both should contain fd. auto fd_s = absl::StrCat(fd.get()); EXPECT_THAT(fd_files, Contains(fd_s)); } TEST(ProcSelfFdInfo, Flags) { std::string path = NewTempAbsPath(); // Create file here with O_CREAT to test that O_CREAT does not appear in // fdinfo flags. int flags = O_CREAT | O_RDWR | O_APPEND | O_CLOEXEC; const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(path, flags, 0644)); // Automatically delete path. TempPath temp_path(path); // O_CREAT does not appear in fdinfo flags. flags &= ~O_CREAT; // O_LARGEFILE always appears (on x86_64). 
flags |= kOLargeFile; auto fd_info = ASSERT_NO_ERRNO_AND_VALUE( GetContents(absl::StrCat("/proc/self/fdinfo/", fd.get()))); EXPECT_THAT(fd_info, HasSubstr(absl::StrFormat("flags:\t%#o", flags))); } TEST(ProcSelfExe, Absolute) { auto exe = ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/self/exe")); EXPECT_EQ(exe[0], '/'); } TEST(ProcSelfCwd, Absolute) { auto exe = ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/self/cwd")); EXPECT_EQ(exe[0], '/'); } // Sanity check for /proc/cpuinfo fields that must be present. TEST(ProcCpuinfo, RequiredFieldsArePresent) { std::string proc_cpuinfo = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/cpuinfo")); ASSERT_FALSE(proc_cpuinfo.empty()); std::vector<std::string> cpuinfo_fields = absl::StrSplit(proc_cpuinfo, '\n'); // Check that the usual fields are there. We don't really care about the // contents. for (const std::string& field : required_fields) { EXPECT_THAT(proc_cpuinfo, HasSubstr(field)); } } TEST(ProcCpuinfo, DeniesWriteNonRoot) { SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_FOWNER))); // Do setuid in a separate thread so that after finishing this test, the // process can still open files the test harness created before starting this // test. Otherwise, the files are created by root (UID before the test), but // cannot be opened by the `uid` set below after the test. After calling // setuid(non-zero-UID), there is no way to get root privileges back. ScopedThread([&] { // Use syscall instead of glibc setuid wrapper because we want this setuid // call to only apply to this task. POSIX threads, however, require that all // threads have the same UIDs, so using the setuid wrapper sets all threads' // real UID. // Also drops capabilities. 
constexpr int kNobody = 65534; EXPECT_THAT(syscall(SYS_setuid, kNobody), SyscallSucceeds()); EXPECT_THAT(open("/proc/cpuinfo", O_WRONLY), SyscallFailsWithErrno(EACCES)); EXPECT_THAT(truncate("/proc/cpuinfo", 123), SyscallFailsWithErrno(EACCES)); }); } // With root privileges, it is possible to open /proc/cpuinfo with write mode, // but all write operations should fail. TEST(ProcCpuinfo, DeniesWriteRoot) { // VFS1 does not behave differently for root/non-root. SKIP_IF(IsRunningWithVFS1()); SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_FOWNER))); int fd; EXPECT_THAT(fd = open("/proc/cpuinfo", O_WRONLY), SyscallSucceeds()); if (fd > 0) { // Truncate is not tested--it may succeed on some kernels without doing // anything. EXPECT_THAT(write(fd, "x", 1), SyscallFails()); EXPECT_THAT(pwrite(fd, "x", 1, 123), SyscallFails()); } } // Sanity checks that uptime is present. TEST(ProcUptime, IsPresent) { std::string proc_uptime = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/uptime")); ASSERT_FALSE(proc_uptime.empty()); std::vector<std::string> uptime_parts = absl::StrSplit(proc_uptime, ' '); // Parse once. double uptime0, uptime1, idletime0, idletime1; ASSERT_TRUE(absl::SimpleAtod(uptime_parts[0], &uptime0)); ASSERT_TRUE(absl::SimpleAtod(uptime_parts[1], &idletime0)); // Sleep for one second. absl::SleepFor(absl::Seconds(1)); // Parse again. proc_uptime = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/uptime")); ASSERT_FALSE(proc_uptime.empty()); uptime_parts = absl::StrSplit(proc_uptime, ' '); ASSERT_TRUE(absl::SimpleAtod(uptime_parts[0], &uptime1)); ASSERT_TRUE(absl::SimpleAtod(uptime_parts[1], &idletime1)); // Sanity check. // // We assert that between 0.99 and 59.99 seconds have passed. If more than a // minute has passed, then we must be executing really, really slowly. 
EXPECT_GE(uptime0, 0.0); EXPECT_GE(idletime0, 0.0); EXPECT_GT(uptime1, uptime0); EXPECT_GE(uptime1, uptime0 + 0.99); EXPECT_LE(uptime1, uptime0 + 59.99); EXPECT_GE(idletime1, idletime0); } TEST(ProcMeminfo, ContainsBasicFields) { std::string proc_meminfo = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/meminfo")); EXPECT_THAT(proc_meminfo, AllOf(ContainsRegex(R"(MemTotal:\s+[0-9]+ kB)"), ContainsRegex(R"(MemFree:\s+[0-9]+ kB)"))); } TEST(ProcStat, ContainsBasicFields) { std::string proc_stat = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/stat")); std::vector<std::string> names; for (auto const& line : absl::StrSplit(proc_stat, '\n')) { std::vector<std::string> fields = absl::StrSplit(line, ' ', absl::SkipWhitespace()); if (fields.empty()) { continue; } names.push_back(fields[0]); } EXPECT_THAT(names, IsSupersetOf({"cpu", "intr", "ctxt", "btime", "processes", "procs_running", "procs_blocked", "softirq"})); } TEST(ProcStat, EndsWithNewline) { std::string proc_stat = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/stat")); EXPECT_EQ(proc_stat.back(), '\n'); } TEST(ProcStat, Fields) { std::string proc_stat = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/stat")); std::vector<std::string> names; for (auto const& line : absl::StrSplit(proc_stat, '\n')) { std::vector<std::string> fields = absl::StrSplit(line, ' ', absl::SkipWhitespace()); if (fields.empty()) { continue; } if (absl::StartsWith(fields[0], "cpu")) { // As of Linux 3.11, each CPU entry has 10 fields, plus the name. EXPECT_GE(fields.size(), 11) << proc_stat; } else if (fields[0] == "ctxt") { // Single field. EXPECT_EQ(fields.size(), 2) << proc_stat; } else if (fields[0] == "btime") { // Single field. EXPECT_EQ(fields.size(), 2) << proc_stat; } else if (fields[0] == "itime") { // Single field. ASSERT_EQ(fields.size(), 2) << proc_stat; // This is the only floating point field. double val; EXPECT_TRUE(absl::SimpleAtod(fields[1], &val)) << proc_stat; continue; } else if (fields[0] == "processes") { // Single field. 
EXPECT_EQ(fields.size(), 2) << proc_stat; } else if (fields[0] == "procs_running") { // Single field. EXPECT_EQ(fields.size(), 2) << proc_stat; } else if (fields[0] == "procs_blocked") { // Single field. EXPECT_EQ(fields.size(), 2) << proc_stat; } else if (fields[0] == "softirq") { // As of Linux 3.11, there are 10 softirqs. 12 fields for name + total. EXPECT_GE(fields.size(), 12) << proc_stat; } // All fields besides itime are valid base 10 numbers. for (size_t i = 1; i < fields.size(); i++) { uint64_t val; EXPECT_TRUE(absl::SimpleAtoi(fields[i], &val)) << proc_stat; } } } TEST(ProcLoadavg, EndsWithNewline) { std::string proc_loadvg = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/loadavg")); EXPECT_EQ(proc_loadvg.back(), '\n'); } TEST(ProcLoadavg, Fields) { std::string proc_loadvg = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/loadavg")); std::vector<std::string> lines = absl::StrSplit(proc_loadvg, '\n'); // Single line. EXPECT_EQ(lines.size(), 2) << proc_loadvg; std::vector<std::string> fields = absl::StrSplit(lines[0], absl::ByAnyChar(" /"), absl::SkipWhitespace()); // Six fields. EXPECT_EQ(fields.size(), 6) << proc_loadvg; double val; uint64_t val2; // First three fields are floating point numbers. EXPECT_TRUE(absl::SimpleAtod(fields[0], &val)) << proc_loadvg; EXPECT_TRUE(absl::SimpleAtod(fields[1], &val)) << proc_loadvg; EXPECT_TRUE(absl::SimpleAtod(fields[2], &val)) << proc_loadvg; // Rest of the fields are valid base 10 numbers. EXPECT_TRUE(absl::SimpleAtoi(fields[3], &val2)) << proc_loadvg; EXPECT_TRUE(absl::SimpleAtoi(fields[4], &val2)) << proc_loadvg; EXPECT_TRUE(absl::SimpleAtoi(fields[5], &val2)) << proc_loadvg; } // NOTE: Tests in priority.cc also check certain priority related fields in // /proc/self/stat. 
class ProcPidStatTest : public ::testing::TestWithParam<std::string> {}; TEST_P(ProcPidStatTest, HasBasicFields) { std::string proc_pid_stat = ASSERT_NO_ERRNO_AND_VALUE( GetContents(absl::StrCat("/proc/", GetParam(), "/stat"))); ASSERT_FALSE(proc_pid_stat.empty()); std::vector<std::string> fields = absl::StrSplit(proc_pid_stat, ' '); ASSERT_GE(fields.size(), 24); EXPECT_EQ(absl::StrCat(getpid()), fields[0]); // fields[1] is the thread name. EXPECT_EQ("R", fields[2]); // task state EXPECT_EQ(absl::StrCat(getppid()), fields[3]); // If the test starts up quickly, then the process start time and the kernel // boot time will be very close, and the proc starttime field (which is the // delta of the two times) will be 0. For that unfortunate reason, we can // only check that starttime >= 0, and not that it is strictly > 0. uint64_t starttime; ASSERT_TRUE(absl::SimpleAtoi(fields[21], &starttime)); EXPECT_GE(starttime, 0); uint64_t vss; ASSERT_TRUE(absl::SimpleAtoi(fields[22], &vss)); EXPECT_GT(vss, 0); uint64_t rss; ASSERT_TRUE(absl::SimpleAtoi(fields[23], &rss)); EXPECT_GT(rss, 0); uint64_t rsslim; ASSERT_TRUE(absl::SimpleAtoi(fields[24], &rsslim)); EXPECT_GT(rsslim, 0); } INSTANTIATE_TEST_SUITE_P(SelfAndNumericPid, ProcPidStatTest, ::testing::Values("self", absl::StrCat(getpid()))); using ProcPidStatmTest = ::testing::TestWithParam<std::string>; TEST_P(ProcPidStatmTest, HasBasicFields) { std::string proc_pid_statm = ASSERT_NO_ERRNO_AND_VALUE( GetContents(absl::StrCat("/proc/", GetParam(), "/statm"))); ASSERT_FALSE(proc_pid_statm.empty()); std::vector<std::string> fields = absl::StrSplit(proc_pid_statm, ' '); ASSERT_GE(fields.size(), 7); uint64_t vss; ASSERT_TRUE(absl::SimpleAtoi(fields[0], &vss)); EXPECT_GT(vss, 0); uint64_t rss; ASSERT_TRUE(absl::SimpleAtoi(fields[1], &rss)); EXPECT_GT(rss, 0); } INSTANTIATE_TEST_SUITE_P(SelfAndNumericPid, ProcPidStatmTest, ::testing::Values("self", absl::StrCat(getpid()))); PosixErrorOr<uint64_t> CurrentRSS() { 
ASSIGN_OR_RETURN_ERRNO(auto proc_self_stat, GetContents("/proc/self/stat")); if (proc_self_stat.empty()) { return PosixError(EINVAL, "empty /proc/self/stat"); } std::vector<std::string> fields = absl::StrSplit(proc_self_stat, ' '); if (fields.size() < 24) { return PosixError( EINVAL, absl::StrCat("/proc/self/stat has too few fields: ", proc_self_stat)); } uint64_t rss; if (!absl::SimpleAtoi(fields[23], &rss)) { return PosixError( EINVAL, absl::StrCat("/proc/self/stat RSS field is not a number: ", fields[23])); } // RSS is given in number of pages. return rss * kPageSize; } // The size of mapping created by MapPopulateRSS. constexpr uint64_t kMappingSize = 100 << 20; // Tolerance on RSS comparisons to account for background thread mappings, // reclaimed pages, newly faulted pages, etc. constexpr uint64_t kRSSTolerance = 10 << 20; // Capture RSS before and after an anonymous mapping with passed prot. void MapPopulateRSS(int prot, uint64_t* before, uint64_t* after) { *before = ASSERT_NO_ERRNO_AND_VALUE(CurrentRSS()); // N.B. The kernel asynchronously accumulates per-task RSS counters into the // mm RSS, which is exposed by /proc/PID/stat. Task exit is a synchronization // point (kernel/exit.c:do_exit -> sync_mm_rss), so perform the mapping on // another thread to ensure it is reflected in RSS after the thread exits. Mapping mapping; ScopedThread t([&mapping, prot] { mapping = ASSERT_NO_ERRNO_AND_VALUE( MmapAnon(kMappingSize, prot, MAP_PRIVATE | MAP_POPULATE)); }); t.Join(); *after = ASSERT_NO_ERRNO_AND_VALUE(CurrentRSS()); } // TODO(b/73896574): Test for PROT_READ + MAP_POPULATE anonymous mappings. Their // semantics are more subtle: // // Small pages -> Zero page mapped, not counted in RSS // (mm/memory.c:do_anonymous_page). // // Huge pages (THP enabled, use_zero_page=0) -> Pages committed // (mm/memory.c:__handle_mm_fault -> create_huge_pmd). 
// // Huge pages (THP enabled, use_zero_page=1) -> Zero page mapped, not counted in // RSS (mm/huge_memory.c:do_huge_pmd_anonymous_page). // PROT_WRITE + MAP_POPULATE anonymous mappings are always committed. TEST(ProcSelfStat, PopulateWriteRSS) { uint64_t before, after; MapPopulateRSS(PROT_READ | PROT_WRITE, &before, &after); // Mapping is committed. EXPECT_NEAR(before + kMappingSize, after, kRSSTolerance); } // PROT_NONE + MAP_POPULATE anonymous mappings are never committed. TEST(ProcSelfStat, PopulateNoneRSS) { uint64_t before, after; MapPopulateRSS(PROT_NONE, &before, &after); // Mapping not committed. EXPECT_NEAR(before, after, kRSSTolerance); } // Returns the calling thread's name. PosixErrorOr<std::string> ThreadName() { // "The buffer should allow space for up to 16 bytes; the returned std::string // will be null-terminated if it is shorter than that." - prctl(2). But we // always want the thread name to be null-terminated. char thread_name[17]; int rc = prctl(PR_GET_NAME, thread_name, 0, 0, 0); MaybeSave(); if (rc < 0) { return PosixError(errno, "prctl(PR_GET_NAME)"); } thread_name[16] = '\0'; return std::string(thread_name); } // Parses the contents of a /proc/[pid]/status file into a collection of // key-value pairs. 
PosixErrorOr<std::map<std::string, std::string>> ParseProcStatus(
    absl::string_view status_str) {
  std::map<std::string, std::string> parsed;
  // Whitespace-only lines are dropped by the splitter.
  for (const absl::string_view entry :
       absl::StrSplit(status_str, '\n', absl::SkipWhitespace())) {
    // Split at the first ":\t" only; a line without that separator ends up
    // entirely in the key with an empty value.
    const std::pair<absl::string_view, absl::string_view> split =
        absl::StrSplit(entry, absl::MaxSplits(":\t", 1));
    const absl::string_view name = split.first;
    if (name.empty()) {
      return PosixError(
          EINVAL, absl::StrCat("failed to parse key in line \"", entry, "\""));
    }

    std::string value(split.second);
    absl::StripLeadingAsciiWhitespace(&value);

    // emplace() leaves the map untouched when the key already exists.
    const bool inserted =
        parsed.emplace(std::string(name), std::move(value)).second;
    if (!inserted) {
      return PosixError(EINVAL, absl::StrCat("duplicate key \"", name, "\""));
    }
  }
  return parsed;
}

// Values keep their inner spaces but lose leading whitespace after the tab.
TEST(ParseProcStatusTest, ParsesSimpleStatusFileWithMixedWhitespaceCorrectly) {
  const auto parsed = ParseProcStatus(
      "Name:\tinit\nState:\tS (sleeping)\nCapEff:\t 0000001fffffffff\n");
  EXPECT_THAT(parsed, IsPosixErrorOkAndHolds(UnorderedElementsAre(
                          Pair("Name", "init"), Pair("State", "S (sleeping)"),
                          Pair("CapEff", "0000001fffffffff"))));
}

// A repeated key is reported as EINVAL naming the key.
TEST(ParseProcStatusTest, DetectsDuplicateKeys) {
  EXPECT_THAT(ParseProcStatus("Name:\tfoo\nName:\tfoo\n"),
              PosixErrorIs(EINVAL, ::testing::StrEq("duplicate key \"Name\"")));
}

// Lines lacking the ":\t" separator become keys with empty values.
TEST(ParseProcStatusTest, DetectsMissingTabs) {
  const auto parsed = ParseProcStatus("Name:foo\nPid: 1\n");
  EXPECT_THAT(parsed, IsPosixErrorOkAndHolds(UnorderedElementsAre(
                          Pair("Name:foo", ""), Pair("Pid: 1", ""))));
}

// /proc/<tid>/status of a non-leader thread reports that thread's name, the
// thread group id, its own tid and the parent pid.
TEST(ProcPidStatusTest, HasBasicFields) {
  // Do this on a separate thread since we want tgid != tid.
  ScopedThread checker([] {
    const pid_t tid = syscall(SYS_gettid);
    const pid_t tgid = getpid();
    EXPECT_NE(tgid, tid);
    const auto thread_name = ASSERT_NO_ERRNO_AND_VALUE(ThreadName());

    const std::string status_path = absl::StrCat("/proc/", tid, "/status");
    std::string status_str =
        ASSERT_NO_ERRNO_AND_VALUE(GetContents(status_path));

    ASSERT_FALSE(status_str.empty());
    const auto status = ASSERT_NO_ERRNO_AND_VALUE(ParseProcStatus(status_str));
    EXPECT_THAT(status, IsSupersetOf({Pair("Name", thread_name),
                                      Pair("Tgid", absl::StrCat(tgid)),
                                      Pair("Pid", absl::StrCat(tid)),
                                      Pair("PPid", absl::StrCat(getppid()))}));
  });
  checker.Join();
}

// A task reading its own status file sees itself as running.
TEST(ProcPidStatusTest, StateRunning) {
  // Task must be running when reading the file.
  const pid_t self_tid = syscall(SYS_gettid);
  const std::string status_path = absl::StrCat("/proc/", self_tid, "/status");
  std::string status_str = ASSERT_NO_ERRNO_AND_VALUE(GetContents(status_path));

  EXPECT_THAT(ParseProcStatus(status_str),
              IsPosixErrorOkAndHolds(Contains(Pair("State", "R (running)"))));
}

// Starts a child process that blocks and checks that its State is sleeping.
TEST(ProcPidStatusTest, StateSleeping_NoRandomSave) {
  const auto wait_for_sleep = [&](int pid) -> PosixError {
    // Because this test is timing based we will disable cooperative saving
    // and the test itself also has random saving disabled.
    const DisableSave ds;
    // Poll, since the child may not be sleeping yet when the status file is
    // first read.
    MonotonicTimer timer;
    timer.Start();
    while (true) {
      ASSIGN_OR_RETURN_ERRNO(
          std::string status_str,
          GetContents(absl::StrCat("/proc/", pid, "/status")));
      ASSIGN_OR_RETURN_ERRNO(auto map, ParseProcStatus(status_str));
      if (map["State"] == "S (sleeping)") {
        // Test passed!
        return NoError();
      }
      if (timer.Duration() > absl::Seconds(10)) {
        return PosixError(ETIMEDOUT, "Timeout waiting for child to sleep");
      }
      absl::SleepFor(absl::Milliseconds(10));
    }
  };
  ASSERT_NO_ERRNO(WithSubprocess(wait_for_sleep, nullptr, nullptr));
}

// Every non-blank line of /proc/self/status contains a ":\t" separator.
TEST(ProcPidStatusTest, ValuesAreTabDelimited) {
  std::string status_str =
      ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/status"));
  ASSERT_FALSE(status_str.empty());
  for (const absl::string_view entry :
       absl::StrSplit(status_str, '\n', absl::SkipWhitespace())) {
    EXPECT_TRUE(absl::StrContains(entry, ":\t"));
  }
}

// Threads properly counts running threads.
//
// TODO(mpratt): Test zombied threads while the thread group leader is still
// running with generalized fork and clone children from the wait test.
TEST(ProcPidStatusTest, Threads) {
  char buf[4096] = {};
  int thread_count = -1;

  // While the subprocess is running.
  EXPECT_THAT(ReadWhileRunning("status", buf, sizeof(buf) - 1),
              SyscallSucceedsWithValue(Gt(0)));
  auto status = ASSERT_NO_ERRNO_AND_VALUE(ParseProcStatus(buf));
  auto threads_it = status.find("Threads");
  ASSERT_NE(threads_it, status.end());
  EXPECT_TRUE(absl::SimpleAtoi(threads_it->second, &thread_count))
      << "Threads value " << threads_it->second << " is not a number";
  // Don't make assumptions about the exact number of threads, as it may not be
  // constant.
  EXPECT_GE(thread_count, 1);

  // While the subprocess is a zombie.
  memset(buf, 0, sizeof(buf));
  thread_count = -1;
  EXPECT_THAT(ReadWhileZombied("status", buf, sizeof(buf) - 1),
              SyscallSucceedsWithValue(Gt(0)));
  status = ASSERT_NO_ERRNO_AND_VALUE(ParseProcStatus(buf));
  threads_it = status.find("Threads");
  ASSERT_NE(threads_it, status.end());
  EXPECT_TRUE(absl::SimpleAtoi(threads_it->second, &thread_count))
      << "Threads value " << threads_it->second << " is not a number";
  // There must be only the thread group leader remaining, zombied.
  EXPECT_EQ(thread_count, 1);
}

// Returns true if all characters in s are digits.
bool IsDigits(absl::string_view s) { return std::all_of(s.begin(), s.end(), absl::ascii_isdigit); } TEST(ProcPidStatTest, VmStats) { std::string status_str = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/status")); ASSERT_FALSE(status_str.empty()); auto status = ASSERT_NO_ERRNO_AND_VALUE(ParseProcStatus(status_str)); const auto vss_it = status.find("VmSize"); ASSERT_NE(vss_it, status.end()); absl::string_view vss_str(vss_it->second); // Room for the " kB" suffix plus at least one digit. ASSERT_GT(vss_str.length(), 3); EXPECT_TRUE(absl::EndsWith(vss_str, " kB")); // Everything else is part of a number. EXPECT_TRUE(IsDigits(vss_str.substr(0, vss_str.length() - 3))) << vss_str; // ... which is not 0. EXPECT_NE('0', vss_str[0]); const auto rss_it = status.find("VmRSS"); ASSERT_NE(rss_it, status.end()); absl::string_view rss_str(rss_it->second); // Room for the " kB" suffix plus at least one digit. ASSERT_GT(rss_str.length(), 3); EXPECT_TRUE(absl::EndsWith(rss_str, " kB")); // Everything else is part of a number. EXPECT_TRUE(IsDigits(rss_str.substr(0, rss_str.length() - 3))) << rss_str; // ... which is not 0. EXPECT_NE('0', rss_str[0]); const auto data_it = status.find("VmData"); ASSERT_NE(data_it, status.end()); absl::string_view data_str(data_it->second); // Room for the " kB" suffix plus at least one digit. ASSERT_GT(data_str.length(), 3); EXPECT_TRUE(absl::EndsWith(data_str, " kB")); // Everything else is part of a number. EXPECT_TRUE(IsDigits(data_str.substr(0, data_str.length() - 3))) << data_str; // ... which is not 0. EXPECT_NE('0', data_str[0]); } // Parse an array of NUL-terminated char* arrays, returning a vector of // strings. std::vector<std::string> ParseNulTerminatedStrings(std::string contents) { EXPECT_EQ('\0', contents.back()); // The split will leave an empty string if the NUL-byte remains, so pop // it. 
contents.pop_back(); return absl::StrSplit(contents, '\0'); } TEST(ProcPidCmdline, MatchesArgv) { std::vector<std::string> proc_cmdline = ParseNulTerminatedStrings( ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/cmdline"))); EXPECT_THAT(saved_argv, ContainerEq(proc_cmdline)); } TEST(ProcPidEnviron, MatchesEnviron) { std::vector<std::string> proc_environ = ParseNulTerminatedStrings( ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/environ"))); // Get the environment from the environ variable, which we will compare with // /proc/self/environ. std::vector<std::string> env; for (char** v = environ; *v; v++) { env.push_back(*v); } EXPECT_THAT(env, ContainerEq(proc_environ)); } TEST(ProcPidCmdline, SubprocessForkSameCmdline) { std::vector<std::string> proc_cmdline_parent; std::vector<std::string> proc_cmdline; proc_cmdline_parent = ParseNulTerminatedStrings( ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/cmdline"))); auto res = WithSubprocess( [&](int pid) -> PosixError { ASSIGN_OR_RETURN_ERRNO( auto raw_cmdline, GetContents(absl::StrCat("/proc/", pid, "/cmdline"))); proc_cmdline = ParseNulTerminatedStrings(raw_cmdline); return NoError(); }, nullptr, nullptr); ASSERT_NO_ERRNO(res); for (size_t i = 0; i < proc_cmdline_parent.size(); i++) { EXPECT_EQ(proc_cmdline_parent[i], proc_cmdline[i]); } } // Test whether /proc/PID/ symlinks can be read for a running process. 
TEST(ProcPidSymlink, SubprocessRunning) {
  char link_buf[1];
  for (const char* entry : {"exe", "ns/net", "ns/pid", "ns/user"}) {
    SCOPED_TRACE(entry);
    EXPECT_THAT(ReadlinkWhileRunning(entry, link_buf, sizeof(link_buf)),
                SyscallSucceedsWithValue(sizeof(link_buf)));
  }
}

// Test how readlink on /proc/PID/ symlinks fails for a zombie process once
// the DAC-override capabilities have been dropped.
TEST(ProcPidSymlink, SubprocessZombied) {
  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false));

  const bool on_gvisor = IsRunningOnGvisor();
  char link_buf[1];

  // gVisor and Linux up to 4.3 are expected to fail with EACCES; newer Linux
  // kernels with ENOENT.
  int expected_errno = EACCES;
  if (!on_gvisor) {
    const auto kernel = ASSERT_NO_ERRNO_AND_VALUE(GetKernelVersion());
    const bool newer_than_4_3 =
        kernel.major > 4 || (kernel.major == 4 && kernel.minor > 3);
    if (newer_than_4_3) {
      expected_errno = ENOENT;
    }
  }

  EXPECT_THAT(ReadlinkWhileZombied("exe", link_buf, sizeof(link_buf)),
              SyscallFailsWithErrno(expected_errno));

  if (!on_gvisor) {
    EXPECT_THAT(ReadlinkWhileZombied("ns/net", link_buf, sizeof(link_buf)),
                SyscallFailsWithErrno(expected_errno));

    // FIXME(gvisor.dev/issue/164): Inconsistent behavior between linux on proc
    // files.
    //
    // ~4.3: Syscall fails with EACCES.
    // 4.17: Syscall succeeds and returns 1.
    //
    // The remaining entries are therefore only checked on gVisor.
    return;
  }

  for (const char* entry : {"ns/pid", "ns/user"}) {
    SCOPED_TRACE(entry);
    EXPECT_THAT(ReadlinkWhileZombied(entry, link_buf, sizeof(link_buf)),
                SyscallFailsWithErrno(expected_errno));
  }
}

// Test whether /proc/PID/ symlinks can be read for an exited process.
TEST(ProcPidSymlink, SubprocessExited) {
  char link_buf[1];
  for (const char* entry : {"exe", "ns/net", "ns/pid", "ns/user"}) {
    SCOPED_TRACE(entry);
    EXPECT_THAT(ReadlinkWhileExited(entry, link_buf, sizeof(link_buf)),
                SyscallFailsWithErrno(ESRCH));
  }
}

// /proc/PID/exe points to the correct binary.
TEST(ProcPidExe, Subprocess) {
  // Expected value: this process's own exe link, made absolute.
  const auto self_exe = ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/self/exe"));
  const auto want = ASSERT_NO_ERRNO_AND_VALUE(MakeAbsolute(self_exe, ""));

  // Zero-filled so the link target read back is NUL-terminated.
  char got[PATH_MAX + 1] = {};
  ASSERT_THAT(ReadlinkWhileRunning("exe", got, sizeof(got)),
              SyscallSucceedsWithValue(Gt(0)));
  EXPECT_EQ(got, want);
}

// /proc/PID/cwd points to the correct directory.
TEST(ProcPidCwd, Subprocess) {
  const auto expected_cwd = ASSERT_NO_ERRNO_AND_VALUE(GetCWD());

  // Zero-filled so the link target read back is NUL-terminated.
  char actual_cwd[PATH_MAX + 1] = {};
  ASSERT_THAT(ReadlinkWhileRunning("cwd", actual_cwd, sizeof(actual_cwd)),
              SyscallSucceedsWithValue(Gt(0)));
  EXPECT_EQ(actual_cwd, expected_cwd);
}

// Test whether /proc/PID/ files can be read for a running process.
TEST(ProcPidFile, SubprocessRunning) {
  char byte[1];
  // Each of these files must yield a full one-byte read.
  for (const char* entry :
       {"auxv", "cmdline", "comm", "gid_map", "io", "maps", "stat", "status",
        "uid_map", "oom_score", "oom_score_adj"}) {
    SCOPED_TRACE(entry);
    EXPECT_THAT(ReadWhileRunning(entry, byte, sizeof(byte)),
                SyscallSucceedsWithValue(sizeof(byte)));
  }
}

// Test whether /proc/PID/ files can be read for a zombie process.
TEST(ProcPidFile, SubprocessZombie) { char buf[1]; // FIXME(gvisor.dev/issue/164): Loosen requirement due to inconsistent // behavior on different kernels. // // ~4.3: Succeds and returns 0. // 4.17: Succeeds and returns 1. // gVisor: Succeeds and returns 0. EXPECT_THAT(ReadWhileZombied("auxv", buf, sizeof(buf)), SyscallSucceeds()); EXPECT_THAT(ReadWhileZombied("cmdline", buf, sizeof(buf)), SyscallSucceedsWithValue(0)); EXPECT_THAT(ReadWhileZombied("comm", buf, sizeof(buf)), SyscallSucceedsWithValue(sizeof(buf))); EXPECT_THAT(ReadWhileZombied("gid_map", buf, sizeof(buf)), SyscallSucceedsWithValue(sizeof(buf))); EXPECT_THAT(ReadWhileZombied("maps", buf, sizeof(buf)), SyscallSucceedsWithValue(0)); EXPECT_THAT(ReadWhileZombied("stat", buf, sizeof(buf)), SyscallSucceedsWithValue(sizeof(buf))); EXPECT_THAT(ReadWhileZombied("status", buf, sizeof(buf)), SyscallSucceedsWithValue(sizeof(buf))); EXPECT_THAT(ReadWhileZombied("uid_map", buf, sizeof(buf)), SyscallSucceedsWithValue(sizeof(buf))); EXPECT_THAT(ReadWhileZombied("oom_score", buf, sizeof(buf)), SyscallSucceedsWithValue(sizeof(buf))); EXPECT_THAT(ReadWhileZombied("oom_score_adj", buf, sizeof(buf)), SyscallSucceedsWithValue(sizeof(buf))); // FIXME(gvisor.dev/issue/164): Inconsistent behavior between gVisor and linux // on proc files. // // ~4.3: Fails and returns EACCES. // gVisor & 4.17: Succeeds and returns 1. // // EXPECT_THAT(ReadWhileZombied("io", buf, sizeof(buf)), // SyscallFailsWithErrno(EACCES)); } // Test whether /proc/PID/ files can be read for an exited process. TEST(ProcPidFile, SubprocessExited) { char buf[1]; // FIXME(gvisor.dev/issue/164): Inconsistent behavior between kernels. // // ~4.3: Fails and returns ESRCH. // gVisor: Fails with ESRCH. // 4.17: Succeeds and returns 1. 
// // EXPECT_THAT(ReadWhileExited("auxv", buf, sizeof(buf)), // SyscallFailsWithErrno(ESRCH)); EXPECT_THAT(ReadWhileExited("cmdline", buf, sizeof(buf)), SyscallFailsWithErrno(ESRCH)); if (!IsRunningOnGvisor()) { // FIXME(gvisor.dev/issue/164): Succeeds on gVisor. EXPECT_THAT(ReadWhileExited("comm", buf, sizeof(buf)), SyscallFailsWithErrno(ESRCH)); } EXPECT_THAT(ReadWhileExited("gid_map", buf, sizeof(buf)), SyscallSucceedsWithValue(sizeof(buf))); if (!IsRunningOnGvisor()) { // FIXME(gvisor.dev/issue/164): Succeeds on gVisor. EXPECT_THAT(ReadWhileExited("io", buf, sizeof(buf)), SyscallFailsWithErrno(ESRCH)); } if (!IsRunningOnGvisor()) { // FIXME(gvisor.dev/issue/164): Returns EOF on gVisor. EXPECT_THAT(ReadWhileExited("maps", buf, sizeof(buf)), SyscallFailsWithErrno(ESRCH)); } if (!IsRunningOnGvisor()) { // FIXME(gvisor.dev/issue/164): Succeeds on gVisor. EXPECT_THAT(ReadWhileExited("stat", buf, sizeof(buf)), SyscallFailsWithErrno(ESRCH)); } if (!IsRunningOnGvisor()) { // FIXME(gvisor.dev/issue/164): Succeeds on gVisor. EXPECT_THAT(ReadWhileExited("status", buf, sizeof(buf)), SyscallFailsWithErrno(ESRCH)); } EXPECT_THAT(ReadWhileExited("uid_map", buf, sizeof(buf)), SyscallSucceedsWithValue(sizeof(buf))); if (!IsRunningOnGvisor()) { // FIXME(gvisor.dev/issue/164): Succeeds on gVisor. 
EXPECT_THAT(ReadWhileExited("oom_score", buf, sizeof(buf)), SyscallFailsWithErrno(ESRCH)); } EXPECT_THAT(ReadWhileExited("oom_score_adj", buf, sizeof(buf)), SyscallFailsWithErrno(ESRCH)); } PosixError DirContainsImpl(absl::string_view path, const std::vector<std::string>& targets, bool strict) { ASSIGN_OR_RETURN_ERRNO(auto listing, ListDir(path, false)); bool success = true; for (auto& expected_entry : targets) { auto cursor = std::find(listing.begin(), listing.end(), expected_entry); if (cursor == listing.end()) { success = false; } } if (!success) { return PosixError( ENOENT, absl::StrCat("Failed to find one or more paths in '", path, "'")); } if (strict) { if (targets.size() != listing.size()) { return PosixError( EINVAL, absl::StrCat("Expected to find ", targets.size(), " elements in '", path, "', but found ", listing.size())); } } return NoError(); } PosixError DirContains(absl::string_view path, const std::vector<std::string>& targets) { return DirContainsImpl(path, targets, false); } PosixError DirContainsExactly(absl::string_view path, const std::vector<std::string>& targets) { return DirContainsImpl(path, targets, true); } PosixError EventuallyDirContainsExactly( absl::string_view path, const std::vector<std::string>& targets) { constexpr int kRetryCount = 100; const absl::Duration kRetryDelay = absl::Milliseconds(100); for (int i = 0; i < kRetryCount; ++i) { auto res = DirContainsExactly(path, targets); if (res.ok()) { return res; } else if (i < kRetryCount - 1) { // Sleep if this isn't the final iteration. 
absl::SleepFor(kRetryDelay); } } return PosixError(ETIMEDOUT, "Timed out while waiting for directory to contain files "); } TEST(ProcTask, Basic) { EXPECT_NO_ERRNO( DirContains("/proc/self/task", {".", "..", absl::StrCat(getpid())})); } std::vector<std::string> TaskFiles( const std::vector<std::string>& initial_contents, const std::vector<pid_t>& pids) { return VecCat<std::string>( initial_contents, ApplyVec<std::string>([](const pid_t p) { return absl::StrCat(p); }, pids)); } std::vector<std::string> TaskFiles(const std::vector<pid_t>& pids) { return TaskFiles({".", "..", absl::StrCat(getpid())}, pids); } // Helper class for creating a new task in the current thread group. class BlockingChild { public: BlockingChild() : thread_([=] { Start(); }) {} ~BlockingChild() { Join(); } pid_t Tid() const { absl::MutexLock ml(&mu_); mu_.Await(absl::Condition(&tid_ready_)); return tid_; } void Join() { Stop(); } private: void Start() { absl::MutexLock ml(&mu_); tid_ = syscall(__NR_gettid); tid_ready_ = true; mu_.Await(absl::Condition(&stop_)); } void Stop() { absl::MutexLock ml(&mu_); stop_ = true; } mutable absl::Mutex mu_; bool stop_ ABSL_GUARDED_BY(mu_) = false; pid_t tid_; bool tid_ready_ ABSL_GUARDED_BY(mu_) = false; // Must be last to ensure that the destructor for the thread is run before // any other member of the object is destroyed. ScopedThread thread_; }; TEST(ProcTask, NewThreadAppears) { auto initial = ASSERT_NO_ERRNO_AND_VALUE(ListDir("/proc/self/task", false)); BlockingChild child1; EXPECT_NO_ERRNO(DirContainsExactly("/proc/self/task", TaskFiles(initial, {child1.Tid()}))); } TEST(ProcTask, KilledThreadsDisappear) { auto initial = ASSERT_NO_ERRNO_AND_VALUE(ListDir("/proc/self/task/", false)); BlockingChild child1; EXPECT_NO_ERRNO(DirContainsExactly("/proc/self/task", TaskFiles(initial, {child1.Tid()}))); // Stat child1's task file. Regression test for b/32097707. 
struct stat statbuf; const std::string child1_task_file = absl::StrCat("/proc/self/task/", child1.Tid()); EXPECT_THAT(stat(child1_task_file.c_str(), &statbuf), SyscallSucceeds()); BlockingChild child2; EXPECT_NO_ERRNO(DirContainsExactly( "/proc/self/task", TaskFiles(initial, {child1.Tid(), child2.Tid()}))); BlockingChild child3; BlockingChild child4; BlockingChild child5; EXPECT_NO_ERRNO(DirContainsExactly( "/proc/self/task", TaskFiles(initial, {child1.Tid(), child2.Tid(), child3.Tid(), child4.Tid(), child5.Tid()}))); child2.Join(); EXPECT_NO_ERRNO(EventuallyDirContainsExactly( "/proc/self/task", TaskFiles(initial, {child1.Tid(), child3.Tid(), child4.Tid(), child5.Tid()}))); child1.Join(); child4.Join(); EXPECT_NO_ERRNO(EventuallyDirContainsExactly( "/proc/self/task", TaskFiles(initial, {child3.Tid(), child5.Tid()}))); // Stat child1's task file again. This time it should fail. See b/32097707. EXPECT_THAT(stat(child1_task_file.c_str(), &statbuf), SyscallFailsWithErrno(ENOENT)); child3.Join(); child5.Join(); EXPECT_NO_ERRNO(EventuallyDirContainsExactly("/proc/self/task", initial)); } TEST(ProcTask, ChildTaskDir) { BlockingChild child1; EXPECT_NO_ERRNO(DirContains("/proc/self/task", TaskFiles({child1.Tid()}))); EXPECT_NO_ERRNO(DirContains(absl::StrCat("/proc/", child1.Tid(), "/task"), TaskFiles({child1.Tid()}))); } PosixError VerifyPidDir(std::string path) { return DirContains(path, {"exe", "fd", "io", "maps", "ns", "stat", "status"}); } TEST(ProcTask, VerifyTaskDir) { EXPECT_NO_ERRNO(VerifyPidDir("/proc/self")); EXPECT_NO_ERRNO(VerifyPidDir(absl::StrCat("/proc/self/task/", getpid()))); BlockingChild child1; EXPECT_NO_ERRNO(VerifyPidDir(absl::StrCat("/proc/self/task/", child1.Tid()))); // Only the first level of task directories should contain the 'task' // directory. That is: // // /proc/1234/task <- should exist // /proc/1234/task/1234/task <- should not exist // /proc/1234/task/1235/task <- should not exist (where 1235 is in the same // thread group as 1234). 
EXPECT_FALSE( DirContains(absl::StrCat("/proc/self/task/", getpid()), {"task"}).ok()) << "Found 'task' directory in an inner directory."; } TEST(ProcTask, TaskDirCannotBeDeleted) { // Drop capabilities that allow us to override file and directory permissions. ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); EXPECT_THAT(rmdir("/proc/self/task"), SyscallFails()); EXPECT_THAT(rmdir(absl::StrCat("/proc/self/task/", getpid()).c_str()), SyscallFailsWithErrno(EACCES)); } TEST(ProcTask, TaskDirHasCorrectMetadata) { struct stat st; EXPECT_THAT(stat("/proc/self/task", &st), SyscallSucceeds()); EXPECT_TRUE(S_ISDIR(st.st_mode)); // Verify file is readable and executable by everyone. mode_t expected_permissions = S_IRUSR | S_IXUSR | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH; mode_t permissions = st.st_mode & (S_IRWXU | S_IRWXG | S_IRWXO); EXPECT_EQ(expected_permissions, permissions); } TEST(ProcTask, TaskDirCanSeekToEnd) { const FileDescriptor dirfd = ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/self/task", O_RDONLY)); EXPECT_THAT(lseek(dirfd.get(), 0, SEEK_END), SyscallSucceeds()); } TEST(ProcTask, VerifyTaskDirNlinks) { // A task directory will have 3 links if the taskgroup has a single // thread. For example, the following shows where the links to // '/proc/12345/task comes' from for a single threaded process with pid 12345: // // /proc/12345/task <-- 1 link for the directory itself // . <-- link from "." // .. // 12345 // . // .. <-- link from ".." to parent. // <other contents of a task dir> // // We can't assert an absolute number of links since we don't control how many // threads the test framework spawns. Instead, we'll ensure creating a new // thread increases the number of links as expected. // Once we reach the test body, we can count on the thread count being stable // unless we spawn a new one. uint64_t initial_links = ASSERT_NO_ERRNO_AND_VALUE(Links("/proc/self/task")); ASSERT_GE(initial_links, 3); // For each new subtask, we should gain a new link. 
BlockingChild child1; EXPECT_THAT(Links("/proc/self/task"), IsPosixErrorOkAndHolds(initial_links + 1)); BlockingChild child2; EXPECT_THAT(Links("/proc/self/task"), IsPosixErrorOkAndHolds(initial_links + 2)); } TEST(ProcTask, CommContainsThreadNameAndTrailingNewline) { constexpr char kThreadName[] = "TestThread12345"; ASSERT_THAT(prctl(PR_SET_NAME, kThreadName), SyscallSucceeds()); auto thread_name = ASSERT_NO_ERRNO_AND_VALUE( GetContents(JoinPath("/proc", absl::StrCat(getpid()), "task", absl::StrCat(syscall(SYS_gettid)), "comm"))); EXPECT_EQ(absl::StrCat(kThreadName, "\n"), thread_name); } TEST(ProcTaskNs, NsDirExistsAndHasCorrectMetadata) { EXPECT_NO_ERRNO(DirContains("/proc/self/ns", {"net", "pid", "user"})); // Let's just test the 'pid' entry, all of them are very similar. struct stat st; EXPECT_THAT(lstat("/proc/self/ns/pid", &st), SyscallSucceeds()); EXPECT_TRUE(S_ISLNK(st.st_mode)); auto link = ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/self/ns/pid")); EXPECT_THAT(link, ::testing::StartsWith("pid:[")); } TEST(ProcTaskNs, AccessOnNsNodeSucceeds) { EXPECT_THAT(access("/proc/self/ns/pid", F_OK), SyscallSucceeds()); } TEST(ProcSysKernelHostname, Exists) { EXPECT_THAT(open("/proc/sys/kernel/hostname", O_RDONLY), SyscallSucceeds()); } TEST(ProcSysKernelHostname, MatchesUname) { struct utsname buf; EXPECT_THAT(uname(&buf), SyscallSucceeds()); const std::string hostname = absl::StrCat(buf.nodename, "\n"); auto procfs_hostname = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/sys/kernel/hostname")); EXPECT_EQ(procfs_hostname, hostname); } TEST(ProcSysVmMmapMinAddr, HasNumericValue) { const std::string mmap_min_addr_str = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/sys/vm/mmap_min_addr")); uintptr_t mmap_min_addr; EXPECT_TRUE(absl::SimpleAtoi(mmap_min_addr_str, &mmap_min_addr)) << "/proc/sys/vm/mmap_min_addr does not contain a numeric value: " << mmap_min_addr_str; } TEST(ProcSysVmOvercommitMemory, HasNumericValue) { const std::string overcommit_memory_str = 
ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/sys/vm/overcommit_memory")); uintptr_t overcommit_memory; EXPECT_TRUE(absl::SimpleAtoi(overcommit_memory_str, &overcommit_memory)) << "/proc/sys/vm/overcommit_memory does not contain a numeric value: " << overcommit_memory; } // Check that link for proc fd entries point the target node, not the // symlink itself. Regression test for b/31155070. TEST(ProcTaskFd, FstatatFollowsSymlink) { const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY)); struct stat sproc = {}; EXPECT_THAT( fstatat(-1, absl::StrCat("/proc/self/fd/", fd.get()).c_str(), &sproc, 0), SyscallSucceeds()); struct stat sfile = {}; EXPECT_THAT(fstatat(-1, file.path().c_str(), &sfile, 0), SyscallSucceeds()); // If fstatat follows the fd symlink, the device and inode numbers should // match at a minimum. EXPECT_EQ(sproc.st_dev, sfile.st_dev); EXPECT_EQ(sproc.st_ino, sfile.st_ino); EXPECT_EQ(0, memcmp(&sfile, &sproc, sizeof(sfile))); } TEST(ProcFilesystems, Bug65172365) { std::string proc_filesystems = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/filesystems")); ASSERT_FALSE(proc_filesystems.empty()); } TEST(ProcFilesystems, PresenceOfShmMaxMniAll) { uint64_t shmmax = 0; uint64_t shmall = 0; uint64_t shmmni = 0; std::string proc_file; proc_file = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/sys/kernel/shmmax")); ASSERT_FALSE(proc_file.empty()); ASSERT_TRUE(absl::SimpleAtoi(proc_file, &shmmax)); proc_file = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/sys/kernel/shmall")); ASSERT_FALSE(proc_file.empty()); ASSERT_TRUE(absl::SimpleAtoi(proc_file, &shmall)); proc_file = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/sys/kernel/shmmni")); ASSERT_FALSE(proc_file.empty()); ASSERT_TRUE(absl::SimpleAtoi(proc_file, &shmmni)); ASSERT_GT(shmmax, 0); ASSERT_GT(shmall, 0); ASSERT_GT(shmmni, 0); ASSERT_LE(shmall, shmmax); // These values should never be higher than this by default, 
for more // information see uapi/linux/shm.h ASSERT_LE(shmmax, ULONG_MAX - (1UL << 24)); ASSERT_LE(shmall, ULONG_MAX - (1UL << 24)); } TEST(ProcFilesystems, PresenceOfSem) { uint32_t semmsl = 0; uint32_t semmns = 0; uint32_t semopm = 0; uint32_t semmni = 0; std::string proc_file; proc_file = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/sys/kernel/sem")); ASSERT_FALSE(proc_file.empty()); std::vector<absl::string_view> sem_limits = absl::StrSplit(proc_file, absl::ByAnyChar("\t"), absl::SkipWhitespace()); ASSERT_EQ(sem_limits.size(), 4); ASSERT_TRUE(absl::SimpleAtoi(sem_limits[0], &semmsl)); ASSERT_TRUE(absl::SimpleAtoi(sem_limits[1], &semmns)); ASSERT_TRUE(absl::SimpleAtoi(sem_limits[2], &semopm)); ASSERT_TRUE(absl::SimpleAtoi(sem_limits[3], &semmni)); ASSERT_EQ(semmsl, SEMMSL); ASSERT_EQ(semmns, SEMMNS); ASSERT_EQ(semopm, SEMOPM); ASSERT_EQ(semmni, SEMMNI); } // Check that /proc/mounts is a symlink to self/mounts. TEST(ProcMounts, IsSymlink) { auto link = ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/mounts")); EXPECT_EQ(link, "self/mounts"); } TEST(ProcSelfMountinfo, RequiredFieldsArePresent) { auto mountinfo = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/mountinfo")); EXPECT_THAT( mountinfo, AllOf( // Root mount. ContainsRegex( R"([0-9]+ [0-9]+ [0-9]+:[0-9]+ /\S* / (rw|ro).*- \S+ \S+ (rw|ro)\S*)"), // Proc mount - always rw. ContainsRegex( R"([0-9]+ [0-9]+ [0-9]+:[0-9]+ / /proc rw.*- \S+ \S+ rw\S*)"))); } // Check that /proc/self/mounts looks something like a real mounts file. TEST(ProcSelfMounts, RequiredFieldsArePresent) { auto mounts = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/mounts")); EXPECT_THAT(mounts, AllOf( // Root mount. ContainsRegex(R"(\S+ / \S+ (rw|ro)\S* [0-9]+ [0-9]+\s)"), // Root mount. 
ContainsRegex(R"(\S+ /proc \S+ rw\S* [0-9]+ [0-9]+\s)"))); } void CheckDuplicatesRecursively(std::string path) { std::vector<std::string> child_dirs; // There is the known issue of the linux procfs, that two consequent calls of // readdir can return the same entry twice if between these calls one or more // entries have been removed from this directory. int max_attempts = 5; for (int i = 0; i < max_attempts; i++) { child_dirs.clear(); errno = 0; bool success = true; DIR* dir = opendir(path.c_str()); if (dir == nullptr) { // Ignore any directories we can't read or missing directories as the // directory could have been deleted/mutated from the time the parent // directory contents were read. return; } auto dir_closer = Cleanup([&dir]() { closedir(dir); }); absl::node_hash_set<std::string> children; while (true) { // Readdir(3): If the end of the directory stream is reached, NULL is // returned and errno is not changed. If an error occurs, NULL is // returned and errno is set appropriately. To distinguish end of stream // and from an error, set errno to zero before calling readdir() and then // check the value of errno if NULL is returned. errno = 0; struct dirent* dp = readdir(dir); if (dp == nullptr) { // Linux will return EINVAL when calling getdents on a /proc/tid/net // file corresponding to a zombie task. // See fs/proc/proc_net.c:proc_tgid_net_readdir(). // // We just ignore the directory in this case. if (errno == EINVAL && absl::StartsWith(path, "/proc/") && absl::EndsWith(path, "/net")) { break; } // We may also see permission failures traversing some files. if (errno == EACCES && absl::StartsWith(path, "/proc/")) { break; } // Otherwise, no errors are allowed. ASSERT_EQ(errno, 0) << path; break; // We're done. } const std::string name = dp->d_name; if (name == "." || name == "..") { continue; } // Ignore a duplicate entry if it isn't the last attempt. 
if (i == max_attempts - 1) { ASSERT_EQ(children.find(name), children.end()) << absl::StrCat(path, "/", name); } else if (children.find(name) != children.end()) { std::cerr << "Duplicate entry: " << i << ":" << absl::StrCat(path, "/", name) << std::endl; success = false; break; } children.insert(name); if (dp->d_type == DT_DIR) { child_dirs.push_back(name); } } if (success) { break; } } for (auto dname = child_dirs.begin(); dname != child_dirs.end(); dname++) { CheckDuplicatesRecursively(absl::StrCat(path, "/", *dname)); } } TEST(Proc, NoDuplicates) { CheckDuplicatesRecursively("/proc"); } // Most /proc/PID files are owned by the task user with SUID_DUMP_USER. TEST(ProcPid, UserDumpableOwner) { int before; ASSERT_THAT(before = prctl(PR_GET_DUMPABLE), SyscallSucceeds()); auto cleanup = Cleanup([before] { ASSERT_THAT(prctl(PR_SET_DUMPABLE, before), SyscallSucceeds()); }); EXPECT_THAT(prctl(PR_SET_DUMPABLE, SUID_DUMP_USER), SyscallSucceeds()); // This applies to the task directory itself and files inside. struct stat st; ASSERT_THAT(stat("/proc/self/", &st), SyscallSucceeds()); EXPECT_EQ(st.st_uid, geteuid()); EXPECT_EQ(st.st_gid, getegid()); ASSERT_THAT(stat("/proc/self/stat", &st), SyscallSucceeds()); EXPECT_EQ(st.st_uid, geteuid()); EXPECT_EQ(st.st_gid, getegid()); } // /proc/PID files are owned by root with SUID_DUMP_DISABLE. TEST(ProcPid, RootDumpableOwner) { int before; ASSERT_THAT(before = prctl(PR_GET_DUMPABLE), SyscallSucceeds()); auto cleanup = Cleanup([before] { ASSERT_THAT(prctl(PR_SET_DUMPABLE, before), SyscallSucceeds()); }); EXPECT_THAT(prctl(PR_SET_DUMPABLE, SUID_DUMP_DISABLE), SyscallSucceeds()); // This *does not* applies to the task directory itself (or other 0555 // directories), but does to files inside. struct stat st; ASSERT_THAT(stat("/proc/self/", &st), SyscallSucceeds()); EXPECT_EQ(st.st_uid, geteuid()); EXPECT_EQ(st.st_gid, getegid()); // This file is owned by root. 
Also allow nobody in case this test is running // in a userns without root mapped. ASSERT_THAT(stat("/proc/self/stat", &st), SyscallSucceeds()); EXPECT_THAT(st.st_uid, AnyOf(Eq(0), Eq(65534))); EXPECT_THAT(st.st_gid, AnyOf(Eq(0), Eq(65534))); } TEST(Proc, GetdentsEnoent) { FileDescriptor fd; ASSERT_NO_ERRNO(WithSubprocess( [&](int pid) -> PosixError { // Running. ASSIGN_OR_RETURN_ERRNO(fd, Open(absl::StrCat("/proc/", pid, "/task"), O_RDONLY | O_DIRECTORY)); return NoError(); }, nullptr, nullptr)); char buf[1024]; ASSERT_THAT(syscall(SYS_getdents64, fd.get(), buf, sizeof(buf)), SyscallFailsWithErrno(ENOENT)); } void CheckSyscwFromIOFile(const std::string& path, const std::string& regex) { std::string output; ASSERT_NO_ERRNO(GetContents(path, &output)); ASSERT_THAT(output, ContainsRegex(absl::StrCat("syscw:\\s+", regex, "\n"))); } // Checks that there is variable accounting of IO between threads/tasks. TEST(Proc, PidTidIOAccounting) { absl::Notification notification; // Run a thread with a bunch of writes. Check that io account records exactly // the number of write calls. File open/close is there to prevent buffering. ScopedThread writer([¬ification] { const int num_writes = 100; for (int i = 0; i < num_writes; i++) { auto path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); ASSERT_NO_ERRNO(SetContents(path.path(), "a")); } notification.Notify(); const std::string& writer_dir = absl::StrCat("/proc/", getpid(), "/task/", gettid(), "/io"); CheckSyscwFromIOFile(writer_dir, std::to_string(num_writes)); }); // Run a thread and do no writes. Check that no writes are recorded. 
ScopedThread noop([¬ification] { notification.WaitForNotification(); const std::string& noop_dir = absl::StrCat("/proc/", getpid(), "/task/", gettid(), "/io"); CheckSyscwFromIOFile(noop_dir, "0"); }); writer.Join(); noop.Join(); } TEST(Proc, Statfs) { struct statfs st; EXPECT_THAT(statfs("/proc", &st), SyscallSucceeds()); if (IsRunningWithVFS1()) { EXPECT_EQ(st.f_type, ANON_INODE_FS_MAGIC); } else { EXPECT_EQ(st.f_type, PROC_SUPER_MAGIC); } EXPECT_EQ(st.f_bsize, getpagesize()); EXPECT_EQ(st.f_namelen, NAME_MAX); } } // namespace } // namespace testing } // namespace gvisor int main(int argc, char** argv) { for (int i = 0; i < argc; ++i) { gvisor::testing::saved_argv.emplace_back(std::string(argv[i])); } gvisor::testing::TestInit(&argc, &argv); return gvisor::testing::RunAllTests(); }