diff options
Diffstat (limited to 'test/syscalls/linux/mmap.cc')
-rw-r--r-- | test/syscalls/linux/mmap.cc | 1676 |
1 files changed, 1676 insertions, 0 deletions
diff --git a/test/syscalls/linux/mmap.cc b/test/syscalls/linux/mmap.cc new file mode 100644 index 000000000..6d3227ab6 --- /dev/null +++ b/test/syscalls/linux/mmap.cc @@ -0,0 +1,1676 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <errno.h> +#include <fcntl.h> +#include <linux/magic.h> +#include <linux/unistd.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/resource.h> +#include <sys/statfs.h> +#include <sys/syscall.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/strings/escaping.h" +#include "absl/strings/str_split.h" +#include "test/util/cleanup.h" +#include "test/util/file_descriptor.h" +#include "test/util/fs_util.h" +#include "test/util/memory_util.h" +#include "test/util/multiprocess_util.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +using ::testing::Gt; + +namespace gvisor { +namespace testing { + +namespace { + +PosixErrorOr<int64_t> VirtualMemorySize() { + ASSIGN_OR_RETURN_ERRNO(auto contents, GetContents("/proc/self/statm")); + std::vector<std::string> parts = absl::StrSplit(contents, ' '); + if (parts.empty()) { + return PosixError(EINVAL, "Unable to parse /proc/self/statm"); + } + ASSIGN_OR_RETURN_ERRNO(auto pages, Atoi<int64_t>(parts[0])); + return pages * getpagesize(); +} + +class MMapTest : public ::testing::Test { + protected: + // Unmap mapping, if one was made. + void TearDown() override { + if (addr_) { + EXPECT_THAT(Unmap(), SyscallSucceeds()); + } + } + + // Remembers mapping, so it can be automatically unmapped. + uintptr_t Map(uintptr_t addr, size_t length, int prot, int flags, int fd, + off_t offset) { + void* ret = + mmap(reinterpret_cast<void*>(addr), length, prot, flags, fd, offset); + + if (ret != MAP_FAILED) { + addr_ = ret; + length_ = length; + } + + return reinterpret_cast<uintptr_t>(ret); + } + + // Unmap previous mapping + int Unmap() { + if (!addr_) { + return -1; + } + + int ret = munmap(addr_, length_); + + addr_ = nullptr; + length_ = 0; + + return ret; + } + + // Msync the mapping. + int Msync() { return msync(addr_, length_, MS_SYNC); } + + // Mlock the mapping. + int Mlock() { return mlock(addr_, length_); } + + // Munlock the mapping. + int Munlock() { return munlock(addr_, length_); } + + int Protect(uintptr_t addr, size_t length, int prot) { + return mprotect(reinterpret_cast<void*>(addr), length, prot); + } + + void* addr_ = nullptr; + size_t length_ = 0; +}; + +// Matches if arg contains the same contents as string str. +MATCHER_P(EqualsMemory, str, "") { + if (0 == memcmp(arg, str.c_str(), str.size())) { + return true; + } + + *result_listener << "Memory did not match. Got:\n" + << absl::BytesToHexString( + std::string(static_cast<char*>(arg), str.size())) + << "Want:\n" + << absl::BytesToHexString(str); + return false; +} + +// We can't map pipes, but for different reasons. +TEST_F(MMapTest, MapPipe) { + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + EXPECT_THAT(Map(0, kPageSize, PROT_READ, MAP_PRIVATE, fds[0], 0), + SyscallFailsWithErrno(ENODEV)); + EXPECT_THAT(Map(0, kPageSize, PROT_READ, MAP_PRIVATE, fds[1], 0), + SyscallFailsWithErrno(EACCES)); + ASSERT_THAT(close(fds[0]), SyscallSucceeds()); + ASSERT_THAT(close(fds[1]), SyscallSucceeds()); +} + +// It's very common to mmap /dev/zero because anonymous mappings aren't part +// of POSIX although they are widely supported. So a zero initialized memory +// region would actually come from a "file backed" /dev/zero mapping. +TEST_F(MMapTest, MapDevZeroShared) { + // This test will verify that we're able to map a page backed by /dev/zero + // as MAP_SHARED. + const FileDescriptor dev_zero = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR)); + + // Test that we can create a RW SHARED mapping of /dev/zero. + ASSERT_THAT( + Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, dev_zero.get(), 0), + SyscallSucceeds()); +} + +TEST_F(MMapTest, MapDevZeroPrivate) { + // This test will verify that we're able to map a page backed by /dev/zero + // as MAP_PRIVATE. + const FileDescriptor dev_zero = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR)); + + // Test that we can create a RW SHARED mapping of /dev/zero. + ASSERT_THAT( + Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE, dev_zero.get(), 0), + SyscallSucceeds()); +} + +TEST_F(MMapTest, MapDevZeroNoPersistence) { + // This test will verify that two independent mappings of /dev/zero do not + // appear to reference the same "backed file." + + const FileDescriptor dev_zero1 = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR)); + const FileDescriptor dev_zero2 = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR)); + + ASSERT_THAT( + Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, dev_zero1.get(), 0), + SyscallSucceeds()); + + // Create a second mapping via the second /dev/zero fd. + void* psec_map = mmap(nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, + dev_zero2.get(), 0); + ASSERT_THAT(reinterpret_cast<intptr_t>(psec_map), SyscallSucceeds()); + + // Always unmap. + auto cleanup_psec_map = Cleanup( + [&] { EXPECT_THAT(munmap(psec_map, kPageSize), SyscallSucceeds()); }); + + // Verify that we have independently addressed pages. + ASSERT_NE(psec_map, addr_); + + std::string buf_zero(kPageSize, 0x00); + std::string buf_ones(kPageSize, 0xFF); + + // Verify the first is actually all zeros after mmap. + EXPECT_THAT(addr_, EqualsMemory(buf_zero)); + + // Let's fill in the first mapping with 0xFF. + memcpy(addr_, buf_ones.data(), kPageSize); + + // Verify that the memcpy actually stuck in the page. + EXPECT_THAT(addr_, EqualsMemory(buf_ones)); + + // Verify that it didn't affect the second page which should be all zeros. + EXPECT_THAT(psec_map, EqualsMemory(buf_zero)); +} + +TEST_F(MMapTest, MapDevZeroSharedMultiplePages) { + // This will test that we're able to map /dev/zero over multiple pages. + const FileDescriptor dev_zero = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR)); + + // Test that we can create a RW SHARED mapping of /dev/zero. + ASSERT_THAT(Map(0, kPageSize * 2, PROT_READ | PROT_WRITE, MAP_PRIVATE, + dev_zero.get(), 0), + SyscallSucceeds()); + + std::string buf_zero(kPageSize * 2, 0x00); + std::string buf_ones(kPageSize * 2, 0xFF); + + // Verify the two pages are actually all zeros after mmap. + EXPECT_THAT(addr_, EqualsMemory(buf_zero)); + + // Fill out the pages with all ones. + memcpy(addr_, buf_ones.data(), kPageSize * 2); + + // Verify that the memcpy actually stuck in the pages. + EXPECT_THAT(addr_, EqualsMemory(buf_ones)); +} + +TEST_F(MMapTest, MapDevZeroSharedFdNoPersistence) { + // This test will verify that two independent mappings of /dev/zero do not + // appear to reference the same "backed file" even when mapped from the + // same initial fd. + const FileDescriptor dev_zero = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR)); + + ASSERT_THAT( + Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, dev_zero.get(), 0), + SyscallSucceeds()); + + // Create a second mapping via the same fd. + void* psec_map = mmap(nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, + dev_zero.get(), 0); + ASSERT_THAT(reinterpret_cast<int64_t>(psec_map), SyscallSucceeds()); + + // Always unmap. + auto cleanup_psec_map = Cleanup( + [&] { ASSERT_THAT(munmap(psec_map, kPageSize), SyscallSucceeds()); }); + + // Verify that we have independently addressed pages. + ASSERT_NE(psec_map, addr_); + + std::string buf_zero(kPageSize, 0x00); + std::string buf_ones(kPageSize, 0xFF); + + // Verify the first is actually all zeros after mmap. + EXPECT_THAT(addr_, EqualsMemory(buf_zero)); + + // Let's fill in the first mapping with 0xFF. + memcpy(addr_, buf_ones.data(), kPageSize); + + // Verify that the memcpy actually stuck in the page. + EXPECT_THAT(addr_, EqualsMemory(buf_ones)); + + // Verify that it didn't affect the second page which should be all zeros. + EXPECT_THAT(psec_map, EqualsMemory(buf_zero)); +} + +TEST_F(MMapTest, MapDevZeroSegfaultAfterUnmap) { + SetupGvisorDeathTest(); + + // This test will verify that we're able to map a page backed by /dev/zero + // as MAP_SHARED and after it's unmapped any access results in a SIGSEGV. + // This test is redundant but given the special nature of /dev/zero mappings + // it doesn't hurt. + const FileDescriptor dev_zero = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR)); + + const auto rest = [&] { + // Test that we can create a RW SHARED mapping of /dev/zero. + TEST_PCHECK(Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, + dev_zero.get(), + 0) != reinterpret_cast<uintptr_t>(MAP_FAILED)); + + // Confirm that accesses after the unmap result in a SIGSEGV. + // + // N.B. We depend on this process being single-threaded to ensure there + // can't be another mmap to map addr before the dereference below. + void* addr_saved = addr_; // Unmap resets addr_. + TEST_PCHECK(Unmap() == 0); + *reinterpret_cast<volatile int*>(addr_saved) = 0xFF; + }; + + EXPECT_THAT(InForkedProcess(rest), + IsPosixErrorOkAndHolds(W_EXITCODE(0, SIGSEGV))); +} + +TEST_F(MMapTest, MapDevZeroUnaligned) { + const FileDescriptor dev_zero = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR)); + const size_t size = kPageSize + kPageSize / 2; + const std::string buf_zero(size, 0x00); + + ASSERT_THAT( + Map(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, dev_zero.get(), 0), + SyscallSucceeds()); + EXPECT_THAT(addr_, EqualsMemory(buf_zero)); + ASSERT_THAT(Unmap(), SyscallSucceeds()); + + ASSERT_THAT( + Map(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE, dev_zero.get(), 0), + SyscallSucceeds()); + EXPECT_THAT(addr_, EqualsMemory(buf_zero)); +} + +// We can't map _some_ character devices. +TEST_F(MMapTest, MapCharDevice) { + const FileDescriptor cdevfd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/random", 0, 0)); + EXPECT_THAT(Map(0, kPageSize, PROT_READ, MAP_PRIVATE, cdevfd.get(), 0), + SyscallFailsWithErrno(ENODEV)); +} + +// We can't map directories. +TEST_F(MMapTest, MapDirectory) { + const FileDescriptor dirfd = + ASSERT_NO_ERRNO_AND_VALUE(Open(GetAbsoluteTestTmpdir(), 0, 0)); + EXPECT_THAT(Map(0, kPageSize, PROT_READ, MAP_PRIVATE, dirfd.get(), 0), + SyscallFailsWithErrno(ENODEV)); +} + +// We can map *something* +TEST_F(MMapTest, MapAnything) { + EXPECT_THAT(Map(0, kPageSize, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallSucceedsWithValue(Gt(0))); +} + +// Map length < PageSize allowed +TEST_F(MMapTest, SmallMap) { + EXPECT_THAT(Map(0, 128, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallSucceeds()); +} + +// Hint address doesn't break anything. +// Note: there is no requirement we actually get the hint address +TEST_F(MMapTest, HintAddress) { + EXPECT_THAT( + Map(0x30000000, kPageSize, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallSucceeds()); +} + +// MAP_FIXED gives us exactly the requested address +TEST_F(MMapTest, MapFixed) { + EXPECT_THAT(Map(0x30000000, kPageSize, PROT_NONE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0), + SyscallSucceedsWithValue(0x30000000)); +} + +// 64-bit addresses work too +#if defined(__x86_64__) || defined(__aarch64__) +TEST_F(MMapTest, MapFixed64) { + EXPECT_THAT(Map(0x300000000000, kPageSize, PROT_NONE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0), + SyscallSucceedsWithValue(0x300000000000)); +} +#endif + +// MAP_STACK allowed. +// There isn't a good way to verify it did anything. +TEST_F(MMapTest, MapStack) { + EXPECT_THAT(Map(0, kPageSize, PROT_NONE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0), + SyscallSucceeds()); +} + +// MAP_LOCKED allowed. +// There isn't a good way to verify it did anything. +TEST_F(MMapTest, MapLocked) { + EXPECT_THAT(Map(0, kPageSize, PROT_NONE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_LOCKED, -1, 0), + SyscallSucceeds()); +} + +// MAP_PRIVATE or MAP_SHARED must be passed +TEST_F(MMapTest, NotPrivateOrShared) { + EXPECT_THAT(Map(0, kPageSize, PROT_NONE, MAP_ANONYMOUS, -1, 0), + SyscallFailsWithErrno(EINVAL)); +} + +// Only one of MAP_PRIVATE or MAP_SHARED may be passed +TEST_F(MMapTest, PrivateAndShared) { + EXPECT_THAT(Map(0, kPageSize, PROT_NONE, + MAP_PRIVATE | MAP_SHARED | MAP_ANONYMOUS, -1, 0), + SyscallFailsWithErrno(EINVAL)); +} + +TEST_F(MMapTest, FixedAlignment) { + // Addr must be page aligned (MAP_FIXED) + EXPECT_THAT(Map(0x30000001, kPageSize, PROT_NONE, + MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS, -1, 0), + SyscallFailsWithErrno(EINVAL)); +} + +// Non-MAP_FIXED address does not need to be page aligned +TEST_F(MMapTest, NonFixedAlignment) { + EXPECT_THAT( + Map(0x30000001, kPageSize, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallSucceeds()); +} + +// Length = 0 results in EINVAL. +TEST_F(MMapTest, InvalidLength) { + EXPECT_THAT(Map(0, 0, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallFailsWithErrno(EINVAL)); +} + +// Bad fd not allowed. +TEST_F(MMapTest, BadFd) { + EXPECT_THAT(Map(0, kPageSize, PROT_NONE, MAP_PRIVATE, 999, 0), + SyscallFailsWithErrno(EBADF)); +} + +// Mappings are writable. +TEST_F(MMapTest, ProtWrite) { + uint64_t addr; + constexpr uint8_t kFirstWord[] = {42, 42, 42, 42}; + + EXPECT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallSucceeds()); + + // This shouldn't cause a SIGSEGV. + memset(reinterpret_cast<void*>(addr), 42, kPageSize); + + // The written data should actually be there. + EXPECT_EQ( + 0, memcmp(reinterpret_cast<void*>(addr), kFirstWord, sizeof(kFirstWord))); +} + +// "Write-only" mappings are writable *and* readable. +TEST_F(MMapTest, ProtWriteOnly) { + uint64_t addr; + constexpr uint8_t kFirstWord[] = {42, 42, 42, 42}; + + EXPECT_THAT( + addr = Map(0, kPageSize, PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallSucceeds()); + + // This shouldn't cause a SIGSEGV. + memset(reinterpret_cast<void*>(addr), 42, kPageSize); + + // The written data should actually be there. + EXPECT_EQ( + 0, memcmp(reinterpret_cast<void*>(addr), kFirstWord, sizeof(kFirstWord))); +} + +// "Write-only" mappings are readable. +// +// This is distinct from above to ensure the page is accessible even if the +// initial fault is a write fault. +TEST_F(MMapTest, ProtWriteOnlyReadable) { + uint64_t addr; + constexpr uint64_t kFirstWord = 0; + + EXPECT_THAT( + addr = Map(0, kPageSize, PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallSucceeds()); + + EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), &kFirstWord, + sizeof(kFirstWord))); +} + +// Mappings are writable after mprotect from PROT_NONE to PROT_READ|PROT_WRITE. +TEST_F(MMapTest, ProtectProtWrite) { + uint64_t addr; + constexpr uint8_t kFirstWord[] = {42, 42, 42, 42}; + + EXPECT_THAT( + addr = Map(0, kPageSize, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallSucceeds()); + + ASSERT_THAT(Protect(addr, kPageSize, PROT_READ | PROT_WRITE), + SyscallSucceeds()); + + // This shouldn't cause a SIGSEGV. + memset(reinterpret_cast<void*>(addr), 42, kPageSize); + + // The written data should actually be there. + EXPECT_EQ( + 0, memcmp(reinterpret_cast<void*>(addr), kFirstWord, sizeof(kFirstWord))); +} + +// SIGSEGV raised when reading PROT_NONE memory +TEST_F(MMapTest, ProtNoneDeath) { + SetupGvisorDeathTest(); + + uintptr_t addr; + + ASSERT_THAT( + addr = Map(0, kPageSize, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallSucceeds()); + + EXPECT_EXIT(*reinterpret_cast<volatile int*>(addr), + ::testing::KilledBySignal(SIGSEGV), ""); +} + +// SIGSEGV raised when writing PROT_READ only memory +TEST_F(MMapTest, ReadOnlyDeath) { + SetupGvisorDeathTest(); + + uintptr_t addr; + + ASSERT_THAT( + addr = Map(0, kPageSize, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallSucceeds()); + + EXPECT_EXIT(*reinterpret_cast<volatile int*>(addr) = 42, + ::testing::KilledBySignal(SIGSEGV), ""); +} + +// Writable mapping mprotect'd to read-only should not be writable. +TEST_F(MMapTest, MprotectReadOnlyDeath) { + SetupGvisorDeathTest(); + + uintptr_t addr; + + ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallSucceeds()); + + volatile int* val = reinterpret_cast<int*>(addr); + + // Copy to ensure page is mapped in. + *val = 42; + + ASSERT_THAT(Protect(addr, kPageSize, PROT_READ), SyscallSucceeds()); + + // Now it shouldn't be writable. + EXPECT_EXIT(*val = 0, ::testing::KilledBySignal(SIGSEGV), ""); +} + +// Verify that calling mprotect an address that's not page aligned fails. +TEST_F(MMapTest, MprotectNotPageAligned) { + uintptr_t addr; + + ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallSucceeds()); + ASSERT_THAT(Protect(addr + 1, kPageSize - 1, PROT_READ), + SyscallFailsWithErrno(EINVAL)); +} + +// Verify that calling mprotect with an absurdly huge length fails. +TEST_F(MMapTest, MprotectHugeLength) { + uintptr_t addr; + + ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallSucceeds()); + ASSERT_THAT(Protect(addr, static_cast<size_t>(-1), PROT_READ), + SyscallFailsWithErrno(ENOMEM)); +} + +#if defined(__x86_64__) || defined(__i386__) +// This code is equivalent in 32 and 64-bit mode +const uint8_t machine_code[] = { + 0xb8, 0x2a, 0x00, 0x00, 0x00, // movl $42, %eax + 0xc3, // retq +}; +#elif defined(__aarch64__) +const uint8_t machine_code[] = { + 0x40, 0x05, 0x80, 0x52, // mov w0, #42 + 0xc0, 0x03, 0x5f, 0xd6, // ret +}; +#endif + +// PROT_EXEC allows code execution +TEST_F(MMapTest, ProtExec) { + uintptr_t addr; + uint32_t (*func)(void); + + EXPECT_THAT(addr = Map(0, kPageSize, PROT_EXEC | PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallSucceeds()); + + memcpy(reinterpret_cast<void*>(addr), machine_code, sizeof(machine_code)); + + func = reinterpret_cast<uint32_t (*)(void)>(addr); + + EXPECT_EQ(42, func()); +} + +// No PROT_EXEC disallows code execution +TEST_F(MMapTest, NoProtExecDeath) { + SetupGvisorDeathTest(); + + uintptr_t addr; + uint32_t (*func)(void); + + EXPECT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallSucceeds()); + + memcpy(reinterpret_cast<void*>(addr), machine_code, sizeof(machine_code)); + + func = reinterpret_cast<uint32_t (*)(void)>(addr); + + EXPECT_EXIT(func(), ::testing::KilledBySignal(SIGSEGV), ""); +} + +TEST_F(MMapTest, NoExceedLimitData) { + void* prevbrk; + void* target_brk; + struct rlimit setlim; + + prevbrk = sbrk(0); + ASSERT_NE(-1, reinterpret_cast<intptr_t>(prevbrk)); + target_brk = reinterpret_cast<char*>(prevbrk) + 1; + + setlim.rlim_cur = RLIM_INFINITY; + setlim.rlim_max = RLIM_INFINITY; + ASSERT_THAT(setrlimit(RLIMIT_DATA, &setlim), SyscallSucceeds()); + EXPECT_THAT(brk(target_brk), SyscallSucceedsWithValue(0)); +} + +TEST_F(MMapTest, ExceedLimitData) { + // To unit test this more precisely, we'd need access to the mm's start_brk + // and end_brk, which we don't have direct access to :/ + void* prevbrk; + void* target_brk; + struct rlimit setlim; + + prevbrk = sbrk(0); + ASSERT_NE(-1, reinterpret_cast<intptr_t>(prevbrk)); + target_brk = reinterpret_cast<char*>(prevbrk) + 8192; + + setlim.rlim_cur = 0; + setlim.rlim_max = RLIM_INFINITY; + // Set RLIMIT_DATA very low so any subsequent brk() calls fail. + // Reset RLIMIT_DATA during teardown step. + ASSERT_THAT(setrlimit(RLIMIT_DATA, &setlim), SyscallSucceeds()); + EXPECT_THAT(brk(target_brk), SyscallFailsWithErrno(ENOMEM)); + // Teardown step... + setlim.rlim_cur = RLIM_INFINITY; + ASSERT_THAT(setrlimit(RLIMIT_DATA, &setlim), SyscallSucceeds()); +} + +TEST_F(MMapTest, ExceedLimitDataPrlimit) { + // To unit test this more precisely, we'd need access to the mm's start_brk + // and end_brk, which we don't have direct access to :/ + void* prevbrk; + void* target_brk; + struct rlimit setlim; + + prevbrk = sbrk(0); + ASSERT_NE(-1, reinterpret_cast<intptr_t>(prevbrk)); + target_brk = reinterpret_cast<char*>(prevbrk) + 8192; + + setlim.rlim_cur = 0; + setlim.rlim_max = RLIM_INFINITY; + // Set RLIMIT_DATA very low so any subsequent brk() calls fail. + // Reset RLIMIT_DATA during teardown step. + ASSERT_THAT(prlimit(0, RLIMIT_DATA, &setlim, nullptr), SyscallSucceeds()); + EXPECT_THAT(brk(target_brk), SyscallFailsWithErrno(ENOMEM)); + // Teardown step... + setlim.rlim_cur = RLIM_INFINITY; + ASSERT_THAT(setrlimit(RLIMIT_DATA, &setlim), SyscallSucceeds()); +} + +TEST_F(MMapTest, ExceedLimitDataPrlimitPID) { + // To unit test this more precisely, we'd need access to the mm's start_brk + // and end_brk, which we don't have direct access to :/ + void* prevbrk; + void* target_brk; + struct rlimit setlim; + + prevbrk = sbrk(0); + ASSERT_NE(-1, reinterpret_cast<intptr_t>(prevbrk)); + target_brk = reinterpret_cast<char*>(prevbrk) + 8192; + + setlim.rlim_cur = 0; + setlim.rlim_max = RLIM_INFINITY; + // Set RLIMIT_DATA very low so any subsequent brk() calls fail. + // Reset RLIMIT_DATA during teardown step. + ASSERT_THAT(prlimit(syscall(__NR_gettid), RLIMIT_DATA, &setlim, nullptr), + SyscallSucceeds()); + EXPECT_THAT(brk(target_brk), SyscallFailsWithErrno(ENOMEM)); + // Teardown step... + setlim.rlim_cur = RLIM_INFINITY; + ASSERT_THAT(setrlimit(RLIMIT_DATA, &setlim), SyscallSucceeds()); +} + +TEST_F(MMapTest, NoExceedLimitAS) { + constexpr uint64_t kAllocBytes = 200 << 20; + // Add some headroom to the AS limit in case of e.g. unexpected stack + // expansion. + constexpr uint64_t kExtraASBytes = kAllocBytes + (20 << 20); + static_assert(kAllocBytes < kExtraASBytes, + "test depends on allocation not exceeding AS limit"); + + auto vss = ASSERT_NO_ERRNO_AND_VALUE(VirtualMemorySize()); + struct rlimit setlim; + setlim.rlim_cur = vss + kExtraASBytes; + setlim.rlim_max = RLIM_INFINITY; + ASSERT_THAT(setrlimit(RLIMIT_AS, &setlim), SyscallSucceeds()); + EXPECT_THAT( + Map(0, kAllocBytes, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallSucceedsWithValue(Gt(0))); +} + +TEST_F(MMapTest, ExceedLimitAS) { + constexpr uint64_t kAllocBytes = 200 << 20; + // Add some headroom to the AS limit in case of e.g. unexpected stack + // expansion. + constexpr uint64_t kExtraASBytes = 20 << 20; + static_assert(kAllocBytes > kExtraASBytes, + "test depends on allocation exceeding AS limit"); + + auto vss = ASSERT_NO_ERRNO_AND_VALUE(VirtualMemorySize()); + struct rlimit setlim; + setlim.rlim_cur = vss + kExtraASBytes; + setlim.rlim_max = RLIM_INFINITY; + ASSERT_THAT(setrlimit(RLIMIT_AS, &setlim), SyscallSucceeds()); + EXPECT_THAT( + Map(0, kAllocBytes, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallFailsWithErrno(ENOMEM)); +} + +// Tests that setting an anonymous mmap to PROT_NONE doesn't free the memory. +TEST_F(MMapTest, SettingProtNoneDoesntFreeMemory) { + uintptr_t addr; + constexpr uint8_t kFirstWord[] = {42, 42, 42, 42}; + + EXPECT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallSucceedsWithValue(Gt(0))); + + memset(reinterpret_cast<void*>(addr), 42, kPageSize); + + ASSERT_THAT(Protect(addr, kPageSize, PROT_NONE), SyscallSucceeds()); + ASSERT_THAT(Protect(addr, kPageSize, PROT_READ | PROT_WRITE), + SyscallSucceeds()); + + // The written data should still be there. + EXPECT_EQ( + 0, memcmp(reinterpret_cast<void*>(addr), kFirstWord, sizeof(kFirstWord))); +} + +constexpr char kFileContents[] = "Hello World!"; + +class MMapFileTest : public MMapTest { + protected: + FileDescriptor fd_; + std::string filename_; + + // Open a file for read/write + void SetUp() override { + MMapTest::SetUp(); + + filename_ = NewTempAbsPath(); + fd_ = ASSERT_NO_ERRNO_AND_VALUE(Open(filename_, O_CREAT | O_RDWR, 0644)); + + // Extend file so it can be written once mapped. Deliberately make the file + // only half a page in size, so we can test what happens when we access the + // second half. + // Use ftruncate(2) once the sentry supports it. + char zero = 0; + size_t count = 0; + do { + const DisableSave ds; // saving 2048 times is slow and useless. + Write(&zero, 1), SyscallSucceedsWithValue(1); + } while (++count < (kPageSize / 2)); + ASSERT_THAT(lseek(fd_.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0)); + } + + // Close and delete file + void TearDown() override { + MMapTest::TearDown(); + fd_.reset(); // Make sure the files is closed before we unlink it. + ASSERT_THAT(unlink(filename_.c_str()), SyscallSucceeds()); + } + + ssize_t Read(char* buf, size_t count) { + ssize_t len = 0; + do { + ssize_t ret = read(fd_.get(), buf, count); + if (ret < 0) { + return ret; + } else if (ret == 0) { + return len; + } + + len += ret; + buf += ret; + } while (len < static_cast<ssize_t>(count)); + + return len; + } + + ssize_t Write(const char* buf, size_t count) { + ssize_t len = 0; + do { + ssize_t ret = write(fd_.get(), buf, count); + if (ret < 0) { + return ret; + } else if (ret == 0) { + return len; + } + + len += ret; + buf += ret; + } while (len < static_cast<ssize_t>(count)); + + return len; + } +}; + +class MMapFileParamTest + : public MMapFileTest, + public ::testing::WithParamInterface<std::tuple<int, int>> { + protected: + int prot() const { return std::get<0>(GetParam()); } + + int flags() const { return std::get<1>(GetParam()); } +}; + +// MAP_POPULATE allowed. +// There isn't a good way to verify it actually did anything. +TEST_P(MMapFileParamTest, MapPopulate) { + ASSERT_THAT(Map(0, kPageSize, prot(), flags() | MAP_POPULATE, fd_.get(), 0), + SyscallSucceeds()); +} + +// MAP_POPULATE on a short file. +TEST_P(MMapFileParamTest, MapPopulateShort) { + ASSERT_THAT( + Map(0, 2 * kPageSize, prot(), flags() | MAP_POPULATE, fd_.get(), 0), + SyscallSucceeds()); +} + +// Read contents from mapped file. +TEST_F(MMapFileTest, Read) { + size_t len = strlen(kFileContents); + ASSERT_EQ(len, Write(kFileContents, len)); + + uintptr_t addr; + ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ, MAP_PRIVATE, fd_.get(), 0), + SyscallSucceeds()); + + EXPECT_THAT(reinterpret_cast<char*>(addr), + EqualsMemory(std::string(kFileContents))); +} + +// Map at an offset. +TEST_F(MMapFileTest, MapOffset) { + ASSERT_THAT(lseek(fd_.get(), kPageSize, SEEK_SET), SyscallSucceeds()); + + size_t len = strlen(kFileContents); + ASSERT_EQ(len, Write(kFileContents, len)); + + uintptr_t addr; + ASSERT_THAT( + addr = Map(0, kPageSize, PROT_READ, MAP_PRIVATE, fd_.get(), kPageSize), + SyscallSucceeds()); + + EXPECT_THAT(reinterpret_cast<char*>(addr), + EqualsMemory(std::string(kFileContents))); +} + +TEST_F(MMapFileTest, MapOffsetBeyondEnd) { + SetupGvisorDeathTest(); + + uintptr_t addr; + ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE, + fd_.get(), 10 * kPageSize), + SyscallSucceeds()); + + // Touching the memory causes SIGBUS. + size_t len = strlen(kFileContents); + EXPECT_EXIT(std::copy(kFileContents, kFileContents + len, + reinterpret_cast<volatile char*>(addr)), + ::testing::KilledBySignal(SIGBUS), ""); +} + +// Verify mmap fails when sum of length and offset overflows. +TEST_F(MMapFileTest, MapLengthPlusOffsetOverflows) { + const size_t length = static_cast<size_t>(-kPageSize); + const off_t offset = kPageSize; + ASSERT_THAT(Map(0, length, PROT_READ, MAP_PRIVATE, fd_.get(), offset), + SyscallFailsWithErrno(ENOMEM)); +} + +// MAP_PRIVATE PROT_WRITE is allowed on read-only FDs. +TEST_F(MMapFileTest, WritePrivateOnReadOnlyFd) { + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(filename_, O_RDONLY)); + + uintptr_t addr; + EXPECT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE, + fd.get(), 0), + SyscallSucceeds()); + + // Touch the page to ensure the kernel didn't lie about writability. + size_t len = strlen(kFileContents); + std::copy(kFileContents, kFileContents + len, + reinterpret_cast<volatile char*>(addr)); +} + +// MAP_SHARED PROT_WRITE not allowed on read-only FDs. +TEST_F(MMapFileTest, WriteSharedOnReadOnlyFd) { + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(filename_, O_RDONLY)); + + uintptr_t addr; + EXPECT_THAT( + addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd.get(), 0), + SyscallFailsWithErrno(EACCES)); +} + +// The FD must be readable. +TEST_P(MMapFileParamTest, WriteOnlyFd) { + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(filename_, O_WRONLY)); + + uintptr_t addr; + EXPECT_THAT(addr = Map(0, kPageSize, prot(), flags(), fd.get(), 0), + SyscallFailsWithErrno(EACCES)); +} + +// Overwriting the contents of a file mapped MAP_SHARED PROT_READ +// should cause the new data to be reflected in the mapping. +TEST_F(MMapFileTest, ReadSharedConsistentWithOverwrite) { + // Start from scratch. + EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds()); + + // Expand the file to two pages and dirty them. + std::string bufA(kPageSize, 'a'); + ASSERT_THAT(Write(bufA.c_str(), bufA.size()), + SyscallSucceedsWithValue(bufA.size())); + std::string bufB(kPageSize, 'b'); + ASSERT_THAT(Write(bufB.c_str(), bufB.size()), + SyscallSucceedsWithValue(bufB.size())); + + // Map the page. + uintptr_t addr; + ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0), + SyscallSucceeds()); + + // Check that the mapping contains the right file data. + EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), bufA.c_str(), kPageSize)); + EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr + kPageSize), bufB.c_str(), + kPageSize)); + + // Start at the beginning of the file. + ASSERT_THAT(lseek(fd_.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0)); + + // Swap the write pattern. + ASSERT_THAT(Write(bufB.c_str(), bufB.size()), + SyscallSucceedsWithValue(bufB.size())); + ASSERT_THAT(Write(bufA.c_str(), bufA.size()), + SyscallSucceedsWithValue(bufA.size())); + + // Check that the mapping got updated. + EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), bufB.c_str(), kPageSize)); + EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr + kPageSize), bufA.c_str(), + kPageSize)); +} + +// Partially overwriting a file mapped MAP_SHARED PROT_READ should be reflected +// in the mapping. +TEST_F(MMapFileTest, ReadSharedConsistentWithPartialOverwrite) { + // Start from scratch. + EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds()); + + // Expand the file to two pages and dirty them. + std::string bufA(kPageSize, 'a'); + ASSERT_THAT(Write(bufA.c_str(), bufA.size()), + SyscallSucceedsWithValue(bufA.size())); + std::string bufB(kPageSize, 'b'); + ASSERT_THAT(Write(bufB.c_str(), bufB.size()), + SyscallSucceedsWithValue(bufB.size())); + + // Map the page. + uintptr_t addr; + ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0), + SyscallSucceeds()); + + // Check that the mapping contains the right file data. + EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), bufA.c_str(), kPageSize)); + EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr + kPageSize), bufB.c_str(), + kPageSize)); + + // Start at the beginning of the file. + ASSERT_THAT(lseek(fd_.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0)); + + // Do a partial overwrite, spanning both pages. + std::string bufC(kPageSize + (kPageSize / 2), 'c'); + ASSERT_THAT(Write(bufC.c_str(), bufC.size()), + SyscallSucceedsWithValue(bufC.size())); + + // Check that the mapping got updated. + EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), bufC.c_str(), + kPageSize + (kPageSize / 2))); + EXPECT_EQ(0, + memcmp(reinterpret_cast<void*>(addr + kPageSize + (kPageSize / 2)), + bufB.c_str(), kPageSize / 2)); +} + +// Overwriting a file mapped MAP_SHARED PROT_READ should be reflected in the +// mapping and the file. +TEST_F(MMapFileTest, ReadSharedConsistentWithWriteAndFile) { + // Start from scratch. + EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds()); + + // Expand the file to two full pages and dirty it. + std::string bufA(2 * kPageSize, 'a'); + ASSERT_THAT(Write(bufA.c_str(), bufA.size()), + SyscallSucceedsWithValue(bufA.size())); + + // Map only the first page. + uintptr_t addr; + ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0), + SyscallSucceeds()); + + // Prepare to overwrite the file contents. + ASSERT_THAT(lseek(fd_.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0)); + + // Overwrite everything, beyond the mapped portion. + std::string bufB(2 * kPageSize, 'b'); + ASSERT_THAT(Write(bufB.c_str(), bufB.size()), + SyscallSucceedsWithValue(bufB.size())); + + // What the mapped portion should now look like. + std::string bufMapped(kPageSize, 'b'); + + // Expect that the mapped portion is consistent. + EXPECT_EQ( + 0, memcmp(reinterpret_cast<void*>(addr), bufMapped.c_str(), kPageSize)); + + // Prepare to read the entire file contents. + ASSERT_THAT(lseek(fd_.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0)); + + // Expect that the file was fully updated. + std::vector<char> bufFile(2 * kPageSize); + ASSERT_THAT(Read(bufFile.data(), bufFile.size()), + SyscallSucceedsWithValue(bufFile.size())); + // Cast to void* to avoid EXPECT_THAT assuming bufFile.data() is a + // NUL-terminated C std::string. EXPECT_THAT will try to print a char* as a C + // std::string, possibly overruning the buffer. + EXPECT_THAT(reinterpret_cast<void*>(bufFile.data()), EqualsMemory(bufB)); +} + +// Write data to mapped file. +TEST_F(MMapFileTest, WriteShared) { + uintptr_t addr; + ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, + fd_.get(), 0), + SyscallSucceeds()); + + size_t len = strlen(kFileContents); + memcpy(reinterpret_cast<void*>(addr), kFileContents, len); + + // The file may not actually be updated until munmap is called. + ASSERT_THAT(Unmap(), SyscallSucceeds()); + + std::vector<char> buf(len); + ASSERT_THAT(Read(buf.data(), buf.size()), + SyscallSucceedsWithValue(buf.size())); + // Cast to void* to avoid EXPECT_THAT assuming buf.data() is a + // NUL-terminated C string. EXPECT_THAT will try to print a char* as a C + // string, possibly overruning the buffer. + EXPECT_THAT(reinterpret_cast<void*>(buf.data()), + EqualsMemory(std::string(kFileContents))); +} + +// Write data to portion of mapped page beyond the end of the file. +// These writes are not reflected in the file. +TEST_F(MMapFileTest, WriteSharedBeyondEnd) { + // The file is only half of a page. We map an entire page. Writes to the + // end of the mapping must not be reflected in the file. + uintptr_t addr; + ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, + fd_.get(), 0), + SyscallSucceeds()); + + // First half; this is reflected in the file. + std::string first(kPageSize / 2, 'A'); + memcpy(reinterpret_cast<void*>(addr), first.c_str(), first.size()); + + // Second half; this is not reflected in the file. + std::string second(kPageSize / 2, 'B'); + memcpy(reinterpret_cast<void*>(addr + kPageSize / 2), second.c_str(), + second.size()); + + // The file may not actually be updated until munmap is called. + ASSERT_THAT(Unmap(), SyscallSucceeds()); + + // Big enough to fit the entire page, if the writes are mistakenly written to + // the file. + std::vector<char> buf(kPageSize); + + // Only the first half is in the file. + ASSERT_THAT(Read(buf.data(), buf.size()), + SyscallSucceedsWithValue(first.size())); + // Cast to void* to avoid EXPECT_THAT assuming buf.data() is a + // NUL-terminated C string. EXPECT_THAT will try to print a char* as a C + // NUL-terminated C std::string. EXPECT_THAT will try to print a char* as a C + // std::string, possibly overruning the buffer. + EXPECT_THAT(reinterpret_cast<void*>(buf.data()), EqualsMemory(first)); +} + +// The portion of a mapped page that becomes part of the file after a truncate +// is reflected in the file. +TEST_F(MMapFileTest, WriteSharedTruncateUp) { + // The file is only half of a page. We map an entire page. Writes to the + // end of the mapping must not be reflected in the file. + uintptr_t addr; + ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, + fd_.get(), 0), + SyscallSucceeds()); + + // First half; this is reflected in the file. + std::string first(kPageSize / 2, 'A'); + memcpy(reinterpret_cast<void*>(addr), first.c_str(), first.size()); + + // Second half; this is not reflected in the file now (see + // WriteSharedBeyondEnd), but will be after the truncate. + std::string second(kPageSize / 2, 'B'); + memcpy(reinterpret_cast<void*>(addr + kPageSize / 2), second.c_str(), + second.size()); + + // Extend the file to a full page. The second half of the page will be + // reflected in the file. + EXPECT_THAT(ftruncate(fd_.get(), kPageSize), SyscallSucceeds()); + + // The file may not actually be updated until munmap is called. + ASSERT_THAT(Unmap(), SyscallSucceeds()); + + // The whole page is in the file. + std::vector<char> buf(kPageSize); + ASSERT_THAT(Read(buf.data(), buf.size()), + SyscallSucceedsWithValue(buf.size())); + // Cast to void* to avoid EXPECT_THAT assuming buf.data() is a + // NUL-terminated C string. EXPECT_THAT will try to print a char* as a C + // string, possibly overruning the buffer. + EXPECT_THAT(reinterpret_cast<void*>(buf.data()), EqualsMemory(first)); + EXPECT_THAT(reinterpret_cast<void*>(buf.data() + kPageSize / 2), + EqualsMemory(second)); +} + +TEST_F(MMapFileTest, ReadSharedTruncateDownThenUp) { + // Start from scratch. + EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds()); + + // Expand the file to a full page and dirty it. + std::string buf(kPageSize, 'a'); + ASSERT_THAT(Write(buf.c_str(), buf.size()), + SyscallSucceedsWithValue(buf.size())); + + // Map the page. + uintptr_t addr; + ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0), + SyscallSucceeds()); + + // Check that the memory contains the file data. + EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), buf.c_str(), kPageSize)); + + // Truncate down, then up. + EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds()); + EXPECT_THAT(ftruncate(fd_.get(), kPageSize), SyscallSucceeds()); + + // Check that the memory was zeroed. + std::string zeroed(kPageSize, '\0'); + EXPECT_EQ(0, + memcmp(reinterpret_cast<void*>(addr), zeroed.c_str(), kPageSize)); + + // The file may not actually be updated until msync is called. + ASSERT_THAT(Msync(), SyscallSucceeds()); + + // Prepare to read the entire file contents. + ASSERT_THAT(lseek(fd_.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0)); + + // Expect that the file is fully updated. + std::vector<char> bufFile(kPageSize); + ASSERT_THAT(Read(bufFile.data(), bufFile.size()), + SyscallSucceedsWithValue(bufFile.size())); + EXPECT_EQ(0, memcmp(bufFile.data(), zeroed.c_str(), kPageSize)); +} + +TEST_F(MMapFileTest, WriteSharedTruncateDownThenUp) { + // The file is only half of a page. We map an entire page. Writes to the + // end of the mapping must not be reflected in the file. + uintptr_t addr; + ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, + fd_.get(), 0), + SyscallSucceeds()); + + // First half; this will be deleted by truncate(0). + std::string first(kPageSize / 2, 'A'); + memcpy(reinterpret_cast<void*>(addr), first.c_str(), first.size()); + + // Truncate down, then up. + EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds()); + EXPECT_THAT(ftruncate(fd_.get(), kPageSize), SyscallSucceeds()); + + // The whole page is zeroed in memory. + std::string zeroed(kPageSize, '\0'); + EXPECT_EQ(0, + memcmp(reinterpret_cast<void*>(addr), zeroed.c_str(), kPageSize)); + + // The file may not actually be updated until munmap is called. + ASSERT_THAT(Unmap(), SyscallSucceeds()); + + // The whole file is also zeroed. + std::vector<char> buf(kPageSize); + ASSERT_THAT(Read(buf.data(), buf.size()), + SyscallSucceedsWithValue(buf.size())); + // Cast to void* to avoid EXPECT_THAT assuming buf.data() is a + // NUL-terminated C string. EXPECT_THAT will try to print a char* as a C + // string, possibly overruning the buffer. + EXPECT_THAT(reinterpret_cast<void*>(buf.data()), EqualsMemory(zeroed)); +} + +TEST_F(MMapFileTest, ReadSharedTruncateSIGBUS) { + SetupGvisorDeathTest(); + + // Start from scratch. + EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds()); + + // Expand the file to a full page and dirty it. + std::string buf(kPageSize, 'a'); + ASSERT_THAT(Write(buf.c_str(), buf.size()), + SyscallSucceedsWithValue(buf.size())); + + // Map the page. + uintptr_t addr; + ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0), + SyscallSucceeds()); + + // Check that the mapping contains the file data. + EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), buf.c_str(), kPageSize)); + + // Truncate down. + EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds()); + + // Accessing the truncated region should cause a SIGBUS. + std::vector<char> in(kPageSize); + EXPECT_EXIT( + std::copy(reinterpret_cast<volatile char*>(addr), + reinterpret_cast<volatile char*>(addr) + kPageSize, in.data()), + ::testing::KilledBySignal(SIGBUS), ""); +} + +TEST_F(MMapFileTest, WriteSharedTruncateSIGBUS) { + SetupGvisorDeathTest(); + + uintptr_t addr; + ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, + fd_.get(), 0), + SyscallSucceeds()); + + // Touch the memory to be sure it really is mapped. + size_t len = strlen(kFileContents); + memcpy(reinterpret_cast<void*>(addr), kFileContents, len); + + // Truncate down. + EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds()); + + // Accessing the truncated file should cause a SIGBUS. + EXPECT_EXIT(std::copy(kFileContents, kFileContents + len, + reinterpret_cast<volatile char*>(addr)), + ::testing::KilledBySignal(SIGBUS), ""); +} + +TEST_F(MMapFileTest, ReadSharedTruncatePartialPage) { + // Start from scratch. + EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds()); + + // Dirty the file. + std::string buf(kPageSize, 'a'); + ASSERT_THAT(Write(buf.c_str(), buf.size()), + SyscallSucceedsWithValue(buf.size())); + + // Map a page. + uintptr_t addr; + ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0), + SyscallSucceeds()); + + // Truncate to half of the page. + EXPECT_THAT(ftruncate(fd_.get(), kPageSize / 2), SyscallSucceeds()); + + // First half of the page untouched. + EXPECT_EQ(0, + memcmp(reinterpret_cast<void*>(addr), buf.data(), kPageSize / 2)); + + // Second half is zeroed. + std::string zeroed(kPageSize / 2, '\0'); + EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr + kPageSize / 2), + zeroed.c_str(), kPageSize / 2)); +} + +// Page can still be accessed and contents are intact after truncating a partial +// page. +TEST_F(MMapFileTest, WriteSharedTruncatePartialPage) { + // Expand the file to a full page. + EXPECT_THAT(ftruncate(fd_.get(), kPageSize), SyscallSucceeds()); + + uintptr_t addr; + ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, + fd_.get(), 0), + SyscallSucceeds()); + + // Fill the entire page. + std::string contents(kPageSize, 'A'); + memcpy(reinterpret_cast<void*>(addr), contents.c_str(), contents.size()); + + // Truncate half of the page. + EXPECT_THAT(ftruncate(fd_.get(), kPageSize / 2), SyscallSucceeds()); + + // First half of the page untouched. + EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), contents.c_str(), + kPageSize / 2)); + + // Second half zeroed. + std::string zeroed(kPageSize / 2, '\0'); + EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr + kPageSize / 2), + zeroed.c_str(), kPageSize / 2)); +} + +// MAP_PRIVATE writes are not carried through to the underlying file. +TEST_F(MMapFileTest, WritePrivate) { + uintptr_t addr; + ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE, + fd_.get(), 0), + SyscallSucceeds()); + + size_t len = strlen(kFileContents); + memcpy(reinterpret_cast<void*>(addr), kFileContents, len); + + // The file should not be updated, but if it mistakenly is, it may not be + // until after munmap is called. + ASSERT_THAT(Unmap(), SyscallSucceeds()); + + std::vector<char> buf(len); + ASSERT_THAT(Read(buf.data(), buf.size()), + SyscallSucceedsWithValue(buf.size())); + // Cast to void* to avoid EXPECT_THAT assuming buf.data() is a + // NUL-terminated C string. EXPECT_THAT will try to print a char* as a C + // string, possibly overruning the buffer. + EXPECT_THAT(reinterpret_cast<void*>(buf.data()), + EqualsMemory(std::string(len, '\0'))); +} + +// SIGBUS raised when reading or writing past end of a mapped file. +TEST_P(MMapFileParamTest, SigBusDeath) { + SetupGvisorDeathTest(); + + uintptr_t addr; + ASSERT_THAT(addr = Map(0, 2 * kPageSize, prot(), flags(), fd_.get(), 0), + SyscallSucceeds()); + + auto* start = reinterpret_cast<volatile char*>(addr + kPageSize); + + // MMapFileTest makes a file kPageSize/2 long. The entire first page should be + // accessible, but anything beyond it should not. + if (prot() & PROT_WRITE) { + // Write beyond first page. + size_t len = strlen(kFileContents); + EXPECT_EXIT(std::copy(kFileContents, kFileContents + len, start), + ::testing::KilledBySignal(SIGBUS), ""); + } else { + // Read beyond first page. + std::vector<char> in(kPageSize); + EXPECT_EXIT(std::copy(start, start + kPageSize, in.data()), + ::testing::KilledBySignal(SIGBUS), ""); + } +} + +// Tests that SIGBUS is not raised when reading or writing to a file-mapped +// page before EOF, even if part of the mapping extends beyond EOF. +// +// See b/27877699. +TEST_P(MMapFileParamTest, NoSigBusOnPagesBeforeEOF) { + uintptr_t addr; + ASSERT_THAT(addr = Map(0, 2 * kPageSize, prot(), flags(), fd_.get(), 0), + SyscallSucceeds()); + + // The test passes if this survives. + auto* start = reinterpret_cast<volatile char*>(addr + (kPageSize / 2) + 1); + size_t len = strlen(kFileContents); + if (prot() & PROT_WRITE) { + std::copy(kFileContents, kFileContents + len, start); + } else { + std::vector<char> in(len); + std::copy(start, start + len, in.data()); + } +} + +// Tests that SIGBUS is not raised when reading or writing from a file-mapped +// page containing EOF, *after* the EOF. +TEST_P(MMapFileParamTest, NoSigBusOnPageContainingEOF) { + uintptr_t addr; + ASSERT_THAT(addr = Map(0, 2 * kPageSize, prot(), flags(), fd_.get(), 0), + SyscallSucceeds()); + + // The test passes if this survives. (Technically addr+kPageSize/2 is already + // beyond EOF, but +1 to check for fencepost errors.) + auto* start = reinterpret_cast<volatile char*>(addr + (kPageSize / 2) + 1); + size_t len = strlen(kFileContents); + if (prot() & PROT_WRITE) { + std::copy(kFileContents, kFileContents + len, start); + } else { + std::vector<char> in(len); + std::copy(start, start + len, in.data()); + } +} + +// Tests that reading from writable shared file-mapped pages succeeds. +// +// On most platforms this is trivial, but when the file is mapped via the sentry +// page cache (which does not yet support writing to shared mappings), a bug +// caused reads to fail unnecessarily on such mappings. See b/28913513. +TEST_F(MMapFileTest, ReadingWritableSharedFilePageSucceeds) { + uintptr_t addr; + size_t len = strlen(kFileContents); + + ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, + fd_.get(), 0), + SyscallSucceeds()); + + std::vector<char> buf(kPageSize); + // The test passes if this survives. + std::copy(reinterpret_cast<volatile char*>(addr), + reinterpret_cast<volatile char*>(addr) + len, buf.data()); +} + +// Tests that EFAULT is returned when invoking a syscall that requires the OS to +// read past end of file (resulting in a fault in sentry context in the gVisor +// case). See b/28913513. +TEST_F(MMapFileTest, InternalSigBus) { + uintptr_t addr; + ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE, + fd_.get(), 0), + SyscallSucceeds()); + + // This depends on the fact that gVisor implements pipes internally. + int pipefd[2]; + ASSERT_THAT(pipe(pipefd), SyscallSucceeds()); + EXPECT_THAT( + write(pipefd[1], reinterpret_cast<void*>(addr + kPageSize), kPageSize), + SyscallFailsWithErrno(EFAULT)); + + EXPECT_THAT(close(pipefd[0]), SyscallSucceeds()); + EXPECT_THAT(close(pipefd[1]), SyscallSucceeds()); +} + +// Like InternalSigBus, but test the WriteZerosAt path by reading from +// /dev/zero to a shared mapping (so that the SIGBUS isn't caught during +// copy-on-write breaking). +TEST_F(MMapFileTest, InternalSigBusZeroing) { + uintptr_t addr; + ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, + fd_.get(), 0), + SyscallSucceeds()); + + const FileDescriptor dev_zero = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDONLY)); + EXPECT_THAT(read(dev_zero.get(), reinterpret_cast<void*>(addr + kPageSize), + kPageSize), + SyscallFailsWithErrno(EFAULT)); +} + +// Checks that mmaps with a length of uint64_t(-PAGE_SIZE + 1) or greater do not +// induce a sentry panic (due to "rounding up" to 0). +TEST_F(MMapTest, HugeLength) { + EXPECT_THAT(Map(0, static_cast<uint64_t>(-kPageSize + 1), PROT_NONE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallFailsWithErrno(ENOMEM)); +} + +// Tests for a specific gVisor MM caching bug. +TEST_F(MMapTest, AccessCOWInvalidatesCachedSegments) { + auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + auto fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDWR)); + auto zero_fd = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDONLY)); + + // Get a two-page private mapping and fill it with 1s. + uintptr_t addr; + ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), + SyscallSucceeds()); + memset(addr_, 1, 2 * kPageSize); + MaybeSave(); + + // Fork to make the mapping copy-on-write. + pid_t const pid = fork(); + if (pid == 0) { + // The child process waits for the parent to SIGKILL it. + while (true) { + pause(); + } + } + ASSERT_THAT(pid, SyscallSucceeds()); + auto cleanup_child = Cleanup([&] { + EXPECT_THAT(kill(pid, SIGKILL), SyscallSucceeds()); + int status; + EXPECT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid)); + }); + + // Induce a read-only Access of the first page of the mapping, which will not + // cause a copy. The usermem.Segment should be cached. + ASSERT_THAT(PwriteFd(fd.get(), addr_, kPageSize, 0), + SyscallSucceedsWithValue(kPageSize)); + + // Induce a writable Access of both pages of the mapping. This should + // invalidate the cached Segment. + ASSERT_THAT(PreadFd(zero_fd.get(), addr_, 2 * kPageSize, 0), + SyscallSucceedsWithValue(2 * kPageSize)); + + // Induce a read-only Access of the first page of the mapping again. It should + // read the 0s that were stored in the mapping by the read from /dev/zero. If + // the read failed to invalidate the cached Segment, it will instead read the + // 1s in the stale page. + ASSERT_THAT(PwriteFd(fd.get(), addr_, kPageSize, 0), + SyscallSucceedsWithValue(kPageSize)); + std::vector<char> buf(kPageSize); + ASSERT_THAT(PreadFd(fd.get(), buf.data(), kPageSize, 0), + SyscallSucceedsWithValue(kPageSize)); + for (size_t i = 0; i < kPageSize; i++) { + ASSERT_EQ(0, buf[i]) << "at offset " << i; + } +} + +TEST_F(MMapTest, NoReserve) { + const size_t kSize = 10 * 1 << 20; // 10M + uintptr_t addr; + ASSERT_THAT(addr = Map(0, kSize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0), + SyscallSucceeds()); + EXPECT_GT(addr, 0); + + // Check that every page can be read/written. Technically, writing to memory + // could SIGSEGV in case there is no more memory available. In gVisor it + // would never happen though because NORESERVE is ignored. In Linux, it's + // possible to fail, but allocation is small enough that it's highly likely + // to succeed. + for (size_t j = 0; j < kSize; j += kPageSize) { + EXPECT_EQ(0, reinterpret_cast<char*>(addr)[j]); + reinterpret_cast<char*>(addr)[j] = j; + } +} + +// Map more than the gVisor page-cache map unit (64k) and ensure that +// it is consistent with reading from the file. +TEST_F(MMapFileTest, Bug38498194) { + // Choose a sufficiently large map unit. + constexpr int kSize = 4 * 1024 * 1024; + EXPECT_THAT(ftruncate(fd_.get(), kSize), SyscallSucceeds()); + + // Map a large enough region so that multiple internal segments + // are created to back the mapping. + uintptr_t addr; + ASSERT_THAT( + addr = Map(0, kSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd_.get(), 0), + SyscallSucceeds()); + + std::vector<char> expect(kSize, 'a'); + std::copy(expect.data(), expect.data() + expect.size(), + reinterpret_cast<volatile char*>(addr)); + + // Trigger writeback for gVisor. In Linux pages stay cached until + // it can't hold onto them anymore. + ASSERT_THAT(Unmap(), SyscallSucceeds()); + + std::vector<char> buf(kSize); + ASSERT_THAT(Read(buf.data(), buf.size()), + SyscallSucceedsWithValue(buf.size())); + EXPECT_EQ(buf, expect) << std::string(buf.data(), buf.size()); +} + +// Tests that reading from a file to a memory mapping of the same file does not +// deadlock. See b/34813270. +TEST_F(MMapFileTest, SelfRead) { + uintptr_t addr; + ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, + fd_.get(), 0), + SyscallSucceeds()); + EXPECT_THAT(Read(reinterpret_cast<char*>(addr), kPageSize / 2), + SyscallSucceedsWithValue(kPageSize / 2)); + // The resulting file contents are poorly-specified and irrelevant. +} + +// Tests that writing to a file from a memory mapping of the same file does not +// deadlock. Regression test for b/34813270. +TEST_F(MMapFileTest, SelfWrite) { + uintptr_t addr; + ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0), + SyscallSucceeds()); + EXPECT_THAT(Write(reinterpret_cast<char*>(addr), kPageSize / 2), + SyscallSucceedsWithValue(kPageSize / 2)); + // The resulting file contents are poorly-specified and irrelevant. +} + +TEST(MMapDeathTest, TruncateAfterCOWBreak) { + SetupGvisorDeathTest(); + + // Create and map a single-page file. + auto const temp_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + auto const fd = ASSERT_NO_ERRNO_AND_VALUE(Open(temp_file.path(), O_RDWR)); + ASSERT_THAT(ftruncate(fd.get(), kPageSize), SyscallSucceeds()); + auto const mapping = ASSERT_NO_ERRNO_AND_VALUE(Mmap( + nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd.get(), 0)); + + // Write to this mapping, causing the page to be copied for write. + memset(mapping.ptr(), 'a', mapping.len()); + MaybeSave(); // Trigger a co-operative save cycle. + + // Truncate the file and expect it to invalidate the copied page. + ASSERT_THAT(ftruncate(fd.get(), 0), SyscallSucceeds()); + EXPECT_EXIT(*reinterpret_cast<volatile char*>(mapping.ptr()), + ::testing::KilledBySignal(SIGBUS), ""); +} + +// Regression test for #147. +TEST(MMapNoFixtureTest, MapReadOnlyAfterCreateWriteOnly) { + std::string filename = NewTempAbsPath(); + + // We have to create the file O_RDONLY to reproduce the bug because + // fsgofer.localFile.Create() silently upgrades O_WRONLY to O_RDWR, causing + // the cached "write-only" FD to be read/write and therefore usable by mmap(). + auto const ro_fd = ASSERT_NO_ERRNO_AND_VALUE( + Open(filename, O_RDONLY | O_CREAT | O_EXCL, 0666)); + + // Get a write-only FD for the same file, which should be ignored by mmap() + // (but isn't in #147). + auto const wo_fd = ASSERT_NO_ERRNO_AND_VALUE(Open(filename, O_WRONLY)); + ASSERT_THAT(ftruncate(wo_fd.get(), kPageSize), SyscallSucceeds()); + + auto const mapping = ASSERT_NO_ERRNO_AND_VALUE( + Mmap(nullptr, kPageSize, PROT_READ, MAP_SHARED, ro_fd.get(), 0)); + std::vector<char> buf(kPageSize); + // The test passes if this survives. + std::copy(static_cast<char*>(mapping.ptr()), + static_cast<char*>(mapping.endptr()), buf.data()); +} + +// Conditional on MAP_32BIT. +// This flag is supported only on x86-64, for 64-bit programs. +#ifdef __x86_64__ + +TEST(MMapNoFixtureTest, Map32Bit) { + auto const mapping = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kPageSize, PROT_NONE, MAP_PRIVATE | MAP_32BIT)); + EXPECT_LT(mapping.addr(), static_cast<uintptr_t>(1) << 32); + EXPECT_LE(mapping.endaddr(), static_cast<uintptr_t>(1) << 32); +} + +#endif // defined(__x86_64__) + +INSTANTIATE_TEST_SUITE_P( + ReadWriteSharedPrivate, MMapFileParamTest, + ::testing::Combine(::testing::ValuesIn({ + PROT_READ, + PROT_WRITE, + PROT_READ | PROT_WRITE, + }), + ::testing::ValuesIn({MAP_SHARED, MAP_PRIVATE}))); + +} // namespace + +} // namespace testing +} // namespace gvisor |