// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <errno.h>
#include <fcntl.h>
#include <linux/magic.h>
#include <linux/unistd.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/resource.h>
#include <sys/statfs.h>
#include <sys/syscall.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

#include <vector>

#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/strings/escaping.h"
#include "absl/strings/str_split.h"
#include "test/util/cleanup.h"
#include "test/util/file_descriptor.h"
#include "test/util/fs_util.h"
#include "test/util/memory_util.h"
#include "test/util/multiprocess_util.h"
#include "test/util/temp_path.h"
#include "test/util/test_util.h"

using ::testing::AnyOf;
using ::testing::Eq;
using ::testing::Gt;

namespace gvisor {
namespace testing {

namespace {

PosixErrorOr<int64_t> VirtualMemorySize() {
  ASSIGN_OR_RETURN_ERRNO(auto contents, GetContents("/proc/self/statm"));
  std::vector<std::string> parts = absl::StrSplit(contents, ' ');
  if (parts.empty()) {
    return PosixError(EINVAL, "Unable to parse /proc/self/statm");
  }
  ASSIGN_OR_RETURN_ERRNO(auto pages, Atoi<int64_t>(parts[0]));
  return pages * getpagesize();
}

class MMapTest : public ::testing::Test {
 protected:
  // Unmap mapping, if one was made.
  void TearDown() override {
    if (addr_) {
      EXPECT_THAT(Unmap(), SyscallSucceeds());
    }
  }

  // Remembers mapping, so it can be automatically unmapped.
  uintptr_t Map(uintptr_t addr, size_t length, int prot, int flags, int fd,
                off_t offset) {
    void* ret =
        mmap(reinterpret_cast<void*>(addr), length, prot, flags, fd, offset);

    if (ret != MAP_FAILED) {
      addr_ = ret;
      length_ = length;
    }

    return reinterpret_cast<uintptr_t>(ret);
  }

  // Unmap previous mapping
  int Unmap() {
    if (!addr_) {
      return -1;
    }

    int ret = munmap(addr_, length_);

    addr_ = nullptr;
    length_ = 0;

    return ret;
  }

  // Msync the mapping.
  int Msync() { return msync(addr_, length_, MS_SYNC); }

  // Mlock the mapping.
  int Mlock() { return mlock(addr_, length_); }

  // Munlock the mapping.
  int Munlock() { return munlock(addr_, length_); }

  int Protect(uintptr_t addr, size_t length, int prot) {
    return mprotect(reinterpret_cast<void*>(addr), length, prot);
  }

  void* addr_ = nullptr;
  size_t length_ = 0;
};

// Matches if arg contains the same contents as string str.
MATCHER_P(EqualsMemory, str, "") {
  if (0 == memcmp(arg, str.c_str(), str.size())) {
    return true;
  }

  *result_listener << "Memory did not match. Got:\n"
                   << absl::BytesToHexString(
                          std::string(static_cast<char*>(arg), str.size()))
                   << "Want:\n"
                   << absl::BytesToHexString(str);
  return false;
}

// We can't map pipes, but for different reasons.
TEST_F(MMapTest, MapPipe) {
  int fds[2];
  ASSERT_THAT(pipe(fds), SyscallSucceeds());
  EXPECT_THAT(Map(0, kPageSize, PROT_READ, MAP_PRIVATE, fds[0], 0),
              SyscallFailsWithErrno(ENODEV));
  EXPECT_THAT(Map(0, kPageSize, PROT_READ, MAP_PRIVATE, fds[1], 0),
              SyscallFailsWithErrno(EACCES));
  ASSERT_THAT(close(fds[0]), SyscallSucceeds());
  ASSERT_THAT(close(fds[1]), SyscallSucceeds());
}

// It's very common to mmap /dev/zero because anonymous mappings aren't part
// of POSIX although they are widely supported. So a zero initialized memory
// region would actually come from a "file backed" /dev/zero mapping.
TEST_F(MMapTest, MapDevZeroShared) {
  // This test will verify that we're able to map a page backed by /dev/zero
  // as MAP_SHARED.
  const FileDescriptor dev_zero =
      ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR));

  // Test that we can create a RW SHARED mapping of /dev/zero.
  ASSERT_THAT(
      Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, dev_zero.get(), 0),
      SyscallSucceeds());
}

TEST_F(MMapTest, MapDevZeroPrivate) {
  // This test will verify that we're able to map a page backed by /dev/zero
  // as MAP_PRIVATE.
  const FileDescriptor dev_zero =
      ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR));

  // Test that we can create a RW SHARED mapping of /dev/zero.
  ASSERT_THAT(
      Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE, dev_zero.get(), 0),
      SyscallSucceeds());
}

TEST_F(MMapTest, MapDevZeroNoPersistence) {
  // This test will verify that two independent mappings of /dev/zero do not
  // appear to reference the same "backed file."

  const FileDescriptor dev_zero1 =
      ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR));
  const FileDescriptor dev_zero2 =
      ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR));

  ASSERT_THAT(
      Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, dev_zero1.get(), 0),
      SyscallSucceeds());

  // Create a second mapping via the second /dev/zero fd.
  void* psec_map = mmap(nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED,
                        dev_zero2.get(), 0);
  ASSERT_THAT(reinterpret_cast<intptr_t>(psec_map), SyscallSucceeds());

  // Always unmap.
  auto cleanup_psec_map = Cleanup(
      [&] { EXPECT_THAT(munmap(psec_map, kPageSize), SyscallSucceeds()); });

  // Verify that we have independently addressed pages.
  ASSERT_NE(psec_map, addr_);

  std::string buf_zero(kPageSize, 0x00);
  std::string buf_ones(kPageSize, 0xFF);

  // Verify the first is actually all zeros after mmap.
  EXPECT_THAT(addr_, EqualsMemory(buf_zero));

  // Let's fill in the first mapping with 0xFF.
  memcpy(addr_, buf_ones.data(), kPageSize);

  // Verify that the memcpy actually stuck in the page.
  EXPECT_THAT(addr_, EqualsMemory(buf_ones));

  // Verify that it didn't affect the second page which should be all zeros.
  EXPECT_THAT(psec_map, EqualsMemory(buf_zero));
}

TEST_F(MMapTest, MapDevZeroSharedMultiplePages) {
  // This will test that we're able to map /dev/zero over multiple pages.
  const FileDescriptor dev_zero =
      ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR));

  // Test that we can create a RW SHARED mapping of /dev/zero.
  ASSERT_THAT(Map(0, kPageSize * 2, PROT_READ | PROT_WRITE, MAP_PRIVATE,
                  dev_zero.get(), 0),
              SyscallSucceeds());

  std::string buf_zero(kPageSize * 2, 0x00);
  std::string buf_ones(kPageSize * 2, 0xFF);

  // Verify the two pages are actually all zeros after mmap.
  EXPECT_THAT(addr_, EqualsMemory(buf_zero));

  // Fill out the pages with all ones.
  memcpy(addr_, buf_ones.data(), kPageSize * 2);

  // Verify that the memcpy actually stuck in the pages.
  EXPECT_THAT(addr_, EqualsMemory(buf_ones));
}

TEST_F(MMapTest, MapDevZeroSharedFdNoPersistence) {
  // This test will verify that two independent mappings of /dev/zero do not
  // appear to reference the same "backed file" even when mapped from the
  // same initial fd.
  const FileDescriptor dev_zero =
      ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR));

  ASSERT_THAT(
      Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, dev_zero.get(), 0),
      SyscallSucceeds());

  // Create a second mapping via the same fd.
  void* psec_map = mmap(nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED,
                        dev_zero.get(), 0);
  ASSERT_THAT(reinterpret_cast<int64_t>(psec_map), SyscallSucceeds());

  // Always unmap.
  auto cleanup_psec_map = Cleanup(
      [&] { ASSERT_THAT(munmap(psec_map, kPageSize), SyscallSucceeds()); });

  // Verify that we have independently addressed pages.
  ASSERT_NE(psec_map, addr_);

  std::string buf_zero(kPageSize, 0x00);
  std::string buf_ones(kPageSize, 0xFF);

  // Verify the first is actually all zeros after mmap.
  EXPECT_THAT(addr_, EqualsMemory(buf_zero));

  // Let's fill in the first mapping with 0xFF.
  memcpy(addr_, buf_ones.data(), kPageSize);

  // Verify that the memcpy actually stuck in the page.
  EXPECT_THAT(addr_, EqualsMemory(buf_ones));

  // Verify that it didn't affect the second page which should be all zeros.
  EXPECT_THAT(psec_map, EqualsMemory(buf_zero));
}

TEST_F(MMapTest, MapDevZeroSegfaultAfterUnmap) {
  SetupGvisorDeathTest();

  // This test will verify that we're able to map a page backed by /dev/zero
  // as MAP_SHARED and after it's unmapped any access results in a SIGSEGV.
  // This test is redundant but given the special nature of /dev/zero mappings
  // it doesn't hurt.
  const FileDescriptor dev_zero =
      ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR));

  const auto rest = [&] {
    // Test that we can create a RW SHARED mapping of /dev/zero.
    TEST_PCHECK(Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED,
                    dev_zero.get(),
                    0) != reinterpret_cast<uintptr_t>(MAP_FAILED));

    // Confirm that accesses after the unmap result in a SIGSEGV.
    //
    // N.B. We depend on this process being single-threaded to ensure there
    // can't be another mmap to map addr before the dereference below.
    void* addr_saved = addr_;  // Unmap resets addr_.
    TEST_PCHECK(Unmap() == 0);
    *reinterpret_cast<volatile int*>(addr_saved) = 0xFF;
  };

  EXPECT_THAT(InForkedProcess(rest),
              IsPosixErrorOkAndHolds(AnyOf(Eq(W_EXITCODE(0, SIGSEGV)),
                                           Eq(W_EXITCODE(0, 128 + SIGSEGV)))));
}

TEST_F(MMapTest, MapDevZeroUnaligned) {
  const FileDescriptor dev_zero =
      ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR));
  const size_t size = kPageSize + kPageSize / 2;
  const std::string buf_zero(size, 0x00);

  ASSERT_THAT(
      Map(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, dev_zero.get(), 0),
      SyscallSucceeds());
  EXPECT_THAT(addr_, EqualsMemory(buf_zero));
  ASSERT_THAT(Unmap(), SyscallSucceeds());

  ASSERT_THAT(
      Map(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE, dev_zero.get(), 0),
      SyscallSucceeds());
  EXPECT_THAT(addr_, EqualsMemory(buf_zero));
}

// We can't map _some_ character devices.
TEST_F(MMapTest, MapCharDevice) {
  const FileDescriptor cdevfd =
      ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/random", 0, 0));
  EXPECT_THAT(Map(0, kPageSize, PROT_READ, MAP_PRIVATE, cdevfd.get(), 0),
              SyscallFailsWithErrno(ENODEV));
}

// We can't map directories.
TEST_F(MMapTest, MapDirectory) {
  const FileDescriptor dirfd =
      ASSERT_NO_ERRNO_AND_VALUE(Open(GetAbsoluteTestTmpdir(), 0, 0));
  EXPECT_THAT(Map(0, kPageSize, PROT_READ, MAP_PRIVATE, dirfd.get(), 0),
              SyscallFailsWithErrno(ENODEV));
}

// We can map *something*
TEST_F(MMapTest, MapAnything) {
  EXPECT_THAT(Map(0, kPageSize, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
              SyscallSucceedsWithValue(Gt(0)));
}

// Map length < PageSize allowed
TEST_F(MMapTest, SmallMap) {
  EXPECT_THAT(Map(0, 128, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
              SyscallSucceeds());
}

// Hint address doesn't break anything.
// Note: there is no requirement we actually get the hint address
TEST_F(MMapTest, HintAddress) {
  EXPECT_THAT(
      Map(0x30000000, kPageSize, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
      SyscallSucceeds());
}

// MAP_FIXED gives us exactly the requested address
TEST_F(MMapTest, MapFixed) {
  EXPECT_THAT(Map(0x30000000, kPageSize, PROT_NONE,
                  MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0),
              SyscallSucceedsWithValue(0x30000000));
}

// 64-bit addresses work too
#if defined(__x86_64__) || defined(__aarch64__)
TEST_F(MMapTest, MapFixed64) {
  EXPECT_THAT(Map(0x300000000000, kPageSize, PROT_NONE,
                  MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0),
              SyscallSucceedsWithValue(0x300000000000));
}
#endif

// MAP_STACK allowed.
// There isn't a good way to verify it did anything.
TEST_F(MMapTest, MapStack) {
  EXPECT_THAT(Map(0, kPageSize, PROT_NONE,
                  MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0),
              SyscallSucceeds());
}

// MAP_LOCKED allowed.
// There isn't a good way to verify it did anything.
TEST_F(MMapTest, MapLocked) {
  EXPECT_THAT(Map(0, kPageSize, PROT_NONE,
                  MAP_PRIVATE | MAP_ANONYMOUS | MAP_LOCKED, -1, 0),
              SyscallSucceeds());
}

// MAP_PRIVATE or MAP_SHARED must be passed
TEST_F(MMapTest, NotPrivateOrShared) {
  EXPECT_THAT(Map(0, kPageSize, PROT_NONE, MAP_ANONYMOUS, -1, 0),
              SyscallFailsWithErrno(EINVAL));
}

// Only one of MAP_PRIVATE or MAP_SHARED may be passed
TEST_F(MMapTest, PrivateAndShared) {
  EXPECT_THAT(Map(0, kPageSize, PROT_NONE,
                  MAP_PRIVATE | MAP_SHARED | MAP_ANONYMOUS, -1, 0),
              SyscallFailsWithErrno(EINVAL));
}

TEST_F(MMapTest, FixedAlignment) {
  // Addr must be page aligned (MAP_FIXED)
  EXPECT_THAT(Map(0x30000001, kPageSize, PROT_NONE,
                  MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS, -1, 0),
              SyscallFailsWithErrno(EINVAL));
}

// Non-MAP_FIXED address does not need to be page aligned
TEST_F(MMapTest, NonFixedAlignment) {
  EXPECT_THAT(
      Map(0x30000001, kPageSize, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
      SyscallSucceeds());
}

// Length = 0 results in EINVAL.
TEST_F(MMapTest, InvalidLength) {
  EXPECT_THAT(Map(0, 0, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
              SyscallFailsWithErrno(EINVAL));
}

// Bad fd not allowed.
TEST_F(MMapTest, BadFd) {
  EXPECT_THAT(Map(0, kPageSize, PROT_NONE, MAP_PRIVATE, 999, 0),
              SyscallFailsWithErrno(EBADF));
}

// Mappings are writable.
TEST_F(MMapTest, ProtWrite) {
  uint64_t addr;
  constexpr uint8_t kFirstWord[] = {42, 42, 42, 42};

  EXPECT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
              SyscallSucceeds());

  // This shouldn't cause a SIGSEGV.
  memset(reinterpret_cast<void*>(addr), 42, kPageSize);

  // The written data should actually be there.
  EXPECT_EQ(
      0, memcmp(reinterpret_cast<void*>(addr), kFirstWord, sizeof(kFirstWord)));
}

// "Write-only" mappings are writable *and* readable.
TEST_F(MMapTest, ProtWriteOnly) {
  uint64_t addr;
  constexpr uint8_t kFirstWord[] = {42, 42, 42, 42};

  EXPECT_THAT(
      addr = Map(0, kPageSize, PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
      SyscallSucceeds());

  // This shouldn't cause a SIGSEGV.
  memset(reinterpret_cast<void*>(addr), 42, kPageSize);

  // The written data should actually be there.
  EXPECT_EQ(
      0, memcmp(reinterpret_cast<void*>(addr), kFirstWord, sizeof(kFirstWord)));
}

// "Write-only" mappings are readable.
//
// This is distinct from above to ensure the page is accessible even if the
// initial fault is a write fault.
TEST_F(MMapTest, ProtWriteOnlyReadable) {
  uint64_t addr;
  constexpr uint64_t kFirstWord = 0;

  EXPECT_THAT(
      addr = Map(0, kPageSize, PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
      SyscallSucceeds());

  EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), &kFirstWord,
                      sizeof(kFirstWord)));
}

// Mappings are writable after mprotect from PROT_NONE to PROT_READ|PROT_WRITE.
TEST_F(MMapTest, ProtectProtWrite) {
  uint64_t addr;
  constexpr uint8_t kFirstWord[] = {42, 42, 42, 42};

  EXPECT_THAT(
      addr = Map(0, kPageSize, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
      SyscallSucceeds());

  ASSERT_THAT(Protect(addr, kPageSize, PROT_READ | PROT_WRITE),
              SyscallSucceeds());

  // This shouldn't cause a SIGSEGV.
  memset(reinterpret_cast<void*>(addr), 42, kPageSize);

  // The written data should actually be there.
  EXPECT_EQ(
      0, memcmp(reinterpret_cast<void*>(addr), kFirstWord, sizeof(kFirstWord)));
}

// SIGSEGV raised when reading PROT_NONE memory
TEST_F(MMapTest, ProtNoneDeath) {
  SetupGvisorDeathTest();

  uintptr_t addr;

  ASSERT_THAT(
      addr = Map(0, kPageSize, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
      SyscallSucceeds());

  EXPECT_EXIT(*reinterpret_cast<volatile int*>(addr),
              ::testing::KilledBySignal(SIGSEGV), "");
}

// SIGSEGV raised when writing PROT_READ only memory
TEST_F(MMapTest, ReadOnlyDeath) {
  SetupGvisorDeathTest();

  uintptr_t addr;

  ASSERT_THAT(
      addr = Map(0, kPageSize, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
      SyscallSucceeds());

  EXPECT_EXIT(*reinterpret_cast<volatile int*>(addr) = 42,
              ::testing::KilledBySignal(SIGSEGV), "");
}

// Writable mapping mprotect'd to read-only should not be writable.
TEST_F(MMapTest, MprotectReadOnlyDeath) {
  SetupGvisorDeathTest();

  uintptr_t addr;

  ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
              SyscallSucceeds());

  volatile int* val = reinterpret_cast<int*>(addr);

  // Copy to ensure page is mapped in.
  *val = 42;

  ASSERT_THAT(Protect(addr, kPageSize, PROT_READ), SyscallSucceeds());

  // Now it shouldn't be writable.
  EXPECT_EXIT(*val = 0, ::testing::KilledBySignal(SIGSEGV), "");
}

// Verify that calling mprotect an address that's not page aligned fails.
TEST_F(MMapTest, MprotectNotPageAligned) {
  uintptr_t addr;

  ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
              SyscallSucceeds());
  ASSERT_THAT(Protect(addr + 1, kPageSize - 1, PROT_READ),
              SyscallFailsWithErrno(EINVAL));
}

// Verify that calling mprotect with an absurdly huge length fails.
TEST_F(MMapTest, MprotectHugeLength) {
  uintptr_t addr;

  ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
              SyscallSucceeds());
  ASSERT_THAT(Protect(addr, static_cast<size_t>(-1), PROT_READ),
              SyscallFailsWithErrno(ENOMEM));
}

#if defined(__x86_64__) || defined(__i386__)
// This code is equivalent in 32 and 64-bit mode
const uint8_t machine_code[] = {
    0xb8, 0x2a, 0x00, 0x00, 0x00,  // movl $42, %eax
    0xc3,                          // retq
};
#elif defined(__aarch64__)
const uint8_t machine_code[] = {
    0x40, 0x05, 0x80, 0x52,  // mov w0, #42
    0xc0, 0x03, 0x5f, 0xd6,  // ret
};
#endif

// PROT_EXEC allows code execution
TEST_F(MMapTest, ProtExec) {
  uintptr_t addr;
  uint32_t (*func)(void);

  EXPECT_THAT(addr = Map(0, kPageSize, PROT_EXEC | PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
              SyscallSucceeds());

  memcpy(reinterpret_cast<void*>(addr), machine_code, sizeof(machine_code));

#if defined(__aarch64__)
  // We use this as a memory barrier for Arm64.
  ASSERT_THAT(Protect(addr, kPageSize, PROT_READ | PROT_EXEC),
              SyscallSucceeds());
#endif

  func = reinterpret_cast<uint32_t (*)(void)>(addr);

  EXPECT_EQ(42, func());
}

// No PROT_EXEC disallows code execution
TEST_F(MMapTest, NoProtExecDeath) {
  SetupGvisorDeathTest();

  uintptr_t addr;
  uint32_t (*func)(void);

  EXPECT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
              SyscallSucceeds());

  memcpy(reinterpret_cast<void*>(addr), machine_code, sizeof(machine_code));

  func = reinterpret_cast<uint32_t (*)(void)>(addr);

  EXPECT_EXIT(func(), ::testing::KilledBySignal(SIGSEGV), "");
}

TEST_F(MMapTest, NoExceedLimitData) {
  void* prevbrk;
  void* target_brk;
  struct rlimit setlim;

  prevbrk = sbrk(0);
  ASSERT_NE(-1, reinterpret_cast<intptr_t>(prevbrk));
  target_brk = reinterpret_cast<char*>(prevbrk) + 1;

  setlim.rlim_cur = RLIM_INFINITY;
  setlim.rlim_max = RLIM_INFINITY;
  ASSERT_THAT(setrlimit(RLIMIT_DATA, &setlim), SyscallSucceeds());
  EXPECT_THAT(brk(target_brk), SyscallSucceedsWithValue(0));
}

TEST_F(MMapTest, ExceedLimitData) {
  // To unit test this more precisely, we'd need access to the mm's start_brk
  // and end_brk, which we don't have direct access to :/
  void* prevbrk;
  void* target_brk;
  struct rlimit setlim;

  prevbrk = sbrk(0);
  ASSERT_NE(-1, reinterpret_cast<intptr_t>(prevbrk));
  target_brk = reinterpret_cast<char*>(prevbrk) + 8192;

  setlim.rlim_cur = 0;
  setlim.rlim_max = RLIM_INFINITY;
  // Set RLIMIT_DATA very low so any subsequent brk() calls fail.
  // Reset RLIMIT_DATA during teardown step.
  ASSERT_THAT(setrlimit(RLIMIT_DATA, &setlim), SyscallSucceeds());
  EXPECT_THAT(brk(target_brk), SyscallFailsWithErrno(ENOMEM));
  // Teardown step...
  setlim.rlim_cur = RLIM_INFINITY;
  ASSERT_THAT(setrlimit(RLIMIT_DATA, &setlim), SyscallSucceeds());
}

TEST_F(MMapTest, ExceedLimitDataPrlimit) {
  // To unit test this more precisely, we'd need access to the mm's start_brk
  // and end_brk, which we don't have direct access to :/
  void* prevbrk;
  void* target_brk;
  struct rlimit setlim;

  prevbrk = sbrk(0);
  ASSERT_NE(-1, reinterpret_cast<intptr_t>(prevbrk));
  target_brk = reinterpret_cast<char*>(prevbrk) + 8192;

  setlim.rlim_cur = 0;
  setlim.rlim_max = RLIM_INFINITY;
  // Set RLIMIT_DATA very low so any subsequent brk() calls fail.
  // Reset RLIMIT_DATA during teardown step.
  ASSERT_THAT(prlimit(0, RLIMIT_DATA, &setlim, nullptr), SyscallSucceeds());
  EXPECT_THAT(brk(target_brk), SyscallFailsWithErrno(ENOMEM));
  // Teardown step...
  setlim.rlim_cur = RLIM_INFINITY;
  ASSERT_THAT(setrlimit(RLIMIT_DATA, &setlim), SyscallSucceeds());
}

TEST_F(MMapTest, ExceedLimitDataPrlimitPID) {
  // To unit test this more precisely, we'd need access to the mm's start_brk
  // and end_brk, which we don't have direct access to :/
  void* prevbrk;
  void* target_brk;
  struct rlimit setlim;

  prevbrk = sbrk(0);
  ASSERT_NE(-1, reinterpret_cast<intptr_t>(prevbrk));
  target_brk = reinterpret_cast<char*>(prevbrk) + 8192;

  setlim.rlim_cur = 0;
  setlim.rlim_max = RLIM_INFINITY;
  // Set RLIMIT_DATA very low so any subsequent brk() calls fail.
  // Reset RLIMIT_DATA during teardown step.
  ASSERT_THAT(prlimit(syscall(__NR_gettid), RLIMIT_DATA, &setlim, nullptr),
              SyscallSucceeds());
  EXPECT_THAT(brk(target_brk), SyscallFailsWithErrno(ENOMEM));
  // Teardown step...
  setlim.rlim_cur = RLIM_INFINITY;
  ASSERT_THAT(setrlimit(RLIMIT_DATA, &setlim), SyscallSucceeds());
}

TEST_F(MMapTest, NoExceedLimitAS) {
  constexpr uint64_t kAllocBytes = 200 << 20;
  // Add some headroom to the AS limit in case of e.g. unexpected stack
  // expansion.
  constexpr uint64_t kExtraASBytes = kAllocBytes + (20 << 20);
  static_assert(kAllocBytes < kExtraASBytes,
                "test depends on allocation not exceeding AS limit");

  auto vss = ASSERT_NO_ERRNO_AND_VALUE(VirtualMemorySize());
  struct rlimit setlim;
  setlim.rlim_cur = vss + kExtraASBytes;
  setlim.rlim_max = RLIM_INFINITY;
  ASSERT_THAT(setrlimit(RLIMIT_AS, &setlim), SyscallSucceeds());
  EXPECT_THAT(
      Map(0, kAllocBytes, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
      SyscallSucceedsWithValue(Gt(0)));
}

TEST_F(MMapTest, ExceedLimitAS) {
  constexpr uint64_t kAllocBytes = 200 << 20;
  // Add some headroom to the AS limit in case of e.g. unexpected stack
  // expansion.
  constexpr uint64_t kExtraASBytes = 20 << 20;
  static_assert(kAllocBytes > kExtraASBytes,
                "test depends on allocation exceeding AS limit");

  auto vss = ASSERT_NO_ERRNO_AND_VALUE(VirtualMemorySize());
  struct rlimit setlim;
  setlim.rlim_cur = vss + kExtraASBytes;
  setlim.rlim_max = RLIM_INFINITY;
  ASSERT_THAT(setrlimit(RLIMIT_AS, &setlim), SyscallSucceeds());
  EXPECT_THAT(
      Map(0, kAllocBytes, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
      SyscallFailsWithErrno(ENOMEM));
}

// Tests that setting an anonymous mmap to PROT_NONE doesn't free the memory.
TEST_F(MMapTest, SettingProtNoneDoesntFreeMemory) {
  uintptr_t addr;
  constexpr uint8_t kFirstWord[] = {42, 42, 42, 42};

  EXPECT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
              SyscallSucceedsWithValue(Gt(0)));

  memset(reinterpret_cast<void*>(addr), 42, kPageSize);

  ASSERT_THAT(Protect(addr, kPageSize, PROT_NONE), SyscallSucceeds());
  ASSERT_THAT(Protect(addr, kPageSize, PROT_READ | PROT_WRITE),
              SyscallSucceeds());

  // The written data should still be there.
  EXPECT_EQ(
      0, memcmp(reinterpret_cast<void*>(addr), kFirstWord, sizeof(kFirstWord)));
}

constexpr char kFileContents[] = "Hello World!";

class MMapFileTest : public MMapTest {
 protected:
  FileDescriptor fd_;
  std::string filename_;

  // Open a file for read/write
  void SetUp() override {
    MMapTest::SetUp();

    filename_ = NewTempAbsPath();
    fd_ = ASSERT_NO_ERRNO_AND_VALUE(Open(filename_, O_CREAT | O_RDWR, 0644));

    // Extend file so it can be written once mapped. Deliberately make the file
    // only half a page in size, so we can test what happens when we access the
    // second half.
    // Use ftruncate(2) once the sentry supports it.
    char zero = 0;
    size_t count = 0;
    do {
      const DisableSave ds;  // saving 2048 times is slow and useless.
      Write(&zero, 1), SyscallSucceedsWithValue(1);
    } while (++count < (kPageSize / 2));
    ASSERT_THAT(lseek(fd_.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0));
  }

  // Close and delete file
  void TearDown() override {
    MMapTest::TearDown();
    fd_.reset();  // Make sure the files is closed before we unlink it.
    ASSERT_THAT(unlink(filename_.c_str()), SyscallSucceeds());
  }

  ssize_t Read(char* buf, size_t count) {
    ssize_t len = 0;
    do {
      ssize_t ret = read(fd_.get(), buf, count);
      if (ret < 0) {
        return ret;
      } else if (ret == 0) {
        return len;
      }

      len += ret;
      buf += ret;
    } while (len < static_cast<ssize_t>(count));

    return len;
  }

  ssize_t Write(const char* buf, size_t count) {
    ssize_t len = 0;
    do {
      ssize_t ret = write(fd_.get(), buf, count);
      if (ret < 0) {
        return ret;
      } else if (ret == 0) {
        return len;
      }

      len += ret;
      buf += ret;
    } while (len < static_cast<ssize_t>(count));

    return len;
  }
};

class MMapFileParamTest
    : public MMapFileTest,
      public ::testing::WithParamInterface<std::tuple<int, int>> {
 protected:
  int prot() const { return std::get<0>(GetParam()); }

  int flags() const { return std::get<1>(GetParam()); }
};

// MAP_POPULATE allowed.
// There isn't a good way to verify it actually did anything.
TEST_P(MMapFileParamTest, MapPopulate) {
  ASSERT_THAT(Map(0, kPageSize, prot(), flags() | MAP_POPULATE, fd_.get(), 0),
              SyscallSucceeds());
}

// MAP_POPULATE on a short file.
TEST_P(MMapFileParamTest, MapPopulateShort) {
  ASSERT_THAT(
      Map(0, 2 * kPageSize, prot(), flags() | MAP_POPULATE, fd_.get(), 0),
      SyscallSucceeds());
}

// Read contents from mapped file.
TEST_F(MMapFileTest, Read) {
  size_t len = strlen(kFileContents);
  ASSERT_EQ(len, Write(kFileContents, len));

  uintptr_t addr;
  ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ, MAP_PRIVATE, fd_.get(), 0),
              SyscallSucceeds());

  EXPECT_THAT(reinterpret_cast<char*>(addr),
              EqualsMemory(std::string(kFileContents)));
}

// Map at an offset.
TEST_F(MMapFileTest, MapOffset) {
  ASSERT_THAT(lseek(fd_.get(), kPageSize, SEEK_SET), SyscallSucceeds());

  size_t len = strlen(kFileContents);
  ASSERT_EQ(len, Write(kFileContents, len));

  uintptr_t addr;
  ASSERT_THAT(
      addr = Map(0, kPageSize, PROT_READ, MAP_PRIVATE, fd_.get(), kPageSize),
      SyscallSucceeds());

  EXPECT_THAT(reinterpret_cast<char*>(addr),
              EqualsMemory(std::string(kFileContents)));
}

TEST_F(MMapFileTest, MapOffsetBeyondEnd) {
  SetupGvisorDeathTest();

  uintptr_t addr;
  ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE,
                         fd_.get(), 10 * kPageSize),
              SyscallSucceeds());

  // Touching the memory causes SIGBUS.
  size_t len = strlen(kFileContents);
  EXPECT_EXIT(std::copy(kFileContents, kFileContents + len,
                        reinterpret_cast<volatile char*>(addr)),
              ::testing::KilledBySignal(SIGBUS), "");
}

// Verify mmap fails when sum of length and offset overflows.
TEST_F(MMapFileTest, MapLengthPlusOffsetOverflows) {
  const size_t length = static_cast<size_t>(-kPageSize);
  const off_t offset = kPageSize;
  ASSERT_THAT(Map(0, length, PROT_READ, MAP_PRIVATE, fd_.get(), offset),
              SyscallFailsWithErrno(ENOMEM));
}

// MAP_PRIVATE PROT_WRITE is allowed on read-only FDs.
TEST_F(MMapFileTest, WritePrivateOnReadOnlyFd) {
  const FileDescriptor fd =
      ASSERT_NO_ERRNO_AND_VALUE(Open(filename_, O_RDONLY));

  uintptr_t addr;
  EXPECT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE,
                         fd.get(), 0),
              SyscallSucceeds());

  // Touch the page to ensure the kernel didn't lie about writability.
  size_t len = strlen(kFileContents);
  std::copy(kFileContents, kFileContents + len,
            reinterpret_cast<volatile char*>(addr));
}

// MAP_SHARED PROT_WRITE not allowed on read-only FDs.
TEST_F(MMapFileTest, WriteSharedOnReadOnlyFd) {
  const FileDescriptor fd =
      ASSERT_NO_ERRNO_AND_VALUE(Open(filename_, O_RDONLY));

  uintptr_t addr;
  EXPECT_THAT(
      addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd.get(), 0),
      SyscallFailsWithErrno(EACCES));
}

// Mmap not allowed on O_PATH FDs.
TEST_F(MMapFileTest, MmapFileWithOpath) {
  SKIP_IF(IsRunningWithVFS1());
  const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
  const FileDescriptor fd =
      ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_PATH));

  uintptr_t addr;
  EXPECT_THAT(addr = Map(0, kPageSize, PROT_READ, MAP_PRIVATE, fd.get(), 0),
              SyscallFailsWithErrno(EBADF));
}

// The FD must be readable.
TEST_P(MMapFileParamTest, WriteOnlyFd) {
  const FileDescriptor fd =
      ASSERT_NO_ERRNO_AND_VALUE(Open(filename_, O_WRONLY));

  uintptr_t addr;
  EXPECT_THAT(addr = Map(0, kPageSize, prot(), flags(), fd.get(), 0),
              SyscallFailsWithErrno(EACCES));
}

// Overwriting the contents of a file mapped MAP_SHARED PROT_READ
// should cause the new data to be reflected in the mapping.
TEST_F(MMapFileTest, ReadSharedConsistentWithOverwrite) {
  // Start from scratch.
  EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds());

  // Expand the file to two pages and dirty them.
  std::string bufA(kPageSize, 'a');
  ASSERT_THAT(Write(bufA.c_str(), bufA.size()),
              SyscallSucceedsWithValue(bufA.size()));
  std::string bufB(kPageSize, 'b');
  ASSERT_THAT(Write(bufB.c_str(), bufB.size()),
              SyscallSucceedsWithValue(bufB.size()));

  // Map the page.
  uintptr_t addr;
  ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0),
              SyscallSucceeds());

  // Check that the mapping contains the right file data.
  EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), bufA.c_str(), kPageSize));
  EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr + kPageSize), bufB.c_str(),
                      kPageSize));

  // Start at the beginning of the file.
  ASSERT_THAT(lseek(fd_.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0));

  // Swap the write pattern.
  ASSERT_THAT(Write(bufB.c_str(), bufB.size()),
              SyscallSucceedsWithValue(bufB.size()));
  ASSERT_THAT(Write(bufA.c_str(), bufA.size()),
              SyscallSucceedsWithValue(bufA.size()));

  // Check that the mapping got updated.
  EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), bufB.c_str(), kPageSize));
  EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr + kPageSize), bufA.c_str(),
                      kPageSize));
}

// Partially overwriting a file mapped MAP_SHARED PROT_READ should be reflected
// in the mapping.
TEST_F(MMapFileTest, ReadSharedConsistentWithPartialOverwrite) {
  // Start from scratch.
  EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds());

  // Expand the file to two pages and dirty them.
  std::string bufA(kPageSize, 'a');
  ASSERT_THAT(Write(bufA.c_str(), bufA.size()),
              SyscallSucceedsWithValue(bufA.size()));
  std::string bufB(kPageSize, 'b');
  ASSERT_THAT(Write(bufB.c_str(), bufB.size()),
              SyscallSucceedsWithValue(bufB.size()));

  // Map the page.
  uintptr_t addr;
  ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0),
              SyscallSucceeds());

  // Check that the mapping contains the right file data.
  EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), bufA.c_str(), kPageSize));
  EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr + kPageSize), bufB.c_str(),
                      kPageSize));

  // Start at the beginning of the file.
  ASSERT_THAT(lseek(fd_.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0));

  // Do a partial overwrite, spanning both pages.
  std::string bufC(kPageSize + (kPageSize / 2), 'c');
  ASSERT_THAT(Write(bufC.c_str(), bufC.size()),
              SyscallSucceedsWithValue(bufC.size()));

  // Check that the mapping got updated.
  EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), bufC.c_str(),
                      kPageSize + (kPageSize / 2)));
  EXPECT_EQ(0,
            memcmp(reinterpret_cast<void*>(addr + kPageSize + (kPageSize / 2)),
                   bufB.c_str(), kPageSize / 2));
}

// Overwriting a file mapped MAP_SHARED PROT_READ should be reflected in the
// mapping and the file.
TEST_F(MMapFileTest, ReadSharedConsistentWithWriteAndFile) {
  // Start from scratch.
  EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds());

  // Expand the file to two full pages and dirty it.
  std::string bufA(2 * kPageSize, 'a');
  ASSERT_THAT(Write(bufA.c_str(), bufA.size()),
              SyscallSucceedsWithValue(bufA.size()));

  // Map only the first page.
  uintptr_t addr;
  ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0),
              SyscallSucceeds());

  // Prepare to overwrite the file contents.
  ASSERT_THAT(lseek(fd_.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0));

  // Overwrite everything, beyond the mapped portion.
  std::string bufB(2 * kPageSize, 'b');
  ASSERT_THAT(Write(bufB.c_str(), bufB.size()),
              SyscallSucceedsWithValue(bufB.size()));

  // What the mapped portion should now look like.
  std::string bufMapped(kPageSize, 'b');

  // Expect that the mapped portion is consistent.
  EXPECT_EQ(
      0, memcmp(reinterpret_cast<void*>(addr), bufMapped.c_str(), kPageSize));

  // Prepare to read the entire file contents.
  ASSERT_THAT(lseek(fd_.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0));

  // Expect that the file was fully updated.
  std::vector<char> bufFile(2 * kPageSize);
  ASSERT_THAT(Read(bufFile.data(), bufFile.size()),
              SyscallSucceedsWithValue(bufFile.size()));
  // Cast to void* to avoid EXPECT_THAT assuming bufFile.data() is a
  // NUL-terminated C std::string. EXPECT_THAT will try to print a char* as a C
  // std::string, possibly overruning the buffer.
  EXPECT_THAT(reinterpret_cast<void*>(bufFile.data()), EqualsMemory(bufB));
}

// Write data to mapped file.
TEST_F(MMapFileTest, WriteShared) {
  uintptr_t addr;
  ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED,
                         fd_.get(), 0),
              SyscallSucceeds());

  size_t len = strlen(kFileContents);
  memcpy(reinterpret_cast<void*>(addr), kFileContents, len);

  // The file may not actually be updated until munmap is called.
  ASSERT_THAT(Unmap(), SyscallSucceeds());

  std::vector<char> buf(len);
  ASSERT_THAT(Read(buf.data(), buf.size()),
              SyscallSucceedsWithValue(buf.size()));
  // Cast to void* to avoid EXPECT_THAT assuming buf.data() is a
  // NUL-terminated C string. EXPECT_THAT will try to print a char* as a C
  // string, possibly overruning the buffer.
  EXPECT_THAT(reinterpret_cast<void*>(buf.data()),
              EqualsMemory(std::string(kFileContents)));
}

// Write data to portion of mapped page beyond the end of the file.
// These writes are not reflected in the file.
TEST_F(MMapFileTest, WriteSharedBeyondEnd) {
  // The file is only half of a page. We map an entire page. Writes to the
  // end of the mapping must not be reflected in the file.
  uintptr_t addr;
  ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED,
                         fd_.get(), 0),
              SyscallSucceeds());

  // First half; this is reflected in the file.
  std::string first(kPageSize / 2, 'A');
  memcpy(reinterpret_cast<void*>(addr), first.c_str(), first.size());

  // Second half; this is not reflected in the file.
  std::string second(kPageSize / 2, 'B');
  memcpy(reinterpret_cast<void*>(addr + kPageSize / 2), second.c_str(),
         second.size());

  // The file may not actually be updated until munmap is called.
  ASSERT_THAT(Unmap(), SyscallSucceeds());

  // Big enough to fit the entire page, if the writes are mistakenly written to
  // the file.
  std::vector<char> buf(kPageSize);

  // Only the first half is in the file.
  ASSERT_THAT(Read(buf.data(), buf.size()),
              SyscallSucceedsWithValue(first.size()));
  // Cast to void* to avoid EXPECT_THAT assuming buf.data() is a
  // NUL-terminated C string. EXPECT_THAT will try to print a char* as a C
  // NUL-terminated C std::string. EXPECT_THAT will try to print a char* as a C
  // std::string, possibly overruning the buffer.
  EXPECT_THAT(reinterpret_cast<void*>(buf.data()), EqualsMemory(first));
}

// The portion of a mapped page that becomes part of the file after a truncate
// is reflected in the file.
TEST_F(MMapFileTest, WriteSharedTruncateUp) {
  // The file is only half of a page. We map an entire page. Writes to the
  // end of the mapping must not be reflected in the file.
  uintptr_t addr;
  ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED,
                         fd_.get(), 0),
              SyscallSucceeds());

  // First half; this is reflected in the file.
  std::string first(kPageSize / 2, 'A');
  memcpy(reinterpret_cast<void*>(addr), first.c_str(), first.size());

  // Second half; this is not reflected in the file now (see
  // WriteSharedBeyondEnd), but will be after the truncate.
  std::string second(kPageSize / 2, 'B');
  memcpy(reinterpret_cast<void*>(addr + kPageSize / 2), second.c_str(),
         second.size());

  // Extend the file to a full page. The second half of the page will be
  // reflected in the file.
  EXPECT_THAT(ftruncate(fd_.get(), kPageSize), SyscallSucceeds());

  // The file may not actually be updated until munmap is called.
  ASSERT_THAT(Unmap(), SyscallSucceeds());

  // The whole page is in the file.
  std::vector<char> buf(kPageSize);
  ASSERT_THAT(Read(buf.data(), buf.size()),
              SyscallSucceedsWithValue(buf.size()));
  // Cast to void* to avoid EXPECT_THAT assuming buf.data() is a
  // NUL-terminated C string. EXPECT_THAT will try to print a char* as a C
  // string, possibly overruning the buffer.
  EXPECT_THAT(reinterpret_cast<void*>(buf.data()), EqualsMemory(first));
  EXPECT_THAT(reinterpret_cast<void*>(buf.data() + kPageSize / 2),
              EqualsMemory(second));
}

TEST_F(MMapFileTest, ReadSharedTruncateDownThenUp) {
  // Start from scratch.
  EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds());

  // Expand the file to a full page and dirty it.
  std::string buf(kPageSize, 'a');
  ASSERT_THAT(Write(buf.c_str(), buf.size()),
              SyscallSucceedsWithValue(buf.size()));

  // Map the page.
  uintptr_t addr;
  ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0),
              SyscallSucceeds());

  // Check that the memory contains the file data.
  EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), buf.c_str(), kPageSize));

  // Truncate down, then up.
  EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds());
  EXPECT_THAT(ftruncate(fd_.get(), kPageSize), SyscallSucceeds());

  // Check that the memory was zeroed.
  std::string zeroed(kPageSize, '\0');
  EXPECT_EQ(0,
            memcmp(reinterpret_cast<void*>(addr), zeroed.c_str(), kPageSize));

  // The file may not actually be updated until msync is called.
  ASSERT_THAT(Msync(), SyscallSucceeds());

  // Prepare to read the entire file contents.
  ASSERT_THAT(lseek(fd_.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0));

  // Expect that the file is fully updated.
  std::vector<char> bufFile(kPageSize);
  ASSERT_THAT(Read(bufFile.data(), bufFile.size()),
              SyscallSucceedsWithValue(bufFile.size()));
  EXPECT_EQ(0, memcmp(bufFile.data(), zeroed.c_str(), kPageSize));
}

TEST_F(MMapFileTest, WriteSharedTruncateDownThenUp) {
  // The file is only half of a page. We map an entire page. Writes to the
  // end of the mapping must not be reflected in the file.
  uintptr_t addr;
  ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED,
                         fd_.get(), 0),
              SyscallSucceeds());

  // First half; this will be deleted by truncate(0).
  std::string first(kPageSize / 2, 'A');
  memcpy(reinterpret_cast<void*>(addr), first.c_str(), first.size());

  // Truncate down, then up.
  EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds());
  EXPECT_THAT(ftruncate(fd_.get(), kPageSize), SyscallSucceeds());

  // The whole page is zeroed in memory.
  std::string zeroed(kPageSize, '\0');
  EXPECT_EQ(0,
            memcmp(reinterpret_cast<void*>(addr), zeroed.c_str(), kPageSize));

  // The file may not actually be updated until munmap is called.
  ASSERT_THAT(Unmap(), SyscallSucceeds());

  // The whole file is also zeroed.
  std::vector<char> buf(kPageSize);
  ASSERT_THAT(Read(buf.data(), buf.size()),
              SyscallSucceedsWithValue(buf.size()));
  // Cast to void* to avoid EXPECT_THAT assuming buf.data() is a
  // NUL-terminated C string. EXPECT_THAT will try to print a char* as a C
  // string, possibly overruning the buffer.
  EXPECT_THAT(reinterpret_cast<void*>(buf.data()), EqualsMemory(zeroed));
}

TEST_F(MMapFileTest, ReadSharedTruncateSIGBUS) {
  SetupGvisorDeathTest();

  // Start from scratch.
  EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds());

  // Expand the file to a full page and dirty it.
  std::string buf(kPageSize, 'a');
  ASSERT_THAT(Write(buf.c_str(), buf.size()),
              SyscallSucceedsWithValue(buf.size()));

  // Map the page.
  uintptr_t addr;
  ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0),
              SyscallSucceeds());

  // Check that the mapping contains the file data.
  EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), buf.c_str(), kPageSize));

  // Truncate down.
  EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds());

  // Accessing the truncated region should cause a SIGBUS.
  std::vector<char> in(kPageSize);
  EXPECT_EXIT(
      std::copy(reinterpret_cast<volatile char*>(addr),
                reinterpret_cast<volatile char*>(addr) + kPageSize, in.data()),
      ::testing::KilledBySignal(SIGBUS), "");
}

TEST_F(MMapFileTest, WriteSharedTruncateSIGBUS) {
  SetupGvisorDeathTest();

  uintptr_t addr;
  ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED,
                         fd_.get(), 0),
              SyscallSucceeds());

  // Touch the memory to be sure it really is mapped.
  size_t len = strlen(kFileContents);
  memcpy(reinterpret_cast<void*>(addr), kFileContents, len);

  // Truncate down.
  EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds());

  // Accessing the truncated file should cause a SIGBUS.
  EXPECT_EXIT(std::copy(kFileContents, kFileContents + len,
                        reinterpret_cast<volatile char*>(addr)),
              ::testing::KilledBySignal(SIGBUS), "");
}

TEST_F(MMapFileTest, ReadSharedTruncatePartialPage) {
  // Start from scratch.
  EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds());

  // Dirty the file.
  std::string buf(kPageSize, 'a');
  ASSERT_THAT(Write(buf.c_str(), buf.size()),
              SyscallSucceedsWithValue(buf.size()));

  // Map a page.
  uintptr_t addr;
  ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0),
              SyscallSucceeds());

  // Truncate to half of the page.
  EXPECT_THAT(ftruncate(fd_.get(), kPageSize / 2), SyscallSucceeds());

  // First half of the page untouched.
  EXPECT_EQ(0,
            memcmp(reinterpret_cast<void*>(addr), buf.data(), kPageSize / 2));

  // Second half is zeroed.
  std::string zeroed(kPageSize / 2, '\0');
  EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr + kPageSize / 2),
                      zeroed.c_str(), kPageSize / 2));
}

// Page can still be accessed and contents are intact after truncating a partial
// page.
TEST_F(MMapFileTest, WriteSharedTruncatePartialPage) {
  // Expand the file to a full page.
  EXPECT_THAT(ftruncate(fd_.get(), kPageSize), SyscallSucceeds());

  uintptr_t addr;
  ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED,
                         fd_.get(), 0),
              SyscallSucceeds());

  // Fill the entire page.
  std::string contents(kPageSize, 'A');
  memcpy(reinterpret_cast<void*>(addr), contents.c_str(), contents.size());

  // Truncate half of the page.
  EXPECT_THAT(ftruncate(fd_.get(), kPageSize / 2), SyscallSucceeds());

  // First half of the page untouched.
  EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), contents.c_str(),
                      kPageSize / 2));

  // Second half zeroed.
  std::string zeroed(kPageSize / 2, '\0');
  EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr + kPageSize / 2),
                      zeroed.c_str(), kPageSize / 2));
}

// MAP_PRIVATE writes are not carried through to the underlying file.
TEST_F(MMapFileTest, WritePrivate) {
  uintptr_t addr;
  ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE,
                         fd_.get(), 0),
              SyscallSucceeds());

  size_t len = strlen(kFileContents);
  memcpy(reinterpret_cast<void*>(addr), kFileContents, len);

  // The file should not be updated, but if it mistakenly is, it may not be
  // until after munmap is called.
  ASSERT_THAT(Unmap(), SyscallSucceeds());

  std::vector<char> buf(len);
  ASSERT_THAT(Read(buf.data(), buf.size()),
              SyscallSucceedsWithValue(buf.size()));
  // Cast to void* to avoid EXPECT_THAT assuming buf.data() is a
  // NUL-terminated C string. EXPECT_THAT will try to print a char* as a C
  // string, possibly overruning the buffer.
  EXPECT_THAT(reinterpret_cast<void*>(buf.data()),
              EqualsMemory(std::string(len, '\0')));
}

// SIGBUS raised when reading or writing past end of a mapped file.
TEST_P(MMapFileParamTest, SigBusDeath) {
  SetupGvisorDeathTest();

  uintptr_t addr;
  ASSERT_THAT(addr = Map(0, 2 * kPageSize, prot(), flags(), fd_.get(), 0),
              SyscallSucceeds());

  auto* start = reinterpret_cast<volatile char*>(addr + kPageSize);

  // MMapFileTest makes a file kPageSize/2 long. The entire first page should be
  // accessible, but anything beyond it should not.
  if (prot() & PROT_WRITE) {
    // Write beyond first page.
    size_t len = strlen(kFileContents);
    EXPECT_EXIT(std::copy(kFileContents, kFileContents + len, start),
                ::testing::KilledBySignal(SIGBUS), "");
  } else {
    // Read beyond first page.
    std::vector<char> in(kPageSize);
    EXPECT_EXIT(std::copy(start, start + kPageSize, in.data()),
                ::testing::KilledBySignal(SIGBUS), "");
  }
}

// Tests that SIGBUS is not raised when reading or writing to a file-mapped
// page before EOF, even if part of the mapping extends beyond EOF.
//
// See b/27877699.
TEST_P(MMapFileParamTest, NoSigBusOnPagesBeforeEOF) {
  uintptr_t addr;
  ASSERT_THAT(addr = Map(0, 2 * kPageSize, prot(), flags(), fd_.get(), 0),
              SyscallSucceeds());

  // The test passes if this survives.
  auto* start = reinterpret_cast<volatile char*>(addr + (kPageSize / 2) + 1);
  size_t len = strlen(kFileContents);
  if (prot() & PROT_WRITE) {
    std::copy(kFileContents, kFileContents + len, start);
  } else {
    std::vector<char> in(len);
    std::copy(start, start + len, in.data());
  }
}

// Tests that SIGBUS is not raised when reading or writing from a file-mapped
// page containing EOF, *after* the EOF.
TEST_P(MMapFileParamTest, NoSigBusOnPageContainingEOF) {
  uintptr_t addr;
  ASSERT_THAT(addr = Map(0, 2 * kPageSize, prot(), flags(), fd_.get(), 0),
              SyscallSucceeds());

  // The test passes if this survives. (Technically addr+kPageSize/2 is already
  // beyond EOF, but +1 to check for fencepost errors.)
  auto* start = reinterpret_cast<volatile char*>(addr + (kPageSize / 2) + 1);
  size_t len = strlen(kFileContents);
  if (prot() & PROT_WRITE) {
    std::copy(kFileContents, kFileContents + len, start);
  } else {
    std::vector<char> in(len);
    std::copy(start, start + len, in.data());
  }
}

// Tests that reading from writable shared file-mapped pages succeeds.
//
// On most platforms this is trivial, but when the file is mapped via the sentry
// page cache (which does not yet support writing to shared mappings), a bug
// caused reads to fail unnecessarily on such mappings. See b/28913513.
TEST_F(MMapFileTest, ReadingWritableSharedFilePageSucceeds) {
  uintptr_t addr;
  size_t len = strlen(kFileContents);

  ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED,
                         fd_.get(), 0),
              SyscallSucceeds());

  std::vector<char> buf(kPageSize);
  // The test passes if this survives.
  std::copy(reinterpret_cast<volatile char*>(addr),
            reinterpret_cast<volatile char*>(addr) + len, buf.data());
}

// Tests that EFAULT is returned when invoking a syscall that requires the OS to
// read past end of file (resulting in a fault in sentry context in the gVisor
// case). See b/28913513.
TEST_F(MMapFileTest, InternalSigBus) {
  uintptr_t addr;
  ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE,
                         fd_.get(), 0),
              SyscallSucceeds());

  // This depends on the fact that gVisor implements pipes internally.
  int pipefd[2];
  ASSERT_THAT(pipe(pipefd), SyscallSucceeds());
  EXPECT_THAT(
      write(pipefd[1], reinterpret_cast<void*>(addr + kPageSize), kPageSize),
      SyscallFailsWithErrno(EFAULT));

  EXPECT_THAT(close(pipefd[0]), SyscallSucceeds());
  EXPECT_THAT(close(pipefd[1]), SyscallSucceeds());
}

// Like InternalSigBus, but test the WriteZerosAt path by reading from
// /dev/zero to a shared mapping (so that the SIGBUS isn't caught during
// copy-on-write breaking).
TEST_F(MMapFileTest, InternalSigBusZeroing) {
  uintptr_t addr;
  ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED,
                         fd_.get(), 0),
              SyscallSucceeds());

  const FileDescriptor dev_zero =
      ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDONLY));
  EXPECT_THAT(read(dev_zero.get(), reinterpret_cast<void*>(addr + kPageSize),
                   kPageSize),
              SyscallFailsWithErrno(EFAULT));
}

// Checks that mmaps with a length of uint64_t(-PAGE_SIZE + 1) or greater do not
// induce a sentry panic (due to "rounding up" to 0).
TEST_F(MMapTest, HugeLength) {
  EXPECT_THAT(Map(0, static_cast<uint64_t>(-kPageSize + 1), PROT_NONE,
                  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
              SyscallFailsWithErrno(ENOMEM));
}

// Tests for a specific gVisor MM caching bug.
TEST_F(MMapTest, AccessCOWInvalidatesCachedSegments) {
  auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
  auto fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDWR));
  auto zero_fd = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDONLY));

  // Get a two-page private mapping and fill it with 1s.
  uintptr_t addr;
  ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
              SyscallSucceeds());
  memset(addr_, 1, 2 * kPageSize);
  MaybeSave();

  // Fork to make the mapping copy-on-write.
  pid_t const pid = fork();
  if (pid == 0) {
    // The child process waits for the parent to SIGKILL it.
    while (true) {
      pause();
    }
  }
  ASSERT_THAT(pid, SyscallSucceeds());
  auto cleanup_child = Cleanup([&] {
    EXPECT_THAT(kill(pid, SIGKILL), SyscallSucceeds());
    int status;
    EXPECT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid));
  });

  // Induce a read-only Access of the first page of the mapping, which will not
  // cause a copy. The usermem.Segment should be cached.
  ASSERT_THAT(PwriteFd(fd.get(), addr_, kPageSize, 0),
              SyscallSucceedsWithValue(kPageSize));

  // Induce a writable Access of both pages of the mapping. This should
  // invalidate the cached Segment.
  ASSERT_THAT(PreadFd(zero_fd.get(), addr_, 2 * kPageSize, 0),
              SyscallSucceedsWithValue(2 * kPageSize));

  // Induce a read-only Access of the first page of the mapping again. It should
  // read the 0s that were stored in the mapping by the read from /dev/zero. If
  // the read failed to invalidate the cached Segment, it will instead read the
  // 1s in the stale page.
  ASSERT_THAT(PwriteFd(fd.get(), addr_, kPageSize, 0),
              SyscallSucceedsWithValue(kPageSize));
  std::vector<char> buf(kPageSize);
  ASSERT_THAT(PreadFd(fd.get(), buf.data(), kPageSize, 0),
              SyscallSucceedsWithValue(kPageSize));
  for (size_t i = 0; i < kPageSize; i++) {
    ASSERT_EQ(0, buf[i]) << "at offset " << i;
  }
}

TEST_F(MMapTest, NoReserve) {
  const size_t kSize = 10 * 1 << 20;  // 10M
  uintptr_t addr;
  ASSERT_THAT(addr = Map(0, kSize, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0),
              SyscallSucceeds());
  EXPECT_GT(addr, 0);

  // Check that every page can be read/written. Technically, writing to memory
  // could SIGSEGV in case there is no more memory available. In gVisor it
  // would never happen though because NORESERVE is ignored. In Linux, it's
  // possible to fail, but allocation is small enough that it's highly likely
  // to succeed.
  for (size_t j = 0; j < kSize; j += kPageSize) {
    EXPECT_EQ(0, reinterpret_cast<char*>(addr)[j]);
    reinterpret_cast<char*>(addr)[j] = j;
  }
}

// Map more than the gVisor page-cache map unit (64k) and ensure that
// it is consistent with reading from the file.
TEST_F(MMapFileTest, Bug38498194) {
  // Choose a sufficiently large map unit.
  constexpr int kSize = 4 * 1024 * 1024;
  EXPECT_THAT(ftruncate(fd_.get(), kSize), SyscallSucceeds());

  // Map a large enough region so that multiple internal segments
  // are created to back the mapping.
  uintptr_t addr;
  ASSERT_THAT(
      addr = Map(0, kSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd_.get(), 0),
      SyscallSucceeds());

  std::vector<char> expect(kSize, 'a');
  std::copy(expect.data(), expect.data() + expect.size(),
            reinterpret_cast<volatile char*>(addr));

  // Trigger writeback for gVisor. In Linux pages stay cached until
  // it can't hold onto them anymore.
  ASSERT_THAT(Unmap(), SyscallSucceeds());

  std::vector<char> buf(kSize);
  ASSERT_THAT(Read(buf.data(), buf.size()),
              SyscallSucceedsWithValue(buf.size()));
  EXPECT_EQ(buf, expect) << std::string(buf.data(), buf.size());
}

// Tests that reading from a file to a memory mapping of the same file does not
// deadlock. See b/34813270.
TEST_F(MMapFileTest, SelfRead) {
  uintptr_t addr;
  ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED,
                         fd_.get(), 0),
              SyscallSucceeds());
  EXPECT_THAT(Read(reinterpret_cast<char*>(addr), kPageSize / 2),
              SyscallSucceedsWithValue(kPageSize / 2));
  // The resulting file contents are poorly-specified and irrelevant.
}

// Tests that writing to a file from a memory mapping of the same file does not
// deadlock. Regression test for b/34813270.
TEST_F(MMapFileTest, SelfWrite) {
  uintptr_t addr;
  ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0),
              SyscallSucceeds());
  EXPECT_THAT(Write(reinterpret_cast<char*>(addr), kPageSize / 2),
              SyscallSucceedsWithValue(kPageSize / 2));
  // The resulting file contents are poorly-specified and irrelevant.
}

TEST(MMapDeathTest, TruncateAfterCOWBreak) {
  SetupGvisorDeathTest();

  // Create and map a single-page file.
  auto const temp_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
  auto const fd = ASSERT_NO_ERRNO_AND_VALUE(Open(temp_file.path(), O_RDWR));
  ASSERT_THAT(ftruncate(fd.get(), kPageSize), SyscallSucceeds());
  auto const mapping = ASSERT_NO_ERRNO_AND_VALUE(Mmap(
      nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd.get(), 0));

  // Write to this mapping, causing the page to be copied for write.
  memset(mapping.ptr(), 'a', mapping.len());
  MaybeSave();  // Trigger a co-operative save cycle.

  // Truncate the file and expect it to invalidate the copied page.
  ASSERT_THAT(ftruncate(fd.get(), 0), SyscallSucceeds());
  EXPECT_EXIT(*reinterpret_cast<volatile char*>(mapping.ptr()),
              ::testing::KilledBySignal(SIGBUS), "");
}

// Regression test for #147.
TEST(MMapNoFixtureTest, MapReadOnlyAfterCreateWriteOnly) {
  std::string filename = NewTempAbsPath();

  // We have to create the file O_RDONLY to reproduce the bug because
  // fsgofer.localFile.Create() silently upgrades O_WRONLY to O_RDWR, causing
  // the cached "write-only" FD to be read/write and therefore usable by mmap().
  auto const ro_fd = ASSERT_NO_ERRNO_AND_VALUE(
      Open(filename, O_RDONLY | O_CREAT | O_EXCL, 0666));

  // Get a write-only FD for the same file, which should be ignored by mmap()
  // (but isn't in #147).
  auto const wo_fd = ASSERT_NO_ERRNO_AND_VALUE(Open(filename, O_WRONLY));
  ASSERT_THAT(ftruncate(wo_fd.get(), kPageSize), SyscallSucceeds());

  auto const mapping = ASSERT_NO_ERRNO_AND_VALUE(
      Mmap(nullptr, kPageSize, PROT_READ, MAP_SHARED, ro_fd.get(), 0));
  std::vector<char> buf(kPageSize);
  // The test passes if this survives.
  std::copy(static_cast<char*>(mapping.ptr()),
            static_cast<char*>(mapping.endptr()), buf.data());
}

// Conditional on MAP_32BIT.
// This flag is supported only on x86-64, for 64-bit programs.
#ifdef __x86_64__

TEST(MMapNoFixtureTest, Map32Bit) {
  auto const mapping = ASSERT_NO_ERRNO_AND_VALUE(
      MmapAnon(kPageSize, PROT_NONE, MAP_PRIVATE | MAP_32BIT));
  EXPECT_LT(mapping.addr(), static_cast<uintptr_t>(1) << 32);
  EXPECT_LE(mapping.endaddr(), static_cast<uintptr_t>(1) << 32);
}

#endif  // defined(__x86_64__)

INSTANTIATE_TEST_SUITE_P(
    ReadWriteSharedPrivate, MMapFileParamTest,
    ::testing::Combine(::testing::ValuesIn({
                           PROT_READ,
                           PROT_WRITE,
                           PROT_READ | PROT_WRITE,
                       }),
                       ::testing::ValuesIn({MAP_SHARED, MAP_PRIVATE})));

}  // namespace

}  // namespace testing
}  // namespace gvisor