279 files changed, 80398 insertions, 0 deletions
diff --git a/test/syscalls/linux/32bit.cc b/test/syscalls/linux/32bit.cc
new file mode 100644
index 000000000..3c825477c
--- /dev/null
+++ b/test/syscalls/linux/32bit.cc
@@ -0,0 +1,248 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <string.h>
+#include <sys/mman.h>
+
+#include "gtest/gtest.h"
+#include "absl/base/macros.h"
+#include "test/util/memory_util.h"
+#include "test/util/platform_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/test_util.h"
+
+#ifndef __x86_64__
+#error "This test is x86-64 specific."
+#endif
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+constexpr char kInt3 = '\xcc';
+constexpr char kInt80[2] = {'\xcd', '\x80'};
+constexpr char kSyscall[2] = {'\x0f', '\x05'};
+constexpr char kSysenter[2] = {'\x0f', '\x34'};
+
+void ExitGroup32(const char instruction[2], int code) {
+  const Mapping m = ASSERT_NO_ERRNO_AND_VALUE(
+      Mmap(nullptr, kPageSize, PROT_READ | PROT_WRITE | PROT_EXEC,
+           MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0));
+
+  // Fill with INT 3 in case we execute too far.
+  memset(m.ptr(), kInt3, m.len());
+
+  // Copy in the actual instruction.
+  memcpy(m.ptr(), instruction, 2);
+
+  // We're playing *extremely* fast-and-loose with the various syscall ABIs
+  // here, which we can more-or-less get away with since exit_group doesn't
+  // return.
+  //
+  // SYSENTER expects the user stack in (%ebp) and arg6 in 0(%ebp). The kernel
+  // will unconditionally dereference %ebp for arg6, so we must pass a valid
+  // address or it will return EFAULT.
+  //
+  // SYSENTER also unconditionally returns to thread_info->sysenter_return which
+  // is ostensibly a stub in the 32-bit VDSO. But a 64-bit binary doesn't have
+  // the 32-bit VDSO mapped, so sysenter_return will simply be the value
+  // inherited from the most recent 32-bit ancestor, or NULL if there is none.
+  // As a result, return would not return from SYSENTER.
+  asm volatile(
+      "movl $252, %%eax\n"     // exit_group
+      "movl %[code], %%ebx\n"  // code
+      "movl %%edx, %%ebp\n"    // SYSENTER: user stack (use IP as a valid addr)
+      "leaq -20(%%rsp), %%rsp\n"
+      "movl $0x2b, 16(%%rsp)\n"  // SS = CPL3 data segment
+      "movl $0,12(%%rsp)\n"      // ESP = nullptr (unused)
+      "movl $0, 8(%%rsp)\n"      // EFLAGS
+      "movl $0x23, 4(%%rsp)\n"   // CS = CPL3 32-bit code segment
+      "movl %%edx, 0(%%rsp)\n"   // EIP
+      "iretl\n"
+      "int $3\n"
+      :
+      : [ code ] "m"(code), [ ip ] "d"(m.ptr())
+      : "rax", "rbx");
+}
+
+constexpr int kExitCode = 42;
+
+TEST(Syscall32Bit, Int80) {
+  switch (PlatformSupport32Bit()) {
+    case PlatformSupport::NotSupported:
+      break;
+    case PlatformSupport::Segfault:
+      EXPECT_EXIT(ExitGroup32(kInt80, kExitCode),
+                  ::testing::KilledBySignal(SIGSEGV), "");
+      break;
+
+    case PlatformSupport::Ignored:
+      // Since the call is ignored, we'll hit the int3 trap.
+      EXPECT_EXIT(ExitGroup32(kInt80, kExitCode),
+                  ::testing::KilledBySignal(SIGTRAP), "");
+      break;
+
+    case PlatformSupport::Allowed:
+      EXPECT_EXIT(ExitGroup32(kInt80, kExitCode), ::testing::ExitedWithCode(42),
+                  "");
+      break;
+  }
+}
+
+TEST(Syscall32Bit, Sysenter) {
+  if ((PlatformSupport32Bit() == PlatformSupport::Allowed ||
+       PlatformSupport32Bit() == PlatformSupport::Ignored) &&
+      GetCPUVendor() == CPUVendor::kAMD) {
+    // SYSENTER is an illegal instruction in compatibility mode on AMD.
+    EXPECT_EXIT(ExitGroup32(kSysenter, kExitCode),
+                ::testing::KilledBySignal(SIGILL), "");
+    return;
+  }
+
+  switch (PlatformSupport32Bit()) {
+    case PlatformSupport::NotSupported:
+      break;
+
+    case PlatformSupport::Segfault:
+      EXPECT_EXIT(ExitGroup32(kSysenter, kExitCode),
+                  ::testing::KilledBySignal(SIGSEGV), "");
+      break;
+
+    case PlatformSupport::Ignored:
+      // See above, except expected code is SIGSEGV.
+      EXPECT_EXIT(ExitGroup32(kSysenter, kExitCode),
+                  ::testing::KilledBySignal(SIGSEGV), "");
+      break;
+
+    case PlatformSupport::Allowed:
+      EXPECT_EXIT(ExitGroup32(kSysenter, kExitCode),
+                  ::testing::ExitedWithCode(42), "");
+      break;
+  }
+}
+
+TEST(Syscall32Bit, Syscall) {
+  if ((PlatformSupport32Bit() == PlatformSupport::Allowed ||
+       PlatformSupport32Bit() == PlatformSupport::Ignored) &&
+      GetCPUVendor() == CPUVendor::kIntel) {
+    // SYSCALL is an illegal instruction in compatibility mode on Intel.
+    EXPECT_EXIT(ExitGroup32(kSyscall, kExitCode),
+                ::testing::KilledBySignal(SIGILL), "");
+    return;
+  }
+
+  switch (PlatformSupport32Bit()) {
+    case PlatformSupport::NotSupported:
+      break;
+
+    case PlatformSupport::Segfault:
+      EXPECT_EXIT(ExitGroup32(kSyscall, kExitCode),
+                  ::testing::KilledBySignal(SIGSEGV), "");
+      break;
+
+    case PlatformSupport::Ignored:
+      // See above.
+      EXPECT_EXIT(ExitGroup32(kSyscall, kExitCode),
+                  ::testing::KilledBySignal(SIGSEGV), "");
+      break;
+
+    case PlatformSupport::Allowed:
+      EXPECT_EXIT(ExitGroup32(kSyscall, kExitCode),
+                  ::testing::ExitedWithCode(42), "");
+      break;
+  }
+}
+
+// Far call code called below.
+//
+// Input stack layout:
+//
+// %esp+12 lcall segment
+// %esp+8  lcall address offset
+// %esp+0  return address
+//
+// The lcall will enter compatibility mode and jump to the call address (the
+// address of the lret). The lret will return to 64-bit mode at the retq, which
+// will return to the external caller of this function.
+//
+// Since this enters compatibility mode, it must be mapped in a 32-bit region of
+// address space and have a 32-bit stack pointer.
+constexpr char kFarCall[] = {
+    '\x67', '\xff', '\x5c', '\x24', '\x08',  // lcall *8(%esp)
+    '\xc3',                                  // retq
+    '\xcb',                                  // lret
+};
+
+void FarCall32() {
+  const Mapping m = ASSERT_NO_ERRNO_AND_VALUE(
+      Mmap(nullptr, kPageSize, PROT_READ | PROT_WRITE | PROT_EXEC,
+           MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0));
+
+  // Fill with INT 3 in case we execute too far.
+  memset(m.ptr(), kInt3, m.len());
+
+  // 32-bit code.
+  memcpy(m.ptr(), kFarCall, sizeof(kFarCall));
+
+  // Use the end of the code page as its stack.
+  uintptr_t stack = m.endaddr();
+
+  uintptr_t lcall = m.addr();
+  uintptr_t lret = m.addr() + sizeof(kFarCall) - 1;
+
+  // N.B. We must save and restore RSP manually. GCC can do so automatically
+  // with an "rsp" clobber, but clang cannot.
+  asm volatile(
+      // Place the address of lret (%edx) and the 32-bit code segment (0x23) on
+      // the 32-bit stack for lcall.
+      "subl $0x8, %%ecx\n"
+      "movl $0x23, 4(%%ecx)\n"
+      "movl %%edx, 0(%%ecx)\n"
+
+      // Save the current stack and switch to 32-bit stack.
+      "pushq %%rbp\n"
+      "movq %%rsp, %%rbp\n"
+      "movq %%rcx, %%rsp\n"
+
+      // Run the lcall code.
+      "callq *%%rbx\n"
+
+      // Restore the old stack.
+      "leaveq\n"
+      : "+c"(stack)
+      : "b"(lcall), "d"(lret));
+}
+
+TEST(Call32Bit, Disallowed) {
+  switch (PlatformSupport32Bit()) {
+    case PlatformSupport::NotSupported:
+      break;
+
+    case PlatformSupport::Segfault:
+      EXPECT_EXIT(FarCall32(), ::testing::KilledBySignal(SIGSEGV), "");
+      break;
+
+    case PlatformSupport::Ignored:
+      ABSL_FALLTHROUGH_INTENDED;
+    case PlatformSupport::Allowed:
+      // Shouldn't crash.
+      FarCall32();
+  }
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD
new file mode 100644
index 000000000..9e097c888
--- /dev/null
+++ b/test/syscalls/linux/BUILD
@@ -0,0 +1,3933 @@
+load("//tools:defs.bzl", "cc_binary", "cc_library", "default_net_util", "gtest", "select_arch", "select_system")
+
+package(
+    default_visibility = ["//:sandbox"],
+    licenses = ["notice"],
+)
+
+exports_files(
+    [
+        "socket.cc",
+        "socket_inet_loopback.cc",
+        "socket_ip_loopback_blocking.cc",
+        "socket_ip_tcp_generic_loopback.cc",
+        "socket_ip_tcp_loopback.cc",
+        "socket_ip_tcp_loopback_blocking.cc",
+        "socket_ip_tcp_loopback_nonblock.cc",
+        "socket_ip_tcp_udp_generic.cc",
+        "socket_ip_udp_loopback.cc",
+        "socket_ip_udp_loopback_blocking.cc",
+        "socket_ip_udp_loopback_nonblock.cc",
+        "socket_ip_unbound.cc",
+        "socket_ipv4_tcp_unbound_external_networking_test.cc",
+        "socket_ipv4_udp_unbound_external_networking_test.cc",
+        "socket_ipv4_udp_unbound_loopback.cc",
+        "tcp_socket.cc",
+        "udp_bind.cc",
+        "udp_socket.cc",
+    ],
+    visibility = ["//:sandbox"],
+)
+
+cc_binary(
+    name = "sigaltstack_check",
+    testonly = 1,
+    srcs = ["sigaltstack_check.cc"],
+    deps = ["//test/util:logging"],
+)
+
+cc_binary(
+    name = "exec_assert_closed_workload",
+    testonly = 1,
+    srcs = ["exec_assert_closed_workload.cc"],
+    deps = [
+        "@com_google_absl//absl/strings",
+    ],
+)
+
+cc_binary(
+    name = "exec_basic_workload",
+    testonly = 1,
+    srcs = [
+        "exec.h",
+        "exec_basic_workload.cc",
+    ],
+)
+
+cc_binary(
+    name = "exec_proc_exe_workload",
+    testonly = 1,
+    srcs = ["exec_proc_exe_workload.cc"],
+    deps = [
+        "//test/util:fs_util",
+        "//test/util:posix_error",
+    ],
+)
+
+cc_binary(
+    name = "exec_state_workload",
+    testonly = 1,
+    srcs = ["exec_state_workload.cc"],
+    deps = ["@com_google_absl//absl/strings"],
+)
+
+sh_binary(
+    name = "exit_script",
+    testonly = 1,
+    srcs = [
+        "exit_script.sh",
+    ],
+)
+
+cc_binary(
+    name = "priority_execve",
+    testonly = 1,
+    srcs = [
+        "priority_execve.cc",
+    ],
+)
+
+cc_library(
+    name = "base_poll_test",
+    testonly = 1,
+    srcs = ["base_poll_test.cc"],
+    hdrs = ["base_poll_test.h"],
+    deps = [
+        "@com_google_absl//absl/memory",
+        "@com_google_absl//absl/synchronization",
+        "@com_google_absl//absl/time",
+        gtest,
+        "//test/util:logging",
+        "//test/util:signal_util",
+        "//test/util:test_util",
+        "//test/util:thread_util",
+    ],
+)
+
+cc_library(
+    name = "file_base",
+    testonly = 1,
+    hdrs = ["file_base.h"],
+    deps = [
+        "//test/util:file_descriptor",
+        "@com_google_absl//absl/strings",
+        gtest,
+        "//test/util:posix_error",
+        "//test/util:temp_path",
+        "//test/util:test_util",
+    ],
+)
+
+cc_library(
+    name = "socket_netlink_util",
+    testonly = 1,
+    srcs = ["socket_netlink_util.cc"],
+    hdrs = ["socket_netlink_util.h"],
+    deps = [
+        ":socket_test_util",
+        "//test/util:file_descriptor",
+        "//test/util:posix_error",
+        "@com_google_absl//absl/strings",
+    ],
+)
+
+cc_library(
+    name = "socket_netlink_route_util",
+    testonly = 1,
+    srcs = ["socket_netlink_route_util.cc"],
+    hdrs = ["socket_netlink_route_util.h"],
+    deps = [
+        ":socket_netlink_util",
+    ],
+)
+
+cc_library(
+    name = "socket_test_util",
+    testonly = 1,
+    srcs = [
+        "socket_test_util.cc",
+        "socket_test_util_impl.cc",
+    ],
+    hdrs = ["socket_test_util.h"],
+    defines = select_system(),
+    deps = default_net_util() + [
+        gtest,
+        "@com_google_absl//absl/memory",
+        "@com_google_absl//absl/strings",
+        "@com_google_absl//absl/strings:str_format",
+        "@com_google_absl//absl/time",
+        "@com_google_absl//absl/types:optional",
+        "//test/util:file_descriptor",
+        "//test/util:posix_error",
+        "//test/util:temp_path",
+        "//test/util:test_util",
+        "//test/util:thread_util",
+    ],
+)
+
+cc_library(
+    name = "unix_domain_socket_test_util",
+    testonly = 1,
+    srcs = ["unix_domain_socket_test_util.cc"],
+    hdrs = ["unix_domain_socket_test_util.h"],
+    deps = [
+        ":socket_test_util",
+        "@com_google_absl//absl/strings",
+        gtest,
+        "//test/util:test_util",
+    ],
+)
+
+cc_library(
+    name = "ip_socket_test_util",
+    testonly = 1,
+    srcs = ["ip_socket_test_util.cc"],
+    hdrs = ["ip_socket_test_util.h"],
+    deps = [
+        ":socket_test_util",
+        "@com_google_absl//absl/strings",
+    ],
+)
+
+cc_binary(
+    name = "clock_nanosleep_test",
+    testonly = 1,
+    srcs = ["clock_nanosleep.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:cleanup",
+        "@com_google_absl//absl/time",
+        gtest,
+        "//test/util:posix_error",
+        "//test/util:signal_util",
+        "//test/util:test_main",
+        "//test/util:test_util",
+        "//test/util:thread_util",
+        "//test/util:timer_util",
+    ],
+)
+
+cc_binary(
+    name = "32bit_test",
+    testonly = 1,
+    srcs = select_arch(
+        amd64 = ["32bit.cc"],
+        arm64 = [],
+    ),
+    linkstatic = 1,
+    deps = [
+        "@com_google_absl//absl/base:core_headers",
+        gtest,
+        "//test/util:memory_util",
+        "//test/util:platform_util",
+        "//test/util:posix_error",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "accept_bind_test",
+    testonly = 1,
+    srcs = ["accept_bind.cc"],
+    linkstatic = 1,
+    deps = [
+        ":socket_test_util",
+        ":unix_domain_socket_test_util",
+        "//test/util:file_descriptor",
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "accept_bind_stream_test",
+    testonly = 1,
+    srcs = ["accept_bind_stream.cc"],
+    linkstatic = 1,
+    deps = [
+        ":socket_test_util",
+        ":unix_domain_socket_test_util",
+        "//test/util:file_descriptor",
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "access_test",
+    testonly = 1,
+    srcs = ["access.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:capability_util",
+        "//test/util:fs_util",
+        gtest,
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "affinity_test",
+    testonly = 1,
+    srcs = ["affinity.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:cleanup",
+        "//test/util:fs_util",
+        "@com_google_absl//absl/strings",
+        gtest,
+        "//test/util:posix_error",
+        "//test/util:test_main",
+        "//test/util:test_util",
+        "//test/util:thread_util",
+    ],
+)
+
+cc_binary(
+    name = "aio_test",
+    testonly = 1,
+    srcs = [
+        "aio.cc",
+        "file_base.h",
+    ],
+    linkstatic = 1,
+    deps = [
+        "//test/util:cleanup",
+        "//test/util:file_descriptor",
+        "//test/util:fs_util",
+        "@com_google_absl//absl/strings",
+        gtest,
+        "//test/util:memory_util",
+        "//test/util:posix_error",
+        "//test/util:proc_util",
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "alarm_test",
+    testonly = 1,
+    srcs = ["alarm.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:file_descriptor",
+        "@com_google_absl//absl/time",
+        gtest,
+        "//test/util:logging",
+        "//test/util:signal_util",
+        "//test/util:test_util",
+        "//test/util:thread_util",
+    ],
+)
+
+cc_binary(
+    name = "bad_test",
+    testonly = 1,
+    srcs = ["bad.cc"],
+    linkstatic = 1,
+    visibility = [
+        "//:sandbox",
+    ],
+    deps = [
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "bind_test",
+    testonly = 1,
+    srcs = ["bind.cc"],
+    linkstatic = 1,
+    deps = [
+        ":socket_test_util",
+        ":unix_domain_socket_test_util",
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "socket_test",
+    testonly = 1,
+    srcs = ["socket.cc"],
+    linkstatic = 1,
+    deps = [
+        ":socket_test_util",
+        gtest,
+        "//test/util:file_descriptor",
+        "//test/util:temp_umask",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "socket_capability_test",
+    testonly = 1,
+    srcs = ["socket_capability.cc"],
+    linkstatic = 1,
+    deps = [
+        ":socket_test_util",
+        "//test/util:capability_util",
+        "//test/util:file_descriptor",
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "brk_test",
+    testonly = 1,
+    srcs = ["brk.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "chdir_test",
+    testonly = 1,
+    srcs = ["chdir.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:capability_util",
+        gtest,
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "chmod_test",
+    testonly = 1,
+    srcs = ["chmod.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:capability_util",
+        "//test/util:file_descriptor",
+        "//test/util:fs_util",
+        gtest,
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "chown_test",
+    testonly = 1,
+    srcs = ["chown.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:capability_util",
+        "//test/util:file_descriptor",
+        "//test/util:fs_util",
+        "@com_google_absl//absl/flags:flag",
+        "@com_google_absl//absl/synchronization",
+        gtest,
+        "//test/util:posix_error",
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+        "//test/util:thread_util",
+    ],
+)
+
+cc_binary(
+    name = "sticky_test",
+    testonly = 1,
+    srcs = ["sticky.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:capability_util",
+        "//test/util:file_descriptor",
+        "//test/util:fs_util",
+        "@com_google_absl//absl/flags:flag",
+        gtest,
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+        "//test/util:thread_util",
+    ],
+)
+
+cc_binary(
+    name = "chroot_test",
+    testonly = 1,
+    srcs = ["chroot.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:capability_util",
+        "//test/util:cleanup",
+        "//test/util:file_descriptor",
+        "//test/util:fs_util",
+        "@com_google_absl//absl/strings",
+        gtest,
+        "//test/util:mount_util",
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "clock_getres_test",
+    testonly = 1,
+    srcs = ["clock_getres.cc"],
+    linkstatic = 1,
+    deps = [
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "clock_gettime_test",
+    testonly = 1,
+    srcs = ["clock_gettime.cc"],
+    linkstatic = 1,
+    deps = [
+        "@com_google_absl//absl/time",
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+        "//test/util:thread_util",
+    ],
+)
+
+cc_binary(
+    name = "concurrency_test",
+    testonly = 1,
+    srcs = ["concurrency.cc"],
+    linkstatic = 1,
+    deps = [
+        "@com_google_absl//absl/strings",
+        "@com_google_absl//absl/time",
+        gtest,
+        "//test/util:platform_util",
+        "//test/util:test_main",
+        "//test/util:test_util",
+        "//test/util:thread_util",
+    ],
+)
+
+cc_binary(
+    name = "connect_external_test",
+    testonly = 1,
+    srcs = ["connect_external.cc"],
+    linkstatic = 1,
+    deps = [
+        ":socket_test_util",
+        "//test/util:file_descriptor",
+        "//test/util:fs_util",
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "creat_test",
+    testonly = 1,
+    srcs = ["creat.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:fs_util",
+        gtest,
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "dev_test",
+    testonly = 1,
+    srcs = ["dev.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:file_descriptor",
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "dup_test",
+    testonly = 1,
+    srcs = ["dup.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:eventfd_util",
+        "//test/util:file_descriptor",
+        gtest,
+        "//test/util:posix_error",
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "epoll_test",
+    testonly = 1,
+    srcs = ["epoll.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:epoll_util",
+        "//test/util:eventfd_util",
+        "//test/util:file_descriptor",
+        gtest,
+        "//test/util:posix_error",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "eventfd_test",
+    testonly = 1,
+    srcs = ["eventfd.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:epoll_util",
+        "//test/util:eventfd_util",
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+        "//test/util:thread_util",
+    ],
+)
+
+cc_binary(
+    name = "exceptions_test",
+    testonly = 1,
+    srcs = select_arch(
+        amd64 = ["exceptions.cc"],
+        arm64 = [],
+    ),
+    linkstatic = 1,
+    deps = [
+        gtest,
+        "//test/util:logging",
+        "//test/util:platform_util",
+        "//test/util:signal_util",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "getcpu_test",
+    testonly = 1,
+    srcs = ["getcpu.cc"],
+    linkstatic = 1,
+    deps = [
+        "@com_google_absl//absl/time",
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "getcpu_host_test",
+    testonly = 1,
+    srcs = ["getcpu.cc"],
+    linkstatic = 1,
+    deps = [
+        "@com_google_absl//absl/time",
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "getrusage_test",
+    testonly = 1,
+    srcs = ["getrusage.cc"],
+    linkstatic = 1,
+    deps = [
+        "@com_google_absl//absl/time",
+        gtest,
+        "//test/util:logging",
+        "//test/util:memory_util",
+        "//test/util:signal_util",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "exec_binary_test",
+    testonly = 1,
+    srcs = ["exec_binary.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:cleanup",
+        "//test/util:file_descriptor",
+        "//test/util:fs_util",
+        "@com_google_absl//absl/strings",
+        gtest,
+        "//test/util:multiprocess_util",
+        "//test/util:posix_error",
+        "//test/util:proc_util",
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "exec_test",
+    testonly = 1,
+    srcs = [
+        "exec.cc",
+        "exec.h",
+    ],
+    data = [
+        ":exec_assert_closed_workload",
+        ":exec_basic_workload",
+        ":exec_proc_exe_workload",
+        ":exec_state_workload",
+        ":exit_script",
+        ":priority_execve",
+    ],
+    linkstatic = 1,
+    deps = [
+        "//test/util:file_descriptor",
+        "//test/util:fs_util",
+        "@com_google_absl//absl/strings",
+        "@com_google_absl//absl/synchronization",
+        "@com_google_absl//absl/types:optional",
+        gtest,
+        "//test/util:multiprocess_util",
+        "//test/util:posix_error",
+        "//test/util:temp_path",
+        "//test/util:test_util",
+        "//test/util:thread_util",
+    ],
+)
+
+cc_binary(
+    name = "exit_test",
+    testonly = 1,
+    srcs = ["exit.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:file_descriptor",
+        "@com_google_absl//absl/time",
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+        "//test/util:time_util",
+    ],
+)
+
+cc_binary(
+    name = "fallocate_test",
+    testonly = 1,
+    srcs = ["fallocate.cc"],
+    linkstatic = 1,
+    deps = [
+        ":file_base",
+        ":socket_test_util",
+        "//test/util:cleanup",
+        "//test/util:eventfd_util",
+        "//test/util:file_descriptor",
+        "@com_google_absl//absl/strings",
+        "@com_google_absl//absl/time",
+        gtest,
+        "//test/util:posix_error",
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "fault_test",
+    testonly = 1,
+    srcs = ["fault.cc"],
+    linkstatic = 1,
+    deps = [
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "fchdir_test",
+    testonly = 1,
+    srcs = ["fchdir.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:capability_util",
+        gtest,
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "fcntl_test",
+    testonly = 1,
+    srcs = ["fcntl.cc"],
+    linkstatic = 1,
+    deps = [
+        ":socket_test_util",
+        "//test/util:cleanup",
+        "//test/util:epoll_util",
+        "//test/util:eventfd_util",
+        "//test/util:fs_util",
+        "@com_google_absl//absl/base:core_headers",
+        "@com_google_absl//absl/flags:flag",
+        "@com_google_absl//absl/memory",
+        "@com_google_absl//absl/strings",
+        "@com_google_absl//absl/time",
+        gtest,
+        "//test/util:multiprocess_util",
+        "//test/util:posix_error",
+        "//test/util:save_util",
+        "//test/util:temp_path",
+        "//test/util:test_util",
+        "//test/util:thread_util",
+        "//test/util:timer_util",
+    ],
+)
+
+cc_binary(
+    name = "flock_test",
+    testonly = 1,
+    srcs = [
+        "file_base.h",
+        "flock.cc",
+    ],
+    linkstatic = 1,
+    deps = [
+        ":socket_test_util",
+        "//test/util:file_descriptor",
+        "@com_google_absl//absl/strings",
+        "@com_google_absl//absl/time",
+        gtest,
+        "//test/util:epoll_util",
+        "//test/util:eventfd_util",
+        "//test/util:posix_error",
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+        "//test/util:thread_util",
+        "//test/util:timer_util",
+    ],
+)
+
+cc_binary(
+    name = "fork_test",
+    testonly = 1,
+    srcs = ["fork.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:capability_util",
+        "@com_google_absl//absl/time",
+        gtest,
+        "//test/util:logging",
+        "//test/util:memory_util",
+        "//test/util:test_main",
+        "//test/util:test_util",
+        "//test/util:thread_util",
+    ],
+)
+
+cc_binary(
+    name = "fpsig_fork_test",
+    testonly = 1,
+    srcs = ["fpsig_fork.cc"],
+    linkstatic = 1,
+    deps = [
+        gtest,
+        "//test/util:logging",
+        "//test/util:test_main",
+        "//test/util:test_util",
+        "//test/util:thread_util",
+    ],
+)
+
+cc_binary(
+    name = "fpsig_nested_test",
+    testonly = 1,
+    srcs = ["fpsig_nested.cc"],
+    linkstatic = 1,
+    deps = [
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+        "//test/util:thread_util",
+    ],
+)
+
+cc_binary(
+    name = "sync_file_range_test",
+    testonly = 1,
+    srcs = ["sync_file_range.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:file_descriptor",
+        gtest,
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "fsync_test",
+    testonly = 1,
+    srcs = ["fsync.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:file_descriptor",
+        gtest,
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "futex_test",
+    testonly = 1,
+    srcs = ["futex.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:cleanup",
+        "//test/util:file_descriptor",
+        "@com_google_absl//absl/memory",
+        "@com_google_absl//absl/time",
+        gtest,
+        "//test/util:memory_util",
+        "//test/util:save_util",
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+        "//test/util:thread_util",
+        "//test/util:time_util",
+        "//test/util:timer_util",
+    ],
+)
+
+cc_binary(
+    name = "getdents_test",
+    testonly = 1,
+    srcs = ["getdents.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:eventfd_util",
+        "//test/util:file_descriptor",
+        "//test/util:fs_util",
+        "@com_google_absl//absl/strings",
+        gtest,
+        "//test/util:posix_error",
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "getrandom_test",
+    testonly = 1,
+    srcs = ["getrandom.cc"],
+    linkstatic = 1,
+    deps = [
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "inotify_test",
+    testonly = 1,
+    srcs = ["inotify.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:epoll_util",
+        "//test/util:file_descriptor",
+        "//test/util:fs_util",
+        "//test/util:posix_error",
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+        "//test/util:thread_util",
+        "@com_google_absl//absl/strings",
+        "@com_google_absl//absl/strings:str_format",
+        "@com_google_absl//absl/synchronization",
+        "@com_google_absl//absl/time",
+    ],
+)
+
+cc_binary(
+    name = "ioctl_test",
+    testonly = 1,
+    srcs = ["ioctl.cc"],
+    linkstatic = 1,
+    deps = [
+        ":ip_socket_test_util",
+        ":socket_test_util",
+        ":unix_domain_socket_test_util",
+        "//test/util:file_descriptor",
+        gtest,
+        "//test/util:signal_util",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_library(
+    name = "iptables_types",
+    testonly = 1,
+    hdrs = [
+        "iptables.h",
+    ],
+)
+
+cc_binary(
+    name = "iptables_test",
+    testonly = 1,
+    srcs = [
+        "iptables.cc",
+    ],
+    linkstatic = 1,
+    deps = [
+        ":iptables_types",
+        ":socket_test_util",
+        "//test/util:capability_util",
+        "//test/util:file_descriptor",
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "itimer_test",
+    testonly = 1,
+    srcs = ["itimer.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:file_descriptor",
+        "@com_google_absl//absl/strings",
+        "@com_google_absl//absl/time",
+        gtest,
+        "//test/util:logging",
+        "//test/util:multiprocess_util",
+        "//test/util:posix_error",
+        "//test/util:signal_util",
+        "//test/util:test_util",
+        "//test/util:thread_util",
+        "//test/util:timer_util",
+    ],
+)
+
+cc_binary(
+    name = "kill_test",
+    testonly = 1,
+    srcs = ["kill.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:capability_util",
+        "//test/util:file_descriptor",
+        "@com_google_absl//absl/flags:flag",
+        "@com_google_absl//absl/synchronization",
+        "@com_google_absl//absl/time",
+        gtest,
+        "//test/util:logging",
+        "//test/util:signal_util",
+        "//test/util:test_main",
+        "//test/util:test_util",
+        "//test/util:thread_util",
+    ],
+)
+
+cc_binary(
+    name = "link_test",
+    testonly = 1,
+    srcs = ["link.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:capability_util",
+        "//test/util:file_descriptor",
+        "//test/util:fs_util",
+        "@com_google_absl//absl/flags:flag",
+        "@com_google_absl//absl/strings",
+        gtest,
+        "//test/util:posix_error",
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+        "//test/util:thread_util",
+    ],
+)
+
+cc_binary(
+    name = "lseek_test",
+    testonly = 1,
+    srcs = ["lseek.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:file_descriptor",
+        gtest,
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "madvise_test",
+    testonly = 1,
+    srcs = ["madvise.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:file_descriptor",
+        gtest,
+        "//test/util:logging",
+        "//test/util:memory_util",
+        "//test/util:multiprocess_util",
+        "//test/util:posix_error",
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "mempolicy_test",
+    testonly = 1,
+    srcs = ["mempolicy.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:cleanup",
+        "@com_google_absl//absl/memory",
+        gtest,
+        "//test/util:memory_util",
+        "//test/util:test_main",
+        "//test/util:test_util",
+        "//test/util:thread_util",
+    ],
+)
+
+cc_binary(
+    name = "mincore_test",
+    testonly = 1,
+    srcs = ["mincore.cc"],
+    linkstatic = 1,
+    deps = [
+        gtest,
+        "//test/util:memory_util",
+        "//test/util:posix_error",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "mkdir_test",
+    testonly = 1,
+    srcs = ["mkdir.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:capability_util",
+        "//test/util:fs_util",
+        gtest,
+        "//test/util:temp_path",
+        "//test/util:temp_umask",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "mknod_test",
+    testonly = 1,
+    srcs = ["mknod.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:file_descriptor",
+        gtest,
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+        "//test/util:thread_util",
+    ],
+)
+
+cc_binary(
+    name = "mlock_test",
+    testonly = 1,
+    srcs = ["mlock.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:capability_util",
+        "//test/util:cleanup",
+        gtest,
+        "//test/util:memory_util",
+        "//test/util:multiprocess_util",
+        "//test/util:rlimit_util",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "mmap_test",
+    testonly = 1,
+    srcs = ["mmap.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:cleanup",
+        "//test/util:file_descriptor",
+        "//test/util:fs_util",
+        "@com_google_absl//absl/strings",
+        gtest,
+        "//test/util:memory_util",
+        "//test/util:multiprocess_util",
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "mount_test",
+    testonly = 1,
+    srcs = ["mount.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:capability_util",
+        "//test/util:file_descriptor",
+        "//test/util:fs_util",
+        "@com_google_absl//absl/strings",
+        "@com_google_absl//absl/time",
+        gtest,
+        "//test/util:mount_util",
+        "//test/util:multiprocess_util",
+        "//test/util:posix_error",
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+        "//test/util:thread_util",
+    ],
+)
+
+cc_binary(
+    name = "mremap_test",
+    testonly = 1,
+    srcs = ["mremap.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:file_descriptor",
+        "@com_google_absl//absl/strings",
+        gtest,
+        "//test/util:logging",
+        "//test/util:memory_util",
+        "//test/util:multiprocess_util",
+        "//test/util:posix_error",
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "msync_test",
+    testonly = 1,
+    srcs = ["msync.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:file_descriptor",
+        "//test/util:memory_util",
+        "//test/util:posix_error",
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "munmap_test",
+    testonly = 1,
+    srcs = ["munmap.cc"],
+    linkstatic = 1,
+    deps = [
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "open_test",
+    testonly = 1,
+    srcs = [
+        "file_base.h",
+        "open.cc",
+    ],
+    linkstatic = 1,
+    deps = [
+        "//test/util:capability_util",
+        "//test/util:cleanup",
+        "//test/util:file_descriptor",
+        "//test/util:fs_util",
+        "@com_google_absl//absl/memory",
+        "@com_google_absl//absl/strings",
+        gtest,
+        "//test/util:posix_error",
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+        "//test/util:thread_util",
+    ],
+)
+
+cc_binary(
+    name = "open_create_test",
+    testonly = 1,
+    srcs = ["open_create.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:capability_util",
+        "//test/util:file_descriptor",
+        "//test/util:fs_util",
+        gtest,
+        "//test/util:temp_path",
+        "//test/util:temp_umask",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "packet_socket_raw_test",
+    testonly = 1,
+    srcs = ["packet_socket_raw.cc"],
+    linkstatic = 1,
+    deps = [
+        ":socket_test_util",
+        ":unix_domain_socket_test_util",
+        "//test/util:capability_util",
+        "//test/util:file_descriptor",
+        "@com_google_absl//absl/base:core_headers",
+        "@com_google_absl//absl/base:endian",
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "packet_socket_test",
+    testonly = 1,
+    srcs = ["packet_socket.cc"],
+    linkstatic = 1,
+    deps = [
+        ":socket_test_util",
+        ":unix_domain_socket_test_util",
+        "//test/util:capability_util",
+        "//test/util:file_descriptor",
+        "@com_google_absl//absl/base:core_headers",
+        "@com_google_absl//absl/base:endian",
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "pty_test",
+    testonly = 1,
+    srcs = ["pty.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:capability_util",
+        "//test/util:file_descriptor",
+        "@com_google_absl//absl/base:core_headers",
+        "@com_google_absl//absl/strings",
+        "@com_google_absl//absl/synchronization",
+        "@com_google_absl//absl/time",
+        gtest,
+        "//test/util:posix_error",
+        "//test/util:pty_util",
+        "//test/util:test_main",
+        "//test/util:test_util",
+        "//test/util:thread_util",
+    ],
+)
+
+cc_binary(
+    name = "pty_root_test",
+    testonly = 1,
+    srcs = ["pty_root.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:capability_util",
+        "//test/util:file_descriptor",
+        "@com_google_absl//absl/base:core_headers",
+        gtest,
+        "//test/util:posix_error",
+        "//test/util:pty_util",
+        "//test/util:test_main",
+        "//test/util:thread_util",
+    ],
+)
+
+cc_binary(
+    name = "partial_bad_buffer_test",
+    testonly = 1,
+    srcs = ["partial_bad_buffer.cc"],
+    linkstatic = 1,
+    deps = [
+        ":socket_test_util",
+        "//test/util:file_descriptor",
+        "//test/util:fs_util",
+        "@com_google_absl//absl/time",
+        gtest,
+        "//test/util:posix_error",
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "pause_test",
+    testonly = 1,
+    srcs = ["pause.cc"],
+    linkstatic = 1,
+    deps = [
+        "@com_google_absl//absl/synchronization",
+        "@com_google_absl//absl/time",
+        gtest,
+        "//test/util:signal_util",
+        "//test/util:test_main",
+        "//test/util:test_util",
+        "//test/util:thread_util",
+    ],
+)
+
+cc_binary(
+    name = "ping_socket_test",
+    testonly = 1,
+    srcs = ["ping_socket.cc"],
+    linkstatic = 1,
+    deps = [
+        ":socket_test_util",
+        "//test/util:file_descriptor",
+        gtest,
+        "//test/util:save_util",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "pipe_test",
+    testonly = 1,
+    srcs = ["pipe.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:file_descriptor",
+        "//test/util:fs_util",
+        "@com_google_absl//absl/strings",
+        "@com_google_absl//absl/synchronization",
+        "@com_google_absl//absl/time",
+        gtest,
+        "//test/util:posix_error",
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+        "//test/util:thread_util",
+    ],
+)
+
+cc_binary(
+    name = "poll_test",
+    testonly = 1,
+    srcs = ["poll.cc"],
+    linkstatic = 1,
+    deps = [
+        ":base_poll_test",
+        "//test/util:eventfd_util",
+        "//test/util:file_descriptor",
+        "@com_google_absl//absl/synchronization",
+        "@com_google_absl//absl/time",
+        gtest,
+        "//test/util:logging",
+        "//test/util:test_main",
+        "//test/util:test_util",
+        "//test/util:thread_util",
+    ],
+)
+
+cc_binary(
+    name = "ppoll_test",
+    testonly = 1,
+    srcs = ["ppoll.cc"],
+    linkstatic = 1,
+    deps = [
+        ":base_poll_test",
+        "@com_google_absl//absl/time",
+        gtest,
+        "//test/util:signal_util",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "arch_prctl_test",
+    testonly = 1,
+    srcs = select_arch(
+        amd64 = ["arch_prctl.cc"],
+        arm64 = [],
+    ),
+    linkstatic = 1,
+    deps = [
+        "//test/util:file_descriptor",
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "prctl_test",
+    testonly = 1,
+    srcs = ["prctl.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:capability_util",
+        "//test/util:cleanup",
+        "@com_google_absl//absl/flags:flag",
+        gtest,
+        "//test/util:multiprocess_util",
+        "//test/util:posix_error",
+        "//test/util:test_util",
+        "//test/util:thread_util",
+    ],
+)
+
+cc_binary(
+    name = "prctl_setuid_test",
+    testonly = 1,
+    srcs = ["prctl_setuid.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:capability_util",
+        "@com_google_absl//absl/flags:flag",
+        gtest,
+        "//test/util:logging",
+        "//test/util:multiprocess_util",
+        "//test/util:posix_error",
+        "//test/util:test_util",
+        "//test/util:thread_util",
+    ],
+)
+
+cc_binary(
+    name = "pread64_test",
+    testonly = 1,
+    srcs = ["pread64.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:file_descriptor",
+        gtest,
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "preadv_test",
+    testonly = 1,
+    srcs = ["preadv.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:file_descriptor",
+        "@com_google_absl//absl/time",
+        gtest,
+        "//test/util:logging",
+        "//test/util:memory_util",
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+        "//test/util:thread_util",
+        "//test/util:timer_util",
+    ],
+)
+
+cc_binary(
+    name = "preadv2_test",
+    testonly = 1,
+    srcs = [
+        "file_base.h",
+        "preadv2.cc",
+    ],
+    linkstatic = 1,
+    deps = [
+        "//test/util:file_descriptor",
+        "@com_google_absl//absl/memory",
+        "@com_google_absl//absl/strings",
+        gtest,
+        "//test/util:posix_error",
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "priority_test",
+    testonly = 1,
+    srcs = ["priority.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:capability_util",
+        "//test/util:fs_util",
+        "@com_google_absl//absl/strings",
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+        "//test/util:thread_util",
+    ],
+)
+
+cc_binary(
+    name = "proc_test",
+    testonly = 1,
+    srcs = ["proc.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:capability_util",
+        "//test/util:cleanup",
+        "//test/util:file_descriptor",
+        "//test/util:fs_util",
+        "@com_google_absl//absl/strings",
+        "@com_google_absl//absl/synchronization",
+        "@com_google_absl//absl/time",
+        gtest,
+        "//test/util:memory_util",
+        "//test/util:posix_error",
+        "//test/util:temp_path",
+        "//test/util:test_util",
+        "//test/util:thread_util",
+        "//test/util:time_util",
+        "//test/util:timer_util",
+    ],
+)
+
+cc_binary(
+    name = "proc_net_test",
+    testonly = 1,
+    srcs = ["proc_net.cc"],
+    linkstatic = 1,
+    deps = [
+        ":socket_test_util",
+        "//test/util:capability_util",
+        "//test/util:file_descriptor",
+        "//test/util:fs_util",
+        "@com_google_absl//absl/strings",
+        "@com_google_absl//absl/time",
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "proc_pid_oomscore_test",
+    testonly = 1,
+    srcs = ["proc_pid_oomscore.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:fs_util",
+        "//test/util:test_main",
+        "//test/util:test_util",
+        "@com_google_absl//absl/strings",
+    ],
+)
+
+cc_binary(
+    name = "proc_pid_smaps_test",
+    testonly = 1,
+    srcs = ["proc_pid_smaps.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:file_descriptor",
+        "//test/util:fs_util",
+        "@com_google_absl//absl/container:flat_hash_set",
+        "@com_google_absl//absl/strings",
+        "@com_google_absl//absl/strings:str_format",
+        "@com_google_absl//absl/types:optional",
+        gtest,
+        "//test/util:memory_util",
+        "//test/util:posix_error",
+        "//test/util:proc_util",
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "proc_pid_uid_gid_map_test",
+    testonly = 1,
+    srcs = ["proc_pid_uid_gid_map.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:capability_util",
+        "//test/util:cleanup",
+        "//test/util:file_descriptor",
+        "//test/util:fs_util",
+        "@com_google_absl//absl/strings",
+        gtest,
+        "//test/util:logging",
+        "//test/util:multiprocess_util",
+        "//test/util:posix_error",
+        "//test/util:save_util",
+        "//test/util:test_main",
+        "//test/util:test_util",
+        "//test/util:time_util",
+    ],
+)
+
+cc_binary(
+    name = "pselect_test",
+    testonly = 1,
+    srcs = ["pselect.cc"],
+    linkstatic = 1,
+    deps = [
+        ":base_poll_test",
+        "@com_google_absl//absl/time",
+        gtest,
+        "//test/util:signal_util",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "ptrace_test",
+    testonly = 1,
+    srcs = ["ptrace.cc"],
+    linkstatic = 1,
+    deps = [
+        "@com_google_absl//absl/flags:flag",
+        "@com_google_absl//absl/time",
+        gtest,
+        "//test/util:logging",
+        "//test/util:multiprocess_util",
+        "//test/util:platform_util",
+        "//test/util:signal_util",
+        "//test/util:test_util",
+        "//test/util:thread_util",
+        "//test/util:time_util",
+    ],
+)
+
+cc_binary(
+    name = "pwrite64_test",
+    testonly = 1,
+    srcs = ["pwrite64.cc"],
+    linkstatic = 1,
+    deps = [
+        gtest,
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "pwritev2_test",
+    testonly = 1,
+    srcs = [
+        "pwritev2.cc",
+    ],
+    linkstatic = 1,
+    deps = [
+        ":file_base",
+        "//test/util:file_descriptor",
+        "@com_google_absl//absl/strings",
+        gtest,
+        "//test/util:posix_error",
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "raw_socket_hdrincl_test",
+    testonly = 1,
+    srcs = ["raw_socket_hdrincl.cc"],
+    linkstatic = 1,
+    deps = [
+        ":socket_test_util",
+        ":unix_domain_socket_test_util",
+        "//test/util:capability_util",
+        "//test/util:file_descriptor",
+        "@com_google_absl//absl/base:core_headers",
+        "@com_google_absl//absl/base:endian",
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "raw_socket_test",
+    testonly = 1,
+    srcs = ["raw_socket.cc"],
+    linkstatic = 1,
+    deps = [
+        ":socket_test_util",
+        ":unix_domain_socket_test_util",
+        "//test/util:capability_util",
+        "//test/util:file_descriptor",
+        "@com_google_absl//absl/base:core_headers",
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "raw_socket_icmp_test",
+    testonly = 1,
+    srcs = ["raw_socket_icmp.cc"],
+    linkstatic = 1,
+    deps = [
+        ":socket_test_util",
+        ":unix_domain_socket_test_util",
+        "//test/util:capability_util",
+        "//test/util:file_descriptor",
+        "@com_google_absl//absl/base:core_headers",
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "read_test",
+    testonly = 1,
+    srcs = ["read.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:file_descriptor",
+        gtest,
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "readahead_test",
+    testonly = 1,
+    srcs = ["readahead.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:file_descriptor",
+        gtest,
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "readv_test",
+    testonly = 1,
+    srcs = [
+        "file_base.h",
+        "readv.cc",
+        "readv_common.cc",
+        "readv_common.h",
+    ],
+    linkstatic = 1,
+    deps = [
+        "//test/util:file_descriptor",
+        "@com_google_absl//absl/strings",
+        gtest,
+        "//test/util:posix_error",
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+        "//test/util:timer_util",
+    ],
+)
+
+cc_binary(
+    name = "readv_socket_test",
+    testonly = 1,
+    srcs = [
+        "readv_common.cc",
+        "readv_common.h",
+        "readv_socket.cc",
+    ],
+    linkstatic = 1,
+    deps = [
+        "//test/util:file_descriptor",
+        "@com_google_absl//absl/strings",
+        gtest,
+        "//test/util:posix_error",
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "rename_test",
+    testonly = 1,
+    srcs = ["rename.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:capability_util",
+        "//test/util:cleanup",
+        "//test/util:file_descriptor",
+        "//test/util:fs_util",
+        "@com_google_absl//absl/strings",
+        gtest,
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "rlimits_test",
+    testonly = 1,
+    srcs = ["rlimits.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:capability_util",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "rseq_test",
+    testonly = 1,
+    srcs = ["rseq.cc"],
+    data = ["//test/syscalls/linux/rseq"],
+    linkstatic = 1,
+    deps = [
+        "//test/syscalls/linux/rseq:lib",
+        gtest,
+        "//test/util:logging",
+        "//test/util:multiprocess_util",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "rtsignal_test",
+    testonly = 1,
+    srcs = ["rtsignal.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:cleanup",
+        gtest,
+        "//test/util:logging",
+        "//test/util:posix_error",
+        "//test/util:signal_util",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "sched_test",
+    testonly = 1,
+    srcs = ["sched.cc"],
+    linkstatic = 1,
+    deps = [
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "sched_yield_test",
+    testonly = 1,
+    srcs = ["sched_yield.cc"],
+    linkstatic = 1,
+    deps = [
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "seccomp_test",
+    testonly = 1,
+    srcs = ["seccomp.cc"],
+    linkstatic = 1,
+    deps = [
+        "@com_google_absl//absl/base:core_headers",
+        gtest,
+        "//test/util:logging",
+        "//test/util:memory_util",
+        "//test/util:multiprocess_util",
+        "//test/util:posix_error",
+        "//test/util:proc_util",
+        "//test/util:test_util",
+        "//test/util:thread_util",
+    ],
+)
+
+cc_binary(
+    name = "select_test",
+    testonly = 1,
+    srcs = ["select.cc"],
+    linkstatic = 1,
+    deps = [
+        ":base_poll_test",
+        "//test/util:file_descriptor",
+        "@com_google_absl//absl/time",
+        gtest,
+        "//test/util:multiprocess_util",
+        "//test/util:posix_error",
+        "//test/util:rlimit_util",
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "sendfile_test",
+    testonly = 1,
+    srcs = ["sendfile.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:eventfd_util",
+        "//test/util:file_descriptor",
+        "@com_google_absl//absl/strings",
+        "@com_google_absl//absl/time",
+        gtest,
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+        "//test/util:thread_util",
+    ],
+)
+
+cc_binary(
+    name = "sendfile_socket_test",
+    testonly = 1,
+    srcs = ["sendfile_socket.cc"],
+    linkstatic = 1,
+    deps = [
+        ":socket_test_util",
+        "//test/util:file_descriptor",
+        "@com_google_absl//absl/strings",
+        gtest,
+        ":ip_socket_test_util",
+        ":unix_domain_socket_test_util",
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+        "//test/util:thread_util",
+    ],
+)
+
+cc_binary(
+    name = "splice_test",
+    testonly = 1,
+    srcs = ["splice.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:file_descriptor",
+        "@com_google_absl//absl/strings",
+        "@com_google_absl//absl/time",
+        gtest,
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+        "//test/util:thread_util",
+    ],
+)
+
+cc_binary(
+    name = "sigaction_test",
+    testonly = 1,
+    srcs = ["sigaction.cc"],
+    linkstatic = 1,
+    deps = [
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "sigaltstack_test",
+    testonly = 1,
+    srcs = ["sigaltstack.cc"],
+    data = [
+        ":sigaltstack_check",
+    ],
+    linkstatic = 1,
+    deps = [
+        "//test/util:cleanup",
+        "//test/util:fs_util",
+        gtest,
+        "//test/util:multiprocess_util",
+        "//test/util:posix_error",
+        "//test/util:signal_util",
+        "//test/util:test_main",
+        "//test/util:test_util",
+        "//test/util:thread_util",
+    ],
+)
+
+cc_binary(
+    name = "sigiret_test",
+    testonly = 1,
+    srcs = select_arch(
+        amd64 = ["sigiret.cc"],
+        arm64 = [],
+    ),
+    linkstatic = 1,
+    deps = [
+        gtest,
+        "//test/util:logging",
+        "//test/util:signal_util",
+        "//test/util:test_util",
+        "//test/util:timer_util",
+    ] + select_arch(
+        amd64 = [],
+        arm64 = ["//test/util:test_main"],
+    ),
+)
+
+cc_binary(
+    name = "signalfd_test",
+    testonly = 1,
+    srcs = ["signalfd.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:file_descriptor",
+        "@com_google_absl//absl/synchronization",
+        gtest,
+        "//test/util:logging",
+        "//test/util:posix_error",
+        "//test/util:signal_util",
+        "//test/util:test_main",
+        "//test/util:test_util",
+        "//test/util:thread_util",
+    ],
+)
+
+cc_binary(
+    name = "sigprocmask_test",
+    testonly = 1,
+    srcs = ["sigprocmask.cc"],
+    linkstatic = 1,
+    deps = [
+        gtest,
+        "//test/util:signal_util",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "sigstop_test",
+    testonly = 1,
+    srcs = ["sigstop.cc"],
+    linkstatic = 1,
+    deps = [
+        "@com_google_absl//absl/flags:flag",
+        "@com_google_absl//absl/time",
+        gtest,
+        "//test/util:multiprocess_util",
+        "//test/util:posix_error",
+        "//test/util:test_util",
+        "//test/util:thread_util",
+    ],
+)
+
+cc_binary(
+    name = "sigtimedwait_test",
+    testonly = 1,
+    srcs = ["sigtimedwait.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:file_descriptor",
+        "@com_google_absl//absl/time",
+        gtest,
+        "//test/util:logging",
+        "//test/util:signal_util",
+        "//test/util:test_util",
+        "//test/util:thread_util",
+        "//test/util:timer_util",
+    ],
+)
+
+cc_library(
+    name = "socket_generic_test_cases",
+    testonly = 1,
+    srcs = [
+        "socket_generic.cc",
+    ],
+    hdrs = [
+        "socket_generic.h",
+    ],
+    deps = [
+        ":socket_test_util",
+        ":unix_domain_socket_test_util",
+        "@com_google_absl//absl/strings",
+        "@com_google_absl//absl/strings:str_format",
+        gtest,
+        "//test/util:test_util",
+    ],
+    alwayslink = 1,
+)
+
+cc_binary(
+    name = "socket_stress_test",
+    testonly = 1,
+    srcs = [
+        "socket_generic_stress.cc",
+    ],
+    linkstatic = 1,
+    deps = [
+        ":ip_socket_test_util",
+        ":socket_test_util",
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_library(
+    name = "socket_unix_dgram_test_cases",
+    testonly = 1,
+    srcs = ["socket_unix_dgram.cc"],
+    hdrs = ["socket_unix_dgram.h"],
+    deps = [
+        ":socket_test_util",
+        ":unix_domain_socket_test_util",
+        gtest,
+        "//test/util:test_util",
+    ],
+    alwayslink = 1,
+)
+
+cc_library(
+    name = "socket_unix_seqpacket_test_cases",
+    testonly = 1,
+    srcs = ["socket_unix_seqpacket.cc"],
+    hdrs = ["socket_unix_seqpacket.h"],
+    deps = [
+        ":socket_test_util",
+        ":unix_domain_socket_test_util",
+        gtest,
+        "//test/util:test_util",
+    ],
+    alwayslink = 1,
+)
+
+cc_library(
+    name = "socket_ip_tcp_generic_test_cases",
+    testonly = 1,
+    srcs = [
+        "socket_ip_tcp_generic.cc",
+    ],
+    hdrs = [
+        "socket_ip_tcp_generic.h",
+    ],
+    deps = [
+        ":socket_test_util",
+        "@com_google_absl//absl/memory",
+        "@com_google_absl//absl/time",
+        gtest,
+        "//test/util:test_util",
+        "//test/util:thread_util",
+    ],
+    alwayslink = 1,
+)
+
+cc_library(
+    name = "socket_non_blocking_test_cases",
+    testonly = 1,
+    srcs = [
+        "socket_non_blocking.cc",
+    ],
+    hdrs = [
+        "socket_non_blocking.h",
+    ],
+    deps = [
+        ":socket_test_util",
+        ":unix_domain_socket_test_util",
+        gtest,
+        "//test/util:test_util",
+    ],
+    alwayslink = 1,
+)
+
+cc_library(
+    name = "socket_unix_non_stream_test_cases",
+    testonly = 1,
+    srcs = [
+        "socket_unix_non_stream.cc",
+    ],
+    hdrs = [
+        "socket_unix_non_stream.h",
+    ],
+    deps = [
+        ":socket_test_util",
+        ":unix_domain_socket_test_util",
+        gtest,
+        "//test/util:memory_util",
+        "//test/util:test_util",
+    ],
+    alwayslink = 1,
+)
+
+cc_library(
+    name = "socket_non_stream_test_cases",
+    testonly = 1,
+    srcs = [
+        "socket_non_stream.cc",
+    ],
+    hdrs = [
+        "socket_non_stream.h",
+    ],
+    deps = [
+        ":ip_socket_test_util",
+        ":socket_test_util",
+        ":unix_domain_socket_test_util",
+        gtest,
+        "//test/util:test_util",
+    ],
+    alwayslink = 1,
+)
+
+cc_library(
+    name = "socket_ip_udp_test_cases",
+    testonly = 1,
+    srcs = [
+        "socket_ip_udp_generic.cc",
+    ],
+    hdrs = [
+        "socket_ip_udp_generic.h",
+    ],
+    deps = [
+        ":ip_socket_test_util",
+        ":socket_test_util",
+        gtest,
+        "//test/util:test_util",
+    ],
+    alwayslink = 1,
+)
+
+cc_library(
+    name = "socket_ipv4_udp_unbound_test_cases",
+    testonly = 1,
+    srcs = [
+        "socket_ipv4_udp_unbound.cc",
+    ],
+    hdrs = [
+        "socket_ipv4_udp_unbound.h",
+    ],
+    deps = [
+        ":ip_socket_test_util",
+        ":socket_test_util",
+        "@com_google_absl//absl/memory",
+        gtest,
+        "//test/util:test_util",
+    ],
+    alwayslink = 1,
+)
+
+cc_library(
+    name = "socket_ipv4_udp_unbound_external_networking_test_cases",
+    testonly = 1,
+    srcs = [
+        "socket_ipv4_udp_unbound_external_networking.cc",
+    ],
+    hdrs = [
+        "socket_ipv4_udp_unbound_external_networking.h",
+    ],
+    deps = [
+        ":ip_socket_test_util",
+        ":socket_test_util",
+        gtest,
+        "//test/util:test_util",
+    ],
+    alwayslink = 1,
+)
+
+cc_library(
+    name = "socket_ipv4_tcp_unbound_external_networking_test_cases",
+    testonly = 1,
+    srcs = [
+        "socket_ipv4_tcp_unbound_external_networking.cc",
+    ],
+    hdrs = [
+        "socket_ipv4_tcp_unbound_external_networking.h",
+    ],
+    deps = [
+        ":ip_socket_test_util",
+        ":socket_test_util",
+        gtest,
+        "//test/util:test_util",
+    ],
+    alwayslink = 1,
+)
+
+cc_binary(
+    name = "socket_abstract_test",
+    testonly = 1,
+    srcs = [
+        "socket_abstract.cc",
+    ],
+    linkstatic = 1,
+    deps = [
+        ":socket_generic_test_cases",
+        ":socket_test_util",
+        ":socket_unix_cmsg_test_cases",
+        ":socket_unix_test_cases",
+        ":unix_domain_socket_test_util",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "socket_abstract_non_blocking_test",
+    testonly = 1,
+    srcs = [
+        "socket_unix_abstract_nonblock.cc",
+    ],
+    linkstatic = 1,
+    deps = [
+        ":socket_non_blocking_test_cases",
+        ":socket_test_util",
+        ":unix_domain_socket_test_util",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "socket_unix_dgram_local_test",
+    testonly = 1,
+    srcs = ["socket_unix_dgram_local.cc"],
+    linkstatic = 1,
+    deps = [
+        ":socket_non_stream_test_cases",
+        ":socket_test_util",
+        ":socket_unix_dgram_test_cases",
+        ":socket_unix_non_stream_test_cases",
+        ":unix_domain_socket_test_util",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "socket_unix_dgram_non_blocking_test",
+    testonly = 1,
+    srcs = ["socket_unix_dgram_non_blocking.cc"],
+    linkstatic = 1,
+    deps = [
+        ":socket_test_util",
+        ":unix_domain_socket_test_util",
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "socket_unix_seqpacket_local_test",
+    testonly = 1,
+    srcs = [
+        "socket_unix_seqpacket_local.cc",
+    ],
+    linkstatic = 1,
+    deps = [
+        ":socket_non_stream_test_cases",
+        ":socket_test_util",
+        ":socket_unix_non_stream_test_cases",
+        ":socket_unix_seqpacket_test_cases",
+        ":unix_domain_socket_test_util",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "socket_unix_stream_test",
+    testonly = 1,
+    srcs = ["socket_unix_stream.cc"],
+    linkstatic = 1,
+    deps = [
+        ":socket_test_util",
+        ":unix_domain_socket_test_util",
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "socket_ip_tcp_generic_loopback_test",
+    testonly = 1,
+    srcs = [
+        "socket_ip_tcp_generic_loopback.cc",
+    ],
+    linkstatic = 1,
+    deps = [
+        ":ip_socket_test_util",
+        ":socket_ip_tcp_generic_test_cases",
+        ":socket_test_util",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "socket_ip_tcp_udp_generic_loopback_test",
+    testonly = 1,
+    srcs = [
+        "socket_ip_tcp_udp_generic.cc",
+    ],
+    linkstatic = 1,
+    deps = [
+        ":ip_socket_test_util",
+        ":socket_test_util",
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "socket_ip_tcp_loopback_test",
+    testonly = 1,
+    srcs = [
+        "socket_ip_tcp_loopback.cc",
+    ],
+    linkstatic = 1,
+    deps = [
+        ":ip_socket_test_util",
+        ":socket_generic_test_cases",
+        ":socket_test_util",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "socket_ip_tcp_loopback_non_blocking_test",
+    testonly = 1,
+    srcs = [
+        "socket_ip_tcp_loopback_nonblock.cc",
+    ],
+    linkstatic = 1,
+    deps = [
+        ":ip_socket_test_util",
+        ":socket_non_blocking_test_cases",
+        ":socket_test_util",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "socket_ip_udp_loopback_test",
+    testonly = 1,
+    srcs = [
+        "socket_ip_udp_loopback.cc",
+    ],
+    linkstatic = 1,
+    deps = [
+        ":ip_socket_test_util",
+        ":socket_generic_test_cases",
+        ":socket_ip_udp_test_cases",
+        ":socket_non_stream_test_cases",
+        ":socket_test_util",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "socket_ipv4_udp_unbound_external_networking_test",
+    testonly = 1,
+    srcs = [
+        "socket_ipv4_udp_unbound_external_networking_test.cc",
+    ],
+    linkstatic = 1,
+    deps = [
+        ":ip_socket_test_util",
+        ":socket_ipv4_udp_unbound_external_networking_test_cases",
+        ":socket_test_util",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "socket_ipv4_tcp_unbound_external_networking_test",
+    testonly = 1,
+    srcs = [
+        "socket_ipv4_tcp_unbound_external_networking_test.cc",
+    ],
+    linkstatic = 1,
+    deps = [
+        ":ip_socket_test_util",
+        ":socket_ipv4_tcp_unbound_external_networking_test_cases",
+        ":socket_test_util",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "socket_bind_to_device_test",
+    testonly = 1,
+    srcs = [
+        "socket_bind_to_device.cc",
+    ],
+    linkstatic = 1,
+    deps = [
+        ":ip_socket_test_util",
+        ":socket_bind_to_device_util",
+        ":socket_test_util",
+        "//test/util:capability_util",
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+        "//test/util:thread_util",
+    ],
+)
+
+cc_binary(
+    name = "socket_bind_to_device_sequence_test",
+    testonly = 1,
+    srcs = [
+        "socket_bind_to_device_sequence.cc",
+    ],
+    linkstatic = 1,
+    deps = [
+        ":ip_socket_test_util",
+        ":socket_bind_to_device_util",
+        ":socket_test_util",
+        "//test/util:capability_util",
+        "@com_google_absl//absl/container:node_hash_map",
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+        "//test/util:thread_util",
+    ],
+)
+
+cc_binary(
+    name = "socket_bind_to_device_distribution_test",
+    testonly = 1,
+    srcs = [
+        "socket_bind_to_device_distribution.cc",
+    ],
+    linkstatic = 1,
+    deps = [
+        ":ip_socket_test_util",
+        ":socket_bind_to_device_util",
+        ":socket_test_util",
+        "//test/util:capability_util",
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+        "//test/util:thread_util",
+    ],
+)
+
+cc_binary(
+    name = "socket_ip_udp_loopback_non_blocking_test",
+    testonly = 1,
+    srcs = [
+        "socket_ip_udp_loopback_nonblock.cc",
+    ],
+    linkstatic = 1,
+    deps = [
+        ":ip_socket_test_util",
+        ":socket_non_blocking_test_cases",
+        ":socket_test_util",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "socket_ipv4_udp_unbound_loopback_test",
+    testonly = 1,
+    srcs = [
+        "socket_ipv4_udp_unbound_loopback.cc",
+    ],
+    linkstatic = 1,
+    deps = [
+        ":ip_socket_test_util",
+        ":socket_ipv4_udp_unbound_test_cases",
+        ":socket_test_util",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "socket_ip_unbound_test",
+    testonly = 1,
+    srcs = [
+        "socket_ip_unbound.cc",
+    ],
+    linkstatic = 1,
+    deps = [
+        ":ip_socket_test_util",
+        ":socket_test_util",
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "socket_domain_test",
+    testonly = 1,
+    srcs = [
+        "socket_unix_domain.cc",
+    ],
+    linkstatic = 1,
+    deps = [
+        ":socket_generic_test_cases",
+        ":socket_test_util",
+        ":unix_domain_socket_test_util",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "socket_domain_non_blocking_test",
+    testonly = 1,
+    srcs = [
+        "socket_unix_pair_nonblock.cc",
+    ],
+    linkstatic = 1,
+    deps = [
+        ":socket_non_blocking_test_cases",
+        ":socket_test_util",
+        ":unix_domain_socket_test_util",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "socket_filesystem_test",
+    testonly = 1,
+    srcs = [
+        "socket_filesystem.cc",
+    ],
+    linkstatic = 1,
+    deps = [
+        ":socket_generic_test_cases",
+        ":socket_test_util",
+        ":socket_unix_cmsg_test_cases",
+        ":socket_unix_test_cases",
+        ":unix_domain_socket_test_util",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "socket_filesystem_non_blocking_test",
+    testonly = 1,
+    srcs = [
+        "socket_unix_filesystem_nonblock.cc",
+    ],
+    linkstatic = 1,
+    deps = [
+        ":socket_non_blocking_test_cases",
+        ":socket_test_util",
+        ":unix_domain_socket_test_util",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "socket_inet_loopback_test",
+    testonly = 1,
+    srcs = ["socket_inet_loopback.cc"],
+    linkstatic = 1,
+    deps = [
+        ":ip_socket_test_util",
+        ":socket_test_util",
+        "//test/util:file_descriptor",
+        "@com_google_absl//absl/memory",
+        "@com_google_absl//absl/strings",
+        "@com_google_absl//absl/time",
+        gtest,
+        "//test/util:posix_error",
+        "//test/util:save_util",
+        "//test/util:test_main",
+        "//test/util:test_util",
+        "//test/util:thread_util",
+    ],
+)
+
+cc_binary(
+    name = "socket_inet_loopback_nogotsan_test",
+    testonly = 1,
+    srcs = ["socket_inet_loopback_nogotsan.cc"],
+    linkstatic = 1,
+    deps = [
+        ":ip_socket_test_util",
+        ":socket_test_util",
+        "//test/util:file_descriptor",
+        "@com_google_absl//absl/memory",
+        "@com_google_absl//absl/strings",
+        gtest,
+        "//test/util:posix_error",
+        "//test/util:save_util",
+        "//test/util:test_main",
+        "//test/util:test_util",
+        "//test/util:thread_util",
+    ],
+)
+
+cc_binary(
+    name = "socket_netlink_test",
+    testonly = 1,
+    srcs = ["socket_netlink.cc"],
+    linkstatic = 1,
+    deps = [
+        ":socket_test_util",
+        "//test/util:file_descriptor",
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "socket_netlink_route_test",
+    testonly = 1,
+    srcs = ["socket_netlink_route.cc"],
+    linkstatic = 1,
+    deps = [
+        ":socket_netlink_route_util",
+        ":socket_netlink_util",
+        ":socket_test_util",
+        "//test/util:capability_util",
+        "//test/util:cleanup",
+        "//test/util:file_descriptor",
+        "@com_google_absl//absl/strings:str_format",
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "socket_netlink_uevent_test",
+    testonly = 1,
+    srcs = ["socket_netlink_uevent.cc"],
+    linkstatic = 1,
+    deps = [
+        ":socket_netlink_util",
+        ":socket_test_util",
+        "//test/util:file_descriptor",
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+# These socket tests are in a library because the test cases are shared
+# across several test build targets.
+cc_library(
+    name = "socket_stream_test_cases",
+    testonly = 1,
+    srcs = [
+        "socket_stream.cc",
+    ],
+    hdrs = [
+        "socket_stream.h",
+    ],
+    deps = [
+        ":socket_test_util",
+        ":unix_domain_socket_test_util",
+        "@com_google_absl//absl/time",
+        gtest,
+        "//test/util:test_util",
+    ],
+    alwayslink = 1,
+)
+
+cc_library(
+    name = "socket_blocking_test_cases",
+    testonly = 1,
+    srcs = [
+        "socket_blocking.cc",
+    ],
+    hdrs = [
+        "socket_blocking.h",
+    ],
+    deps = [
+        ":socket_test_util",
+        ":unix_domain_socket_test_util",
+        "@com_google_absl//absl/time",
+        gtest,
+        "//test/util:test_util",
+        "//test/util:thread_util",
+        "//test/util:timer_util",
+    ],
+    alwayslink = 1,
+)
+
+cc_library(
+    name = "socket_unix_test_cases",
+    testonly = 1,
+    srcs = [
+        "socket_unix.cc",
+    ],
+    hdrs = [
+        "socket_unix.h",
+    ],
+    deps = [
+        ":socket_test_util",
+        ":unix_domain_socket_test_util",
+        "@com_google_absl//absl/strings",
+        gtest,
+        "//test/util:test_util",
+        "//test/util:thread_util",
+    ],
+    alwayslink = 1,
+)
+
+cc_library(
+    name = "socket_unix_cmsg_test_cases",
+    testonly = 1,
+    srcs = [
+        "socket_unix_cmsg.cc",
+    ],
+    hdrs = [
+        "socket_unix_cmsg.h",
+    ],
+    deps = [
+        ":socket_test_util",
+        ":unix_domain_socket_test_util",
+        "@com_google_absl//absl/strings",
+        gtest,
+        "//test/util:test_util",
+        "//test/util:thread_util",
+    ],
+    alwayslink = 1,
+)
+
+cc_library(
+    name = "socket_stream_blocking_test_cases",
+    testonly = 1,
+    srcs = [
+        "socket_stream_blocking.cc",
+    ],
+    hdrs = [
+        "socket_stream_blocking.h",
+    ],
+    deps = [
+        ":socket_test_util",
+        ":unix_domain_socket_test_util",
+        "@com_google_absl//absl/time",
+        gtest,
+        "//test/util:test_util",
+        "//test/util:thread_util",
+        "//test/util:timer_util",
+    ],
+    alwayslink = 1,
+)
+
+cc_library(
+    name = "socket_stream_nonblocking_test_cases",
+    testonly = 1,
+    srcs = [
+        "socket_stream_nonblock.cc",
+    ],
+    hdrs = [
+        "socket_stream_nonblock.h",
+    ],
+    deps = [
+        ":socket_test_util",
+        ":unix_domain_socket_test_util",
+        gtest,
+        "//test/util:test_util",
+    ],
+    alwayslink = 1,
+)
+
+cc_library(
+    name = "socket_non_stream_blocking_test_cases",
+    testonly = 1,
+    srcs = [
+        "socket_non_stream_blocking.cc",
+    ],
+    hdrs = [
+        "socket_non_stream_blocking.h",
+    ],
+    deps = [
+        ":socket_test_util",
+        ":unix_domain_socket_test_util",
+        "@com_google_absl//absl/time",
+        gtest,
+        "//test/util:test_util",
+        "//test/util:thread_util",
+    ],
+    alwayslink = 1,
+)
+
+cc_library(
+    name = "socket_bind_to_device_util",
+    testonly = 1,
+    srcs = [
+        "socket_bind_to_device_util.cc",
+    ],
+    hdrs = [
+        "socket_bind_to_device_util.h",
+    ],
+    deps = [
+        "//test/util:test_util",
+        "@com_google_absl//absl/memory",
+        "@com_google_absl//absl/strings",
+    ],
+    alwayslink = 1,
+)
+
+cc_binary(
+    name = "socket_stream_local_test",
+    testonly = 1,
+    srcs = [
+        "socket_unix_stream_local.cc",
+    ],
+    linkstatic = 1,
+    deps = [
+        ":socket_stream_test_cases",
+        ":socket_test_util",
+        ":unix_domain_socket_test_util",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "socket_stream_blocking_local_test",
+    testonly = 1,
+    srcs = [
+        "socket_unix_stream_blocking_local.cc",
+    ],
+    linkstatic = 1,
+    deps = [
+        ":socket_stream_blocking_test_cases",
+        ":socket_test_util",
+        ":unix_domain_socket_test_util",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "socket_stream_blocking_tcp_test",
+    testonly = 1,
+    srcs = [
+        "socket_ip_tcp_loopback_blocking.cc",
+    ],
+    linkstatic = 1,
+    deps = [
+        ":ip_socket_test_util",
+        ":socket_stream_blocking_test_cases",
+        ":socket_test_util",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "socket_stream_nonblock_local_test",
+    testonly = 1,
+    srcs = [
+        "socket_unix_stream_nonblock_local.cc",
+    ],
+    linkstatic = 1,
+    deps = [
+        ":socket_stream_nonblocking_test_cases",
+        ":socket_test_util",
+        ":unix_domain_socket_test_util",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "socket_unix_unbound_dgram_test",
+    testonly = 1,
+    srcs = ["socket_unix_unbound_dgram.cc"],
+    linkstatic = 1,
+    deps = [
+        ":socket_test_util",
+        ":unix_domain_socket_test_util",
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "socket_unix_unbound_abstract_test",
+    testonly = 1,
+    srcs = ["socket_unix_unbound_abstract.cc"],
+    linkstatic = 1,
+    deps = [
+        ":socket_test_util",
+        ":unix_domain_socket_test_util",
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "socket_unix_unbound_filesystem_test",
+    testonly = 1,
+    srcs = ["socket_unix_unbound_filesystem.cc"],
+    linkstatic = 1,
+    deps = [
+        ":socket_test_util",
+        ":unix_domain_socket_test_util",
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "socket_blocking_local_test",
+    testonly = 1,
+    srcs = [
+        "socket_unix_blocking_local.cc",
+    ],
+    linkstatic = 1,
+    deps = [
+        ":socket_blocking_test_cases",
+        ":socket_test_util",
+        ":unix_domain_socket_test_util",
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "socket_blocking_ip_test",
+    testonly = 1,
+    srcs = [
+        "socket_ip_loopback_blocking.cc",
+    ],
+    linkstatic = 1,
+    deps = [
+        ":ip_socket_test_util",
+        ":socket_blocking_test_cases",
+        ":socket_test_util",
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "socket_non_stream_blocking_local_test",
+    testonly = 1,
+    srcs = [
+        "socket_unix_non_stream_blocking_local.cc",
+    ],
+    linkstatic = 1,
+    deps = [
+        ":socket_non_stream_blocking_test_cases",
+        ":socket_test_util",
+        ":unix_domain_socket_test_util",
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "socket_non_stream_blocking_udp_test",
+    testonly = 1,
+    srcs = [
+        "socket_ip_udp_loopback_blocking.cc",
+    ],
+    linkstatic = 1,
+    deps = [
+        ":ip_socket_test_util",
+        ":socket_non_stream_blocking_test_cases",
+        ":socket_test_util",
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "socket_unix_pair_test",
+    testonly = 1,
+    srcs = [
+        "socket_unix_pair.cc",
+    ],
+    linkstatic = 1,
+    deps = [
+        ":socket_test_util",
+        ":socket_unix_cmsg_test_cases",
+        ":socket_unix_test_cases",
+        ":unix_domain_socket_test_util",
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "socket_unix_unbound_seqpacket_test",
+    testonly = 1,
+    srcs = ["socket_unix_unbound_seqpacket.cc"],
+    linkstatic = 1,
+    deps = [
+        ":socket_test_util",
+        ":unix_domain_socket_test_util",
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "socket_unix_unbound_stream_test",
+    testonly = 1,
+    srcs = ["socket_unix_unbound_stream.cc"],
+    linkstatic = 1,
+    deps = [
+        ":socket_test_util",
+        ":unix_domain_socket_test_util",
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "socket_netdevice_test",
+    testonly = 1,
+    srcs = ["socket_netdevice.cc"],
+    linkstatic = 1,
+    deps = [
+        ":socket_netlink_util",
+        ":socket_test_util",
+        "//test/util:file_descriptor",
+        "@com_google_absl//absl/base:endian",
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "stat_test",
+    testonly = 1,
+    srcs = [
+        "file_base.h",
+        "stat.cc",
+    ],
+    linkstatic = 1,
+    deps = [
+        "//test/util:cleanup",
+        "//test/util:file_descriptor",
+        "//test/util:fs_util",
+        "@com_google_absl//absl/strings",
+        gtest,
+        "//test/util:posix_error",
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "stat_times_test",
+    testonly = 1,
+    srcs = ["stat_times.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:file_descriptor",
+        "@com_google_absl//absl/time",
+        gtest,
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "statfs_test",
+    testonly = 1,
+    srcs = [
+        "file_base.h",
+        "statfs.cc",
+    ],
+    linkstatic = 1,
+    deps = [
+        "//test/util:file_descriptor",
+        "@com_google_absl//absl/strings",
+        gtest,
+        "//test/util:posix_error",
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "symlink_test",
+    testonly = 1,
+    srcs = ["symlink.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:capability_util",
+        "//test/util:file_descriptor",
+        "//test/util:fs_util",
+        "@com_google_absl//absl/time",
+        gtest,
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "sync_test",
+    testonly = 1,
+    srcs = ["sync.cc"],
+    linkstatic = 1,
+    deps = [
+        gtest,
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "sysinfo_test",
+    testonly = 1,
+    srcs = ["sysinfo.cc"],
+    linkstatic = 1,
+    deps = [
+        "@com_google_absl//absl/time",
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "syslog_test",
+    testonly = 1,
+    srcs = ["syslog.cc"],
+    linkstatic = 1,
+    deps = [
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "sysret_test",
+    testonly = 1,
+    srcs = ["sysret.cc"],
+    linkstatic = 1,
+    deps = [
+        gtest,
+        "//test/util:logging",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "tcp_socket_test",
+    testonly = 1,
+    srcs = ["tcp_socket.cc"],
+    linkstatic = 1,
+    deps = [
+        ":socket_test_util",
+        "//test/util:file_descriptor",
+        "@com_google_absl//absl/time",
+        gtest,
+        "//test/util:posix_error",
+        "//test/util:test_main",
+        "//test/util:test_util",
+        "//test/util:thread_util",
+    ],
+)
+
+cc_binary(
+    name = "tgkill_test",
+    testonly = 1,
+    srcs = ["tgkill.cc"],
+    linkstatic = 1,
+    deps = [
+        gtest,
+        "//test/util:signal_util",
+        "//test/util:test_main",
+        "//test/util:test_util",
+        "//test/util:thread_util",
+    ],
+)
+
+cc_binary(
+    name = "time_test",
+    testonly = 1,
+    srcs = ["time.cc"],
+    linkstatic = 1,
+    deps = [
+        gtest,
+        "//test/util:proc_util",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "timerfd_test",
+    testonly = 1,
+    srcs = ["timerfd.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:file_descriptor",
+        "//test/util:posix_error",
+        "//test/util:test_main",
+        "//test/util:test_util",
+        "@com_google_absl//absl/time",
+    ],
+)
+
+cc_binary(
+    name = "timers_test",
+    testonly = 1,
+    srcs = ["timers.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:cleanup",
+        "@com_google_absl//absl/flags:flag",
+        "@com_google_absl//absl/time",
+        gtest,
+        "//test/util:logging",
+        "//test/util:multiprocess_util",
+        "//test/util:posix_error",
+        "//test/util:signal_util",
+        "//test/util:test_util",
+        "//test/util:thread_util",
+    ],
+)
+
+cc_binary(
+    name = "tkill_test",
+    testonly = 1,
+    srcs = ["tkill.cc"],
+    linkstatic = 1,
+    deps = [
+        gtest,
+        "//test/util:logging",
+        "//test/util:test_main",
+        "//test/util:test_util",
+        "//test/util:thread_util",
+    ],
+)
+
+cc_binary(
+    name = "truncate_test",
+    testonly = 1,
+    srcs = ["truncate.cc"],
+    linkstatic = 1,
+    deps = [
+        ":file_base",
+        "//test/util:capability_util",
+        "//test/util:cleanup",
+        "//test/util:file_descriptor",
+        "@com_google_absl//absl/strings",
+        gtest,
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "tuntap_test",
+    testonly = 1,
+    srcs = ["tuntap.cc"],
+    linkstatic = 1,
+    deps = [
+        ":socket_test_util",
+        gtest,
+        ":socket_netlink_route_util",
+        "//test/util:capability_util",
+        "//test/util:file_descriptor",
+        "//test/util:fs_util",
+        "//test/util:posix_error",
+        "//test/util:test_main",
+        "//test/util:test_util",
+        "@com_google_absl//absl/strings",
+    ],
+)
+
+cc_binary(
+    name = "tuntap_hostinet_test",
+    testonly = 1,
+    srcs = ["tuntap_hostinet.cc"],
+    linkstatic = 1,
+    deps = [
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_library(
+    name = "udp_socket_test_cases",
+    testonly = 1,
+    srcs = [
+        "udp_socket_errqueue_test_case.cc",
+        "udp_socket_test_cases.cc",
+    ],
+    hdrs = ["udp_socket_test_cases.h"],
+    defines = select_system(),
+    deps = [
+        ":ip_socket_test_util",
+        ":socket_test_util",
+        ":unix_domain_socket_test_util",
+        "@com_google_absl//absl/base:core_headers",
+        "@com_google_absl//absl/strings:str_format",
+        "@com_google_absl//absl/time",
+        gtest,
+        "//test/util:file_descriptor",
+        "//test/util:posix_error",
+        "//test/util:test_main",
+        "//test/util:test_util",
+        "//test/util:thread_util",
+    ],
+    alwayslink = 1,
+)
+
+cc_binary(
+    name = "udp_socket_test",
+    testonly = 1,
+    srcs = ["udp_socket.cc"],
+    linkstatic = 1,
+    deps = [
+        ":udp_socket_test_cases",
+    ],
+)
+
+cc_binary(
+    name = "udp_bind_test",
+    testonly = 1,
+    srcs = ["udp_bind.cc"],
+    linkstatic = 1,
+    deps = [
+        ":socket_test_util",
+        "//test/util:file_descriptor",
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "uidgid_test",
+    testonly = 1,
+    srcs = ["uidgid.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:capability_util",
+        "@com_google_absl//absl/flags:flag",
+        "@com_google_absl//absl/strings",
+        gtest,
+        "//test/util:posix_error",
+        "//test/util:test_main",
+        "//test/util:test_util",
+        "//test/util:thread_util",
+        "//test/util:uid_util",
+    ],
+)
+
+cc_binary(
+    name = "uname_test",
+    testonly = 1,
+    srcs = ["uname.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:capability_util",
+        "@com_google_absl//absl/strings",
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+        "//test/util:thread_util",
+    ],
+)
+
+cc_binary(
+    name = "unlink_test",
+    testonly = 1,
+    srcs = ["unlink.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:capability_util",
+        "//test/util:file_descriptor",
+        "//test/util:fs_util",
+        "@com_google_absl//absl/strings",
+        gtest,
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "unshare_test",
+    testonly = 1,
+    srcs = ["unshare.cc"],
+    linkstatic = 1,
+    deps = [
+        "@com_google_absl//absl/synchronization",
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+        "//test/util:thread_util",
+    ],
+)
+
+cc_binary(
+    name = "utimes_test",
+    testonly = 1,
+    srcs = ["utimes.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:file_descriptor",
+        "//test/util:fs_util",
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+        "@com_google_absl//absl/time",
+    ],
+)
+
+cc_binary(
+    name = "vdso_test",
+    testonly = 1,
+    srcs = ["vdso.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:fs_util",
+        gtest,
+        "//test/util:posix_error",
+        "//test/util:proc_util",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "vfork_test",
+    testonly = 1,
+    srcs = ["vfork.cc"],
+    linkstatic = 1,
+    deps = [
+        "@com_google_absl//absl/flags:flag",
+        "@com_google_absl//absl/time",
+        gtest,
+        "//test/util:logging",
+        "//test/util:multiprocess_util",
+        "//test/util:test_util",
+        "//test/util:time_util",
+    ],
+)
+
+cc_binary(
+    name = "wait_test",
+    testonly = 1,
+    srcs = ["wait.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:cleanup",
+        "//test/util:file_descriptor",
+        "@com_google_absl//absl/strings",
+        "@com_google_absl//absl/synchronization",
+        "@com_google_absl//absl/time",
+        gtest,
+        "//test/util:logging",
+        "//test/util:multiprocess_util",
+        "//test/util:posix_error",
+        "//test/util:signal_util",
+        "//test/util:test_main",
+        "//test/util:test_util",
+        "//test/util:thread_util",
+        "//test/util:time_util",
+    ],
+)
+
+cc_binary(
+    name = "write_test",
+    testonly = 1,
+    srcs = ["write.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:cleanup",
+        gtest,
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "memory_accounting_test",
+    testonly = 1,
+    srcs = ["memory_accounting.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:fs_util",
+        "@com_google_absl//absl/strings",
+        "@com_google_absl//absl/strings:str_format",
+        gtest,
+        "//test/util:posix_error",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "network_namespace_test",
+    testonly = 1,
+    srcs = ["network_namespace.cc"],
+    linkstatic = 1,
+    deps = [
+        ":socket_test_util",
+        gtest,
+        "//test/util:capability_util",
+        "//test/util:posix_error",
+        "//test/util:test_main",
+        "//test/util:test_util",
+        "//test/util:thread_util",
+    ],
+)
+
+cc_binary(
+    name = "semaphore_test",
+    testonly = 1,
+    srcs = ["semaphore.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:capability_util",
+        "@com_google_absl//absl/base:core_headers",
+        "@com_google_absl//absl/memory",
+        "@com_google_absl//absl/synchronization",
+        "@com_google_absl//absl/time",
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+        "//test/util:thread_util",
+    ],
+)
+
+cc_binary(
+    name = "shm_test",
+    testonly = 1,
+    srcs = ["shm.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:multiprocess_util",
+        "//test/util:posix_error",
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+        "@com_google_absl//absl/time",
+    ],
+)
+
+cc_binary(
+    name = "fadvise64_test",
+    testonly = 1,
+    srcs = ["fadvise64.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:file_descriptor",
+        gtest,
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "vdso_clock_gettime_test",
+    testonly = 1,
+    srcs = ["vdso_clock_gettime.cc"],
+    linkstatic = 1,
+    deps = [
+        "@com_google_absl//absl/strings",
+        "@com_google_absl//absl/time",
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "vsyscall_test",
+    testonly = 1,
+    srcs = ["vsyscall.cc"],
+    linkstatic = 1,
+    deps = [
+        gtest,
+        "//test/util:proc_util",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "proc_net_unix_test",
+    testonly = 1,
+    srcs = ["proc_net_unix.cc"],
+    linkstatic = 1,
+    deps = [
+        ":unix_domain_socket_test_util",
+        "//test/util:file_descriptor",
+        "//test/util:fs_util",
+        "@com_google_absl//absl/strings",
+        "@com_google_absl//absl/strings:str_format",
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "memfd_test",
+    testonly = 1,
+    srcs = ["memfd.cc"],
+    linkstatic = 1,
+    deps = [
+        "//test/util:file_descriptor",
+        "//test/util:fs_util",
+        gtest,
+        "//test/util:memory_util",
+        "//test/util:multiprocess_util",
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "proc_net_tcp_test",
+    testonly = 1,
+    srcs = ["proc_net_tcp.cc"],
+    linkstatic = 1,
+    deps = [
+        ":ip_socket_test_util",
+        "//test/util:file_descriptor",
+        "@com_google_absl//absl/strings",
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "proc_net_udp_test",
+    testonly = 1,
+    srcs = ["proc_net_udp.cc"],
+    linkstatic = 1,
+    deps = [
+        ":ip_socket_test_util",
+        "//test/util:file_descriptor",
+        "@com_google_absl//absl/strings",
+        gtest,
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
+
+cc_binary(
+    name = "xattr_test",
+    testonly = 1,
+    srcs = [
+        "file_base.h",
+        "xattr.cc",
+    ],
+    linkstatic = 1,
+    deps = [
+        "//test/util:capability_util",
+        "//test/util:file_descriptor",
+        "//test/util:fs_util",
+        "@com_google_absl//absl/container:flat_hash_set",
+        "@com_google_absl//absl/strings",
+        gtest,
+        "//test/util:posix_error",
+        "//test/util:temp_path",
+        "//test/util:test_main",
+        "//test/util:test_util",
+    ],
+)
diff --git a/test/syscalls/linux/accept_bind.cc b/test/syscalls/linux/accept_bind.cc
new file mode 100644
index 000000000..f65a14fb8
--- /dev/null
+++ b/test/syscalls/linux/accept_bind.cc
@@ -0,0 +1,641 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdio.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#include <algorithm>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST_P(AllSocketPairTest, Listen) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+                   sockets->first_addr_size()),
+              SyscallSucceeds());
+
+  ASSERT_THAT(listen(sockets->first_fd(), /* backlog = */ 5),
+              SyscallSucceeds());
+}
+
+TEST_P(AllSocketPairTest, ListenIncreaseBacklog) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+                   sockets->first_addr_size()),
+              SyscallSucceeds());
+
+  ASSERT_THAT(listen(sockets->first_fd(), /* backlog = */ 5),
+              SyscallSucceeds());
+  ASSERT_THAT(listen(sockets->first_fd(), /* backlog = */ 10),
+              SyscallSucceeds());
+}
+
+TEST_P(AllSocketPairTest, ListenDecreaseBacklog) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+                   sockets->first_addr_size()),
+              SyscallSucceeds());
+
+  ASSERT_THAT(listen(sockets->first_fd(), /* backlog = */ 5),
+              SyscallSucceeds());
+  ASSERT_THAT(listen(sockets->first_fd(), /* backlog = */ 1),
+              SyscallSucceeds());
+}
+
+TEST_P(AllSocketPairTest, ListenWithoutBind) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  ASSERT_THAT(listen(sockets->first_fd(), 0), SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_P(AllSocketPairTest, DoubleBind) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+                   sockets->first_addr_size()),
+              SyscallSucceeds());
+
+  ASSERT_THAT(bind(sockets->first_fd(), sockets->second_addr(),
+                   sockets->second_addr_size()),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_P(AllSocketPairTest, BindListenBind) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+                   sockets->first_addr_size()),
+              SyscallSucceeds());
+
+  ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds());
+
+  ASSERT_THAT(bind(sockets->first_fd(), sockets->second_addr(),
+                   sockets->second_addr_size()),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_P(AllSocketPairTest, DoubleListen) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+                   sockets->first_addr_size()),
+              SyscallSucceeds());
+
+  ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds());
+
+  ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds());
+}
+
+TEST_P(AllSocketPairTest, DoubleConnect) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+                   sockets->first_addr_size()),
+              SyscallSucceeds());
+
+  ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds());
+
+  ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(),
+                      sockets->first_addr_size()),
+              SyscallSucceeds());
+
+  ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(),
+                      sockets->first_addr_size()),
+              SyscallFailsWithErrno(EISCONN));
+}
+
+TEST_P(AllSocketPairTest, Connect) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+                   sockets->first_addr_size()),
+              SyscallSucceeds());
+
+  ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds());
+
+  ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(),
+                      sockets->first_addr_size()),
+              SyscallSucceeds());
+}
+
+TEST_P(AllSocketPairTest, ConnectWithWrongType) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  int type;
+  socklen_t typelen = sizeof(type);
+  EXPECT_THAT(
+      getsockopt(sockets->first_fd(), SOL_SOCKET, SO_TYPE, &type, &typelen),
+      SyscallSucceeds());
+  switch (type) {
+    case SOCK_STREAM:
+      type = SOCK_SEQPACKET;
+      break;
+    case SOCK_SEQPACKET:
+      type = SOCK_STREAM;
+      break;
+  }
+
+  const FileDescriptor another_socket =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_UNIX, type, 0));
+
+  ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+                   sockets->first_addr_size()),
+              SyscallSucceeds());
+
+  ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds());
+
+  if (sockets->first_addr()->sa_data[0] != 0) {
+    ASSERT_THAT(connect(another_socket.get(), sockets->first_addr(),
+                        sockets->first_addr_size()),
+                SyscallFailsWithErrno(EPROTOTYPE));
+  } else {
+    ASSERT_THAT(connect(another_socket.get(), sockets->first_addr(),
+                        sockets->first_addr_size()),
+                SyscallFailsWithErrno(ECONNREFUSED));
+  }
+
+  ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(),
+                      sockets->first_addr_size()),
+              SyscallSucceeds());
+}
+
+TEST_P(AllSocketPairTest, ConnectNonListening) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+                   sockets->first_addr_size()),
+              SyscallSucceeds());
+
+  ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(),
+                      sockets->first_addr_size()),
+              SyscallFailsWithErrno(ECONNREFUSED));
+}
+
+TEST_P(AllSocketPairTest, ConnectToFilePath) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  struct sockaddr_un addr = {};
+  addr.sun_family = AF_UNIX;
+  constexpr char kPath[] = "/tmp";
+  memcpy(addr.sun_path, kPath, sizeof(kPath));
+
+  ASSERT_THAT(
+      connect(sockets->second_fd(),
+              reinterpret_cast<const struct sockaddr*>(&addr), sizeof(addr)),
+      SyscallFailsWithErrno(ECONNREFUSED));
+}
+
+TEST_P(AllSocketPairTest, ConnectToInvalidAbstractPath) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  struct sockaddr_un addr = {};
+  addr.sun_family = AF_UNIX;
+  constexpr char kPath[] = "\0nonexistent";
+  memcpy(addr.sun_path, kPath, sizeof(kPath));
+
+  ASSERT_THAT(
+      connect(sockets->second_fd(),
+              reinterpret_cast<const struct sockaddr*>(&addr), sizeof(addr)),
+      SyscallFailsWithErrno(ECONNREFUSED));
+}
+
+TEST_P(AllSocketPairTest, SelfConnect) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+                   sockets->first_addr_size()),
+              SyscallSucceeds());
+
+  ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds());
+
+  ASSERT_THAT(connect(sockets->first_fd(), sockets->first_addr(),
+                      sockets->first_addr_size()),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_P(AllSocketPairTest, ConnectWithoutListen) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+                   sockets->first_addr_size()),
+              SyscallSucceeds());
+
+  ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(),
+                      sockets->first_addr_size()),
+              SyscallFailsWithErrno(ECONNREFUSED));
+}
+
+TEST_P(AllSocketPairTest, Accept) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+                   sockets->first_addr_size()),
+              SyscallSucceeds());
+
+  ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds());
+
+  ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(),
+                      sockets->first_addr_size()),
+              SyscallSucceeds());
+
+  int accepted = -1;
+  ASSERT_THAT(accepted = accept(sockets->first_fd(), nullptr, nullptr),
+              SyscallSucceeds());
+  ASSERT_THAT(close(accepted), SyscallSucceeds());
+}
+
+TEST_P(AllSocketPairTest, AcceptValidAddrLen) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+                   sockets->first_addr_size()),
+              SyscallSucceeds());
+
+  ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds());
+
+  ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(),
+                      sockets->first_addr_size()),
+              SyscallSucceeds());
+
+  int accepted = -1;
+  struct sockaddr_un addr = {};
+  socklen_t addr_len = sizeof(addr);
+  ASSERT_THAT(
+      accepted = accept(sockets->first_fd(),
+                        reinterpret_cast<struct sockaddr*>(&addr), &addr_len),
+      SyscallSucceeds());
+  ASSERT_THAT(close(accepted), SyscallSucceeds());
+}
+
+TEST_P(AllSocketPairTest, AcceptNegativeAddrLen) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+                   sockets->first_addr_size()),
+              SyscallSucceeds());
+
+  ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds());
+
+  ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(),
+                      sockets->first_addr_size()),
+              SyscallSucceeds());
+
+  // With a negative addr_len, accept returns EINVAL,
+  struct sockaddr_un addr = {};
+  socklen_t addr_len = -1;
+  ASSERT_THAT(accept(sockets->first_fd(),
+                     reinterpret_cast<struct sockaddr*>(&addr), &addr_len),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_P(AllSocketPairTest, AcceptLargePositiveAddrLen) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+                   sockets->first_addr_size()),
+              SyscallSucceeds());
+
+  ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds());
+
+  ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(),
+                      sockets->first_addr_size()),
+              SyscallSucceeds());
+
+  // With a large (positive) addr_len, accept does not return EINVAL.
+  int accepted = -1;
+  char addr_buf[200];
+  socklen_t addr_len = sizeof(addr_buf);
+  ASSERT_THAT(accepted = accept(sockets->first_fd(),
+                                reinterpret_cast<struct sockaddr*>(addr_buf),
+                                &addr_len),
+              SyscallSucceeds());
+  // addr_len should have been updated by accept().
+  EXPECT_LT(addr_len, sizeof(addr_buf));
+  ASSERT_THAT(close(accepted), SyscallSucceeds());
+}
+
+TEST_P(AllSocketPairTest, AcceptVeryLargePositiveAddrLen) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+                   sockets->first_addr_size()),
+              SyscallSucceeds());
+
+  ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds());
+
+  ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(),
+                      sockets->first_addr_size()),
+              SyscallSucceeds());
+
+  // With a large (positive) addr_len, accept does not return EINVAL.
+  int accepted = -1;
+  char addr_buf[2000];
+  socklen_t addr_len = sizeof(addr_buf);
+  ASSERT_THAT(accepted = accept(sockets->first_fd(),
+                                reinterpret_cast<struct sockaddr*>(addr_buf),
+                                &addr_len),
+              SyscallSucceeds());
+  // addr_len should have been updated by accept().
+  EXPECT_LT(addr_len, sizeof(addr_buf));
+  ASSERT_THAT(close(accepted), SyscallSucceeds());
+}
+
+TEST_P(AllSocketPairTest, AcceptWithoutBind) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  ASSERT_THAT(accept(sockets->first_fd(), nullptr, nullptr),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_P(AllSocketPairTest, AcceptWithoutListen) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+                   sockets->first_addr_size()),
+              SyscallSucceeds());
+  ASSERT_THAT(accept(sockets->first_fd(), nullptr, nullptr),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_P(AllSocketPairTest, GetRemoteAddress) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+                   sockets->first_addr_size()),
+              SyscallSucceeds());
+
+  ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds());
+
+  ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(),
+                      sockets->first_addr_size()),
+              SyscallSucceeds());
+
+  socklen_t addr_len = sockets->first_addr_size();
+  struct sockaddr_storage addr = {};
+  ASSERT_THAT(
+      getpeername(sockets->second_fd(), (struct sockaddr*)(&addr), &addr_len),
+      SyscallSucceeds());
+  EXPECT_EQ(addr_len, sockets->first_addr_len());
+  EXPECT_EQ(0, memcmp(&addr, sockets->first_addr(), sockets->first_addr_len()));
+}
+
+TEST_P(AllSocketPairTest, UnboundGetLocalAddress) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+                   sockets->first_addr_size()),
+              SyscallSucceeds());
+
+  ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds());
+
+  ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(),
+                      sockets->first_addr_size()),
+              SyscallSucceeds());
+
+  socklen_t addr_len = sockets->first_addr_size();
+  struct sockaddr_storage addr = {};
+  ASSERT_THAT(
+      getsockname(sockets->second_fd(), (struct sockaddr*)(&addr), &addr_len),
+      SyscallSucceeds());
+  EXPECT_EQ(addr_len, 2);
+  EXPECT_EQ(
+      memcmp(&addr, sockets->second_addr(),
+             std::min((size_t)addr_len, (size_t)sockets->second_addr_len())),
+      0);
+}
+
+TEST_P(AllSocketPairTest, BoundGetLocalAddress) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+                   sockets->first_addr_size()),
+              SyscallSucceeds());
+
+  ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds());
+
+  ASSERT_THAT(bind(sockets->second_fd(), sockets->second_addr(),
+                   sockets->second_addr_size()),
+              SyscallSucceeds());
+
+  ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(),
+                      sockets->first_addr_size()),
+              SyscallSucceeds());
+
+  socklen_t addr_len = sockets->first_addr_size();
+  struct sockaddr_storage addr = {};
+  ASSERT_THAT(
+      getsockname(sockets->second_fd(), (struct sockaddr*)(&addr), &addr_len),
+      SyscallSucceeds());
+  EXPECT_EQ(addr_len, sockets->second_addr_len());
+  EXPECT_EQ(
+      memcmp(&addr, sockets->second_addr(),
+             std::min((size_t)addr_len, (size_t)sockets->second_addr_len())),
+      0);
+}
+
+TEST_P(AllSocketPairTest, BoundConnector) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+                   sockets->first_addr_size()),
+              SyscallSucceeds());
+
+  ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds());
+
+  ASSERT_THAT(bind(sockets->second_fd(), sockets->second_addr(),
+                   sockets->second_addr_size()),
+              SyscallSucceeds());
+
+  ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(),
+                      sockets->first_addr_size()),
+              SyscallSucceeds());
+}
+
+TEST_P(AllSocketPairTest, UnboundSenderAddr) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+                   sockets->first_addr_size()),
+              SyscallSucceeds());
+
+  ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds());
+
+  ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(),
+                      sockets->first_addr_size()),
+              SyscallSucceeds());
+
+  int accepted = -1;
+  ASSERT_THAT(accepted = accept(sockets->first_fd(), nullptr, nullptr),
+              SyscallSucceeds());
+  FileDescriptor accepted_fd(accepted);
+
+  int i = 0;
+  ASSERT_THAT(RetryEINTR(send)(sockets->second_fd(), &i, sizeof(i), 0),
+              SyscallSucceedsWithValue(sizeof(i)));
+
+  struct sockaddr_storage addr;
+  socklen_t addr_len = sizeof(addr);
+  ASSERT_THAT(
+      RetryEINTR(recvfrom)(accepted_fd.get(), &i, sizeof(i), 0,
+                           reinterpret_cast<sockaddr*>(&addr), &addr_len),
+      SyscallSucceedsWithValue(sizeof(i)));
+  EXPECT_EQ(addr_len, 0);
+}
+
+TEST_P(AllSocketPairTest, BoundSenderAddr) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+                   sockets->first_addr_size()),
+              SyscallSucceeds());
+
+  ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds());
+
+  ASSERT_THAT(bind(sockets->second_fd(), sockets->second_addr(),
+                   sockets->second_addr_size()),
+              SyscallSucceeds());
+
+  ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(),
+                      sockets->first_addr_size()),
+              SyscallSucceeds());
+
+  int accepted = -1;
+  ASSERT_THAT(accepted = accept(sockets->first_fd(), nullptr, nullptr),
+              SyscallSucceeds());
+  FileDescriptor accepted_fd(accepted);
+
+  int i = 0;
+  ASSERT_THAT(RetryEINTR(send)(sockets->second_fd(), &i, sizeof(i), 0),
+              SyscallSucceedsWithValue(sizeof(i)));
+
+  struct sockaddr_storage addr;
+  socklen_t addr_len = sizeof(addr);
+  ASSERT_THAT(
+      RetryEINTR(recvfrom)(accepted_fd.get(), &i, sizeof(i), 0,
+                           reinterpret_cast<sockaddr*>(&addr), &addr_len),
+      SyscallSucceedsWithValue(sizeof(i)));
+  EXPECT_EQ(addr_len, sockets->second_addr_len());
+  EXPECT_EQ(
+      memcmp(&addr, sockets->second_addr(),
+             std::min((size_t)addr_len, (size_t)sockets->second_addr_len())),
+      0);
+}
+
+TEST_P(AllSocketPairTest, BindAfterConnectSenderAddr) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+                   sockets->first_addr_size()),
+              SyscallSucceeds());
+
+  ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds());
+
+  ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(),
+                      sockets->first_addr_size()),
+              SyscallSucceeds());
+
+  ASSERT_THAT(bind(sockets->second_fd(), sockets->second_addr(),
+                   sockets->second_addr_size()),
+              SyscallSucceeds());
+
+  int accepted = -1;
+  ASSERT_THAT(accepted = accept(sockets->first_fd(), nullptr, nullptr),
+              SyscallSucceeds());
+  FileDescriptor accepted_fd(accepted);
+
+  int i = 0;
+  ASSERT_THAT(RetryEINTR(send)(sockets->second_fd(), &i, sizeof(i), 0),
+              SyscallSucceedsWithValue(sizeof(i)));
+
+  struct sockaddr_storage addr;
+  socklen_t addr_len = sizeof(addr);
+  ASSERT_THAT(
+      RetryEINTR(recvfrom)(accepted_fd.get(), &i, sizeof(i), 0,
+                           reinterpret_cast<sockaddr*>(&addr), &addr_len),
+      SyscallSucceedsWithValue(sizeof(i)));
+  EXPECT_EQ(addr_len, sockets->second_addr_len());
+  EXPECT_EQ(
+      memcmp(&addr, sockets->second_addr(),
+             std::min((size_t)addr_len, (size_t)sockets->second_addr_len())),
+      0);
+}
+
+TEST_P(AllSocketPairTest, BindAfterAcceptSenderAddr) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+                   sockets->first_addr_size()),
+              SyscallSucceeds());
+
+  ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds());
+
+  ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(),
+                      sockets->first_addr_size()),
+              SyscallSucceeds());
+
+  int accepted = -1;
+  ASSERT_THAT(accepted = accept(sockets->first_fd(), nullptr, nullptr),
+              SyscallSucceeds());
+  FileDescriptor accepted_fd(accepted);
+
+  ASSERT_THAT(bind(sockets->second_fd(), sockets->second_addr(),
+                   sockets->second_addr_size()),
+              SyscallSucceeds());
+
+  int i = 0;
+  ASSERT_THAT(RetryEINTR(send)(sockets->second_fd(), &i, sizeof(i), 0),
+              SyscallSucceedsWithValue(sizeof(i)));
+
+  struct sockaddr_storage addr;
+  socklen_t addr_len = sizeof(addr);
+  ASSERT_THAT(
+      RetryEINTR(recvfrom)(accepted_fd.get(), &i, sizeof(i), 0,
+                           reinterpret_cast<sockaddr*>(&addr), &addr_len),
+      SyscallSucceedsWithValue(sizeof(i)));
+  EXPECT_EQ(addr_len, sockets->second_addr_len());
+  EXPECT_EQ(
+      memcmp(&addr, sockets->second_addr(),
+             std::min((size_t)addr_len, (size_t)sockets->second_addr_len())),
+      0);
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    AllUnixDomainSockets, AllSocketPairTest,
+    ::testing::ValuesIn(VecCat<SocketPairKind>(
+        ApplyVec<SocketPairKind>(
+            FilesystemUnboundUnixDomainSocketPair,
+            AllBitwiseCombinations(List<int>{SOCK_STREAM, SOCK_SEQPACKET},
+                                   List<int>{0, SOCK_NONBLOCK})),
+        ApplyVec<SocketPairKind>(
+            AbstractUnboundUnixDomainSocketPair,
+            AllBitwiseCombinations(List<int>{SOCK_STREAM, SOCK_SEQPACKET},
+                                   List<int>{0, SOCK_NONBLOCK})))));
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/accept_bind_stream.cc b/test/syscalls/linux/accept_bind_stream.cc
new file mode 100644
index 000000000..4857f160b
--- /dev/null
+++ b/test/syscalls/linux/accept_bind_stream.cc
@@ -0,0 +1,92 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdio.h>
+#include <sys/un.h>
+
+#include <algorithm>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST_P(AllSocketPairTest, BoundSenderAddrCoalesced) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+                   sockets->first_addr_size()),
+              SyscallSucceeds());
+
+  ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds());
+
+  ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(),
+                      sockets->first_addr_size()),
+              SyscallSucceeds());
+
+  int accepted = -1;
+  ASSERT_THAT(accepted = accept(sockets->first_fd(), nullptr, nullptr),
+              SyscallSucceeds());
+  FileDescriptor closer(accepted);
+
+  int i = 0;
+  ASSERT_THAT(RetryEINTR(send)(sockets->second_fd(), &i, sizeof(i), 0),
+              SyscallSucceedsWithValue(sizeof(i)));
+
+  ASSERT_THAT(bind(sockets->second_fd(), sockets->second_addr(),
+                   sockets->second_addr_size()),
+              SyscallSucceeds());
+
+  i = 0;
+  ASSERT_THAT(RetryEINTR(send)(sockets->second_fd(), &i, sizeof(i), 0),
+              SyscallSucceedsWithValue(sizeof(i)));
+
+  int ri[2] = {0, 0};
+  struct sockaddr_storage addr;
+  socklen_t addr_len = sizeof(addr);
+  ASSERT_THAT(
+      RetryEINTR(recvfrom)(accepted, ri, sizeof(ri), 0,
+                           reinterpret_cast<sockaddr*>(&addr), &addr_len),
+      SyscallSucceedsWithValue(sizeof(ri)));
+  EXPECT_EQ(addr_len, sockets->second_addr_len());
+
+  EXPECT_EQ(
+      memcmp(&addr, sockets->second_addr(),
+             std::min((size_t)addr_len, (size_t)sockets->second_addr_len())),
+      0);
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    AllUnixDomainSockets, AllSocketPairTest,
+    ::testing::ValuesIn(VecCat<SocketPairKind>(
+        ApplyVec<SocketPairKind>(FilesystemUnboundUnixDomainSocketPair,
+                                 AllBitwiseCombinations(List<int>{SOCK_STREAM},
+                                                        List<int>{
+                                                            0, SOCK_NONBLOCK})),
+        ApplyVec<SocketPairKind>(
+            AbstractUnboundUnixDomainSocketPair,
+            AllBitwiseCombinations(List<int>{SOCK_STREAM},
+                                   List<int>{0, SOCK_NONBLOCK})))));
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/access.cc b/test/syscalls/linux/access.cc
new file mode 100644
index 000000000..bcc25cef4
--- /dev/null
+++ b/test/syscalls/linux/access.cc
@@ -0,0 +1,170 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "test/util/capability_util.h"
+#include "test/util/fs_util.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+using ::testing::Ge;
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+class AccessTest : public ::testing::Test {
+ public:
+  std::string CreateTempFile(int perm) {
+    const std::string path = NewTempAbsPath();
+    const int fd = open(path.c_str(), O_CREAT | O_RDONLY, perm);
+    TEST_PCHECK(fd > 0);
+    TEST_PCHECK(close(fd) == 0);
+    return path;
+  }
+
+ protected:
+  // SetUp creates various configurations of files.
+  void SetUp() override {
+    // Move to the temporary directory. This allows us to reason more easily
+    // about absolute and relative paths.
+    ASSERT_THAT(chdir(GetAbsoluteTestTmpdir().c_str()), SyscallSucceeds());
+
+    // Create an empty file, standard permissions.
+    relfile_ = NewTempRelPath();
+    int fd;
+    ASSERT_THAT(fd = open(relfile_.c_str(), O_CREAT | O_TRUNC, 0644),
+                SyscallSucceedsWithValue(Ge(0)));
+    ASSERT_THAT(close(fd), SyscallSucceeds());
+    absfile_ = GetAbsoluteTestTmpdir() + "/" + relfile_;
+
+    // Create an empty directory, no writable permissions.
+    absdir_ = NewTempAbsPath();
+    reldir_ = JoinPath(Basename(absdir_), "");
+    ASSERT_THAT(mkdir(reldir_.c_str(), 0555), SyscallSucceeds());
+
+    // This file doesn't exist.
+    relnone_ = NewTempRelPath();
+    absnone_ = GetAbsoluteTestTmpdir() + "/" + relnone_;
+  }
+
+  // TearDown unlinks created files.
+  void TearDown() override {
+    ASSERT_THAT(unlink(absfile_.c_str()), SyscallSucceeds());
+    ASSERT_THAT(rmdir(absdir_.c_str()), SyscallSucceeds());
+  }
+
+  std::string relfile_;
+  std::string reldir_;
+
+  std::string absfile_;
+  std::string absdir_;
+
+  std::string relnone_;
+  std::string absnone_;
+};
+
+TEST_F(AccessTest, RelativeFile) {
+  EXPECT_THAT(access(relfile_.c_str(), R_OK), SyscallSucceeds());
+}
+
+TEST_F(AccessTest, RelativeDir) {
+  EXPECT_THAT(access(reldir_.c_str(), R_OK | X_OK), SyscallSucceeds());
+}
+
+TEST_F(AccessTest, AbsFile) {
+  EXPECT_THAT(access(absfile_.c_str(), R_OK), SyscallSucceeds());
+}
+
+TEST_F(AccessTest, AbsDir) {
+  EXPECT_THAT(access(absdir_.c_str(), R_OK | X_OK), SyscallSucceeds());
+}
+
+TEST_F(AccessTest, RelDoesNotExist) {
+  EXPECT_THAT(access(relnone_.c_str(), R_OK), SyscallFailsWithErrno(ENOENT));
+}
+
+TEST_F(AccessTest, AbsDoesNotExist) {
+  EXPECT_THAT(access(absnone_.c_str(), R_OK), SyscallFailsWithErrno(ENOENT));
+}
+
+TEST_F(AccessTest, InvalidMode) {
+  EXPECT_THAT(access(relfile_.c_str(), 0xffffffff),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_F(AccessTest, NoPerms) {
+  // Drop capabilities that allow us to override permissions. We must drop
+  // PERMITTED because access() checks those instead of EFFECTIVE.
+  ASSERT_NO_ERRNO(DropPermittedCapability(CAP_DAC_OVERRIDE));
+  ASSERT_NO_ERRNO(DropPermittedCapability(CAP_DAC_READ_SEARCH));
+
+  EXPECT_THAT(access(absdir_.c_str(), W_OK), SyscallFailsWithErrno(EACCES));
+}
+
+TEST_F(AccessTest, InvalidName) {
+  EXPECT_THAT(access(reinterpret_cast<char*>(0x1234), W_OK),
+              SyscallFailsWithErrno(EFAULT));
+}
+
+TEST_F(AccessTest, UsrReadOnly) {
+  // Drop capabilities that allow us to override permissions. We must drop
+  // PERMITTED because access() checks those instead of EFFECTIVE.
+  ASSERT_NO_ERRNO(DropPermittedCapability(CAP_DAC_OVERRIDE));
+  ASSERT_NO_ERRNO(DropPermittedCapability(CAP_DAC_READ_SEARCH));
+
+  const std::string filename = CreateTempFile(0400);
+  EXPECT_THAT(access(filename.c_str(), R_OK), SyscallSucceeds());
+  EXPECT_THAT(access(filename.c_str(), W_OK), SyscallFailsWithErrno(EACCES));
+  EXPECT_THAT(access(filename.c_str(), X_OK), SyscallFailsWithErrno(EACCES));
+  EXPECT_THAT(unlink(filename.c_str()), SyscallSucceeds());
+}
+
+TEST_F(AccessTest, UsrReadExec) {
+  // Drop capabilities that allow us to override permissions. We must drop
+  // PERMITTED because access() checks those instead of EFFECTIVE.
+  ASSERT_NO_ERRNO(DropPermittedCapability(CAP_DAC_OVERRIDE));
+  ASSERT_NO_ERRNO(DropPermittedCapability(CAP_DAC_READ_SEARCH));
+
+  const std::string filename = CreateTempFile(0500);
+  EXPECT_THAT(access(filename.c_str(), R_OK | X_OK), SyscallSucceeds());
+  EXPECT_THAT(access(filename.c_str(), W_OK), SyscallFailsWithErrno(EACCES));
+  EXPECT_THAT(unlink(filename.c_str()), SyscallSucceeds());
+}
+
+TEST_F(AccessTest, UsrReadWrite) {
+  const std::string filename = CreateTempFile(0600);
+  EXPECT_THAT(access(filename.c_str(), R_OK | W_OK), SyscallSucceeds());
+  EXPECT_THAT(access(filename.c_str(), X_OK), SyscallFailsWithErrno(EACCES));
+  EXPECT_THAT(unlink(filename.c_str()), SyscallSucceeds());
+}
+
+TEST_F(AccessTest, UsrReadWriteExec) {
+  const std::string filename = CreateTempFile(0700);
+  EXPECT_THAT(access(filename.c_str(), R_OK | W_OK | X_OK), SyscallSucceeds());
+  EXPECT_THAT(unlink(filename.c_str()), SyscallSucceeds());
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/affinity.cc b/test/syscalls/linux/affinity.cc
new file mode 100644
index 000000000..128364c34
--- /dev/null
+++ b/test/syscalls/linux/affinity.cc
@@ -0,0 +1,242 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sched.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "absl/strings/str_split.h"
+#include "test/util/cleanup.h"
+#include "test/util/fs_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+// These tests are for both the sched_getaffinity(2) and sched_setaffinity(2)
+// syscalls.
+class AffinityTest : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    EXPECT_THAT(
+        // Needs use the raw syscall to get the actual size.
+        cpuset_size_ = syscall(SYS_sched_getaffinity, /*pid=*/0,
+                               sizeof(cpu_set_t), &mask_),
+        SyscallSucceeds());
+    // Lots of tests rely on having more than 1 logical processor available.
+    EXPECT_GT(CPU_COUNT(&mask_), 1);
+  }
+
+  static PosixError ClearLowestBit(cpu_set_t* mask, size_t cpus) {
+    const size_t mask_size = CPU_ALLOC_SIZE(cpus);
+    for (size_t n = 0; n < cpus; ++n) {
+      if (CPU_ISSET_S(n, mask_size, mask)) {
+        CPU_CLR_S(n, mask_size, mask);
+        return NoError();
+      }
+    }
+    return PosixError(EINVAL, "No bit to clear, mask is empty");
+  }
+
+  PosixError ClearLowestBit() { return ClearLowestBit(&mask_, CPU_SETSIZE); }
+
+  // Stores the initial cpu mask for this process.
+  cpu_set_t mask_ = {};
+  int cpuset_size_ = 0;
+};
+
+// sched_getaffinity(2) is implemented.
+TEST_F(AffinityTest, SchedGetAffinityImplemented) {
+  EXPECT_THAT(sched_getaffinity(/*pid=*/0, sizeof(cpu_set_t), &mask_),
+              SyscallSucceeds());
+}
+
+// PID is not found.
+TEST_F(AffinityTest, SchedGetAffinityInvalidPID) {
+  // Flaky, but it's tough to avoid a race condition when finding an unused pid
+  EXPECT_THAT(sched_getaffinity(/*pid=*/INT_MAX - 1, sizeof(cpu_set_t), &mask_),
+              SyscallFailsWithErrno(ESRCH));
+}
+
+// PID is not found.
+TEST_F(AffinityTest, SchedSetAffinityInvalidPID) {
+  // Flaky, but it's tough to avoid a race condition when finding an unused pid
+  EXPECT_THAT(sched_setaffinity(/*pid=*/INT_MAX - 1, sizeof(cpu_set_t), &mask_),
+              SyscallFailsWithErrno(ESRCH));
+}
+
+TEST_F(AffinityTest, SchedSetAffinityZeroMask) {
+  CPU_ZERO(&mask_);
+  EXPECT_THAT(sched_setaffinity(/*pid=*/0, sizeof(cpu_set_t), &mask_),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+// N.B. This test case relies on cpuset_size_ larger than the actual number of
+// of all existing CPUs. Check your machine if the test fails.
+TEST_F(AffinityTest, SchedSetAffinityNonexistentCPUDropped) {
+  cpu_set_t mask = mask_;
+  // Add a nonexistent CPU.
+  //
+  // The number needs to be larger than the possible number of CPU available,
+  // but smaller than the number of the CPU that the kernel claims to support --
+  // it's implicitly returned by raw sched_getaffinity syscall.
+  CPU_SET(cpuset_size_ * 8 - 1, &mask);
+  EXPECT_THAT(
+      // Use raw syscall because it will be rejected by the libc wrapper
+      // otherwise.
+      syscall(SYS_sched_setaffinity, /*pid=*/0, sizeof(cpu_set_t), &mask),
+      SyscallSucceeds())
+      << "failed with cpumask : " << CPUSetToString(mask)
+      << ", cpuset_size_ : " << cpuset_size_;
+  cpu_set_t newmask;
+  EXPECT_THAT(sched_getaffinity(/*pid=*/0, sizeof(cpu_set_t), &newmask),
+              SyscallSucceeds());
+  EXPECT_TRUE(CPU_EQUAL(&mask_, &newmask))
+      << "got: " << CPUSetToString(newmask)
+      << " != expected: " << CPUSetToString(mask_);
+}
+
+TEST_F(AffinityTest, SchedSetAffinityOnlyNonexistentCPUFails) {
+  // Make an empty cpu set.
+  CPU_ZERO(&mask_);
+  // Add a nonexistent CPU.
+  //
+  // The number needs to be larger than the possible number of CPU available,
+  // but smaller than the number of the CPU that the kernel claims to support --
+  // it's implicitly returned by raw sched_getaffinity syscall.
+  int cpu = cpuset_size_ * 8 - 1;
+  if (cpu <= NumCPUs()) {
+    GTEST_SKIP() << "Skipping test: cpu " << cpu << " exists";
+  }
+  CPU_SET(cpu, &mask_);
+  EXPECT_THAT(
+      // Use raw syscall because it will be rejected by the libc wrapper
+      // otherwise.
+      syscall(SYS_sched_setaffinity, /*pid=*/0, sizeof(cpu_set_t), &mask_),
+      SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_F(AffinityTest, SchedSetAffinityInvalidSize) {
+  EXPECT_GT(cpuset_size_, 0);
+  // Not big enough.
+  EXPECT_THAT(sched_getaffinity(/*pid=*/0, cpuset_size_ - 1, &mask_),
+              SyscallFailsWithErrno(EINVAL));
+  // Not a multiple of word size.
+  EXPECT_THAT(sched_getaffinity(/*pid=*/0, cpuset_size_ + 1, &mask_),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_F(AffinityTest, Sanity) {
+  ASSERT_NO_ERRNO(ClearLowestBit());
+  EXPECT_THAT(sched_setaffinity(/*pid=*/0, sizeof(cpu_set_t), &mask_),
+              SyscallSucceeds());
+  cpu_set_t newmask;
+  EXPECT_THAT(sched_getaffinity(/*pid=*/0, sizeof(cpu_set_t), &newmask),
+              SyscallSucceeds());
+  EXPECT_TRUE(CPU_EQUAL(&mask_, &newmask))
+      << "got: " << CPUSetToString(newmask)
+      << " != expected: " << CPUSetToString(mask_);
+}
+
+TEST_F(AffinityTest, NewThread) {
+  SKIP_IF(CPU_COUNT(&mask_) < 3);
+  ASSERT_NO_ERRNO(ClearLowestBit());
+  ASSERT_NO_ERRNO(ClearLowestBit());
+  EXPECT_THAT(sched_setaffinity(/*pid=*/0, sizeof(cpu_set_t), &mask_),
+              SyscallSucceeds());
+  ScopedThread([this]() {
+    cpu_set_t child_mask;
+    ASSERT_THAT(sched_getaffinity(/*pid=*/0, sizeof(cpu_set_t), &child_mask),
+                SyscallSucceeds());
+    ASSERT_TRUE(CPU_EQUAL(&child_mask, &mask_))
+        << "child cpu mask: " << CPUSetToString(child_mask)
+        << " != parent cpu mask: " << CPUSetToString(mask_);
+  });
+}
+
+TEST_F(AffinityTest, ConsistentWithProcCpuInfo) {
+  // Count how many cpus are shown in /proc/cpuinfo.
+  std::string cpuinfo = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/cpuinfo"));
+  int count = 0;
+  for (auto const& line : absl::StrSplit(cpuinfo, '\n')) {
+    if (absl::StartsWith(line, "processor")) {
+      count++;
+    }
+  }
+  EXPECT_GE(count, CPU_COUNT(&mask_));
+}
+
+TEST_F(AffinityTest, ConsistentWithProcStat) {
+  // Count how many cpus are shown in /proc/stat.
+  std::string stat = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/stat"));
+  int count = 0;
+  for (auto const& line : absl::StrSplit(stat, '\n')) {
+    if (absl::StartsWith(line, "cpu") && !absl::StartsWith(line, "cpu ")) {
+      count++;
+    }
+  }
+  EXPECT_GE(count, CPU_COUNT(&mask_));
+}
+
+TEST_F(AffinityTest, SmallCpuMask) {
+  const int num_cpus = NumCPUs();
+  const size_t mask_size = CPU_ALLOC_SIZE(num_cpus);
+  cpu_set_t* mask = CPU_ALLOC(num_cpus);
+  ASSERT_NE(mask, nullptr);
+  const auto free_mask = Cleanup([&] { CPU_FREE(mask); });
+
+  CPU_ZERO_S(mask_size, mask);
+  ASSERT_THAT(sched_getaffinity(0, mask_size, mask), SyscallSucceeds());
+}
+
+TEST_F(AffinityTest, LargeCpuMask) {
+  // Allocate mask bigger than cpu_set_t normally allocates.
+  const size_t cpus = CPU_SETSIZE * 8;
+  const size_t mask_size = CPU_ALLOC_SIZE(cpus);
+
+  cpu_set_t* large_mask = CPU_ALLOC(cpus);
+  auto free_mask = Cleanup([large_mask] { CPU_FREE(large_mask); });
+  CPU_ZERO_S(mask_size, large_mask);
+
+  // Check that get affinity with large mask works as expected.
+  ASSERT_THAT(sched_getaffinity(/*pid=*/0, mask_size, large_mask),
+              SyscallSucceeds());
+  EXPECT_TRUE(CPU_EQUAL(&mask_, large_mask))
+      << "got: " << CPUSetToString(*large_mask, cpus)
+      << " != expected: " << CPUSetToString(mask_);
+
+  // Check that set affinity with large mask works as expected.
+  ASSERT_NO_ERRNO(ClearLowestBit(large_mask, cpus));
+  EXPECT_THAT(sched_setaffinity(/*pid=*/0, mask_size, large_mask),
+              SyscallSucceeds());
+
+  cpu_set_t* new_mask = CPU_ALLOC(cpus);
+  auto free_new_mask = Cleanup([new_mask] { CPU_FREE(new_mask); });
+  CPU_ZERO_S(mask_size, new_mask);
+  EXPECT_THAT(sched_getaffinity(/*pid=*/0, mask_size, new_mask),
+              SyscallSucceeds());
+
+  EXPECT_TRUE(CPU_EQUAL_S(mask_size, large_mask, new_mask))
+      << "got: " << CPUSetToString(*new_mask, cpus)
+      << " != expected: " << CPUSetToString(*large_mask, cpus);
+}
+
+}  // namespace
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/aio.cc b/test/syscalls/linux/aio.cc
new file mode 100644
index 000000000..806d5729e
--- /dev/null
+++ b/test/syscalls/linux/aio.cc
@@ -0,0 +1,430 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <linux/aio_abi.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <algorithm>
+#include <string>
+
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/file_base.h"
+#include "test/util/cleanup.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/fs_util.h"
+#include "test/util/memory_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/proc_util.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+using ::testing::_;
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+// Returns the size of the VMA containing the given address.
+PosixErrorOr<size_t> VmaSizeAt(uintptr_t addr) {
+  ASSIGN_OR_RETURN_ERRNO(std::string proc_self_maps,
+                         GetContents("/proc/self/maps"));
+  ASSIGN_OR_RETURN_ERRNO(auto entries, ParseProcMaps(proc_self_maps));
+  // Use binary search to find the first VMA that might contain addr.
+  ProcMapsEntry target = {};
+  target.end = addr;
+  auto it =
+      std::upper_bound(entries.begin(), entries.end(), target,
+                       [](const ProcMapsEntry& x, const ProcMapsEntry& y) {
+                         return x.end < y.end;
+                       });
+  // Check that it actually contains addr.
+  if (it == entries.end() || addr < it->start) {
+    return PosixError(ENOENT, absl::StrCat("no VMA contains address ", addr));
+  }
+  return it->end - it->start;
+}
+
+constexpr char kData[] = "hello world!";
+
+int SubmitCtx(aio_context_t ctx, long nr, struct iocb** iocbpp) {
+  return syscall(__NR_io_submit, ctx, nr, iocbpp);
+}
+
+class AIOTest : public FileTest {
+ public:
+  AIOTest() : ctx_(0) {}
+
+  int SetupContext(unsigned int nr) {
+    return syscall(__NR_io_setup, nr, &ctx_);
+  }
+
+  int Submit(long nr, struct iocb** iocbpp) {
+    return SubmitCtx(ctx_, nr, iocbpp);
+  }
+
+  int GetEvents(long min, long max, struct io_event* events,
+                struct timespec* timeout) {
+    return RetryEINTR(syscall)(__NR_io_getevents, ctx_, min, max, events,
+                               timeout);
+  }
+
+  int DestroyContext() { return syscall(__NR_io_destroy, ctx_); }
+
+  void TearDown() override {
+    FileTest::TearDown();
+    if (ctx_ != 0) {
+      ASSERT_THAT(DestroyContext(), SyscallSucceeds());
+      ctx_ = 0;
+    }
+  }
+
+  struct iocb CreateCallback() {
+    struct iocb cb = {};
+    cb.aio_data = 0x123;
+    cb.aio_fildes = test_file_fd_.get();
+    cb.aio_lio_opcode = IOCB_CMD_PWRITE;
+    cb.aio_buf = reinterpret_cast<uint64_t>(kData);
+    cb.aio_offset = 0;
+    cb.aio_nbytes = strlen(kData);
+    return cb;
+  }
+
+ protected:
+  aio_context_t ctx_;
+};
+
+TEST_F(AIOTest, BasicWrite) {
+  // Copied from fs/aio.c.
+  constexpr unsigned AIO_RING_MAGIC = 0xa10a10a1;
+  struct aio_ring {
+    unsigned id;
+    unsigned nr;
+    unsigned head;
+    unsigned tail;
+    unsigned magic;
+    unsigned compat_features;
+    unsigned incompat_features;
+    unsigned header_length;
+    struct io_event io_events[0];
+  };
+
+  // Setup a context that is 128 entries deep.
+  ASSERT_THAT(SetupContext(128), SyscallSucceeds());
+
+  // Check that 'ctx_' points to a valid address. libaio uses it to check if
+  // aio implementation uses aio_ring. gVisor doesn't and returns all zeroes.
+  // Linux implements aio_ring, so skip the zeroes check.
+  //
+  // TODO(gvisor.dev/issue/204): Remove when gVisor implements aio_ring.
+  auto ring = reinterpret_cast<struct aio_ring*>(ctx_);
+  auto magic = IsRunningOnGvisor() ? 0 : AIO_RING_MAGIC;
+  EXPECT_EQ(ring->magic, magic);
+
+  struct iocb cb = CreateCallback();
+  struct iocb* cbs[1] = {&cb};
+
+  // Submit the request.
+  ASSERT_THAT(Submit(1, cbs), SyscallSucceedsWithValue(1));
+
+  // Get the reply.
+  struct io_event events[1];
+  ASSERT_THAT(GetEvents(1, 1, events, nullptr), SyscallSucceedsWithValue(1));
+
+  // Verify that it is as expected.
+  EXPECT_EQ(events[0].data, 0x123);
+  EXPECT_EQ(events[0].obj, reinterpret_cast<long>(&cb));
+  EXPECT_EQ(events[0].res, strlen(kData));
+
+  // Verify that the file contains the contents.
+  char verify_buf[sizeof(kData)] = {};
+  ASSERT_THAT(read(test_file_fd_.get(), verify_buf, sizeof(kData)),
+              SyscallSucceedsWithValue(strlen(kData)));
+  EXPECT_STREQ(verify_buf, kData);
+}
+
+TEST_F(AIOTest, BadWrite) {
+  // Create a pipe and immediately close the read end.
+  int pipefd[2];
+  ASSERT_THAT(pipe(pipefd), SyscallSucceeds());
+
+  FileDescriptor rfd(pipefd[0]);
+  FileDescriptor wfd(pipefd[1]);
+
+  rfd.reset();  // Close the read end.
+
+  // Setup a context that is 128 entries deep.
+  ASSERT_THAT(SetupContext(128), SyscallSucceeds());
+
+  struct iocb cb = CreateCallback();
+  // Try to write to the read end.
+  cb.aio_fildes = wfd.get();
+  struct iocb* cbs[1] = {&cb};
+
+  // Submit the request.
+  ASSERT_THAT(Submit(1, cbs), SyscallSucceedsWithValue(1));
+
+  // Get the reply.
+  struct io_event events[1];
+  ASSERT_THAT(GetEvents(1, 1, events, nullptr), SyscallSucceedsWithValue(1));
+
+  // Verify that it fails with the right error code.
+  EXPECT_EQ(events[0].data, 0x123);
+  EXPECT_EQ(events[0].obj, reinterpret_cast<uint64_t>(&cb));
+  EXPECT_LT(events[0].res, 0);
+}
+
+TEST_F(AIOTest, ExitWithPendingIo) {
+  // Setup a context that is 100 entries deep.
+  ASSERT_THAT(SetupContext(100), SyscallSucceeds());
+
+  struct iocb cb = CreateCallback();
+  struct iocb* cbs[] = {&cb};
+
+  // Submit a request but don't complete it to make it pending.
+  for (int i = 0; i < 100; ++i) {
+    EXPECT_THAT(Submit(1, cbs), SyscallSucceeds());
+  }
+
+  ASSERT_THAT(DestroyContext(), SyscallSucceeds());
+  ctx_ = 0;
+}
+
+int Submitter(void* arg) {
+  auto test = reinterpret_cast<AIOTest*>(arg);
+
+  struct iocb cb = test->CreateCallback();
+  struct iocb* cbs[1] = {&cb};
+
+  // Submit the request.
+  TEST_CHECK(test->Submit(1, cbs) == 1);
+  return 0;
+}
+
+TEST_F(AIOTest, CloneVm) {
+  // Setup a context that is 128 entries deep.
+  ASSERT_THAT(SetupContext(128), SyscallSucceeds());
+
+  const size_t kStackSize = 5 * kPageSize;
+  std::unique_ptr<char[]> stack(new char[kStackSize]);
+  char* bp = stack.get() + kStackSize;
+  pid_t child;
+  ASSERT_THAT(child = clone(Submitter, bp, CLONE_VM | SIGCHLD,
+                            reinterpret_cast<void*>(this)),
+              SyscallSucceeds());
+
+  // Get the reply.
+  struct io_event events[1];
+  ASSERT_THAT(GetEvents(1, 1, events, nullptr), SyscallSucceedsWithValue(1));
+
+  // Verify that it is as expected.
+  EXPECT_EQ(events[0].data, 0x123);
+  EXPECT_EQ(events[0].res, strlen(kData));
+
+  // Verify that the file contains the contents.
+  char verify_buf[32] = {};
+  ASSERT_THAT(read(test_file_fd_.get(), &verify_buf[0], strlen(kData)),
+              SyscallSucceeds());
+  EXPECT_EQ(strcmp(kData, &verify_buf[0]), 0);
+
+  int status;
+  ASSERT_THAT(RetryEINTR(waitpid)(child, &status, 0),
+              SyscallSucceedsWithValue(child));
+  EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+      << " status " << status;
+}
+
+// Tests that AIO context can be remapped to a different address.
+TEST_F(AIOTest, Mremap) {
+  // Setup a context that is 128 entries deep.
+  ASSERT_THAT(SetupContext(128), SyscallSucceeds());
+  const size_t ctx_size =
+      ASSERT_NO_ERRNO_AND_VALUE(VmaSizeAt(reinterpret_cast<uintptr_t>(ctx_)));
+
+  struct iocb cb = CreateCallback();
+  struct iocb* cbs[1] = {&cb};
+
+  // Reserve address space for the mremap target so we have something safe to
+  // map over.
+  Mapping dst =
+      ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(ctx_size, PROT_READ, MAP_PRIVATE));
+
+  // Remap context 'handle' to a different address.
+  ASSERT_THAT(Mremap(reinterpret_cast<void*>(ctx_), ctx_size, dst.len(),
+                     MREMAP_FIXED | MREMAP_MAYMOVE, dst.ptr()),
+              IsPosixErrorOkAndHolds(dst.ptr()));
+  aio_context_t old_ctx = ctx_;
+  ctx_ = reinterpret_cast<aio_context_t>(dst.addr());
+  // io_destroy() will unmap dst now.
+  dst.release();
+
+  // Check that submitting the request with the old 'ctx_' fails.
+  ASSERT_THAT(SubmitCtx(old_ctx, 1, cbs), SyscallFailsWithErrno(EINVAL));
+
+  // Submit the request with the new 'ctx_'.
+  ASSERT_THAT(Submit(1, cbs), SyscallSucceedsWithValue(1));
+
+  // Remap again.
+  dst = ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(ctx_size, PROT_READ, MAP_PRIVATE));
+  ASSERT_THAT(Mremap(reinterpret_cast<void*>(ctx_), ctx_size, dst.len(),
+                     MREMAP_FIXED | MREMAP_MAYMOVE, dst.ptr()),
+              IsPosixErrorOkAndHolds(dst.ptr()));
+  ctx_ = reinterpret_cast<aio_context_t>(dst.addr());
+  dst.release();
+
+  // Get the reply with yet another 'ctx_' and verify it.
+  struct io_event events[1];
+  ASSERT_THAT(GetEvents(1, 1, events, nullptr), SyscallSucceedsWithValue(1));
+  EXPECT_EQ(events[0].data, 0x123);
+  EXPECT_EQ(events[0].obj, reinterpret_cast<long>(&cb));
+  EXPECT_EQ(events[0].res, strlen(kData));
+
+  // Verify that the file contains the contents.
+  char verify_buf[sizeof(kData)] = {};
+  ASSERT_THAT(read(test_file_fd_.get(), verify_buf, sizeof(kData)),
+              SyscallSucceedsWithValue(strlen(kData)));
+  EXPECT_STREQ(verify_buf, kData);
+}
+
+// Tests that AIO context cannot be expanded with mremap.
+TEST_F(AIOTest, MremapExpansion) {
+  // Setup a context that is 128 entries deep.
+  ASSERT_THAT(SetupContext(128), SyscallSucceeds());
+  const size_t ctx_size =
+      ASSERT_NO_ERRNO_AND_VALUE(VmaSizeAt(reinterpret_cast<uintptr_t>(ctx_)));
+
+  // Reserve address space for the mremap target so we have something safe to
+  // map over.
+  Mapping dst = ASSERT_NO_ERRNO_AND_VALUE(
+      MmapAnon(ctx_size + kPageSize, PROT_NONE, MAP_PRIVATE));
+
+  // Test that remapping to a larger address range fails.
+  ASSERT_THAT(Mremap(reinterpret_cast<void*>(ctx_), ctx_size, dst.len(),
+                     MREMAP_FIXED | MREMAP_MAYMOVE, dst.ptr()),
+              PosixErrorIs(EFAULT, _));
+
+  // mm/mremap.c:sys_mremap() => mremap_to() does do_munmap() of the destination
+  // before it hits the VM_DONTEXPAND check in vma_to_resize(), so we should no
+  // longer munmap it (another thread may have created a mapping there).
+  dst.release();
+}
+
+// Tests that AIO calls fail if context's address is inaccessible.
+TEST_F(AIOTest, Mprotect) {
+  // Setup a context that is 128 entries deep.
+  ASSERT_THAT(SetupContext(128), SyscallSucceeds());
+
+  struct iocb cb = CreateCallback();
+  struct iocb* cbs[1] = {&cb};
+
+  ASSERT_THAT(Submit(1, cbs), SyscallSucceedsWithValue(1));
+
+  // Makes the context 'handle' inaccessible and check that all subsequent
+  // calls fail.
+  ASSERT_THAT(mprotect(reinterpret_cast<void*>(ctx_), kPageSize, PROT_NONE),
+              SyscallSucceeds());
+  struct io_event events[1];
+  EXPECT_THAT(GetEvents(1, 1, events, nullptr), SyscallFailsWithErrno(EINVAL));
+  ASSERT_THAT(Submit(1, cbs), SyscallFailsWithErrno(EINVAL));
+  EXPECT_THAT(DestroyContext(), SyscallFailsWithErrno(EINVAL));
+
+  // Prevent TearDown from attempting to destroy the context and fail.
+  ctx_ = 0;
+}
+
+TEST_F(AIOTest, Timeout) {
+  // Setup a context that is 128 entries deep.
+  ASSERT_THAT(SetupContext(128), SyscallSucceeds());
+
+  struct timespec timeout;
+  timeout.tv_sec = 0;
+  timeout.tv_nsec = 10;
+  struct io_event events[1];
+  ASSERT_THAT(GetEvents(1, 1, events, &timeout), SyscallSucceedsWithValue(0));
+}
+
+class AIOReadWriteParamTest : public AIOTest,
+                              public ::testing::WithParamInterface<int> {};
+
+TEST_P(AIOReadWriteParamTest, BadOffset) {
+  // Setup a context that is 128 entries deep.
+  ASSERT_THAT(SetupContext(128), SyscallSucceeds());
+
+  struct iocb cb = CreateCallback();
+  struct iocb* cbs[1] = {&cb};
+
+  // Create a buffer that we can write to.
+  char buf[] = "hello world!";
+  cb.aio_buf = reinterpret_cast<uint64_t>(buf);
+
+  // Set the operation on the callback and give a negative offset.
+  const int opcode = GetParam();
+  cb.aio_lio_opcode = opcode;
+
+  iovec iov = {};
+  if (opcode == IOCB_CMD_PREADV || opcode == IOCB_CMD_PWRITEV) {
+    // Create a valid iovec and set it in the callback.
+    iov.iov_base = reinterpret_cast<void*>(buf);
+    iov.iov_len = 1;
+    cb.aio_buf = reinterpret_cast<uint64_t>(&iov);
+    // aio_nbytes is the number of iovecs.
+    cb.aio_nbytes = 1;
+  }
+
+  // Pass a negative offset.
+  cb.aio_offset = -1;
+
+  // Should get error on submission.
+  ASSERT_THAT(Submit(1, cbs), SyscallFailsWithErrno(EINVAL));
+}
+
+INSTANTIATE_TEST_SUITE_P(BadOffset, AIOReadWriteParamTest,
+                         ::testing::Values(IOCB_CMD_PREAD, IOCB_CMD_PWRITE,
+                                           IOCB_CMD_PREADV, IOCB_CMD_PWRITEV));
+
+class AIOVectorizedParamTest : public AIOTest,
+                               public ::testing::WithParamInterface<int> {};
+
+TEST_P(AIOVectorizedParamTest, BadIOVecs) {
+  // Setup a context that is 128 entries deep.
+  ASSERT_THAT(SetupContext(128), SyscallSucceeds());
+
+  struct iocb cb = CreateCallback();
+  struct iocb* cbs[1] = {&cb};
+
+  // Modify the callback to use the operation from the param.
+  cb.aio_lio_opcode = GetParam();
+
+  // Create an iovec with address in kernel range, and pass that as the buffer.
+  iovec iov = {};
+  iov.iov_base = reinterpret_cast<void*>(0xFFFFFFFF00000000);
+  iov.iov_len = 1;
+  cb.aio_buf = reinterpret_cast<uint64_t>(&iov);
+  // aio_nbytes is the number of iovecs.
+  cb.aio_nbytes = 1;
+
+  // Should get error on submission.
+  ASSERT_THAT(Submit(1, cbs), SyscallFailsWithErrno(EFAULT));
+}
+
+INSTANTIATE_TEST_SUITE_P(BadIOVecs, AIOVectorizedParamTest,
+                         ::testing::Values(IOCB_CMD_PREADV, IOCB_CMD_PWRITEV));
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/alarm.cc b/test/syscalls/linux/alarm.cc
new file mode 100644
index 000000000..940c97285
--- /dev/null
+++ b/test/syscalls/linux/alarm.cc
@@ -0,0 +1,192 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <signal.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/logging.h"
+#include "test/util/signal_util.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// N.B. Below, main blocks SIGALRM. Test cases must unblock it if they want
+// delivery.
+
+void do_nothing_handler(int sig, siginfo_t* siginfo, void* arg) {}
+
+// No random save as the test relies on alarm timing. Cooperative save tests
+// already cover the save between alarm and read.
+TEST(AlarmTest, Interrupt_NoRandomSave) {
+  int pipe_fds[2];
+  ASSERT_THAT(pipe(pipe_fds), SyscallSucceeds());
+
+  FileDescriptor read_fd(pipe_fds[0]);
+  FileDescriptor write_fd(pipe_fds[1]);
+
+  // Use a signal handler that interrupts but does nothing rather than using the
+  // default terminate action.
+  struct sigaction sa;
+  sa.sa_sigaction = do_nothing_handler;
+  sigfillset(&sa.sa_mask);
+  sa.sa_flags = 0;
+  auto sa_cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGALRM, sa));
+
+  // Actually allow SIGALRM delivery.
+  auto mask_cleanup =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, SIGALRM));
+
+  // Alarm in 20 second, which should be well after read blocks below.
+  ASSERT_THAT(alarm(20), SyscallSucceeds());
+
+  char buf;
+  ASSERT_THAT(read(read_fd.get(), &buf, 1), SyscallFailsWithErrno(EINTR));
+}
+
+/* Count of the number of SIGALARMS handled. */
+static volatile int alarms_received = 0;
+
+void inc_alarms_handler(int sig, siginfo_t* siginfo, void* arg) {
+  alarms_received++;
+}
+
+// No random save as the test relies on alarm timing. Cooperative save tests
+// already cover the save between alarm and read.
+TEST(AlarmTest, Restart_NoRandomSave) {
+  alarms_received = 0;
+
+  int pipe_fds[2];
+  ASSERT_THAT(pipe(pipe_fds), SyscallSucceeds());
+
+  FileDescriptor read_fd(pipe_fds[0]);
+  // Write end closed by thread below.
+
+  struct sigaction sa;
+  sa.sa_sigaction = inc_alarms_handler;
+  sigfillset(&sa.sa_mask);
+  sa.sa_flags = SA_RESTART;
+  auto sa_cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGALRM, sa));
+
+  // Spawn a thread to eventually unblock the read below.
+  ScopedThread t([pipe_fds] {
+    absl::SleepFor(absl::Seconds(30));
+    EXPECT_THAT(close(pipe_fds[1]), SyscallSucceeds());
+  });
+
+  // Actually allow SIGALRM delivery.
+  auto mask_cleanup =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, SIGALRM));
+
+  // Alarm in 20 second, which should be well after read blocks below, but
+  // before it returns.
+  ASSERT_THAT(alarm(20), SyscallSucceeds());
+
+  // Read and eventually get an EOF from the writer closing.  If SA_RESTART
+  // didn't work, then the alarm would not have fired and we wouldn't increment
+  // our alarms_received count in our signal handler, or we would have not
+  // restarted the syscall gracefully, which we expect below in order to be
+  // able to get the final EOF on the pipe.
+  char buf;
+  ASSERT_THAT(read(read_fd.get(), &buf, 1), SyscallSucceeds());
+  EXPECT_EQ(alarms_received, 1);
+
+  t.Join();
+}
+
+// No random save as the test relies on alarm timing. Cooperative save tests
+// already cover the save between alarm and pause.
+TEST(AlarmTest, SaSiginfo_NoRandomSave) {
+  // Use a signal handler that interrupts but does nothing rather than using the
+  // default terminate action.
+  struct sigaction sa;
+  sa.sa_sigaction = do_nothing_handler;
+  sigfillset(&sa.sa_mask);
+  sa.sa_flags = SA_SIGINFO;
+  auto sa_cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGALRM, sa));
+
+  // Actually allow SIGALRM delivery.
+  auto mask_cleanup =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, SIGALRM));
+
+  // Alarm in 20 second, which should be well after pause blocks below.
+  ASSERT_THAT(alarm(20), SyscallSucceeds());
+  ASSERT_THAT(pause(), SyscallFailsWithErrno(EINTR));
+}
+
+// No random save as the test relies on alarm timing. Cooperative save tests
+// already cover the save between alarm and pause.
+TEST(AlarmTest, SaInterrupt_NoRandomSave) {
+  // Use a signal handler that interrupts but does nothing rather than using the
+  // default terminate action.
+  struct sigaction sa;
+  sa.sa_sigaction = do_nothing_handler;
+  sigfillset(&sa.sa_mask);
+  sa.sa_flags = SA_INTERRUPT;
+  auto sa_cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGALRM, sa));
+
+  // Actually allow SIGALRM delivery.
+  auto mask_cleanup =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, SIGALRM));
+
+  // Alarm in 20 second, which should be well after pause blocks below.
+  ASSERT_THAT(alarm(20), SyscallSucceeds());
+  ASSERT_THAT(pause(), SyscallFailsWithErrno(EINTR));
+}
+
+TEST(AlarmTest, UserModeSpinning) {
+  alarms_received = 0;
+
+  struct sigaction sa = {};
+  sa.sa_sigaction = inc_alarms_handler;
+  sigfillset(&sa.sa_mask);
+  sa.sa_flags = SA_SIGINFO;
+  auto sa_cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGALRM, sa));
+
+  // Actually allow SIGALRM delivery.
+  auto mask_cleanup =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, SIGALRM));
+
+  // Alarm in 20 second, which should be well into the loop below.
+  ASSERT_THAT(alarm(20), SyscallSucceeds());
+  // Make sure that the signal gets delivered even if we are spinning in user
+  // mode when it arrives.
+  while (!alarms_received) {
+  }
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
+
+int main(int argc, char** argv) {
+  // These tests depend on delivering SIGALRM to the main thread. Block SIGALRM
+  // so that any other threads created by TestInit will also have SIGALRM
+  // blocked.
+  sigset_t set;
+  sigemptyset(&set);
+  sigaddset(&set, SIGALRM);
+  TEST_PCHECK(sigprocmask(SIG_BLOCK, &set, nullptr) == 0);
+
+  gvisor::testing::TestInit(&argc, &argv);
+  return gvisor::testing::RunAllTests();
+}
diff --git a/test/syscalls/linux/arch_prctl.cc b/test/syscalls/linux/arch_prctl.cc
new file mode 100644
index 000000000..81bf5a775
--- /dev/null
+++ b/test/syscalls/linux/arch_prctl.cc
@@ -0,0 +1,48 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <asm/prctl.h>
+#include <sys/prctl.h>
+
+#include "gtest/gtest.h"
+#include "test/util/test_util.h"
+
+// glibc does not provide a prototype for arch_prctl() so declare it here.
+extern "C" int arch_prctl(int code, uintptr_t addr);
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(ArchPrctlTest, GetSetFS) {
+  uintptr_t orig;
+  const uintptr_t kNonCanonicalFsbase = 0x4141414142424242;
+
+  // Get the original FS.base and then set it to the same value (this is
+  // intentional because FS.base is the TLS pointer so we cannot change it
+  // arbitrarily).
+  ASSERT_THAT(arch_prctl(ARCH_GET_FS, reinterpret_cast<uintptr_t>(&orig)),
+              SyscallSucceeds());
+  ASSERT_THAT(arch_prctl(ARCH_SET_FS, orig), SyscallSucceeds());
+
+  // Trying to set FS.base to a non-canonical value should return an error.
+  ASSERT_THAT(arch_prctl(ARCH_SET_FS, kNonCanonicalFsbase),
+              SyscallFailsWithErrno(EPERM));
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/bad.cc b/test/syscalls/linux/bad.cc
new file mode 100644
index 000000000..a26fc6af3
--- /dev/null
+++ b/test/syscalls/linux/bad.cc
@@ -0,0 +1,45 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+#ifdef __x86_64__
+// get_kernel_syms is not supported in Linux > 2.6, and not implemented in
+// gVisor.
+constexpr uint32_t kNotImplementedSyscall = SYS_get_kernel_syms;
+#elif __aarch64__
+// Use the last of arch_specific_syscalls which are not implemented on arm64.
+constexpr uint32_t kNotImplementedSyscall = __NR_arch_specific_syscall + 15;
+#endif
+
+TEST(BadSyscallTest, NotImplemented) {
+  EXPECT_THAT(syscall(kNotImplementedSyscall), SyscallFailsWithErrno(ENOSYS));
+}
+
+TEST(BadSyscallTest, NegativeOne) {
+  EXPECT_THAT(syscall(-1), SyscallFailsWithErrno(ENOSYS));
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/base_poll_test.cc b/test/syscalls/linux/base_poll_test.cc
new file mode 100644
index 000000000..ab7a19dd0
--- /dev/null
+++ b/test/syscalls/linux/base_poll_test.cc
@@ -0,0 +1,65 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test/syscalls/linux/base_poll_test.h"
+
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <syscall.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "absl/memory/memory.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+static volatile int timer_fired = 0;
+static void SigAlarmHandler(int, siginfo_t*, void*) { timer_fired = 1; }
+
+BasePollTest::BasePollTest() {
+  // Register our SIGALRM handler, but save the original so we can restore in
+  // the destructor.
+  struct sigaction sa = {};
+  sa.sa_sigaction = SigAlarmHandler;
+  sigfillset(&sa.sa_mask);
+  TEST_PCHECK(sigaction(SIGALRM, &sa, &original_alarm_sa_) == 0);
+}
+
+BasePollTest::~BasePollTest() {
+  ClearTimer();
+  TEST_PCHECK(sigaction(SIGALRM, &original_alarm_sa_, nullptr) == 0);
+}
+
+void BasePollTest::SetTimer(absl::Duration duration) {
+  pid_t tgid = getpid();
+  pid_t tid = gettid();
+  ClearTimer();
+
+  // Create a new timer thread.
+  timer_ = absl::make_unique<TimerThread>(absl::Now() + duration, tgid, tid);
+}
+
+bool BasePollTest::TimerFired() const { return timer_fired; }
+
+void BasePollTest::ClearTimer() {
+  timer_.reset();
+  timer_fired = 0;
+}
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/base_poll_test.h b/test/syscalls/linux/base_poll_test.h
new file mode 100644
index 000000000..0d4a6701e
--- /dev/null
+++ b/test/syscalls/linux/base_poll_test.h
@@ -0,0 +1,101 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_SYSCALLS_BASE_POLL_TEST_H_
+#define GVISOR_TEST_SYSCALLS_BASE_POLL_TEST_H_
+
+#include <signal.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <syscall.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <memory>
+
+#include "gtest/gtest.h"
+#include "absl/synchronization/mutex.h"
+#include "absl/time/time.h"
+#include "test/util/logging.h"
+#include "test/util/signal_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// TimerThread is a cancelable timer.
+class TimerThread {
+ public:
+  TimerThread(absl::Time deadline, pid_t tgid, pid_t tid)
+      : thread_([=] {
+          mu_.Lock();
+          mu_.AwaitWithDeadline(absl::Condition(&cancel_), deadline);
+          if (!cancel_) {
+            TEST_PCHECK(tgkill(tgid, tid, SIGALRM) == 0);
+          }
+          mu_.Unlock();
+        }) {}
+
+  ~TimerThread() { Cancel(); }
+
+  void Cancel() {
+    absl::MutexLock ml(&mu_);
+    cancel_ = true;
+  }
+
+ private:
+  mutable absl::Mutex mu_;
+  bool cancel_ ABSL_GUARDED_BY(mu_) = false;
+
+  // Must be last to ensure that the destructor for the thread is run before
+  // any other member of the object is destroyed.
+  ScopedThread thread_;
+};
+
+// Base test fixture for poll, select, ppoll, and pselect tests.
+//
+// This fixture makes use of SIGALRM. The handler is saved in SetUp() and
+// restored in TearDown().
+class BasePollTest : public ::testing::Test {
+ protected:
+  BasePollTest();
+  ~BasePollTest() override;
+
+  // Sets a timer that will send a signal to the calling thread after
+  // `duration`.
+  void SetTimer(absl::Duration duration);
+
+  // Returns true if the timer has fired.
+  bool TimerFired() const;
+
+  // Stops the pending timer (if any) and clear the "fired" state.
+  void ClearTimer();
+
+ private:
+  // Thread that implements the timer. If the timer is stopped, timer_ is null.
+  //
+  // We have to use a thread for this purpose because tests using this fixture
+  // expect to be interrupted by the timer signal, but itimers/alarm(2) send
+  // thread-group-directed signals, which may be handled by any thread in the
+  // test process.
+  std::unique_ptr<TimerThread> timer_;
+
+  // The original SIGALRM handler, to restore in destructor.
+  struct sigaction original_alarm_sa_;
+};
+
+}  // namespace testing
+}  // namespace gvisor
+
+#endif  // GVISOR_TEST_SYSCALLS_BASE_POLL_TEST_H_
diff --git a/test/syscalls/linux/bind.cc b/test/syscalls/linux/bind.cc
new file mode 100644
index 000000000..9547c4ab2
--- /dev/null
+++ b/test/syscalls/linux/bind.cc
@@ -0,0 +1,145 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdio.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST_P(AllSocketPairTest, Bind) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+                   sockets->first_addr_size()),
+              SyscallSucceeds());
+}
+
+TEST_P(AllSocketPairTest, BindTooLong) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  // first_addr is a sockaddr_storage being used as a sockaddr_un. Use the full
+  // length which is longer than expected for a Unix socket.
+  ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+                   sizeof(sockaddr_storage)),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_P(AllSocketPairTest, DoubleBindSocket) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+                   sockets->first_addr_size()),
+              SyscallSucceeds());
+
+  EXPECT_THAT(
+      bind(sockets->first_fd(), sockets->first_addr(),
+           sockets->first_addr_size()),
+      // Linux 4.09 returns EINVAL here, but some time before 4.19 it switched
+      // to EADDRINUSE.
+      AnyOf(SyscallFailsWithErrno(EADDRINUSE), SyscallFailsWithErrno(EINVAL)));
+}
+
+TEST_P(AllSocketPairTest, GetLocalAddr) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+                   sockets->first_addr_size()),
+              SyscallSucceeds());
+  socklen_t addressLength = sockets->first_addr_size();
+  struct sockaddr_storage address = {};
+  ASSERT_THAT(getsockname(sockets->first_fd(), (struct sockaddr*)(&address),
+                          &addressLength),
+              SyscallSucceeds());
+  EXPECT_EQ(
+      0, memcmp(&address, sockets->first_addr(), sockets->first_addr_size()));
+}
+
+TEST_P(AllSocketPairTest, GetLocalAddrWithoutBind) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  socklen_t addressLength = sockets->first_addr_size();
+  struct sockaddr_storage received_address = {};
+  ASSERT_THAT(
+      getsockname(sockets->first_fd(), (struct sockaddr*)(&received_address),
+                  &addressLength),
+      SyscallSucceeds());
+  struct sockaddr_storage want_address = {};
+  want_address.ss_family = sockets->first_addr()->sa_family;
+  EXPECT_EQ(0, memcmp(&received_address, &want_address, addressLength));
+}
+
+TEST_P(AllSocketPairTest, GetRemoteAddressWithoutConnect) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  socklen_t addressLength = sockets->first_addr_size();
+  struct sockaddr_storage address = {};
+  ASSERT_THAT(getpeername(sockets->second_fd(), (struct sockaddr*)(&address),
+                          &addressLength),
+              SyscallFailsWithErrno(ENOTCONN));
+}
+
+TEST_P(AllSocketPairTest, DoubleBindAddress) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+                   sockets->first_addr_size()),
+              SyscallSucceeds());
+
+  EXPECT_THAT(bind(sockets->second_fd(), sockets->first_addr(),
+                   sockets->first_addr_size()),
+              SyscallFailsWithErrno(EADDRINUSE));
+}
+
+TEST_P(AllSocketPairTest, Unbind) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+                   sockets->first_addr_size()),
+              SyscallSucceeds());
+  ASSERT_THAT(close(sockets->release_first_fd()), SyscallSucceeds());
+
+  // Filesystem Unix sockets do not release their address when closed.
+  if (sockets->first_addr()->sa_data[0] != 0) {
+    ASSERT_THAT(bind(sockets->second_fd(), sockets->first_addr(),
+                     sockets->first_addr_size()),
+                SyscallFailsWithErrno(EADDRINUSE));
+    return;
+  }
+
+  ASSERT_THAT(bind(sockets->second_fd(), sockets->first_addr(),
+                   sockets->first_addr_size()),
+              SyscallSucceeds());
+  ASSERT_THAT(close(sockets->release_second_fd()), SyscallSucceeds());
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    AllUnixDomainSockets, AllSocketPairTest,
+    ::testing::ValuesIn(VecCat<SocketPairKind>(
+        ApplyVec<SocketPairKind>(
+            FilesystemUnboundUnixDomainSocketPair,
+            AllBitwiseCombinations(List<int>{SOCK_STREAM, SOCK_DGRAM,
+                                             SOCK_SEQPACKET},
+                                   List<int>{0, SOCK_NONBLOCK})),
+        ApplyVec<SocketPairKind>(
+            AbstractUnboundUnixDomainSocketPair,
+            AllBitwiseCombinations(List<int>{SOCK_STREAM, SOCK_DGRAM,
+                                             SOCK_SEQPACKET},
+                                   List<int>{0, SOCK_NONBLOCK})))));
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/brk.cc b/test/syscalls/linux/brk.cc
new file mode 100644
index 000000000..a03a44465
--- /dev/null
+++ b/test/syscalls/linux/brk.cc
@@ -0,0 +1,31 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdint.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+TEST(BrkTest, BrkSyscallReturnsOldBrkOnFailure) {
+  auto old_brk = sbrk(0);
+  EXPECT_THAT(syscall(SYS_brk, reinterpret_cast<void*>(-1)),
+              SyscallSucceedsWithValue(reinterpret_cast<uintptr_t>(old_brk)));
+}
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/chdir.cc b/test/syscalls/linux/chdir.cc
new file mode 100644
index 000000000..3182c228b
--- /dev/null
+++ b/test/syscalls/linux/chdir.cc
@@ -0,0 +1,64 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <linux/limits.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/un.h>
+
+#include "gtest/gtest.h"
+#include "test/util/capability_util.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(ChdirTest, Success) {
+  auto old_dir = GetAbsoluteTestTmpdir();
+  auto temp_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  EXPECT_THAT(chdir(temp_dir.path().c_str()), SyscallSucceeds());
+  // Temp path destructor deletes the newly created tmp dir and Sentry rejects
+  // saving when its current dir is still pointing to the path. Switch to a
+  // permanent path here.
+  EXPECT_THAT(chdir(old_dir.c_str()), SyscallSucceeds());
+}
+
+TEST(ChdirTest, PermissionDenied) {
+  // Drop capabilities that allow us to override directory permissions.
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false));
+
+  auto temp_dir = ASSERT_NO_ERRNO_AND_VALUE(
+      TempPath::CreateDirWith(GetAbsoluteTestTmpdir(), 0666 /* mode */));
+  EXPECT_THAT(chdir(temp_dir.path().c_str()), SyscallFailsWithErrno(EACCES));
+}
+
+TEST(ChdirTest, NotDir) {
+  auto temp_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  EXPECT_THAT(chdir(temp_file.path().c_str()), SyscallFailsWithErrno(ENOTDIR));
+}
+
+TEST(ChdirTest, NotExist) {
+  EXPECT_THAT(chdir("/foo/bar"), SyscallFailsWithErrno(ENOENT));
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/chmod.cc b/test/syscalls/linux/chmod.cc
new file mode 100644
index 000000000..a06b5cfd6
--- /dev/null
+++ b/test/syscalls/linux/chmod.cc
@@ -0,0 +1,264 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <string>
+
+#include "gtest/gtest.h"
+#include "test/util/capability_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/fs_util.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(ChmodTest, ChmodFileSucceeds) {
+  // Drop capabilities that allow us to override file permissions.
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+
+  auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+
+  ASSERT_THAT(chmod(file.path().c_str(), 0466), SyscallSucceeds());
+  EXPECT_THAT(open(file.path().c_str(), O_RDWR), SyscallFailsWithErrno(EACCES));
+}
+
+TEST(ChmodTest, ChmodDirSucceeds) {
+  // Drop capabilities that allow us to override file and directory permissions.
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false));
+
+  auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const std::string fileInDir = NewTempAbsPathInDir(dir.path());
+
+  ASSERT_THAT(chmod(dir.path().c_str(), 0466), SyscallSucceeds());
+  EXPECT_THAT(open(fileInDir.c_str(), O_RDONLY), SyscallFailsWithErrno(EACCES));
+}
+
+TEST(ChmodTest, FchmodFileSucceeds_NoRandomSave) {
+  // Drop capabilities that allow us to file directory permissions.
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+
+  auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileMode(0666));
+  int fd;
+  ASSERT_THAT(fd = open(file.path().c_str(), O_RDWR), SyscallSucceeds());
+
+  {
+    const DisableSave ds;  // File permissions are reduced.
+    ASSERT_THAT(fchmod(fd, 0444), SyscallSucceeds());
+    EXPECT_THAT(close(fd), SyscallSucceeds());
+  }
+
+  EXPECT_THAT(open(file.path().c_str(), O_RDWR), SyscallFailsWithErrno(EACCES));
+}
+
+TEST(ChmodTest, FchmodDirSucceeds_NoRandomSave) {
+  // Drop capabilities that allow us to override file and directory permissions.
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false));
+
+  auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  int fd;
+  ASSERT_THAT(fd = open(dir.path().c_str(), O_RDONLY | O_DIRECTORY),
+              SyscallSucceeds());
+
+  {
+    const DisableSave ds;  // File permissions are reduced.
+    ASSERT_THAT(fchmod(fd, 0), SyscallSucceeds());
+    EXPECT_THAT(close(fd), SyscallSucceeds());
+  }
+
+  EXPECT_THAT(open(dir.path().c_str(), O_RDONLY),
+              SyscallFailsWithErrno(EACCES));
+}
+
+TEST(ChmodTest, FchmodBadF) {
+  ASSERT_THAT(fchmod(-1, 0444), SyscallFailsWithErrno(EBADF));
+}
+
+TEST(ChmodTest, FchmodatBadF) {
+  ASSERT_THAT(fchmodat(-1, "foo", 0444, 0), SyscallFailsWithErrno(EBADF));
+}
+
+TEST(ChmodTest, FchmodatNotDir) {
+  ASSERT_THAT(fchmodat(-1, "", 0444, 0), SyscallFailsWithErrno(ENOENT));
+}
+
+TEST(ChmodTest, FchmodatFileAbsolutePath) {
+  // Drop capabilities that allow us to override file permissions.
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+
+  auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+
+  ASSERT_THAT(fchmodat(-1, file.path().c_str(), 0444, 0), SyscallSucceeds());
+  EXPECT_THAT(open(file.path().c_str(), O_RDWR), SyscallFailsWithErrno(EACCES));
+}
+
+TEST(ChmodTest, FchmodatDirAbsolutePath) {
+  // Drop capabilities that allow us to override file and directory permissions.
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false));
+
+  auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+
+  int fd;
+  ASSERT_THAT(fd = open(dir.path().c_str(), O_RDONLY | O_DIRECTORY),
+              SyscallSucceeds());
+  EXPECT_THAT(close(fd), SyscallSucceeds());
+
+  ASSERT_THAT(fchmodat(-1, dir.path().c_str(), 0, 0), SyscallSucceeds());
+  EXPECT_THAT(open(dir.path().c_str(), O_RDONLY),
+              SyscallFailsWithErrno(EACCES));
+}
+
+TEST(ChmodTest, FchmodatFile) {
+  // Drop capabilities that allow us to override file permissions.
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+
+  auto temp_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+
+  int parent_fd;
+  ASSERT_THAT(
+      parent_fd = open(GetAbsoluteTestTmpdir().c_str(), O_RDONLY | O_DIRECTORY),
+      SyscallSucceeds());
+
+  ASSERT_THAT(
+      fchmodat(parent_fd, std::string(Basename(temp_file.path())).c_str(), 0444,
+               0),
+      SyscallSucceeds());
+  EXPECT_THAT(close(parent_fd), SyscallSucceeds());
+
+  EXPECT_THAT(open(temp_file.path().c_str(), O_RDWR),
+              SyscallFailsWithErrno(EACCES));
+}
+
+TEST(ChmodTest, FchmodatDir) {
+  // Drop capabilities that allow us to override file and directory permissions.
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false));
+
+  auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+
+  int parent_fd;
+  ASSERT_THAT(
+      parent_fd = open(GetAbsoluteTestTmpdir().c_str(), O_RDONLY | O_DIRECTORY),
+      SyscallSucceeds());
+
+  int fd;
+  ASSERT_THAT(fd = open(dir.path().c_str(), O_RDONLY | O_DIRECTORY),
+              SyscallSucceeds());
+  EXPECT_THAT(close(fd), SyscallSucceeds());
+
+  ASSERT_THAT(
+      fchmodat(parent_fd, std::string(Basename(dir.path())).c_str(), 0, 0),
+      SyscallSucceeds());
+  EXPECT_THAT(close(parent_fd), SyscallSucceeds());
+
+  EXPECT_THAT(open(dir.path().c_str(), O_RDONLY | O_DIRECTORY),
+              SyscallFailsWithErrno(EACCES));
+}
+
+TEST(ChmodTest, ChmodDowngradeWritability_NoRandomSave) {
+  auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileMode(0666));
+
+  int fd;
+  ASSERT_THAT(fd = open(file.path().c_str(), O_RDWR), SyscallSucceeds());
+
+  const DisableSave ds;  // Permissions are dropped.
+  ASSERT_THAT(chmod(file.path().c_str(), 0444), SyscallSucceeds());
+  EXPECT_THAT(write(fd, "hello", 5), SyscallSucceedsWithValue(5));
+
+  EXPECT_THAT(close(fd), SyscallSucceeds());
+}
+
+TEST(ChmodTest, ChmodFileToNoPermissionsSucceeds) {
+  // Drop capabilities that allow us to override file permissions.
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false));
+
+  auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileMode(0666));
+
+  ASSERT_THAT(chmod(file.path().c_str(), 0), SyscallSucceeds());
+
+  EXPECT_THAT(open(file.path().c_str(), O_RDONLY),
+              SyscallFailsWithErrno(EACCES));
+}
+
+TEST(ChmodTest, FchmodDowngradeWritability_NoRandomSave) {
+  auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+
+  int fd;
+  ASSERT_THAT(fd = open(file.path().c_str(), O_RDWR | O_CREAT, 0666),
+              SyscallSucceeds());
+
+  const DisableSave ds;  // Permissions are dropped.
+  ASSERT_THAT(fchmod(fd, 0444), SyscallSucceeds());
+  EXPECT_THAT(write(fd, "hello", 5), SyscallSucceedsWithValue(5));
+
+  EXPECT_THAT(close(fd), SyscallSucceeds());
+}
+
+TEST(ChmodTest, FchmodFileToNoPermissionsSucceeds_NoRandomSave) {
+  // Drop capabilities that allow us to override file permissions.
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false));
+
+  auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileMode(0666));
+
+  int fd;
+  ASSERT_THAT(fd = open(file.path().c_str(), O_RDWR), SyscallSucceeds());
+
+  {
+    const DisableSave ds;  // Permissions are dropped.
+    ASSERT_THAT(fchmod(fd, 0), SyscallSucceeds());
+    EXPECT_THAT(close(fd), SyscallSucceeds());
+  }
+
+  EXPECT_THAT(open(file.path().c_str(), O_RDONLY),
+              SyscallFailsWithErrno(EACCES));
+}
+
+// Verify that we can get a RW FD after chmod, even if a RO fd is left open.
+TEST(ChmodTest, ChmodWritableWithOpenFD) {
+  // FIXME(b/72455313): broken on hostfs.
+  if (IsRunningOnGvisor()) {
+    return;
+  }
+
+  TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileMode(0444));
+
+  FileDescriptor fd1 = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY));
+
+  ASSERT_THAT(fchmod(fd1.get(), 0644), SyscallSucceeds());
+
+  // This FD is writable, even though fd1 has a read-only reference to the file.
+  FileDescriptor fd2 = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR));
+
+  // fd1 is not writable, but fd2 is.
+  char c = 'a';
+  EXPECT_THAT(WriteFd(fd1.get(), &c, 1), SyscallFailsWithErrno(EBADF));
+  EXPECT_THAT(WriteFd(fd2.get(), &c, 1), SyscallSucceedsWithValue(1));
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/chown.cc b/test/syscalls/linux/chown.cc
new file mode 100644
index 000000000..7a28b674d
--- /dev/null
+++ b/test/syscalls/linux/chown.cc
@@ -0,0 +1,206 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <grp.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/flags/flag.h"
+#include "absl/synchronization/notification.h"
+#include "test/util/capability_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/fs_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+ABSL_FLAG(int32_t, scratch_uid1, 65534, "first scratch UID");
+ABSL_FLAG(int32_t, scratch_uid2, 65533, "second scratch UID");
+ABSL_FLAG(int32_t, scratch_gid, 65534, "first scratch GID");
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(ChownTest, FchownBadF) {
+  ASSERT_THAT(fchown(-1, 0, 0), SyscallFailsWithErrno(EBADF));
+}
+
+TEST(ChownTest, FchownatBadF) {
+  ASSERT_THAT(fchownat(-1, "fff", 0, 0, 0), SyscallFailsWithErrno(EBADF));
+}
+
+TEST(ChownTest, FchownatEmptyPath) {
+  const auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const auto fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(dir.path(), O_DIRECTORY | O_RDONLY));
+  ASSERT_THAT(fchownat(fd.get(), "", 0, 0, 0), SyscallFailsWithErrno(ENOENT));
+}
+
+using Chown =
+    std::function<PosixError(const std::string&, uid_t owner, gid_t group)>;
+
+class ChownParamTest : public ::testing::TestWithParam<Chown> {};
+
+TEST_P(ChownParamTest, ChownFileSucceeds) {
+  if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_CHOWN))) {
+    ASSERT_NO_ERRNO(SetCapability(CAP_CHOWN, false));
+  }
+
+  const auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+
+  // At least *try* setting to a group other than the EGID.
+  gid_t gid;
+  EXPECT_THAT(gid = getegid(), SyscallSucceeds());
+  int num_groups;
+  EXPECT_THAT(num_groups = getgroups(0, nullptr), SyscallSucceeds());
+  if (num_groups > 0) {
+    std::vector<gid_t> list(num_groups);
+    EXPECT_THAT(getgroups(list.size(), list.data()), SyscallSucceeds());
+    gid = list[0];
+  }
+
+  EXPECT_NO_ERRNO(GetParam()(file.path(), geteuid(), gid));
+
+  struct stat s = {};
+  ASSERT_THAT(stat(file.path().c_str(), &s), SyscallSucceeds());
+  EXPECT_EQ(s.st_uid, geteuid());
+  EXPECT_EQ(s.st_gid, gid);
+}
+
+TEST_P(ChownParamTest, ChownFilePermissionDenied) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SETUID)));
+
+  const auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileMode(0777));
+
+  // Drop privileges and change IDs only in child thread, or else this parent
+  // thread won't be able to open some log files after the test ends.
+  ScopedThread([&] {
+    // Drop privileges.
+    if (HaveCapability(CAP_CHOWN).ValueOrDie()) {
+      EXPECT_NO_ERRNO(SetCapability(CAP_CHOWN, false));
+    }
+
+    // Change EUID and EGID.
+    //
+    // See note about POSIX below.
+    EXPECT_THAT(
+        syscall(SYS_setresgid, -1, absl::GetFlag(FLAGS_scratch_gid), -1),
+        SyscallSucceeds());
+    EXPECT_THAT(
+        syscall(SYS_setresuid, -1, absl::GetFlag(FLAGS_scratch_uid1), -1),
+        SyscallSucceeds());
+
+    EXPECT_THAT(GetParam()(file.path(), geteuid(), getegid()),
+                PosixErrorIs(EPERM, ::testing::ContainsRegex("chown")));
+  });
+}
+
+TEST_P(ChownParamTest, ChownFileSucceedsAsRoot) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability((CAP_CHOWN))));
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability((CAP_SETUID))));
+
+  const std::string filename = NewTempAbsPath();
+
+  absl::Notification fileCreated, fileChowned;
+  // Change UID only in child thread, or else this parent thread won't be able
+  // to open some log files after the test ends.
+  ScopedThread t([&] {
+    // POSIX requires that all threads in a process share the same UIDs, so
+    // the NPTL setresuid wrappers use signals to make all threads execute the
+    // setresuid syscall. However, we want this thread to have its own set of
+    // credentials different from the parent process, so we use the raw
+    // syscall.
+    EXPECT_THAT(
+        syscall(SYS_setresuid, -1, absl::GetFlag(FLAGS_scratch_uid2), -1),
+        SyscallSucceeds());
+
+    // Create file and immediately close it.
+    FileDescriptor fd =
+        ASSERT_NO_ERRNO_AND_VALUE(Open(filename, O_CREAT | O_RDWR, 0644));
+    fd.reset();  // Close the fd.
+
+    fileCreated.Notify();
+    fileChowned.WaitForNotification();
+
+    EXPECT_THAT(open(filename.c_str(), O_RDWR), SyscallFailsWithErrno(EACCES));
+    FileDescriptor fd2 = ASSERT_NO_ERRNO_AND_VALUE(Open(filename, O_RDONLY));
+  });
+
+  fileCreated.WaitForNotification();
+
+  // Set file's owners to someone different.
+  EXPECT_NO_ERRNO(GetParam()(filename, absl::GetFlag(FLAGS_scratch_uid1),
+                             absl::GetFlag(FLAGS_scratch_gid)));
+
+  struct stat s;
+  EXPECT_THAT(stat(filename.c_str(), &s), SyscallSucceeds());
+  EXPECT_EQ(s.st_uid, absl::GetFlag(FLAGS_scratch_uid1));
+  EXPECT_EQ(s.st_gid, absl::GetFlag(FLAGS_scratch_gid));
+
+  fileChowned.Notify();
+}
+
+PosixError errorFromReturn(const std::string& name, int ret) {
+  if (ret == -1) {
+    return PosixError(errno, absl::StrCat(name, " failed"));
+  }
+  return NoError();
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    ChownKinds, ChownParamTest,
+    ::testing::Values(
+        [](const std::string& path, uid_t owner, gid_t group) -> PosixError {
+          int rc = chown(path.c_str(), owner, group);
+          MaybeSave();
+          return errorFromReturn("chown", rc);
+        },
+        [](const std::string& path, uid_t owner, gid_t group) -> PosixError {
+          int rc = lchown(path.c_str(), owner, group);
+          MaybeSave();
+          return errorFromReturn("lchown", rc);
+        },
+        [](const std::string& path, uid_t owner, gid_t group) -> PosixError {
+          ASSIGN_OR_RETURN_ERRNO(auto fd, Open(path, O_RDWR));
+          int rc = fchown(fd.get(), owner, group);
+          MaybeSave();
+          return errorFromReturn("fchown", rc);
+        },
+        [](const std::string& path, uid_t owner, gid_t group) -> PosixError {
+          ASSIGN_OR_RETURN_ERRNO(auto fd, Open(path, O_RDWR));
+          int rc = fchownat(fd.get(), "", owner, group, AT_EMPTY_PATH);
+          MaybeSave();
+          return errorFromReturn("fchownat-fd", rc);
+        },
+        [](const std::string& path, uid_t owner, gid_t group) -> PosixError {
+          ASSIGN_OR_RETURN_ERRNO(auto dirfd, Open(std::string(Dirname(path)),
+                                                  O_DIRECTORY | O_RDONLY));
+          int rc = fchownat(dirfd.get(), std::string(Basename(path)).c_str(),
+                            owner, group, 0);
+          MaybeSave();
+          return errorFromReturn("fchownat-dirfd", rc);
+        }));
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/chroot.cc b/test/syscalls/linux/chroot.cc
new file mode 100644
index 000000000..85ec013d5
--- /dev/null
+++ b/test/syscalls/linux/chroot.cc
@@ -0,0 +1,366 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stddef.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <syscall.h>
+#include <unistd.h>
+
+#include <string>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/str_split.h"
+#include "absl/strings/string_view.h"
+#include "test/util/capability_util.h"
+#include "test/util/cleanup.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/fs_util.h"
+#include "test/util/mount_util.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+using ::testing::HasSubstr;
+using ::testing::Not;
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(ChrootTest, Success) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_CHROOT)));
+
+  auto temp_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  EXPECT_THAT(chroot(temp_dir.path().c_str()), SyscallSucceeds());
+}
+
+TEST(ChrootTest, PermissionDenied) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_CHROOT)));
+
+  // CAP_DAC_READ_SEARCH and CAP_DAC_OVERRIDE may override Execute permission on
+  // directories.
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false));
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+
+  auto temp_dir = ASSERT_NO_ERRNO_AND_VALUE(
+      TempPath::CreateDirWith(GetAbsoluteTestTmpdir(), 0666 /* mode */));
+  EXPECT_THAT(chroot(temp_dir.path().c_str()), SyscallFailsWithErrno(EACCES));
+}
+
+TEST(ChrootTest, NotDir) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_CHROOT)));
+
+  auto temp_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  EXPECT_THAT(chroot(temp_file.path().c_str()), SyscallFailsWithErrno(ENOTDIR));
+}
+
+TEST(ChrootTest, NotExist) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_CHROOT)));
+
+  EXPECT_THAT(chroot("/foo/bar"), SyscallFailsWithErrno(ENOENT));
+}
+
+TEST(ChrootTest, WithoutCapability) {
+  // Unset CAP_SYS_CHROOT.
+  ASSERT_NO_ERRNO(SetCapability(CAP_SYS_CHROOT, false));
+
+  auto temp_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  EXPECT_THAT(chroot(temp_dir.path().c_str()), SyscallFailsWithErrno(EPERM));
+}
+
+TEST(ChrootTest, CreatesNewRoot) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_CHROOT)));
+
+  // Grab the initial cwd.
+  char initial_cwd[1024];
+  ASSERT_THAT(syscall(__NR_getcwd, initial_cwd, sizeof(initial_cwd)),
+              SyscallSucceeds());
+
+  auto new_root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  auto file_in_new_root =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(new_root.path()));
+
+  // chroot into new_root.
+  ASSERT_THAT(chroot(new_root.path().c_str()), SyscallSucceeds());
+
+  // getcwd should return "(unreachable)" followed by the initial_cwd.
+  char cwd[1024];
+  ASSERT_THAT(syscall(__NR_getcwd, cwd, sizeof(cwd)), SyscallSucceeds());
+  std::string expected_cwd = "(unreachable)";
+  expected_cwd += initial_cwd;
+  EXPECT_STREQ(cwd, expected_cwd.c_str());
+
+  // Should not be able to stat file by its full path.
+  struct stat statbuf;
+  EXPECT_THAT(stat(file_in_new_root.path().c_str(), &statbuf),
+              SyscallFailsWithErrno(ENOENT));
+
+  // Should be able to stat file at new rooted path.
+  auto basename = std::string(Basename(file_in_new_root.path()));
+  auto rootedFile = "/" + basename;
+  ASSERT_THAT(stat(rootedFile.c_str(), &statbuf), SyscallSucceeds());
+
+  // Should be able to stat cwd at '.' even though it's outside root.
+  ASSERT_THAT(stat(".", &statbuf), SyscallSucceeds());
+
+  // chdir into new root.
+  ASSERT_THAT(chdir("/"), SyscallSucceeds());
+
+  // getcwd should return "/".
+  EXPECT_THAT(syscall(__NR_getcwd, cwd, sizeof(cwd)), SyscallSucceeds());
+  EXPECT_STREQ(cwd, "/");
+
+  // Statting '.', '..', '/', and '/..' all return the same dev and inode.
+  struct stat statbuf_dot;
+  ASSERT_THAT(stat(".", &statbuf_dot), SyscallSucceeds());
+  struct stat statbuf_dotdot;
+  ASSERT_THAT(stat("..", &statbuf_dotdot), SyscallSucceeds());
+  EXPECT_EQ(statbuf_dot.st_dev, statbuf_dotdot.st_dev);
+  EXPECT_EQ(statbuf_dot.st_ino, statbuf_dotdot.st_ino);
+  struct stat statbuf_slash;
+  ASSERT_THAT(stat("/", &statbuf_slash), SyscallSucceeds());
+  EXPECT_EQ(statbuf_dot.st_dev, statbuf_slash.st_dev);
+  EXPECT_EQ(statbuf_dot.st_ino, statbuf_slash.st_ino);
+  struct stat statbuf_slashdotdot;
+  ASSERT_THAT(stat("/..", &statbuf_slashdotdot), SyscallSucceeds());
+  EXPECT_EQ(statbuf_dot.st_dev, statbuf_slashdotdot.st_dev);
+  EXPECT_EQ(statbuf_dot.st_ino, statbuf_slashdotdot.st_ino);
+}
+
+TEST(ChrootTest, DotDotFromOpenFD) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_CHROOT)));
+
+  auto dir_outside_root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  auto fd = ASSERT_NO_ERRNO_AND_VALUE(
+      Open(dir_outside_root.path(), O_RDONLY | O_DIRECTORY));
+  auto new_root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+
+  // chroot into new_root.
+  ASSERT_THAT(chroot(new_root.path().c_str()), SyscallSucceeds());
+
+  // openat on fd with path .. will succeed.
+  int other_fd;
+  ASSERT_THAT(other_fd = openat(fd.get(), "..", O_RDONLY), SyscallSucceeds());
+  EXPECT_THAT(close(other_fd), SyscallSucceeds());
+
+  // getdents on fd should not error.
+  char buf[1024];
+  ASSERT_THAT(syscall(SYS_getdents64, fd.get(), buf, sizeof(buf)),
+              SyscallSucceeds());
+}
+
+// Test that link resolution in a chroot can escape the root by following an
+// open proc fd. Regression test for b/32316719.
+TEST(ChrootTest, ProcFdLinkResolutionInChroot) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_CHROOT)));
+
+  const TempPath file_outside_chroot =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file_outside_chroot.path(), O_RDONLY));
+
+  const FileDescriptor proc_fd = ASSERT_NO_ERRNO_AND_VALUE(
+      Open("/proc", O_DIRECTORY | O_RDONLY | O_CLOEXEC));
+
+  auto temp_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  ASSERT_THAT(chroot(temp_dir.path().c_str()), SyscallSucceeds());
+
+  // Opening relative to an already open fd to a node outside the chroot works.
+  const FileDescriptor proc_self_fd = ASSERT_NO_ERRNO_AND_VALUE(
+      OpenAt(proc_fd.get(), "self/fd", O_DIRECTORY | O_RDONLY | O_CLOEXEC));
+
+  // Proc fd symlinks can escape the chroot if the fd the symlink refers to
+  // refers to an object outside the chroot.
+  struct stat s = {};
+  EXPECT_THAT(
+      fstatat(proc_self_fd.get(), absl::StrCat(fd.get()).c_str(), &s, 0),
+      SyscallSucceeds());
+
+  // Try to stat the stdin fd. Internally, this is handled differently from a
+  // proc fd entry pointing to a file, since stdin is backed by a host fd, and
+  // isn't a walkable path on the filesystem inside the sandbox.
+  EXPECT_THAT(fstatat(proc_self_fd.get(), "0", &s, 0), SyscallSucceeds());
+}
+
+// This test will verify that when you hold a fd to proc before entering
+// a chroot that any files inside the chroot will appear rooted to the
+// base chroot when examining /proc/self/fd/{num}.
+TEST(ChrootTest, ProcMemSelfFdsNoEscapeProcOpen) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_CHROOT)));
+
+  // Get a FD to /proc before we enter the chroot.
+  const FileDescriptor proc =
+      ASSERT_NO_ERRNO_AND_VALUE(Open("/proc", O_RDONLY));
+
+  // Create and enter a chroot directory.
+  const auto temp_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  ASSERT_THAT(chroot(temp_dir.path().c_str()), SyscallSucceeds());
+
+  // Open a file inside the chroot at /foo.
+  const FileDescriptor foo =
+      ASSERT_NO_ERRNO_AND_VALUE(Open("/foo", O_CREAT | O_RDONLY, 0644));
+
+  // Examine /proc/self/fd/{foo_fd} to see if it exposes the fact that we're
+  // inside a chroot, the path should be /foo and NOT {chroot_dir}/foo.
+  const std::string fd_path = absl::StrCat("self/fd/", foo.get());
+  char buf[1024] = {};
+  size_t bytes_read = 0;
+  ASSERT_THAT(bytes_read =
+                  readlinkat(proc.get(), fd_path.c_str(), buf, sizeof(buf) - 1),
+              SyscallSucceeds());
+
+  // The link should resolve to something.
+  ASSERT_GT(bytes_read, 0);
+
+  // Assert that the link doesn't contain the chroot path and is only /foo.
+  EXPECT_STREQ(buf, "/foo");
+}
+
+// This test will verify that a file inside a chroot when mmapped will not
+// expose the full file path via /proc/self/maps and instead honor the chroot.
+TEST(ChrootTest, ProcMemSelfMapsNoEscapeProcOpen) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_CHROOT)));
+
+  // Get a FD to /proc before we enter the chroot.
+  const FileDescriptor proc =
+      ASSERT_NO_ERRNO_AND_VALUE(Open("/proc", O_RDONLY));
+
+  // Create and enter a chroot directory.
+  const auto temp_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  ASSERT_THAT(chroot(temp_dir.path().c_str()), SyscallSucceeds());
+
+  // Open a file inside the chroot at /foo.
+  const FileDescriptor foo =
+      ASSERT_NO_ERRNO_AND_VALUE(Open("/foo", O_CREAT | O_RDONLY, 0644));
+
+  // Mmap the newly created file.
+  void* foo_map = mmap(nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE,
+                       foo.get(), 0);
+  ASSERT_THAT(reinterpret_cast<int64_t>(foo_map), SyscallSucceeds());
+
+  // Always unmap.
+  auto cleanup_map = Cleanup(
+      [&] { EXPECT_THAT(munmap(foo_map, kPageSize), SyscallSucceeds()); });
+
+  // Examine /proc/self/maps to be sure that /foo doesn't appear to be
+  // mapped with the full chroot path.
+  const FileDescriptor maps =
+      ASSERT_NO_ERRNO_AND_VALUE(OpenAt(proc.get(), "self/maps", O_RDONLY));
+
+  size_t bytes_read = 0;
+  char buf[8 * 1024] = {};
+  ASSERT_THAT(bytes_read = ReadFd(maps.get(), buf, sizeof(buf)),
+              SyscallSucceeds());
+
+  // The maps file should have something.
+  ASSERT_GT(bytes_read, 0);
+
+  // Finally we want to make sure the maps don't contain the chroot path
+  ASSERT_EQ(std::string(buf, bytes_read).find(temp_dir.path()),
+            std::string::npos);
+}
+
+// Test that mounts outside the chroot will not appear in /proc/self/mounts or
+// /proc/self/mountinfo.
+TEST(ChrootTest, ProcMountsMountinfoNoEscape) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN)));
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_CHROOT)));
+
+  // We are going to create some mounts and then chroot. In order to be able to
+  // unmount the mounts after the test run, we must chdir to the root and use
+  // relative paths for all mounts. That way, as long as we never chdir into
+  // the new root, we can access the mounts via relative paths and unmount them.
+  ASSERT_THAT(chdir("/"), SyscallSucceeds());
+
+  // Create nested tmpfs mounts. Note the use of relative paths in Mount calls.
+  auto const outer_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  auto const outer_mount = ASSERT_NO_ERRNO_AND_VALUE(Mount(
+      "none", JoinPath(".", outer_dir.path()), "tmpfs", 0, "mode=0700", 0));
+
+  auto const inner_dir =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(outer_dir.path()));
+  auto const inner_mount = ASSERT_NO_ERRNO_AND_VALUE(Mount(
+      "none", JoinPath(".", inner_dir.path()), "tmpfs", 0, "mode=0700", 0));
+
+  // Filenames that will be checked for mounts, all relative to /proc dir.
+  std::string paths[3] = {"mounts", "self/mounts", "self/mountinfo"};
+
+  for (const std::string& path : paths) {
+    // We should have both inner and outer mounts.
+    const std::string contents =
+        ASSERT_NO_ERRNO_AND_VALUE(GetContents(JoinPath("/proc", path)));
+    EXPECT_THAT(contents, AllOf(HasSubstr(outer_dir.path()),
+                                HasSubstr(inner_dir.path())));
+    // We better have at least two mounts: the mounts we created plus the root.
+    std::vector<absl::string_view> submounts =
+        absl::StrSplit(contents, '\n', absl::SkipWhitespace());
+    EXPECT_GT(submounts.size(), 2);
+  }
+
+  // Get a FD to /proc before we enter the chroot.
+  const FileDescriptor proc =
+      ASSERT_NO_ERRNO_AND_VALUE(Open("/proc", O_RDONLY));
+
+  // Chroot to outer mount.
+  ASSERT_THAT(chroot(outer_dir.path().c_str()), SyscallSucceeds());
+
+  for (const std::string& path : paths) {
+    const FileDescriptor proc_file =
+        ASSERT_NO_ERRNO_AND_VALUE(OpenAt(proc.get(), path, O_RDONLY));
+
+    // Only two mounts visible from this chroot: the inner and outer.  Both
+    // paths should be relative to the new chroot.
+    const std::string contents =
+        ASSERT_NO_ERRNO_AND_VALUE(GetContentsFD(proc_file.get()));
+    EXPECT_THAT(contents,
+                AllOf(HasSubstr(absl::StrCat(Basename(inner_dir.path()))),
+                      Not(HasSubstr(outer_dir.path())),
+                      Not(HasSubstr(inner_dir.path()))));
+    std::vector<absl::string_view> submounts =
+        absl::StrSplit(contents, '\n', absl::SkipWhitespace());
+    EXPECT_EQ(submounts.size(), 2);
+  }
+
+  // Chroot to inner mount.  We must use an absolute path accessible to our
+  // chroot.
+  const std::string inner_dir_basename =
+      absl::StrCat("/", Basename(inner_dir.path()));
+  ASSERT_THAT(chroot(inner_dir_basename.c_str()), SyscallSucceeds());
+
+  for (const std::string& path : paths) {
+    const FileDescriptor proc_file =
+        ASSERT_NO_ERRNO_AND_VALUE(OpenAt(proc.get(), path, O_RDONLY));
+    const std::string contents =
+        ASSERT_NO_ERRNO_AND_VALUE(GetContentsFD(proc_file.get()));
+
+    // Only the inner mount visible from this chroot.
+    std::vector<absl::string_view> submounts =
+        absl::StrSplit(contents, '\n', absl::SkipWhitespace());
+    EXPECT_EQ(submounts.size(), 1);
+  }
+
+  // Chroot back to ".".
+  ASSERT_THAT(chroot("."), SyscallSucceeds());
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/clock_getres.cc b/test/syscalls/linux/clock_getres.cc
new file mode 100644
index 000000000..c408b936c
--- /dev/null
+++ b/test/syscalls/linux/clock_getres.cc
@@ -0,0 +1,37 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sys/time.h>
+#include <time.h>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// clock_getres works regardless of whether or not a timespec is passed.
+TEST(ClockGetres, Timespec) {
+  struct timespec ts;
+  EXPECT_THAT(clock_getres(CLOCK_MONOTONIC, &ts), SyscallSucceeds());
+  EXPECT_THAT(clock_getres(CLOCK_MONOTONIC, nullptr), SyscallSucceeds());
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/clock_gettime.cc b/test/syscalls/linux/clock_gettime.cc
new file mode 100644
index 000000000..7f6015049
--- /dev/null
+++ b/test/syscalls/linux/clock_gettime.cc
@@ -0,0 +1,163 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <pthread.h>
+#include <sys/time.h>
+
+#include <cerrno>
+#include <cstdint>
+#include <ctime>
+#include <list>
+#include <memory>
+#include <string>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+int64_t clock_gettime_nsecs(clockid_t id) {
+  struct timespec ts;
+  TEST_PCHECK(clock_gettime(id, &ts) == 0);
+  return (ts.tv_sec * 1000000000 + ts.tv_nsec);
+}
+
+// Spin on the CPU for at least ns nanoseconds, based on
+// CLOCK_THREAD_CPUTIME_ID.
+void spin_ns(int64_t ns) {
+  int64_t start = clock_gettime_nsecs(CLOCK_THREAD_CPUTIME_ID);
+  int64_t end = start + ns;
+
+  do {
+    constexpr int kLoopCount = 1000000;  // large and arbitrary
+    // volatile to prevent the compiler from skipping this loop.
+    for (volatile int i = 0; i < kLoopCount; i++) {
+    }
+  } while (clock_gettime_nsecs(CLOCK_THREAD_CPUTIME_ID) < end);
+}
+
+// Test that CLOCK_PROCESS_CPUTIME_ID is a superset of CLOCK_THREAD_CPUTIME_ID.
+TEST(ClockGettime, CputimeId) {
+  constexpr int kNumThreads = 13;  // arbitrary
+
+  absl::Duration spin_time = absl::Seconds(1);
+
+  // Start off the worker threads and compute the aggregate time spent by
+  // the workers. Note that we test CLOCK_PROCESS_CPUTIME_ID by having the
+  // workers execute in parallel and verifying that CLOCK_PROCESS_CPUTIME_ID
+  // accumulates the runtime of all threads.
+  int64_t start = clock_gettime_nsecs(CLOCK_PROCESS_CPUTIME_ID);
+
+  // Create a kNumThreads threads.
+  std::list<ScopedThread> threads;
+  for (int i = 0; i < kNumThreads; i++) {
+    threads.emplace_back(
+        [spin_time] { spin_ns(absl::ToInt64Nanoseconds(spin_time)); });
+  }
+  for (auto& t : threads) {
+    t.Join();
+  }
+
+  int64_t end = clock_gettime_nsecs(CLOCK_PROCESS_CPUTIME_ID);
+
+  // The aggregate time spent in the worker threads must be at least
+  // 'kNumThreads' times the time each thread spun.
+  ASSERT_GE(end - start, kNumThreads * absl::ToInt64Nanoseconds(spin_time));
+}
+
+TEST(ClockGettime, JavaThreadTime) {
+  clockid_t clockid;
+  ASSERT_EQ(0, pthread_getcpuclockid(pthread_self(), &clockid));
+  struct timespec tp;
+  ASSERT_THAT(clock_getres(clockid, &tp), SyscallSucceeds());
+  EXPECT_TRUE(tp.tv_sec > 0 || tp.tv_nsec > 0);
+  // A thread cputime is updated each 10msec and there is no approximation
+  // if a task is running.
+  do {
+    ASSERT_THAT(clock_gettime(clockid, &tp), SyscallSucceeds());
+  } while (tp.tv_sec == 0 && tp.tv_nsec == 0);
+  EXPECT_TRUE(tp.tv_sec > 0 || tp.tv_nsec > 0);
+}
+
+// There is not much to test here, since CLOCK_REALTIME may be discontiguous.
+TEST(ClockGettime, RealtimeWorks) {
+  struct timespec tp;
+  EXPECT_THAT(clock_gettime(CLOCK_REALTIME, &tp), SyscallSucceeds());
+}
+
+class MonotonicClockTest : public ::testing::TestWithParam<clockid_t> {};
+
+TEST_P(MonotonicClockTest, IsMonotonic) {
+  auto end = absl::Now() + absl::Seconds(5);
+
+  struct timespec tp;
+  EXPECT_THAT(clock_gettime(GetParam(), &tp), SyscallSucceeds());
+
+  auto prev = absl::TimeFromTimespec(tp);
+  while (absl::Now() < end) {
+    EXPECT_THAT(clock_gettime(GetParam(), &tp), SyscallSucceeds());
+    auto now = absl::TimeFromTimespec(tp);
+    EXPECT_GE(now, prev);
+    prev = now;
+  }
+}
+
+std::string PrintClockId(::testing::TestParamInfo<clockid_t> info) {
+  switch (info.param) {
+    case CLOCK_MONOTONIC:
+      return "CLOCK_MONOTONIC";
+    case CLOCK_MONOTONIC_COARSE:
+      return "CLOCK_MONOTONIC_COARSE";
+    case CLOCK_MONOTONIC_RAW:
+      return "CLOCK_MONOTONIC_RAW";
+    case CLOCK_BOOTTIME:
+      // CLOCK_BOOTTIME is a monotonic clock.
+      return "CLOCK_BOOTTIME";
+    default:
+      return absl::StrCat(info.param);
+  }
+}
+
+INSTANTIATE_TEST_SUITE_P(ClockGettime, MonotonicClockTest,
+                         ::testing::Values(CLOCK_MONOTONIC,
+                                           CLOCK_MONOTONIC_COARSE,
+                                           CLOCK_MONOTONIC_RAW, CLOCK_BOOTTIME),
+                         PrintClockId);
+
+TEST(ClockGettime, UnimplementedReturnsEINVAL) {
+  SKIP_IF(!IsRunningOnGvisor());
+
+  struct timespec tp;
+  EXPECT_THAT(clock_gettime(CLOCK_REALTIME_ALARM, &tp),
+              SyscallFailsWithErrno(EINVAL));
+  EXPECT_THAT(clock_gettime(CLOCK_BOOTTIME_ALARM, &tp),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(ClockGettime, InvalidClockIDReturnsEINVAL) {
+  struct timespec tp;
+  EXPECT_THAT(clock_gettime(-1, &tp), SyscallFailsWithErrno(EINVAL));
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/clock_nanosleep.cc b/test/syscalls/linux/clock_nanosleep.cc
new file mode 100644
index 000000000..b55cddc52
--- /dev/null
+++ b/test/syscalls/linux/clock_nanosleep.cc
@@ -0,0 +1,179 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <time.h>
+
+#include <atomic>
+#include <utility>
+
+#include "gtest/gtest.h"
+#include "absl/time/time.h"
+#include "test/util/cleanup.h"
+#include "test/util/posix_error.h"
+#include "test/util/signal_util.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+#include "test/util/timer_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// sys_clock_nanosleep is defined because the glibc clock_nanosleep returns
+// error numbers directly and does not set errno. This makes our Syscall
+// matchers look a little weird when expecting failure:
+// "SyscallSucceedsWithValue(ERRNO)".
+int sys_clock_nanosleep(clockid_t clkid, int flags,
+                        const struct timespec* request,
+                        struct timespec* remain) {
+  return syscall(SYS_clock_nanosleep, clkid, flags, request, remain);
+}
+
+PosixErrorOr<absl::Time> GetTime(clockid_t clk) {
+  struct timespec ts = {};
+  const int rc = clock_gettime(clk, &ts);
+  MaybeSave();
+  if (rc < 0) {
+    return PosixError(errno, "clock_gettime");
+  }
+  return absl::TimeFromTimespec(ts);
+}
+
+class WallClockNanosleepTest : public ::testing::TestWithParam<clockid_t> {};
+
+TEST_P(WallClockNanosleepTest, InvalidValues) {
+  const struct timespec invalid[] = {
+      {.tv_sec = -1, .tv_nsec = -1},       {.tv_sec = 0, .tv_nsec = INT32_MIN},
+      {.tv_sec = 0, .tv_nsec = INT32_MAX}, {.tv_sec = 0, .tv_nsec = -1},
+      {.tv_sec = -1, .tv_nsec = 0},
+  };
+
+  for (auto const ts : invalid) {
+    EXPECT_THAT(sys_clock_nanosleep(GetParam(), 0, &ts, nullptr),
+                SyscallFailsWithErrno(EINVAL));
+  }
+}
+
+TEST_P(WallClockNanosleepTest, SleepOneSecond) {
+  constexpr absl::Duration kSleepDuration = absl::Seconds(1);
+  struct timespec duration = absl::ToTimespec(kSleepDuration);
+
+  const absl::Time before = ASSERT_NO_ERRNO_AND_VALUE(GetTime(GetParam()));
+  EXPECT_THAT(
+      RetryEINTR(sys_clock_nanosleep)(GetParam(), 0, &duration, &duration),
+      SyscallSucceeds());
+  const absl::Time after = ASSERT_NO_ERRNO_AND_VALUE(GetTime(GetParam()));
+
+  EXPECT_GE(after - before, kSleepDuration);
+}
+
+TEST_P(WallClockNanosleepTest, InterruptedNanosleep) {
+  constexpr absl::Duration kSleepDuration = absl::Seconds(60);
+  struct timespec duration = absl::ToTimespec(kSleepDuration);
+
+  // Install no-op signal handler for SIGALRM.
+  struct sigaction sa = {};
+  sigfillset(&sa.sa_mask);
+  sa.sa_handler = +[](int signo) {};
+  const auto cleanup_sa =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGALRM, sa));
+
+  // Measure time since setting the alarm, since the alarm will interrupt the
+  // sleep and hence determine how long we sleep.
+  const absl::Time before = ASSERT_NO_ERRNO_AND_VALUE(GetTime(GetParam()));
+
+  // Set an alarm to go off while sleeping.
+  struct itimerval timer = {};
+  timer.it_value.tv_sec = 1;
+  timer.it_value.tv_usec = 0;
+  timer.it_interval.tv_sec = 1;
+  timer.it_interval.tv_usec = 0;
+  const auto cleanup =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedItimer(ITIMER_REAL, timer));
+
+  EXPECT_THAT(sys_clock_nanosleep(GetParam(), 0, &duration, &duration),
+              SyscallFailsWithErrno(EINTR));
+  const absl::Time after = ASSERT_NO_ERRNO_AND_VALUE(GetTime(GetParam()));
+
+  // Remaining time updated.
+  const absl::Duration remaining = absl::DurationFromTimespec(duration);
+  EXPECT_GE(after - before + remaining, kSleepDuration);
+}
+
+// Remaining time is *not* updated if nanosleep completes uninterrupted.
+TEST_P(WallClockNanosleepTest, UninterruptedNanosleep) {
+  constexpr absl::Duration kSleepDuration = absl::Milliseconds(10);
+  const struct timespec duration = absl::ToTimespec(kSleepDuration);
+
+  while (true) {
+    constexpr int kRemainingMagic = 42;
+    struct timespec remaining;
+    remaining.tv_sec = kRemainingMagic;
+    remaining.tv_nsec = kRemainingMagic;
+
+    int ret = sys_clock_nanosleep(GetParam(), 0, &duration, &remaining);
+    if (ret == EINTR) {
+      // Retry from beginning. We want a single uninterrupted call.
+      continue;
+    }
+
+    EXPECT_THAT(ret, SyscallSucceeds());
+    EXPECT_EQ(remaining.tv_sec, kRemainingMagic);
+    EXPECT_EQ(remaining.tv_nsec, kRemainingMagic);
+    break;
+  }
+}
+
+TEST_P(WallClockNanosleepTest, SleepUntil) {
+  const absl::Time now = ASSERT_NO_ERRNO_AND_VALUE(GetTime(GetParam()));
+  const absl::Time until = now + absl::Seconds(2);
+  const struct timespec ts = absl::ToTimespec(until);
+
+  EXPECT_THAT(
+      RetryEINTR(sys_clock_nanosleep)(GetParam(), TIMER_ABSTIME, &ts, nullptr),
+      SyscallSucceeds());
+  const absl::Time after = ASSERT_NO_ERRNO_AND_VALUE(GetTime(GetParam()));
+
+  EXPECT_GE(after, until);
+}
+
+INSTANTIATE_TEST_SUITE_P(Sleepers, WallClockNanosleepTest,
+                         ::testing::Values(CLOCK_REALTIME, CLOCK_MONOTONIC));
+
+TEST(ClockNanosleepProcessTest, SleepFiveSeconds) {
+  const absl::Duration kSleepDuration = absl::Seconds(5);
+  struct timespec duration = absl::ToTimespec(kSleepDuration);
+
+  // Ensure that CLOCK_PROCESS_CPUTIME_ID advances.
+  std::atomic<bool> done(false);
+  ScopedThread t([&] {
+    while (!done.load()) {
+    }
+  });
+  const auto cleanup_done = Cleanup([&] { done.store(true); });
+
+  const absl::Time before =
+      ASSERT_NO_ERRNO_AND_VALUE(GetTime(CLOCK_PROCESS_CPUTIME_ID));
+  EXPECT_THAT(RetryEINTR(sys_clock_nanosleep)(CLOCK_PROCESS_CPUTIME_ID, 0,
+                                              &duration, &duration),
+              SyscallSucceeds());
+  const absl::Time after =
+      ASSERT_NO_ERRNO_AND_VALUE(GetTime(CLOCK_PROCESS_CPUTIME_ID));
+  EXPECT_GE(after - before, kSleepDuration);
+}
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/concurrency.cc b/test/syscalls/linux/concurrency.cc
new file mode 100644
index 000000000..7cd6a75bd
--- /dev/null
+++ b/test/syscalls/linux/concurrency.cc
@@ -0,0 +1,127 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <signal.h>
+
+#include <atomic>
+
+#include "gtest/gtest.h"
+#include "absl/strings/string_view.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/util/platform_util.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+// Test that a thread that never yields to the OS does not prevent other threads
+// from running.
+TEST(ConcurrencyTest, SingleProcessMultithreaded) {
+  std::atomic<int> a(0);
+
+  ScopedThread t([&a]() {
+    while (!a.load()) {
+    }
+  });
+
+  absl::SleepFor(absl::Seconds(1));
+
+  // We are still able to execute code in this thread. The other hasn't
+  // permanently hung execution in both threads.
+  a.store(1);
+}
+
+// Test that multiple threads in this process continue to execute in parallel,
+// even if an unrelated second process is spawned. Regression test for
+// b/32119508.
+TEST(ConcurrencyTest, MultiProcessMultithreaded) {
+  // In PID 1, start TIDs 1 and 2, and put both to sleep.
+  //
+  // Start PID 3, which spins for 5 seconds, then exits.
+  //
+  // TIDs 1 and 2 wake and attempt to Activate, which cannot occur until PID 3
+  // exits.
+  //
+  // Both TIDs 1 and 2 should be woken. If they are not both woken, the test
+  // hangs.
+  //
+  // This is all fundamentally racy. If we are failing to wake all threads, the
+  // expectation is that this test becomes flaky, rather than consistently
+  // failing.
+  //
+  // If additional background threads fail to block, we may never schedule the
+  // child, at which point this test effectively becomes
+  // MultiProcessConcurrency. That's not expected to occur.
+
+  std::atomic<int> a(0);
+  ScopedThread t([&a]() {
+    // Block so that PID 3 can execute and we can wait on its exit.
+    absl::SleepFor(absl::Seconds(1));
+    while (!a.load()) {
+    }
+  });
+
+  pid_t child_pid = fork();
+  if (child_pid == 0) {
+    // Busy wait without making any blocking syscalls.
+    auto end = absl::Now() + absl::Seconds(5);
+    while (absl::Now() < end) {
+    }
+    _exit(0);
+  }
+  ASSERT_THAT(child_pid, SyscallSucceeds());
+
+  absl::SleepFor(absl::Seconds(1));
+
+  // If only TID 1 is woken, thread.Join will hang.
+  // If only TID 2 is woken, both will hang.
+  a.store(1);
+  t.Join();
+
+  int status = 0;
+  EXPECT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds());
+  EXPECT_TRUE(WIFEXITED(status));
+  EXPECT_EQ(WEXITSTATUS(status), 0);
+}
+
+// Test that multiple processes can execute concurrently, even if one process
+// never yields.
+TEST(ConcurrencyTest, MultiProcessConcurrency) {
+  SKIP_IF(PlatformSupportMultiProcess() == PlatformSupport::NotSupported);
+
+  pid_t child_pid = fork();
+  if (child_pid == 0) {
+    while (true) {
+    }
+  }
+  ASSERT_THAT(child_pid, SyscallSucceeds());
+
+  absl::SleepFor(absl::Seconds(5));
+
+  // We are still able to execute code in this process. The other hasn't
+  // permanently hung execution in both processes.
+  ASSERT_THAT(kill(child_pid, SIGKILL), SyscallSucceeds());
+  int status = 0;
+
+  ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds());
+  ASSERT_TRUE(WIFSIGNALED(status));
+  ASSERT_EQ(WTERMSIG(status), SIGKILL);
+}
+
+}  // namespace
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/connect_external.cc b/test/syscalls/linux/connect_external.cc
new file mode 100644
index 000000000..1edb50e47
--- /dev/null
+++ b/test/syscalls/linux/connect_external.cc
@@ -0,0 +1,163 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <stdlib.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+
+#include <string>
+#include <tuple>
+
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/fs_util.h"
+#include "test/util/test_util.h"
+
+// This file contains tests specific to connecting to host UDS managed outside
+// the sandbox / test.
+//
+// A set of ultity sockets will be created externally in $TEST_UDS_TREE and
+// $TEST_UDS_ATTACH_TREE for these tests to interact with.
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+struct ProtocolSocket {
+  int protocol;
+  std::string name;
+};
+
+// Parameter is (socket root dir, ProtocolSocket).
+using GoferStreamSeqpacketTest =
+    ::testing::TestWithParam<std::tuple<std::string, ProtocolSocket>>;
+
+// Connect to a socket and verify that write/read work.
+//
+// An "echo" socket doesn't work for dgram sockets because our socket is
+// unnamed. The server thus has no way to reply to us.
+TEST_P(GoferStreamSeqpacketTest, Echo) {
+  std::string env;
+  ProtocolSocket proto;
+  std::tie(env, proto) = GetParam();
+
+  char* val = getenv(env.c_str());
+  ASSERT_NE(val, nullptr);
+  std::string root(val);
+
+  FileDescriptor sock =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_UNIX, proto.protocol, 0));
+
+  std::string socket_path = JoinPath(root, proto.name, "echo");
+
+  struct sockaddr_un addr = {};
+  addr.sun_family = AF_UNIX;
+  memcpy(addr.sun_path, socket_path.c_str(), socket_path.length());
+
+  ASSERT_THAT(connect(sock.get(), reinterpret_cast<struct sockaddr*>(&addr),
+                      sizeof(addr)),
+              SyscallSucceeds());
+
+  constexpr int kBufferSize = 64;
+  char send_buffer[kBufferSize];
+  memset(send_buffer, 'a', sizeof(send_buffer));
+
+  ASSERT_THAT(WriteFd(sock.get(), send_buffer, sizeof(send_buffer)),
+              SyscallSucceedsWithValue(sizeof(send_buffer)));
+
+  char recv_buffer[kBufferSize];
+  ASSERT_THAT(ReadFd(sock.get(), recv_buffer, sizeof(recv_buffer)),
+              SyscallSucceedsWithValue(sizeof(recv_buffer)));
+  ASSERT_EQ(0, memcmp(send_buffer, recv_buffer, sizeof(send_buffer)));
+}
+
+// It is not possible to connect to a bound but non-listening socket.
+TEST_P(GoferStreamSeqpacketTest, NonListening) {
+  std::string env;
+  ProtocolSocket proto;
+  std::tie(env, proto) = GetParam();
+
+  char* val = getenv(env.c_str());
+  ASSERT_NE(val, nullptr);
+  std::string root(val);
+
+  FileDescriptor sock =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_UNIX, proto.protocol, 0));
+
+  std::string socket_path = JoinPath(root, proto.name, "nonlistening");
+
+  struct sockaddr_un addr = {};
+  addr.sun_family = AF_UNIX;
+  memcpy(addr.sun_path, socket_path.c_str(), socket_path.length());
+
+  ASSERT_THAT(connect(sock.get(), reinterpret_cast<struct sockaddr*>(&addr),
+                      sizeof(addr)),
+              SyscallFailsWithErrno(ECONNREFUSED));
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    StreamSeqpacket, GoferStreamSeqpacketTest,
+    ::testing::Combine(
+        // Test access via standard path and attach point.
+        ::testing::Values("TEST_UDS_TREE", "TEST_UDS_ATTACH_TREE"),
+        ::testing::Values(ProtocolSocket{SOCK_STREAM, "stream"},
+                          ProtocolSocket{SOCK_SEQPACKET, "seqpacket"})));
+
+// Parameter is socket root dir.
+using GoferDgramTest = ::testing::TestWithParam<std::string>;
+
+// Connect to a socket and verify that write works.
+//
+// An "echo" socket doesn't work for dgram sockets because our socket is
+// unnamed. The server thus has no way to reply to us.
+TEST_P(GoferDgramTest, Null) {
+  std::string env = GetParam();
+  char* val = getenv(env.c_str());
+  ASSERT_NE(val, nullptr);
+  std::string root(val);
+
+  FileDescriptor sock =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_UNIX, SOCK_DGRAM, 0));
+
+  std::string socket_path = JoinPath(root, "dgram/null");
+
+  struct sockaddr_un addr = {};
+  addr.sun_family = AF_UNIX;
+  memcpy(addr.sun_path, socket_path.c_str(), socket_path.length());
+
+  ASSERT_THAT(connect(sock.get(), reinterpret_cast<struct sockaddr*>(&addr),
+                      sizeof(addr)),
+              SyscallSucceeds());
+
+  constexpr int kBufferSize = 64;
+  char send_buffer[kBufferSize];
+  memset(send_buffer, 'a', sizeof(send_buffer));
+
+  ASSERT_THAT(WriteFd(sock.get(), send_buffer, sizeof(send_buffer)),
+              SyscallSucceedsWithValue(sizeof(send_buffer)));
+}
+
+INSTANTIATE_TEST_SUITE_P(Dgram, GoferDgramTest,
+                         // Test access via standard path and attach point.
+                         ::testing::Values("TEST_UDS_TREE",
+                                           "TEST_UDS_ATTACH_TREE"));
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/creat.cc b/test/syscalls/linux/creat.cc
new file mode 100644
index 000000000..3c270d6da
--- /dev/null
+++ b/test/syscalls/linux/creat.cc
@@ -0,0 +1,68 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <string>
+
+#include "gtest/gtest.h"
+#include "test/util/fs_util.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+constexpr int kMode = 0666;
+
+TEST(CreatTest, CreatCreatesNewFile) {
+  std::string const path = NewTempAbsPath();
+  struct stat buf;
+  int fd;
+  ASSERT_THAT(stat(path.c_str(), &buf), SyscallFailsWithErrno(ENOENT));
+  ASSERT_THAT(fd = creat(path.c_str(), kMode), SyscallSucceeds());
+  EXPECT_THAT(close(fd), SyscallSucceeds());
+  EXPECT_THAT(stat(path.c_str(), &buf), SyscallSucceeds());
+}
+
+TEST(CreatTest, CreatTruncatesExistingFile) {
+  auto temp_path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  int fd;
+  ASSERT_NO_ERRNO(SetContents(temp_path.path(), "non-empty"));
+  ASSERT_THAT(fd = creat(temp_path.path().c_str(), kMode), SyscallSucceeds());
+  EXPECT_THAT(close(fd), SyscallSucceeds());
+  std::string new_contents;
+  ASSERT_NO_ERRNO(GetContents(temp_path.path(), &new_contents));
+  EXPECT_EQ("", new_contents);
+}
+
+TEST(CreatTest, CreatWithNameTooLong) {
+  // Start with a unique name, and pad it to NAME_MAX + 1;
+  std::string name = NewTempRelPath();
+  int padding = (NAME_MAX + 1) - name.size();
+  name.append(padding, 'x');
+  const std::string& path = JoinPath(GetAbsoluteTestTmpdir(), name);
+
+  // Creation should return ENAMETOOLONG.
+  ASSERT_THAT(creat(path.c_str(), kMode), SyscallFailsWithErrno(ENAMETOOLONG));
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/dev.cc b/test/syscalls/linux/dev.cc
new file mode 100644
index 000000000..3c88c4cbd
--- /dev/null
+++ b/test/syscalls/linux/dev.cc
@@ -0,0 +1,167 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(DevTest, LseekDevUrandom) {
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/urandom", O_RDONLY));
+  EXPECT_THAT(lseek(fd.get(), -10, SEEK_CUR), SyscallSucceeds());
+  EXPECT_THAT(lseek(fd.get(), -10, SEEK_SET), SyscallSucceeds());
+  EXPECT_THAT(lseek(fd.get(), 0, SEEK_CUR), SyscallSucceeds());
+}
+
+TEST(DevTest, LseekDevNull) {
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/null", O_RDONLY));
+  EXPECT_THAT(lseek(fd.get(), -10, SEEK_CUR), SyscallSucceeds());
+  EXPECT_THAT(lseek(fd.get(), -10, SEEK_SET), SyscallSucceeds());
+  EXPECT_THAT(lseek(fd.get(), 0, SEEK_CUR), SyscallSucceeds());
+  EXPECT_THAT(lseek(fd.get(), 0, SEEK_END), SyscallSucceeds());
+}
+
+TEST(DevTest, LseekDevZero) {
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDONLY));
+  EXPECT_THAT(lseek(fd.get(), 0, SEEK_CUR), SyscallSucceeds());
+  EXPECT_THAT(lseek(fd.get(), 0, SEEK_END), SyscallSucceeds());
+}
+
+TEST(DevTest, LseekDevFull) {
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/full", O_RDONLY));
+  EXPECT_THAT(lseek(fd.get(), 123, SEEK_SET), SyscallSucceedsWithValue(0));
+  EXPECT_THAT(lseek(fd.get(), 123, SEEK_CUR), SyscallSucceedsWithValue(0));
+  EXPECT_THAT(lseek(fd.get(), 123, SEEK_END), SyscallSucceedsWithValue(0));
+}
+
+TEST(DevTest, LseekDevNullFreshFile) {
+  // Seeks to /dev/null always return 0.
+  const FileDescriptor fd1 =
+      ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/null", O_RDONLY));
+  const FileDescriptor fd2 =
+      ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/null", O_RDONLY));
+
+  EXPECT_THAT(lseek(fd1.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(0));
+  EXPECT_THAT(lseek(fd1.get(), 1000, SEEK_CUR), SyscallSucceedsWithValue(0));
+  EXPECT_THAT(lseek(fd2.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(0));
+
+  const FileDescriptor fd3 =
+      ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/null", O_RDONLY));
+  EXPECT_THAT(lseek(fd3.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(0));
+}
+
+TEST(DevTest, OpenTruncate) {
+  // Truncation is ignored on linux and gvisor for device files.
+  ASSERT_NO_ERRNO_AND_VALUE(
+      Open("/dev/null", O_CREAT | O_TRUNC | O_WRONLY, 0644));
+  ASSERT_NO_ERRNO_AND_VALUE(
+      Open("/dev/zero", O_CREAT | O_TRUNC | O_WRONLY, 0644));
+  ASSERT_NO_ERRNO_AND_VALUE(
+      Open("/dev/full", O_CREAT | O_TRUNC | O_WRONLY, 0644));
+}
+
+TEST(DevTest, Pread64DevNull) {
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/null", O_RDONLY));
+  char buf[1];
+  EXPECT_THAT(pread64(fd.get(), buf, 1, 0), SyscallSucceedsWithValue(0));
+}
+
+TEST(DevTest, Pread64DevZero) {
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDONLY));
+  char buf[1];
+  EXPECT_THAT(pread64(fd.get(), buf, 1, 0), SyscallSucceedsWithValue(1));
+}
+
+TEST(DevTest, Pread64DevFull) {
+  // /dev/full behaves like /dev/zero with respect to reads.
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/full", O_RDONLY));
+  char buf[1];
+  EXPECT_THAT(pread64(fd.get(), buf, 1, 0), SyscallSucceedsWithValue(1));
+}
+
+TEST(DevTest, ReadDevNull) {
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/null", O_RDONLY));
+  std::vector<char> buf(1);
+  EXPECT_THAT(ReadFd(fd.get(), buf.data(), 1), SyscallSucceeds());
+}
+
+// Do not allow random save as it could lead to partial reads.
+TEST(DevTest, ReadDevZero_NoRandomSave) {
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDONLY));
+
+  constexpr int kReadSize = 128 * 1024;
+  std::vector<char> buf(kReadSize, 1);
+  EXPECT_THAT(ReadFd(fd.get(), buf.data(), kReadSize),
+              SyscallSucceedsWithValue(kReadSize));
+  EXPECT_EQ(std::vector<char>(kReadSize, 0), buf);
+}
+
+TEST(DevTest, WriteDevNull) {
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/null", O_WRONLY));
+  EXPECT_THAT(WriteFd(fd.get(), "a", 1), SyscallSucceedsWithValue(1));
+}
+
+TEST(DevTest, WriteDevZero) {
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_WRONLY));
+  EXPECT_THAT(WriteFd(fd.get(), "a", 1), SyscallSucceedsWithValue(1));
+}
+
+TEST(DevTest, WriteDevFull) {
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/full", O_WRONLY));
+  EXPECT_THAT(WriteFd(fd.get(), "a", 1), SyscallFailsWithErrno(ENOSPC));
+}
+
+TEST(DevTest, TTYExists) {
+  struct stat statbuf = {};
+  ASSERT_THAT(stat("/dev/tty", &statbuf), SyscallSucceeds());
+  // Check that it's a character device with rw-rw-rw- permissions.
+  EXPECT_EQ(statbuf.st_mode, S_IFCHR | 0666);
+}
+
+TEST(DevTest, OpenDevFuse) {
+  // Note(gvisor.dev/issue/3076) This won't work in the sentry until the new
+  // device registration is complete.
+  SKIP_IF(IsRunningWithVFS1() || IsRunningOnGvisor());
+
+  ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/fuse", O_RDONLY));
+}
+
+}  // namespace
+}  // namespace testing
+
+}  // namespace gvisor
diff --git a/test/syscalls/linux/dup.cc b/test/syscalls/linux/dup.cc
new file mode 100644
index 000000000..4f773bc75
--- /dev/null
+++ b/test/syscalls/linux/dup.cc
@@ -0,0 +1,133 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "test/util/eventfd_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/posix_error.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+PosixErrorOr<FileDescriptor> Dup2(const FileDescriptor& fd, int target_fd) {
+  int new_fd = dup2(fd.get(), target_fd);
+  if (new_fd < 0) {
+    return PosixError(errno, "Dup2");
+  }
+  return FileDescriptor(new_fd);
+}
+
+PosixErrorOr<FileDescriptor> Dup3(const FileDescriptor& fd, int target_fd,
+                                  int flags) {
+  int new_fd = dup3(fd.get(), target_fd, flags);
+  if (new_fd < 0) {
+    return PosixError(errno, "Dup2");
+  }
+  return FileDescriptor(new_fd);
+}
+
+void CheckSameFile(const FileDescriptor& fd1, const FileDescriptor& fd2) {
+  struct stat stat_result1, stat_result2;
+  ASSERT_THAT(fstat(fd1.get(), &stat_result1), SyscallSucceeds());
+  ASSERT_THAT(fstat(fd2.get(), &stat_result2), SyscallSucceeds());
+  EXPECT_EQ(stat_result1.st_dev, stat_result2.st_dev);
+  EXPECT_EQ(stat_result1.st_ino, stat_result2.st_ino);
+}
+
+TEST(DupTest, Dup) {
+  auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDONLY));
+
+  // Dup the descriptor and make sure it's the same file.
+  FileDescriptor nfd = ASSERT_NO_ERRNO_AND_VALUE(fd.Dup());
+  ASSERT_NE(fd.get(), nfd.get());
+  CheckSameFile(fd, nfd);
+}
+
+TEST(DupTest, DupClearsCloExec) {
+  // Open an eventfd file descriptor with FD_CLOEXEC descriptor flag set.
+  FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, EFD_CLOEXEC));
+  EXPECT_THAT(fcntl(fd.get(), F_GETFD), SyscallSucceedsWithValue(FD_CLOEXEC));
+
+  // Duplicate the descriptor. Ensure that it doesn't have FD_CLOEXEC set.
+  FileDescriptor nfd = ASSERT_NO_ERRNO_AND_VALUE(fd.Dup());
+  ASSERT_NE(fd.get(), nfd.get());
+  CheckSameFile(fd, nfd);
+  EXPECT_THAT(fcntl(nfd.get(), F_GETFD), SyscallSucceedsWithValue(0));
+}
+
+TEST(DupTest, Dup2) {
+  auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDONLY));
+
+  // Regular dup once.
+  FileDescriptor nfd = ASSERT_NO_ERRNO_AND_VALUE(fd.Dup());
+
+  ASSERT_NE(fd.get(), nfd.get());
+  CheckSameFile(fd, nfd);
+
+  // Dup over the file above.
+  int target_fd = nfd.release();
+  FileDescriptor nfd2 = ASSERT_NO_ERRNO_AND_VALUE(Dup2(fd, target_fd));
+  EXPECT_EQ(target_fd, nfd2.get());
+  CheckSameFile(fd, nfd2);
+}
+
+TEST(DupTest, Dup2SameFD) {
+  auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDONLY));
+
+  // Should succeed.
+  ASSERT_THAT(dup2(fd.get(), fd.get()), SyscallSucceedsWithValue(fd.get()));
+}
+
+TEST(DupTest, Dup3) {
+  auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDONLY));
+
+  // Regular dup once.
+  FileDescriptor nfd = ASSERT_NO_ERRNO_AND_VALUE(fd.Dup());
+  ASSERT_NE(fd.get(), nfd.get());
+  CheckSameFile(fd, nfd);
+
+  // Dup over the file above, check that it has no CLOEXEC.
+  nfd = ASSERT_NO_ERRNO_AND_VALUE(Dup3(fd, nfd.release(), 0));
+  CheckSameFile(fd, nfd);
+  EXPECT_THAT(fcntl(nfd.get(), F_GETFD), SyscallSucceedsWithValue(0));
+
+  // Dup over the file again, check that it does not CLOEXEC.
+  nfd = ASSERT_NO_ERRNO_AND_VALUE(Dup3(fd, nfd.release(), O_CLOEXEC));
+  CheckSameFile(fd, nfd);
+  EXPECT_THAT(fcntl(nfd.get(), F_GETFD), SyscallSucceedsWithValue(FD_CLOEXEC));
+}
+
+TEST(DupTest, Dup3FailsSameFD) {
+  auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDONLY));
+
+  // Only dup3 fails if the new and old fd are the same.
+  ASSERT_THAT(dup3(fd.get(), fd.get(), 0), SyscallFailsWithErrno(EINVAL));
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/epoll.cc b/test/syscalls/linux/epoll.cc
new file mode 100644
index 000000000..f57d38dc7
--- /dev/null
+++ b/test/syscalls/linux/epoll.cc
@@ -0,0 +1,428 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <limits.h>
+#include <pthread.h>
+#include <signal.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/epoll.h>
+#include <sys/eventfd.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "test/util/epoll_util.h"
+#include "test/util/eventfd_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/posix_error.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+constexpr int kFDsPerEpoll = 3;
+constexpr uint64_t kMagicConstant = 0x0102030405060708;
+
+uint64_t ms_elapsed(const struct timespec* begin, const struct timespec* end) {
+  return (end->tv_sec - begin->tv_sec) * 1000 +
+         (end->tv_nsec - begin->tv_nsec) / 1000000;
+}
+
+TEST(EpollTest, AllWritable) {
+  auto epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD());
+  std::vector<FileDescriptor> eventfds;
+  for (int i = 0; i < kFDsPerEpoll; i++) {
+    eventfds.push_back(ASSERT_NO_ERRNO_AND_VALUE(NewEventFD()));
+    ASSERT_NO_ERRNO(RegisterEpollFD(epollfd.get(), eventfds[i].get(),
+                                    EPOLLIN | EPOLLOUT, kMagicConstant + i));
+  }
+
+  struct epoll_event result[kFDsPerEpoll];
+  ASSERT_THAT(RetryEINTR(epoll_wait)(epollfd.get(), result, kFDsPerEpoll, -1),
+              SyscallSucceedsWithValue(kFDsPerEpoll));
+  for (int i = 0; i < kFDsPerEpoll; i++) {
+    ASSERT_EQ(result[i].events, EPOLLOUT);
+  }
+}
+
+TEST(EpollTest, LastReadable) {
+  auto epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD());
+  std::vector<FileDescriptor> eventfds;
+  for (int i = 0; i < kFDsPerEpoll; i++) {
+    eventfds.push_back(ASSERT_NO_ERRNO_AND_VALUE(NewEventFD()));
+    ASSERT_NO_ERRNO(RegisterEpollFD(epollfd.get(), eventfds[i].get(),
+                                    EPOLLIN | EPOLLOUT, kMagicConstant + i));
+  }
+
+  uint64_t tmp = 1;
+  ASSERT_THAT(WriteFd(eventfds[kFDsPerEpoll - 1].get(), &tmp, sizeof(tmp)),
+              SyscallSucceedsWithValue(sizeof(tmp)));
+
+  struct epoll_event result[kFDsPerEpoll];
+  ASSERT_THAT(RetryEINTR(epoll_wait)(epollfd.get(), result, kFDsPerEpoll, -1),
+              SyscallSucceedsWithValue(kFDsPerEpoll));
+
+  int i;
+  for (i = 0; i < kFDsPerEpoll - 1; i++) {
+    EXPECT_EQ(result[i].events, EPOLLOUT);
+  }
+  EXPECT_EQ(result[i].events, EPOLLOUT | EPOLLIN);
+  EXPECT_EQ(result[i].data.u64, kMagicConstant + i);
+}
+
+TEST(EpollTest, LastNonWritable) {
+  auto epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD());
+  std::vector<FileDescriptor> eventfds;
+  for (int i = 0; i < kFDsPerEpoll; i++) {
+    eventfds.push_back(ASSERT_NO_ERRNO_AND_VALUE(NewEventFD()));
+    ASSERT_NO_ERRNO(RegisterEpollFD(epollfd.get(), eventfds[i].get(),
+                                    EPOLLIN | EPOLLOUT, kMagicConstant + i));
+  }
+
+  // Write the maximum value to the event fd so that writing to it again would
+  // block.
+  uint64_t tmp = ULLONG_MAX - 1;
+  ASSERT_THAT(WriteFd(eventfds[kFDsPerEpoll - 1].get(), &tmp, sizeof(tmp)),
+              SyscallSucceedsWithValue(sizeof(tmp)));
+
+  struct epoll_event result[kFDsPerEpoll];
+  ASSERT_THAT(RetryEINTR(epoll_wait)(epollfd.get(), result, kFDsPerEpoll, -1),
+              SyscallSucceedsWithValue(kFDsPerEpoll));
+
+  int i;
+  for (i = 0; i < kFDsPerEpoll - 1; i++) {
+    EXPECT_EQ(result[i].events, EPOLLOUT);
+  }
+  EXPECT_EQ(result[i].events, EPOLLIN);
+  EXPECT_THAT(ReadFd(eventfds[kFDsPerEpoll - 1].get(), &tmp, sizeof(tmp)),
+              sizeof(tmp));
+  EXPECT_THAT(RetryEINTR(epoll_wait)(epollfd.get(), result, kFDsPerEpoll, -1),
+              SyscallSucceedsWithValue(kFDsPerEpoll));
+
+  for (i = 0; i < kFDsPerEpoll; i++) {
+    EXPECT_EQ(result[i].events, EPOLLOUT);
+  }
+}
+
+TEST(EpollTest, Timeout_NoRandomSave) {
+  auto epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD());
+  std::vector<FileDescriptor> eventfds;
+  for (int i = 0; i < kFDsPerEpoll; i++) {
+    eventfds.push_back(ASSERT_NO_ERRNO_AND_VALUE(NewEventFD()));
+    ASSERT_NO_ERRNO(RegisterEpollFD(epollfd.get(), eventfds[i].get(), EPOLLIN,
+                                    kMagicConstant + i));
+  }
+
+  constexpr int kTimeoutMs = 200;
+  struct timespec begin;
+  struct timespec end;
+  struct epoll_event result[kFDsPerEpoll];
+
+  {
+    const DisableSave ds;  // Timing-related.
+    EXPECT_THAT(clock_gettime(CLOCK_MONOTONIC, &begin), SyscallSucceeds());
+
+    ASSERT_THAT(
+        RetryEINTR(epoll_wait)(epollfd.get(), result, kFDsPerEpoll, kTimeoutMs),
+        SyscallSucceedsWithValue(0));
+    EXPECT_THAT(clock_gettime(CLOCK_MONOTONIC, &end), SyscallSucceeds());
+  }
+
+  // Check the lower bound on the timeout.  Checking for an upper bound is
+  // fragile because Linux can overrun the timeout due to scheduling delays.
+  EXPECT_GT(ms_elapsed(&begin, &end), kTimeoutMs - 1);
+}
+
+void* writer(void* arg) {
+  int fd = *reinterpret_cast<int*>(arg);
+  uint64_t tmp = 1;
+
+  usleep(200000);
+  if (WriteFd(fd, &tmp, sizeof(tmp)) != sizeof(tmp)) {
+    fprintf(stderr, "writer failed: errno %s\n", strerror(errno));
+  }
+
+  return nullptr;
+}
+
+TEST(EpollTest, WaitThenUnblock) {
+  auto epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD());
+  std::vector<FileDescriptor> eventfds;
+  for (int i = 0; i < kFDsPerEpoll; i++) {
+    eventfds.push_back(ASSERT_NO_ERRNO_AND_VALUE(NewEventFD()));
+    ASSERT_NO_ERRNO(RegisterEpollFD(epollfd.get(), eventfds[i].get(), EPOLLIN,
+                                    kMagicConstant + i));
+  }
+
+  // Fire off a thread that will make at least one of the event fds readable.
+  pthread_t thread;
+  int make_readable = eventfds[0].get();
+  ASSERT_THAT(pthread_create(&thread, nullptr, writer, &make_readable),
+              SyscallSucceedsWithValue(0));
+
+  struct epoll_event result[kFDsPerEpoll];
+  EXPECT_THAT(RetryEINTR(epoll_wait)(epollfd.get(), result, kFDsPerEpoll, -1),
+              SyscallSucceedsWithValue(1));
+  EXPECT_THAT(pthread_detach(thread), SyscallSucceeds());
+}
+
+void sighandler(int s) {}
+
+void* signaler(void* arg) {
+  pthread_t* t = reinterpret_cast<pthread_t*>(arg);
+  // Repeatedly send the real-time signal until we are detached, because it's
+  // difficult to know exactly when epoll_wait on another thread (which this
+  // is intending to interrupt) has started blocking.
+  while (1) {
+    usleep(200000);
+    pthread_kill(*t, SIGRTMIN);
+  }
+  return nullptr;
+}
+
+TEST(EpollTest, UnblockWithSignal) {
+  auto epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD());
+  std::vector<FileDescriptor> eventfds;
+  for (int i = 0; i < kFDsPerEpoll; i++) {
+    eventfds.push_back(ASSERT_NO_ERRNO_AND_VALUE(NewEventFD()));
+    ASSERT_NO_ERRNO(RegisterEpollFD(epollfd.get(), eventfds[i].get(), EPOLLIN,
+                                    kMagicConstant + i));
+  }
+
+  signal(SIGRTMIN, sighandler);
+  // Unblock the real time signals that InitGoogle blocks :(
+  sigset_t unblock;
+  sigemptyset(&unblock);
+  sigaddset(&unblock, SIGRTMIN);
+  ASSERT_THAT(sigprocmask(SIG_UNBLOCK, &unblock, nullptr), SyscallSucceeds());
+
+  pthread_t thread;
+  pthread_t cur = pthread_self();
+  ASSERT_THAT(pthread_create(&thread, nullptr, signaler, &cur),
+              SyscallSucceedsWithValue(0));
+
+  struct epoll_event result[kFDsPerEpoll];
+  EXPECT_THAT(epoll_wait(epollfd.get(), result, kFDsPerEpoll, -1),
+              SyscallFailsWithErrno(EINTR));
+  EXPECT_THAT(pthread_cancel(thread), SyscallSucceeds());
+  EXPECT_THAT(pthread_detach(thread), SyscallSucceeds());
+}
+
+TEST(EpollTest, TimeoutNoFds) {
+  auto epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD());
+  struct epoll_event result[kFDsPerEpoll];
+  EXPECT_THAT(RetryEINTR(epoll_wait)(epollfd.get(), result, kFDsPerEpoll, 100),
+              SyscallSucceedsWithValue(0));
+}
+
+struct addr_ctx {
+  int epollfd;
+  int eventfd;
+};
+
+void* fd_adder(void* arg) {
+  struct addr_ctx* actx = reinterpret_cast<struct addr_ctx*>(arg);
+  struct epoll_event event;
+  event.events = EPOLLIN | EPOLLOUT;
+  event.data.u64 = 0xdeadbeeffacefeed;
+
+  usleep(200000);
+  if (epoll_ctl(actx->epollfd, EPOLL_CTL_ADD, actx->eventfd, &event) == -1) {
+    fprintf(stderr, "epoll_ctl failed: %s\n", strerror(errno));
+  }
+
+  return nullptr;
+}
+
+TEST(EpollTest, UnblockWithNewFD) {
+  auto epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD());
+  auto eventfd = ASSERT_NO_ERRNO_AND_VALUE(NewEventFD());
+
+  pthread_t thread;
+  struct addr_ctx actx = {epollfd.get(), eventfd.get()};
+  ASSERT_THAT(pthread_create(&thread, nullptr, fd_adder, &actx),
+              SyscallSucceedsWithValue(0));
+
+  struct epoll_event result[kFDsPerEpoll];
+  // Wait while no FDs are ready, but after 200ms fd_adder will add a ready FD
+  // to epoll which will wake us up.
+  EXPECT_THAT(RetryEINTR(epoll_wait)(epollfd.get(), result, kFDsPerEpoll, -1),
+              SyscallSucceedsWithValue(1));
+  EXPECT_THAT(pthread_detach(thread), SyscallSucceeds());
+  EXPECT_EQ(result[0].data.u64, 0xdeadbeeffacefeed);
+}
+
+TEST(EpollTest, Oneshot) {
+  auto epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD());
+  std::vector<FileDescriptor> eventfds;
+  for (int i = 0; i < kFDsPerEpoll; i++) {
+    eventfds.push_back(ASSERT_NO_ERRNO_AND_VALUE(NewEventFD()));
+    ASSERT_NO_ERRNO(RegisterEpollFD(epollfd.get(), eventfds[i].get(), EPOLLIN,
+                                    kMagicConstant + i));
+  }
+
+  struct epoll_event event;
+  event.events = EPOLLOUT | EPOLLONESHOT;
+  event.data.u64 = kMagicConstant;
+  ASSERT_THAT(
+      epoll_ctl(epollfd.get(), EPOLL_CTL_MOD, eventfds[0].get(), &event),
+      SyscallSucceeds());
+
+  struct epoll_event result[kFDsPerEpoll];
+  // One-shot entry means that the first epoll_wait should succeed.
+  ASSERT_THAT(RetryEINTR(epoll_wait)(epollfd.get(), result, kFDsPerEpoll, -1),
+              SyscallSucceedsWithValue(1));
+  EXPECT_EQ(result[0].data.u64, kMagicConstant);
+
+  // One-shot entry means that the second epoll_wait should timeout.
+  EXPECT_THAT(RetryEINTR(epoll_wait)(epollfd.get(), result, kFDsPerEpoll, 100),
+              SyscallSucceedsWithValue(0));
+}
+
+TEST(EpollTest, EdgeTriggered_NoRandomSave) {
+  // Test edge-triggered entry: make it edge-triggered, first wait should
+  // return it, second one should time out, make it writable again, third wait
+  // should return it, fourth wait should timeout.
+  auto epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD());
+  auto eventfd = ASSERT_NO_ERRNO_AND_VALUE(NewEventFD());
+  ASSERT_NO_ERRNO(RegisterEpollFD(epollfd.get(), eventfd.get(),
+                                  EPOLLOUT | EPOLLET, kMagicConstant));
+
+  struct epoll_event result[kFDsPerEpoll];
+
+  {
+    const DisableSave ds;  // May trigger spurious event.
+
+    // Edge-triggered entry means that the first epoll_wait should return the
+    // event.
+    ASSERT_THAT(epoll_wait(epollfd.get(), result, kFDsPerEpoll, -1),
+                SyscallSucceedsWithValue(1));
+    EXPECT_EQ(result[0].data.u64, kMagicConstant);
+
+    // Edge-triggered entry means that the second epoll_wait should time out.
+    ASSERT_THAT(epoll_wait(epollfd.get(), result, kFDsPerEpoll, 100),
+                SyscallSucceedsWithValue(0));
+  }
+
+  uint64_t tmp = ULLONG_MAX - 1;
+
+  // Make an fd non-writable.
+  ASSERT_THAT(WriteFd(eventfd.get(), &tmp, sizeof(tmp)),
+              SyscallSucceedsWithValue(sizeof(tmp)));
+
+  // Make the same fd non-writable to trigger a change, which will trigger an
+  // edge-triggered event.
+  ASSERT_THAT(ReadFd(eventfd.get(), &tmp, sizeof(tmp)),
+              SyscallSucceedsWithValue(sizeof(tmp)));
+
+  {
+    const DisableSave ds;  // May trigger spurious event.
+
+    // An edge-triggered event should now be returned.
+    ASSERT_THAT(epoll_wait(epollfd.get(), result, kFDsPerEpoll, -1),
+                SyscallSucceedsWithValue(1));
+    EXPECT_EQ(result[0].data.u64, kMagicConstant);
+
+    // The edge-triggered event had been consumed above, we don't expect to
+    // get it again.
+    ASSERT_THAT(epoll_wait(epollfd.get(), result, kFDsPerEpoll, 100),
+                SyscallSucceedsWithValue(0));
+  }
+}
+
+TEST(EpollTest, OneshotAndEdgeTriggered) {
+  auto epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD());
+  auto eventfd = ASSERT_NO_ERRNO_AND_VALUE(NewEventFD());
+  ASSERT_NO_ERRNO(RegisterEpollFD(epollfd.get(), eventfd.get(),
+                                  EPOLLOUT | EPOLLET | EPOLLONESHOT,
+                                  kMagicConstant));
+
+  struct epoll_event result[kFDsPerEpoll];
+  // First time one shot edge-triggered entry means that epoll_wait should
+  // return the event.
+  ASSERT_THAT(RetryEINTR(epoll_wait)(epollfd.get(), result, kFDsPerEpoll, -1),
+              SyscallSucceedsWithValue(1));
+  EXPECT_EQ(result[0].data.u64, kMagicConstant);
+
+  // Edge-triggered entry means that the second epoll_wait should time out.
+  ASSERT_THAT(RetryEINTR(epoll_wait)(epollfd.get(), result, kFDsPerEpoll, 100),
+              SyscallSucceedsWithValue(0));
+
+  uint64_t tmp = ULLONG_MAX - 1;
+  // Make an fd non-writable.
+  ASSERT_THAT(WriteFd(eventfd.get(), &tmp, sizeof(tmp)),
+              SyscallSucceedsWithValue(sizeof(tmp)));
+  // Make the same fd non-writable to trigger a change, which will not trigger
+  // an edge-triggered event because we've also included EPOLLONESHOT.
+  ASSERT_THAT(ReadFd(eventfd.get(), &tmp, sizeof(tmp)),
+              SyscallSucceedsWithValue(sizeof(tmp)));
+  ASSERT_THAT(RetryEINTR(epoll_wait)(epollfd.get(), result, kFDsPerEpoll, 100),
+              SyscallSucceedsWithValue(0));
+}
+
+TEST(EpollTest, CycleOfOneDisallowed) {
+  auto epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD());
+
+  struct epoll_event event;
+  event.events = EPOLLOUT;
+  event.data.u64 = kMagicConstant;
+
+  ASSERT_THAT(epoll_ctl(epollfd.get(), EPOLL_CTL_ADD, epollfd.get(), &event),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(EpollTest, CycleOfThreeDisallowed) {
+  auto epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD());
+  auto epollfd1 = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD());
+  auto epollfd2 = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD());
+
+  ASSERT_NO_ERRNO(
+      RegisterEpollFD(epollfd.get(), epollfd1.get(), EPOLLIN, kMagicConstant));
+  ASSERT_NO_ERRNO(
+      RegisterEpollFD(epollfd1.get(), epollfd2.get(), EPOLLIN, kMagicConstant));
+
+  struct epoll_event event;
+  event.events = EPOLLIN;
+  event.data.u64 = kMagicConstant;
+  EXPECT_THAT(epoll_ctl(epollfd2.get(), EPOLL_CTL_ADD, epollfd.get(), &event),
+              SyscallFailsWithErrno(ELOOP));
+}
+
+TEST(EpollTest, CloseFile) {
+  auto epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD());
+  auto eventfd = ASSERT_NO_ERRNO_AND_VALUE(NewEventFD());
+  ASSERT_NO_ERRNO(
+      RegisterEpollFD(epollfd.get(), eventfd.get(), EPOLLOUT, kMagicConstant));
+
+  struct epoll_event result[kFDsPerEpoll];
+  ASSERT_THAT(RetryEINTR(epoll_wait)(epollfd.get(), result, kFDsPerEpoll, -1),
+              SyscallSucceedsWithValue(1));
+  EXPECT_EQ(result[0].data.u64, kMagicConstant);
+
+  // Close the event fd early.
+  eventfd.reset();
+
+  EXPECT_THAT(RetryEINTR(epoll_wait)(epollfd.get(), result, kFDsPerEpoll, 100),
+              SyscallSucceedsWithValue(0));
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/eventfd.cc b/test/syscalls/linux/eventfd.cc
new file mode 100644
index 000000000..dc794415e
--- /dev/null
+++ b/test/syscalls/linux/eventfd.cc
@@ -0,0 +1,222 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/epoll.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "test/util/epoll_util.h"
+#include "test/util/eventfd_util.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(EventfdTest, Nonblock) {
+  FileDescriptor efd =
+      ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, EFD_NONBLOCK | EFD_SEMAPHORE));
+
+  uint64_t l;
+  ASSERT_THAT(read(efd.get(), &l, sizeof(l)), SyscallFailsWithErrno(EAGAIN));
+
+  l = 1;
+  ASSERT_THAT(write(efd.get(), &l, sizeof(l)), SyscallSucceeds());
+
+  l = 0;
+  ASSERT_THAT(read(efd.get(), &l, sizeof(l)), SyscallSucceeds());
+  EXPECT_EQ(l, 1);
+
+  ASSERT_THAT(read(efd.get(), &l, sizeof(l)), SyscallFailsWithErrno(EAGAIN));
+}
+
+void* read_three_times(void* arg) {
+  int efd = *reinterpret_cast<int*>(arg);
+  uint64_t l;
+  EXPECT_THAT(read(efd, &l, sizeof(l)), SyscallSucceedsWithValue(sizeof(l)));
+  EXPECT_THAT(read(efd, &l, sizeof(l)), SyscallSucceedsWithValue(sizeof(l)));
+  EXPECT_THAT(read(efd, &l, sizeof(l)), SyscallSucceedsWithValue(sizeof(l)));
+  return nullptr;
+}
+
+TEST(EventfdTest, BlockingWrite) {
+  FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, EFD_SEMAPHORE));
+  int efd = fd.get();
+
+  pthread_t p;
+  ASSERT_THAT(pthread_create(&p, nullptr, read_three_times,
+                             reinterpret_cast<void*>(&efd)),
+              SyscallSucceeds());
+
+  uint64_t l = 1;
+  ASSERT_THAT(write(efd, &l, sizeof(l)), SyscallSucceeds());
+  EXPECT_EQ(l, 1);
+
+  ASSERT_THAT(write(efd, &l, sizeof(l)), SyscallSucceeds());
+  EXPECT_EQ(l, 1);
+
+  ASSERT_THAT(write(efd, &l, sizeof(l)), SyscallSucceeds());
+  EXPECT_EQ(l, 1);
+
+  ASSERT_THAT(pthread_join(p, nullptr), SyscallSucceeds());
+}
+
+TEST(EventfdTest, SmallWrite) {
+  FileDescriptor efd =
+      ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, EFD_NONBLOCK | EFD_SEMAPHORE));
+
+  uint64_t l = 16;
+  ASSERT_THAT(write(efd.get(), &l, 4), SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(EventfdTest, SmallRead) {
+  FileDescriptor efd =
+      ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, EFD_NONBLOCK | EFD_SEMAPHORE));
+
+  uint64_t l = 1;
+  ASSERT_THAT(write(efd.get(), &l, sizeof(l)), SyscallSucceeds());
+
+  l = 0;
+  ASSERT_THAT(read(efd.get(), &l, 4), SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(EventfdTest, IllegalSeek) {
+  FileDescriptor efd = ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, 0));
+  EXPECT_THAT(lseek(efd.get(), 0, SEEK_SET), SyscallFailsWithErrno(ESPIPE));
+}
+
+TEST(EventfdTest, IllegalPread) {
+  FileDescriptor efd = ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, 0));
+  int l;
+  EXPECT_THAT(pread(efd.get(), &l, sizeof(l), 0),
+              SyscallFailsWithErrno(ESPIPE));
+}
+
+TEST(EventfdTest, IllegalPwrite) {
+  FileDescriptor efd = ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, 0));
+  EXPECT_THAT(pwrite(efd.get(), "x", 1, 0), SyscallFailsWithErrno(ESPIPE));
+}
+
+TEST(EventfdTest, BigWrite) {
+  FileDescriptor efd =
+      ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, EFD_NONBLOCK | EFD_SEMAPHORE));
+
+  uint64_t big[16];
+  big[0] = 16;
+  ASSERT_THAT(write(efd.get(), big, sizeof(big)), SyscallSucceeds());
+}
+
+TEST(EventfdTest, BigRead) {
+  FileDescriptor efd =
+      ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, EFD_NONBLOCK | EFD_SEMAPHORE));
+
+  uint64_t l = 1;
+  ASSERT_THAT(write(efd.get(), &l, sizeof(l)), SyscallSucceeds());
+
+  uint64_t big[16];
+  ASSERT_THAT(read(efd.get(), big, sizeof(big)), SyscallSucceeds());
+  EXPECT_EQ(big[0], 1);
+}
+
+TEST(EventfdTest, BigWriteBigRead) {
+  FileDescriptor efd =
+      ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, EFD_NONBLOCK | EFD_SEMAPHORE));
+
+  uint64_t l[16];
+  l[0] = 16;
+  ASSERT_THAT(write(efd.get(), l, sizeof(l)), SyscallSucceeds());
+  ASSERT_THAT(read(efd.get(), l, sizeof(l)), SyscallSucceeds());
+  EXPECT_EQ(l[0], 1);
+}
+
+TEST(EventfdTest, SpliceFromPipePartialSucceeds) {
+  int pipes[2];
+  ASSERT_THAT(pipe2(pipes, O_NONBLOCK), SyscallSucceeds());
+  const FileDescriptor pipe_rfd(pipes[0]);
+  const FileDescriptor pipe_wfd(pipes[1]);
+  constexpr uint64_t kVal{1};
+
+  FileDescriptor efd = ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, EFD_NONBLOCK));
+
+  uint64_t event_array[2];
+  event_array[0] = kVal;
+  event_array[1] = kVal;
+  ASSERT_THAT(write(pipe_wfd.get(), event_array, sizeof(event_array)),
+              SyscallSucceedsWithValue(sizeof(event_array)));
+  EXPECT_THAT(splice(pipe_rfd.get(), /*__offin=*/nullptr, efd.get(),
+                     /*__offout=*/nullptr, sizeof(event_array[0]) + 1,
+                     SPLICE_F_NONBLOCK),
+              SyscallSucceedsWithValue(sizeof(event_array[0])));
+
+  uint64_t val;
+  ASSERT_THAT(read(efd.get(), &val, sizeof(val)),
+              SyscallSucceedsWithValue(sizeof(val)));
+  EXPECT_EQ(val, kVal);
+}
+
+// NotifyNonZero is inherently racy, so random save is disabled.
+TEST(EventfdTest, NotifyNonZero_NoRandomSave) {
+  // Waits will time out at 10 seconds.
+  constexpr int kEpollTimeoutMs = 10000;
+  // Create an eventfd descriptor.
+  FileDescriptor efd =
+      ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(7, EFD_NONBLOCK | EFD_SEMAPHORE));
+  // Create an epoll fd to listen to efd.
+  FileDescriptor epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD());
+  // Add efd to epoll.
+  ASSERT_NO_ERRNO(
+      RegisterEpollFD(epollfd.get(), efd.get(), EPOLLIN | EPOLLET, efd.get()));
+
+  // Use epoll to get a value from efd.
+  struct epoll_event out_ev;
+  int wait_out = epoll_wait(epollfd.get(), &out_ev, 1, kEpollTimeoutMs);
+  EXPECT_EQ(wait_out, 1);
+  EXPECT_EQ(efd.get(), out_ev.data.fd);
+  uint64_t val = 0;
+  ASSERT_THAT(read(efd.get(), &val, sizeof(val)), SyscallSucceeds());
+  EXPECT_EQ(val, 1);
+
+  // Start a thread that, after this thread blocks on epoll_wait, will write to
+  // efd. This is racy -- it's possible that this write will happen after
+  // epoll_wait times out.
+  ScopedThread t([&efd] {
+    sleep(5);
+    uint64_t val = 1;
+    EXPECT_THAT(write(efd.get(), &val, sizeof(val)),
+                SyscallSucceedsWithValue(sizeof(val)));
+  });
+
+  // epoll_wait should return once the thread writes.
+  wait_out = epoll_wait(epollfd.get(), &out_ev, 1, kEpollTimeoutMs);
+  EXPECT_EQ(wait_out, 1);
+  EXPECT_EQ(efd.get(), out_ev.data.fd);
+
+  val = 0;
+  ASSERT_THAT(read(efd.get(), &val, sizeof(val)), SyscallSucceeds());
+  EXPECT_EQ(val, 1);
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/exceptions.cc b/test/syscalls/linux/exceptions.cc
new file mode 100644
index 000000000..420b9543f
--- /dev/null
+++ b/test/syscalls/linux/exceptions.cc
@@ -0,0 +1,367 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <signal.h>
+
+#include "gtest/gtest.h"
+#include "test/util/logging.h"
+#include "test/util/platform_util.h"
+#include "test/util/signal_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// Default value for the x87 FPU control word. See Intel SDM Vol 1, Ch 8.1.5
+// "x87 FPU Control Word".
+constexpr uint16_t kX87ControlWordDefault = 0x37f;
+
+// Mask for the divide-by-zero exception.
+constexpr uint16_t kX87ControlWordDiv0Mask = 1 << 2;
+
+// Default value for the SSE control register (MXCSR). See Intel SDM Vol 1, Ch
+// 11.6.4 "Initialization of SSE/SSE3 Extensions".
+constexpr uint32_t kMXCSRDefault = 0x1f80;
+
+// Mask for the divide-by-zero exception.
+constexpr uint32_t kMXCSRDiv0Mask = 1 << 9;
+
+// Flag for a pending divide-by-zero exception.
+constexpr uint32_t kMXCSRDiv0Flag = 1 << 2;
+
+void inline Halt() { asm("hlt\r\n"); }
+
+void inline SetAlignmentCheck() {
+  asm("subq $128, %%rsp\r\n"  // Avoid potential red zone clobber
+      "pushf\r\n"
+      "pop %%rax\r\n"
+      "or $0x40000, %%rax\r\n"
+      "push %%rax\r\n"
+      "popf\r\n"
+      "addq $128, %%rsp\r\n"
+      :
+      :
+      : "ax");
+}
+
+void inline ClearAlignmentCheck() {
+  asm("subq $128, %%rsp\r\n"  // Avoid potential red zone clobber
+      "pushf\r\n"
+      "pop %%rax\r\n"
+      "mov $0x40000, %%rbx\r\n"
+      "not %%rbx\r\n"
+      "and %%rbx, %%rax\r\n"
+      "push %%rax\r\n"
+      "popf\r\n"
+      "addq $128, %%rsp\r\n"
+      :
+      :
+      : "ax", "bx");
+}
+
+void inline Int3Normal() { asm(".byte 0xcd, 0x03\r\n"); }
+
+void inline Int3Compact() { asm(".byte 0xcc\r\n"); }
+
+void InIOHelper(int width, int value) {
+  EXPECT_EXIT(
+      {
+        switch (width) {
+          case 1:
+            asm volatile("inb %%dx, %%al" ::"d"(value) : "%eax");
+            break;
+          case 2:
+            asm volatile("inw %%dx, %%ax" ::"d"(value) : "%eax");
+            break;
+          case 4:
+            asm volatile("inl %%dx, %%eax" ::"d"(value) : "%eax");
+            break;
+          default:
+            FAIL() << "invalid input width, only 1, 2 or 4 is allowed";
+        }
+      },
+      ::testing::KilledBySignal(SIGSEGV), "");
+}
+
+TEST(ExceptionTest, Halt) {
+  // In order to prevent the regular handler from messing with things (and
+  // perhaps refaulting until some other signal occurs), we reset the handler to
+  // the default action here and ensure that it dies correctly.
+  struct sigaction sa = {};
+  sa.sa_handler = SIG_DFL;
+  auto const cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGSEGV, sa));
+
+  EXPECT_EXIT(Halt(), ::testing::KilledBySignal(SIGSEGV), "");
+}
+
+TEST(ExceptionTest, DivideByZero) {
+  // See above.
+  struct sigaction sa = {};
+  sa.sa_handler = SIG_DFL;
+  auto const cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGFPE, sa));
+
+  EXPECT_EXIT(
+      {
+        uint32_t remainder;
+        uint32_t quotient;
+        uint32_t divisor = 0;
+        uint64_t value = 1;
+        asm("divl 0(%2)\r\n"
+            : "=d"(remainder), "=a"(quotient)
+            : "r"(&divisor), "d"(value >> 32), "a"(value));
+        TEST_CHECK(quotient > 0);  // Force dependency.
+      },
+      ::testing::KilledBySignal(SIGFPE), "");
+}
+
+// By default, x87 exceptions are masked and simply return a default value.
+TEST(ExceptionTest, X87DivideByZeroMasked) {
+  int32_t quotient;
+  int32_t value = 1;
+  int32_t divisor = 0;
+  asm("fildl %[value]\r\n"
+      "fidivl %[divisor]\r\n"
+      "fistpl %[quotient]\r\n"
+      : [ quotient ] "=m"(quotient)
+      : [ value ] "m"(value), [ divisor ] "m"(divisor));
+
+  EXPECT_EQ(quotient, INT32_MIN);
+}
+
+// When unmasked, division by zero raises SIGFPE.
+TEST(ExceptionTest, X87DivideByZeroUnmasked) {
+  // See above.
+  struct sigaction sa = {};
+  sa.sa_handler = SIG_DFL;
+  auto const cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGFPE, sa));
+
+  EXPECT_EXIT(
+      {
+        // Clear the divide by zero exception mask.
+        constexpr uint16_t kControlWord =
+            kX87ControlWordDefault & ~kX87ControlWordDiv0Mask;
+
+        int32_t quotient;
+        int32_t value = 1;
+        int32_t divisor = 0;
+        asm volatile(
+            "fldcw %[cw]\r\n"
+            "fildl %[value]\r\n"
+            "fidivl %[divisor]\r\n"
+            "fistpl %[quotient]\r\n"
+            : [ quotient ] "=m"(quotient)
+            : [ cw ] "m"(kControlWord), [ value ] "m"(value),
+              [ divisor ] "m"(divisor));
+      },
+      ::testing::KilledBySignal(SIGFPE), "");
+}
+
+// Pending exceptions in the x87 status register are not clobbered by syscalls.
+TEST(ExceptionTest, X87StatusClobber) {
+  // See above.
+  struct sigaction sa = {};
+  sa.sa_handler = SIG_DFL;
+  auto const cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGFPE, sa));
+
+  EXPECT_EXIT(
+      {
+        // Clear the divide by zero exception mask.
+        constexpr uint16_t kControlWord =
+            kX87ControlWordDefault & ~kX87ControlWordDiv0Mask;
+
+        int32_t quotient;
+        int32_t value = 1;
+        int32_t divisor = 0;
+        asm volatile(
+            "fildl %[value]\r\n"
+            "fidivl %[divisor]\r\n"
+            // Exception is masked, so it does not occur here.
+            "fistpl %[quotient]\r\n"
+
+            // SYS_getpid placed in rax by constraint.
+            "syscall\r\n"
+
+            // Unmask exception. The syscall didn't clobber the pending
+            // exception, so now it can be raised.
+            //
+            // N.B. "a floating-point exception will be generated upon execution
+            // of the *next* floating-point instruction".
+            "fldcw %[cw]\r\n"
+            "fwait\r\n"
+            : [ quotient ] "=m"(quotient)
+            : [ value ] "m"(value), [ divisor ] "m"(divisor), "a"(SYS_getpid),
+              [ cw ] "m"(kControlWord)
+            : "rcx", "r11");
+      },
+      ::testing::KilledBySignal(SIGFPE), "");
+}
+
+// By default, SSE exceptions are masked and simply return a default value.
+TEST(ExceptionTest, SSEDivideByZeroMasked) {
+  uint32_t status;
+  int32_t quotient;
+  int32_t value = 1;
+  int32_t divisor = 0;
+  asm("cvtsi2ssl %[value], %%xmm0\r\n"
+      "cvtsi2ssl %[divisor], %%xmm1\r\n"
+      "divss %%xmm1, %%xmm0\r\n"
+      "cvtss2sil %%xmm0, %[quotient]\r\n"
+      : [ quotient ] "=r"(quotient), [ status ] "=r"(status)
+      : [ value ] "r"(value), [ divisor ] "r"(divisor)
+      : "xmm0", "xmm1");
+
+  EXPECT_EQ(quotient, INT32_MIN);
+}
+
+// When unmasked, division by zero raises SIGFPE.
+TEST(ExceptionTest, SSEDivideByZeroUnmasked) {
+  // See above.
+  struct sigaction sa = {};
+  sa.sa_handler = SIG_DFL;
+  auto const cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGFPE, sa));
+
+  EXPECT_EXIT(
+      {
+        // Clear the divide by zero exception mask.
+        constexpr uint32_t kMXCSR = kMXCSRDefault & ~kMXCSRDiv0Mask;
+
+        int32_t quotient;
+        int32_t value = 1;
+        int32_t divisor = 0;
+        asm volatile(
+            "ldmxcsr %[mxcsr]\r\n"
+            "cvtsi2ssl %[value], %%xmm0\r\n"
+            "cvtsi2ssl %[divisor], %%xmm1\r\n"
+            "divss %%xmm1, %%xmm0\r\n"
+            "cvtss2sil %%xmm0, %[quotient]\r\n"
+            : [ quotient ] "=r"(quotient)
+            : [ mxcsr ] "m"(kMXCSR), [ value ] "r"(value),
+              [ divisor ] "r"(divisor)
+            : "xmm0", "xmm1");
+      },
+      ::testing::KilledBySignal(SIGFPE), "");
+}
+
+// Pending exceptions in the SSE status register are not clobbered by syscalls.
+TEST(ExceptionTest, SSEStatusClobber) {
+  uint32_t mxcsr;
+  int32_t quotient;
+  int32_t value = 1;
+  int32_t divisor = 0;
+  asm("cvtsi2ssl %[value], %%xmm0\r\n"
+      "cvtsi2ssl %[divisor], %%xmm1\r\n"
+      "divss %%xmm1, %%xmm0\r\n"
+      // Exception is masked, so it does not occur here.
+      "cvtss2sil %%xmm0, %[quotient]\r\n"
+
+      // SYS_getpid placed in rax by constraint.
+      "syscall\r\n"
+
+      // Intel SDM Vol 1, Ch 10.2.3.1 "SIMD Floating-Point Mask and Flag Bits":
+      // "If LDMXCSR or FXRSTOR clears a mask bit and sets the corresponding
+      // exception flag bit, a SIMD floating-point exception will not be
+      // generated as a result of this change. The unmasked exception will be
+      // generated only upon the execution of the next SSE/SSE2/SSE3 instruction
+      // that detects the unmasked exception condition."
+      //
+      // Though ambiguous, empirical evidence indicates that this means that
+      // exception flags set in the status register will never cause an
+      // exception to be raised; only a new exception condition will do so.
+      //
+      // Thus here we just check for the flag itself rather than trying to raise
+      // the exception.
+      "stmxcsr %[mxcsr]\r\n"
+      : [ quotient ] "=r"(quotient), [ mxcsr ] "+m"(mxcsr)
+      : [ value ] "r"(value), [ divisor ] "r"(divisor), "a"(SYS_getpid)
+      : "xmm0", "xmm1", "rcx", "r11");
+
+  EXPECT_TRUE(mxcsr & kMXCSRDiv0Flag);
+}
+
+TEST(ExceptionTest, IOAccessFault) {
+  // See above.
+  struct sigaction sa = {};
+  sa.sa_handler = SIG_DFL;
+  auto const cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGSEGV, sa));
+
+  InIOHelper(1, 0x0);
+  InIOHelper(2, 0x7);
+  InIOHelper(4, 0x6);
+  InIOHelper(1, 0xffff);
+  InIOHelper(2, 0xffff);
+  InIOHelper(4, 0xfffd);
+}
+
+TEST(ExceptionTest, Alignment) {
+  SetAlignmentCheck();
+  ClearAlignmentCheck();
+}
+
+TEST(ExceptionTest, AlignmentHalt) {
+  // See above.
+  struct sigaction sa = {};
+  sa.sa_handler = SIG_DFL;
+  auto const cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGSEGV, sa));
+
+  // Reported upstream. We need to ensure that bad flags are cleared even in
+  // fault paths. Set the alignment flag and then generate an exception.
+  EXPECT_EXIT(
+      {
+        SetAlignmentCheck();
+        Halt();
+      },
+      ::testing::KilledBySignal(SIGSEGV), "");
+}
+
+TEST(ExceptionTest, AlignmentCheck) {
+  SKIP_IF(PlatformSupportAlignmentCheck() != PlatformSupport::Allowed);
+
+  // See above.
+  struct sigaction sa = {};
+  sa.sa_handler = SIG_DFL;
+  auto const cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGBUS, sa));
+
+  EXPECT_EXIT(
+      {
+        char array[16];
+        SetAlignmentCheck();
+        for (int i = 0; i < 8; i++) {
+          // At least 7/8 offsets will be unaligned here.
+          uint64_t* ptr = reinterpret_cast<uint64_t*>(&array[i]);
+          asm("mov %0, 0(%0)\r\n" : : "r"(ptr) : "ax");
+        }
+      },
+      ::testing::KilledBySignal(SIGBUS), "");
+}
+
+TEST(ExceptionTest, Int3Normal) {
+  // See above.
+  struct sigaction sa = {};
+  sa.sa_handler = SIG_DFL;
+  auto const cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGTRAP, sa));
+
+  EXPECT_EXIT(Int3Normal(), ::testing::KilledBySignal(SIGTRAP), "");
+}
+
+TEST(ExceptionTest, Int3Compact) {
+  // See above.
+  struct sigaction sa = {};
+  sa.sa_handler = SIG_DFL;
+  auto const cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGTRAP, sa));
+
+  EXPECT_EXIT(Int3Compact(), ::testing::KilledBySignal(SIGTRAP), "");
+}
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/exec.cc b/test/syscalls/linux/exec.cc
new file mode 100644
index 000000000..c5acfc794
--- /dev/null
+++ b/test/syscalls/linux/exec.cc
@@ -0,0 +1,904 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test/syscalls/linux/exec.h"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/eventfd.h>
+#include <sys/resource.h>
+#include <sys/time.h>
+#include <unistd.h>
+
+#include <iostream>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "absl/strings/match.h"
+#include "absl/strings/numbers.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/str_split.h"
+#include "absl/strings/string_view.h"
+#include "absl/synchronization/mutex.h"
+#include "absl/types/optional.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/fs_util.h"
+#include "test/util/multiprocess_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+constexpr char kBasicWorkload[] = "test/syscalls/linux/exec_basic_workload";
+constexpr char kExitScript[] = "test/syscalls/linux/exit_script";
+constexpr char kStateWorkload[] = "test/syscalls/linux/exec_state_workload";
+constexpr char kProcExeWorkload[] =
+    "test/syscalls/linux/exec_proc_exe_workload";
+constexpr char kAssertClosedWorkload[] =
+    "test/syscalls/linux/exec_assert_closed_workload";
+constexpr char kPriorityWorkload[] = "test/syscalls/linux/priority_execve";
+
+constexpr char kExit42[] = "--exec_exit_42";
+constexpr char kExecWithThread[] = "--exec_exec_with_thread";
+constexpr char kExecFromThread[] = "--exec_exec_from_thread";
+
+// Runs file specified by dirfd and pathname with argv and checks that the exit
+// status is expect_status and that stderr contains expect_stderr.
+void CheckExecHelper(const absl::optional<int32_t> dirfd,
+                     const std::string& pathname, const ExecveArray& argv,
+                     const ExecveArray& envv, const int flags,
+                     int expect_status, const std::string& expect_stderr) {
+  int pipe_fds[2];
+  ASSERT_THAT(pipe2(pipe_fds, O_CLOEXEC), SyscallSucceeds());
+
+  FileDescriptor read_fd(pipe_fds[0]);
+  FileDescriptor write_fd(pipe_fds[1]);
+
+  pid_t child;
+  int execve_errno;
+
+  const auto remap_stderr = [pipe_fds] {
+    // Remap stdin and stdout to /dev/null.
+    int fd = open("/dev/null", O_RDWR | O_CLOEXEC);
+    if (fd < 0) {
+      _exit(errno);
+    }
+
+    int ret = dup2(fd, 0);
+    if (ret < 0) {
+      _exit(errno);
+    }
+
+    ret = dup2(fd, 1);
+    if (ret < 0) {
+      _exit(errno);
+    }
+
+    // And stderr to the pipe.
+    ret = dup2(pipe_fds[1], 2);
+    if (ret < 0) {
+      _exit(errno);
+    }
+
+    // Here, we'd ideally close all other FDs inherited from the parent.
+    // However, that's not worth the effort and CloexecNormalFile and
+    // CloexecEventfd depend on that not happening.
+  };
+
+  Cleanup kill;
+  if (dirfd.has_value()) {
+    kill = ASSERT_NO_ERRNO_AND_VALUE(ForkAndExecveat(*dirfd, pathname, argv,
+                                                     envv, flags, remap_stderr,
+                                                     &child, &execve_errno));
+  } else {
+    kill = ASSERT_NO_ERRNO_AND_VALUE(
+        ForkAndExec(pathname, argv, envv, remap_stderr, &child, &execve_errno));
+  }
+
+  ASSERT_EQ(0, execve_errno);
+
+  // Not needed anymore.
+  write_fd.reset();
+
+  // Read stderr until the child exits.
+  std::string output;
+  constexpr int kSize = 128;
+  char buf[kSize];
+  int n;
+  do {
+    ASSERT_THAT(n = ReadFd(read_fd.get(), buf, kSize), SyscallSucceeds());
+    if (n > 0) {
+      output.append(buf, n);
+    }
+  } while (n > 0);
+
+  int status;
+  ASSERT_THAT(RetryEINTR(waitpid)(child, &status, 0), SyscallSucceeds());
+  EXPECT_EQ(status, expect_status);
+
+  // Process cleanup no longer needed.
+  kill.Release();
+
+  EXPECT_TRUE(absl::StrContains(output, expect_stderr)) << output;
+}
+
+void CheckExec(const std::string& filename, const ExecveArray& argv,
+               const ExecveArray& envv, int expect_status,
+               const std::string& expect_stderr) {
+  CheckExecHelper(/*dirfd=*/absl::optional<int32_t>(), filename, argv, envv,
+                  /*flags=*/0, expect_status, expect_stderr);
+}
+
+void CheckExecveat(const int32_t dirfd, const std::string& pathname,
+                   const ExecveArray& argv, const ExecveArray& envv,
+                   const int flags, int expect_status,
+                   const std::string& expect_stderr) {
+  CheckExecHelper(absl::optional<int32_t>(dirfd), pathname, argv, envv, flags,
+                  expect_status, expect_stderr);
+}
+
+TEST(ExecTest, EmptyPath) {
+  int execve_errno;
+  ASSERT_NO_ERRNO_AND_VALUE(ForkAndExec("", {}, {}, nullptr, &execve_errno));
+  EXPECT_EQ(execve_errno, ENOENT);
+}
+
+TEST(ExecTest, Basic) {
+  CheckExec(RunfilePath(kBasicWorkload), {RunfilePath(kBasicWorkload)}, {},
+            ArgEnvExitStatus(0, 0),
+            absl::StrCat(RunfilePath(kBasicWorkload), "\n"));
+}
+
+TEST(ExecTest, OneArg) {
+  CheckExec(RunfilePath(kBasicWorkload), {RunfilePath(kBasicWorkload), "1"}, {},
+            ArgEnvExitStatus(1, 0),
+            absl::StrCat(RunfilePath(kBasicWorkload), "\n1\n"));
+}
+
+TEST(ExecTest, FiveArg) {
+  CheckExec(RunfilePath(kBasicWorkload),
+            {RunfilePath(kBasicWorkload), "1", "2", "3", "4", "5"}, {},
+            ArgEnvExitStatus(5, 0),
+            absl::StrCat(RunfilePath(kBasicWorkload), "\n1\n2\n3\n4\n5\n"));
+}
+
+TEST(ExecTest, OneEnv) {
+  CheckExec(RunfilePath(kBasicWorkload), {RunfilePath(kBasicWorkload)}, {"1"},
+            ArgEnvExitStatus(0, 1),
+            absl::StrCat(RunfilePath(kBasicWorkload), "\n1\n"));
+}
+
+TEST(ExecTest, FiveEnv) {
+  CheckExec(RunfilePath(kBasicWorkload), {RunfilePath(kBasicWorkload)},
+            {"1", "2", "3", "4", "5"}, ArgEnvExitStatus(0, 5),
+            absl::StrCat(RunfilePath(kBasicWorkload), "\n1\n2\n3\n4\n5\n"));
+}
+
+TEST(ExecTest, OneArgOneEnv) {
+  CheckExec(RunfilePath(kBasicWorkload), {RunfilePath(kBasicWorkload), "arg"},
+            {"env"}, ArgEnvExitStatus(1, 1),
+            absl::StrCat(RunfilePath(kBasicWorkload), "\narg\nenv\n"));
+}
+
+TEST(ExecTest, InterpreterScript) {
+  CheckExec(RunfilePath(kExitScript), {RunfilePath(kExitScript), "25"}, {},
+            ArgEnvExitStatus(25, 0), "");
+}
+
+// Everything after the path in the interpreter script is a single argument.
+TEST(ExecTest, InterpreterScriptArgSplit) {
+  // Symlink through /tmp to ensure the path is short enough.
+  TempPath link = ASSERT_NO_ERRNO_AND_VALUE(
+      TempPath::CreateSymlinkTo("/tmp", RunfilePath(kBasicWorkload)));
+
+  TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), absl::StrCat("#!", link.path(), " foo bar"),
+      0755));
+
+  CheckExec(script.path(), {script.path()}, {}, ArgEnvExitStatus(2, 0),
+            absl::StrCat(link.path(), "\nfoo bar\n", script.path(), "\n"));
+}
+
+// Original argv[0] is replaced with the script path.
+TEST(ExecTest, InterpreterScriptArgvZero) {
+  // Symlink through /tmp to ensure the path is short enough.
+  TempPath link = ASSERT_NO_ERRNO_AND_VALUE(
+      TempPath::CreateSymlinkTo("/tmp", RunfilePath(kBasicWorkload)));
+
+  TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), absl::StrCat("#!", link.path()), 0755));
+
+  CheckExec(script.path(), {"REPLACED"}, {}, ArgEnvExitStatus(1, 0),
+            absl::StrCat(link.path(), "\n", script.path(), "\n"));
+}
+
+// Original argv[0] is replaced with the script path, exactly as passed to
+// execve.
+TEST(ExecTest, InterpreterScriptArgvZeroRelative) {
+  // Symlink through /tmp to ensure the path is short enough.
+  TempPath link = ASSERT_NO_ERRNO_AND_VALUE(
+      TempPath::CreateSymlinkTo("/tmp", RunfilePath(kBasicWorkload)));
+
+  TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), absl::StrCat("#!", link.path()), 0755));
+
+  auto cwd = ASSERT_NO_ERRNO_AND_VALUE(GetCWD());
+  auto script_relative =
+      ASSERT_NO_ERRNO_AND_VALUE(GetRelativePath(cwd, script.path()));
+
+  CheckExec(script_relative, {"REPLACED"}, {}, ArgEnvExitStatus(1, 0),
+            absl::StrCat(link.path(), "\n", script_relative, "\n"));
+}
+
+// argv[0] is added as the script path, even if there was none.
+TEST(ExecTest, InterpreterScriptArgvZeroAdded) {
+  // Symlink through /tmp to ensure the path is short enough.
+  TempPath link = ASSERT_NO_ERRNO_AND_VALUE(
+      TempPath::CreateSymlinkTo("/tmp", RunfilePath(kBasicWorkload)));
+
+  TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), absl::StrCat("#!", link.path()), 0755));
+
+  CheckExec(script.path(), {}, {}, ArgEnvExitStatus(1, 0),
+            absl::StrCat(link.path(), "\n", script.path(), "\n"));
+}
+
+// A NUL byte in the script line ends parsing.
+TEST(ExecTest, InterpreterScriptArgNUL) {
+  // Symlink through /tmp to ensure the path is short enough.
+  TempPath link = ASSERT_NO_ERRNO_AND_VALUE(
+      TempPath::CreateSymlinkTo("/tmp", RunfilePath(kBasicWorkload)));
+
+  TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(),
+      absl::StrCat("#!", link.path(), " foo", std::string(1, '\0'), "bar"),
+      0755));
+
+  CheckExec(script.path(), {script.path()}, {}, ArgEnvExitStatus(2, 0),
+            absl::StrCat(link.path(), "\nfoo\n", script.path(), "\n"));
+}
+
+// Trailing whitespace following interpreter path is ignored.
+TEST(ExecTest, InterpreterScriptTrailingWhitespace) {
+  // Symlink through /tmp to ensure the path is short enough.
+  TempPath link = ASSERT_NO_ERRNO_AND_VALUE(
+      TempPath::CreateSymlinkTo("/tmp", RunfilePath(kBasicWorkload)));
+
+  TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), absl::StrCat("#!", link.path(), "  "), 0755));
+
+  CheckExec(script.path(), {script.path()}, {}, ArgEnvExitStatus(1, 0),
+            absl::StrCat(link.path(), "\n", script.path(), "\n"));
+}
+
+// Multiple whitespace characters between interpreter and arg allowed.
+TEST(ExecTest, InterpreterScriptArgWhitespace) {
+  // Symlink through /tmp to ensure the path is short enough.
+  TempPath link = ASSERT_NO_ERRNO_AND_VALUE(
+      TempPath::CreateSymlinkTo("/tmp", RunfilePath(kBasicWorkload)));
+
+  TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), absl::StrCat("#!", link.path(), "  foo"), 0755));
+
+  CheckExec(script.path(), {script.path()}, {}, ArgEnvExitStatus(2, 0),
+            absl::StrCat(link.path(), "\nfoo\n", script.path(), "\n"));
+}
+
+TEST(ExecTest, InterpreterScriptNoPath) {
+  TempPath script = ASSERT_NO_ERRNO_AND_VALUE(
+      TempPath::CreateFileWith(GetAbsoluteTestTmpdir(), "#!", 0755));
+
+  int execve_errno;
+  ASSERT_NO_ERRNO_AND_VALUE(
+      ForkAndExec(script.path(), {script.path()}, {}, nullptr, &execve_errno));
+  EXPECT_EQ(execve_errno, ENOEXEC);
+}
+
+// AT_EXECFN is the path passed to execve.
+TEST(ExecTest, ExecFn) {
+  // Symlink through /tmp to ensure the path is short enough.
+  TempPath link = ASSERT_NO_ERRNO_AND_VALUE(
+      TempPath::CreateSymlinkTo("/tmp", RunfilePath(kStateWorkload)));
+
+  TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), absl::StrCat("#!", link.path(), " PrintExecFn"),
+      0755));
+
+  // Pass the script as a relative path and assert that is what appears in
+  // AT_EXECFN.
+  auto cwd = ASSERT_NO_ERRNO_AND_VALUE(GetCWD());
+  auto script_relative =
+      ASSERT_NO_ERRNO_AND_VALUE(GetRelativePath(cwd, script.path()));
+
+  CheckExec(script_relative, {script_relative}, {}, ArgEnvExitStatus(0, 0),
+            absl::StrCat(script_relative, "\n"));
+}
+
+TEST(ExecTest, ExecName) {
+  std::string path = RunfilePath(kStateWorkload);
+
+  CheckExec(path, {path, "PrintExecName"}, {}, ArgEnvExitStatus(0, 0),
+            absl::StrCat(Basename(path).substr(0, 15), "\n"));
+}
+
+TEST(ExecTest, ExecNameScript) {
+  // Symlink through /tmp to ensure the path is short enough.
+  TempPath link = ASSERT_NO_ERRNO_AND_VALUE(
+      TempPath::CreateSymlinkTo("/tmp", RunfilePath(kStateWorkload)));
+
+  TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(),
+      absl::StrCat("#!", link.path(), " PrintExecName"), 0755));
+
+  std::string script_path = script.path();
+
+  CheckExec(script_path, {script_path}, {}, ArgEnvExitStatus(0, 0),
+            absl::StrCat(Basename(script_path).substr(0, 15), "\n"));
+}
+
+// execve may be called by a multithreaded process.
+TEST(ExecTest, WithSiblingThread) {
+  CheckExec("/proc/self/exe", {"/proc/self/exe", kExecWithThread}, {},
+            W_EXITCODE(42, 0), "");
+}
+
+// execve may be called from a thread other than the leader of a multithreaded
+// process.
+TEST(ExecTest, FromSiblingThread) {
+  CheckExec("/proc/self/exe", {"/proc/self/exe", kExecFromThread}, {},
+            W_EXITCODE(42, 0), "");
+}
+
+TEST(ExecTest, NotFound) {
+  char* const argv[] = {nullptr};
+  char* const envp[] = {nullptr};
+  EXPECT_THAT(execve("/file/does/not/exist", argv, envp),
+              SyscallFailsWithErrno(ENOENT));
+}
+
+TEST(ExecTest, NoExecPerm) {
+  char* const argv[] = {nullptr};
+  char* const envp[] = {nullptr};
+  auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  EXPECT_THAT(execve(f.path().c_str(), argv, envp),
+              SyscallFailsWithErrno(EACCES));
+}
+
+// A signal handler we never expect to be called.
+void SignalHandler(int signo) {
+  std::cerr << "Signal " << signo << " raised." << std::endl;
+  exit(1);
+}
+
+// Signal handlers are reset on execve(2), unless they have default or ignored
+// disposition.
+TEST(ExecStateTest, HandlerReset) {
+  struct sigaction sa;
+  sa.sa_handler = SignalHandler;
+  ASSERT_THAT(sigaction(SIGUSR1, &sa, nullptr), SyscallSucceeds());
+
+  ExecveArray args = {
+      RunfilePath(kStateWorkload),
+      "CheckSigHandler",
+      absl::StrCat(SIGUSR1),
+      absl::StrCat(absl::Hex(reinterpret_cast<uintptr_t>(SIG_DFL))),
+  };
+
+  CheckExec(RunfilePath(kStateWorkload), args, {}, W_EXITCODE(0, 0), "");
+}
+
+// Ignored signal dispositions are not reset.
+TEST(ExecStateTest, IgnorePreserved) {
+  struct sigaction sa;
+  sa.sa_handler = SIG_IGN;
+  ASSERT_THAT(sigaction(SIGUSR1, &sa, nullptr), SyscallSucceeds());
+
+  ExecveArray args = {
+      RunfilePath(kStateWorkload),
+      "CheckSigHandler",
+      absl::StrCat(SIGUSR1),
+      absl::StrCat(absl::Hex(reinterpret_cast<uintptr_t>(SIG_IGN))),
+  };
+
+  CheckExec(RunfilePath(kStateWorkload), args, {}, W_EXITCODE(0, 0), "");
+}
+
+// Signal masks are not reset on exec
+TEST(ExecStateTest, SignalMask) {
+  sigset_t s;
+  sigemptyset(&s);
+  sigaddset(&s, SIGUSR1);
+  ASSERT_THAT(sigprocmask(SIG_BLOCK, &s, nullptr), SyscallSucceeds());
+
+  ExecveArray args = {
+      RunfilePath(kStateWorkload),
+      "CheckSigBlocked",
+      absl::StrCat(SIGUSR1),
+  };
+
+  CheckExec(RunfilePath(kStateWorkload), args, {}, W_EXITCODE(0, 0), "");
+}
+
+// itimers persist across execve.
+// N.B. Timers created with timer_create(2) should not be preserved!
+TEST(ExecStateTest, ItimerPreserved) {
+  // The fork in ForkAndExec clears itimers, so only set them up after fork.
+  auto setup_itimer = [] {
+    // Ignore SIGALRM, as we don't actually care about timer
+    // expirations.
+    struct sigaction sa;
+    sa.sa_handler = SIG_IGN;
+    int ret = sigaction(SIGALRM, &sa, nullptr);
+    if (ret < 0) {
+      _exit(errno);
+    }
+
+    struct itimerval itv;
+    itv.it_interval.tv_sec = 1;
+    itv.it_interval.tv_usec = 0;
+    itv.it_value.tv_sec = 1;
+    itv.it_value.tv_usec = 0;
+    ret = setitimer(ITIMER_REAL, &itv, nullptr);
+    if (ret < 0) {
+      _exit(errno);
+    }
+  };
+
+  std::string filename = RunfilePath(kStateWorkload);
+  ExecveArray argv = {
+      filename,
+      "CheckItimerEnabled",
+      absl::StrCat(ITIMER_REAL),
+  };
+
+  pid_t child;
+  int execve_errno;
+  auto kill = ASSERT_NO_ERRNO_AND_VALUE(
+      ForkAndExec(filename, argv, {}, setup_itimer, &child, &execve_errno));
+  ASSERT_EQ(0, execve_errno);
+
+  int status;
+  ASSERT_THAT(RetryEINTR(waitpid)(child, &status, 0), SyscallSucceeds());
+  EXPECT_EQ(0, status);
+
+  // Process cleanup no longer needed.
+  kill.Release();
+}
+
+TEST(ProcSelfExe, ChangesAcrossExecve) {
+  // See exec_proc_exe_workload for more details. We simply
+  // assert that the /proc/self/exe link changes across execve.
+  CheckExec(RunfilePath(kProcExeWorkload),
+            {RunfilePath(kProcExeWorkload),
+             ASSERT_NO_ERRNO_AND_VALUE(ProcessExePath(getpid()))},
+            {}, W_EXITCODE(0, 0), "");
+}
+
+TEST(ExecTest, CloexecNormalFile) {
+  TempPath tempFile = ASSERT_NO_ERRNO_AND_VALUE(
+      TempPath::CreateFileWith(GetAbsoluteTestTmpdir(), "bar", 0755));
+  const FileDescriptor fd_closed_on_exec =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(tempFile.path(), O_RDONLY | O_CLOEXEC));
+
+  CheckExec(RunfilePath(kAssertClosedWorkload),
+            {RunfilePath(kAssertClosedWorkload),
+             absl::StrCat(fd_closed_on_exec.get())},
+            {}, W_EXITCODE(0, 0), "");
+
+  // The assert closed workload exits with code 2 if the file still exists.  We
+  // can use this to do a negative test.
+  const FileDescriptor fd_open_on_exec =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(tempFile.path(), O_RDONLY));
+
+  CheckExec(
+      RunfilePath(kAssertClosedWorkload),
+      {RunfilePath(kAssertClosedWorkload), absl::StrCat(fd_open_on_exec.get())},
+      {}, W_EXITCODE(2, 0), "");
+}
+
+TEST(ExecTest, CloexecEventfd) {
+  int efd;
+  ASSERT_THAT(efd = eventfd(0, EFD_CLOEXEC), SyscallSucceeds());
+  FileDescriptor fd(efd);
+
+  CheckExec(RunfilePath(kAssertClosedWorkload),
+            {RunfilePath(kAssertClosedWorkload), absl::StrCat(fd.get())}, {},
+            W_EXITCODE(0, 0), "");
+}
+
+constexpr int kLinuxMaxSymlinks = 40;
+
+TEST(ExecTest, SymlinkLimitExceeded) {
+  std::string path = RunfilePath(kBasicWorkload);
+
+  // Hold onto TempPath objects so they are not destructed prematurely.
+  std::vector<TempPath> symlinks;
+  for (int i = 0; i < kLinuxMaxSymlinks + 1; i++) {
+    symlinks.push_back(
+        ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateSymlinkTo("/tmp", path)));
+    path = symlinks[i].path();
+  }
+
+  int execve_errno;
+  ASSERT_NO_ERRNO_AND_VALUE(
+      ForkAndExec(path, {path}, {}, /*child=*/nullptr, &execve_errno));
+  EXPECT_EQ(execve_errno, ELOOP);
+}
+
+TEST(ExecTest, SymlinkLimitRefreshedForInterpreter) {
+  std::string tmp_dir = "/tmp";
+  std::string interpreter_path = "/bin/echo";
+  TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      tmp_dir, absl::StrCat("#!", interpreter_path), 0755));
+  std::string script_path = script.path();
+
+  // Hold onto TempPath objects so they are not destructed prematurely.
+  std::vector<TempPath> interpreter_symlinks;
+  std::vector<TempPath> script_symlinks;
+  // Replace both the interpreter and script paths with symlink chains of just
+  // over half the symlink limit each; this is the minimum required to test that
+  // the symlink limit applies separately to each traversal, while tolerating
+  // some symlinks in the resolution of (the original) interpreter_path and
+  // script_path.
+  for (int i = 0; i < (kLinuxMaxSymlinks / 2) + 1; i++) {
+    interpreter_symlinks.push_back(ASSERT_NO_ERRNO_AND_VALUE(
+        TempPath::CreateSymlinkTo(tmp_dir, interpreter_path)));
+    interpreter_path = interpreter_symlinks[i].path();
+    script_symlinks.push_back(ASSERT_NO_ERRNO_AND_VALUE(
+        TempPath::CreateSymlinkTo(tmp_dir, script_path)));
+    script_path = script_symlinks[i].path();
+  }
+
+  CheckExec(script_path, {script_path}, {}, ArgEnvExitStatus(0, 0), "");
+}
+
+TEST(ExecveatTest, BasicWithFDCWD) {
+  std::string path = RunfilePath(kBasicWorkload);
+  CheckExecveat(AT_FDCWD, path, {path}, {}, /*flags=*/0, ArgEnvExitStatus(0, 0),
+                absl::StrCat(path, "\n"));
+}
+
+TEST(ExecveatTest, Basic) {
+  std::string absolute_path = RunfilePath(kBasicWorkload);
+  std::string parent_dir = std::string(Dirname(absolute_path));
+  std::string base = std::string(Basename(absolute_path));
+  const FileDescriptor dirfd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(parent_dir, O_DIRECTORY));
+
+  CheckExecveat(dirfd.get(), base, {absolute_path}, {}, /*flags=*/0,
+                ArgEnvExitStatus(0, 0), absl::StrCat(absolute_path, "\n"));
+}
+
+TEST(ExecveatTest, FDNotADirectory) {
+  std::string absolute_path = RunfilePath(kBasicWorkload);
+  std::string base = std::string(Basename(absolute_path));
+  const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(absolute_path, 0));
+
+  int execve_errno;
+  ASSERT_NO_ERRNO_AND_VALUE(ForkAndExecveat(fd.get(), base, {absolute_path}, {},
+                                            /*flags=*/0, /*child=*/nullptr,
+                                            &execve_errno));
+  EXPECT_EQ(execve_errno, ENOTDIR);
+}
+
+TEST(ExecveatTest, AbsolutePathWithFDCWD) {
+  std::string path = RunfilePath(kBasicWorkload);
+  CheckExecveat(AT_FDCWD, path, {path}, {}, ArgEnvExitStatus(0, 0), 0,
+                absl::StrCat(path, "\n"));
+}
+
+TEST(ExecveatTest, AbsolutePath) {
+  std::string path = RunfilePath(kBasicWorkload);
+  // File descriptor should be ignored when an absolute path is given.
+  const int32_t badFD = -1;
+  CheckExecveat(badFD, path, {path}, {}, ArgEnvExitStatus(0, 0), 0,
+                absl::StrCat(path, "\n"));
+}
+
+TEST(ExecveatTest, EmptyPathBasic) {
+  std::string path = RunfilePath(kBasicWorkload);
+  const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(path, O_PATH));
+
+  CheckExecveat(fd.get(), "", {path}, {}, AT_EMPTY_PATH, ArgEnvExitStatus(0, 0),
+                absl::StrCat(path, "\n"));
+}
+
+TEST(ExecveatTest, EmptyPathWithDirFD) {
+  std::string path = RunfilePath(kBasicWorkload);
+  std::string parent_dir = std::string(Dirname(path));
+  const FileDescriptor dirfd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(parent_dir, O_DIRECTORY));
+
+  int execve_errno;
+  ASSERT_NO_ERRNO_AND_VALUE(ForkAndExecveat(dirfd.get(), "", {path}, {},
+                                            AT_EMPTY_PATH,
+                                            /*child=*/nullptr, &execve_errno));
+  EXPECT_EQ(execve_errno, EACCES);
+}
+
+TEST(ExecveatTest, EmptyPathWithoutEmptyPathFlag) {
+  std::string path = RunfilePath(kBasicWorkload);
+  const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(path, O_PATH));
+
+  int execve_errno;
+  ASSERT_NO_ERRNO_AND_VALUE(ForkAndExecveat(
+      fd.get(), "", {path}, {}, /*flags=*/0, /*child=*/nullptr, &execve_errno));
+  EXPECT_EQ(execve_errno, ENOENT);
+}
+
+TEST(ExecveatTest, AbsolutePathWithEmptyPathFlag) {
+  std::string path = RunfilePath(kBasicWorkload);
+  const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(path, O_PATH));
+
+  CheckExecveat(fd.get(), path, {path}, {}, AT_EMPTY_PATH,
+                ArgEnvExitStatus(0, 0), absl::StrCat(path, "\n"));
+}
+
+TEST(ExecveatTest, RelativePathWithEmptyPathFlag) {
+  std::string absolute_path = RunfilePath(kBasicWorkload);
+  std::string parent_dir = std::string(Dirname(absolute_path));
+  std::string base = std::string(Basename(absolute_path));
+  const FileDescriptor dirfd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(parent_dir, O_DIRECTORY));
+
+  CheckExecveat(dirfd.get(), base, {absolute_path}, {}, AT_EMPTY_PATH,
+                ArgEnvExitStatus(0, 0), absl::StrCat(absolute_path, "\n"));
+}
+
+TEST(ExecveatTest, SymlinkNoFollowWithRelativePath) {
+  std::string parent_dir = "/tmp";
+  TempPath link = ASSERT_NO_ERRNO_AND_VALUE(
+      TempPath::CreateSymlinkTo(parent_dir, RunfilePath(kBasicWorkload)));
+  const FileDescriptor dirfd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(parent_dir, O_DIRECTORY));
+  std::string base = std::string(Basename(link.path()));
+
+  int execve_errno;
+  ASSERT_NO_ERRNO_AND_VALUE(ForkAndExecveat(dirfd.get(), base, {base}, {},
+                                            AT_SYMLINK_NOFOLLOW,
+                                            /*child=*/nullptr, &execve_errno));
+  EXPECT_EQ(execve_errno, ELOOP);
+}
+
+TEST(ExecveatTest, UnshareFiles) {
+  TempPath tempFile = ASSERT_NO_ERRNO_AND_VALUE(
+      TempPath::CreateFileWith(GetAbsoluteTestTmpdir(), "bar", 0755));
+  const FileDescriptor fd_closed_on_exec =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(tempFile.path(), O_RDONLY | O_CLOEXEC));
+
+  ExecveArray argv = {"test"};
+  ExecveArray envp;
+  std::string child_path = RunfilePath(kBasicWorkload);
+  pid_t child =
+      syscall(__NR_clone, SIGCHLD | CLONE_VFORK | CLONE_FILES, 0, 0, 0, 0);
+  if (child == 0) {
+    execve(child_path.c_str(), argv.get(), envp.get());
+    _exit(1);
+  }
+  ASSERT_THAT(child, SyscallSucceeds());
+
+  int status;
+  ASSERT_THAT(RetryEINTR(waitpid)(child, &status, 0), SyscallSucceeds());
+  EXPECT_EQ(status, 0);
+
+  struct stat st;
+  EXPECT_THAT(fstat(fd_closed_on_exec.get(), &st), SyscallSucceeds());
+}
+
+TEST(ExecveatTest, SymlinkNoFollowWithAbsolutePath) {
+  std::string parent_dir = "/tmp";
+  TempPath link = ASSERT_NO_ERRNO_AND_VALUE(
+      TempPath::CreateSymlinkTo(parent_dir, RunfilePath(kBasicWorkload)));
+  std::string path = link.path();
+
+  int execve_errno;
+  ASSERT_NO_ERRNO_AND_VALUE(ForkAndExecveat(AT_FDCWD, path, {path}, {},
+                                            AT_SYMLINK_NOFOLLOW,
+                                            /*child=*/nullptr, &execve_errno));
+  EXPECT_EQ(execve_errno, ELOOP);
+}
+
+TEST(ExecveatTest, SymlinkNoFollowAndEmptyPath) {
+  TempPath link = ASSERT_NO_ERRNO_AND_VALUE(
+      TempPath::CreateSymlinkTo("/tmp", RunfilePath(kBasicWorkload)));
+  std::string path = link.path();
+  const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(path, 0));
+
+  CheckExecveat(fd.get(), "", {path}, {}, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW,
+                ArgEnvExitStatus(0, 0), absl::StrCat(path, "\n"));
+}
+
+TEST(ExecveatTest, SymlinkNoFollowIgnoreSymlinkAncestor) {
+  TempPath parent_link =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateSymlinkTo("/tmp", "/bin"));
+  std::string path_with_symlink = JoinPath(parent_link.path(), "echo");
+
+  CheckExecveat(AT_FDCWD, path_with_symlink, {path_with_symlink}, {},
+                AT_SYMLINK_NOFOLLOW, ArgEnvExitStatus(0, 0), "");
+}
+
+TEST(ExecveatTest, SymlinkNoFollowWithNormalFile) {
+  const FileDescriptor dirfd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open("/bin", O_DIRECTORY));
+
+  CheckExecveat(dirfd.get(), "echo", {"echo"}, {}, AT_SYMLINK_NOFOLLOW,
+                ArgEnvExitStatus(0, 0), "");
+}
+
+TEST(ExecveatTest, BasicWithCloexecFD) {
+  std::string path = RunfilePath(kBasicWorkload);
+  const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(path, O_CLOEXEC));
+
+  CheckExecveat(fd.get(), "", {path}, {}, AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH,
+                ArgEnvExitStatus(0, 0), absl::StrCat(path, "\n"));
+}
+
+TEST(ExecveatTest, InterpreterScriptWithCloexecFD) {
+  std::string path = RunfilePath(kExitScript);
+  const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(path, O_CLOEXEC));
+
+  int execve_errno;
+  ASSERT_NO_ERRNO_AND_VALUE(ForkAndExecveat(fd.get(), "", {path}, {},
+                                            AT_EMPTY_PATH, /*child=*/nullptr,
+                                            &execve_errno));
+  EXPECT_EQ(execve_errno, ENOENT);
+}
+
+TEST(ExecveatTest, InterpreterScriptWithCloexecDirFD) {
+  std::string absolute_path = RunfilePath(kExitScript);
+  std::string parent_dir = std::string(Dirname(absolute_path));
+  std::string base = std::string(Basename(absolute_path));
+  const FileDescriptor dirfd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(parent_dir, O_CLOEXEC | O_DIRECTORY));
+
+  int execve_errno;
+  ASSERT_NO_ERRNO_AND_VALUE(ForkAndExecveat(dirfd.get(), base, {base}, {},
+                                            /*flags=*/0, /*child=*/nullptr,
+                                            &execve_errno));
+  EXPECT_EQ(execve_errno, ENOENT);
+}
+
+TEST(ExecveatTest, InvalidFlags) {
+  int execve_errno;
+  ASSERT_NO_ERRNO_AND_VALUE(ForkAndExecveat(
+      /*dirfd=*/-1, "", {}, {}, /*flags=*/0xFFFF, /*child=*/nullptr,
+      &execve_errno));
+  EXPECT_EQ(execve_errno, EINVAL);
+}
+
+// Priority consistent across calls to execve()
+TEST(GetpriorityTest, ExecveMaintainsPriority) {
+  int prio = 16;
+  ASSERT_THAT(setpriority(PRIO_PROCESS, getpid(), prio), SyscallSucceeds());
+
+  // To avoid trying to use negative exit values, check for
+  // 20 - prio. Since prio should always be in the range [-20, 19],
+  // this leave expected_exit_code in the range [1, 40].
+  int expected_exit_code = 20 - prio;
+
+  // Program run (priority_execve) will exit(X) where
+  // X=getpriority(PRIO_PROCESS,0). Check that this exit value is prio.
+  CheckExec(RunfilePath(kPriorityWorkload), {RunfilePath(kPriorityWorkload)},
+            {}, W_EXITCODE(expected_exit_code, 0), "");
+}
+
+void ExecWithThread() {
+  // Used to ensure that the thread has actually started.
+  absl::Mutex mu;
+  bool started = false;
+
+  ScopedThread t([&] {
+    mu.Lock();
+    started = true;
+    mu.Unlock();
+
+    while (true) {
+      pause();
+    }
+  });
+
+  mu.LockWhen(absl::Condition(&started));
+  mu.Unlock();
+
+  const ExecveArray argv = {"/proc/self/exe", kExit42};
+  const ExecveArray envv;
+
+  execve("/proc/self/exe", argv.get(), envv.get());
+  exit(errno);
+}
+
+void ExecFromThread() {
+  ScopedThread t([] {
+    const ExecveArray argv = {"/proc/self/exe", kExit42};
+    const ExecveArray envv;
+
+    execve("/proc/self/exe", argv.get(), envv.get());
+    exit(errno);
+  });
+
+  while (true) {
+    pause();
+  }
+}
+
+bool ValidateProcCmdlineVsArgv(const int argc, const char* const* argv) {
+  auto contents_or = GetContents("/proc/self/cmdline");
+  if (!contents_or.ok()) {
+    std::cerr << "Unable to get /proc/self/cmdline: " << contents_or.error()
+              << std::endl;
+    return false;
+  }
+  auto contents = contents_or.ValueOrDie();
+  if (contents.back() != '\0') {
+    std::cerr << "Non-null terminated /proc/self/cmdline!" << std::endl;
+    return false;
+  }
+  contents.pop_back();
+  std::vector<std::string> procfs_cmdline = absl::StrSplit(contents, '\0');
+
+  if (static_cast<int>(procfs_cmdline.size()) != argc) {
+    std::cerr << "argc = " << argc << " != " << procfs_cmdline.size()
+              << std::endl;
+    return false;
+  }
+
+  for (int i = 0; i < argc; ++i) {
+    if (procfs_cmdline[i] != argv[i]) {
+      std::cerr << "Procfs command line argument " << i << " mismatch "
+                << procfs_cmdline[i] << " != " << argv[i] << std::endl;
+      return false;
+    }
+  }
+  return true;
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
+
+int main(int argc, char** argv) {
+  // Start by validating that the stack argv is consistent with procfs.
+  if (!gvisor::testing::ValidateProcCmdlineVsArgv(argc, argv)) {
+    return 1;
+  }
+
+  // Some of these tests require no background threads, so check for them before
+  // TestInit.
+  for (int i = 0; i < argc; i++) {
+    absl::string_view arg(argv[i]);
+
+    if (arg == gvisor::testing::kExit42) {
+      return 42;
+    }
+    if (arg == gvisor::testing::kExecWithThread) {
+      gvisor::testing::ExecWithThread();
+      return 1;
+    }
+    if (arg == gvisor::testing::kExecFromThread) {
+      gvisor::testing::ExecFromThread();
+      return 1;
+    }
+  }
+
+  gvisor::testing::TestInit(&argc, &argv);
+  return gvisor::testing::RunAllTests();
+}
diff --git a/test/syscalls/linux/exec.h b/test/syscalls/linux/exec.h
new file mode 100644
index 000000000..5c0f7e654
--- /dev/null
+++ b/test/syscalls/linux/exec.h
@@ -0,0 +1,34 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_SYSCALLS_EXEC_H_
+#define GVISOR_TEST_SYSCALLS_EXEC_H_
+
+#include <sys/wait.h>
+
+namespace gvisor {
+namespace testing {
+
+// Returns the exit code used by exec_basic_workload.
+inline int ArgEnvExitCode(int args, int envs) { return args + envs * 10; }
+
+// Returns the exit status used by exec_basic_workload.
+inline int ArgEnvExitStatus(int args, int envs) {
+  return W_EXITCODE(ArgEnvExitCode(args, envs), 0);
+}
+
+}  // namespace testing
+}  // namespace gvisor
+
+#endif  // GVISOR_TEST_SYSCALLS_EXEC_H_
diff --git a/test/syscalls/linux/exec_assert_closed_workload.cc b/test/syscalls/linux/exec_assert_closed_workload.cc
new file mode 100644
index 000000000..95643618d
--- /dev/null
+++ b/test/syscalls/linux/exec_assert_closed_workload.cc
@@ -0,0 +1,45 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include <iostream>
+
+#include "absl/strings/numbers.h"
+
+int main(int argc, char** argv) {
+  if (argc != 2) {
+    std::cerr << "need two arguments, got " << argc;
+    exit(1);
+  }
+  int fd;
+  if (!absl::SimpleAtoi(argv[1], &fd)) {
+    std::cerr << "fd: " << argv[1] << " could not be parsed" << std::endl;
+    exit(1);
+  }
+  struct stat s;
+  if (fstat(fd, &s) == 0) {
+    std::cerr << "fd: " << argv[1] << " should not be valid" << std::endl;
+    exit(2);
+  }
+  if (errno != EBADF) {
+    std::cerr << "fstat fd: " << argv[1] << " got errno: " << errno
+              << " wanted: " << EBADF << std::endl;
+    exit(1);
+  }
+  return 0;
+}
diff --git a/test/syscalls/linux/exec_basic_workload.cc b/test/syscalls/linux/exec_basic_workload.cc
new file mode 100644
index 000000000..1bbd6437e
--- /dev/null
+++ b/test/syscalls/linux/exec_basic_workload.cc
@@ -0,0 +1,31 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdlib.h>
+
+#include <iostream>
+
+#include "test/syscalls/linux/exec.h"
+
+int main(int argc, char** argv, char** envp) {
+  int i;
+  for (i = 0; i < argc; i++) {
+    std::cerr << argv[i] << std::endl;
+  }
+  for (i = 0; envp[i] != nullptr; i++) {
+    std::cerr << envp[i] << std::endl;
+  }
+  exit(gvisor::testing::ArgEnvExitCode(argc - 1, i));
+  return 0;
+}
diff --git a/test/syscalls/linux/exec_binary.cc b/test/syscalls/linux/exec_binary.cc
new file mode 100644
index 000000000..18d2f22c1
--- /dev/null
+++ b/test/syscalls/linux/exec_binary.cc
@@ -0,0 +1,1646 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <elf.h>
+#include <errno.h>
+#include <signal.h>
+#include <sys/ptrace.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/user.h>
+#include <unistd.h>
+
+#include <algorithm>
+#include <functional>
+#include <iterator>
+#include <tuple>
+#include <utility>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/string_view.h"
+#include "test/util/cleanup.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/fs_util.h"
+#include "test/util/multiprocess_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/proc_util.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+using ::testing::AnyOf;
+using ::testing::Eq;
+
+#if !defined(__x86_64__) && !defined(__aarch64__)
+// The assembly stub and ELF internal details must be ported to other arches.
+#error "Test only supported on x86-64/arm64"
+#endif  // __x86_64__ || __aarch64__
+
+#if defined(__x86_64__)
+#define EM_TYPE EM_X86_64
+#define IP_REG(p) ((p).rip)
+#define RAX_REG(p) ((p).rax)
+#define RDI_REG(p) ((p).rdi)
+#define RETURN_REG(p) ((p).rax)
+
+// amd64 stub that calls PTRACE_TRACEME and sends itself SIGSTOP.
+const char kPtraceCode[] = {
+    // movq $101, %rax  /* ptrace */
+    '\x48',
+    '\xc7',
+    '\xc0',
+    '\x65',
+    '\x00',
+    '\x00',
+    '\x00',
+    // movq $0, %rsi  /* PTRACE_TRACEME */
+    '\x48',
+    '\xc7',
+    '\xc6',
+    '\x00',
+    '\x00',
+    '\x00',
+    '\x00',
+    // movq $0, %rdi
+    '\x48',
+    '\xc7',
+    '\xc7',
+    '\x00',
+    '\x00',
+    '\x00',
+    '\x00',
+    // movq $0, %rdx
+    '\x48',
+    '\xc7',
+    '\xc2',
+    '\x00',
+    '\x00',
+    '\x00',
+    '\x00',
+    // movq $0, %r10
+    '\x49',
+    '\xc7',
+    '\xc2',
+    '\x00',
+    '\x00',
+    '\x00',
+    '\x00',
+    // syscall
+    '\x0f',
+    '\x05',
+
+    // movq $39, %rax  /* getpid */
+    '\x48',
+    '\xc7',
+    '\xc0',
+    '\x27',
+    '\x00',
+    '\x00',
+    '\x00',
+    // syscall
+    '\x0f',
+    '\x05',
+
+    // movq %rax, %rdi  /* pid */
+    '\x48',
+    '\x89',
+    '\xc7',
+    // movq $62, %rax  /* kill */
+    '\x48',
+    '\xc7',
+    '\xc0',
+    '\x3e',
+    '\x00',
+    '\x00',
+    '\x00',
+    // movq $19, %rsi  /* SIGSTOP */
+    '\x48',
+    '\xc7',
+    '\xc6',
+    '\x13',
+    '\x00',
+    '\x00',
+    '\x00',
+    // syscall
+    '\x0f',
+    '\x05',
+};
+
+// Size of a syscall instruction.
+constexpr int kSyscallSize = 2;
+
+#elif defined(__aarch64__)
+#define EM_TYPE EM_AARCH64
+#define IP_REG(p) ((p).pc)
+#define RAX_REG(p) ((p).regs[8])
+#define RDI_REG(p) ((p).regs[0])
+#define RETURN_REG(p) ((p).regs[0])
+
+const char kPtraceCode[] = {
+    // MOVD $117, R8 /* ptrace */
+    '\xa8',
+    '\x0e',
+    '\x80',
+    '\xd2',
+    // MOVD $0, R0 /* PTRACE_TRACEME */
+    '\x00',
+    '\x00',
+    '\x80',
+    '\xd2',
+    // MOVD $0, R1 /* pid */
+    '\x01',
+    '\x00',
+    '\x80',
+    '\xd2',
+    // MOVD $0, R2 /* addr */
+    '\x02',
+    '\x00',
+    '\x80',
+    '\xd2',
+    // MOVD $0, R3 /* data */
+    '\x03',
+    '\x00',
+    '\x80',
+    '\xd2',
+    // SVC
+    '\x01',
+    '\x00',
+    '\x00',
+    '\xd4',
+    // MOVD $172, R8 /* getpid */
+    '\x88',
+    '\x15',
+    '\x80',
+    '\xd2',
+    // SVC
+    '\x01',
+    '\x00',
+    '\x00',
+    '\xd4',
+    // MOVD $129, R8 /* kill, R0=pid */
+    '\x28',
+    '\x10',
+    '\x80',
+    '\xd2',
+    // MOVD $19, R1  /* SIGSTOP */
+    '\x61',
+    '\x02',
+    '\x80',
+    '\xd2',
+    // SVC
+    '\x01',
+    '\x00',
+    '\x00',
+    '\xd4',
+};
+// Size of a syscall instruction.
+constexpr int kSyscallSize = 4;
+#else
+#error "Unknown architecture"
+#endif
+
+// This test suite tests executable loading in the kernel (ELF and interpreter
+// scripts).
+
+// Parameterized ELF types for 64 and 32 bit.
+template <int Size>
+struct ElfTypes;
+
+template <>
+struct ElfTypes<64> {
+  typedef Elf64_Ehdr ElfEhdr;
+  typedef Elf64_Phdr ElfPhdr;
+};
+
+template <>
+struct ElfTypes<32> {
+  typedef Elf32_Ehdr ElfEhdr;
+  typedef Elf32_Phdr ElfPhdr;
+};
+
+template <int Size>
+struct ElfBinary {
+  using ElfEhdr = typename ElfTypes<Size>::ElfEhdr;
+  using ElfPhdr = typename ElfTypes<Size>::ElfPhdr;
+
+  ElfEhdr header = {};
+  std::vector<ElfPhdr> phdrs;
+  std::vector<char> data;
+
+  // UpdateOffsets updates p_offset, p_vaddr in all phdrs to account for the
+  // space taken by the header and phdrs.
+  //
+  // It also updates header.e_phnum and adds the offset to header.e_entry to
+  // account for the headers residing in the first PT_LOAD segment.
+  //
+  // Before calling UpdateOffsets each of those fields should be the appropriate
+  // offset into data.
+  void UpdateOffsets() {
+    size_t offset = sizeof(header) + phdrs.size() * sizeof(ElfPhdr);
+    header.e_entry += offset;
+    header.e_phnum = phdrs.size();
+    for (auto& p : phdrs) {
+      p.p_offset += offset;
+      p.p_vaddr += offset;
+    }
+  }
+
+  // AddInterpreter adds a PT_INTERP segment with the passed contents.
+  //
+  // A later call to UpdateOffsets is required to make the new phdr valid.
+  void AddInterpreter(std::vector<char> contents) {
+    const int start = data.size();
+    data.insert(data.end(), contents.begin(), contents.end());
+    const int size = data.size() - start;
+
+    ElfPhdr phdr = {};
+    phdr.p_type = PT_INTERP;
+    phdr.p_offset = start;
+    phdr.p_filesz = size;
+    phdr.p_memsz = size;
+    // "If [PT_INTERP] is present, it must precede any loadable segment entry."
+    phdrs.insert(phdrs.begin(), phdr);
+  }
+
+  // Writes the header, phdrs, and data to fd.
+  PosixError Write(int fd) const {
+    int ret = WriteFd(fd, &header, sizeof(header));
+    if (ret < 0) {
+      return PosixError(errno, "failed to write header");
+    } else if (ret != sizeof(header)) {
+      return PosixError(EIO, absl::StrCat("short write of header: ", ret));
+    }
+
+    for (auto const& p : phdrs) {
+      ret = WriteFd(fd, &p, sizeof(p));
+      if (ret < 0) {
+        return PosixError(errno, "failed to write phdr");
+      } else if (ret != sizeof(p)) {
+        return PosixError(EIO, absl::StrCat("short write of phdr: ", ret));
+      }
+    }
+
+    ret = WriteFd(fd, data.data(), data.size());
+    if (ret < 0) {
+      return PosixError(errno, "failed to write data");
+    } else if (ret != static_cast<int>(data.size())) {
+      return PosixError(EIO, absl::StrCat("short write of data: ", ret));
+    }
+
+    return NoError();
+  }
+};
+
+// Creates a new temporary executable ELF file in parent with elf as the
+// contents.
+template <int Size>
+PosixErrorOr<TempPath> CreateElfWith(absl::string_view parent,
+                                     ElfBinary<Size> const& elf) {
+  ASSIGN_OR_RETURN_ERRNO(
+      auto file, TempPath::CreateFileWith(parent, absl::string_view(), 0755));
+  ASSIGN_OR_RETURN_ERRNO(auto fd, Open(file.path(), O_RDWR));
+  RETURN_IF_ERRNO(elf.Write(fd.get()));
+  return std::move(file);
+}
+
+// Creates a new temporary executable ELF file with elf as the contents.
+template <int Size>
+PosixErrorOr<TempPath> CreateElfWith(ElfBinary<Size> const& elf) {
+  return CreateElfWith(GetAbsoluteTestTmpdir(), elf);
+}
+
+// Wait for pid to stop, and assert that it stopped via SIGSTOP.
+PosixError WaitStopped(pid_t pid) {
+  int status;
+  int ret = RetryEINTR(waitpid)(pid, &status, 0);
+  MaybeSave();
+  if (ret < 0) {
+    return PosixError(errno, "wait failed");
+  } else if (ret != pid) {
+    return PosixError(ESRCH, absl::StrCat("wait got ", ret, " want ", pid));
+  }
+
+  if (!WIFSTOPPED(status) || WSTOPSIG(status) != SIGSTOP) {
+    return PosixError(EINVAL,
+                      absl::StrCat("pid did not SIGSTOP; status = ", status));
+  }
+
+  return NoError();
+}
+
+// Returns a valid ELF that PTRACE_TRACEME and SIGSTOPs itself.
+//
+// UpdateOffsets must be called before writing this ELF.
+ElfBinary<64> StandardElf() {
+  ElfBinary<64> elf;
+  elf.header.e_ident[EI_MAG0] = ELFMAG0;
+  elf.header.e_ident[EI_MAG1] = ELFMAG1;
+  elf.header.e_ident[EI_MAG2] = ELFMAG2;
+  elf.header.e_ident[EI_MAG3] = ELFMAG3;
+  elf.header.e_ident[EI_CLASS] = ELFCLASS64;
+  elf.header.e_ident[EI_DATA] = ELFDATA2LSB;
+  elf.header.e_ident[EI_VERSION] = EV_CURRENT;
+  elf.header.e_type = ET_EXEC;
+  elf.header.e_machine = EM_TYPE;
+  elf.header.e_version = EV_CURRENT;
+  elf.header.e_phoff = sizeof(elf.header);
+  elf.header.e_phentsize = sizeof(decltype(elf)::ElfPhdr);
+
+  // TODO(gvisor.dev/issue/153): Always include a PT_GNU_STACK segment to
+  // disable executable stacks. With this omitted the stack (and all PROT_READ)
+  // mappings should be executable, but gVisor doesn't support that.
+  decltype(elf)::ElfPhdr phdr = {};
+  phdr.p_type = PT_GNU_STACK;
+  phdr.p_flags = PF_R | PF_W;
+  elf.phdrs.push_back(phdr);
+
+  phdr = {};
+  phdr.p_type = PT_LOAD;
+  phdr.p_flags = PF_R | PF_X;
+  phdr.p_offset = 0;
+  phdr.p_vaddr = 0x40000;
+  phdr.p_filesz = sizeof(kPtraceCode);
+  phdr.p_memsz = phdr.p_filesz;
+  elf.phdrs.push_back(phdr);
+
+  elf.header.e_entry = phdr.p_vaddr;
+
+  elf.data.assign(kPtraceCode, kPtraceCode + sizeof(kPtraceCode));
+
+  return elf;
+}
+
+// Test that a trivial binary executes.
+TEST(ElfTest, Execute) {
+  ElfBinary<64> elf = StandardElf();
+  elf.UpdateOffsets();
+
+  TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf));
+
+  pid_t child;
+  int execve_errno;
+  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+      ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno));
+  ASSERT_EQ(execve_errno, 0);
+
+  // Ensure it made it to SIGSTOP.
+  ASSERT_NO_ERRNO(WaitStopped(child));
+
+  struct user_regs_struct regs;
+  struct iovec iov;
+  iov.iov_base = &regs;
+  iov.iov_len = sizeof(regs);
+  EXPECT_THAT(ptrace(PTRACE_GETREGSET, child, NT_PRSTATUS, &iov),
+              SyscallSucceeds());
+  // Read exactly the full register set.
+  EXPECT_EQ(iov.iov_len, sizeof(regs));
+  // RIP/PC is just beyond the final syscall instruction.
+  EXPECT_EQ(IP_REG(regs), elf.header.e_entry + sizeof(kPtraceCode));
+
+  EXPECT_THAT(child, ContainsMappings(std::vector<ProcMapsEntry>({
+                         {0x40000, 0x41000, true, false, true, true, 0, 0, 0, 0,
+                          file.path().c_str()},
+                     })));
+}
+
+// StandardElf without data completes execve, but faults once running.
+TEST(ElfTest, MissingText) {
+  ElfBinary<64> elf = StandardElf();
+  elf.data.clear();
+  elf.UpdateOffsets();
+
+  TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf));
+
+  pid_t child;
+  int execve_errno;
+  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+      ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno));
+  ASSERT_EQ(execve_errno, 0);
+
+  int status;
+  ASSERT_THAT(RetryEINTR(waitpid)(child, &status, 0),
+              SyscallSucceedsWithValue(child));
+  // It runs off the end of the zeroes filling the end of the page.
+#if defined(__x86_64__)
+  EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGSEGV) << status;
+#elif defined(__aarch64__)
+  // 0 is an invalid instruction opcode on arm64.
+  EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGILL) << status;
+#endif
+}
+
+// Typical ELF with a data + bss segment
+TEST(ElfTest, DataSegment) {
+  ElfBinary<64> elf = StandardElf();
+
+  // Create a standard ELF, but extend to 1.5 pages. The second page will be the
+  // beginning of a multi-page data + bss segment.
+  elf.data.resize(kPageSize + kPageSize / 2);
+
+  decltype(elf)::ElfPhdr phdr = {};
+  phdr.p_type = PT_LOAD;
+  phdr.p_flags = PF_R | PF_W;
+  phdr.p_offset = kPageSize;
+  phdr.p_vaddr = 0x41000;
+  phdr.p_filesz = kPageSize / 2;
+  // The header is going to push vaddr up by a few hundred bytes. Keep p_memsz a
+  // bit less than 2 pages so this mapping doesn't extend beyond 0x43000.
+  phdr.p_memsz = 2 * kPageSize - kPageSize / 2;
+  elf.phdrs.push_back(phdr);
+
+  elf.UpdateOffsets();
+
+  TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf));
+
+  pid_t child;
+  int execve_errno;
+  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+      ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno));
+  ASSERT_EQ(execve_errno, 0);
+
+  ASSERT_NO_ERRNO(WaitStopped(child));
+
+  EXPECT_THAT(
+      child, ContainsMappings(std::vector<ProcMapsEntry>({
+                 // text page.
+                 {0x40000, 0x41000, true, false, true, true, 0, 0, 0, 0,
+                  file.path().c_str()},
+                 // data + bss page from file.
+                 {0x41000, 0x42000, true, true, false, true, kPageSize, 0, 0, 0,
+                  file.path().c_str()},
+                 // bss page from anon.
+                 {0x42000, 0x43000, true, true, false, true, 0, 0, 0, 0, ""},
+             })));
+}
+
+// Additonal pages beyond filesz honor (only) execute protections.
+//
+// N.B. Linux changed this in 4.11 (16e72e9b30986 "powerpc: do not make the
+// entire heap executable"). Previously, extra pages were always RW.
+TEST(ElfTest, ExtraMemPages) {
+  // gVisor has the newer behavior.
+  if (!IsRunningOnGvisor()) {
+    auto version = ASSERT_NO_ERRNO_AND_VALUE(GetKernelVersion());
+    SKIP_IF(version.major < 4 || (version.major == 4 && version.minor < 11));
+  }
+
+  ElfBinary<64> elf = StandardElf();
+
+  // Create a standard ELF, but extend to 1.5 pages. The second page will be the
+  // beginning of a multi-page data + bss segment.
+  elf.data.resize(kPageSize + kPageSize / 2);
+
+  decltype(elf)::ElfPhdr phdr = {};
+  phdr.p_type = PT_LOAD;
+  // RWX segment. The extra anon page will also be RWX.
+  //
+  // N.B. Linux uses clear_user to clear the end of the file-mapped page, which
+  // respects the mapping protections. Thus if we map this RO with memsz >
+  // (unaligned) filesz, then execve will fail with EFAULT. See padzero(elf_bss)
+  // in fs/binfmt_elf.c:load_elf_binary.
+  //
+  // N.N.B.B. The above only applies to the last segment. For earlier segments,
+  // the clear_user error is ignored.
+  phdr.p_flags = PF_R | PF_W | PF_X;
+  phdr.p_offset = kPageSize;
+  phdr.p_vaddr = 0x41000;
+  phdr.p_filesz = kPageSize / 2;
+  // The header is going to push vaddr up by a few hundred bytes. Keep p_memsz a
+  // bit less than 2 pages so this mapping doesn't extend beyond 0x43000.
+  phdr.p_memsz = 2 * kPageSize - kPageSize / 2;
+  elf.phdrs.push_back(phdr);
+
+  elf.UpdateOffsets();
+
+  TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf));
+
+  pid_t child;
+  int execve_errno;
+  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+      ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno));
+  ASSERT_EQ(execve_errno, 0);
+
+  ASSERT_NO_ERRNO(WaitStopped(child));
+
+  EXPECT_THAT(child,
+              ContainsMappings(std::vector<ProcMapsEntry>({
+                  // text page.
+                  {0x40000, 0x41000, true, false, true, true, 0, 0, 0, 0,
+                   file.path().c_str()},
+                  // data + bss page from file.
+                  {0x41000, 0x42000, true, true, true, true, kPageSize, 0, 0, 0,
+                   file.path().c_str()},
+                  // extra page from anon.
+                  {0x42000, 0x43000, true, true, true, true, 0, 0, 0, 0, ""},
+              })));
+}
+
+// An aligned segment with filesz == 0, memsz > 0 is anon-only.
+TEST(ElfTest, AnonOnlySegment) {
+  ElfBinary<64> elf = StandardElf();
+
+  decltype(elf)::ElfPhdr phdr = {};
+  phdr.p_type = PT_LOAD;
+  // RO segment. The extra anon page will be RW anyways.
+  phdr.p_flags = PF_R;
+  phdr.p_offset = 0;
+  phdr.p_vaddr = 0x41000;
+  phdr.p_filesz = 0;
+  phdr.p_memsz = kPageSize;
+  elf.phdrs.push_back(phdr);
+
+  elf.UpdateOffsets();
+
+  // UpdateOffsets adjusts p_vaddr and p_offset by the header size, but we need
+  // a page-aligned p_vaddr to get a truly anon-only page.
+  elf.phdrs[2].p_vaddr = 0x41000;
+  // N.B. p_offset is now unaligned, but Linux doesn't care since this is
+  // anon-only.
+
+  TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf));
+
+  pid_t child;
+  int execve_errno;
+  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+      ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno));
+  ASSERT_EQ(execve_errno, 0);
+
+  ASSERT_NO_ERRNO(WaitStopped(child));
+
+  EXPECT_THAT(child,
+              ContainsMappings(std::vector<ProcMapsEntry>({
+                  // text page.
+                  {0x40000, 0x41000, true, false, true, true, 0, 0, 0, 0,
+                   file.path().c_str()},
+                  // anon page.
+                  {0x41000, 0x42000, true, true, false, true, 0, 0, 0, 0, ""},
+              })));
+}
+
+// p_offset must have the same alignment as p_vaddr.
+TEST(ElfTest, UnalignedOffset) {
+  ElfBinary<64> elf = StandardElf();
+
+  // Unaligned offset.
+  elf.phdrs[1].p_offset += 1;
+
+  elf.UpdateOffsets();
+
+  TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf));
+
+  pid_t child;
+  int execve_errno;
+  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+      ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno));
+
+  // execve(2) return EINVAL, but behavior varies between Linux and gVisor.
+  //
+  // On Linux, the new mm is committed before attempting to map into it. By the
+  // time we hit EINVAL in the segment mmap, the old mm is gone. Linux returns
+  // to an empty mm, which immediately segfaults.
+  //
+  // OTOH, gVisor maps into the new mm before committing it. Thus when it hits
+  // failure, the caller is still intact to receive the error.
+  if (IsRunningOnGvisor()) {
+    ASSERT_EQ(execve_errno, EINVAL);
+  } else {
+    ASSERT_EQ(execve_errno, 0);
+
+    int status;
+    ASSERT_THAT(RetryEINTR(waitpid)(child, &status, 0),
+                SyscallSucceedsWithValue(child));
+    EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGSEGV) << status;
+  }
+}
+
+// Linux will allow PT_LOAD segments to overlap.
+TEST(ElfTest, DirectlyOverlappingSegments) {
+  // NOTE(b/37289926): see PIEOutOfOrderSegments.
+  SKIP_IF(IsRunningOnGvisor());
+
+  ElfBinary<64> elf = StandardElf();
+
+  // Same as the StandardElf mapping.
+  decltype(elf)::ElfPhdr phdr = {};
+  phdr.p_type = PT_LOAD;
+  // Add PF_W so we can differentiate this mapping from the first.
+  phdr.p_flags = PF_R | PF_W | PF_X;
+  phdr.p_offset = 0;
+  phdr.p_vaddr = 0x40000;
+  phdr.p_filesz = sizeof(kPtraceCode);
+  phdr.p_memsz = phdr.p_filesz;
+  elf.phdrs.push_back(phdr);
+
+  elf.UpdateOffsets();
+
+  TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf));
+
+  pid_t child;
+  int execve_errno;
+  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+      ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno));
+  ASSERT_EQ(execve_errno, 0);
+
+  ASSERT_NO_ERRNO(WaitStopped(child));
+
+  EXPECT_THAT(child, ContainsMappings(std::vector<ProcMapsEntry>({
+                         {0x40000, 0x41000, true, true, true, true, 0, 0, 0, 0,
+                          file.path().c_str()},
+                     })));
+}
+
+// Linux allows out-of-order PT_LOAD segments.
+TEST(ElfTest, OutOfOrderSegments) {
+  // NOTE(b/37289926): see PIEOutOfOrderSegments.
+  SKIP_IF(IsRunningOnGvisor());
+
+  ElfBinary<64> elf = StandardElf();
+
+  decltype(elf)::ElfPhdr phdr = {};
+  phdr.p_type = PT_LOAD;
+  phdr.p_flags = PF_R | PF_X;
+  phdr.p_offset = 0;
+  phdr.p_vaddr = 0x20000;
+  phdr.p_filesz = sizeof(kPtraceCode);
+  phdr.p_memsz = phdr.p_filesz;
+  elf.phdrs.push_back(phdr);
+
+  elf.UpdateOffsets();
+
+  TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf));
+
+  pid_t child;
+  int execve_errno;
+  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+      ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno));
+  ASSERT_EQ(execve_errno, 0);
+
+  ASSERT_NO_ERRNO(WaitStopped(child));
+
+  EXPECT_THAT(child, ContainsMappings(std::vector<ProcMapsEntry>({
+                         {0x20000, 0x21000, true, false, true, true, 0, 0, 0, 0,
+                          file.path().c_str()},
+                         {0x40000, 0x41000, true, false, true, true, 0, 0, 0, 0,
+                          file.path().c_str()},
+                     })));
+}
+
+// header.e_phoff is bound the end of the file.
+TEST(ElfTest, OutOfBoundsPhdrs) {
+  ElfBinary<64> elf = StandardElf();
+  elf.header.e_phoff = 0x100000;
+  elf.UpdateOffsets();
+
+  TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf));
+
+  pid_t child;
+  int execve_errno;
+  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+      ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno));
+  // On Linux 3.11, this caused EIO. On newer Linux, it causes ENOEXEC.
+  EXPECT_THAT(execve_errno, AnyOf(Eq(ENOEXEC), Eq(EIO)));
+}
+
+// Claim there is a phdr beyond the end of the file, but don't include it.
+TEST(ElfTest, MissingPhdr) {
+  ElfBinary<64> elf = StandardElf();
+
+  // Clear data so the file ends immediately after the phdrs.
+  // N.B. Per ElfTest.MissingData, StandardElf without data completes execve
+  // without error.
+  elf.data.clear();
+  elf.UpdateOffsets();
+
+  // Claim that there is another phdr just beyond the end of the file. Of
+  // course, it isn't accessible.
+  elf.header.e_phnum++;
+
+  TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf));
+
+  pid_t child;
+  int execve_errno;
+  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+      ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno));
+  // On Linux 3.11, this caused EIO. On newer Linux, it causes ENOEXEC.
+  EXPECT_THAT(execve_errno, AnyOf(Eq(ENOEXEC), Eq(EIO)));
+}
+
+// No headers at all, just the ELF magic.
+TEST(ElfTest, MissingHeader) {
+  TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileMode(0755));
+  FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR));
+
+  const char kElfMagic[] = {0x7f, 'E', 'L', 'F'};
+
+  ASSERT_THAT(WriteFd(fd.get(), &kElfMagic, sizeof(kElfMagic)),
+              SyscallSucceeds());
+  fd.reset();
+
+  pid_t child;
+  int execve_errno;
+  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+      ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno));
+  EXPECT_EQ(execve_errno, ENOEXEC);
+}
+
+// Load a PIE ELF with a data + bss segment.
+TEST(ElfTest, PIE) {
+  ElfBinary<64> elf = StandardElf();
+
+  elf.header.e_type = ET_DYN;
+
+  // Create a standard ELF, but extend to 1.5 pages. The second page will be the
+  // beginning of a multi-page data + bss segment.
+  elf.data.resize(kPageSize + kPageSize / 2);
+
+  elf.header.e_entry = 0x0;
+
+  decltype(elf)::ElfPhdr phdr = {};
+  phdr.p_type = PT_LOAD;
+  phdr.p_flags = PF_R | PF_W;
+  phdr.p_offset = kPageSize;
+  // Put the data segment at a bit of an offset.
+  phdr.p_vaddr = 0x20000;
+  phdr.p_filesz = kPageSize / 2;
+  // The header is going to push vaddr up by a few hundred bytes. Keep p_memsz a
+  // bit less than 2 pages so this mapping doesn't extend beyond 0x43000.
+  phdr.p_memsz = 2 * kPageSize - kPageSize / 2;
+  elf.phdrs.push_back(phdr);
+
+  elf.UpdateOffsets();
+
+  // The first segment really needs to start at 0 for a normal PIE binary, and
+  // thus includes the headers.
+  const uint64_t offset = elf.phdrs[1].p_offset;
+  elf.phdrs[1].p_offset = 0x0;
+  elf.phdrs[1].p_vaddr = 0x0;
+  elf.phdrs[1].p_filesz += offset;
+  elf.phdrs[1].p_memsz += offset;
+
+  TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf));
+
+  pid_t child;
+  int execve_errno;
+  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+      ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno));
+  ASSERT_EQ(execve_errno, 0);
+
+  ASSERT_NO_ERRNO(WaitStopped(child));
+
+  // RIP tells us which page the first segment was loaded into.
+  struct user_regs_struct regs;
+  struct iovec iov;
+  iov.iov_base = &regs;
+  iov.iov_len = sizeof(regs);
+
+  EXPECT_THAT(ptrace(PTRACE_GETREGSET, child, NT_PRSTATUS, &iov),
+              SyscallSucceeds());
+  // Read exactly the full register set.
+  EXPECT_EQ(iov.iov_len, sizeof(regs));
+
+  const uint64_t load_addr = IP_REG(regs) & ~(kPageSize - 1);
+
+  EXPECT_THAT(child, ContainsMappings(std::vector<ProcMapsEntry>({
+                         // text page.
+                         {load_addr, load_addr + 0x1000, true, false, true,
+                          true, 0, 0, 0, 0, file.path().c_str()},
+                         // data + bss page from file.
+                         {load_addr + 0x20000, load_addr + 0x21000, true, true,
+                          false, true, kPageSize, 0, 0, 0, file.path().c_str()},
+                         // bss page from anon.
+                         {load_addr + 0x21000, load_addr + 0x22000, true, true,
+                          false, true, 0, 0, 0, 0, ""},
+                     })));
+}
+
+// PIE binary with a non-zero start address.
+//
+// This is non-standard for a PIE binary, but valid. The binary is still loaded
+// at an arbitrary address, not the first PT_LOAD vaddr.
+//
+// N.B. Linux changed this behavior in d1fd836dcf00d2028c700c7e44d2c23404062c90.
+// Previously, with "randomization" enabled, PIE binaries with a non-zero start
+// address would be be loaded at the address they specified because mmap was
+// passed the load address, which wasn't 0 as expected.
+//
+// This change is present in kernel v4.1+.
+TEST(ElfTest, PIENonZeroStart) {
+  // gVisor has the newer behavior.
+  if (!IsRunningOnGvisor()) {
+    auto version = ASSERT_NO_ERRNO_AND_VALUE(GetKernelVersion());
+    SKIP_IF(version.major < 4 || (version.major == 4 && version.minor < 1));
+  }
+
+  ElfBinary<64> elf = StandardElf();
+
+  elf.header.e_type = ET_DYN;
+
+  // Create a standard ELF, but extend to 1.5 pages. The second page will be the
+  // beginning of a multi-page data + bss segment.
+  elf.data.resize(kPageSize + kPageSize / 2);
+
+  decltype(elf)::ElfPhdr phdr = {};
+  phdr.p_type = PT_LOAD;
+  phdr.p_flags = PF_R | PF_W;
+  phdr.p_offset = kPageSize;
+  // Put the data segment at a bit of an offset.
+  phdr.p_vaddr = 0x60000;
+  phdr.p_filesz = kPageSize / 2;
+  // The header is going to push vaddr up by a few hundred bytes. Keep p_memsz a
+  // bit less than 2 pages so this mapping doesn't extend beyond 0x43000.
+  phdr.p_memsz = 2 * kPageSize - kPageSize / 2;
+  elf.phdrs.push_back(phdr);
+
+  elf.UpdateOffsets();
+
+  TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf));
+
+  pid_t child;
+  int execve_errno;
+  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+      ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno));
+  ASSERT_EQ(execve_errno, 0);
+
+  ASSERT_NO_ERRNO(WaitStopped(child));
+
+  // RIP tells us which page the first segment was loaded into.
+  struct user_regs_struct regs;
+  struct iovec iov;
+  iov.iov_base = &regs;
+  iov.iov_len = sizeof(regs);
+  EXPECT_THAT(ptrace(PTRACE_GETREGSET, child, NT_PRSTATUS, &iov),
+              SyscallSucceeds());
+  // Read exactly the full register set.
+  EXPECT_EQ(iov.iov_len, sizeof(regs));
+
+  const uint64_t load_addr = IP_REG(regs) & ~(kPageSize - 1);
+
+  // The ELF is loaded at an arbitrary address, not the first PT_LOAD vaddr.
+  //
+  // N.B. this is technically flaky, but Linux is *extremely* unlikely to pick
+  // this as the start address, as it searches from the top down.
+  EXPECT_NE(load_addr, 0x40000);
+
+  EXPECT_THAT(child, ContainsMappings(std::vector<ProcMapsEntry>({
+                         // text page.
+                         {load_addr, load_addr + 0x1000, true, false, true,
+                          true, 0, 0, 0, 0, file.path().c_str()},
+                         // data + bss page from file.
+                         {load_addr + 0x20000, load_addr + 0x21000, true, true,
+                          false, true, kPageSize, 0, 0, 0, file.path().c_str()},
+                         // bss page from anon.
+                         {load_addr + 0x21000, load_addr + 0x22000, true, true,
+                          false, true, 0, 0, 0, 0, ""},
+                     })));
+}
+
+TEST(ElfTest, PIEOutOfOrderSegments) {
+  // TODO(b/37289926): This triggers a bug in Linux where it computes the size
+  // of the binary as 0x20000 - 0x40000 = 0xfffffffffffe0000, which obviously
+  // fails to map.
+  //
+  // We test gVisor's behavior (of rejecting the binary) because I assert that
+  // Linux is wrong and needs to be fixed.
+  SKIP_IF(!IsRunningOnGvisor());
+
+  ElfBinary<64> elf = StandardElf();
+
+  elf.header.e_type = ET_DYN;
+
+  // Create a standard ELF, but extend to 1.5 pages. The second page will be the
+  // beginning of a multi-page data + bss segment.
+  elf.data.resize(kPageSize + kPageSize / 2);
+
+  decltype(elf)::ElfPhdr phdr = {};
+  phdr.p_type = PT_LOAD;
+  phdr.p_flags = PF_R | PF_W;
+  phdr.p_offset = kPageSize;
+  // Put the data segment *before* the first segment.
+  phdr.p_vaddr = 0x20000;
+  phdr.p_filesz = kPageSize / 2;
+  // The header is going to push vaddr up by a few hundred bytes. Keep p_memsz a
+  // bit less than 2 pages so this mapping doesn't extend beyond 0x43000.
+  phdr.p_memsz = 2 * kPageSize - kPageSize / 2;
+  elf.phdrs.push_back(phdr);
+
+  elf.UpdateOffsets();
+
+  TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf));
+
+  pid_t child;
+  int execve_errno;
+  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+      ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno));
+  EXPECT_EQ(execve_errno, ENOEXEC);
+}
+
+// Standard dynamically linked binary with an ELF interpreter.
+TEST(ElfTest, ELFInterpreter) {
+  ElfBinary<64> interpreter = StandardElf();
+  interpreter.header.e_type = ET_DYN;
+  interpreter.header.e_entry = 0x0;
+  interpreter.UpdateOffsets();
+
+  // The first segment really needs to start at 0 for a normal PIE binary, and
+  // thus includes the headers.
+  uint64_t const offset = interpreter.phdrs[1].p_offset;
+  // N.B. Since Linux 4.10 (0036d1f7eb95b "binfmt_elf: fix calculations for bss
+  // padding"), Linux unconditionally zeroes the remainder of the highest mapped
+  // page in an interpreter, failing if the protections don't allow write. Thus
+  // we must mark this writeable.
+  interpreter.phdrs[1].p_flags = PF_R | PF_W | PF_X;
+  interpreter.phdrs[1].p_offset = 0x0;
+  interpreter.phdrs[1].p_vaddr = 0x0;
+  interpreter.phdrs[1].p_filesz += offset;
+  interpreter.phdrs[1].p_memsz += offset;
+
+  TempPath interpreter_file =
+      ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(interpreter));
+
+  ElfBinary<64> binary = StandardElf();
+
+  // Append the interpreter path.
+  int const interp_data_start = binary.data.size();
+  for (char const c : interpreter_file.path()) {
+    binary.data.push_back(c);
+  }
+  // NUL-terminate.
+  binary.data.push_back(0);
+  int const interp_data_size = binary.data.size() - interp_data_start;
+
+  decltype(binary)::ElfPhdr phdr = {};
+  phdr.p_type = PT_INTERP;
+  phdr.p_offset = interp_data_start;
+  phdr.p_filesz = interp_data_size;
+  phdr.p_memsz = interp_data_size;
+  // "If [PT_INTERP] is present, it must precede any loadable segment entry."
+  //
+  // However, Linux allows it anywhere, so we just stick it at the end to make
+  // sure out-of-order PT_INTERP is OK.
+  binary.phdrs.push_back(phdr);
+
+  binary.UpdateOffsets();
+
+  TempPath binary_file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(binary));
+
+  pid_t child;
+  int execve_errno;
+  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(ForkAndExec(
+      binary_file.path(), {binary_file.path()}, {}, &child, &execve_errno));
+  ASSERT_EQ(execve_errno, 0);
+
+  ASSERT_NO_ERRNO(WaitStopped(child));
+
+  // RIP tells us which page the first segment of the interpreter was loaded
+  // into.
+  struct user_regs_struct regs;
+  struct iovec iov;
+  iov.iov_base = &regs;
+  iov.iov_len = sizeof(regs);
+  EXPECT_THAT(ptrace(PTRACE_GETREGSET, child, NT_PRSTATUS, &iov),
+              SyscallSucceeds());
+  // Read exactly the full register set.
+  EXPECT_EQ(iov.iov_len, sizeof(regs));
+
+  const uint64_t interp_load_addr = IP_REG(regs) & ~(kPageSize - 1);
+
+  EXPECT_THAT(
+      child, ContainsMappings(std::vector<ProcMapsEntry>({
+                 // Main binary
+                 {0x40000, 0x41000, true, false, true, true, 0, 0, 0, 0,
+                  binary_file.path().c_str()},
+                 // Interpreter
+                 {interp_load_addr, interp_load_addr + 0x1000, true, true, true,
+                  true, 0, 0, 0, 0, interpreter_file.path().c_str()},
+             })));
+}
+
+// Test parameter to ElfInterpterStaticTest cases. The first item is a suffix to
+// add to the end of the interpreter path in the PT_INTERP segment and the
+// second is the expected execve(2) errno.
+using ElfInterpreterStaticParam = std::tuple<std::vector<char>, int>;
+
+class ElfInterpreterStaticTest
+    : public ::testing::TestWithParam<ElfInterpreterStaticParam> {};
+
+// Statically linked ELF with a statically linked ELF interpreter.
+TEST_P(ElfInterpreterStaticTest, Test) {
+  const std::vector<char> segment_suffix = std::get<0>(GetParam());
+  const int expected_errno = std::get<1>(GetParam());
+
+  ElfBinary<64> interpreter = StandardElf();
+  // See comment in ElfTest.ELFInterpreter.
+  interpreter.phdrs[1].p_flags = PF_R | PF_W | PF_X;
+  interpreter.UpdateOffsets();
+  TempPath interpreter_file =
+      ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(interpreter));
+
+  ElfBinary<64> binary = StandardElf();
+  // The PT_LOAD segment conflicts with the interpreter's PT_LOAD segment. The
+  // interpreter's will be mapped directly over the binary's.
+
+  // Interpreter path plus the parameterized suffix in the PT_INTERP segment.
+  const std::string path = interpreter_file.path();
+  std::vector<char> segment(path.begin(), path.end());
+  segment.insert(segment.end(), segment_suffix.begin(), segment_suffix.end());
+  binary.AddInterpreter(segment);
+
+  binary.UpdateOffsets();
+
+  TempPath binary_file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(binary));
+
+  pid_t child;
+  int execve_errno;
+  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(ForkAndExec(
+      binary_file.path(), {binary_file.path()}, {}, &child, &execve_errno));
+  ASSERT_EQ(execve_errno, expected_errno);
+
+  if (expected_errno == 0) {
+    ASSERT_NO_ERRNO(WaitStopped(child));
+
+    EXPECT_THAT(child, ContainsMappings(std::vector<ProcMapsEntry>({
+                           // Interpreter.
+                           {0x40000, 0x41000, true, true, true, true, 0, 0, 0,
+                            0, interpreter_file.path().c_str()},
+                       })));
+  }
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    Cases, ElfInterpreterStaticTest,
+    ::testing::ValuesIn({
+        // Simple NUL-terminator to run the interpreter as normal.
+        std::make_tuple(std::vector<char>({'\0'}), 0),
+        // Add some garbage to the segment followed by a NUL-terminator. This is
+        // ignored.
+        std::make_tuple(std::vector<char>({'\0', 'b', '\0'}), 0),
+        // Add some garbage to the segment without a NUL-terminator. Linux will
+        // reject
+        // this.
+        std::make_tuple(std::vector<char>({'\0', 'b'}), ENOEXEC),
+    }));
+
+// Test parameter to ElfInterpterBadPathTest cases. The first item is the
+// contents of the PT_INTERP segment and the second is the expected execve(2)
+// errno.
+using ElfInterpreterBadPathParam = std::tuple<std::vector<char>, int>;
+
+class ElfInterpreterBadPathTest
+    : public ::testing::TestWithParam<ElfInterpreterBadPathParam> {};
+
+TEST_P(ElfInterpreterBadPathTest, Test) {
+  const std::vector<char> segment = std::get<0>(GetParam());
+  const int expected_errno = std::get<1>(GetParam());
+
+  ElfBinary<64> binary = StandardElf();
+  binary.AddInterpreter(segment);
+  binary.UpdateOffsets();
+
+  TempPath binary_file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(binary));
+
+  int execve_errno;
+  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(ForkAndExec(
+      binary_file.path(), {binary_file.path()}, {}, nullptr, &execve_errno));
+  EXPECT_EQ(execve_errno, expected_errno);
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    Cases, ElfInterpreterBadPathTest,
+    ::testing::ValuesIn({
+        // NUL-terminated fake path in the PT_INTERP segment.
+        std::make_tuple(std::vector<char>({'/', 'f', '/', 'b', '\0'}), ENOENT),
+        // ELF interpreter not NUL-terminated.
+        std::make_tuple(std::vector<char>({'/', 'f', '/', 'b'}), ENOEXEC),
+        // ELF interpreter path omitted entirely.
+        //
+        // fs/binfmt_elf.c:load_elf_binary returns ENOEXEC if p_filesz is < 2
+        // bytes.
+        std::make_tuple(std::vector<char>({'\0'}), ENOEXEC),
+        // ELF interpreter path = "\0".
+        //
+        // fs/binfmt_elf.c:load_elf_binary returns ENOEXEC if p_filesz is < 2
+        // bytes, so add an extra byte to pass that check.
+        //
+        // load_elf_binary -> open_exec -> do_open_execat fails to check that
+        // name != '\0' before calling do_filp_open, which thus opens the
+        // working directory. do_open_execat returns EACCES because the
+        // directory is not a regular file.
+        std::make_tuple(std::vector<char>({'\0', '\0'}), EACCES),
+    }));
+
+// Relative path to ELF interpreter.
+TEST(ElfTest, ELFInterpreterRelative) {
+  ElfBinary<64> interpreter = StandardElf();
+  interpreter.header.e_type = ET_DYN;
+  interpreter.header.e_entry = 0x0;
+  interpreter.UpdateOffsets();
+
+  // The first segment really needs to start at 0 for a normal PIE binary, and
+  // thus includes the headers.
+  uint64_t const offset = interpreter.phdrs[1].p_offset;
+  // See comment in ElfTest.ELFInterpreter.
+  interpreter.phdrs[1].p_flags = PF_R | PF_W | PF_X;
+  interpreter.phdrs[1].p_offset = 0x0;
+  interpreter.phdrs[1].p_vaddr = 0x0;
+  interpreter.phdrs[1].p_filesz += offset;
+  interpreter.phdrs[1].p_memsz += offset;
+
+  TempPath interpreter_file =
+      ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(interpreter));
+  auto cwd = ASSERT_NO_ERRNO_AND_VALUE(GetCWD());
+  auto interpreter_relative =
+      ASSERT_NO_ERRNO_AND_VALUE(GetRelativePath(cwd, interpreter_file.path()));
+
+  ElfBinary<64> binary = StandardElf();
+
+  // NUL-terminated path in the PT_INTERP segment.
+  std::vector<char> segment(interpreter_relative.begin(),
+                            interpreter_relative.end());
+  segment.push_back(0);
+  binary.AddInterpreter(segment);
+
+  binary.UpdateOffsets();
+
+  TempPath binary_file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(binary));
+
+  pid_t child;
+  int execve_errno;
+  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(ForkAndExec(
+      binary_file.path(), {binary_file.path()}, {}, &child, &execve_errno));
+  ASSERT_EQ(execve_errno, 0);
+
+  ASSERT_NO_ERRNO(WaitStopped(child));
+
+  // RIP tells us which page the first segment of the interpreter was loaded
+  // into.
+  struct user_regs_struct regs;
+  struct iovec iov;
+  iov.iov_base = &regs;
+  iov.iov_len = sizeof(regs);
+  EXPECT_THAT(ptrace(PTRACE_GETREGSET, child, NT_PRSTATUS, &iov),
+              SyscallSucceeds());
+  // Read exactly the full register set.
+  EXPECT_EQ(iov.iov_len, sizeof(regs));
+
+  const uint64_t interp_load_addr = IP_REG(regs) & ~(kPageSize - 1);
+
+  EXPECT_THAT(
+      child, ContainsMappings(std::vector<ProcMapsEntry>({
+                 // Main binary
+                 {0x40000, 0x41000, true, false, true, true, 0, 0, 0, 0,
+                  binary_file.path().c_str()},
+                 // Interpreter
+                 {interp_load_addr, interp_load_addr + 0x1000, true, true, true,
+                  true, 0, 0, 0, 0, interpreter_file.path().c_str()},
+             })));
+}
+
+// ELF interpreter architecture doesn't match the binary.
+TEST(ElfTest, ELFInterpreterWrongArch) {
+  ElfBinary<64> interpreter = StandardElf();
+  interpreter.header.e_machine = EM_PPC64;
+  interpreter.header.e_type = ET_DYN;
+  interpreter.header.e_entry = 0x0;
+  interpreter.UpdateOffsets();
+
+  // The first segment really needs to start at 0 for a normal PIE binary, and
+  // thus includes the headers.
+  uint64_t const offset = interpreter.phdrs[1].p_offset;
+  // See comment in ElfTest.ELFInterpreter.
+  interpreter.phdrs[1].p_flags = PF_R | PF_W | PF_X;
+  interpreter.phdrs[1].p_offset = 0x0;
+  interpreter.phdrs[1].p_vaddr = 0x0;
+  interpreter.phdrs[1].p_filesz += offset;
+  interpreter.phdrs[1].p_memsz += offset;
+
+  TempPath interpreter_file =
+      ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(interpreter));
+
+  ElfBinary<64> binary = StandardElf();
+
+  // NUL-terminated path in the PT_INTERP segment.
+  const std::string path = interpreter_file.path();
+  std::vector<char> segment(path.begin(), path.end());
+  segment.push_back(0);
+  binary.AddInterpreter(segment);
+
+  binary.UpdateOffsets();
+
+  TempPath binary_file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(binary));
+
+  pid_t child;
+  int execve_errno;
+  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(ForkAndExec(
+      binary_file.path(), {binary_file.path()}, {}, &child, &execve_errno));
+  ASSERT_EQ(execve_errno, ELIBBAD);
+}
+
+// No execute permissions on the binary.
+TEST(ElfTest, NoExecute) {
+  ElfBinary<64> elf = StandardElf();
+  elf.UpdateOffsets();
+
+  TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf));
+
+  ASSERT_THAT(chmod(file.path().c_str(), 0644), SyscallSucceeds());
+
+  pid_t child;
+  int execve_errno;
+  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+      ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno));
+  EXPECT_EQ(execve_errno, EACCES);
+}
+
+// Execute, but no read permissions on the binary works just fine.
+TEST(ElfTest, NoRead) {
+  // TODO(gvisor.dev/issue/160): gVisor's backing filesystem may prevent the
+  // sentry from reading the executable.
+  SKIP_IF(IsRunningOnGvisor());
+
+  ElfBinary<64> elf = StandardElf();
+  elf.UpdateOffsets();
+
+  TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf));
+
+  ASSERT_THAT(chmod(file.path().c_str(), 0111), SyscallSucceeds());
+
+  pid_t child;
+  int execve_errno;
+  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+      ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno));
+  ASSERT_EQ(execve_errno, 0);
+
+  ASSERT_NO_ERRNO(WaitStopped(child));
+
+  // TODO(gvisor.dev/issue/160): A task with a non-readable executable is marked
+  // non-dumpable, preventing access to proc files. gVisor does not implement
+  // this behavior.
+}
+
+// No execute permissions on the ELF interpreter.
+TEST(ElfTest, ElfInterpreterNoExecute) {
+  ElfBinary<64> interpreter = StandardElf();
+  interpreter.header.e_type = ET_DYN;
+  interpreter.header.e_entry = 0x0;
+  interpreter.UpdateOffsets();
+
+  // The first segment really needs to start at 0 for a normal PIE binary, and
+  // thus includes the headers.
+  uint64_t const offset = interpreter.phdrs[1].p_offset;
+  // See comment in ElfTest.ELFInterpreter.
+  interpreter.phdrs[1].p_flags = PF_R | PF_W | PF_X;
+  interpreter.phdrs[1].p_offset = 0x0;
+  interpreter.phdrs[1].p_vaddr = 0x0;
+  interpreter.phdrs[1].p_filesz += offset;
+  interpreter.phdrs[1].p_memsz += offset;
+
+  TempPath interpreter_file =
+      ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(interpreter));
+
+  ElfBinary<64> binary = StandardElf();
+
+  // NUL-terminated path in the PT_INTERP segment.
+  const std::string path = interpreter_file.path();
+  std::vector<char> segment(path.begin(), path.end());
+  segment.push_back(0);
+  binary.AddInterpreter(segment);
+
+  binary.UpdateOffsets();
+
+  TempPath binary_file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(binary));
+
+  ASSERT_THAT(chmod(interpreter_file.path().c_str(), 0644), SyscallSucceeds());
+
+  pid_t child;
+  int execve_errno;
+  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+      ForkAndExec(interpreter_file.path(), {interpreter_file.path()}, {},
+                  &child, &execve_errno));
+  EXPECT_EQ(execve_errno, EACCES);
+}
+
+// Execute a basic interpreter script.
+TEST(InterpreterScriptTest, Execute) {
+  ElfBinary<64> elf = StandardElf();
+  elf.UpdateOffsets();
+  // Use /tmp explicitly to ensure the path is short enough.
+  TempPath binary = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith("/tmp", elf));
+
+  TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), absl::StrCat("#!", binary.path()), 0755));
+
+  pid_t child;
+  int execve_errno;
+  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+      ForkAndExec(script.path(), {script.path()}, {}, &child, &execve_errno));
+  ASSERT_EQ(execve_errno, 0);
+
+  EXPECT_NO_ERRNO(WaitStopped(child));
+}
+
+// Whitespace after #!.
+TEST(InterpreterScriptTest, Whitespace) {
+  ElfBinary<64> elf = StandardElf();
+  elf.UpdateOffsets();
+  // Use /tmp explicitly to ensure the path is short enough.
+  TempPath binary = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith("/tmp", elf));
+
+  TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), absl::StrCat("#! \t  \t", binary.path()), 0755));
+
+  pid_t child;
+  int execve_errno;
+  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+      ForkAndExec(script.path(), {script.path()}, {}, &child, &execve_errno));
+  ASSERT_EQ(execve_errno, 0);
+
+  EXPECT_NO_ERRNO(WaitStopped(child));
+}
+
+// Interpreter script is missing execute permission.
+TEST(InterpreterScriptTest, InterpreterScriptNoExecute) {
+  ElfBinary<64> elf = StandardElf();
+  elf.UpdateOffsets();
+  // Use /tmp explicitly to ensure the path is short enough.
+  TempPath binary = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith("/tmp", elf));
+
+  TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), absl::StrCat("#!", binary.path()), 0644));
+
+  pid_t child;
+  int execve_errno;
+  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+      ForkAndExec(script.path(), {script.path()}, {}, &child, &execve_errno));
+  ASSERT_EQ(execve_errno, EACCES);
+}
+
+// Binary interpreter script refers to is missing execute permission.
+TEST(InterpreterScriptTest, BinaryNoExecute) {
+  ElfBinary<64> elf = StandardElf();
+  elf.UpdateOffsets();
+  // Use /tmp explicitly to ensure the path is short enough.
+  TempPath binary = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith("/tmp", elf));
+
+  ASSERT_THAT(chmod(binary.path().c_str(), 0644), SyscallSucceeds());
+
+  TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), absl::StrCat("#!", binary.path()), 0755));
+
+  pid_t child;
+  int execve_errno;
+  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+      ForkAndExec(script.path(), {script.path()}, {}, &child, &execve_errno));
+  ASSERT_EQ(execve_errno, EACCES);
+}
+
+// Linux will load interpreter scripts five levels deep, but no more.
+TEST(InterpreterScriptTest, MaxRecursion) {
+  ElfBinary<64> elf = StandardElf();
+  elf.UpdateOffsets();
+  // Use /tmp explicitly to ensure the path is short enough.
+  TempPath binary = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith("/tmp", elf));
+
+  TempPath script1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      "/tmp", absl::StrCat("#!", binary.path()), 0755));
+  TempPath script2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      "/tmp", absl::StrCat("#!", script1.path()), 0755));
+  TempPath script3 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      "/tmp", absl::StrCat("#!", script2.path()), 0755));
+  TempPath script4 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      "/tmp", absl::StrCat("#!", script3.path()), 0755));
+  TempPath script5 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      "/tmp", absl::StrCat("#!", script4.path()), 0755));
+  TempPath script6 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      "/tmp", absl::StrCat("#!", script5.path()), 0755));
+
+  pid_t child;
+  int execve_errno;
+  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+      ForkAndExec(script6.path(), {script6.path()}, {}, &child, &execve_errno));
+  // Too many levels of recursion.
+  EXPECT_EQ(execve_errno, ELOOP);
+
+  // The next level up is OK.
+  auto cleanup2 = ASSERT_NO_ERRNO_AND_VALUE(
+      ForkAndExec(script5.path(), {script5.path()}, {}, &child, &execve_errno));
+  ASSERT_EQ(execve_errno, 0);
+
+  EXPECT_NO_ERRNO(WaitStopped(child));
+}
+
+// Interpreter script with a relative path.
+TEST(InterpreterScriptTest, RelativePath) {
+  ElfBinary<64> elf = StandardElf();
+  elf.UpdateOffsets();
+  TempPath binary = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith("/tmp", elf));
+
+  auto cwd = ASSERT_NO_ERRNO_AND_VALUE(GetCWD());
+  auto binary_relative =
+      ASSERT_NO_ERRNO_AND_VALUE(GetRelativePath(cwd, binary.path()));
+
+  TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), absl::StrCat("#!", binary_relative), 0755));
+
+  pid_t child;
+  int execve_errno;
+  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+      ForkAndExec(script.path(), {script.path()}, {}, &child, &execve_errno));
+  ASSERT_EQ(execve_errno, 0);
+
+  EXPECT_NO_ERRNO(WaitStopped(child));
+}
+
+// Interpreter script with .. in a path component.
+TEST(InterpreterScriptTest, UncleanPath) {
+  ElfBinary<64> elf = StandardElf();
+  elf.UpdateOffsets();
+  // Use /tmp explicitly to ensure the path is short enough.
+  TempPath binary = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith("/tmp", elf));
+
+  TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), absl::StrCat("#!/tmp/../", binary.path()),
+      0755));
+
+  pid_t child;
+  int execve_errno;
+  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+      ForkAndExec(script.path(), {script.path()}, {}, &child, &execve_errno));
+  ASSERT_EQ(execve_errno, 0);
+
+  EXPECT_NO_ERRNO(WaitStopped(child));
+}
+
+// Passed interpreter script is a symlink.
+TEST(InterpreterScriptTest, Symlink) {
+  ElfBinary<64> elf = StandardElf();
+  elf.UpdateOffsets();
+  // Use /tmp explicitly to ensure the path is short enough.
+  TempPath binary = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith("/tmp", elf));
+
+  TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), absl::StrCat("#!", binary.path()), 0755));
+
+  TempPath link = ASSERT_NO_ERRNO_AND_VALUE(
+      TempPath::CreateSymlinkTo(GetAbsoluteTestTmpdir(), script.path()));
+
+  pid_t child;
+  int execve_errno;
+  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+      ForkAndExec(link.path(), {link.path()}, {}, &child, &execve_errno));
+  ASSERT_EQ(execve_errno, 0);
+
+  EXPECT_NO_ERRNO(WaitStopped(child));
+}
+
+// Interpreter script points to a symlink loop.
+TEST(InterpreterScriptTest, SymlinkLoop) {
+  std::string const link1 = NewTempAbsPathInDir("/tmp");
+  std::string const link2 = NewTempAbsPathInDir("/tmp");
+
+  ASSERT_THAT(symlink(link2.c_str(), link1.c_str()), SyscallSucceeds());
+  auto remove_link1 = Cleanup(
+      [&link1] { EXPECT_THAT(unlink(link1.c_str()), SyscallSucceeds()); });
+
+  ASSERT_THAT(symlink(link1.c_str(), link2.c_str()), SyscallSucceeds());
+  auto remove_link2 = Cleanup(
+      [&link2] { EXPECT_THAT(unlink(link2.c_str()), SyscallSucceeds()); });
+
+  TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), absl::StrCat("#!", link1), 0755));
+
+  pid_t child;
+  int execve_errno;
+  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+      ForkAndExec(script.path(), {script.path()}, {}, &child, &execve_errno));
+  EXPECT_EQ(execve_errno, ELOOP);
+}
+
+// Binary is a symlink loop.
+TEST(ExecveTest, SymlinkLoop) {
+  std::string const link1 = NewTempAbsPathInDir("/tmp");
+  std::string const link2 = NewTempAbsPathInDir("/tmp");
+
+  ASSERT_THAT(symlink(link2.c_str(), link1.c_str()), SyscallSucceeds());
+  auto remove_link = Cleanup(
+      [&link1] { EXPECT_THAT(unlink(link1.c_str()), SyscallSucceeds()); });
+
+  ASSERT_THAT(symlink(link1.c_str(), link2.c_str()), SyscallSucceeds());
+  auto remove_link2 = Cleanup(
+      [&link2] { EXPECT_THAT(unlink(link2.c_str()), SyscallSucceeds()); });
+
+  pid_t child;
+  int execve_errno;
+  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+      ForkAndExec(link1, {link1}, {}, &child, &execve_errno));
+  EXPECT_EQ(execve_errno, ELOOP);
+}
+
+// Binary is a directory.
+TEST(ExecveTest, Directory) {
+  pid_t child;
+  int execve_errno;
+  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+      ForkAndExec("/tmp", {"/tmp"}, {}, &child, &execve_errno));
+  EXPECT_EQ(execve_errno, EACCES);
+}
+
+// Pass a valid binary as a directory (extra / on the end).
+TEST(ExecveTest, BinaryAsDirectory) {
+  ElfBinary<64> elf = StandardElf();
+  elf.UpdateOffsets();
+  TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf));
+
+  std::string const path = absl::StrCat(file.path(), "/");
+
+  pid_t child;
+  int execve_errno;
+  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+      ForkAndExec(path, {path}, {}, &child, &execve_errno));
+  EXPECT_EQ(execve_errno, ENOTDIR);
+}
+
+// The initial brk value is after the page at the end of the binary.
+TEST(ExecveTest, BrkAfterBinary) {
+  ElfBinary<64> elf = StandardElf();
+  elf.UpdateOffsets();
+
+  TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf));
+
+  pid_t child;
+  int execve_errno;
+  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+      ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno));
+  ASSERT_EQ(execve_errno, 0);
+
+  // Ensure it made it to SIGSTOP.
+  ASSERT_NO_ERRNO(WaitStopped(child));
+
+  struct user_regs_struct regs;
+  struct iovec iov;
+  iov.iov_base = &regs;
+  iov.iov_len = sizeof(regs);
+  EXPECT_THAT(ptrace(PTRACE_GETREGSET, child, NT_PRSTATUS, &iov),
+              SyscallSucceeds());
+  // Read exactly the full register set.
+  EXPECT_EQ(iov.iov_len, sizeof(regs));
+
+  // RIP is just beyond the final syscall instruction. Rewind to execute a brk
+  // syscall.
+  IP_REG(regs) -= kSyscallSize;
+  RAX_REG(regs) = __NR_brk;
+  RDI_REG(regs) = 0;
+  ASSERT_THAT(ptrace(PTRACE_SETREGSET, child, NT_PRSTATUS, &iov),
+              SyscallSucceeds());
+
+  // Resume the child, waiting for syscall entry.
+  ASSERT_THAT(ptrace(PTRACE_SYSCALL, child, 0, 0), SyscallSucceeds());
+  int status;
+  ASSERT_THAT(RetryEINTR(waitpid)(child, &status, 0),
+              SyscallSucceedsWithValue(child));
+  ASSERT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP)
+      << "status = " << status;
+
+  // Execute the syscall.
+  ASSERT_THAT(ptrace(PTRACE_SYSCALL, child, 0, 0), SyscallSucceeds());
+  ASSERT_THAT(RetryEINTR(waitpid)(child, &status, 0),
+              SyscallSucceedsWithValue(child));
+  ASSERT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP)
+      << "status = " << status;
+
+  iov.iov_base = &regs;
+  iov.iov_len = sizeof(regs);
+  EXPECT_THAT(ptrace(PTRACE_GETREGSET, child, NT_PRSTATUS, &iov),
+              SyscallSucceeds());
+  // Read exactly the full register set.
+  EXPECT_EQ(iov.iov_len, sizeof(regs));
+
+  // brk is after the text page.
+  //
+  // The kernel does brk randomization, so we can't be sure what the exact
+  // address will be, but it is always beyond the final page in the binary.
+  // i.e., it does not start immediately after memsz in the middle of a page.
+  // Userspace may expect to use that space.
+  EXPECT_GE(RETURN_REG(regs), 0x41000);
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/exec_proc_exe_workload.cc b/test/syscalls/linux/exec_proc_exe_workload.cc
new file mode 100644
index 000000000..2989379b7
--- /dev/null
+++ b/test/syscalls/linux/exec_proc_exe_workload.cc
@@ -0,0 +1,42 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <iostream>
+
+#include "test/util/fs_util.h"
+#include "test/util/posix_error.h"
+
+int main(int argc, char** argv, char** envp) {
+  // This is annoying. Because remote build systems may put these binaries
+  // in a content-addressable-store, you may wind up with /proc/self/exe
+  // pointing to some random path (but with a sensible argv[0]).
+  //
+  // Therefore, this test simply checks that the /proc/self/exe
+  // is absolute and *doesn't* match argv[1].
+  std::string exe =
+      gvisor::testing::ProcessExePath(getpid()).ValueOrDie();
+  if (exe[0] != '/') {
+    std::cerr << "relative path: " << exe << std::endl;
+    exit(1);
+  }
+  if (exe.find(argv[1]) != std::string::npos) {
+    std::cerr << "matching path: " << exe << std::endl;
+    exit(1);
+  }
+
+  return 0;
+}
diff --git a/test/syscalls/linux/exec_state_workload.cc b/test/syscalls/linux/exec_state_workload.cc
new file mode 100644
index 000000000..028902b14
--- /dev/null
+++ b/test/syscalls/linux/exec_state_workload.cc
@@ -0,0 +1,202 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <signal.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+#include <sys/time.h>
+
+#include <iostream>
+#include <ostream>
+#include <string>
+
+#include "absl/strings/numbers.h"
+
+// Pretty-print a sigset_t.
+std::ostream& operator<<(std::ostream& out, const sigset_t& s) {
+  out << "{ ";
+
+  for (int i = 0; i < NSIG; i++) {
+    if (sigismember(&s, i)) {
+      out << i << " ";
+    }
+  }
+
+  out << "}";
+  return out;
+}
+
+// Verify that the signo handler is handler.
+int CheckSigHandler(uint32_t signo, uintptr_t handler) {
+  struct sigaction sa;
+  int ret = sigaction(signo, nullptr, &sa);
+  if (ret < 0) {
+    perror("sigaction");
+    return 1;
+  }
+
+  if (reinterpret_cast<void (*)(int)>(handler) != sa.sa_handler) {
+    std::cerr << "signo " << signo << " handler got: " << sa.sa_handler
+              << " expected: " << std::hex << handler;
+    return 1;
+  }
+  return 0;
+}
+
+// Verify that the signo is blocked.
+int CheckSigBlocked(uint32_t signo) {
+  sigset_t s;
+  int ret = sigprocmask(SIG_SETMASK, nullptr, &s);
+  if (ret < 0) {
+    perror("sigprocmask");
+    return 1;
+  }
+
+  if (!sigismember(&s, signo)) {
+    std::cerr << "signal " << signo << " not blocked in signal mask: " << s
+              << std::endl;
+    return 1;
+  }
+  return 0;
+}
+
+// Verify that the itimer is enabled.
+int CheckItimerEnabled(uint32_t timer) {
+  struct itimerval itv;
+  int ret = getitimer(timer, &itv);
+  if (ret < 0) {
+    perror("getitimer");
+    return 1;
+  }
+
+  if (!itv.it_value.tv_sec && !itv.it_value.tv_usec &&
+      !itv.it_interval.tv_sec && !itv.it_interval.tv_usec) {
+    std::cerr << "timer " << timer
+              << " not enabled. value sec: " << itv.it_value.tv_sec
+              << " usec: " << itv.it_value.tv_usec
+              << " interval sec: " << itv.it_interval.tv_sec
+              << " usec: " << itv.it_interval.tv_usec << std::endl;
+    return 1;
+  }
+  return 0;
+}
+
+int PrintExecFn() {
+  unsigned long execfn = getauxval(AT_EXECFN);
+  if (!execfn) {
+    std::cerr << "AT_EXECFN missing" << std::endl;
+    return 1;
+  }
+
+  std::cerr << reinterpret_cast<const char*>(execfn) << std::endl;
+  return 0;
+}
+
+int PrintExecName() {
+  const size_t name_length = 20;
+  char name[name_length] = {0};
+  if (prctl(PR_GET_NAME, name) < 0) {
+    std::cerr << "prctl(PR_GET_NAME) failed" << std::endl;
+    return 1;
+  }
+
+  std::cerr << name << std::endl;
+  return 0;
+}
+
+void usage(const std::string& prog) {
+  std::cerr << "usage:\n"
+            << "\t" << prog << " CheckSigHandler <signo> <handler addr (hex)>\n"
+            << "\t" << prog << " CheckSigBlocked <signo>\n"
+            << "\t" << prog << " CheckTimerDisabled <timer>\n"
+            << "\t" << prog << " PrintExecFn\n"
+            << "\t" << prog << " PrintExecName" << std::endl;
+}
+
+int main(int argc, char** argv) {
+  if (argc < 2) {
+    usage(argv[0]);
+    return 1;
+  }
+
+  std::string func(argv[1]);
+
+  if (func == "CheckSigHandler") {
+    if (argc != 4) {
+      usage(argv[0]);
+      return 1;
+    }
+
+    uint32_t signo;
+    if (!absl::SimpleAtoi(argv[2], &signo)) {
+      std::cerr << "invalid signo: " << argv[2] << std::endl;
+      return 1;
+    }
+
+    uintptr_t handler;
+    if (!absl::numbers_internal::safe_strtoi_base(argv[3], &handler, 16)) {
+      std::cerr << "invalid handler: " << std::hex << argv[3] << std::endl;
+      return 1;
+    }
+
+    return CheckSigHandler(signo, handler);
+  }
+
+  if (func == "CheckSigBlocked") {
+    if (argc != 3) {
+      usage(argv[0]);
+      return 1;
+    }
+
+    uint32_t signo;
+    if (!absl::SimpleAtoi(argv[2], &signo)) {
+      std::cerr << "invalid signo: " << argv[2] << std::endl;
+      return 1;
+    }
+
+    return CheckSigBlocked(signo);
+  }
+
+  if (func == "CheckItimerEnabled") {
+    if (argc != 3) {
+      usage(argv[0]);
+      return 1;
+    }
+
+    uint32_t timer;
+    if (!absl::SimpleAtoi(argv[2], &timer)) {
+      std::cerr << "invalid signo: " << argv[2] << std::endl;
+      return 1;
+    }
+
+    return CheckItimerEnabled(timer);
+  }
+
+  if (func == "PrintExecFn") {
+    // N.B. This will be called as an interpreter script, with the script passed
+    // as the third argument. We don't care about that script.
+    return PrintExecFn();
+  }
+
+  if (func == "PrintExecName") {
+    // N.B. This may be called as an interpreter script like PrintExecFn.
+    return PrintExecName();
+  }
+
+  std::cerr << "Invalid function: " << func << std::endl;
+  return 1;
+}
diff --git a/test/syscalls/linux/exit.cc b/test/syscalls/linux/exit.cc
new file mode 100644
index 000000000..d52ea786b
--- /dev/null
+++ b/test/syscalls/linux/exit.cc
@@ -0,0 +1,78 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "absl/time/time.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/test_util.h"
+#include "test/util/time_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+void TestExit(int code) {
+  pid_t pid = fork();
+  if (pid == 0) {
+    _exit(code);
+  }
+
+  ASSERT_THAT(pid, SyscallSucceeds());
+
+  int status;
+  EXPECT_THAT(RetryEINTR(waitpid)(pid, &status, 0), SyscallSucceeds());
+  EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == code) << status;
+}
+
+TEST(ExitTest, Success) { TestExit(0); }
+
+TEST(ExitTest, Failure) { TestExit(1); }
+
+// This test ensures that a process's file descriptors are closed when it calls
+// exit(). In order to test this, the parent tries to read from a pipe whose
+// write end is held by the child. While the read is blocking, the child exits,
+// which should cause the parent to read 0 bytes due to EOF.
+TEST(ExitTest, CloseFds) {
+  int pipe_fds[2];
+  ASSERT_THAT(pipe(pipe_fds), SyscallSucceeds());
+
+  FileDescriptor read_fd(pipe_fds[0]);
+  FileDescriptor write_fd(pipe_fds[1]);
+
+  pid_t pid = fork();
+  if (pid == 0) {
+    read_fd.reset();
+
+    SleepSafe(absl::Seconds(10));
+
+    _exit(0);
+  }
+
+  EXPECT_THAT(pid, SyscallSucceeds());
+
+  write_fd.reset();
+
+  char buf[10];
+  EXPECT_THAT(ReadFd(read_fd.get(), buf, sizeof(buf)),
+              SyscallSucceedsWithValue(0));
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/exit_script.sh b/test/syscalls/linux/exit_script.sh
new file mode 100755
index 000000000..527518e06
--- /dev/null
+++ b/test/syscalls/linux/exit_script.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+# Copyright 2018 The gVisor Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+if [ $# -ne 1 ]; then
+  echo "Usage: $0 exit_code"
+  exit 255
+fi
+
+exit $1
diff --git a/test/syscalls/linux/fadvise64.cc b/test/syscalls/linux/fadvise64.cc
new file mode 100644
index 000000000..2af7aa6d9
--- /dev/null
+++ b/test/syscalls/linux/fadvise64.cc
@@ -0,0 +1,72 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <syscall.h>
+#include <unistd.h>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+TEST(FAdvise64Test, Basic) {
+  auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  const auto fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY));
+
+  // fadvise64 is noop in gVisor, so just test that it succeeds.
+  ASSERT_THAT(syscall(__NR_fadvise64, fd.get(), 0, 10, POSIX_FADV_NORMAL),
+              SyscallSucceeds());
+  ASSERT_THAT(syscall(__NR_fadvise64, fd.get(), 0, 10, POSIX_FADV_RANDOM),
+              SyscallSucceeds());
+  ASSERT_THAT(syscall(__NR_fadvise64, fd.get(), 0, 10, POSIX_FADV_SEQUENTIAL),
+              SyscallSucceeds());
+  ASSERT_THAT(syscall(__NR_fadvise64, fd.get(), 0, 10, POSIX_FADV_WILLNEED),
+              SyscallSucceeds());
+  ASSERT_THAT(syscall(__NR_fadvise64, fd.get(), 0, 10, POSIX_FADV_DONTNEED),
+              SyscallSucceeds());
+  ASSERT_THAT(syscall(__NR_fadvise64, fd.get(), 0, 10, POSIX_FADV_NOREUSE),
+              SyscallSucceeds());
+}
+
+TEST(FAdvise64Test, InvalidArgs) {
+  auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  const auto fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY));
+
+  // Note: offset is allowed to be negative.
+  ASSERT_THAT(syscall(__NR_fadvise64, fd.get(), 0, static_cast<off_t>(-1),
+                      POSIX_FADV_NORMAL),
+              SyscallFailsWithErrno(EINVAL));
+  ASSERT_THAT(syscall(__NR_fadvise64, fd.get(), 0, 10, 12345),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(FAdvise64Test, NoPipes) {
+  int fds[2];
+  ASSERT_THAT(pipe(fds), SyscallSucceeds());
+  const FileDescriptor read(fds[0]);
+  const FileDescriptor write(fds[1]);
+
+  ASSERT_THAT(syscall(__NR_fadvise64, read.get(), 0, 10, POSIX_FADV_NORMAL),
+              SyscallFailsWithErrno(ESPIPE));
+}
+
+}  // namespace
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/fallocate.cc b/test/syscalls/linux/fallocate.cc
new file mode 100644
index 000000000..cabc2b751
--- /dev/null
+++ b/test/syscalls/linux/fallocate.cc
@@ -0,0 +1,186 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <sys/eventfd.h>
+#include <sys/resource.h>
+#include <sys/signalfd.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/timerfd.h>
+#include <syscall.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <ctime>
+
+#include "gtest/gtest.h"
+#include "absl/strings/str_cat.h"
+#include "absl/time/time.h"
+#include "test/syscalls/linux/file_base.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/cleanup.h"
+#include "test/util/eventfd_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/posix_error.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+int fallocate(int fd, int mode, off_t offset, off_t len) {
+  return RetryEINTR(syscall)(__NR_fallocate, fd, mode, offset, len);
+}
+
+class AllocateTest : public FileTest {
+  void SetUp() override { FileTest::SetUp(); }
+};
+
+TEST_F(AllocateTest, Fallocate) {
+  // Check that it starts at size zero.
+  struct stat buf;
+  ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds());
+  EXPECT_EQ(buf.st_size, 0);
+
+  // Grow to ten bytes.
+  ASSERT_THAT(fallocate(test_file_fd_.get(), 0, 0, 10), SyscallSucceeds());
+  ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds());
+  EXPECT_EQ(buf.st_size, 10);
+
+  // Allocate to a smaller size should be noop.
+  ASSERT_THAT(fallocate(test_file_fd_.get(), 0, 0, 5), SyscallSucceeds());
+  ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds());
+  EXPECT_EQ(buf.st_size, 10);
+
+  // Grow again.
+  ASSERT_THAT(fallocate(test_file_fd_.get(), 0, 0, 20), SyscallSucceeds());
+  ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds());
+  EXPECT_EQ(buf.st_size, 20);
+
+  // Grow with offset.
+  ASSERT_THAT(fallocate(test_file_fd_.get(), 0, 10, 20), SyscallSucceeds());
+  ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds());
+  EXPECT_EQ(buf.st_size, 30);
+
+  // Grow with offset beyond EOF.
+  ASSERT_THAT(fallocate(test_file_fd_.get(), 0, 39, 1), SyscallSucceeds());
+  ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds());
+  EXPECT_EQ(buf.st_size, 40);
+
+  // Given length 0 should fail with EINVAL.
+  ASSERT_THAT(fallocate(test_file_fd_.get(), 0, 50, 0),
+              SyscallFailsWithErrno(EINVAL));
+  ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds());
+  EXPECT_EQ(buf.st_size, 40);
+}
+
+TEST_F(AllocateTest, FallocateInvalid) {
+  // Invalid FD
+  EXPECT_THAT(fallocate(-1, 0, 0, 10), SyscallFailsWithErrno(EBADF));
+
+  // Negative offset and size.
+  EXPECT_THAT(fallocate(test_file_fd_.get(), 0, -1, 10),
+              SyscallFailsWithErrno(EINVAL));
+  EXPECT_THAT(fallocate(test_file_fd_.get(), 0, 0, -1),
+              SyscallFailsWithErrno(EINVAL));
+  EXPECT_THAT(fallocate(test_file_fd_.get(), 0, -1, -1),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_F(AllocateTest, FallocateReadonly) {
+  auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY));
+  EXPECT_THAT(fallocate(fd.get(), 0, 0, 10), SyscallFailsWithErrno(EBADF));
+}
+
+TEST_F(AllocateTest, FallocatePipe) {
+  int pipes[2];
+  EXPECT_THAT(pipe(pipes), SyscallSucceeds());
+  auto cleanup = Cleanup([&pipes] {
+    EXPECT_THAT(close(pipes[0]), SyscallSucceeds());
+    EXPECT_THAT(close(pipes[1]), SyscallSucceeds());
+  });
+
+  EXPECT_THAT(fallocate(pipes[1], 0, 0, 10), SyscallFailsWithErrno(ESPIPE));
+}
+
+TEST_F(AllocateTest, FallocateChar) {
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/null", O_RDWR));
+  EXPECT_THAT(fallocate(fd.get(), 0, 0, 10), SyscallFailsWithErrno(ENODEV));
+}
+
+TEST_F(AllocateTest, FallocateRlimit) {
+  // Get the current rlimit and restore after test run.
+  struct rlimit initial_lim;
+  ASSERT_THAT(getrlimit(RLIMIT_FSIZE, &initial_lim), SyscallSucceeds());
+  auto cleanup = Cleanup([&initial_lim] {
+    EXPECT_THAT(setrlimit(RLIMIT_FSIZE, &initial_lim), SyscallSucceeds());
+  });
+
+  // Try growing past the file size limit.
+  sigset_t new_mask;
+  sigemptyset(&new_mask);
+  sigaddset(&new_mask, SIGXFSZ);
+  sigprocmask(SIG_BLOCK, &new_mask, nullptr);
+
+  struct rlimit setlim = {};
+  setlim.rlim_cur = 1024;
+  setlim.rlim_max = RLIM_INFINITY;
+  ASSERT_THAT(setrlimit(RLIMIT_FSIZE, &setlim), SyscallSucceeds());
+
+  EXPECT_THAT(fallocate(test_file_fd_.get(), 0, 0, 1025),
+              SyscallFailsWithErrno(EFBIG));
+
+  struct timespec timelimit = {};
+  timelimit.tv_sec = 10;
+  EXPECT_EQ(sigtimedwait(&new_mask, nullptr, &timelimit), SIGXFSZ);
+  ASSERT_THAT(sigprocmask(SIG_UNBLOCK, &new_mask, nullptr), SyscallSucceeds());
+}
+
+TEST_F(AllocateTest, FallocateOtherFDs) {
+  int fd;
+  ASSERT_THAT(fd = timerfd_create(CLOCK_MONOTONIC, 0), SyscallSucceeds());
+  auto timer_fd = FileDescriptor(fd);
+  EXPECT_THAT(fallocate(timer_fd.get(), 0, 0, 10),
+              SyscallFailsWithErrno(ENODEV));
+
+  sigset_t mask;
+  sigemptyset(&mask);
+  ASSERT_THAT(fd = signalfd(-1, &mask, 0), SyscallSucceeds());
+  auto sfd = FileDescriptor(fd);
+  EXPECT_THAT(fallocate(sfd.get(), 0, 0, 10), SyscallFailsWithErrno(ENODEV));
+
+  auto efd =
+      ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, EFD_NONBLOCK | EFD_SEMAPHORE));
+  EXPECT_THAT(fallocate(efd.get(), 0, 0, 10), SyscallFailsWithErrno(ENODEV));
+
+  auto sockfd = ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, 0));
+  EXPECT_THAT(fallocate(sockfd.get(), 0, 0, 10), SyscallFailsWithErrno(ENODEV));
+
+  int socks[2];
+  ASSERT_THAT(socketpair(AF_UNIX, SOCK_STREAM, PF_UNIX, socks),
+              SyscallSucceeds());
+  auto sock0 = FileDescriptor(socks[0]);
+  auto sock1 = FileDescriptor(socks[1]);
+  EXPECT_THAT(fallocate(sock0.get(), 0, 0, 10), SyscallFailsWithErrno(ENODEV));
+}
+
+}  // namespace
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/fault.cc b/test/syscalls/linux/fault.cc
new file mode 100644
index 000000000..a85750382
--- /dev/null
+++ b/test/syscalls/linux/fault.cc
@@ -0,0 +1,74 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#define _GNU_SOURCE 1
+#include <signal.h>
+#include <ucontext.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+__attribute__((noinline)) void Fault(void) {
+  volatile int* foo = nullptr;
+  *foo = 0;
+}
+
+int GetPcFromUcontext(ucontext_t* uc, uintptr_t* pc) {
+#if defined(__x86_64__)
+  *pc = uc->uc_mcontext.gregs[REG_RIP];
+  return 1;
+#elif defined(__i386__)
+  *pc = uc->uc_mcontext.gregs[REG_EIP];
+  return 1;
+#elif defined(__aarch64__)
+  *pc = uc->uc_mcontext.pc;
+  return 1;
+#else
+  return 0;
+#endif
+}
+
+void sigact_handler(int sig, siginfo_t* siginfo, void* context) {
+  uintptr_t pc;
+  if (GetPcFromUcontext(reinterpret_cast<ucontext_t*>(context), &pc)) {
+    /* Expect Fault() to be at most 64 bytes in size. */
+    uintptr_t fault_addr = reinterpret_cast<uintptr_t>(&Fault);
+    EXPECT_GE(pc, fault_addr);
+    EXPECT_LT(pc, fault_addr + 64);
+    exit(0);
+  }
+}
+
+TEST(FaultTest, InRange) {
+  // Reset the signal handler to do nothing so that it doesn't freak out
+  // the test runner when we fire an alarm.
+  struct sigaction sa = {};
+  sa.sa_sigaction = sigact_handler;
+  sigfillset(&sa.sa_mask);
+  sa.sa_flags = SA_SIGINFO;
+  ASSERT_THAT(sigaction(SIGSEGV, &sa, nullptr), SyscallSucceeds());
+
+  Fault();
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/fchdir.cc b/test/syscalls/linux/fchdir.cc
new file mode 100644
index 000000000..08bcae1e8
--- /dev/null
+++ b/test/syscalls/linux/fchdir.cc
@@ -0,0 +1,77 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/un.h>
+
+#include "gtest/gtest.h"
+#include "test/util/capability_util.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(FchdirTest, Success) {
+  auto temp_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  int fd;
+  ASSERT_THAT(fd = open(temp_dir.path().c_str(), O_DIRECTORY | O_RDONLY),
+              SyscallSucceeds());
+
+  EXPECT_THAT(fchdir(fd), SyscallSucceeds());
+  EXPECT_THAT(close(fd), SyscallSucceeds());
+  // Change CWD to a permanent location as temp dirs will be cleaned up.
+  EXPECT_THAT(chdir("/"), SyscallSucceeds());
+}
+
+TEST(FchdirTest, InvalidFD) {
+  EXPECT_THAT(fchdir(-1), SyscallFailsWithErrno(EBADF));
+}
+
+TEST(FchdirTest, PermissionDenied) {
+  // Drop capabilities that allow us to override directory permissions.
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false));
+
+  auto temp_dir = ASSERT_NO_ERRNO_AND_VALUE(
+      TempPath::CreateDirWith(GetAbsoluteTestTmpdir(), 0666 /* mode */));
+
+  int fd;
+  ASSERT_THAT(fd = open(temp_dir.path().c_str(), O_DIRECTORY | O_RDONLY),
+              SyscallSucceeds());
+
+  EXPECT_THAT(fchdir(fd), SyscallFailsWithErrno(EACCES));
+  EXPECT_THAT(close(fd), SyscallSucceeds());
+}
+
+TEST(FchdirTest, NotDir) {
+  auto temp_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+
+  int fd;
+  ASSERT_THAT(fd = open(temp_file.path().c_str(), O_CREAT | O_RDONLY, 0777),
+              SyscallSucceeds());
+
+  EXPECT_THAT(fchdir(fd), SyscallFailsWithErrno(ENOTDIR));
+  EXPECT_THAT(close(fd), SyscallSucceeds());
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/fcntl.cc b/test/syscalls/linux/fcntl.cc
new file mode 100644
index 000000000..5467fa2c8
--- /dev/null
+++ b/test/syscalls/linux/fcntl.cc
@@ -0,0 +1,1353 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <syscall.h>
+#include <unistd.h>
+
+#include <iostream>
+#include <list>
+#include <string>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "absl/base/macros.h"
+#include "absl/base/port.h"
+#include "absl/flags/flag.h"
+#include "absl/memory/memory.h"
+#include "absl/strings/str_cat.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/cleanup.h"
+#include "test/util/eventfd_util.h"
+#include "test/util/fs_util.h"
+#include "test/util/multiprocess_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/save_util.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+#include "test/util/timer_util.h"
+
+ABSL_FLAG(std::string, child_setlock_on, "",
+          "Contains the path to try to set a file lock on.");
+ABSL_FLAG(bool, child_setlock_write, false,
+          "Whether to set a writable lock (otherwise readable)");
+ABSL_FLAG(bool, blocking, false,
+          "Whether to set a blocking lock (otherwise non-blocking).");
+ABSL_FLAG(bool, retry_eintr, false,
+          "Whether to retry in the subprocess on EINTR.");
+ABSL_FLAG(uint64_t, child_setlock_start, 0, "The value of struct flock start");
+ABSL_FLAG(uint64_t, child_setlock_len, 0, "The value of struct flock len");
+ABSL_FLAG(int32_t, socket_fd, -1,
+          "A socket to use for communicating more state back "
+          "to the parent.");
+
+namespace gvisor {
+namespace testing {
+
+class FcntlLockTest : public ::testing::Test {
+ public:
+  void SetUp() override {
+    // Let's make a socket pair.
+    ASSERT_THAT(socketpair(AF_UNIX, SOCK_STREAM, 0, fds_), SyscallSucceeds());
+  }
+
+  void TearDown() override {
+    EXPECT_THAT(close(fds_[0]), SyscallSucceeds());
+    EXPECT_THAT(close(fds_[1]), SyscallSucceeds());
+  }
+
+  int64_t GetSubprocessFcntlTimeInUsec() {
+    int64_t ret = 0;
+    EXPECT_THAT(ReadFd(fds_[0], reinterpret_cast<void*>(&ret), sizeof(ret)),
+                SyscallSucceedsWithValue(sizeof(ret)));
+    return ret;
+  }
+
+  // The first fd will remain with the process creating the subprocess
+  // and the second will go to the subprocess.
+  int fds_[2] = {};
+};
+
+namespace {
+
+PosixErrorOr<Cleanup> SubprocessLock(std::string const& path, bool for_write,
+                                     bool blocking, bool retry_eintr, int fd,
+                                     off_t start, off_t length, pid_t* child) {
+  std::vector<std::string> args = {
+      "/proc/self/exe",        "--child_setlock_on", path,
+      "--child_setlock_start", absl::StrCat(start),  "--child_setlock_len",
+      absl::StrCat(length),    "--socket_fd",        absl::StrCat(fd)};
+
+  if (for_write) {
+    args.push_back("--child_setlock_write");
+  }
+
+  if (blocking) {
+    args.push_back("--blocking");
+  }
+
+  if (retry_eintr) {
+    args.push_back("--retry_eintr");
+  }
+
+  int execve_errno = 0;
+  ASSIGN_OR_RETURN_ERRNO(
+      auto cleanup,
+      ForkAndExec("/proc/self/exe", ExecveArray(args.begin(), args.end()), {},
+                  nullptr, child, &execve_errno));
+
+  if (execve_errno != 0) {
+    return PosixError(execve_errno, "execve");
+  }
+
+  return std::move(cleanup);
+}
+
+TEST(FcntlTest, SetCloExecBadFD) {
+  // Open an eventfd file descriptor with FD_CLOEXEC descriptor flag not set.
+  FileDescriptor f = ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, 0));
+  auto fd = f.get();
+  f.reset();
+  ASSERT_THAT(fcntl(fd, F_GETFD), SyscallFailsWithErrno(EBADF));
+  ASSERT_THAT(fcntl(fd, F_SETFD, FD_CLOEXEC), SyscallFailsWithErrno(EBADF));
+}
+
+TEST(FcntlTest, SetCloExec) {
+  // Open an eventfd file descriptor with FD_CLOEXEC descriptor flag not set.
+  FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, 0));
+  ASSERT_THAT(fcntl(fd.get(), F_GETFD), SyscallSucceedsWithValue(0));
+
+  // Set the FD_CLOEXEC flag.
+  ASSERT_THAT(fcntl(fd.get(), F_SETFD, FD_CLOEXEC), SyscallSucceeds());
+  ASSERT_THAT(fcntl(fd.get(), F_GETFD), SyscallSucceedsWithValue(FD_CLOEXEC));
+}
+
+TEST(FcntlTest, ClearCloExec) {
+  // Open an eventfd file descriptor with FD_CLOEXEC descriptor flag set.
+  FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, EFD_CLOEXEC));
+  ASSERT_THAT(fcntl(fd.get(), F_GETFD), SyscallSucceedsWithValue(FD_CLOEXEC));
+
+  // Clear the FD_CLOEXEC flag.
+  ASSERT_THAT(fcntl(fd.get(), F_SETFD, 0), SyscallSucceeds());
+  ASSERT_THAT(fcntl(fd.get(), F_GETFD), SyscallSucceedsWithValue(0));
+}
+
+TEST(FcntlTest, IndependentDescriptorFlags) {
+  // Open an eventfd file descriptor with FD_CLOEXEC descriptor flag not set.
+  FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, 0));
+  ASSERT_THAT(fcntl(fd.get(), F_GETFD), SyscallSucceedsWithValue(0));
+
+  // Duplicate the descriptor. Ensure that it also doesn't have FD_CLOEXEC.
+  FileDescriptor newfd = ASSERT_NO_ERRNO_AND_VALUE(fd.Dup());
+  ASSERT_THAT(fcntl(newfd.get(), F_GETFD), SyscallSucceedsWithValue(0));
+
+  // Set FD_CLOEXEC on the first FD.
+  ASSERT_THAT(fcntl(fd.get(), F_SETFD, FD_CLOEXEC), SyscallSucceeds());
+  ASSERT_THAT(fcntl(fd.get(), F_GETFD), SyscallSucceedsWithValue(FD_CLOEXEC));
+
+  // Ensure that the second FD is unaffected by the change on the first.
+  ASSERT_THAT(fcntl(newfd.get(), F_GETFD), SyscallSucceedsWithValue(0));
+}
+
+// All file description flags passed to open appear in F_GETFL.
+TEST(FcntlTest, GetAllFlags) {
+  TempPath path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  int flags = O_RDWR | O_DIRECT | O_SYNC | O_NONBLOCK | O_APPEND;
+  FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(path.path(), flags));
+
+  // Linux forces O_LARGEFILE on all 64-bit kernels and gVisor's is 64-bit.
+  int expected = flags | kOLargeFile;
+
+  int rflags;
+  EXPECT_THAT(rflags = fcntl(fd.get(), F_GETFL), SyscallSucceeds());
+  EXPECT_EQ(rflags, expected);
+}
+
+TEST(FcntlTest, SetFlags) {
+  TempPath path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(path.path(), 0));
+
+  int const flags = O_RDWR | O_DIRECT | O_SYNC | O_NONBLOCK | O_APPEND;
+  EXPECT_THAT(fcntl(fd.get(), F_SETFL, flags), SyscallSucceeds());
+
+  // Can't set O_RDWR or O_SYNC.
+  // Linux forces O_LARGEFILE on all 64-bit kernels and gVisor's is 64-bit.
+  int expected = O_DIRECT | O_NONBLOCK | O_APPEND | kOLargeFile;
+
+  int rflags;
+  EXPECT_THAT(rflags = fcntl(fd.get(), F_GETFL), SyscallSucceeds());
+  EXPECT_EQ(rflags, expected);
+}
+
+void TestLock(int fd, short lock_type = F_RDLCK) {  // NOLINT, type in flock
+  struct flock fl;
+  fl.l_type = lock_type;
+  fl.l_whence = SEEK_SET;
+  fl.l_start = 0;
+  // len 0 locks all bytes despite how large the file grows.
+  fl.l_len = 0;
+  EXPECT_THAT(fcntl(fd, F_SETLK, &fl), SyscallSucceeds());
+}
+
+void TestLockBadFD(int fd,
+                   short lock_type = F_RDLCK) {  // NOLINT, type in flock
+  struct flock fl;
+  fl.l_type = lock_type;
+  fl.l_whence = SEEK_SET;
+  fl.l_start = 0;
+  // len 0 locks all bytes despite how large the file grows.
+  fl.l_len = 0;
+  EXPECT_THAT(fcntl(fd, F_SETLK, &fl), SyscallFailsWithErrno(EBADF));
+}
+
+TEST_F(FcntlLockTest, SetLockBadFd) { TestLockBadFD(-1); }
+
+TEST_F(FcntlLockTest, SetLockDir) {
+  auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  auto fd = ASSERT_NO_ERRNO_AND_VALUE(Open(dir.path(), O_RDONLY, 0000));
+  TestLock(fd.get());
+}
+
+TEST_F(FcntlLockTest, SetLockSymlink) {
+  // TODO(gvisor.dev/issue/2782): Replace with IsRunningWithVFS1() when O_PATH
+  // is supported.
+  SKIP_IF(IsRunningOnGvisor());
+
+  auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  auto symlink = ASSERT_NO_ERRNO_AND_VALUE(
+      TempPath::CreateSymlinkTo(GetAbsoluteTestTmpdir(), file.path()));
+
+  auto fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(symlink.path(), O_RDONLY | O_PATH, 0000));
+  TestLockBadFD(fd.get());
+}
+
+TEST_F(FcntlLockTest, SetLockProc) {
+  auto fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/self/status", O_RDONLY, 0000));
+  TestLock(fd.get());
+}
+
+TEST_F(FcntlLockTest, SetLockPipe) {
+  SKIP_IF(IsRunningWithVFS1());
+
+  int fds[2];
+  ASSERT_THAT(pipe(fds), SyscallSucceeds());
+
+  TestLock(fds[0]);
+  TestLockBadFD(fds[0], F_WRLCK);
+
+  TestLock(fds[1], F_WRLCK);
+  TestLockBadFD(fds[1]);
+
+  EXPECT_THAT(close(fds[0]), SyscallSucceeds());
+  EXPECT_THAT(close(fds[1]), SyscallSucceeds());
+}
+
+TEST_F(FcntlLockTest, SetLockSocket) {
+  SKIP_IF(IsRunningWithVFS1());
+
+  int sock = socket(AF_UNIX, SOCK_STREAM, 0);
+  ASSERT_THAT(sock, SyscallSucceeds());
+
+  struct sockaddr_un addr =
+      ASSERT_NO_ERRNO_AND_VALUE(UniqueUnixAddr(true /* abstract */, AF_UNIX));
+  ASSERT_THAT(
+      bind(sock, reinterpret_cast<struct sockaddr*>(&addr), sizeof(addr)),
+      SyscallSucceeds());
+
+  TestLock(sock);
+  EXPECT_THAT(close(sock), SyscallSucceeds());
+}
+
+TEST_F(FcntlLockTest, SetLockBadOpenFlagsWrite) {
+  auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY, 0666));
+
+  struct flock fl0;
+  fl0.l_type = F_WRLCK;
+  fl0.l_whence = SEEK_SET;
+  fl0.l_start = 0;
+  fl0.l_len = 0;  // Lock all file
+
+  // Expect that setting a write lock using a read only file descriptor
+  // won't work.
+  EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl0), SyscallFailsWithErrno(EBADF));
+}
+
+TEST_F(FcntlLockTest, SetLockBadOpenFlagsRead) {
+  auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_WRONLY, 0666));
+
+  struct flock fl1;
+  fl1.l_type = F_RDLCK;
+  fl1.l_whence = SEEK_SET;
+  fl1.l_start = 0;
+  // Same as SetLockBadFd.
+  fl1.l_len = 0;
+
+  EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl1), SyscallFailsWithErrno(EBADF));
+}
+
+TEST_F(FcntlLockTest, SetLockUnlockOnNothing) {
+  auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666));
+
+  struct flock fl;
+  fl.l_type = F_UNLCK;
+  fl.l_whence = SEEK_SET;
+  fl.l_start = 0;
+  // Same as SetLockBadFd.
+  fl.l_len = 0;
+
+  EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl), SyscallSucceeds());
+}
+
+TEST_F(FcntlLockTest, SetWriteLockSingleProc) {
+  auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  FileDescriptor fd0 =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666));
+
+  struct flock fl;
+  fl.l_type = F_WRLCK;
+  fl.l_whence = SEEK_SET;
+  fl.l_start = 0;
+  // Same as SetLockBadFd.
+  fl.l_len = 0;
+
+  EXPECT_THAT(fcntl(fd0.get(), F_SETLK, &fl), SyscallSucceeds());
+  // Expect to be able to take the same lock on the same fd no problem.
+  EXPECT_THAT(fcntl(fd0.get(), F_SETLK, &fl), SyscallSucceeds());
+
+  FileDescriptor fd1 =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666));
+
+  // Expect to be able to take the same lock from a different fd but for
+  // the same process.
+  EXPECT_THAT(fcntl(fd1.get(), F_SETLK, &fl), SyscallSucceeds());
+}
+
+TEST_F(FcntlLockTest, SetReadLockMultiProc) {
+  auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666));
+
+  struct flock fl;
+  fl.l_type = F_RDLCK;
+  fl.l_whence = SEEK_SET;
+  fl.l_start = 0;
+  // Same as SetLockBadFd.
+  fl.l_len = 0;
+  EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl), SyscallSucceeds());
+
+  // spawn a child process to take a read lock on the same file.
+  pid_t child_pid = 0;
+  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+      SubprocessLock(file.path(), false /* write lock */,
+                     false /* nonblocking */, false /* no eintr retry */,
+                     -1 /* no socket fd */, fl.l_start, fl.l_len, &child_pid));
+
+  int status = 0;
+  ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds());
+  EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+      << "Exited with code: " << status;
+}
+
+TEST_F(FcntlLockTest, SetReadThenWriteLockMultiProc) {
+  auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666));
+
+  struct flock fl;
+  fl.l_type = F_RDLCK;
+  fl.l_whence = SEEK_SET;
+  fl.l_start = 0;
+  // Same as SetLockBadFd.
+  fl.l_len = 0;
+  EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl), SyscallSucceeds());
+
+  // Assert that another process trying to lock on the same file will fail
+  // with EAGAIN.  It's important that we keep the fd above open so that
+  // that the other process will contend with the lock.
+  pid_t child_pid = 0;
+  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+      SubprocessLock(file.path(), true /* write lock */,
+                     false /* nonblocking */, false /* no eintr retry */,
+                     -1 /* no socket fd */, fl.l_start, fl.l_len, &child_pid));
+
+  int status = 0;
+  ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds());
+  EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == EAGAIN)
+      << "Exited with code: " << status;
+
+  // Close the fd: we want to test that another process can acquire the
+  // lock after this point.
+  fd.reset();
+  // Assert that another process can now acquire the lock.
+
+  child_pid = 0;
+  auto cleanup2 = ASSERT_NO_ERRNO_AND_VALUE(
+      SubprocessLock(file.path(), true /* write lock */,
+                     false /* nonblocking */, false /* no eintr retry */,
+                     -1 /* no socket fd */, fl.l_start, fl.l_len, &child_pid));
+  ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds());
+  EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+      << "Exited with code: " << status;
+}
+
+TEST_F(FcntlLockTest, SetWriteThenReadLockMultiProc) {
+  auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+
+  FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666));
+  // Same as SetReadThenWriteLockMultiProc.
+
+  struct flock fl;
+  fl.l_type = F_WRLCK;
+  fl.l_whence = SEEK_SET;
+  fl.l_start = 0;
+  // Same as SetLockBadFd.
+  fl.l_len = 0;
+
+  // Same as SetReadThenWriteLockMultiProc.
+  EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl), SyscallSucceeds());
+
+  // Same as SetReadThenWriteLockMultiProc.
+  pid_t child_pid = 0;
+  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+      SubprocessLock(file.path(), false /* write lock */,
+                     false /* nonblocking */, false /* no eintr retry */,
+                     -1 /* no socket fd */, fl.l_start, fl.l_len, &child_pid));
+
+  int status = 0;
+  ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds());
+  EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == EAGAIN)
+      << "Exited with code: " << status;
+
+  // Same as SetReadThenWriteLockMultiProc.
+  fd.reset();  // Close the fd.
+
+  // Same as SetReadThenWriteLockMultiProc.
+  child_pid = 0;
+  auto cleanup2 = ASSERT_NO_ERRNO_AND_VALUE(
+      SubprocessLock(file.path(), false /* write lock */,
+                     false /* nonblocking */, false /* no eintr retry */,
+                     -1 /* no socket fd */, fl.l_start, fl.l_len, &child_pid));
+  ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds());
+  EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+      << "Exited with code: " << status;
+}
+
+TEST_F(FcntlLockTest, SetWriteLockMultiProc) {
+  auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666));
+  // Same as SetReadThenWriteLockMultiProc.
+
+  struct flock fl;
+  fl.l_type = F_WRLCK;
+  fl.l_whence = SEEK_SET;
+  fl.l_start = 0;
+  // Same as SetLockBadFd.
+  fl.l_len = 0;
+
+  // Same as SetReadWriteLockMultiProc.
+  EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl), SyscallSucceeds());
+
+  // Same as SetReadWriteLockMultiProc.
+  pid_t child_pid = 0;
+  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+      SubprocessLock(file.path(), true /* write lock */,
+                     false /* nonblocking */, false /* no eintr retry */,
+                     -1 /* no socket fd */, fl.l_start, fl.l_len, &child_pid));
+  int status = 0;
+  ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds());
+  EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == EAGAIN)
+      << "Exited with code: " << status;
+
+  fd.reset();  // Close the FD.
+  // Same as SetReadWriteLockMultiProc.
+  child_pid = 0;
+  auto cleanup2 = ASSERT_NO_ERRNO_AND_VALUE(
+      SubprocessLock(file.path(), true /* write lock */,
+                     false /* nonblocking */, false /* no eintr retry */,
+                     -1 /* no socket fd */, fl.l_start, fl.l_len, &child_pid));
+  ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds());
+  EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+      << "Exited with code: " << status;
+}
+
+TEST_F(FcntlLockTest, SetLockIsRegional) {
+  auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666));
+
+  struct flock fl;
+  fl.l_type = F_WRLCK;
+  fl.l_whence = SEEK_SET;
+  fl.l_start = 0;
+  fl.l_len = 4096;
+
+  // Same as SetReadWriteLockMultiProc.
+  EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl), SyscallSucceeds());
+
+  // Same as SetReadWriteLockMultiProc.
+  pid_t child_pid = 0;
+  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+      SubprocessLock(file.path(), true /* write lock */,
+                     false /* nonblocking */, false /* no eintr retry */,
+                     -1 /* no socket fd */, fl.l_len, 0, &child_pid));
+  int status = 0;
+  ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds());
+  EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+      << "Exited with code: " << status;
+}
+
+TEST_F(FcntlLockTest, SetLockUpgradeDowngrade) {
+  auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666));
+
+  struct flock fl;
+  fl.l_type = F_RDLCK;
+  fl.l_whence = SEEK_SET;
+  fl.l_start = 0;
+  // Same as SetLockBadFd.
+  fl.l_len = 0;
+
+  // Same as SetReadWriteLockMultiProc.
+  EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl), SyscallSucceeds());
+
+  // Upgrade to a write lock.  This will prevent anyone else from taking
+  // the lock.
+  fl.l_type = F_WRLCK;
+  EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl), SyscallSucceeds());
+
+  // Same as SetReadWriteLockMultiProc.,
+  pid_t child_pid = 0;
+  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+      SubprocessLock(file.path(), false /* write lock */,
+                     false /* nonblocking */, false /* no eintr retry */,
+                     -1 /* no socket fd */, fl.l_start, fl.l_len, &child_pid));
+
+  int status = 0;
+  ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds());
+  EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == EAGAIN)
+      << "Exited with code: " << status;
+
+  // Downgrade back to a read lock.
+  fl.l_type = F_RDLCK;
+  EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl), SyscallSucceeds());
+
+  // Do the same stint as before, but this time it should succeed.
+  child_pid = 0;
+  auto cleanup2 = ASSERT_NO_ERRNO_AND_VALUE(
+      SubprocessLock(file.path(), false /* write lock */,
+                     false /* nonblocking */, false /* no eintr retry */,
+                     -1 /* no socket fd */, fl.l_start, fl.l_len, &child_pid));
+
+  ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds());
+  EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+      << "Exited with code: " << status;
+}
+
+TEST_F(FcntlLockTest, SetLockDroppedOnClose) {
+  auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666));
+
+  // While somewhat surprising, obtaining another fd to the same file and
+  // then closing it in this process drops *all* locks.
+  FileDescriptor other_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666));
+  // Same as SetReadThenWriteLockMultiProc.
+
+  struct flock fl;
+  fl.l_type = F_WRLCK;
+  fl.l_whence = SEEK_SET;
+  fl.l_start = 0;
+  // Same as SetLockBadFd.
+  fl.l_len = 0;
+
+  // Same as SetReadWriteLockMultiProc.
+  EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl), SyscallSucceeds());
+
+  other_fd.reset();  // Close.
+
+  // Expect to be able to get the lock, given that the close above dropped it.
+  pid_t child_pid = 0;
+  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+      SubprocessLock(file.path(), true /* write lock */,
+                     false /* nonblocking */, false /* no eintr retry */,
+                     -1 /* no socket fd */, fl.l_start, fl.l_len, &child_pid));
+
+  int status = 0;
+  ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds());
+  EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+      << "Exited with code: " << status;
+}
+
+TEST_F(FcntlLockTest, SetLockUnlock) {
+  auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666));
+
+  // Setup two regional locks with different permissions.
+  struct flock fl0;
+  fl0.l_type = F_WRLCK;
+  fl0.l_whence = SEEK_SET;
+  fl0.l_start = 0;
+  fl0.l_len = 4096;
+
+  struct flock fl1;
+  fl1.l_type = F_RDLCK;
+  fl1.l_whence = SEEK_SET;
+  fl1.l_start = 4096;
+  // Same as SetLockBadFd.
+  fl1.l_len = 0;
+
+  // Set both region locks.
+  EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl0), SyscallSucceeds());
+  EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl1), SyscallSucceeds());
+
+  // Another process should fail to take a read lock on the entire file
+  // due to the regional write lock.
+  pid_t child_pid = 0;
+  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(SubprocessLock(
+      file.path(), false /* write lock */, false /* nonblocking */,
+      false /* no eintr retry */, -1 /* no socket fd */, 0, 0, &child_pid));
+
+  int status = 0;
+  ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds());
+  EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == EAGAIN)
+      << "Exited with code: " << status;
+
+  // Then only unlock the writable one.  This should ensure that other
+  // processes can take any read lock that it wants.
+  fl0.l_type = F_UNLCK;
+  EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl0), SyscallSucceeds());
+
+  // Another process should now succeed to get a read lock on the entire file.
+  child_pid = 0;
+  auto cleanup2 = ASSERT_NO_ERRNO_AND_VALUE(SubprocessLock(
+      file.path(), false /* write lock */, false /* nonblocking */,
+      false /* no eintr retry */, -1 /* no socket fd */, 0, 0, &child_pid));
+  ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds());
+  EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+      << "Exited with code: " << status;
+}
+
+TEST_F(FcntlLockTest, SetLockAcrossRename) {
+  auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666));
+
+  // Setup two regional locks with different permissions.
+  struct flock fl;
+  fl.l_type = F_WRLCK;
+  fl.l_whence = SEEK_SET;
+  fl.l_start = 0;
+  // Same as SetLockBadFd.
+  fl.l_len = 0;
+
+  // Set the region lock.
+  EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl), SyscallSucceeds());
+
+  // Rename the file to someplace nearby.
+  std::string const newpath = NewTempAbsPath();
+  EXPECT_THAT(rename(file.path().c_str(), newpath.c_str()), SyscallSucceeds());
+
+  // Another process should fail to take a read lock on the renamed file
+  // since we still have an open handle to the inode.
+  pid_t child_pid = 0;
+  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+      SubprocessLock(newpath, false /* write lock */, false /* nonblocking */,
+                     false /* no eintr retry */, -1 /* no socket fd */,
+                     fl.l_start, fl.l_len, &child_pid));
+
+  int status = 0;
+  ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds());
+  EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == EAGAIN)
+      << "Exited with code: " << status;
+}
+
+// NOTE: The blocking tests below aren't perfect. It's hard to assert exactly
+// what the kernel did while handling a syscall. These tests are timing based
+// because there really isn't any other reasonable way to assert that correct
+// blocking behavior happened.
+
+// This test will verify that blocking works as expected when another process
+// holds a write lock when obtaining a write lock. This test will hold the lock
+// for some amount of time and then wait for the second process to send over the
+// socket_fd the amount of time it was blocked for before the lock succeeded.
+TEST_F(FcntlLockTest, SetWriteLockThenBlockingWriteLock) {
+  auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666));
+
+  struct flock fl;
+  fl.l_type = F_WRLCK;
+  fl.l_whence = SEEK_SET;
+  fl.l_start = 0;
+  fl.l_len = 0;
+
+  // Take the write lock.
+  ASSERT_THAT(fcntl(fd.get(), F_SETLKW, &fl), SyscallSucceeds());
+
+  // Attempt to take the read lock in a sub process. This will immediately block
+  // so we will release our lock after some amount of time and then assert the
+  // amount of time the other process was blocked for.
+  pid_t child_pid = 0;
+  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(SubprocessLock(
+      file.path(), true /* write lock */, true /* Blocking Lock */,
+      true /* Retry on EINTR */, fds_[1] /* Socket fd for timing information */,
+      fl.l_start, fl.l_len, &child_pid));
+
+  // We will wait kHoldLockForSec before we release our lock allowing the
+  // subprocess to obtain it.
+  constexpr absl::Duration kHoldLockFor = absl::Seconds(5);
+  const int64_t kMinBlockTimeUsec = absl::ToInt64Microseconds(absl::Seconds(1));
+
+  absl::SleepFor(kHoldLockFor);
+
+  // Unlock our write lock.
+  fl.l_type = F_UNLCK;
+  ASSERT_THAT(fcntl(fd.get(), F_SETLKW, &fl), SyscallSucceeds());
+
+  // Read the blocked time from the subprocess socket.
+  int64_t subprocess_blocked_time_usec = GetSubprocessFcntlTimeInUsec();
+
+  // We must have been waiting at least kMinBlockTime.
+  EXPECT_GT(subprocess_blocked_time_usec, kMinBlockTimeUsec);
+
+  // The FCNTL write lock must always succeed as it will simply block until it
+  // can obtain the lock.
+  int status = 0;
+  ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds());
+  EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+      << "Exited with code: " << status;
+}
+
+// This test will verify that blocking works as expected when another process
+// holds a read lock when obtaining a write lock. This test will hold the lock
+// for some amount of time and then wait for the second process to send over the
+// socket_fd the amount of time it was blocked for before the lock succeeded.
+TEST_F(FcntlLockTest, SetReadLockThenBlockingWriteLock) {
+  auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666));
+
+  struct flock fl;
+  fl.l_type = F_RDLCK;
+  fl.l_whence = SEEK_SET;
+  fl.l_start = 0;
+  fl.l_len = 0;
+
+  // Take the write lock.
+  ASSERT_THAT(fcntl(fd.get(), F_SETLKW, &fl), SyscallSucceeds());
+
+  // Attempt to take the read lock in a sub process. This will immediately block
+  // so we will release our lock after some amount of time and then assert the
+  // amount of time the other process was blocked for.
+  pid_t child_pid = 0;
+  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(SubprocessLock(
+      file.path(), true /* write lock */, true /* Blocking Lock */,
+      true /* Retry on EINTR */, fds_[1] /* Socket fd for timing information */,
+      fl.l_start, fl.l_len, &child_pid));
+
+  // We will wait kHoldLockForSec before we release our lock allowing the
+  // subprocess to obtain it.
+  constexpr absl::Duration kHoldLockFor = absl::Seconds(5);
+
+  const int64_t kMinBlockTimeUsec = absl::ToInt64Microseconds(absl::Seconds(1));
+
+  absl::SleepFor(kHoldLockFor);
+
+  // Unlock our READ lock.
+  fl.l_type = F_UNLCK;
+  ASSERT_THAT(fcntl(fd.get(), F_SETLKW, &fl), SyscallSucceeds());
+
+  // Read the blocked time from the subprocess socket.
+  int64_t subprocess_blocked_time_usec = GetSubprocessFcntlTimeInUsec();
+
+  // We must have been waiting at least kMinBlockTime.
+  EXPECT_GT(subprocess_blocked_time_usec, kMinBlockTimeUsec);
+
+  // The FCNTL write lock must always succeed as it will simply block until it
+  // can obtain the lock.
+  int status = 0;
+  ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds());
+  EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+      << "Exited with code: " << status;
+}
+
+// This test will veirfy that blocking works as expected when another process
+// holds a write lock when obtaining a read lock. This test will hold the lock
+// for some amount of time and then wait for the second process to send over the
+// socket_fd the amount of time it was blocked for before the lock succeeded.
+TEST_F(FcntlLockTest, SetWriteLockThenBlockingReadLock) {
+  auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666));
+
+  struct flock fl;
+  fl.l_type = F_WRLCK;
+  fl.l_whence = SEEK_SET;
+  fl.l_start = 0;
+  fl.l_len = 0;
+
+  // Take the write lock.
+  ASSERT_THAT(fcntl(fd.get(), F_SETLKW, &fl), SyscallSucceeds());
+
+  // Attempt to take the read lock in a sub process. This will immediately block
+  // so we will release our lock after some amount of time and then assert the
+  // amount of time the other process was blocked for.
+  pid_t child_pid = 0;
+  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(SubprocessLock(
+      file.path(), false /* read lock */, true /* Blocking Lock */,
+      true /* Retry on EINTR */, fds_[1] /* Socket fd for timing information */,
+      fl.l_start, fl.l_len, &child_pid));
+
+  // We will wait kHoldLockForSec before we release our lock allowing the
+  // subprocess to obtain it.
+  constexpr absl::Duration kHoldLockFor = absl::Seconds(5);
+
+  const int64_t kMinBlockTimeUsec = absl::ToInt64Microseconds(absl::Seconds(1));
+
+  absl::SleepFor(kHoldLockFor);
+
+  // Unlock our write lock.
+  fl.l_type = F_UNLCK;
+  ASSERT_THAT(fcntl(fd.get(), F_SETLKW, &fl), SyscallSucceeds());
+
+  // Read the blocked time from the subprocess socket.
+  int64_t subprocess_blocked_time_usec = GetSubprocessFcntlTimeInUsec();
+
+  // We must have been waiting at least kMinBlockTime.
+  EXPECT_GT(subprocess_blocked_time_usec, kMinBlockTimeUsec);
+
+  // The FCNTL read lock must always succeed as it will simply block until it
+  // can obtain the lock.
+  int status = 0;
+  ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds());
+  EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+      << "Exited with code: " << status;
+}
+
+// This test will verify that when one process only holds a read lock that
+// another will not block while obtaining a read lock when F_SETLKW is used.
+TEST_F(FcntlLockTest, SetReadLockThenBlockingReadLock) {
+  auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666));
+
+  struct flock fl;
+  fl.l_type = F_RDLCK;
+  fl.l_whence = SEEK_SET;
+  fl.l_start = 0;
+  fl.l_len = 0;
+
+  // Take the READ lock.
+  ASSERT_THAT(fcntl(fd.get(), F_SETLKW, &fl), SyscallSucceeds());
+
+  // Attempt to take the read lock in a sub process. Since multiple processes
+  // can hold a read lock this should immediately return without blocking
+  // even though we used F_SETLKW in the subprocess.
+  pid_t child_pid = 0;
+  auto sp = ASSERT_NO_ERRNO_AND_VALUE(SubprocessLock(
+      file.path(), false /* read lock */, true /* Blocking Lock */,
+      true /* Retry on EINTR */, -1 /* No fd, should not block */, fl.l_start,
+      fl.l_len, &child_pid));
+
+  // We never release the lock and the subprocess should still obtain it without
+  // blocking for any period of time.
+  int status = 0;
+  ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds());
+  EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+      << "Exited with code: " << status;
+}
+
+TEST(FcntlTest, GetO_ASYNC) {
+  FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0));
+
+  int flag_fl = -1;
+  ASSERT_THAT(flag_fl = fcntl(s.get(), F_GETFL), SyscallSucceeds());
+  EXPECT_EQ(flag_fl & O_ASYNC, 0);
+
+  int flag_fd = -1;
+  ASSERT_THAT(flag_fd = fcntl(s.get(), F_GETFD), SyscallSucceeds());
+  EXPECT_EQ(flag_fd & O_ASYNC, 0);
+}
+
+TEST(FcntlTest, SetFlO_ASYNC) {
+  FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0));
+
+  int before_fl = -1;
+  ASSERT_THAT(before_fl = fcntl(s.get(), F_GETFL), SyscallSucceeds());
+
+  int before_fd = -1;
+  ASSERT_THAT(before_fd = fcntl(s.get(), F_GETFD), SyscallSucceeds());
+
+  ASSERT_THAT(fcntl(s.get(), F_SETFL, before_fl | O_ASYNC), SyscallSucceeds());
+
+  int after_fl = -1;
+  ASSERT_THAT(after_fl = fcntl(s.get(), F_GETFL), SyscallSucceeds());
+  EXPECT_EQ(after_fl, before_fl | O_ASYNC);
+
+  int after_fd = -1;
+  ASSERT_THAT(after_fd = fcntl(s.get(), F_GETFD), SyscallSucceeds());
+  EXPECT_EQ(after_fd, before_fd);
+}
+
+TEST(FcntlTest, SetFdO_ASYNC) {
+  FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0));
+
+  int before_fl = -1;
+  ASSERT_THAT(before_fl = fcntl(s.get(), F_GETFL), SyscallSucceeds());
+
+  int before_fd = -1;
+  ASSERT_THAT(before_fd = fcntl(s.get(), F_GETFD), SyscallSucceeds());
+
+  ASSERT_THAT(fcntl(s.get(), F_SETFD, before_fd | O_ASYNC), SyscallSucceeds());
+
+  int after_fl = -1;
+  ASSERT_THAT(after_fl = fcntl(s.get(), F_GETFL), SyscallSucceeds());
+  EXPECT_EQ(after_fl, before_fl);
+
+  int after_fd = -1;
+  ASSERT_THAT(after_fd = fcntl(s.get(), F_GETFD), SyscallSucceeds());
+  EXPECT_EQ(after_fd, before_fd);
+}
+
+TEST(FcntlTest, DupAfterO_ASYNC) {
+  FileDescriptor s1 = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0));
+
+  int before = -1;
+  ASSERT_THAT(before = fcntl(s1.get(), F_GETFL), SyscallSucceeds());
+
+  ASSERT_THAT(fcntl(s1.get(), F_SETFL, before | O_ASYNC), SyscallSucceeds());
+
+  FileDescriptor fd2 = ASSERT_NO_ERRNO_AND_VALUE(s1.Dup());
+
+  int after = -1;
+  ASSERT_THAT(after = fcntl(fd2.get(), F_GETFL), SyscallSucceeds());
+  EXPECT_EQ(after & O_ASYNC, O_ASYNC);
+}
+
+TEST(FcntlTest, GetOwnNone) {
+  FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0));
+
+  // Use the raw syscall because the glibc wrapper may convert F_{GET,SET}OWN
+  // into F_{GET,SET}OWN_EX.
+  EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN),
+              SyscallSucceedsWithValue(0));
+  MaybeSave();
+}
+
+TEST(FcntlTest, GetOwnExNone) {
+  FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0));
+
+  f_owner_ex owner = {};
+  EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN_EX, &owner),
+              SyscallSucceedsWithValue(0));
+}
+
+TEST(FcntlTest, SetOwnInvalidPid) {
+  SKIP_IF(IsRunningWithVFS1());
+
+  FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0));
+
+  EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN, 12345678),
+              SyscallFailsWithErrno(ESRCH));
+}
+
+TEST(FcntlTest, SetOwnInvalidPgrp) {
+  SKIP_IF(IsRunningWithVFS1());
+
+  FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0));
+
+  EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN, -12345678),
+              SyscallFailsWithErrno(ESRCH));
+}
+
+TEST(FcntlTest, SetOwnPid) {
+  FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0));
+
+  pid_t pid;
+  EXPECT_THAT(pid = getpid(), SyscallSucceeds());
+
+  ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN, pid), SyscallSucceeds());
+
+  EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN),
+              SyscallSucceedsWithValue(pid));
+  MaybeSave();
+}
+
+TEST(FcntlTest, SetOwnPgrp) {
+  FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0));
+
+  pid_t pgid;
+  EXPECT_THAT(pgid = getpgrp(), SyscallSucceeds());
+
+  ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN, -pgid), SyscallSucceeds());
+
+  // Verify with F_GETOWN_EX; using F_GETOWN on Linux may incorrectly treat the
+  // negative return value as an error, converting the return value to -1 and
+  // setting errno accordingly.
+  f_owner_ex got_owner = {};
+  ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN_EX, &got_owner),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(got_owner.type, F_OWNER_PGRP);
+  EXPECT_EQ(got_owner.pid, pgid);
+  MaybeSave();
+}
+
+TEST(FcntlTest, SetOwnUnset) {
+  FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0));
+
+  // Set and unset pid.
+  pid_t pid;
+  EXPECT_THAT(pid = getpid(), SyscallSucceeds());
+  ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN, pid), SyscallSucceeds());
+  ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN, 0), SyscallSucceeds());
+
+  EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN),
+              SyscallSucceedsWithValue(0));
+
+  // Set and unset pgid.
+  pid_t pgid;
+  EXPECT_THAT(pgid = getpgrp(), SyscallSucceeds());
+  ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN, -pgid), SyscallSucceeds());
+  ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN, 0), SyscallSucceeds());
+
+  EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN),
+              SyscallSucceedsWithValue(0));
+  MaybeSave();
+}
+
+// F_SETOWN flips the sign of negative values, an operation that is guarded
+// against overflow.
+TEST(FcntlTest, SetOwnOverflow) {
+  FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0));
+
+  EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN, INT_MIN),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(FcntlTest, SetOwnExInvalidType) {
+  FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0));
+
+  f_owner_ex owner = {};
+  owner.type = __pid_type(-1);
+  EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &owner),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(FcntlTest, SetOwnExInvalidTid) {
+  FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0));
+
+  f_owner_ex owner = {};
+  owner.type = F_OWNER_TID;
+  owner.pid = -1;
+
+  EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &owner),
+              SyscallFailsWithErrno(ESRCH));
+}
+
+TEST(FcntlTest, SetOwnExInvalidPid) {
+  FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0));
+
+  f_owner_ex owner = {};
+  owner.type = F_OWNER_PID;
+  owner.pid = -1;
+
+  EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &owner),
+              SyscallFailsWithErrno(ESRCH));
+}
+
+TEST(FcntlTest, SetOwnExInvalidPgrp) {
+  FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0));
+
+  f_owner_ex owner = {};
+  owner.type = F_OWNER_PGRP;
+  owner.pid = -1;
+
+  EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &owner),
+              SyscallFailsWithErrno(ESRCH));
+}
+
+TEST(FcntlTest, SetOwnExTid) {
+  FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0));
+
+  f_owner_ex owner = {};
+  owner.type = F_OWNER_TID;
+  EXPECT_THAT(owner.pid = syscall(__NR_gettid), SyscallSucceeds());
+
+  ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &owner),
+              SyscallSucceeds());
+
+  EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN),
+              SyscallSucceedsWithValue(owner.pid));
+  MaybeSave();
+}
+
+TEST(FcntlTest, SetOwnExPid) {
+  FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0));
+
+  f_owner_ex owner = {};
+  owner.type = F_OWNER_PID;
+  EXPECT_THAT(owner.pid = getpid(), SyscallSucceeds());
+
+  ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &owner),
+              SyscallSucceeds());
+
+  EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN),
+              SyscallSucceedsWithValue(owner.pid));
+  MaybeSave();
+}
+
+TEST(FcntlTest, SetOwnExPgrp) {
+  FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0));
+
+  f_owner_ex set_owner = {};
+  set_owner.type = F_OWNER_PGRP;
+  EXPECT_THAT(set_owner.pid = getpgrp(), SyscallSucceeds());
+
+  ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &set_owner),
+              SyscallSucceeds());
+
+  // Verify with F_GETOWN_EX; using F_GETOWN on Linux may incorrectly treat the
+  // negative return value as an error, converting the return value to -1 and
+  // setting errno accordingly.
+  f_owner_ex got_owner = {};
+  ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN_EX, &got_owner),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(got_owner.type, set_owner.type);
+  EXPECT_EQ(got_owner.pid, set_owner.pid);
+  MaybeSave();
+}
+
+TEST(FcntlTest, SetOwnExUnset) {
+  SKIP_IF(IsRunningWithVFS1());
+
+  FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0));
+
+  // Set and unset pid.
+  f_owner_ex owner = {};
+  owner.type = F_OWNER_PID;
+  EXPECT_THAT(owner.pid = getpid(), SyscallSucceeds());
+  ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &owner),
+              SyscallSucceeds());
+  owner.pid = 0;
+  ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &owner),
+              SyscallSucceeds());
+
+  EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN),
+              SyscallSucceedsWithValue(0));
+
+  // Set and unset pgid.
+  owner.type = F_OWNER_PGRP;
+  EXPECT_THAT(owner.pid = getpgrp(), SyscallSucceeds());
+  ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &owner),
+              SyscallSucceeds());
+  owner.pid = 0;
+  ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &owner),
+              SyscallSucceeds());
+
+  EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN),
+              SyscallSucceedsWithValue(0));
+  MaybeSave();
+}
+
+TEST(FcntlTest, GetOwnExTid) {
+  FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0));
+
+  f_owner_ex set_owner = {};
+  set_owner.type = F_OWNER_TID;
+  EXPECT_THAT(set_owner.pid = syscall(__NR_gettid), SyscallSucceeds());
+
+  ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &set_owner),
+              SyscallSucceeds());
+
+  f_owner_ex got_owner = {};
+  ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN_EX, &got_owner),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(got_owner.type, set_owner.type);
+  EXPECT_EQ(got_owner.pid, set_owner.pid);
+}
+
+TEST(FcntlTest, GetOwnExPid) {
+  FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0));
+
+  f_owner_ex set_owner = {};
+  set_owner.type = F_OWNER_PID;
+  EXPECT_THAT(set_owner.pid = getpid(), SyscallSucceeds());
+
+  ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &set_owner),
+              SyscallSucceeds());
+
+  f_owner_ex got_owner = {};
+  ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN_EX, &got_owner),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(got_owner.type, set_owner.type);
+  EXPECT_EQ(got_owner.pid, set_owner.pid);
+}
+
+TEST(FcntlTest, GetOwnExPgrp) {
+  FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0));
+
+  f_owner_ex set_owner = {};
+  set_owner.type = F_OWNER_PGRP;
+  EXPECT_THAT(set_owner.pid = getpgrp(), SyscallSucceeds());
+
+  ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &set_owner),
+              SyscallSucceeds());
+
+  f_owner_ex got_owner = {};
+  ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN_EX, &got_owner),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(got_owner.type, set_owner.type);
+  EXPECT_EQ(got_owner.pid, set_owner.pid);
+}
+
+// Make sure that making multiple concurrent changes to async signal generation
+// does not cause any race issues.
+TEST(FcntlTest, SetFlSetOwnDoNotRace) {
+  FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0));
+
+  pid_t pid;
+  EXPECT_THAT(pid = getpid(), SyscallSucceeds());
+
+  constexpr absl::Duration runtime = absl::Milliseconds(300);
+  auto setAsync = [&s, &runtime] {
+    for (auto start = absl::Now(); absl::Now() - start < runtime;) {
+      ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETFL, O_ASYNC),
+                  SyscallSucceeds());
+      sched_yield();
+    }
+  };
+  auto resetAsync = [&s, &runtime] {
+    for (auto start = absl::Now(); absl::Now() - start < runtime;) {
+      ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETFL, 0), SyscallSucceeds());
+      sched_yield();
+    }
+  };
+  auto setOwn = [&s, &pid, &runtime] {
+    for (auto start = absl::Now(); absl::Now() - start < runtime;) {
+      ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN, pid),
+                  SyscallSucceeds());
+      sched_yield();
+    }
+  };
+
+  std::list<ScopedThread> threads;
+  for (int i = 0; i < 10; i++) {
+    threads.emplace_back(setAsync);
+    threads.emplace_back(resetAsync);
+    threads.emplace_back(setOwn);
+  }
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
+
+int main(int argc, char** argv) {
+  gvisor::testing::TestInit(&argc, &argv);
+
+  const std::string setlock_on = absl::GetFlag(FLAGS_child_setlock_on);
+  if (!setlock_on.empty()) {
+    int socket_fd = absl::GetFlag(FLAGS_socket_fd);
+    int fd = open(setlock_on.c_str(), O_RDWR, 0666);
+    if (fd == -1 && errno != 0) {
+      int err = errno;
+      std::cerr << "CHILD open " << setlock_on << " failed " << err
+                << std::endl;
+      exit(err);
+    }
+
+    struct flock fl;
+    if (absl::GetFlag(FLAGS_child_setlock_write)) {
+      fl.l_type = F_WRLCK;
+    } else {
+      fl.l_type = F_RDLCK;
+    }
+    fl.l_whence = SEEK_SET;
+    fl.l_start = absl::GetFlag(FLAGS_child_setlock_start);
+    fl.l_len = absl::GetFlag(FLAGS_child_setlock_len);
+
+    // Test the fcntl.
+    int err = 0;
+    int ret = 0;
+
+    gvisor::testing::MonotonicTimer timer;
+    timer.Start();
+    do {
+      ret = fcntl(fd, absl::GetFlag(FLAGS_blocking) ? F_SETLKW : F_SETLK, &fl);
+    } while (absl::GetFlag(FLAGS_retry_eintr) && ret == -1 && errno == EINTR);
+    auto usec = absl::ToInt64Microseconds(timer.Duration());
+
+    if (ret == -1 && errno != 0) {
+      err = errno;
+      std::cerr << "CHILD lock " << setlock_on << " failed " << err
+                << std::endl;
+    }
+
+    // If there is a socket fd let's send back the time in microseconds it took
+    // to execute this syscall.
+    if (socket_fd != -1) {
+      gvisor::testing::WriteFd(socket_fd, reinterpret_cast<void*>(&usec),
+                                     sizeof(usec));
+      close(socket_fd);
+    }
+
+    close(fd);
+    exit(err);
+  }
+
+  return gvisor::testing::RunAllTests();
+}
diff --git a/test/syscalls/linux/file_base.h b/test/syscalls/linux/file_base.h
new file mode 100644
index 000000000..fb418e052
--- /dev/null
+++ b/test/syscalls/linux/file_base.h
@@ -0,0 +1,100 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_SYSCALLS_FILE_BASE_H_
+#define GVISOR_TEST_SYSCALLS_FILE_BASE_H_
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <netinet/in.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <unistd.h>
+
+#include <cstring>
+#include <string>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/strings/string_view.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/posix_error.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+class FileTest : public ::testing::Test {
+ public:
+  void SetUp() override {
+    test_pipe_[0] = -1;
+    test_pipe_[1] = -1;
+
+    test_file_name_ = NewTempAbsPath();
+    test_file_fd_ = ASSERT_NO_ERRNO_AND_VALUE(
+        Open(test_file_name_, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR));
+
+    ASSERT_THAT(pipe(test_pipe_), SyscallSucceeds());
+    ASSERT_THAT(fcntl(test_pipe_[0], F_SETFL, O_NONBLOCK), SyscallSucceeds());
+  }
+
+  // CloseFile will allow the test to manually close the file descriptor.
+  void CloseFile() { test_file_fd_.reset(); }
+
+  // UnlinkFile will allow the test to manually unlink the file.
+  void UnlinkFile() {
+    if (!test_file_name_.empty()) {
+      EXPECT_THAT(unlink(test_file_name_.c_str()), SyscallSucceeds());
+      test_file_name_.clear();
+    }
+  }
+
+  // ClosePipes will allow the test to manually close the pipes.
+  void ClosePipes() {
+    if (test_pipe_[0] > 0) {
+      EXPECT_THAT(close(test_pipe_[0]), SyscallSucceeds());
+    }
+
+    if (test_pipe_[1] > 0) {
+      EXPECT_THAT(close(test_pipe_[1]), SyscallSucceeds());
+    }
+
+    test_pipe_[0] = -1;
+    test_pipe_[1] = -1;
+  }
+
+  void TearDown() override {
+    CloseFile();
+    UnlinkFile();
+    ClosePipes();
+  }
+
+ protected:
+  std::string test_file_name_;
+  FileDescriptor test_file_fd_;
+
+  int test_pipe_[2];
+};
+
+}  // namespace testing
+}  // namespace gvisor
+
+#endif  // GVISOR_TEST_SYSCALLS_FILE_BASE_H_
diff --git a/test/syscalls/linux/flock.cc b/test/syscalls/linux/flock.cc
new file mode 100644
index 000000000..638a93979
--- /dev/null
+++ b/test/syscalls/linux/flock.cc
@@ -0,0 +1,636 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <sys/file.h>
+
+#include <string>
+
+#include "gtest/gtest.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/syscalls/linux/file_base.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+#include "test/util/timer_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+class FlockTest : public FileTest {};
+
+TEST_F(FlockTest, InvalidOpCombinations) {
+  // The operation cannot be both exclusive and shared.
+  EXPECT_THAT(flock(test_file_fd_.get(), LOCK_EX | LOCK_SH | LOCK_NB),
+              SyscallFailsWithErrno(EINVAL));
+
+  // Locking and Unlocking doesn't make sense.
+  EXPECT_THAT(flock(test_file_fd_.get(), LOCK_EX | LOCK_UN | LOCK_NB),
+              SyscallFailsWithErrno(EINVAL));
+  EXPECT_THAT(flock(test_file_fd_.get(), LOCK_SH | LOCK_UN | LOCK_NB),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_F(FlockTest, NoOperationSpecified) {
+  // Not specifying an operation is invalid.
+  ASSERT_THAT(flock(test_file_fd_.get(), LOCK_NB),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_F(FlockTest, TestSimpleExLock) {
+  // Test that we can obtain an exclusive lock (no other holders)
+  // and that we can unlock it.
+  ASSERT_THAT(flock(test_file_fd_.get(), LOCK_EX | LOCK_NB),
+              SyscallSucceedsWithValue(0));
+  ASSERT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceedsWithValue(0));
+}
+
+TEST_F(FlockTest, TestSimpleShLock) {
+  // Test that we can obtain a shared lock (no other holders)
+  // and that we can unlock it.
+  ASSERT_THAT(flock(test_file_fd_.get(), LOCK_SH | LOCK_NB),
+              SyscallSucceedsWithValue(0));
+  ASSERT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceedsWithValue(0));
+}
+
+TEST_F(FlockTest, TestLockableAnyMode) {
+  // flock(2): A shared or exclusive lock can be placed on a file
+  // regardless of the mode in which the file was opened.
+  const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(
+      Open(test_file_name_, O_RDONLY));  // open read only to test
+
+  // Mode shouldn't prevent us from taking an exclusive lock.
+  ASSERT_THAT(flock(fd.get(), LOCK_EX | LOCK_NB), SyscallSucceedsWithValue(0));
+
+  // Unlock
+  ASSERT_THAT(flock(fd.get(), LOCK_UN), SyscallSucceedsWithValue(0));
+}
+
+TEST_F(FlockTest, TestUnlockWithNoHolders) {
+  // Test that unlocking when no one holds a lock succeeeds.
+  ASSERT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceedsWithValue(0));
+}
+
+TEST_F(FlockTest, TestRepeatedExLockingBySameHolder) {
+  // Test that repeated locking by the same holder for the
+  // same type of lock works correctly.
+  ASSERT_THAT(flock(test_file_fd_.get(), LOCK_NB | LOCK_EX),
+              SyscallSucceedsWithValue(0));
+  ASSERT_THAT(flock(test_file_fd_.get(), LOCK_NB | LOCK_EX),
+              SyscallSucceedsWithValue(0));
+  ASSERT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceedsWithValue(0));
+}
+
+TEST_F(FlockTest, TestRepeatedExLockingSingleUnlock) {
+  // Test that repeated locking by the same holder for the
+  // same type of lock works correctly and that a single unlock is required.
+  ASSERT_THAT(flock(test_file_fd_.get(), LOCK_NB | LOCK_EX),
+              SyscallSucceedsWithValue(0));
+  ASSERT_THAT(flock(test_file_fd_.get(), LOCK_NB | LOCK_EX),
+              SyscallSucceedsWithValue(0));
+  ASSERT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceedsWithValue(0));
+
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDONLY));
+
+  // Should be unlocked at this point
+  ASSERT_THAT(flock(fd.get(), LOCK_NB | LOCK_EX), SyscallSucceedsWithValue(0));
+
+  ASSERT_THAT(flock(fd.get(), LOCK_UN), SyscallSucceedsWithValue(0));
+}
+
+TEST_F(FlockTest, TestRepeatedShLockingBySameHolder) {
+  // Test that repeated locking by the same holder for the
+  // same type of lock works correctly.
+  ASSERT_THAT(flock(test_file_fd_.get(), LOCK_NB | LOCK_SH),
+              SyscallSucceedsWithValue(0));
+  ASSERT_THAT(flock(test_file_fd_.get(), LOCK_NB | LOCK_SH),
+              SyscallSucceedsWithValue(0));
+  ASSERT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceedsWithValue(0));
+}
+
+TEST_F(FlockTest, TestSingleHolderUpgrade) {
+  // Test that a shared lock is upgradable when no one else holds a lock.
+  ASSERT_THAT(flock(test_file_fd_.get(), LOCK_NB | LOCK_SH),
+              SyscallSucceedsWithValue(0));
+  ASSERT_THAT(flock(test_file_fd_.get(), LOCK_NB | LOCK_EX),
+              SyscallSucceedsWithValue(0));
+  ASSERT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceedsWithValue(0));
+}
+
+TEST_F(FlockTest, TestSingleHolderDowngrade) {
+  // Test single holder lock downgrade case.
+  ASSERT_THAT(flock(test_file_fd_.get(), LOCK_EX | LOCK_NB),
+              SyscallSucceedsWithValue(0));
+  ASSERT_THAT(flock(test_file_fd_.get(), LOCK_SH | LOCK_NB),
+              SyscallSucceedsWithValue(0));
+  ASSERT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceedsWithValue(0));
+}
+
+TEST_F(FlockTest, TestMultipleShared) {
+  // This is a simple test to verify that multiple independent shared
+  // locks will be granted.
+  ASSERT_THAT(flock(test_file_fd_.get(), LOCK_SH | LOCK_NB),
+              SyscallSucceedsWithValue(0));
+
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR));
+
+  // A shared lock should be granted as there only exists other shared locks.
+  ASSERT_THAT(flock(fd.get(), LOCK_SH | LOCK_NB), SyscallSucceedsWithValue(0));
+
+  // Unlock both.
+  ASSERT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceedsWithValue(0));
+  ASSERT_THAT(flock(fd.get(), LOCK_UN), SyscallSucceedsWithValue(0));
+}
+
+/*
+ * flock(2): If a process uses open(2) (or similar) to obtain more than one
+ * descriptor for the same file, these descriptors are treated
+ * independently by flock(). An attempt to lock the file using one of
+ * these file descriptors may be denied by a lock that the calling process
+ * has already placed via another descriptor.
+ */
+TEST_F(FlockTest, TestMultipleHolderSharedExclusive) {
+  // This test will verify that an exclusive lock will not be granted
+  // while a shared is held.
+  ASSERT_THAT(flock(test_file_fd_.get(), LOCK_SH | LOCK_NB),
+              SyscallSucceedsWithValue(0));
+
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR));
+
+  // Verify We're unable to get an exlcusive lock via the second FD.
+  // because someone is holding a shared lock.
+  ASSERT_THAT(flock(fd.get(), LOCK_EX | LOCK_NB),
+              SyscallFailsWithErrno(EWOULDBLOCK));
+
+  // Unlock
+  ASSERT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceedsWithValue(0));
+}
+
+TEST_F(FlockTest, TestSharedLockFailExclusiveHolder) {
+  // This test will verify that a shared lock is denied while
+  // someone holds an exclusive lock.
+  ASSERT_THAT(flock(test_file_fd_.get(), LOCK_EX | LOCK_NB),
+              SyscallSucceedsWithValue(0));
+
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR));
+
+  // Verify we're unable to get an shared lock via the second FD.
+  // because someone is holding an exclusive lock.
+  ASSERT_THAT(flock(fd.get(), LOCK_SH | LOCK_NB),
+              SyscallFailsWithErrno(EWOULDBLOCK));
+
+  // Unlock
+  ASSERT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceedsWithValue(0));
+}
+
+TEST_F(FlockTest, TestExclusiveLockFailExclusiveHolder) {
+  // This test will verify that an exclusive lock is denied while
+  // someone already holds an exclsuive lock.
+  ASSERT_THAT(flock(test_file_fd_.get(), LOCK_EX | LOCK_NB),
+              SyscallSucceedsWithValue(0));
+
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR));
+
+  // Verify we're unable to get an exclusive lock via the second FD
+  // because someone is already holding an exclusive lock.
+  ASSERT_THAT(flock(fd.get(), LOCK_EX | LOCK_NB),
+              SyscallFailsWithErrno(EWOULDBLOCK));
+
+  // Unlock
+  ASSERT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceedsWithValue(0));
+}
+
+TEST_F(FlockTest, TestMultipleHolderSharedExclusiveUpgrade) {
+  // This test will verify that we cannot obtain an exclusive lock while
+  // a shared lock is held by another descriptor, then verify that an upgrade
+  // is possible on a shared lock once all other shared locks have closed.
+  ASSERT_THAT(flock(test_file_fd_.get(), LOCK_SH | LOCK_NB),
+              SyscallSucceedsWithValue(0));
+
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR));
+
+  // Verify we're unable to get an exclusive lock via the second FD because
+  // a shared lock is held.
+  ASSERT_THAT(flock(fd.get(), LOCK_EX | LOCK_NB),
+              SyscallFailsWithErrno(EWOULDBLOCK));
+
+  // Verify that we can get a shared lock via the second descriptor instead
+  ASSERT_THAT(flock(fd.get(), LOCK_SH | LOCK_NB), SyscallSucceedsWithValue(0));
+
+  // Unlock the first and there will only be one shared lock remaining.
+  ASSERT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceedsWithValue(0));
+
+  // Upgrade 2nd fd.
+  ASSERT_THAT(flock(fd.get(), LOCK_EX | LOCK_NB), SyscallSucceedsWithValue(0));
+
+  // Finally unlock the second
+  ASSERT_THAT(flock(fd.get(), LOCK_UN), SyscallSucceedsWithValue(0));
+}
+
+TEST_F(FlockTest, TestMultipleHolderSharedExclusiveDowngrade) {
+  // This test will verify that a shared lock is not obtainable while an
+  // exclusive lock is held but that once the first is downgraded that
+  // the second independent file descriptor can also get a shared lock.
+  ASSERT_THAT(flock(test_file_fd_.get(), LOCK_EX | LOCK_NB),
+              SyscallSucceedsWithValue(0));
+
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR));
+
+  // Verify We're unable to get a shared lock via the second FD because
+  // an exclusive lock is held.
+  ASSERT_THAT(flock(fd.get(), LOCK_SH | LOCK_NB),
+              SyscallFailsWithErrno(EWOULDBLOCK));
+
+  // Verify that we can downgrade the first.
+  ASSERT_THAT(flock(test_file_fd_.get(), LOCK_SH | LOCK_NB),
+              SyscallSucceedsWithValue(0));
+
+  // Now verify that we can obtain a shared lock since the first was downgraded.
+  ASSERT_THAT(flock(fd.get(), LOCK_SH | LOCK_NB), SyscallSucceedsWithValue(0));
+
+  // Finally unlock both.
+  ASSERT_THAT(flock(fd.get(), LOCK_UN), SyscallSucceedsWithValue(0));
+  ASSERT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceedsWithValue(0));
+}
+
+/*
+ * flock(2): Locks created by flock() are associated with an open file table
+ * entry. This means that duplicate file descriptors (created by, for example,
+ * fork(2) or dup(2)) refer to the same lock, and this lock may be modified or
+ * released using any of these descriptors. Furthermore, the lock is released
+ * either by an explicit LOCK_UN operation on any of these duplicate descriptors
+ * or when all such descriptors have been closed.
+ */
+TEST_F(FlockTest, TestDupFdUpgrade) {
+  // This test will verify that a shared lock is upgradeable via a dupped
+  // file descriptor, if the FD wasn't dupped this would fail.
+  ASSERT_THAT(flock(test_file_fd_.get(), LOCK_SH | LOCK_NB),
+              SyscallSucceedsWithValue(0));
+
+  const FileDescriptor dup_fd = ASSERT_NO_ERRNO_AND_VALUE(test_file_fd_.Dup());
+
+  // Now we should be able to upgrade via the dupped fd.
+  ASSERT_THAT(flock(dup_fd.get(), LOCK_EX | LOCK_NB),
+              SyscallSucceedsWithValue(0));
+
+  // Validate unlock via dupped fd.
+  ASSERT_THAT(flock(dup_fd.get(), LOCK_UN), SyscallSucceedsWithValue(0));
+}
+
+TEST_F(FlockTest, TestDupFdDowngrade) {
+  // This test will verify that a exclusive lock is downgradable via a dupped
+  // file descriptor, if the FD wasn't dupped this would fail.
+  ASSERT_THAT(flock(test_file_fd_.get(), LOCK_EX | LOCK_NB),
+              SyscallSucceedsWithValue(0));
+
+  const FileDescriptor dup_fd = ASSERT_NO_ERRNO_AND_VALUE(test_file_fd_.Dup());
+
+  // Now we should be able to downgrade via the dupped fd.
+  ASSERT_THAT(flock(dup_fd.get(), LOCK_SH | LOCK_NB),
+              SyscallSucceedsWithValue(0));
+
+  // Validate unlock via dupped fd
+  ASSERT_THAT(flock(dup_fd.get(), LOCK_UN), SyscallSucceedsWithValue(0));
+}
+
+TEST_F(FlockTest, TestDupFdCloseRelease) {
+  // flock(2): Furthermore, the lock is released either by an explicit LOCK_UN
+  // operation on any of these duplicate descriptors, or when all such
+  // descriptors have been closed.
+  //
+  // This test will verify that a dupped fd closing will not release the
+  // underlying lock until all such dupped fds have closed.
+  ASSERT_THAT(flock(test_file_fd_.get(), LOCK_EX | LOCK_NB),
+              SyscallSucceedsWithValue(0));
+
+  FileDescriptor dup_fd = ASSERT_NO_ERRNO_AND_VALUE(test_file_fd_.Dup());
+
+  // At this point we have ONE exclusive locked referenced by two different fds.
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR));
+
+  // Validate that we cannot get a lock on a new unrelated FD.
+  ASSERT_THAT(flock(fd.get(), LOCK_EX | LOCK_NB),
+              SyscallFailsWithErrno(EWOULDBLOCK));
+
+  // Closing the dupped fd shouldn't affect the lock until all are closed.
+  dup_fd.reset();  // Closed the duped fd.
+
+  // Validate that we still cannot get a lock on a new unrelated FD.
+  ASSERT_THAT(flock(fd.get(), LOCK_EX | LOCK_NB),
+              SyscallFailsWithErrno(EWOULDBLOCK));
+
+  // Closing the first fd
+  CloseFile();  // Will validate the syscall succeeds.
+
+  // Now we should actually be able to get a lock since all fds related to
+  // the first lock are closed.
+  ASSERT_THAT(flock(fd.get(), LOCK_EX | LOCK_NB), SyscallSucceedsWithValue(0));
+
+  // Unlock.
+  ASSERT_THAT(flock(fd.get(), LOCK_UN), SyscallSucceedsWithValue(0));
+}
+
+TEST_F(FlockTest, TestDupFdUnlockRelease) {
+  /* flock(2): Furthermore, the lock is released either by an explicit LOCK_UN
+   * operation on any of these duplicate descriptors, or when all such
+   * descriptors have been closed.
+   */
+  // This test will verify that an explict unlock on a dupped FD will release
+  // the underlying lock unlike the previous case where close on a dup was
+  // not enough to release the lock.
+  ASSERT_THAT(flock(test_file_fd_.get(), LOCK_EX | LOCK_NB),
+              SyscallSucceedsWithValue(0));
+
+  const FileDescriptor dup_fd = ASSERT_NO_ERRNO_AND_VALUE(test_file_fd_.Dup());
+
+  // At this point we have ONE exclusive locked referenced by two different fds.
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR));
+
+  // Validate that we cannot get a lock on a new unrelated FD.
+  ASSERT_THAT(flock(fd.get(), LOCK_EX | LOCK_NB),
+              SyscallFailsWithErrno(EWOULDBLOCK));
+
+  // Explicitly unlock via the dupped descriptor.
+  ASSERT_THAT(flock(dup_fd.get(), LOCK_UN), SyscallSucceedsWithValue(0));
+
+  // Validate that we can now get the lock since we explicitly unlocked.
+  ASSERT_THAT(flock(fd.get(), LOCK_EX | LOCK_NB), SyscallSucceedsWithValue(0));
+
+  // Unlock
+  ASSERT_THAT(flock(fd.get(), LOCK_UN), SyscallSucceedsWithValue(0));
+}
+
+TEST_F(FlockTest, TestDupFdFollowedByLock) {
+  // This test will verify that taking a lock on a file descriptor that has
+  // already been dupped means that the lock is shared between both. This is
+  // slightly different than than duping on an already locked FD.
+  FileDescriptor dup_fd = ASSERT_NO_ERRNO_AND_VALUE(test_file_fd_.Dup());
+
+  // Take a lock.
+  ASSERT_THAT(flock(dup_fd.get(), LOCK_EX | LOCK_NB), SyscallSucceeds());
+
+  // Now dup_fd and test_file_ should both reference the same lock.
+  // We shouldn't be able to obtain a lock until both are closed.
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR));
+
+  // Closing the first fd
+  dup_fd.reset();  // Close the duped fd.
+
+  // Validate that we cannot get a lock yet because the dupped descriptor.
+  ASSERT_THAT(flock(fd.get(), LOCK_EX | LOCK_NB),
+              SyscallFailsWithErrno(EWOULDBLOCK));
+
+  // Closing the second fd.
+  CloseFile();  // CloseFile() will validate the syscall succeeds.
+
+  // Now we should be able to get the lock.
+  ASSERT_THAT(flock(fd.get(), LOCK_EX | LOCK_NB), SyscallSucceeds());
+
+  // Unlock.
+  ASSERT_THAT(flock(fd.get(), LOCK_UN), SyscallSucceedsWithValue(0));
+}
+
+// NOTE: These blocking tests are not perfect. Unfortunately it's very hard to
+// determine if a thread was actually blocked in the kernel so we're forced
+// to use timing.
+TEST_F(FlockTest, BlockingLockNoBlockingForSharedLocks_NoRandomSave) {
+  // This test will verify that although LOCK_NB isn't specified
+  // two different fds can obtain shared locks without blocking.
+  ASSERT_THAT(flock(test_file_fd_.get(), LOCK_SH), SyscallSucceeds());
+
+  // kHoldLockTime is the amount of time we will hold the lock before releasing.
+  constexpr absl::Duration kHoldLockTime = absl::Seconds(30);
+
+  const DisableSave ds;  // Timing-related.
+
+  // We do this in another thread so we can determine if it was actually
+  // blocked by timing the amount of time it took for the syscall to complete.
+  ScopedThread t([&] {
+    MonotonicTimer timer;
+    const FileDescriptor fd =
+        ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR));
+
+    // Only a single shared lock is held, the lock will be granted immediately.
+    // This should be granted without any blocking. Don't save here to avoid
+    // wild discrepencies on timing.
+    timer.Start();
+    ASSERT_THAT(flock(fd.get(), LOCK_SH), SyscallSucceeds());
+
+    // We held the lock for 30 seconds but this thread should not have
+    // blocked at all so we expect a very small duration on syscall completion.
+    ASSERT_LT(timer.Duration(),
+              absl::Seconds(1));  // 1000ms is much less than 30s.
+
+    // We can release our second shared lock
+    ASSERT_THAT(flock(fd.get(), LOCK_UN), SyscallSucceeds());
+  });
+
+  // Sleep before unlocking.
+  absl::SleepFor(kHoldLockTime);
+
+  // Release the first shared lock. Don't save in this situation to avoid
+  // discrepencies in timing.
+  EXPECT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceeds());
+}
+
+TEST_F(FlockTest, BlockingLockFirstSharedSecondExclusive_NoRandomSave) {
+  // This test will verify that if someone holds a shared lock any attempt to
+  // obtain an exclusive lock will result in blocking.
+  ASSERT_THAT(flock(test_file_fd_.get(), LOCK_SH), SyscallSucceeds());
+
+  // kHoldLockTime is the amount of time we will hold the lock before releasing.
+  constexpr absl::Duration kHoldLockTime = absl::Seconds(2);
+
+  const DisableSave ds;  // Timing-related.
+
+  // We do this in another thread so we can determine if it was actually
+  // blocked by timing the amount of time it took for the syscall to complete.
+  ScopedThread t([&] {
+    MonotonicTimer timer;
+    const FileDescriptor fd =
+        ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR));
+
+    // This exclusive lock should block because someone is already holding a
+    // shared lock. We don't save here to avoid wild discrepencies on timing.
+    timer.Start();
+    ASSERT_THAT(RetryEINTR(flock)(fd.get(), LOCK_EX), SyscallSucceeds());
+
+    // We should be blocked, we will expect to be blocked for more than 1.0s.
+    ASSERT_GT(timer.Duration(), absl::Seconds(1));
+
+    // We can release our exclusive lock.
+    ASSERT_THAT(flock(fd.get(), LOCK_UN), SyscallSucceeds());
+  });
+
+  // Sleep before unlocking.
+  absl::SleepFor(kHoldLockTime);
+
+  // Release the shared lock allowing the thread to proceed.
+  // We don't save here to avoid wild discrepencies in timing.
+  EXPECT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceeds());
+}
+
+TEST_F(FlockTest, BlockingLockFirstExclusiveSecondShared_NoRandomSave) {
+  // This test will verify that if someone holds an exclusive lock any attempt
+  // to obtain a shared lock will result in blocking.
+  ASSERT_THAT(flock(test_file_fd_.get(), LOCK_EX), SyscallSucceeds());
+
+  // kHoldLockTime is the amount of time we will hold the lock before releasing.
+  constexpr absl::Duration kHoldLockTime = absl::Seconds(2);
+
+  const DisableSave ds;  // Timing-related.
+
+  // We do this in another thread so we can determine if it was actually
+  // blocked by timing the amount of time it took for the syscall to complete.
+  ScopedThread t([&] {
+    MonotonicTimer timer;
+    const FileDescriptor fd =
+        ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR));
+
+    // This shared lock should block because someone is already holding an
+    // exclusive lock. We don't save here to avoid wild discrepencies on timing.
+    timer.Start();
+    ASSERT_THAT(RetryEINTR(flock)(fd.get(), LOCK_SH), SyscallSucceeds());
+
+    // We should be blocked, we will expect to be blocked for more than 1.0s.
+    ASSERT_GT(timer.Duration(), absl::Seconds(1));
+
+    // We can release our shared lock.
+    ASSERT_THAT(flock(fd.get(), LOCK_UN), SyscallSucceeds());
+  });
+
+  // Sleep before unlocking.
+  absl::SleepFor(kHoldLockTime);
+
+  // Release the exclusive lock allowing the blocked thread to proceed.
+  // We don't save here to avoid wild discrepencies in timing.
+  EXPECT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceeds());
+}
+
+TEST_F(FlockTest, BlockingLockFirstExclusiveSecondExclusive_NoRandomSave) {
+  // This test will verify that if someone holds an exclusive lock any attempt
+  // to obtain another exclusive lock will result in blocking.
+  ASSERT_THAT(flock(test_file_fd_.get(), LOCK_EX), SyscallSucceeds());
+
+  // kHoldLockTime is the amount of time we will hold the lock before releasing.
+  constexpr absl::Duration kHoldLockTime = absl::Seconds(2);
+
+  const DisableSave ds;  // Timing-related.
+
+  // We do this in another thread so we can determine if it was actually
+  // blocked by timing the amount of time it took for the syscall to complete.
+  ScopedThread t([&] {
+    MonotonicTimer timer;
+    const FileDescriptor fd =
+        ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR));
+
+    // This exclusive lock should block because someone is already holding an
+    // exclusive lock.
+    timer.Start();
+    ASSERT_THAT(RetryEINTR(flock)(fd.get(), LOCK_EX), SyscallSucceeds());
+
+    // We should be blocked, we will expect to be blocked for more than 1.0s.
+    ASSERT_GT(timer.Duration(), absl::Seconds(1));
+
+    // We can release our exclusive lock.
+    ASSERT_THAT(flock(fd.get(), LOCK_UN), SyscallSucceeds());
+  });
+
+  // Sleep before unlocking.
+  absl::SleepFor(kHoldLockTime);
+
+  // Release the exclusive lock allowing the blocked thread to proceed.
+  // We don't save to avoid wild discrepencies in timing.
+  EXPECT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceeds());
+}
+
+TEST(FlockTestNoFixture, BadFD) {
+  // EBADF: fd is not an open file descriptor.
+  ASSERT_THAT(flock(-1, 0), SyscallFailsWithErrno(EBADF));
+}
+
+TEST(FlockTestNoFixture, FlockDir) {
+  auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  auto fd = ASSERT_NO_ERRNO_AND_VALUE(Open(dir.path(), O_RDONLY, 0000));
+  EXPECT_THAT(flock(fd.get(), LOCK_EX | LOCK_NB), SyscallSucceeds());
+}
+
+TEST(FlockTestNoFixture, FlockSymlink) {
+  // TODO(gvisor.dev/issue/2782): Replace with IsRunningWithVFS1() when O_PATH
+  // is supported.
+  SKIP_IF(IsRunningOnGvisor());
+
+  auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  auto symlink = ASSERT_NO_ERRNO_AND_VALUE(
+      TempPath::CreateSymlinkTo(GetAbsoluteTestTmpdir(), file.path()));
+
+  auto fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(symlink.path(), O_RDONLY | O_PATH, 0000));
+  EXPECT_THAT(flock(fd.get(), LOCK_EX | LOCK_NB), SyscallFailsWithErrno(EBADF));
+}
+
+TEST(FlockTestNoFixture, FlockProc) {
+  auto fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/self/status", O_RDONLY, 0000));
+  EXPECT_THAT(flock(fd.get(), LOCK_EX | LOCK_NB), SyscallSucceeds());
+}
+
+TEST(FlockTestNoFixture, FlockPipe) {
+  int fds[2];
+  ASSERT_THAT(pipe(fds), SyscallSucceeds());
+
+  EXPECT_THAT(flock(fds[0], LOCK_EX | LOCK_NB), SyscallSucceeds());
+  // Check that the pipe was locked above.
+  EXPECT_THAT(flock(fds[1], LOCK_EX | LOCK_NB), SyscallFailsWithErrno(EAGAIN));
+
+  EXPECT_THAT(flock(fds[0], LOCK_UN), SyscallSucceeds());
+  EXPECT_THAT(flock(fds[1], LOCK_EX | LOCK_NB), SyscallSucceeds());
+
+  EXPECT_THAT(close(fds[0]), SyscallSucceeds());
+  EXPECT_THAT(close(fds[1]), SyscallSucceeds());
+}
+
+TEST(FlockTestNoFixture, FlockSocket) {
+  int sock = socket(AF_UNIX, SOCK_STREAM, 0);
+  ASSERT_THAT(sock, SyscallSucceeds());
+
+  struct sockaddr_un addr =
+      ASSERT_NO_ERRNO_AND_VALUE(UniqueUnixAddr(true /* abstract */, AF_UNIX));
+  ASSERT_THAT(
+      bind(sock, reinterpret_cast<struct sockaddr*>(&addr), sizeof(addr)),
+      SyscallSucceeds());
+
+  EXPECT_THAT(flock(sock, LOCK_EX | LOCK_NB), SyscallSucceeds());
+  EXPECT_THAT(close(sock), SyscallSucceeds());
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/fork.cc b/test/syscalls/linux/fork.cc
new file mode 100644
index 000000000..853f6231a
--- /dev/null
+++ b/test/syscalls/linux/fork.cc
@@ -0,0 +1,464 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <atomic>
+#include <cstdlib>
+
+#include "gtest/gtest.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/util/capability_util.h"
+#include "test/util/logging.h"
+#include "test/util/memory_util.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+using ::testing::Ge;
+
+class ForkTest : public ::testing::Test {
+ protected:
+  // SetUp creates a populated, open file.
+  void SetUp() override {
+    // Make a shared mapping.
+    shared_ = reinterpret_cast<char*>(mmap(0, kPageSize, PROT_READ | PROT_WRITE,
+                                           MAP_SHARED | MAP_ANONYMOUS, -1, 0));
+    ASSERT_NE(reinterpret_cast<void*>(shared_), MAP_FAILED);
+
+    // Make a private mapping.
+    private_ =
+        reinterpret_cast<char*>(mmap(0, kPageSize, PROT_READ | PROT_WRITE,
+                                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
+    ASSERT_NE(reinterpret_cast<void*>(private_), MAP_FAILED);
+
+    // Make a pipe.
+    ASSERT_THAT(pipe(pipes_), SyscallSucceeds());
+  }
+
+  // TearDown frees associated resources.
+  void TearDown() override {
+    EXPECT_THAT(munmap(shared_, kPageSize), SyscallSucceeds());
+    EXPECT_THAT(munmap(private_, kPageSize), SyscallSucceeds());
+    EXPECT_THAT(close(pipes_[0]), SyscallSucceeds());
+    EXPECT_THAT(close(pipes_[1]), SyscallSucceeds());
+  }
+
+  // Fork executes a clone system call.
+  pid_t Fork() {
+    pid_t pid = fork();
+    MaybeSave();
+    TEST_PCHECK_MSG(pid >= 0, "fork failed");
+    return pid;
+  }
+
+  // Wait waits for the given pid and returns the exit status. If the child was
+  // killed by a signal or an error occurs, then 256+signal is returned.
+  int Wait(pid_t pid) {
+    int status;
+    while (true) {
+      int rval = wait4(pid, &status, 0, NULL);
+      if (rval < 0) {
+        return rval;
+      }
+      if (rval != pid) {
+        continue;
+      }
+      if (WIFEXITED(status)) {
+        return WEXITSTATUS(status);
+      }
+      if (WIFSIGNALED(status)) {
+        return 256 + WTERMSIG(status);
+      }
+    }
+  }
+
+  // Exit exits the proccess.
+  void Exit(int code) {
+    _exit(code);
+
+    // Should never reach here. Since the exit above failed, we really don't
+    // have much in the way of options to indicate failure. So we just try to
+    // log an assertion failure to the logs. The parent process will likely
+    // fail anyways if exit is not working.
+    TEST_CHECK_MSG(false, "_exit returned");
+  }
+
+  // ReadByte reads a byte from the shared pipe.
+  char ReadByte() {
+    char val = -1;
+    TEST_PCHECK(ReadFd(pipes_[0], &val, 1) == 1);
+    MaybeSave();
+    return val;
+  }
+
+  // WriteByte writes a byte from the shared pipe.
+  void WriteByte(char val) {
+    TEST_PCHECK(WriteFd(pipes_[1], &val, 1) == 1);
+    MaybeSave();
+  }
+
+  // Shared pipe.
+  int pipes_[2];
+
+  // Shared mapping (one page).
+  char* shared_;
+
+  // Private mapping (one page).
+  char* private_;
+};
+
+TEST_F(ForkTest, Simple) {
+  pid_t child = Fork();
+  if (child == 0) {
+    Exit(0);
+  }
+  EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0));
+}
+
+TEST_F(ForkTest, ExitCode) {
+  pid_t child = Fork();
+  if (child == 0) {
+    Exit(123);
+  }
+  EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(123));
+  child = Fork();
+  if (child == 0) {
+    Exit(1);
+  }
+  EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(1));
+}
+
+TEST_F(ForkTest, Multi) {
+  pid_t child1 = Fork();
+  if (child1 == 0) {
+    Exit(0);
+  }
+  pid_t child2 = Fork();
+  if (child2 == 0) {
+    Exit(1);
+  }
+  EXPECT_THAT(Wait(child1), SyscallSucceedsWithValue(0));
+  EXPECT_THAT(Wait(child2), SyscallSucceedsWithValue(1));
+}
+
+TEST_F(ForkTest, Pipe) {
+  pid_t child = Fork();
+  if (child == 0) {
+    WriteByte(1);
+    Exit(0);
+  }
+  EXPECT_EQ(ReadByte(), 1);
+  EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0));
+}
+
+TEST_F(ForkTest, SharedMapping) {
+  pid_t child = Fork();
+  if (child == 0) {
+    // Wait for the parent.
+    ReadByte();
+    if (shared_[0] == 1) {
+      Exit(0);
+    }
+    // Failed.
+    Exit(1);
+  }
+  // Change the mapping.
+  ASSERT_EQ(shared_[0], 0);
+  shared_[0] = 1;
+  // Unblock the child.
+  WriteByte(0);
+  // Did it work?
+  EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0));
+}
+
+TEST_F(ForkTest, PrivateMapping) {
+  pid_t child = Fork();
+  if (child == 0) {
+    // Wait for the parent.
+    ReadByte();
+    if (private_[0] == 0) {
+      Exit(0);
+    }
+    // Failed.
+    Exit(1);
+  }
+  // Change the mapping.
+  ASSERT_EQ(private_[0], 0);
+  private_[0] = 1;
+  // Unblock the child.
+  WriteByte(0);
+  // Did it work?
+  EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0));
+}
+
+// CPUID is x86 specific.
+#ifdef __x86_64__
+// Test that cpuid works after a fork.
+TEST_F(ForkTest, Cpuid) {
+  pid_t child = Fork();
+
+  // We should be able to determine the CPU vendor.
+  ASSERT_NE(GetCPUVendor(), CPUVendor::kUnknownVendor);
+
+  if (child == 0) {
+    Exit(0);
+  }
+  EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0));
+}
+#endif
+
+TEST_F(ForkTest, Mmap) {
+  pid_t child = Fork();
+
+  if (child == 0) {
+    void* addr =
+        mmap(0, kPageSize, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+    MaybeSave();
+    Exit(addr == MAP_FAILED);
+  }
+
+  EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0));
+}
+
+static volatile int alarmed = 0;
+
+void AlarmHandler(int sig, siginfo_t* info, void* context) { alarmed = 1; }
+
+TEST_F(ForkTest, Alarm) {
+  // Setup an alarm handler.
+  struct sigaction sa;
+  sa.sa_sigaction = AlarmHandler;
+  sigfillset(&sa.sa_mask);
+  sa.sa_flags = SA_SIGINFO;
+  EXPECT_THAT(sigaction(SIGALRM, &sa, nullptr), SyscallSucceeds());
+
+  pid_t child = Fork();
+
+  if (child == 0) {
+    alarm(1);
+    sleep(3);
+    if (!alarmed) {
+      Exit(1);
+    }
+    Exit(0);
+  }
+
+  EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0));
+  EXPECT_EQ(0, alarmed);
+}
+
+// Child cannot affect parent private memory. Regression test for b/24137240.
+TEST_F(ForkTest, PrivateMemory) {
+  std::atomic<uint32_t> local(0);
+
+  pid_t child1 = Fork();
+  if (child1 == 0) {
+    local++;
+
+    pid_t child2 = Fork();
+    if (child2 == 0) {
+      local++;
+
+      TEST_CHECK(local.load() == 2);
+
+      Exit(0);
+    }
+
+    TEST_PCHECK(Wait(child2) == 0);
+    TEST_CHECK(local.load() == 1);
+    Exit(0);
+  }
+
+  EXPECT_THAT(Wait(child1), SyscallSucceedsWithValue(0));
+  EXPECT_EQ(0, local.load());
+}
+
+// Kernel-accessed buffers should remain coherent across COW.
+//
+// The buffer must be >= usermem.ZeroCopyMinBytes, as UnsafeAccess operates
+// differently. Regression test for b/33811887.
+TEST_F(ForkTest, COWSegment) {
+  constexpr int kBufSize = 1024;
+  char* read_buf = private_;
+  char* touch = private_ + kPageSize / 2;
+
+  std::string contents(kBufSize, 'a');
+
+  ScopedThread t([&] {
+    // Wait to be sure the parent is blocked in read.
+    absl::SleepFor(absl::Seconds(3));
+
+    // Fork to mark private pages for COW.
+    //
+    // Use fork directly rather than the Fork wrapper to skip the multi-threaded
+    // check, and limit the child to async-signal-safe functions:
+    //
+    // "After a fork() in a multithreaded program, the child can safely call
+    // only async-signal-safe functions (see signal(7)) until such time as it
+    // calls execve(2)."
+    //
+    // Skip ASSERT in the child, as it isn't async-signal-safe.
+    pid_t child = fork();
+    if (child == 0) {
+      // Wait to be sure parent touched memory.
+      sleep(3);
+      Exit(0);
+    }
+
+    // Check success only in the parent.
+    ASSERT_THAT(child, SyscallSucceedsWithValue(Ge(0)));
+
+    // Trigger COW on private page.
+    *touch = 42;
+
+    // Write to pipe. Parent should still be able to read this.
+    EXPECT_THAT(WriteFd(pipes_[1], contents.c_str(), kBufSize),
+                SyscallSucceedsWithValue(kBufSize));
+
+    EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0));
+  });
+
+  EXPECT_THAT(ReadFd(pipes_[0], read_buf, kBufSize),
+              SyscallSucceedsWithValue(kBufSize));
+  EXPECT_STREQ(contents.c_str(), read_buf);
+}
+
+TEST_F(ForkTest, SigAltStack) {
+  std::vector<char> stack_mem(SIGSTKSZ);
+  stack_t stack = {};
+  stack.ss_size = SIGSTKSZ;
+  stack.ss_sp = stack_mem.data();
+  ASSERT_THAT(sigaltstack(&stack, nullptr), SyscallSucceeds());
+
+  pid_t child = Fork();
+
+  if (child == 0) {
+    stack_t oss = {};
+    TEST_PCHECK(sigaltstack(nullptr, &oss) == 0);
+    MaybeSave();
+
+    TEST_CHECK((oss.ss_flags & SS_DISABLE) == 0);
+    TEST_CHECK(oss.ss_size == SIGSTKSZ);
+    TEST_CHECK(oss.ss_sp == stack.ss_sp);
+
+    Exit(0);
+  }
+  EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0));
+}
+
+TEST_F(ForkTest, Affinity) {
+  // Make a non-default cpumask.
+  cpu_set_t parent_mask;
+  EXPECT_THAT(sched_getaffinity(/*pid=*/0, sizeof(cpu_set_t), &parent_mask),
+              SyscallSucceeds());
+  // Knock out the lowest bit.
+  for (unsigned int n = 0; n < CPU_SETSIZE; n++) {
+    if (CPU_ISSET(n, &parent_mask)) {
+      CPU_CLR(n, &parent_mask);
+      break;
+    }
+  }
+  EXPECT_THAT(sched_setaffinity(/*pid=*/0, sizeof(cpu_set_t), &parent_mask),
+              SyscallSucceeds());
+
+  pid_t child = Fork();
+  if (child == 0) {
+    cpu_set_t child_mask;
+
+    int ret = sched_getaffinity(/*pid=*/0, sizeof(cpu_set_t), &child_mask);
+    MaybeSave();
+    if (ret < 0) {
+      Exit(-ret);
+    }
+
+    TEST_CHECK(CPU_EQUAL(&child_mask, &parent_mask));
+
+    Exit(0);
+  }
+
+  EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0));
+}
+
+TEST(CloneTest, NewUserNamespacePermitsAllOtherNamespaces) {
+  // "If CLONE_NEWUSER is specified along with other CLONE_NEW* flags in a
+  // single clone(2) or unshare(2) call, the user namespace is guaranteed to be
+  // created first, giving the child (clone(2)) or caller (unshare(2))
+  // privileges over the remaining namespaces created by the call. Thus, it is
+  // possible for an unprivileged caller to specify this combination of flags."
+  // - user_namespaces(7)
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanCreateUserNamespace()));
+  Mapping child_stack = ASSERT_NO_ERRNO_AND_VALUE(
+      MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE));
+  int child_pid;
+  // We only test with CLONE_NEWIPC, CLONE_NEWNET, and CLONE_NEWUTS since these
+  // namespaces were implemented in Linux before user namespaces.
+  ASSERT_THAT(
+      child_pid = clone(
+          +[](void*) { return 0; },
+          reinterpret_cast<void*>(child_stack.addr() + kPageSize),
+          CLONE_NEWUSER | CLONE_NEWIPC | CLONE_NEWNET | CLONE_NEWUTS | SIGCHLD,
+          /* arg = */ nullptr),
+      SyscallSucceeds());
+
+  int status;
+  ASSERT_THAT(waitpid(child_pid, &status, 0),
+              SyscallSucceedsWithValue(child_pid));
+  EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+      << "status = " << status;
+}
+
+// Clone with CLONE_SETTLS and a non-canonical TLS address is rejected.
+TEST(CloneTest, NonCanonicalTLS) {
+  constexpr uintptr_t kNonCanonical = 1ull << 48;
+
+  // We need a valid address for the stack pointer. We'll never actually execute
+  // on this.
+  char stack;
+
+  // The raw system call interface on x86-64 is:
+  // long clone(unsigned long flags, void *stack,
+  //            int *parent_tid, int *child_tid,
+  //            unsigned long tls);
+  //
+  // While on arm64, the order of the last two arguments is reversed:
+  // long clone(unsigned long flags, void *stack,
+  //            int *parent_tid, unsigned long tls,
+  //            int *child_tid);
+#if defined(__x86_64__)
+  EXPECT_THAT(syscall(__NR_clone, SIGCHLD | CLONE_SETTLS, &stack, nullptr,
+                      nullptr, kNonCanonical),
+              SyscallFailsWithErrno(EPERM));
+#elif defined(__aarch64__)
+  EXPECT_THAT(syscall(__NR_clone, SIGCHLD | CLONE_SETTLS, &stack, nullptr,
+                      kNonCanonical, nullptr),
+              SyscallFailsWithErrno(EPERM));
+#endif
+}
+
+}  // namespace
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/fpsig_fork.cc b/test/syscalls/linux/fpsig_fork.cc
new file mode 100644
index 000000000..c47567b4e
--- /dev/null
+++ b/test/syscalls/linux/fpsig_fork.cc
@@ -0,0 +1,131 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// This test verifies that fork(2) in a signal handler will correctly
+// restore floating point state after the signal handler returns in both
+// the child and parent.
+#include <sys/time.h>
+
+#include "gtest/gtest.h"
+#include "test/util/logging.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+#ifdef __x86_64__
+#define GET_XMM(__var, __xmm) \
+  asm volatile("movq %%" #__xmm ", %0" : "=r"(__var))
+#define SET_XMM(__var, __xmm) asm volatile("movq %0, %%" #__xmm : : "r"(__var))
+#define GET_FP0(__var) GET_XMM(__var, xmm0)
+#define SET_FP0(__var) SET_XMM(__var, xmm0)
+#elif __aarch64__
+#define __stringify_1(x...) #x
+#define __stringify(x...) __stringify_1(x)
+#define GET_FPREG(var, regname) \
+  asm volatile("str " __stringify(regname) ", %0" : "=m"(var))
+#define SET_FPREG(var, regname) \
+  asm volatile("ldr " __stringify(regname) ", %0" : "=m"(var))
+#define GET_FP0(var) GET_FPREG(var, d0)
+#define SET_FP0(var) SET_FPREG(var, d0)
+#endif
+
+int parent, child;
+
+void sigusr1(int s, siginfo_t* siginfo, void* _uc) {
+  // Fork and clobber %xmm0. The fpstate should be restored by sigreturn(2)
+  // in both parent and child.
+  child = fork();
+  TEST_CHECK_MSG(child >= 0, "fork failed");
+
+  uint64_t val = SIGUSR1;
+  SET_FP0(val);
+  uint64_t got;
+  GET_FP0(got);
+  TEST_CHECK_MSG(val == got, "Basic FP check failed in sigusr1()");
+}
+
+TEST(FPSigTest, Fork) {
+  parent = getpid();
+  pid_t parent_tid = gettid();
+
+  struct sigaction sa = {};
+  sigemptyset(&sa.sa_mask);
+  sa.sa_flags = SA_SIGINFO;
+  sa.sa_sigaction = sigusr1;
+  ASSERT_THAT(sigaction(SIGUSR1, &sa, nullptr), SyscallSucceeds());
+
+  // The amd64 ABI specifies that the XMM register set is caller-saved. This
+  // implies that if there is any function call between SET_XMM and GET_XMM the
+  // compiler might save/restore xmm0 implicitly. This defeats the entire
+  // purpose of the test which is to verify that fpstate is restored by
+  // sigreturn(2).
+  //
+  // This is the reason why 'tgkill(getpid(), gettid(), SIGUSR1)' is implemented
+  // in inline assembly below.
+  //
+  // If the OS is broken and registers are clobbered by the child, using tgkill
+  // to signal the current thread increases the likelihood that this thread will
+  // be the one clobbered.
+
+  uint64_t expected = 0xdeadbeeffacefeed;
+  SET_FP0(expected);
+
+#ifdef __x86_64__
+  asm volatile(
+      "movl %[killnr], %%eax;"
+      "movl %[parent], %%edi;"
+      "movl %[tid], %%esi;"
+      "movl %[sig], %%edx;"
+      "syscall;"
+      :
+      : [ killnr ] "i"(__NR_tgkill), [ parent ] "rm"(parent),
+        [ tid ] "rm"(parent_tid), [ sig ] "i"(SIGUSR1)
+      : "rax", "rdi", "rsi", "rdx",
+        // Clobbered by syscall.
+        "rcx", "r11");
+#elif __aarch64__
+  asm volatile(
+      "mov x8, %0\n"
+      "mov x0, %1\n"
+      "mov x1, %2\n"
+      "mov x2, %3\n"
+      "svc #0\n" ::"r"(__NR_tgkill),
+      "r"(parent), "r"(parent_tid), "r"(SIGUSR1));
+#endif
+
+  uint64_t got;
+  GET_FP0(got);
+
+  if (getpid() == parent) {  // Parent.
+    int status;
+    ASSERT_THAT(waitpid(child, &status, 0), SyscallSucceedsWithValue(child));
+    EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0);
+  }
+
+  // TEST_CHECK_MSG since this may run in the child.
+  TEST_CHECK_MSG(expected == got, "Bad xmm0 value");
+
+  if (getpid() != parent) {  // Child.
+    _exit(0);
+  }
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/fpsig_nested.cc b/test/syscalls/linux/fpsig_nested.cc
new file mode 100644
index 000000000..302d928d1
--- /dev/null
+++ b/test/syscalls/linux/fpsig_nested.cc
@@ -0,0 +1,167 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// This program verifies that application floating point state is restored
+// correctly after a signal handler returns. It also verifies that this works
+// with nested signals.
+#include <sys/time.h>
+
+#include "gtest/gtest.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+#ifdef __x86_64__
+#define GET_XMM(__var, __xmm) \
+  asm volatile("movq %%" #__xmm ", %0" : "=r"(__var))
+#define SET_XMM(__var, __xmm) asm volatile("movq %0, %%" #__xmm : : "r"(__var))
+#define GET_FP0(__var) GET_XMM(__var, xmm0)
+#define SET_FP0(__var) SET_XMM(__var, xmm0)
+#elif __aarch64__
+#define __stringify_1(x...) #x
+#define __stringify(x...) __stringify_1(x)
+#define GET_FPREG(var, regname) \
+  asm volatile("str " __stringify(regname) ", %0" : "=m"(var))
+#define SET_FPREG(var, regname) \
+  asm volatile("ldr " __stringify(regname) ", %0" : "=m"(var))
+#define GET_FP0(var) GET_FPREG(var, d0)
+#define SET_FP0(var) SET_FPREG(var, d0)
+#endif
+
+int pid;
+int tid;
+
+volatile uint64_t entryxmm[2] = {~0UL, ~0UL};
+volatile uint64_t exitxmm[2];
+
+void sigusr2(int s, siginfo_t* siginfo, void* _uc) {
+  uint64_t val = SIGUSR2;
+
+  // Record the value of %xmm0 on entry and then clobber it.
+  GET_FP0(entryxmm[1]);
+  SET_FP0(val);
+  GET_FP0(exitxmm[1]);
+}
+
+void sigusr1(int s, siginfo_t* siginfo, void* _uc) {
+  uint64_t val = SIGUSR1;
+
+  // Record the value of %xmm0 on entry and then clobber it.
+  GET_FP0(entryxmm[0]);
+  SET_FP0(val);
+
+  // Send a SIGUSR2 to ourself. The signal mask is configured such that
+  // the SIGUSR2 handler will run before this handler returns.
+#ifdef __x86_64__
+  asm volatile(
+      "movl %[killnr], %%eax;"
+      "movl %[pid], %%edi;"
+      "movl %[tid], %%esi;"
+      "movl %[sig], %%edx;"
+      "syscall;"
+      :
+      : [ killnr ] "i"(__NR_tgkill), [ pid ] "rm"(pid), [ tid ] "rm"(tid),
+        [ sig ] "i"(SIGUSR2)
+      : "rax", "rdi", "rsi", "rdx",
+        // Clobbered by syscall.
+        "rcx", "r11");
+#elif __aarch64__
+  asm volatile(
+      "mov x8, %0\n"
+      "mov x0, %1\n"
+      "mov x1, %2\n"
+      "mov x2, %3\n"
+      "svc #0\n" ::"r"(__NR_tgkill),
+      "r"(pid), "r"(tid), "r"(SIGUSR2));
+#endif
+
+  // Record value of %xmm0 again to verify that the nested signal handler
+  // does not clobber it.
+  GET_FP0(exitxmm[0]);
+}
+
+TEST(FPSigTest, NestedSignals) {
+  pid = getpid();
+  tid = gettid();
+
+  struct sigaction sa = {};
+  sigemptyset(&sa.sa_mask);
+  sa.sa_flags = SA_SIGINFO;
+  sa.sa_sigaction = sigusr1;
+  ASSERT_THAT(sigaction(SIGUSR1, &sa, nullptr), SyscallSucceeds());
+
+  sa.sa_sigaction = sigusr2;
+  ASSERT_THAT(sigaction(SIGUSR2, &sa, nullptr), SyscallSucceeds());
+
+  // The amd64 ABI specifies that the XMM register set is caller-saved. This
+  // implies that if there is any function call between SET_XMM and GET_XMM the
+  // compiler might save/restore xmm0 implicitly. This defeats the entire
+  // purpose of the test which is to verify that fpstate is restored by
+  // sigreturn(2).
+  //
+  // This is the reason why 'tgkill(getpid(), gettid(), SIGUSR1)' is implemented
+  // in inline assembly below.
+  //
+  // If the OS is broken and registers are clobbered by the signal, using tgkill
+  // to signal the current thread ensures that this is the clobbered thread.
+
+  uint64_t expected = 0xdeadbeeffacefeed;
+  SET_FP0(expected);
+
+#ifdef __x86_64__
+  asm volatile(
+      "movl %[killnr], %%eax;"
+      "movl %[pid], %%edi;"
+      "movl %[tid], %%esi;"
+      "movl %[sig], %%edx;"
+      "syscall;"
+      :
+      : [ killnr ] "i"(__NR_tgkill), [ pid ] "rm"(pid), [ tid ] "rm"(tid),
+        [ sig ] "i"(SIGUSR1)
+      : "rax", "rdi", "rsi", "rdx",
+        // Clobbered by syscall.
+        "rcx", "r11");
+#elif __aarch64__
+  asm volatile(
+      "mov x8, %0\n"
+      "mov x0, %1\n"
+      "mov x1, %2\n"
+      "mov x2, %3\n"
+      "svc #0\n" ::"r"(__NR_tgkill),
+      "r"(pid), "r"(tid), "r"(SIGUSR1));
+#endif
+
+  uint64_t got;
+  GET_FP0(got);
+
+  //
+  // The checks below verifies the following:
+  // - signal handlers must called with a clean fpu state.
+  // - sigreturn(2) must restore fpstate of the interrupted context.
+  //
+  EXPECT_EQ(expected, got);
+  EXPECT_EQ(entryxmm[0], 0);
+  EXPECT_EQ(entryxmm[1], 0);
+  EXPECT_EQ(exitxmm[0], SIGUSR1);
+  EXPECT_EQ(exitxmm[1], SIGUSR2);
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/fsync.cc b/test/syscalls/linux/fsync.cc
new file mode 100644
index 000000000..e7e057f06
--- /dev/null
+++ b/test/syscalls/linux/fsync.cc
@@ -0,0 +1,58 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <string>
+
+#include "gtest/gtest.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(FsyncTest, TempFileSucceeds) {
+  auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  auto fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666));
+  const std::string data = "some data to sync";
+  EXPECT_THAT(write(fd.get(), data.c_str(), data.size()),
+              SyscallSucceedsWithValue(data.size()));
+  EXPECT_THAT(fsync(fd.get()), SyscallSucceeds());
+}
+
+TEST(FsyncTest, TempDirSucceeds) {
+  auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  auto fd = ASSERT_NO_ERRNO_AND_VALUE(Open(dir.path(), O_RDONLY | O_DIRECTORY));
+  EXPECT_THAT(fsync(fd.get()), SyscallSucceeds());
+}
+
+TEST(FsyncTest, CannotFsyncOnUnopenedFd) {
+  auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  int fd;
+  ASSERT_THAT(fd = open(file.path().c_str(), O_RDONLY), SyscallSucceeds());
+  ASSERT_THAT(close(fd), SyscallSucceeds());
+
+  // fd is now invalid.
+  EXPECT_THAT(fsync(fd), SyscallFailsWithErrno(EBADF));
+}
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/futex.cc b/test/syscalls/linux/futex.cc
new file mode 100644
index 000000000..40c80a6e1
--- /dev/null
+++ b/test/syscalls/linux/futex.cc
@@ -0,0 +1,742 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <linux/futex.h>
+#include <linux/types.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <algorithm>
+#include <atomic>
+#include <memory>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "absl/memory/memory.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/util/cleanup.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/memory_util.h"
+#include "test/util/save_util.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+#include "test/util/time_util.h"
+#include "test/util/timer_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// Amount of time we wait for threads doing futex_wait to start running before
+// doing futex_wake.
+constexpr auto kWaiterStartupDelay = absl::Seconds(3);
+
+// Default timeout for waiters in tests where we expect a futex_wake to be
+// ineffective.
+constexpr auto kIneffectiveWakeTimeout = absl::Seconds(6);
+
+static_assert(kWaiterStartupDelay < kIneffectiveWakeTimeout,
+              "futex_wait will time out before futex_wake is called");
+
+int futex_wait(bool priv, std::atomic<int>* uaddr, int val,
+               absl::Duration timeout = absl::InfiniteDuration()) {
+  int op = FUTEX_WAIT;
+  if (priv) {
+    op |= FUTEX_PRIVATE_FLAG;
+  }
+
+  if (timeout == absl::InfiniteDuration()) {
+    return RetryEINTR(syscall)(SYS_futex, uaddr, op, val, nullptr);
+  }
+
+  // FUTEX_WAIT doesn't adjust the timeout if it returns EINTR, so we have to do
+  // so.
+  while (true) {
+    auto const timeout_ts = absl::ToTimespec(timeout);
+    MonotonicTimer timer;
+    timer.Start();
+    int const ret = syscall(SYS_futex, uaddr, op, val, &timeout_ts);
+    if (ret != -1 || errno != EINTR) {
+      return ret;
+    }
+    timeout = std::max(timeout - timer.Duration(), absl::ZeroDuration());
+  }
+}
+
+int futex_wait_bitset(bool priv, std::atomic<int>* uaddr, int val, int bitset,
+                      absl::Time deadline = absl::InfiniteFuture()) {
+  int op = FUTEX_WAIT_BITSET | FUTEX_CLOCK_REALTIME;
+  if (priv) {
+    op |= FUTEX_PRIVATE_FLAG;
+  }
+
+  auto const deadline_ts = absl::ToTimespec(deadline);
+  return RetryEINTR(syscall)(
+      SYS_futex, uaddr, op, val,
+      deadline == absl::InfiniteFuture() ? nullptr : &deadline_ts, nullptr,
+      bitset);
+}
+
+int futex_wake(bool priv, std::atomic<int>* uaddr, int count) {
+  int op = FUTEX_WAKE;
+  if (priv) {
+    op |= FUTEX_PRIVATE_FLAG;
+  }
+  return syscall(SYS_futex, uaddr, op, count);
+}
+
+int futex_wake_bitset(bool priv, std::atomic<int>* uaddr, int count,
+                      int bitset) {
+  int op = FUTEX_WAKE_BITSET;
+  if (priv) {
+    op |= FUTEX_PRIVATE_FLAG;
+  }
+  return syscall(SYS_futex, uaddr, op, count, nullptr, nullptr, bitset);
+}
+
+int futex_wake_op(bool priv, std::atomic<int>* uaddr1, std::atomic<int>* uaddr2,
+                  int nwake1, int nwake2, uint32_t sub_op) {
+  int op = FUTEX_WAKE_OP;
+  if (priv) {
+    op |= FUTEX_PRIVATE_FLAG;
+  }
+  return syscall(SYS_futex, uaddr1, op, nwake1, nwake2, uaddr2, sub_op);
+}
+
+int futex_lock_pi(bool priv, std::atomic<int>* uaddr) {
+  int op = FUTEX_LOCK_PI;
+  if (priv) {
+    op |= FUTEX_PRIVATE_FLAG;
+  }
+  int zero = 0;
+  if (uaddr->compare_exchange_strong(zero, gettid())) {
+    return 0;
+  }
+  return RetryEINTR(syscall)(SYS_futex, uaddr, op, nullptr, nullptr);
+}
+
+int futex_trylock_pi(bool priv, std::atomic<int>* uaddr) {
+  int op = FUTEX_TRYLOCK_PI;
+  if (priv) {
+    op |= FUTEX_PRIVATE_FLAG;
+  }
+  int zero = 0;
+  if (uaddr->compare_exchange_strong(zero, gettid())) {
+    return 0;
+  }
+  return RetryEINTR(syscall)(SYS_futex, uaddr, op, nullptr, nullptr);
+}
+
+int futex_unlock_pi(bool priv, std::atomic<int>* uaddr) {
+  int op = FUTEX_UNLOCK_PI;
+  if (priv) {
+    op |= FUTEX_PRIVATE_FLAG;
+  }
+  int tid = gettid();
+  if (uaddr->compare_exchange_strong(tid, 0)) {
+    return 0;
+  }
+  return RetryEINTR(syscall)(SYS_futex, uaddr, op, nullptr, nullptr);
+}
+
+// Fixture for futex tests parameterized by whether to use private or shared
+// futexes.
+class PrivateAndSharedFutexTest : public ::testing::TestWithParam<bool> {
+ protected:
+  bool IsPrivate() const { return GetParam(); }
+  int PrivateFlag() const { return IsPrivate() ? FUTEX_PRIVATE_FLAG : 0; }
+};
+
+// FUTEX_WAIT with 0 timeout does not block.
+TEST_P(PrivateAndSharedFutexTest, Wait_ZeroTimeout) {
+  struct timespec timeout = {};
+
+  // Don't use the futex_wait helper because it adjusts timeout.
+  int a = 1;
+  EXPECT_THAT(syscall(SYS_futex, &a, FUTEX_WAIT | PrivateFlag(), a, &timeout),
+              SyscallFailsWithErrno(ETIMEDOUT));
+}
+
+TEST_P(PrivateAndSharedFutexTest, Wait_Timeout) {
+  std::atomic<int> a = ATOMIC_VAR_INIT(1);
+
+  MonotonicTimer timer;
+  timer.Start();
+  constexpr absl::Duration kTimeout = absl::Seconds(1);
+  EXPECT_THAT(futex_wait(IsPrivate(), &a, a, kTimeout),
+              SyscallFailsWithErrno(ETIMEDOUT));
+  EXPECT_GE(timer.Duration(), kTimeout);
+}
+
+TEST_P(PrivateAndSharedFutexTest, Wait_BitsetTimeout) {
+  std::atomic<int> a = ATOMIC_VAR_INIT(1);
+
+  MonotonicTimer timer;
+  timer.Start();
+  constexpr absl::Duration kTimeout = absl::Seconds(1);
+  EXPECT_THAT(
+      futex_wait_bitset(IsPrivate(), &a, a, 0xffffffff, absl::Now() + kTimeout),
+      SyscallFailsWithErrno(ETIMEDOUT));
+  EXPECT_GE(timer.Duration(), kTimeout);
+}
+
+TEST_P(PrivateAndSharedFutexTest, WaitBitset_NegativeTimeout) {
+  std::atomic<int> a = ATOMIC_VAR_INIT(1);
+
+  MonotonicTimer timer;
+  timer.Start();
+  EXPECT_THAT(futex_wait_bitset(IsPrivate(), &a, a, 0xffffffff,
+                                absl::Now() - absl::Seconds(1)),
+              SyscallFailsWithErrno(ETIMEDOUT));
+}
+
+TEST_P(PrivateAndSharedFutexTest, Wait_WrongVal) {
+  std::atomic<int> a = ATOMIC_VAR_INIT(1);
+  EXPECT_THAT(futex_wait(IsPrivate(), &a, a + 1),
+              SyscallFailsWithErrno(EAGAIN));
+}
+
+TEST_P(PrivateAndSharedFutexTest, Wait_ZeroBitset) {
+  std::atomic<int> a = ATOMIC_VAR_INIT(1);
+  EXPECT_THAT(futex_wait_bitset(IsPrivate(), &a, a, 0),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_P(PrivateAndSharedFutexTest, Wake1_NoRandomSave) {
+  constexpr int kInitialValue = 1;
+  std::atomic<int> a = ATOMIC_VAR_INIT(kInitialValue);
+
+  // Prevent save/restore from interrupting futex_wait, which will cause it to
+  // return EAGAIN instead of the expected result if futex_wait is restarted
+  // after we change the value of a below.
+  DisableSave ds;
+  ScopedThread thread([&] {
+    EXPECT_THAT(futex_wait(IsPrivate(), &a, kInitialValue),
+                SyscallSucceedsWithValue(0));
+  });
+  absl::SleepFor(kWaiterStartupDelay);
+
+  // Change a so that if futex_wake happens before futex_wait, the latter
+  // returns EAGAIN instead of hanging the test.
+  a.fetch_add(1);
+  EXPECT_THAT(futex_wake(IsPrivate(), &a, 1), SyscallSucceedsWithValue(1));
+}
+
+TEST_P(PrivateAndSharedFutexTest, Wake0_NoRandomSave) {
+  constexpr int kInitialValue = 1;
+  std::atomic<int> a = ATOMIC_VAR_INIT(kInitialValue);
+
+  // Prevent save/restore from interrupting futex_wait, which will cause it to
+  // return EAGAIN instead of the expected result if futex_wait is restarted
+  // after we change the value of a below.
+  DisableSave ds;
+  ScopedThread thread([&] {
+    EXPECT_THAT(futex_wait(IsPrivate(), &a, kInitialValue),
+                SyscallSucceedsWithValue(0));
+  });
+  absl::SleepFor(kWaiterStartupDelay);
+
+  // Change a so that if futex_wake happens before futex_wait, the latter
+  // returns EAGAIN instead of hanging the test.
+  a.fetch_add(1);
+  // The Linux kernel wakes one waiter even if val is 0 or negative.
+  EXPECT_THAT(futex_wake(IsPrivate(), &a, 0), SyscallSucceedsWithValue(1));
+}
+
+TEST_P(PrivateAndSharedFutexTest, WakeAll_NoRandomSave) {
+  constexpr int kInitialValue = 1;
+  std::atomic<int> a = ATOMIC_VAR_INIT(kInitialValue);
+
+  DisableSave ds;
+  constexpr int kThreads = 5;
+  std::vector<std::unique_ptr<ScopedThread>> threads;
+  threads.reserve(kThreads);
+  for (int i = 0; i < kThreads; i++) {
+    threads.push_back(absl::make_unique<ScopedThread>([&] {
+      EXPECT_THAT(futex_wait(IsPrivate(), &a, kInitialValue),
+                  SyscallSucceeds());
+    }));
+  }
+  absl::SleepFor(kWaiterStartupDelay);
+
+  a.fetch_add(1);
+  EXPECT_THAT(futex_wake(IsPrivate(), &a, kThreads),
+              SyscallSucceedsWithValue(kThreads));
+}
+
+TEST_P(PrivateAndSharedFutexTest, WakeSome_NoRandomSave) {
+  constexpr int kInitialValue = 1;
+  std::atomic<int> a = ATOMIC_VAR_INIT(kInitialValue);
+
+  DisableSave ds;
+  constexpr int kThreads = 5;
+  constexpr int kWokenThreads = 3;
+  static_assert(kWokenThreads < kThreads,
+                "can't wake more threads than are created");
+  std::vector<std::unique_ptr<ScopedThread>> threads;
+  threads.reserve(kThreads);
+  std::vector<int> rets;
+  rets.reserve(kThreads);
+  std::vector<int> errs;
+  errs.reserve(kThreads);
+  for (int i = 0; i < kThreads; i++) {
+    rets.push_back(-1);
+    errs.push_back(0);
+  }
+  for (int i = 0; i < kThreads; i++) {
+    threads.push_back(absl::make_unique<ScopedThread>([&, i] {
+      rets[i] =
+          futex_wait(IsPrivate(), &a, kInitialValue, kIneffectiveWakeTimeout);
+      errs[i] = errno;
+    }));
+  }
+  absl::SleepFor(kWaiterStartupDelay);
+
+  a.fetch_add(1);
+  EXPECT_THAT(futex_wake(IsPrivate(), &a, kWokenThreads),
+              SyscallSucceedsWithValue(kWokenThreads));
+
+  int woken = 0;
+  int timedout = 0;
+  for (int i = 0; i < kThreads; i++) {
+    threads[i]->Join();
+    if (rets[i] == 0) {
+      woken++;
+    } else if (errs[i] == ETIMEDOUT) {
+      timedout++;
+    } else {
+      ADD_FAILURE() << " thread " << i << ": returned " << rets[i] << ", errno "
+                    << errs[i];
+    }
+  }
+  EXPECT_EQ(woken, kWokenThreads);
+  EXPECT_EQ(timedout, kThreads - kWokenThreads);
+}
+
+TEST_P(PrivateAndSharedFutexTest, WaitBitset_Wake_NoRandomSave) {
+  constexpr int kInitialValue = 1;
+  std::atomic<int> a = ATOMIC_VAR_INIT(kInitialValue);
+
+  DisableSave ds;
+  ScopedThread thread([&] {
+    EXPECT_THAT(futex_wait_bitset(IsPrivate(), &a, kInitialValue, 0b01001000),
+                SyscallSucceeds());
+  });
+  absl::SleepFor(kWaiterStartupDelay);
+
+  a.fetch_add(1);
+  EXPECT_THAT(futex_wake(IsPrivate(), &a, 1), SyscallSucceedsWithValue(1));
+}
+
+TEST_P(PrivateAndSharedFutexTest, Wait_WakeBitset_NoRandomSave) {
+  constexpr int kInitialValue = 1;
+  std::atomic<int> a = ATOMIC_VAR_INIT(kInitialValue);
+
+  DisableSave ds;
+  ScopedThread thread([&] {
+    EXPECT_THAT(futex_wait(IsPrivate(), &a, kInitialValue), SyscallSucceeds());
+  });
+  absl::SleepFor(kWaiterStartupDelay);
+
+  a.fetch_add(1);
+  EXPECT_THAT(futex_wake_bitset(IsPrivate(), &a, 1, 0b01001000),
+              SyscallSucceedsWithValue(1));
+}
+
+TEST_P(PrivateAndSharedFutexTest, WaitBitset_WakeBitsetMatch_NoRandomSave) {
+  constexpr int kInitialValue = 1;
+  std::atomic<int> a = ATOMIC_VAR_INIT(kInitialValue);
+
+  constexpr int kBitset = 0b01001000;
+
+  DisableSave ds;
+  ScopedThread thread([&] {
+    EXPECT_THAT(futex_wait_bitset(IsPrivate(), &a, kInitialValue, kBitset),
+                SyscallSucceeds());
+  });
+  absl::SleepFor(kWaiterStartupDelay);
+
+  a.fetch_add(1);
+  EXPECT_THAT(futex_wake_bitset(IsPrivate(), &a, 1, kBitset),
+              SyscallSucceedsWithValue(1));
+}
+
+TEST_P(PrivateAndSharedFutexTest, WaitBitset_WakeBitsetNoMatch_NoRandomSave) {
+  constexpr int kInitialValue = 1;
+  std::atomic<int> a = ATOMIC_VAR_INIT(kInitialValue);
+
+  constexpr int kWaitBitset = 0b01000001;
+  constexpr int kWakeBitset = 0b00101000;
+  static_assert((kWaitBitset & kWakeBitset) == 0,
+                "futex_wake_bitset will wake waiter");
+
+  DisableSave ds;
+  ScopedThread thread([&] {
+    EXPECT_THAT(futex_wait_bitset(IsPrivate(), &a, kInitialValue, kWaitBitset,
+                                  absl::Now() + kIneffectiveWakeTimeout),
+                SyscallFailsWithErrno(ETIMEDOUT));
+  });
+  absl::SleepFor(kWaiterStartupDelay);
+
+  a.fetch_add(1);
+  EXPECT_THAT(futex_wake_bitset(IsPrivate(), &a, 1, kWakeBitset),
+              SyscallSucceedsWithValue(0));
+}
+
+TEST_P(PrivateAndSharedFutexTest, WakeOpCondSuccess_NoRandomSave) {
+  constexpr int kInitialValue = 1;
+  std::atomic<int> a = ATOMIC_VAR_INIT(kInitialValue);
+  std::atomic<int> b = ATOMIC_VAR_INIT(kInitialValue);
+
+  DisableSave ds;
+  ScopedThread thread_a([&] {
+    EXPECT_THAT(futex_wait(IsPrivate(), &a, kInitialValue), SyscallSucceeds());
+  });
+  ScopedThread thread_b([&] {
+    EXPECT_THAT(futex_wait(IsPrivate(), &b, kInitialValue), SyscallSucceeds());
+  });
+  absl::SleepFor(kWaiterStartupDelay);
+
+  a.fetch_add(1);
+  b.fetch_add(1);
+  // This futex_wake_op should:
+  // - Wake 1 waiter on a unconditionally.
+  // - Wake 1 waiter on b if b == kInitialValue + 1, which it is.
+  // - Do "b += 1".
+  EXPECT_THAT(futex_wake_op(IsPrivate(), &a, &b, 1, 1,
+                            FUTEX_OP(FUTEX_OP_ADD, 1, FUTEX_OP_CMP_EQ,
+                                     (kInitialValue + 1))),
+              SyscallSucceedsWithValue(2));
+  EXPECT_EQ(b, kInitialValue + 2);
+}
+
+TEST_P(PrivateAndSharedFutexTest, WakeOpCondFailure_NoRandomSave) {
+  constexpr int kInitialValue = 1;
+  std::atomic<int> a = ATOMIC_VAR_INIT(kInitialValue);
+  std::atomic<int> b = ATOMIC_VAR_INIT(kInitialValue);
+
+  DisableSave ds;
+  ScopedThread thread_a([&] {
+    EXPECT_THAT(futex_wait(IsPrivate(), &a, kInitialValue), SyscallSucceeds());
+  });
+  ScopedThread thread_b([&] {
+    EXPECT_THAT(
+        futex_wait(IsPrivate(), &b, kInitialValue, kIneffectiveWakeTimeout),
+        SyscallFailsWithErrno(ETIMEDOUT));
+  });
+  absl::SleepFor(kWaiterStartupDelay);
+
+  a.fetch_add(1);
+  b.fetch_add(1);
+  // This futex_wake_op should:
+  // - Wake 1 waiter on a unconditionally.
+  // - Wake 1 waiter on b if b == kInitialValue - 1, which it isn't.
+  // - Do "b += 1".
+  EXPECT_THAT(futex_wake_op(IsPrivate(), &a, &b, 1, 1,
+                            FUTEX_OP(FUTEX_OP_ADD, 1, FUTEX_OP_CMP_EQ,
+                                     (kInitialValue - 1))),
+              SyscallSucceedsWithValue(1));
+  EXPECT_EQ(b, kInitialValue + 2);
+}
+
+TEST_P(PrivateAndSharedFutexTest, NoWakeInterprocessPrivateAnon_NoRandomSave) {
+  auto const mapping = ASSERT_NO_ERRNO_AND_VALUE(
+      MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE));
+  auto const ptr = static_cast<std::atomic<int>*>(mapping.ptr());
+  constexpr int kInitialValue = 1;
+  ptr->store(kInitialValue);
+
+  DisableSave ds;
+  pid_t const child_pid = fork();
+  if (child_pid == 0) {
+    TEST_PCHECK(futex_wait(IsPrivate(), ptr, kInitialValue,
+                           kIneffectiveWakeTimeout) == -1 &&
+                errno == ETIMEDOUT);
+    _exit(0);
+  }
+  ASSERT_THAT(child_pid, SyscallSucceeds());
+  absl::SleepFor(kWaiterStartupDelay);
+
+  EXPECT_THAT(futex_wake(IsPrivate(), ptr, 1), SyscallSucceedsWithValue(0));
+
+  int status;
+  ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0),
+              SyscallSucceedsWithValue(child_pid));
+  EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+      << " status " << status;
+}
+
+TEST_P(PrivateAndSharedFutexTest, WakeAfterCOWBreak_NoRandomSave) {
+  // Use a futex on a non-stack mapping so we can be sure that the child process
+  // below isn't the one that breaks copy-on-write.
+  auto const mapping = ASSERT_NO_ERRNO_AND_VALUE(
+      MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE));
+  auto const ptr = static_cast<std::atomic<int>*>(mapping.ptr());
+  constexpr int kInitialValue = 1;
+  ptr->store(kInitialValue);
+
+  DisableSave ds;
+  ScopedThread thread([&] {
+    EXPECT_THAT(futex_wait(IsPrivate(), ptr, kInitialValue), SyscallSucceeds());
+  });
+  absl::SleepFor(kWaiterStartupDelay);
+
+  pid_t const child_pid = fork();
+  if (child_pid == 0) {
+    // Wait to be killed by the parent.
+    while (true) pause();
+  }
+  ASSERT_THAT(child_pid, SyscallSucceeds());
+  auto cleanup_child = Cleanup([&] {
+    EXPECT_THAT(kill(child_pid, SIGKILL), SyscallSucceeds());
+    int status;
+    ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0),
+                SyscallSucceedsWithValue(child_pid));
+    EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL)
+        << " status " << status;
+  });
+
+  // In addition to preventing a late futex_wait from sleeping, this breaks
+  // copy-on-write on the mapped page.
+  ptr->fetch_add(1);
+  EXPECT_THAT(futex_wake(IsPrivate(), ptr, 1), SyscallSucceedsWithValue(1));
+}
+
+TEST_P(PrivateAndSharedFutexTest, WakeWrongKind_NoRandomSave) {
+  constexpr int kInitialValue = 1;
+  std::atomic<int> a = ATOMIC_VAR_INIT(kInitialValue);
+
+  DisableSave ds;
+  ScopedThread thread([&] {
+    EXPECT_THAT(
+        futex_wait(IsPrivate(), &a, kInitialValue, kIneffectiveWakeTimeout),
+        SyscallFailsWithErrno(ETIMEDOUT));
+  });
+  absl::SleepFor(kWaiterStartupDelay);
+
+  a.fetch_add(1);
+  // The value of priv passed to futex_wake is the opposite of that passed to
+  // the futex_waiter; we expect this not to wake the waiter.
+  EXPECT_THAT(futex_wake(!IsPrivate(), &a, 1), SyscallSucceedsWithValue(0));
+}
+
+INSTANTIATE_TEST_SUITE_P(SharedPrivate, PrivateAndSharedFutexTest,
+                         ::testing::Bool());
+
+// Passing null as the address only works for private futexes.
+
+TEST(PrivateFutexTest, WakeOp0Set) {
+  std::atomic<int> a = ATOMIC_VAR_INIT(1);
+
+  int futex_op = FUTEX_OP(FUTEX_OP_SET, 2, 0, 0);
+  EXPECT_THAT(futex_wake_op(true, nullptr, &a, 0, 0, futex_op),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(a, 2);
+}
+
+TEST(PrivateFutexTest, WakeOp0Add) {
+  std::atomic<int> a = ATOMIC_VAR_INIT(1);
+  int futex_op = FUTEX_OP(FUTEX_OP_ADD, 1, 0, 0);
+  EXPECT_THAT(futex_wake_op(true, nullptr, &a, 0, 0, futex_op),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(a, 2);
+}
+
+TEST(PrivateFutexTest, WakeOp0Or) {
+  std::atomic<int> a = ATOMIC_VAR_INIT(0b01);
+  int futex_op = FUTEX_OP(FUTEX_OP_OR, 0b10, 0, 0);
+  EXPECT_THAT(futex_wake_op(true, nullptr, &a, 0, 0, futex_op),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(a, 0b11);
+}
+
+TEST(PrivateFutexTest, WakeOp0Andn) {
+  std::atomic<int> a = ATOMIC_VAR_INIT(0b11);
+  int futex_op = FUTEX_OP(FUTEX_OP_ANDN, 0b10, 0, 0);
+  EXPECT_THAT(futex_wake_op(true, nullptr, &a, 0, 0, futex_op),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(a, 0b01);
+}
+
+TEST(PrivateFutexTest, WakeOp0Xor) {
+  std::atomic<int> a = ATOMIC_VAR_INIT(0b1010);
+  int futex_op = FUTEX_OP(FUTEX_OP_XOR, 0b1100, 0, 0);
+  EXPECT_THAT(futex_wake_op(true, nullptr, &a, 0, 0, futex_op),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(a, 0b0110);
+}
+
+TEST(SharedFutexTest, WakeInterprocessSharedAnon_NoRandomSave) {
+  auto const mapping = ASSERT_NO_ERRNO_AND_VALUE(
+      MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED));
+  auto const ptr = static_cast<std::atomic<int>*>(mapping.ptr());
+  constexpr int kInitialValue = 1;
+  ptr->store(kInitialValue);
+
+  DisableSave ds;
+  pid_t const child_pid = fork();
+  if (child_pid == 0) {
+    TEST_PCHECK(futex_wait(false, ptr, kInitialValue) == 0);
+    _exit(0);
+  }
+  ASSERT_THAT(child_pid, SyscallSucceeds());
+  auto kill_child = Cleanup(
+      [&] { EXPECT_THAT(kill(child_pid, SIGKILL), SyscallSucceeds()); });
+  absl::SleepFor(kWaiterStartupDelay);
+
+  ptr->fetch_add(1);
+  // This is an ASSERT so that if it fails, we immediately abort the test (and
+  // kill the subprocess).
+  ASSERT_THAT(futex_wake(false, ptr, 1), SyscallSucceedsWithValue(1));
+
+  kill_child.Release();
+  int status;
+  ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0),
+              SyscallSucceedsWithValue(child_pid));
+  EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+      << " status " << status;
+}
+
+TEST(SharedFutexTest, WakeInterprocessFile_NoRandomSave) {
+  auto const file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  ASSERT_THAT(truncate(file.path().c_str(), kPageSize), SyscallSucceeds());
+  auto const fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR));
+  auto const mapping = ASSERT_NO_ERRNO_AND_VALUE(Mmap(
+      nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd.get(), 0));
+  auto const ptr = static_cast<std::atomic<int>*>(mapping.ptr());
+  constexpr int kInitialValue = 1;
+  ptr->store(kInitialValue);
+
+  DisableSave ds;
+  pid_t const child_pid = fork();
+  if (child_pid == 0) {
+    TEST_PCHECK(futex_wait(false, ptr, kInitialValue) == 0);
+    _exit(0);
+  }
+  ASSERT_THAT(child_pid, SyscallSucceeds());
+  auto kill_child = Cleanup(
+      [&] { EXPECT_THAT(kill(child_pid, SIGKILL), SyscallSucceeds()); });
+  absl::SleepFor(kWaiterStartupDelay);
+
+  ptr->fetch_add(1);
+  // This is an ASSERT so that if it fails, we immediately abort the test (and
+  // kill the subprocess).
+  ASSERT_THAT(futex_wake(false, ptr, 1), SyscallSucceedsWithValue(1));
+
+  kill_child.Release();
+  int status;
+  ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0),
+              SyscallSucceedsWithValue(child_pid));
+  EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+      << " status " << status;
+}
+
+TEST_P(PrivateAndSharedFutexTest, PIBasic) {
+  std::atomic<int> a = ATOMIC_VAR_INIT(0);
+
+  ASSERT_THAT(futex_lock_pi(IsPrivate(), &a), SyscallSucceeds());
+  EXPECT_EQ(a.load(), gettid());
+  EXPECT_THAT(futex_lock_pi(IsPrivate(), &a), SyscallFailsWithErrno(EDEADLK));
+
+  ASSERT_THAT(futex_unlock_pi(IsPrivate(), &a), SyscallSucceeds());
+  EXPECT_EQ(a.load(), 0);
+  EXPECT_THAT(futex_unlock_pi(IsPrivate(), &a), SyscallFailsWithErrno(EPERM));
+}
+
+TEST_P(PrivateAndSharedFutexTest, PIConcurrency_NoRandomSave) {
+  DisableSave ds;  // Too many syscalls.
+
+  std::atomic<int> a = ATOMIC_VAR_INIT(0);
+  const bool is_priv = IsPrivate();
+
+  std::unique_ptr<ScopedThread> threads[100];
+  for (size_t i = 0; i < ABSL_ARRAYSIZE(threads); ++i) {
+    threads[i] = absl::make_unique<ScopedThread>([is_priv, &a] {
+      for (size_t j = 0; j < 10; ++j) {
+        ASSERT_THAT(futex_lock_pi(is_priv, &a), SyscallSucceeds());
+        EXPECT_EQ(a.load() & FUTEX_TID_MASK, gettid());
+        SleepSafe(absl::Milliseconds(5));
+        ASSERT_THAT(futex_unlock_pi(is_priv, &a), SyscallSucceeds());
+      }
+    });
+  }
+}
+
+TEST_P(PrivateAndSharedFutexTest, PIWaiters) {
+  std::atomic<int> a = ATOMIC_VAR_INIT(0);
+  const bool is_priv = IsPrivate();
+
+  ASSERT_THAT(futex_lock_pi(is_priv, &a), SyscallSucceeds());
+  EXPECT_EQ(a.load(), gettid());
+
+  ScopedThread th([is_priv, &a] {
+    ASSERT_THAT(futex_lock_pi(is_priv, &a), SyscallSucceeds());
+    ASSERT_THAT(futex_unlock_pi(is_priv, &a), SyscallSucceeds());
+  });
+
+  // Wait until the thread blocks on the futex, setting the waiters bit.
+  auto start = absl::Now();
+  while (a.load() != (FUTEX_WAITERS | gettid())) {
+    ASSERT_LT(absl::Now() - start, absl::Seconds(5));
+    absl::SleepFor(absl::Milliseconds(100));
+  }
+  ASSERT_THAT(futex_unlock_pi(is_priv, &a), SyscallSucceeds());
+}
+
+TEST_P(PrivateAndSharedFutexTest, PITryLock) {
+  std::atomic<int> a = ATOMIC_VAR_INIT(0);
+  const bool is_priv = IsPrivate();
+
+  ASSERT_THAT(futex_trylock_pi(IsPrivate(), &a), SyscallSucceeds());
+  EXPECT_EQ(a.load(), gettid());
+
+  EXPECT_THAT(futex_trylock_pi(is_priv, &a), SyscallFailsWithErrno(EDEADLK));
+  ScopedThread th([is_priv, &a] {
+    EXPECT_THAT(futex_trylock_pi(is_priv, &a), SyscallFailsWithErrno(EAGAIN));
+  });
+  th.Join();
+
+  ASSERT_THAT(futex_unlock_pi(IsPrivate(), &a), SyscallSucceeds());
+}
+
+TEST_P(PrivateAndSharedFutexTest, PITryLockConcurrency_NoRandomSave) {
+  DisableSave ds;  // Too many syscalls.
+
+  std::atomic<int> a = ATOMIC_VAR_INIT(0);
+  const bool is_priv = IsPrivate();
+
+  std::unique_ptr<ScopedThread> threads[10];
+  for (size_t i = 0; i < ABSL_ARRAYSIZE(threads); ++i) {
+    threads[i] = absl::make_unique<ScopedThread>([is_priv, &a] {
+      for (size_t j = 0; j < 10;) {
+        if (futex_trylock_pi(is_priv, &a) == 0) {
+          ++j;
+          EXPECT_EQ(a.load() & FUTEX_TID_MASK, gettid());
+          SleepSafe(absl::Milliseconds(5));
+          ASSERT_THAT(futex_unlock_pi(is_priv, &a), SyscallSucceeds());
+        }
+      }
+    });
+  }
+}
+
+}  // namespace
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/getcpu.cc b/test/syscalls/linux/getcpu.cc
new file mode 100644
index 000000000..f4d94bd6a
--- /dev/null
+++ b/test/syscalls/linux/getcpu.cc
@@ -0,0 +1,40 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sched.h>
+
+#include "gtest/gtest.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(GetcpuTest, IsValidCpuStress) {
+  const int num_cpus = NumCPUs();
+  absl::Time deadline = absl::Now() + absl::Seconds(10);
+  while (absl::Now() < deadline) {
+    int cpu;
+    ASSERT_THAT(cpu = sched_getcpu(), SyscallSucceeds());
+    ASSERT_LT(cpu, num_cpus);
+  }
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/getdents.cc b/test/syscalls/linux/getdents.cc
new file mode 100644
index 000000000..b147d6181
--- /dev/null
+++ b/test/syscalls/linux/getdents.cc
@@ -0,0 +1,539 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <syscall.h>
+#include <unistd.h>
+
+#include <map>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <utility>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/strings/numbers.h"
+#include "absl/strings/str_cat.h"
+#include "test/util/eventfd_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/fs_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+using ::testing::Contains;
+using ::testing::IsEmpty;
+using ::testing::IsSupersetOf;
+using ::testing::Not;
+using ::testing::NotNull;
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// New Linux dirent format.
+struct linux_dirent64 {
+  uint64_t d_ino;           // Inode number
+  int64_t d_off;            // Offset to next linux_dirent64
+  unsigned short d_reclen;  // NOLINT, Length of this linux_dirent64
+  unsigned char d_type;     // NOLINT, File type
+  char d_name[0];           // Filename (null-terminated)
+};
+
+// Old Linux dirent format.
+struct linux_dirent {
+  unsigned long d_ino;      // NOLINT
+  unsigned long d_off;      // NOLINT
+  unsigned short d_reclen;  // NOLINT
+  char d_name[0];
+};
+
+// Wraps a buffer to provide a set of dirents.
+// T is the underlying dirent type.
+template <typename T>
+class DirentBuffer {
+ public:
+  // DirentBuffer manages the buffer.
+  explicit DirentBuffer(size_t size)
+      : managed_(true), actual_size_(size), reported_size_(size) {
+    data_ = new char[actual_size_];
+  }
+
+  // The buffer is managed externally.
+  DirentBuffer(char* data, size_t actual_size, size_t reported_size)
+      : managed_(false),
+        data_(data),
+        actual_size_(actual_size),
+        reported_size_(reported_size) {}
+
+  ~DirentBuffer() {
+    if (managed_) {
+      delete[] data_;
+    }
+  }
+
+  T* Data() { return reinterpret_cast<T*>(data_); }
+
+  T* Start(size_t read) {
+    read_ = read;
+    if (read_) {
+      return Data();
+    } else {
+      return nullptr;
+    }
+  }
+
+  T* Current() { return reinterpret_cast<T*>(&data_[off_]); }
+
+  T* Next() {
+    size_t new_off = off_ + Current()->d_reclen;
+    if (new_off >= read_ || new_off >= actual_size_) {
+      return nullptr;
+    }
+
+    off_ = new_off;
+    return Current();
+  }
+
+  size_t Size() { return reported_size_; }
+
+  void Reset() {
+    off_ = 0;
+    read_ = 0;
+    memset(data_, 0, actual_size_);
+  }
+
+ private:
+  bool managed_;
+  char* data_;
+  size_t actual_size_;
+  size_t reported_size_;
+
+  size_t off_ = 0;
+
+  size_t read_ = 0;
+};
+
+// Test for getdents/getdents64.
+// T is the Linux dirent type.
+template <typename T>
+class GetdentsTest : public ::testing::Test {
+ public:
+  using LinuxDirentType = T;
+  using DirentBufferType = DirentBuffer<T>;
+
+ protected:
+  void SetUp() override {
+    dir_ = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+    fd_ = ASSERT_NO_ERRNO_AND_VALUE(Open(dir_.path(), O_RDONLY | O_DIRECTORY));
+  }
+
+  // Must be overridden with explicit specialization. See below.
+  int SyscallNum();
+
+  int Getdents(LinuxDirentType* dirp, unsigned int count) {
+    return RetryEINTR(syscall)(SyscallNum(), fd_.get(), dirp, count);
+  }
+
+  // Fill directory with num files, named by number starting at 0.
+  void FillDirectory(size_t num) {
+    for (size_t i = 0; i < num; i++) {
+      auto name = JoinPath(dir_.path(), absl::StrCat(i));
+      TEST_CHECK(CreateWithContents(name, "").ok());
+    }
+  }
+
+  // Fill directory with a given list of filenames.
+  void FillDirectoryWithFiles(const std::vector<std::string>& filenames) {
+    for (const auto& filename : filenames) {
+      auto name = JoinPath(dir_.path(), filename);
+      TEST_CHECK(CreateWithContents(name, "").ok());
+    }
+  }
+
+  // Seek to the start of the directory.
+  PosixError SeekStart() {
+    constexpr off_t kStartOfFile = 0;
+    off_t offset = lseek(fd_.get(), kStartOfFile, SEEK_SET);
+    if (offset < 0) {
+      return PosixError(errno, absl::StrCat("error seeking to ", kStartOfFile));
+    }
+    if (offset != kStartOfFile) {
+      return PosixError(EINVAL, absl::StrCat("tried to seek to ", kStartOfFile,
+                                             " but got ", offset));
+    }
+    return NoError();
+  }
+
+  // Call getdents multiple times, reading all dirents and calling f on each.
+  // f has the type signature PosixError f(T*).
+  // If f returns a non-OK error, so does ReadDirents.
+  template <typename F>
+  PosixError ReadDirents(DirentBufferType* dirents, F const& f) {
+    int n;
+    do {
+      dirents->Reset();
+
+      n = Getdents(dirents->Data(), dirents->Size());
+      MaybeSave();
+      if (n < 0) {
+        return PosixError(errno, "getdents");
+      }
+
+      for (auto d = dirents->Start(n); d; d = dirents->Next()) {
+        RETURN_IF_ERRNO(f(d));
+      }
+    } while (n > 0);
+
+    return NoError();
+  }
+
+  // Call Getdents successively and count all entries.
+  int ReadAndCountAllEntries(DirentBufferType* dirents) {
+    int found = 0;
+
+    EXPECT_NO_ERRNO(ReadDirents(dirents, [&](LinuxDirentType* d) {
+      found++;
+      return NoError();
+    }));
+
+    return found;
+  }
+
+ private:
+  TempPath dir_;
+  FileDescriptor fd_;
+};
+
+// Multiple template parameters are not allowed, so we must use explicit
+// template specialization to set the syscall number.
+
+// SYS_getdents isn't defined on arm64.
+#ifdef __x86_64__
+template <>
+int GetdentsTest<struct linux_dirent>::SyscallNum() {
+  return SYS_getdents;
+}
+#endif
+
+template <>
+int GetdentsTest<struct linux_dirent64>::SyscallNum() {
+  return SYS_getdents64;
+}
+
+#ifdef __x86_64__
+// Test both legacy getdents and getdents64 on x86_64.
+typedef ::testing::Types<struct linux_dirent, struct linux_dirent64>
+    GetdentsTypes;
+#elif __aarch64__
+// Test only getdents64 on arm64.
+typedef ::testing::Types<struct linux_dirent64> GetdentsTypes;
+#endif
+TYPED_TEST_SUITE(GetdentsTest, GetdentsTypes);
+
+// N.B. TYPED_TESTs require explicitly using this-> to access members of
+// GetdentsTest, since we are inside of a derived class template.
+
+TYPED_TEST(GetdentsTest, VerifyEntries) {
+  typename TestFixture::DirentBufferType dirents(1024);
+
+  this->FillDirectory(2);
+
+  // Map of all the entries we expect to find.
+  std::map<std::string, bool> found;
+  found["."] = false;
+  found[".."] = false;
+  found["0"] = false;
+  found["1"] = false;
+
+  EXPECT_NO_ERRNO(this->ReadDirents(
+      &dirents, [&](typename TestFixture::LinuxDirentType* d) {
+        auto kv = found.find(d->d_name);
+        EXPECT_NE(kv, found.end()) << "Unexpected file: " << d->d_name;
+        if (kv != found.end()) {
+          EXPECT_FALSE(kv->second);
+        }
+        found[d->d_name] = true;
+        return NoError();
+      }));
+
+  for (auto& kv : found) {
+    EXPECT_TRUE(kv.second) << "File not found: " << kv.first;
+  }
+}
+
+TYPED_TEST(GetdentsTest, VerifyPadding) {
+  typename TestFixture::DirentBufferType dirents(1024);
+
+  // Create files with names of length 1 through 16.
+  std::vector<std::string> files;
+  std::string filename;
+  for (int i = 0; i < 16; ++i) {
+    absl::StrAppend(&filename, "a");
+    files.push_back(filename);
+  }
+  this->FillDirectoryWithFiles(files);
+
+  // We expect to find all the files, plus '.' and '..'.
+  const int expect_found = 2 + files.size();
+  int found = 0;
+
+  EXPECT_NO_ERRNO(this->ReadDirents(
+      &dirents, [&](typename TestFixture::LinuxDirentType* d) {
+        EXPECT_EQ(d->d_reclen % 8, 0)
+            << "Dirent " << d->d_name
+            << " had reclen that was not byte aligned: " << d->d_name;
+        found++;
+        return NoError();
+      }));
+
+  // Make sure we found all the files.
+  EXPECT_EQ(found, expect_found);
+}
+
+// For a small directory, the provided buffer should be large enough
+// for all entries.
+TYPED_TEST(GetdentsTest, SmallDir) {
+  // . and .. should be in an otherwise empty directory.
+  int expect = 2;
+
+  // Add some actual files.
+  this->FillDirectory(2);
+  expect += 2;
+
+  typename TestFixture::DirentBufferType dirents(256);
+
+  EXPECT_EQ(expect, this->ReadAndCountAllEntries(&dirents));
+}
+
+// A directory with lots of files requires calling getdents multiple times.
+TYPED_TEST(GetdentsTest, LargeDir) {
+  // . and .. should be in an otherwise empty directory.
+  int expect = 2;
+
+  // Add some actual files.
+  this->FillDirectory(100);
+  expect += 100;
+
+  typename TestFixture::DirentBufferType dirents(256);
+
+  EXPECT_EQ(expect, this->ReadAndCountAllEntries(&dirents));
+}
+
+// If we lie about the size of the buffer, we should still be able to read the
+// entries with the available space.
+TYPED_TEST(GetdentsTest, PartialBuffer) {
+  // . and .. should be in an otherwise empty directory.
+  int expect = 2;
+
+  // Add some actual files.
+  this->FillDirectory(100);
+  expect += 100;
+
+  void* addr = mmap(0, 2 * kPageSize, PROT_READ | PROT_WRITE,
+                    MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+  ASSERT_NE(addr, MAP_FAILED);
+
+  char* buf = reinterpret_cast<char*>(addr);
+
+  // Guard page
+  EXPECT_THAT(
+      mprotect(reinterpret_cast<void*>(buf + kPageSize), kPageSize, PROT_NONE),
+      SyscallSucceeds());
+
+  // Limit space in buf to 256 bytes.
+  buf += kPageSize - 256;
+
+  // Lie about the buffer. Even though we claim the buffer is 1 page,
+  // we should still get all of the dirents in the first 256 bytes.
+  typename TestFixture::DirentBufferType dirents(buf, 256, kPageSize);
+
+  EXPECT_EQ(expect, this->ReadAndCountAllEntries(&dirents));
+
+  EXPECT_THAT(munmap(addr, 2 * kPageSize), SyscallSucceeds());
+}
+
+// Open many file descriptors, then scan through /proc/self/fd to find and close
+// them all. (The latter is commonly used to handle races between fork/execve
+// and the creation of unwanted non-O_CLOEXEC file descriptors.) This tests that
+// getdents iterates correctly despite mutation of /proc/self/fd.
+TYPED_TEST(GetdentsTest, ProcSelfFd) {
+  constexpr size_t kNfds = 10;
+  std::unordered_map<int, FileDescriptor> fds;
+  fds.reserve(kNfds);
+  for (size_t i = 0; i < kNfds; i++) {
+    FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NewEventFD());
+    fds.emplace(fd.get(), std::move(fd));
+  }
+
+  const FileDescriptor proc_self_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/self/fd", O_RDONLY | O_DIRECTORY));
+
+  // Make the buffer very small since we want to iterate.
+  typename TestFixture::DirentBufferType dirents(
+      2 * sizeof(typename TestFixture::LinuxDirentType));
+  std::unordered_set<int> prev_fds;
+  while (true) {
+    dirents.Reset();
+    int rv;
+    ASSERT_THAT(rv = RetryEINTR(syscall)(this->SyscallNum(), proc_self_fd.get(),
+                                         dirents.Data(), dirents.Size()),
+                SyscallSucceeds());
+    if (rv == 0) {
+      break;
+    }
+    for (auto* d = dirents.Start(rv); d; d = dirents.Next()) {
+      int dfd;
+      if (!absl::SimpleAtoi(d->d_name, &dfd)) continue;
+      EXPECT_TRUE(prev_fds.insert(dfd).second)
+          << "Repeated observation of /proc/self/fd/" << dfd;
+      fds.erase(dfd);
+    }
+  }
+
+  // Check that we closed every fd.
+  EXPECT_THAT(fds, ::testing::IsEmpty());
+}
+
+// Test that getdents returns ENOTDIR when called on a file.
+TYPED_TEST(GetdentsTest, NotDir) {
+  auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  auto fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY));
+
+  typename TestFixture::DirentBufferType dirents(256);
+  EXPECT_THAT(RetryEINTR(syscall)(this->SyscallNum(), fd.get(), dirents.Data(),
+                                  dirents.Size()),
+              SyscallFailsWithErrno(ENOTDIR));
+}
+
+// Test that SEEK_SET to 0 causes getdents to re-read the entries.
+TYPED_TEST(GetdentsTest, SeekResetsCursor) {
+  // . and .. should be in an otherwise empty directory.
+  int expect = 2;
+
+  // Add some files to the directory.
+  this->FillDirectory(10);
+  expect += 10;
+
+  typename TestFixture::DirentBufferType dirents(256);
+
+  // We should get all the expected entries.
+  EXPECT_EQ(expect, this->ReadAndCountAllEntries(&dirents));
+
+  // Seek back to 0.
+  ASSERT_NO_ERRNO(this->SeekStart());
+
+  // We should get all the expected entries again.
+  EXPECT_EQ(expect, this->ReadAndCountAllEntries(&dirents));
+}
+
+// Test that getdents() after SEEK_END succeeds.
+// This is a regression test for #128.
+TYPED_TEST(GetdentsTest, Issue128ProcSeekEnd) {
+  auto fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/self", O_RDONLY | O_DIRECTORY));
+  typename TestFixture::DirentBufferType dirents(256);
+
+  ASSERT_THAT(lseek(fd.get(), 0, SEEK_END), SyscallSucceeds());
+  ASSERT_THAT(RetryEINTR(syscall)(this->SyscallNum(), fd.get(), dirents.Data(),
+                                  dirents.Size()),
+              SyscallSucceeds());
+}
+
+// Some tests using the glibc readdir interface.
+TEST(ReaddirTest, OpenDir) {
+  DIR* dev;
+  ASSERT_THAT(dev = opendir("/dev"), NotNull());
+  EXPECT_THAT(closedir(dev), SyscallSucceeds());
+}
+
+TEST(ReaddirTest, RootContainsBasicDirectories) {
+  EXPECT_THAT(ListDir("/", true),
+              IsPosixErrorOkAndHolds(IsSupersetOf(
+                  {"bin", "dev", "etc", "lib", "proc", "sbin", "usr"})));
+}
+
+TEST(ReaddirTest, Bug24096713Dev) {
+  auto contents = ASSERT_NO_ERRNO_AND_VALUE(ListDir("/dev", true));
+  EXPECT_THAT(contents, Not(IsEmpty()));
+}
+
+TEST(ReaddirTest, Bug24096713ProcTid) {
+  auto contents = ASSERT_NO_ERRNO_AND_VALUE(
+      ListDir(absl::StrCat("/proc/", syscall(SYS_gettid), "/"), true));
+  EXPECT_THAT(contents, Not(IsEmpty()));
+}
+
+TEST(ReaddirTest, Bug33429925Proc) {
+  auto contents = ASSERT_NO_ERRNO_AND_VALUE(ListDir("/proc", true));
+  EXPECT_THAT(contents, Not(IsEmpty()));
+}
+
+TEST(ReaddirTest, Bug35110122Root) {
+  auto contents = ASSERT_NO_ERRNO_AND_VALUE(ListDir("/", true));
+  EXPECT_THAT(contents, Not(IsEmpty()));
+}
+
+// Unlink should invalidate getdents cache.
+TEST(ReaddirTest, GoneAfterRemoveCache) {
+  TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir.path()));
+  std::string name = std::string(Basename(file.path()));
+
+  auto contents = ASSERT_NO_ERRNO_AND_VALUE(ListDir(dir.path(), true));
+  EXPECT_THAT(contents, Contains(name));
+
+  file.reset();
+
+  contents = ASSERT_NO_ERRNO_AND_VALUE(ListDir(dir.path(), true));
+  EXPECT_THAT(contents, Not(Contains(name)));
+}
+
+// Regression test for b/137398511. Rename should invalidate getdents cache.
+TEST(ReaddirTest, GoneAfterRenameCache) {
+  TempPath src = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  TempPath dst = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+
+  TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(src.path()));
+  std::string name = std::string(Basename(file.path()));
+
+  auto contents = ASSERT_NO_ERRNO_AND_VALUE(ListDir(src.path(), true));
+  EXPECT_THAT(contents, Contains(name));
+
+  ASSERT_THAT(rename(file.path().c_str(), JoinPath(dst.path(), name).c_str()),
+              SyscallSucceeds());
+  // Release file since it was renamed. dst cleanup will ultimately delete it.
+  file.release();
+
+  contents = ASSERT_NO_ERRNO_AND_VALUE(ListDir(src.path(), true));
+  EXPECT_THAT(contents, Not(Contains(name)));
+
+  contents = ASSERT_NO_ERRNO_AND_VALUE(ListDir(dst.path(), true));
+  EXPECT_THAT(contents, Contains(name));
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/getrandom.cc b/test/syscalls/linux/getrandom.cc
new file mode 100644
index 000000000..f87cdd7a1
--- /dev/null
+++ b/test/syscalls/linux/getrandom.cc
@@ -0,0 +1,63 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+#ifndef SYS_getrandom
+#if defined(__x86_64__)
+#define SYS_getrandom 318
+#elif defined(__i386__)
+#define SYS_getrandom 355
+#elif defined(__aarch64__)
+#define SYS_getrandom 278
+#else
+#error "Unknown architecture"
+#endif
+#endif  // SYS_getrandom
+
+bool SomeByteIsNonZero(char* random_bytes, int length) {
+  for (int i = 0; i < length; i++) {
+    if (random_bytes[i] != 0) {
+      return true;
+    }
+  }
+  return false;
+}
+
+TEST(GetrandomTest, IsRandom) {
+  // This test calls get_random and makes sure that the array is filled in with
+  // something that is non-zero. Perhaps we get back \x00\x00\x00\x00\x00.... as
+  // a random result, but it's so unlikely that we'll just ignore this.
+  char random_bytes[64] = {};
+  int n = syscall(SYS_getrandom, random_bytes, 64, 0);
+  SKIP_IF(!IsRunningOnGvisor() && n < 0 && errno == ENOSYS);
+  EXPECT_THAT(n, SyscallSucceeds());
+  EXPECT_GT(n, 0);  // Some bytes should be returned.
+  EXPECT_TRUE(SomeByteIsNonZero(random_bytes, n));
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/getrusage.cc b/test/syscalls/linux/getrusage.cc
new file mode 100644
index 000000000..0e51d42a8
--- /dev/null
+++ b/test/syscalls/linux/getrusage.cc
@@ -0,0 +1,177 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <signal.h>
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#include "gtest/gtest.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/util/logging.h"
+#include "test/util/memory_util.h"
+#include "test/util/signal_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(GetrusageTest, BasicFork) {
+  pid_t pid = fork();
+  if (pid == 0) {
+    struct rusage rusage_self;
+    TEST_PCHECK(getrusage(RUSAGE_SELF, &rusage_self) == 0);
+    struct rusage rusage_children;
+    TEST_PCHECK(getrusage(RUSAGE_CHILDREN, &rusage_children) == 0);
+    // The child has consumed some memory.
+    TEST_CHECK(rusage_self.ru_maxrss != 0);
+    // The child has no children of its own.
+    TEST_CHECK(rusage_children.ru_maxrss == 0);
+    _exit(0);
+  }
+  ASSERT_THAT(pid, SyscallSucceeds());
+  int status;
+  ASSERT_THAT(RetryEINTR(waitpid)(pid, &status, 0), SyscallSucceeds());
+  struct rusage rusage_self;
+  ASSERT_THAT(getrusage(RUSAGE_SELF, &rusage_self), SyscallSucceeds());
+  struct rusage rusage_children;
+  ASSERT_THAT(getrusage(RUSAGE_CHILDREN, &rusage_children), SyscallSucceeds());
+  // The parent has consumed some memory.
+  EXPECT_GT(rusage_self.ru_maxrss, 0);
+  // The child has consumed some memory, and because it has exited we can get
+  // its max RSS.
+  EXPECT_GT(rusage_children.ru_maxrss, 0);
+}
+
+// Verifies that a process can get the max resident set size of its grandchild,
+// i.e. that maxrss propagates correctly from children to waiting parents.
+TEST(GetrusageTest, Grandchild) {
+  constexpr int kGrandchildSizeKb = 1024;
+  pid_t pid = fork();
+  if (pid == 0) {
+    pid = fork();
+    if (pid == 0) {
+      int flags = MAP_ANONYMOUS | MAP_POPULATE | MAP_PRIVATE;
+      void* addr =
+          mmap(nullptr, kGrandchildSizeKb * 1024, PROT_WRITE, flags, -1, 0);
+      TEST_PCHECK(addr != MAP_FAILED);
+    } else {
+      int status;
+      TEST_PCHECK(RetryEINTR(waitpid)(pid, &status, 0) == pid);
+    }
+    _exit(0);
+  }
+  ASSERT_THAT(pid, SyscallSucceeds());
+  int status;
+  ASSERT_THAT(RetryEINTR(waitpid)(pid, &status, 0), SyscallSucceeds());
+  struct rusage rusage_self;
+  ASSERT_THAT(getrusage(RUSAGE_SELF, &rusage_self), SyscallSucceeds());
+  struct rusage rusage_children;
+  ASSERT_THAT(getrusage(RUSAGE_CHILDREN, &rusage_children), SyscallSucceeds());
+  // The parent has consumed some memory.
+  EXPECT_GT(rusage_self.ru_maxrss, 0);
+  // The child should consume next to no memory, but the grandchild will
+  // consume at least 1MB. Verify that usage bubbles up to the grandparent.
+  EXPECT_GT(rusage_children.ru_maxrss, kGrandchildSizeKb);
+}
+
+// Verifies that processes ignoring SIGCHLD do not have updated child maxrss
+// updated.
+TEST(GetrusageTest, IgnoreSIGCHLD) {
+  struct sigaction sa;
+  sa.sa_handler = SIG_IGN;
+  sa.sa_flags = 0;
+  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGCHLD, sa));
+  pid_t pid = fork();
+  if (pid == 0) {
+    struct rusage rusage_self;
+    TEST_PCHECK(getrusage(RUSAGE_SELF, &rusage_self) == 0);
+    // The child has consumed some memory.
+    TEST_CHECK(rusage_self.ru_maxrss != 0);
+    _exit(0);
+  }
+  ASSERT_THAT(pid, SyscallSucceeds());
+  int status;
+  ASSERT_THAT(RetryEINTR(waitpid)(pid, &status, 0),
+              SyscallFailsWithErrno(ECHILD));
+  struct rusage rusage_self;
+  ASSERT_THAT(getrusage(RUSAGE_SELF, &rusage_self), SyscallSucceeds());
+  struct rusage rusage_children;
+  ASSERT_THAT(getrusage(RUSAGE_CHILDREN, &rusage_children), SyscallSucceeds());
+  // The parent has consumed some memory.
+  EXPECT_GT(rusage_self.ru_maxrss, 0);
+  // The child's maxrss should not have propagated up.
+  EXPECT_EQ(rusage_children.ru_maxrss, 0);
+}
+
+// Verifies that zombie processes do not update their parent's maxrss. Only
+// reaped processes should do this.
+TEST(GetrusageTest, IgnoreZombie) {
+  pid_t pid = fork();
+  if (pid == 0) {
+    struct rusage rusage_self;
+    TEST_PCHECK(getrusage(RUSAGE_SELF, &rusage_self) == 0);
+    struct rusage rusage_children;
+    TEST_PCHECK(getrusage(RUSAGE_CHILDREN, &rusage_children) == 0);
+    // The child has consumed some memory.
+    TEST_CHECK(rusage_self.ru_maxrss != 0);
+    // The child has no children of its own.
+    TEST_CHECK(rusage_children.ru_maxrss == 0);
+    _exit(0);
+  }
+  ASSERT_THAT(pid, SyscallSucceeds());
+  // Give the child time to exit. Because we don't call wait, the child should
+  // remain a zombie.
+  absl::SleepFor(absl::Seconds(5));
+  struct rusage rusage_self;
+  ASSERT_THAT(getrusage(RUSAGE_SELF, &rusage_self), SyscallSucceeds());
+  struct rusage rusage_children;
+  ASSERT_THAT(getrusage(RUSAGE_CHILDREN, &rusage_children), SyscallSucceeds());
+  // The parent has consumed some memory.
+  EXPECT_GT(rusage_self.ru_maxrss, 0);
+  // The child has consumed some memory, but hasn't been reaped.
+  EXPECT_EQ(rusage_children.ru_maxrss, 0);
+}
+
+TEST(GetrusageTest, Wait4) {
+  pid_t pid = fork();
+  if (pid == 0) {
+    struct rusage rusage_self;
+    TEST_PCHECK(getrusage(RUSAGE_SELF, &rusage_self) == 0);
+    struct rusage rusage_children;
+    TEST_PCHECK(getrusage(RUSAGE_CHILDREN, &rusage_children) == 0);
+    // The child has consumed some memory.
+    TEST_CHECK(rusage_self.ru_maxrss != 0);
+    // The child has no children of its own.
+    TEST_CHECK(rusage_children.ru_maxrss == 0);
+    _exit(0);
+  }
+  ASSERT_THAT(pid, SyscallSucceeds());
+  struct rusage rusage_children;
+  int status;
+  ASSERT_THAT(RetryEINTR(wait4)(pid, &status, 0, &rusage_children),
+              SyscallSucceeds());
+  // The child has consumed some memory, and because it has exited we can get
+  // its max RSS.
+  EXPECT_GT(rusage_children.ru_maxrss, 0);
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/inotify.cc b/test/syscalls/linux/inotify.cc
new file mode 100644
index 000000000..220874aeb
--- /dev/null
+++ b/test/syscalls/linux/inotify.cc
@@ -0,0 +1,2380 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <libgen.h>
+#include <sched.h>
+#include <sys/epoll.h>
+#include <sys/inotify.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <sys/xattr.h>
+
+#include <atomic>
+#include <list>
+#include <string>
+#include <vector>
+
+#include "absl/strings/str_cat.h"
+#include "absl/strings/str_format.h"
+#include "absl/strings/str_join.h"
+#include "absl/synchronization/mutex.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/util/epoll_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/fs_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+using ::absl::StreamFormat;
+using ::absl::StrFormat;
+
+constexpr int kBufSize = 1024;
+
+// C++-friendly version of struct inotify_event.
+struct Event {
+  int32_t wd;
+  uint32_t mask;
+  uint32_t cookie;
+  uint32_t len;
+  std::string name;
+
+  Event(uint32_t mask, int32_t wd, absl::string_view name, uint32_t cookie)
+      : wd(wd),
+        mask(mask),
+        cookie(cookie),
+        len(name.size()),
+        name(std::string(name)) {}
+  Event(uint32_t mask, int32_t wd, absl::string_view name)
+      : Event(mask, wd, name, 0) {}
+  Event(uint32_t mask, int32_t wd) : Event(mask, wd, "", 0) {}
+  Event() : Event(0, 0, "", 0) {}
+};
+
+// Prints the symbolic name for a struct inotify_event's 'mask' field.
+std::string FlagString(uint32_t flags) {
+  std::vector<std::string> names;
+
+#define EMIT(target)          \
+  if (flags & target) {       \
+    names.push_back(#target); \
+    flags &= ~target;         \
+  }
+
+  EMIT(IN_ACCESS);
+  EMIT(IN_ATTRIB);
+  EMIT(IN_CLOSE_WRITE);
+  EMIT(IN_CLOSE_NOWRITE);
+  EMIT(IN_CREATE);
+  EMIT(IN_DELETE);
+  EMIT(IN_DELETE_SELF);
+  EMIT(IN_MODIFY);
+  EMIT(IN_MOVE_SELF);
+  EMIT(IN_MOVED_FROM);
+  EMIT(IN_MOVED_TO);
+  EMIT(IN_OPEN);
+
+  EMIT(IN_DONT_FOLLOW);
+  EMIT(IN_EXCL_UNLINK);
+  EMIT(IN_ONESHOT);
+  EMIT(IN_ONLYDIR);
+
+  EMIT(IN_IGNORED);
+  EMIT(IN_ISDIR);
+  EMIT(IN_Q_OVERFLOW);
+  EMIT(IN_UNMOUNT);
+
+#undef EMIT
+
+  // If we have anything left over at the end, print it as a hex value.
+  if (flags) {
+    names.push_back(absl::StrCat("0x", absl::Hex(flags)));
+  }
+
+  return absl::StrJoin(names, "|");
+}
+
+std::string DumpEvent(const Event& event) {
+  return StrFormat(
+      "%s, wd=%d%s%s", FlagString(event.mask), event.wd,
+      (event.len > 0) ? StrFormat(", name=%s", event.name) : "",
+      (event.cookie > 0) ? StrFormat(", cookie=%ud", event.cookie) : "");
+}
+
+std::string DumpEvents(const std::vector<Event>& events, int indent_level) {
+  std::stringstream ss;
+  ss << StreamFormat("%d event%s:\n", events.size(),
+                     (events.size() > 1) ? "s" : "");
+  int i = 0;
+  for (const Event& ev : events) {
+    ss << StreamFormat("%sevents[%d]: %s\n", std::string(indent_level, '\t'),
+                       i++, DumpEvent(ev));
+  }
+  return ss.str();
+}
+
+// A matcher which takes an expected list of events to match against another
+// list of inotify events, in order. This is similar to the ElementsAre matcher,
+// but displays more informative messages on mismatch.
+class EventsAreMatcher
+    : public ::testing::MatcherInterface<std::vector<Event>> {
+ public:
+  explicit EventsAreMatcher(std::vector<Event> references)
+      : references_(std::move(references)) {}
+
+  bool MatchAndExplain(
+      std::vector<Event> events,
+      ::testing::MatchResultListener* const listener) const override {
+    if (references_.size() != events.size()) {
+      *listener << StreamFormat("\n\tCount mismatch, got %s",
+                                DumpEvents(events, 2));
+      return false;
+    }
+
+    bool success = true;
+    for (unsigned int i = 0; i < references_.size(); ++i) {
+      const Event& reference = references_[i];
+      const Event& target = events[i];
+
+      if (target.mask != reference.mask || target.wd != reference.wd ||
+          target.name != reference.name || target.cookie != reference.cookie) {
+        *listener << StreamFormat("\n\tMismatch at index %d, want %s, got %s,",
+                                  i, DumpEvent(reference), DumpEvent(target));
+        success = false;
+      }
+    }
+
+    if (!success) {
+      *listener << StreamFormat("\n\tIn total of %s", DumpEvents(events, 2));
+    }
+    return success;
+  }
+
+  void DescribeTo(::std::ostream* const os) const override {
+    *os << StreamFormat("%s", DumpEvents(references_, 1));
+  }
+
+  void DescribeNegationTo(::std::ostream* const os) const override {
+    *os << StreamFormat("mismatch from %s", DumpEvents(references_, 1));
+  }
+
+ private:
+  std::vector<Event> references_;
+};
+
+::testing::Matcher<std::vector<Event>> Are(std::vector<Event> events) {
+  return MakeMatcher(new EventsAreMatcher(std::move(events)));
+}
+
+// Similar to the EventsAre matcher, but the order of events are ignored.
+class UnorderedEventsAreMatcher
+    : public ::testing::MatcherInterface<std::vector<Event>> {
+ public:
+  explicit UnorderedEventsAreMatcher(std::vector<Event> references)
+      : references_(std::move(references)) {}
+
+  bool MatchAndExplain(
+      std::vector<Event> events,
+      ::testing::MatchResultListener* const listener) const override {
+    if (references_.size() != events.size()) {
+      *listener << StreamFormat("\n\tCount mismatch, got %s",
+                                DumpEvents(events, 2));
+      return false;
+    }
+
+    std::vector<Event> unmatched(references_);
+
+    for (const Event& candidate : events) {
+      for (auto it = unmatched.begin(); it != unmatched.end();) {
+        const Event& reference = *it;
+        if (candidate.mask == reference.mask && candidate.wd == reference.wd &&
+            candidate.name == reference.name &&
+            candidate.cookie == reference.cookie) {
+          it = unmatched.erase(it);
+          break;
+        } else {
+          ++it;
+        }
+      }
+    }
+
+    // Anything left unmatched? If so, the matcher fails.
+    if (!unmatched.empty()) {
+      *listener << StreamFormat("\n\tFailed to match %s",
+                                DumpEvents(unmatched, 2));
+      *listener << StreamFormat("\n\tIn total of %s", DumpEvents(events, 2));
+      return false;
+    }
+
+    return true;
+  }
+
+  void DescribeTo(::std::ostream* const os) const override {
+    *os << StreamFormat("unordered %s", DumpEvents(references_, 1));
+  }
+
+  void DescribeNegationTo(::std::ostream* const os) const override {
+    *os << StreamFormat("mismatch from unordered %s",
+                        DumpEvents(references_, 1));
+  }
+
+ private:
+  std::vector<Event> references_;
+};
+
+::testing::Matcher<std::vector<Event>> AreUnordered(std::vector<Event> events) {
+  return MakeMatcher(new UnorderedEventsAreMatcher(std::move(events)));
+}
+
+// Reads events from an inotify fd until either EOF, or read returns EAGAIN.
+PosixErrorOr<std::vector<Event>> DrainEvents(int fd) {
+  std::vector<Event> events;
+  while (true) {
+    int events_size = 0;
+    if (ioctl(fd, FIONREAD, &events_size) < 0) {
+      return PosixError(errno, "ioctl(FIONREAD) failed on inotify fd");
+    }
+    // Deliberately use a buffer that is larger than necessary, expecting to
+    // only read events_size bytes.
+    std::vector<char> buf(events_size + kBufSize, 0);
+    const ssize_t readlen = read(fd, buf.data(), buf.size());
+    MaybeSave();
+    // Read error?
+    if (readlen < 0) {
+      if (errno == EAGAIN) {
+        // If EAGAIN, no more events at the moment. Return what we have so far.
+        return events;
+      }
+      // Some other read error. Return an error. Right now if we encounter this
+      // after already reading some events, they get lost. However, we don't
+      // expect to see any error, and the calling test will fail immediately if
+      // we signal an error anyways, so this is acceptable.
+      return PosixError(errno, "read() failed on inotify fd");
+    }
+    if (readlen < static_cast<int>(sizeof(struct inotify_event))) {
+      // Impossibly short read.
+      return PosixError(
+          EIO,
+          "read() didn't return enough data represent even a single event");
+    }
+    if (readlen != events_size) {
+      return PosixError(EINVAL, absl::StrCat("read ", readlen,
+                                             " bytes, expected ", events_size));
+    }
+    if (readlen == 0) {
+      // EOF.
+      return events;
+    }
+
+    // Normal read.
+    const char* cursor = buf.data();
+    while (cursor < (buf.data() + readlen)) {
+      struct inotify_event event = {};
+      memcpy(&event, cursor, sizeof(struct inotify_event));
+
+      Event ev;
+      ev.wd = event.wd;
+      ev.mask = event.mask;
+      ev.cookie = event.cookie;
+      ev.len = event.len;
+      if (event.len > 0) {
+        TEST_CHECK(static_cast<int>(sizeof(struct inotify_event) + event.len) <=
+                   readlen);
+        ev.name = std::string(cursor +
+                              offsetof(struct inotify_event, name));  // NOLINT
+        // Name field should always be smaller than event.len, otherwise we have
+        // a buffer overflow. The two sizes aren't equal because the string
+        // constructor will stop at the first null byte, while event.name may be
+        // padded up to event.len using multiple null bytes.
+        TEST_CHECK(ev.name.size() <= event.len);
+      }
+
+      events.push_back(ev);
+      cursor += sizeof(struct inotify_event) + event.len;
+    }
+  }
+}
+
+PosixErrorOr<FileDescriptor> InotifyInit1(int flags) {
+  int fd;
+  EXPECT_THAT(fd = inotify_init1(flags), SyscallSucceeds());
+  if (fd < 0) {
+    return PosixError(errno, "inotify_init1() failed");
+  }
+  return FileDescriptor(fd);
+}
+
+PosixErrorOr<int> InotifyAddWatch(int fd, const std::string& path,
+                                  uint32_t mask) {
+  int wd;
+  EXPECT_THAT(wd = inotify_add_watch(fd, path.c_str(), mask),
+              SyscallSucceeds());
+  if (wd < 0) {
+    return PosixError(errno, "inotify_add_watch() failed");
+  }
+  return wd;
+}
+
+TEST(Inotify, IllegalSeek) {
+  const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(0));
+  EXPECT_THAT(lseek(fd.get(), 0, SEEK_SET), SyscallFailsWithErrno(ESPIPE));
+}
+
+TEST(Inotify, IllegalPread) {
+  const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(0));
+  int val;
+  EXPECT_THAT(pread(fd.get(), &val, sizeof(val), 0),
+              SyscallFailsWithErrno(ESPIPE));
+}
+
+TEST(Inotify, IllegalPwrite) {
+  const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(0));
+  EXPECT_THAT(pwrite(fd.get(), "x", 1, 0), SyscallFailsWithErrno(ESPIPE));
+}
+
+TEST(Inotify, IllegalWrite) {
+  const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(0));
+  int val = 0;
+  EXPECT_THAT(write(fd.get(), &val, sizeof(val)), SyscallFailsWithErrno(EBADF));
+}
+
+TEST(Inotify, InitFlags) {
+  EXPECT_THAT(inotify_init1(IN_NONBLOCK | IN_CLOEXEC), SyscallSucceeds());
+  EXPECT_THAT(inotify_init1(12345), SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(Inotify, NonBlockingReadReturnsEagain) {
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+  std::vector<char> buf(kBufSize, 0);
+
+  // The read below should return fail with EAGAIN because there is no data to
+  // read and we've specified IN_NONBLOCK. We're guaranteed that there is no
+  // data to read because we haven't registered any watches yet.
+  EXPECT_THAT(read(fd.get(), buf.data(), buf.size()),
+              SyscallFailsWithErrno(EAGAIN));
+}
+
+TEST(Inotify, AddWatchOnInvalidFdFails) {
+  // Garbage fd.
+  EXPECT_THAT(inotify_add_watch(-1, "/tmp", IN_ALL_EVENTS),
+              SyscallFailsWithErrno(EBADF));
+  EXPECT_THAT(inotify_add_watch(1337, "/tmp", IN_ALL_EVENTS),
+              SyscallFailsWithErrno(EBADF));
+
+  // Non-inotify fds.
+  EXPECT_THAT(inotify_add_watch(0, "/tmp", IN_ALL_EVENTS),
+              SyscallFailsWithErrno(EINVAL));
+  EXPECT_THAT(inotify_add_watch(1, "/tmp", IN_ALL_EVENTS),
+              SyscallFailsWithErrno(EINVAL));
+  EXPECT_THAT(inotify_add_watch(2, "/tmp", IN_ALL_EVENTS),
+              SyscallFailsWithErrno(EINVAL));
+  const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open("/tmp", O_RDONLY));
+  EXPECT_THAT(inotify_add_watch(fd.get(), "/tmp", IN_ALL_EVENTS),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(Inotify, RemovingWatchGeneratesEvent) {
+  const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+
+  const int wd = ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS));
+  EXPECT_THAT(inotify_rm_watch(fd.get(), wd), SyscallSucceeds());
+
+  // Read events, ensure the first event is IN_IGNORED.
+  const std::vector<Event> events =
+      ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+  EXPECT_THAT(events, Are({Event(IN_IGNORED, wd)}));
+}
+
+TEST(Inotify, CanDeleteFileAfterRemovingWatch) {
+  const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  TempPath file1 =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path()));
+
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+  const int wd = ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS));
+
+  EXPECT_THAT(inotify_rm_watch(fd.get(), wd), SyscallSucceeds());
+  file1.reset();
+}
+
+TEST(Inotify, RemoveWatchAfterDeletingFileFails) {
+  const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  TempPath file1 =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path()));
+
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+  const int wd = ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS));
+
+  file1.reset();
+  const std::vector<Event> events =
+      ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+  EXPECT_THAT(events, Are({Event(IN_ATTRIB, wd), Event(IN_DELETE_SELF, wd),
+                           Event(IN_IGNORED, wd)}));
+
+  EXPECT_THAT(inotify_rm_watch(fd.get(), wd), SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(Inotify, DuplicateWatchRemovalFails) {
+  const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+  const int wd = ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS));
+
+  EXPECT_THAT(inotify_rm_watch(fd.get(), wd), SyscallSucceeds());
+  EXPECT_THAT(inotify_rm_watch(fd.get(), wd), SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(Inotify, ConcurrentFileDeletionAndWatchRemoval) {
+  const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+  const std::string filename = NewTempAbsPathInDir(root.path());
+
+  auto file_create_delete = [filename]() {
+    const DisableSave ds;  // Too expensive.
+    for (int i = 0; i < 100; ++i) {
+      FileDescriptor file_fd =
+          ASSERT_NO_ERRNO_AND_VALUE(Open(filename, O_CREAT, S_IRUSR | S_IWUSR));
+      file_fd.reset();  // Close before unlinking (although save is disabled).
+      EXPECT_THAT(unlink(filename.c_str()), SyscallSucceeds());
+    }
+  };
+
+  const int shared_fd = fd.get();  // We need to pass it to the thread.
+  auto add_remove_watch = [shared_fd, filename]() {
+    for (int i = 0; i < 100; ++i) {
+      int wd = inotify_add_watch(shared_fd, filename.c_str(), IN_ALL_EVENTS);
+      MaybeSave();
+      if (wd != -1) {
+        // Watch added successfully, try removal.
+        if (inotify_rm_watch(shared_fd, wd)) {
+          // If removal fails, the only acceptable reason is if the wd
+          // is invalid, which will be the case if we try to remove
+          // the watch after the file has been deleted.
+          EXPECT_EQ(errno, EINVAL);
+        }
+      } else {
+        // Add watch failed, this should only fail if the target file doesn't
+        // exist.
+        EXPECT_EQ(errno, ENOENT);
+      }
+    }
+  };
+
+  ScopedThread t1(file_create_delete);
+  ScopedThread t2(add_remove_watch);
+}
+
+TEST(Inotify, DeletingChildGeneratesEvents) {
+  const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  TempPath file1 =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path()));
+
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+  const int root_wd = ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS));
+  const int file1_wd = ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS));
+
+  const std::string file1_path = file1.reset();
+
+  const std::vector<Event> events =
+      ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+  ASSERT_THAT(
+      events,
+      AreUnordered({Event(IN_ATTRIB, file1_wd), Event(IN_DELETE_SELF, file1_wd),
+                    Event(IN_IGNORED, file1_wd),
+                    Event(IN_DELETE, root_wd, Basename(file1_path))}));
+}
+
+// Creating a file in "parent/child" should generate events for child, but not
+// parent.
+TEST(Inotify, CreatingFileGeneratesEvents) {
+  const TempPath parent = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const TempPath child =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(parent.path()));
+
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+  ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(fd.get(), parent.path(), IN_ALL_EVENTS));
+  const int wd = ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(fd.get(), child.path(), IN_ALL_EVENTS));
+
+  // Create a new file in the directory.
+  const TempPath file1 =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(child.path()));
+  const std::vector<Event> events =
+      ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+
+  // The library function we use to create the new file opens it for writing to
+  // create it and sets permissions on it, so we expect the three extra events.
+  ASSERT_THAT(events, Are({Event(IN_CREATE, wd, Basename(file1.path())),
+                           Event(IN_OPEN, wd, Basename(file1.path())),
+                           Event(IN_CLOSE_WRITE, wd, Basename(file1.path())),
+                           Event(IN_ATTRIB, wd, Basename(file1.path()))}));
+}
+
+TEST(Inotify, ReadingFileGeneratesAccessEvent) {
+  const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+  const TempPath file1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      root.path(), "some content", TempPath::kDefaultFileMode));
+
+  const FileDescriptor file1_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDONLY));
+  const int wd = ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS));
+
+  char buf;
+  EXPECT_THAT(read(file1_fd.get(), &buf, 1), SyscallSucceeds());
+
+  const std::vector<Event> events =
+      ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+  ASSERT_THAT(events, Are({Event(IN_ACCESS, wd, Basename(file1.path()))}));
+}
+
+TEST(Inotify, WritingFileGeneratesModifyEvent) {
+  const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+  const TempPath file1 =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path()));
+
+  const FileDescriptor file1_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_WRONLY));
+  const int wd = ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS));
+
+  const std::string data = "some content";
+  EXPECT_THAT(write(file1_fd.get(), data.c_str(), data.length()),
+              SyscallSucceeds());
+
+  const std::vector<Event> events =
+      ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+  ASSERT_THAT(events, Are({Event(IN_MODIFY, wd, Basename(file1.path()))}));
+}
+
+TEST(Inotify, SizeZeroReadWriteGeneratesNothing) {
+  const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+  const TempPath file1 =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path()));
+
+  const FileDescriptor file1_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDWR));
+  ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS));
+
+  // Read from the empty file.
+  int val;
+  ASSERT_THAT(read(file1_fd.get(), &val, sizeof(val)),
+              SyscallSucceedsWithValue(0));
+
+  // Write zero bytes.
+  ASSERT_THAT(write(file1_fd.get(), "", 0), SyscallSucceedsWithValue(0));
+
+  const std::vector<Event> events =
+      ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+  ASSERT_THAT(events, Are({}));
+}
+
+TEST(Inotify, FailedFileCreationGeneratesNoEvents) {
+  const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const std::string dir_path = dir.path();
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+  ASSERT_NO_ERRNO_AND_VALUE(InotifyAddWatch(fd.get(), dir_path, IN_ALL_EVENTS));
+
+  const char* p = dir_path.c_str();
+  ASSERT_THAT(mkdir(p, 0777), SyscallFails());
+  ASSERT_THAT(mknod(p, S_IFIFO, 0777), SyscallFails());
+  ASSERT_THAT(symlink(p, p), SyscallFails());
+  ASSERT_THAT(link(p, p), SyscallFails());
+  std::vector<Event> events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+  ASSERT_THAT(events, Are({}));
+}
+
+TEST(Inotify, WatchSetAfterOpenReportsCloseFdEvent) {
+  const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+  const TempPath file1 =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path()));
+
+  FileDescriptor file1_fd_writable =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_WRONLY));
+  FileDescriptor file1_fd_not_writable =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDONLY));
+  const int wd = ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS));
+
+  file1_fd_writable.reset();  // Close file1_fd_writable.
+  std::vector<Event> events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+  ASSERT_THAT(events, Are({Event(IN_CLOSE_WRITE, wd, Basename(file1.path()))}));
+
+  file1_fd_not_writable.reset();  // Close file1_fd_not_writable.
+  events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+  ASSERT_THAT(events,
+              Are({Event(IN_CLOSE_NOWRITE, wd, Basename(file1.path()))}));
+}
+
+TEST(Inotify, ChildrenDeletionInWatchedDirGeneratesEvent) {
+  const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+
+  TempPath file1 =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path()));
+  TempPath dir1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(root.path()));
+
+  const int wd = ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS));
+
+  const std::string file1_path = file1.reset();
+  const std::string dir1_path = dir1.release();
+  EXPECT_THAT(rmdir(dir1_path.c_str()), SyscallSucceeds());
+
+  const std::vector<Event> events =
+      ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+
+  ASSERT_THAT(events,
+              Are({Event(IN_DELETE, wd, Basename(file1_path)),
+                   Event(IN_DELETE | IN_ISDIR, wd, Basename(dir1_path))}));
+}
+
+TEST(Inotify, RmdirOnWatchedTargetGeneratesEvent) {
+  const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+
+  const int wd = ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS));
+
+  EXPECT_THAT(rmdir(root.path().c_str()), SyscallSucceeds());
+  const std::vector<Event> events =
+      ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+  ASSERT_THAT(events, Are({Event(IN_DELETE_SELF, wd), Event(IN_IGNORED, wd)}));
+}
+
+TEST(Inotify, MoveGeneratesEvents) {
+  const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+
+  TempPath file1 =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path()));
+
+  const TempPath dir1 =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(root.path()));
+  const TempPath dir2 =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(root.path()));
+
+  const int root_wd = ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS));
+  const int dir1_wd = ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(fd.get(), dir1.path(), IN_ALL_EVENTS));
+  const int dir2_wd = ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(fd.get(), dir2.path(), IN_ALL_EVENTS));
+  // Test move from root -> root.
+  std::string newpath = NewTempAbsPathInDir(root.path());
+  std::string oldpath = file1.release();
+  EXPECT_THAT(rename(oldpath.c_str(), newpath.c_str()), SyscallSucceeds());
+  file1.reset(newpath);
+  std::vector<Event> events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+  ASSERT_THAT(
+      events,
+      Are({Event(IN_MOVED_FROM, root_wd, Basename(oldpath), events[0].cookie),
+           Event(IN_MOVED_TO, root_wd, Basename(newpath), events[1].cookie)}));
+  EXPECT_NE(events[0].cookie, 0);
+  EXPECT_EQ(events[0].cookie, events[1].cookie);
+  uint32_t last_cookie = events[0].cookie;
+
+  // Test move from root -> root/dir1.
+  newpath = NewTempAbsPathInDir(dir1.path());
+  oldpath = file1.release();
+  EXPECT_THAT(rename(oldpath.c_str(), newpath.c_str()), SyscallSucceeds());
+  file1.reset(newpath);
+  events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+  ASSERT_THAT(
+      events,
+      Are({Event(IN_MOVED_FROM, root_wd, Basename(oldpath), events[0].cookie),
+           Event(IN_MOVED_TO, dir1_wd, Basename(newpath), events[1].cookie)}));
+  // Cookies should be distinct between distinct rename events.
+  EXPECT_NE(events[0].cookie, last_cookie);
+  EXPECT_EQ(events[0].cookie, events[1].cookie);
+  last_cookie = events[0].cookie;
+
+  // Test move from root/dir1 -> root/dir2.
+  newpath = NewTempAbsPathInDir(dir2.path());
+  oldpath = file1.release();
+  EXPECT_THAT(rename(oldpath.c_str(), newpath.c_str()), SyscallSucceeds());
+  file1.reset(newpath);
+  events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+  ASSERT_THAT(
+      events,
+      Are({Event(IN_MOVED_FROM, dir1_wd, Basename(oldpath), events[0].cookie),
+           Event(IN_MOVED_TO, dir2_wd, Basename(newpath), events[1].cookie)}));
+  EXPECT_NE(events[0].cookie, last_cookie);
+  EXPECT_EQ(events[0].cookie, events[1].cookie);
+  last_cookie = events[0].cookie;
+}
+
+TEST(Inotify, MoveWatchedTargetGeneratesEvents) {
+  const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+
+  TempPath file1 =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path()));
+
+  const int root_wd = ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS));
+  const int file1_wd = ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS));
+
+  const std::string newpath = NewTempAbsPathInDir(root.path());
+  const std::string oldpath = file1.release();
+  EXPECT_THAT(rename(oldpath.c_str(), newpath.c_str()), SyscallSucceeds());
+  file1.reset(newpath);
+  const std::vector<Event> events =
+      ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+  ASSERT_THAT(
+      events,
+      Are({Event(IN_MOVED_FROM, root_wd, Basename(oldpath), events[0].cookie),
+           Event(IN_MOVED_TO, root_wd, Basename(newpath), events[1].cookie),
+           // Self move events do not have a cookie.
+           Event(IN_MOVE_SELF, file1_wd)}));
+  EXPECT_NE(events[0].cookie, 0);
+  EXPECT_EQ(events[0].cookie, events[1].cookie);
+}
+
+TEST(Inotify, CoalesceEvents) {
+  const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+
+  const TempPath file1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      root.path(), "some content", TempPath::kDefaultFileMode));
+
+  FileDescriptor file1_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDONLY));
+  const int wd = ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS));
+
+  // Read the file a few times. This will would generate multiple IN_ACCESS
+  // events but they should get coalesced to a single event.
+  char buf;
+  EXPECT_THAT(read(file1_fd.get(), &buf, 1), SyscallSucceeds());
+  EXPECT_THAT(read(file1_fd.get(), &buf, 1), SyscallSucceeds());
+  EXPECT_THAT(read(file1_fd.get(), &buf, 1), SyscallSucceeds());
+  EXPECT_THAT(read(file1_fd.get(), &buf, 1), SyscallSucceeds());
+
+  // Use the close event verify that we haven't simply left the additional
+  // IN_ACCESS events unread.
+  file1_fd.reset();  // Close file1_fd.
+
+  const std::string file1_name = std::string(Basename(file1.path()));
+  std::vector<Event> events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+  ASSERT_THAT(events, Are({Event(IN_ACCESS, wd, file1_name),
+                           Event(IN_CLOSE_NOWRITE, wd, file1_name)}));
+
+  // Now let's try interleaving other events into a stream of repeated events.
+  file1_fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDWR));
+
+  EXPECT_THAT(read(file1_fd.get(), &buf, 1), SyscallSucceeds());
+  EXPECT_THAT(read(file1_fd.get(), &buf, 1), SyscallSucceeds());
+  EXPECT_THAT(write(file1_fd.get(), "x", 1), SyscallSucceeds());
+  EXPECT_THAT(write(file1_fd.get(), "x", 1), SyscallSucceeds());
+  EXPECT_THAT(write(file1_fd.get(), "x", 1), SyscallSucceeds());
+  EXPECT_THAT(read(file1_fd.get(), &buf, 1), SyscallSucceeds());
+  EXPECT_THAT(read(file1_fd.get(), &buf, 1), SyscallSucceeds());
+
+  file1_fd.reset();  // Close the file.
+
+  events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+  ASSERT_THAT(
+      events,
+      Are({Event(IN_OPEN, wd, file1_name), Event(IN_ACCESS, wd, file1_name),
+           Event(IN_MODIFY, wd, file1_name), Event(IN_ACCESS, wd, file1_name),
+           Event(IN_CLOSE_WRITE, wd, file1_name)}));
+
+  // Ensure events aren't coalesced if they are from different files.
+  const TempPath file2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      root.path(), "some content", TempPath::kDefaultFileMode));
+  // Discard events resulting from creation of file2.
+  ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+
+  file1_fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDONLY));
+  FileDescriptor file2_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file2.path(), O_RDONLY));
+
+  EXPECT_THAT(read(file1_fd.get(), &buf, 1), SyscallSucceeds());
+  EXPECT_THAT(read(file2_fd.get(), &buf, 1), SyscallSucceeds());
+  EXPECT_THAT(read(file1_fd.get(), &buf, 1), SyscallSucceeds());
+  EXPECT_THAT(read(file1_fd.get(), &buf, 1), SyscallSucceeds());
+
+  // Close both files.
+  file1_fd.reset();
+  file2_fd.reset();
+
+  events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+  const std::string file2_name = std::string(Basename(file2.path()));
+  ASSERT_THAT(
+      events,
+      Are({Event(IN_OPEN, wd, file1_name), Event(IN_OPEN, wd, file2_name),
+           Event(IN_ACCESS, wd, file1_name), Event(IN_ACCESS, wd, file2_name),
+           Event(IN_ACCESS, wd, file1_name),
+           Event(IN_CLOSE_NOWRITE, wd, file1_name),
+           Event(IN_CLOSE_NOWRITE, wd, file2_name)}));
+}
+
+TEST(Inotify, ClosingInotifyFdWithoutRemovingWatchesWorks) {
+  const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+
+  const TempPath file1 =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path()));
+  const FileDescriptor file1_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDONLY));
+
+  ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS));
+  // Note: The check on close will happen in FileDescriptor::~FileDescriptor().
+}
+
+TEST(Inotify, NestedWatches) {
+  const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+
+  const TempPath file1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      root.path(), "some content", TempPath::kDefaultFileMode));
+  const FileDescriptor file1_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDONLY));
+
+  const int root_wd = ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS));
+  const int file1_wd = ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS));
+
+  // Read from file1. This should generate an event for both watches.
+  char buf;
+  EXPECT_THAT(read(file1_fd.get(), &buf, 1), SyscallSucceeds());
+
+  const std::vector<Event> events =
+      ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+  ASSERT_THAT(events, Are({Event(IN_ACCESS, root_wd, Basename(file1.path())),
+                           Event(IN_ACCESS, file1_wd)}));
+}
+
+TEST(Inotify, ConcurrentThreadsGeneratingEvents) {
+  const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+
+  std::vector<TempPath> files;
+  files.reserve(10);
+  for (int i = 0; i < 10; i++) {
+    files.emplace_back(ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+        root.path(), "some content", TempPath::kDefaultFileMode)));
+  }
+
+  auto test_thread = [&files]() {
+    uint32_t seed = time(nullptr);
+    for (int i = 0; i < 20; i++) {
+      const TempPath& file = files[rand_r(&seed) % files.size()];
+      const FileDescriptor file_fd =
+          ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_WRONLY));
+      TEST_PCHECK(write(file_fd.get(), "x", 1) == 1);
+    }
+  };
+
+  ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS));
+
+  std::list<ScopedThread> threads;
+  for (int i = 0; i < 3; i++) {
+    threads.emplace_back(test_thread);
+  }
+  for (auto& t : threads) {
+    t.Join();
+  }
+
+  const std::vector<Event> events =
+      ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+  // 3 threads doing 20 iterations, 3 events per iteration (open, write,
+  // close). However, some events may be coalesced, and we can't reliably
+  // predict how they'll be coalesced since the test threads aren't
+  // synchronized. We can only check that we aren't getting unexpected events.
+  for (const Event& ev : events) {
+    EXPECT_NE(ev.mask & (IN_OPEN | IN_MODIFY | IN_CLOSE_WRITE), 0);
+  }
+}
+
+TEST(Inotify, ReadWithTooSmallBufferFails) {
+  const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const TempPath file1 =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path()));
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+
+  const int wd = ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS));
+
+  // Open the file to queue an event. This event will not have a filename, so
+  // reading from the inotify fd should return sizeof(struct inotify_event)
+  // bytes of data.
+  FileDescriptor file1_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDONLY));
+  std::vector<char> buf(kBufSize, 0);
+  ssize_t readlen;
+
+  // Try a buffer too small to hold any potential event. This is rejected
+  // outright without the event being dequeued.
+  EXPECT_THAT(read(fd.get(), buf.data(), sizeof(struct inotify_event) - 1),
+              SyscallFailsWithErrno(EINVAL));
+  // Try a buffer just large enough. This should succeeed.
+  EXPECT_THAT(
+      readlen = read(fd.get(), buf.data(), sizeof(struct inotify_event)),
+      SyscallSucceeds());
+  EXPECT_EQ(readlen, sizeof(struct inotify_event));
+  // Event queue is now empty, the next read should return EAGAIN.
+  EXPECT_THAT(read(fd.get(), buf.data(), sizeof(struct inotify_event)),
+              SyscallFailsWithErrno(EAGAIN));
+
+  // Now put a watch on the directory, so that generated events contain a name.
+  EXPECT_THAT(inotify_rm_watch(fd.get(), wd), SyscallSucceeds());
+
+  // Drain the event generated from the watch removal.
+  ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+
+  ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS));
+
+  file1_fd.reset();  // Close file to generate an event.
+
+  // Try a buffer too small to hold any event and one too small to hold an event
+  // with a name. These should both fail without consuming the event.
+  EXPECT_THAT(read(fd.get(), buf.data(), sizeof(struct inotify_event) - 1),
+              SyscallFailsWithErrno(EINVAL));
+  EXPECT_THAT(read(fd.get(), buf.data(), sizeof(struct inotify_event)),
+              SyscallFailsWithErrno(EINVAL));
+  // Now try with a large enough buffer. This should return the one event.
+  EXPECT_THAT(readlen = read(fd.get(), buf.data(), buf.size()),
+              SyscallSucceeds());
+  EXPECT_GE(readlen,
+            sizeof(struct inotify_event) + Basename(file1.path()).size());
+  // With the single event read, the queue should once again be empty.
+  EXPECT_THAT(read(fd.get(), buf.data(), sizeof(struct inotify_event)),
+              SyscallFailsWithErrno(EAGAIN));
+}
+
+TEST(Inotify, BlockingReadOnInotifyFd) {
+  const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(0));
+  const TempPath file1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      root.path(), "some content", TempPath::kDefaultFileMode));
+
+  const FileDescriptor file1_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDONLY));
+
+  ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS));
+
+  // Spawn a thread performing a blocking read for new events on the inotify fd.
+  std::vector<char> buf(kBufSize, 0);
+  const int shared_fd = fd.get();  // The thread needs it.
+  ScopedThread t([shared_fd, &buf]() {
+    ssize_t readlen;
+    EXPECT_THAT(readlen = read(shared_fd, buf.data(), buf.size()),
+                SyscallSucceeds());
+  });
+
+  // Perform a read on the watched file, which should generate an IN_ACCESS
+  // event, unblocking the event_reader thread.
+  char c;
+  EXPECT_THAT(read(file1_fd.get(), &c, 1), SyscallSucceeds());
+
+  // Wait for the thread to read the event and exit.
+  t.Join();
+
+  // Make sure the event we got back is sane.
+  uint32_t event_mask;
+  memcpy(&event_mask, buf.data() + offsetof(struct inotify_event, mask),
+         sizeof(event_mask));
+  EXPECT_EQ(event_mask, IN_ACCESS);
+}
+
+TEST(Inotify, WatchOnRelativePath) {
+  const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+  const TempPath file1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      root.path(), "some content", TempPath::kDefaultFileMode));
+
+  const FileDescriptor file1_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDONLY));
+
+  // Change working directory to root.
+  const FileDescriptor cwd = ASSERT_NO_ERRNO_AND_VALUE(Open(".", O_PATH));
+  EXPECT_THAT(chdir(root.path().c_str()), SyscallSucceeds());
+
+  // Add a watch on file1 with a relative path.
+  const int wd = ASSERT_NO_ERRNO_AND_VALUE(InotifyAddWatch(
+      fd.get(), std::string(Basename(file1.path())), IN_ALL_EVENTS));
+
+  // Perform a read on file1, this should generate an IN_ACCESS event.
+  char c;
+  EXPECT_THAT(read(file1_fd.get(), &c, 1), SyscallSucceeds());
+
+  const std::vector<Event> events =
+      ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+  EXPECT_THAT(events, Are({Event(IN_ACCESS, wd)}));
+
+  // Explicitly reset the working directory so that we don't continue to
+  // reference "root". Once the test ends, "root" will get unlinked. If we
+  // continue to hold a reference, random save/restore tests can fail if a save
+  // is triggered after "root" is unlinked; we can't save deleted fs objects
+  // with active references.
+  EXPECT_THAT(fchdir(cwd.get()), SyscallSucceeds());
+}
+
+TEST(Inotify, ZeroLengthReadWriteDoesNotGenerateEvent) {
+  const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+
+  const char kContent[] = "some content";
+  TempPath file1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      root.path(), kContent, TempPath::kDefaultFileMode));
+  const int kContentSize = sizeof(kContent) - 1;
+
+  const FileDescriptor file1_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDWR));
+
+  const int wd = ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS));
+
+  std::vector<char> buf(kContentSize, 0);
+  // Read all available data.
+  ssize_t readlen;
+  EXPECT_THAT(readlen = read(file1_fd.get(), buf.data(), kContentSize),
+              SyscallSucceeds());
+  EXPECT_EQ(readlen, kContentSize);
+  // Drain all events and make sure we got the IN_ACCESS for the read.
+  std::vector<Event> events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+  EXPECT_THAT(events, Are({Event(IN_ACCESS, wd, Basename(file1.path()))}));
+
+  // Now try read again. This should be a 0-length read, since we're at EOF.
+  char c;
+  EXPECT_THAT(readlen = read(file1_fd.get(), &c, 1), SyscallSucceeds());
+  EXPECT_EQ(readlen, 0);
+  // We should have no new events.
+  events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+  EXPECT_TRUE(events.empty());
+
+  // Try issuing a zero-length read.
+  EXPECT_THAT(readlen = read(file1_fd.get(), &c, 0), SyscallSucceeds());
+  EXPECT_EQ(readlen, 0);
+  // We should have no new events.
+  events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+  EXPECT_TRUE(events.empty());
+
+  // Try issuing a zero-length write.
+  ssize_t writelen;
+  EXPECT_THAT(writelen = write(file1_fd.get(), &c, 0), SyscallSucceeds());
+  EXPECT_EQ(writelen, 0);
+  // We should have no new events.
+  events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+  EXPECT_TRUE(events.empty());
+}
+
+TEST(Inotify, ChmodGeneratesAttribEvent_NoRandomSave) {
+  const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const TempPath file1 =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path()));
+
+  FileDescriptor root_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(root.path(), O_RDONLY));
+  FileDescriptor file1_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDWR));
+  FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+
+  const int root_wd = ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS));
+  const int file1_wd = ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS));
+
+  auto verify_chmod_events = [&]() {
+    std::vector<Event> events =
+        ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+    ASSERT_THAT(events, Are({Event(IN_ATTRIB, root_wd, Basename(file1.path())),
+                             Event(IN_ATTRIB, file1_wd)}));
+  };
+
+  // Don't do cooperative S/R tests for any of the {f}chmod* syscalls below, the
+  // test will always fail because nodes cannot be saved when they have stricter
+  // permissions than the original host node.
+  const DisableSave ds;
+
+  // Chmod.
+  ASSERT_THAT(chmod(file1.path().c_str(), S_IWGRP), SyscallSucceeds());
+  verify_chmod_events();
+
+  // Fchmod.
+  ASSERT_THAT(fchmod(file1_fd.get(), S_IRGRP | S_IWGRP), SyscallSucceeds());
+  verify_chmod_events();
+
+  // Fchmodat.
+  const std::string file1_basename = std::string(Basename(file1.path()));
+  ASSERT_THAT(fchmodat(root_fd.get(), file1_basename.c_str(), S_IWGRP, 0),
+              SyscallSucceeds());
+  verify_chmod_events();
+
+  // Make sure the chmod'ed file descriptors are destroyed before DisableSave
+  // is destructed.
+  root_fd.reset();
+  file1_fd.reset();
+}
+
+TEST(Inotify, TruncateGeneratesModifyEvent) {
+  const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const TempPath file1 =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path()));
+  const FileDescriptor file1_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDWR));
+
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+  const int root_wd = ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS));
+  const int file1_wd = ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS));
+
+  auto verify_truncate_events = [&]() {
+    std::vector<Event> events =
+        ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+    ASSERT_THAT(events, Are({Event(IN_MODIFY, root_wd, Basename(file1.path())),
+                             Event(IN_MODIFY, file1_wd)}));
+  };
+
+  // Truncate.
+  EXPECT_THAT(truncate(file1.path().c_str(), 4096), SyscallSucceeds());
+  verify_truncate_events();
+
+  // Ftruncate.
+  EXPECT_THAT(ftruncate(file1_fd.get(), 8192), SyscallSucceeds());
+  verify_truncate_events();
+
+  // No events if truncate fails.
+  EXPECT_THAT(ftruncate(file1_fd.get(), -1), SyscallFailsWithErrno(EINVAL));
+  const std::vector<Event> events =
+      ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+  ASSERT_THAT(events, Are({}));
+}
+
+TEST(Inotify, GetdentsGeneratesAccessEvent) {
+  const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const TempPath file1 =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path()));
+
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+
+  ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS));
+  ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS));
+
+  // This internally calls getdents(2). We also expect to see an open/close
+  // event for the dirfd.
+  ASSERT_NO_ERRNO_AND_VALUE(ListDir(root.path(), false));
+  const std::vector<Event> events =
+      ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+
+  // Linux only seems to generate access events on getdents() on some
+  // calls. Allow the test to pass even if it isn't generated. gVisor will
+  // always generate the IN_ACCESS event so the test will at least ensure gVisor
+  // behaves reasonably.
+  int i = 0;
+  EXPECT_EQ(events[i].mask, IN_OPEN | IN_ISDIR);
+  ++i;
+  if (IsRunningOnGvisor()) {
+    EXPECT_EQ(events[i].mask, IN_ACCESS | IN_ISDIR);
+    ++i;
+  } else {
+    if (events[i].mask == (IN_ACCESS | IN_ISDIR)) {
+      // Skip over the IN_ACCESS event on Linux, it only shows up some of the
+      // time so we can't assert its existence.
+      ++i;
+    }
+  }
+  EXPECT_EQ(events[i].mask, IN_CLOSE_NOWRITE | IN_ISDIR);
+}
+
+TEST(Inotify, MknodGeneratesCreateEvent) {
+  const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+
+  const int wd = ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS));
+
+  const TempPath file1(root.path() + "/file1");
+  const int rc = mknod(file1.path().c_str(), S_IFREG, 0);
+  // mknod(2) is only supported on tmpfs in the sandbox.
+  SKIP_IF(IsRunningOnGvisor() && rc != 0);
+  ASSERT_THAT(rc, SyscallSucceeds());
+
+  const std::vector<Event> events =
+      ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+  ASSERT_THAT(events, Are({Event(IN_CREATE, wd, Basename(file1.path()))}));
+}
+
+TEST(Inotify, SymlinkGeneratesCreateEvent) {
+  const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const TempPath file1 =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path()));
+  const TempPath link1(NewTempAbsPathInDir(root.path()));
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+
+  const int root_wd = ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS));
+  ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS));
+
+  ASSERT_THAT(symlink(file1.path().c_str(), link1.path().c_str()),
+              SyscallSucceeds());
+
+  const std::vector<Event> events =
+      ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+
+  ASSERT_THAT(events, Are({Event(IN_CREATE, root_wd, Basename(link1.path()))}));
+}
+
+TEST(Inotify, LinkGeneratesAttribAndCreateEvents) {
+  const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const TempPath file1 =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path()));
+  const TempPath link1(root.path() + "/link1");
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+
+  const int root_wd = ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS));
+  const int file1_wd = ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS));
+
+  const int rc = link(file1.path().c_str(), link1.path().c_str());
+  // NOTE(b/34861058): link(2) is only supported on tmpfs in the sandbox.
+  SKIP_IF(IsRunningOnGvisor() && rc != 0 &&
+          (errno == EPERM || errno == ENOENT));
+  ASSERT_THAT(rc, SyscallSucceeds());
+
+  const std::vector<Event> events =
+      ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+  ASSERT_THAT(events, Are({Event(IN_ATTRIB, file1_wd),
+                           Event(IN_CREATE, root_wd, Basename(link1.path()))}));
+}
+
+TEST(Inotify, UtimesGeneratesAttribEvent) {
+  const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+  const TempPath file1 =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path()));
+
+  const FileDescriptor file1_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDWR));
+  const int wd = ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS));
+
+  const struct timeval times[2] = {{1, 0}, {2, 0}};
+  EXPECT_THAT(futimes(file1_fd.get(), times), SyscallSucceeds());
+
+  const std::vector<Event> events =
+      ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+  ASSERT_THAT(events, Are({Event(IN_ATTRIB, wd, Basename(file1.path()))}));
+}
+
+TEST(Inotify, HardlinksReuseSameWatch) {
+  const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  TempPath file1 =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path()));
+  TempPath link1(root.path() + "/link1");
+  const int rc = link(file1.path().c_str(), link1.path().c_str());
+  // link(2) is only supported on tmpfs in the sandbox.
+  SKIP_IF(IsRunningOnGvisor() && rc != 0 &&
+          (errno == EPERM || errno == ENOENT));
+  ASSERT_THAT(rc, SyscallSucceeds());
+
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+
+  const int root_wd = ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS));
+  const int file1_wd = ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS));
+  const int link1_wd = ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(fd.get(), link1.path(), IN_ALL_EVENTS));
+
+  // The watch descriptors for watches on different links to the same file
+  // should be identical.
+  EXPECT_NE(root_wd, file1_wd);
+  EXPECT_EQ(file1_wd, link1_wd);
+
+  FileDescriptor file1_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_WRONLY));
+
+  std::vector<Event> events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+  ASSERT_THAT(events,
+              AreUnordered({Event(IN_OPEN, root_wd, Basename(file1.path())),
+                            Event(IN_OPEN, file1_wd)}));
+
+  // For the next step, we want to ensure all fds to the file are closed. Do
+  // that now and drain the resulting events.
+  file1_fd.reset();
+  events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+  ASSERT_THAT(events,
+              Are({Event(IN_CLOSE_WRITE, root_wd, Basename(file1.path())),
+                   Event(IN_CLOSE_WRITE, file1_wd)}));
+
+  // Try removing the link and let's see what events show up. Note that after
+  // this, we still have a link to the file so the watch shouldn't be
+  // automatically removed.
+  const std::string link1_path = link1.reset();
+
+  events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+  ASSERT_THAT(events, Are({Event(IN_ATTRIB, link1_wd),
+                           Event(IN_DELETE, root_wd, Basename(link1_path))}));
+
+  // Now remove the other link. Since this is the last link to the file, the
+  // watch should be automatically removed.
+  const std::string file1_path = file1.reset();
+
+  events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+  ASSERT_THAT(
+      events,
+      AreUnordered({Event(IN_ATTRIB, file1_wd), Event(IN_DELETE_SELF, file1_wd),
+                    Event(IN_IGNORED, file1_wd),
+                    Event(IN_DELETE, root_wd, Basename(file1_path))}));
+}
+
+// Calling mkdir within "parent/child" should generate an event for child, but
+// not parent.
+TEST(Inotify, MkdirGeneratesCreateEventWithDirFlag) {
+  const TempPath parent = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const TempPath child =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(parent.path()));
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+  ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(fd.get(), parent.path(), IN_ALL_EVENTS));
+  const int child_wd = ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(fd.get(), child.path(), IN_ALL_EVENTS));
+
+  const TempPath dir1(NewTempAbsPathInDir(child.path()));
+  ASSERT_THAT(mkdir(dir1.path().c_str(), 0777), SyscallSucceeds());
+
+  const std::vector<Event> events =
+      ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+  ASSERT_THAT(
+      events,
+      Are({Event(IN_CREATE | IN_ISDIR, child_wd, Basename(dir1.path()))}));
+}
+
+TEST(Inotify, MultipleInotifyInstancesAndWatchesAllGetEvents) {
+  const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const TempPath file1 =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path()));
+
+  const FileDescriptor file1_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_WRONLY));
+  constexpr int kNumFds = 30;
+  std::vector<FileDescriptor> inotify_fds;
+
+  for (int i = 0; i < kNumFds; ++i) {
+    const DisableSave ds;  // Too expensive.
+    inotify_fds.emplace_back(
+        ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)));
+    const FileDescriptor& fd = inotify_fds[inotify_fds.size() - 1];  // Back.
+    ASSERT_NO_ERRNO_AND_VALUE(
+        InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS));
+    ASSERT_NO_ERRNO_AND_VALUE(
+        InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS));
+  }
+
+  const std::string data = "some content";
+  EXPECT_THAT(write(file1_fd.get(), data.c_str(), data.length()),
+              SyscallSucceeds());
+
+  for (const FileDescriptor& fd : inotify_fds) {
+    const DisableSave ds;  // Too expensive.
+    const std::vector<Event> events =
+        ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+    if (events.size() >= 2) {
+      EXPECT_EQ(events[0].mask, IN_MODIFY);
+      EXPECT_EQ(events[0].wd, 1);
+      EXPECT_EQ(events[0].name, Basename(file1.path()));
+      EXPECT_EQ(events[1].mask, IN_MODIFY);
+      EXPECT_EQ(events[1].wd, 2);
+      EXPECT_EQ(events[1].name, "");
+    }
+  }
+}
+
+TEST(Inotify, EventsGoUpAtMostOneLevel) {
+  const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const TempPath dir1 =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(root.path()));
+  TempPath file1 =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir1.path()));
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+
+  ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS));
+  const int dir1_wd = ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(fd.get(), dir1.path(), IN_ALL_EVENTS));
+
+  const std::string file1_path = file1.reset();
+
+  const std::vector<Event> events =
+      ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+  ASSERT_THAT(events, Are({Event(IN_DELETE, dir1_wd, Basename(file1_path))}));
+}
+
+TEST(Inotify, DuplicateWatchReturnsSameWatchDescriptor) {
+  const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const TempPath file1 =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path()));
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+
+  const int wd1 = ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS));
+  const int wd2 = ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS));
+
+  EXPECT_EQ(wd1, wd2);
+
+  const FileDescriptor file1_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_WRONLY));
+  const std::vector<Event> events =
+      ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+  // The watch shouldn't be duplicated, we only expect one event.
+  ASSERT_THAT(events, Are({Event(IN_OPEN, wd1)}));
+}
+
+TEST(Inotify, UnmatchedEventsAreDiscarded) {
+  const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  TempPath file1 =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path()));
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+
+  const int wd = ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(fd.get(), file1.path(), IN_ACCESS));
+
+  FileDescriptor file1_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_WRONLY));
+
+  std::vector<Event> events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+  // We only asked for access events, the open event should be discarded.
+  ASSERT_THAT(events, Are({}));
+
+  // IN_IGNORED events are always generated, regardless of the mask.
+  file1_fd.reset();
+  file1.reset();
+  events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+  ASSERT_THAT(events, Are({Event(IN_IGNORED, wd)}));
+}
+
+TEST(Inotify, AddWatchWithInvalidEventMaskFails) {
+  const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+
+  EXPECT_THAT(inotify_add_watch(fd.get(), root.path().c_str(), 0),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(Inotify, AddWatchOnInvalidPathFails) {
+  const TempPath nonexistent(NewTempAbsPath());
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+
+  // Non-existent path.
+  EXPECT_THAT(
+      inotify_add_watch(fd.get(), nonexistent.path().c_str(), IN_CREATE),
+      SyscallFailsWithErrno(ENOENT));
+
+  // Garbage path pointer.
+  EXPECT_THAT(inotify_add_watch(fd.get(), nullptr, IN_CREATE),
+              SyscallFailsWithErrno(EFAULT));
+}
+
+TEST(Inotify, InOnlyDirFlagRespected) {
+  const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const TempPath file1 =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path()));
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+
+  EXPECT_THAT(
+      inotify_add_watch(fd.get(), root.path().c_str(), IN_ACCESS | IN_ONLYDIR),
+      SyscallSucceeds());
+
+  EXPECT_THAT(
+      inotify_add_watch(fd.get(), file1.path().c_str(), IN_ACCESS | IN_ONLYDIR),
+      SyscallFailsWithErrno(ENOTDIR));
+}
+
+TEST(Inotify, MaskAddMergesWithExistingEventMask) {
+  const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const TempPath file1 =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path()));
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+
+  FileDescriptor file1_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_WRONLY));
+
+  const int wd = ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(fd.get(), file1.path(), IN_OPEN | IN_CLOSE_WRITE));
+
+  const std::string data = "some content";
+  EXPECT_THAT(write(file1_fd.get(), data.c_str(), data.length()),
+              SyscallSucceeds());
+
+  // We shouldn't get any events, since IN_MODIFY wasn't in the event mask.
+  std::vector<Event> events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+  ASSERT_THAT(events, Are({}));
+
+  // Add IN_MODIFY to event mask.
+  ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(fd.get(), file1.path(), IN_MODIFY | IN_MASK_ADD));
+
+  EXPECT_THAT(write(file1_fd.get(), data.c_str(), data.length()),
+              SyscallSucceeds());
+
+  // This time we should get the modify event.
+  events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+  ASSERT_THAT(events, Are({Event(IN_MODIFY, wd)}));
+
+  // Now close the fd. If the modify event was added to the event mask rather
+  // than replacing the event mask we won't get the close event.
+  file1_fd.reset();
+  events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+  ASSERT_THAT(events, Are({Event(IN_CLOSE_WRITE, wd)}));
+}
+
+// Test that control events bits are not considered when checking event mask.
+TEST(Inotify, ControlEvents) {
+  const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+
+  const int wd = ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(fd.get(), dir.path(), IN_ACCESS));
+
+  // Check that events in the mask are dispatched and that control bits are
+  // part of the event mask.
+  std::vector<std::string> files =
+      ASSERT_NO_ERRNO_AND_VALUE(ListDir(dir.path(), false));
+  ASSERT_EQ(files.size(), 2);
+
+  const std::vector<Event> events1 =
+      ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+  ASSERT_THAT(events1, Are({Event(IN_ACCESS | IN_ISDIR, wd)}));
+
+  // Check that events not in the mask are discarded.
+  const FileDescriptor dir_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(dir.path(), O_RDONLY | O_DIRECTORY));
+
+  const std::vector<Event> events2 =
+      ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+  ASSERT_THAT(events2, Are({}));
+}
+
+// Regression test to ensure epoll and directory access doesn't deadlock.
+TEST(Inotify, EpollNoDeadlock) {
+  const DisableSave ds;  // Too many syscalls.
+
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+
+  // Create lots of directories and watch all of them.
+  const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  std::vector<TempPath> children;
+  for (size_t i = 0; i < 1000; ++i) {
+    auto child = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(root.path()));
+    ASSERT_NO_ERRNO_AND_VALUE(
+        InotifyAddWatch(fd.get(), child.path(), IN_ACCESS));
+    children.emplace_back(std::move(child));
+  }
+
+  // Run epoll_wait constantly in a separate thread.
+  std::atomic<bool> done(false);
+  ScopedThread th([&fd, &done] {
+    for (auto start = absl::Now(); absl::Now() - start < absl::Seconds(5);) {
+      FileDescriptor epoll_fd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD());
+      ASSERT_NO_ERRNO(RegisterEpollFD(epoll_fd.get(), fd.get(),
+                                      EPOLLIN | EPOLLOUT | EPOLLET, 0));
+      struct epoll_event result[1];
+      EXPECT_THAT(RetryEINTR(epoll_wait)(epoll_fd.get(), result, 1, -1),
+                  SyscallSucceedsWithValue(1));
+
+      sched_yield();
+    }
+    done = true;
+  });
+
+  // While epoll thread is running, constantly access all directories to
+  // generate inotify events.
+  while (!done) {
+    std::vector<std::string> files =
+        ASSERT_NO_ERRNO_AND_VALUE(ListDir(root.path(), false));
+    ASSERT_EQ(files.size(), 1002);
+    for (const auto& child : files) {
+      if (child == "." || child == "..") {
+        continue;
+      }
+      ASSERT_NO_ERRNO_AND_VALUE(ListDir(JoinPath(root.path(), child), false));
+    }
+    sched_yield();
+  }
+}
+
+// On Linux, inotify behavior is not very consistent with splice(2). We try our
+// best to emulate Linux for very basic calls to splice.
+TEST(Inotify, SpliceOnWatchTarget) {
+  int pipes[2];
+  ASSERT_THAT(pipe2(pipes, O_NONBLOCK), SyscallSucceeds());
+
+  const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const FileDescriptor inotify_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+  const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      dir.path(), "some content", TempPath::kDefaultFileMode));
+
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR));
+  const int dir_wd = ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(inotify_fd.get(), dir.path(), IN_ALL_EVENTS));
+  const int file_wd = ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(inotify_fd.get(), file.path(), IN_ALL_EVENTS));
+
+  EXPECT_THAT(splice(fd.get(), nullptr, pipes[1], nullptr, 1, /*flags=*/0),
+              SyscallSucceedsWithValue(1));
+
+  // Surprisingly, events are not generated in Linux if we read from a file.
+  std::vector<Event> events =
+      ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get()));
+  ASSERT_THAT(events, Are({}));
+
+  EXPECT_THAT(splice(pipes[0], nullptr, fd.get(), nullptr, 1, /*flags=*/0),
+              SyscallSucceedsWithValue(1));
+
+  events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get()));
+  ASSERT_THAT(events, Are({
+                          Event(IN_MODIFY, dir_wd, Basename(file.path())),
+                          Event(IN_MODIFY, file_wd),
+                      }));
+}
+
+TEST(Inotify, SpliceOnInotifyFD) {
+  int pipes[2];
+  ASSERT_THAT(pipe2(pipes, O_NONBLOCK), SyscallSucceeds());
+
+  const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+  const TempPath file1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      root.path(), "some content", TempPath::kDefaultFileMode));
+
+  const FileDescriptor file1_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDONLY));
+  const int watcher = ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS));
+
+  char buf;
+  EXPECT_THAT(read(file1_fd.get(), &buf, 1), SyscallSucceeds());
+
+  EXPECT_THAT(splice(fd.get(), nullptr, pipes[1], nullptr,
+                     sizeof(struct inotify_event) + 1, SPLICE_F_NONBLOCK),
+              SyscallSucceedsWithValue(sizeof(struct inotify_event)));
+
+  const FileDescriptor read_fd(pipes[0]);
+  const std::vector<Event> events =
+      ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(read_fd.get()));
+  ASSERT_THAT(events, Are({Event(IN_ACCESS, watcher)}));
+}
+
+// Watches on a parent should not be triggered by actions on a hard link to one
+// of its children that has a different parent.
+TEST(Inotify, LinkOnOtherParent) {
+  const TempPath dir1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const TempPath dir2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const TempPath file =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir1.path()));
+  std::string link_path = NewTempAbsPathInDir(dir2.path());
+
+  const int rc = link(file.path().c_str(), link_path.c_str());
+  // NOTE(b/34861058): link(2) is only supported on tmpfs in the sandbox.
+  SKIP_IF(IsRunningOnGvisor() && rc != 0 &&
+          (errno == EPERM || errno == ENOENT));
+  ASSERT_THAT(rc, SyscallSucceeds());
+
+  const FileDescriptor inotify_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+  ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(inotify_fd.get(), dir1.path(), IN_ALL_EVENTS));
+
+  // Perform various actions on the link outside of dir1, which should trigger
+  // no inotify events.
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(link_path.c_str(), O_RDWR));
+  int val = 0;
+  ASSERT_THAT(write(fd.get(), &val, sizeof(val)), SyscallSucceeds());
+  ASSERT_THAT(read(fd.get(), &val, sizeof(val)), SyscallSucceeds());
+  ASSERT_THAT(ftruncate(fd.get(), 12345), SyscallSucceeds());
+  ASSERT_THAT(unlink(link_path.c_str()), SyscallSucceeds());
+  const std::vector<Event> events =
+      ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get()));
+  EXPECT_THAT(events, Are({}));
+}
+
+TEST(Inotify, Xattr) {
+  // TODO(gvisor.dev/issue/1636): Support extended attributes in runsc gofer.
+  SKIP_IF(IsRunningOnGvisor());
+
+  const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  const std::string path = file.path();
+  const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(path, O_RDWR));
+  const FileDescriptor inotify_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+  const int wd = ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(inotify_fd.get(), path, IN_ALL_EVENTS));
+
+  const char* cpath = path.c_str();
+  const char* name = "user.test";
+  int val = 123;
+  ASSERT_THAT(setxattr(cpath, name, &val, sizeof(val), /*flags=*/0),
+              SyscallSucceeds());
+  std::vector<Event> events =
+      ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get()));
+  EXPECT_THAT(events, Are({Event(IN_ATTRIB, wd)}));
+
+  ASSERT_THAT(getxattr(cpath, name, &val, sizeof(val)), SyscallSucceeds());
+  events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get()));
+  EXPECT_THAT(events, Are({}));
+
+  char list[100];
+  ASSERT_THAT(listxattr(cpath, list, sizeof(list)), SyscallSucceeds());
+  events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get()));
+  EXPECT_THAT(events, Are({}));
+
+  ASSERT_THAT(removexattr(cpath, name), SyscallSucceeds());
+  events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get()));
+  EXPECT_THAT(events, Are({Event(IN_ATTRIB, wd)}));
+
+  ASSERT_THAT(fsetxattr(fd.get(), name, &val, sizeof(val), /*flags=*/0),
+              SyscallSucceeds());
+  events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get()));
+  EXPECT_THAT(events, Are({Event(IN_ATTRIB, wd)}));
+
+  ASSERT_THAT(fgetxattr(fd.get(), name, &val, sizeof(val)), SyscallSucceeds());
+  events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get()));
+  EXPECT_THAT(events, Are({}));
+
+  ASSERT_THAT(flistxattr(fd.get(), list, sizeof(list)), SyscallSucceeds());
+  events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get()));
+  EXPECT_THAT(events, Are({}));
+
+  ASSERT_THAT(fremovexattr(fd.get(), name), SyscallSucceeds());
+  events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get()));
+  EXPECT_THAT(events, Are({Event(IN_ATTRIB, wd)}));
+}
+
+TEST(Inotify, Exec) {
+  const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const TempPath bin = ASSERT_NO_ERRNO_AND_VALUE(
+      TempPath::CreateSymlinkTo(dir.path(), "/bin/true"));
+
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+  const int wd = ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(fd.get(), bin.path(), IN_ALL_EVENTS));
+
+  // Perform exec.
+  ScopedThread t([&bin]() {
+    ASSERT_THAT(execl(bin.path().c_str(), bin.path().c_str(), (char*)nullptr),
+                SyscallSucceeds());
+  });
+  t.Join();
+
+  std::vector<Event> events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+  EXPECT_THAT(events, Are({Event(IN_OPEN, wd), Event(IN_ACCESS, wd)}));
+}
+
+// Watches without IN_EXCL_UNLINK, should continue to emit events for file
+// descriptors after their corresponding files have been unlinked.
+//
+// We need to disable S/R because there are filesystems where we cannot re-open
+// fds to an unlinked file across S/R, e.g. gofer-backed filesytems.
+TEST(Inotify, IncludeUnlinkedFile_NoRandomSave) {
+  const DisableSave ds;
+
+  const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(
+      TempPath::CreateFileWith(dir.path(), "123", TempPath::kDefaultFileMode));
+  FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR));
+
+  const FileDescriptor inotify_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+  const int dir_wd = ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(inotify_fd.get(), dir.path(), IN_ALL_EVENTS));
+  const int file_wd = ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(inotify_fd.get(), file.path(), IN_ALL_EVENTS));
+
+  ASSERT_THAT(unlink(file.path().c_str()), SyscallSucceeds());
+  int val = 0;
+  ASSERT_THAT(read(fd.get(), &val, sizeof(val)), SyscallSucceeds());
+  ASSERT_THAT(write(fd.get(), &val, sizeof(val)), SyscallSucceeds());
+  std::vector<Event> events =
+      ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get()));
+  EXPECT_THAT(events, Are({
+                          Event(IN_ATTRIB, file_wd),
+                          Event(IN_DELETE, dir_wd, Basename(file.path())),
+                          Event(IN_ACCESS, dir_wd, Basename(file.path())),
+                          Event(IN_ACCESS, file_wd),
+                          Event(IN_MODIFY, dir_wd, Basename(file.path())),
+                          Event(IN_MODIFY, file_wd),
+                      }));
+
+  fd.reset();
+  events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get()));
+  EXPECT_THAT(events, Are({
+                          Event(IN_CLOSE_WRITE, dir_wd, Basename(file.path())),
+                          Event(IN_CLOSE_WRITE, file_wd),
+                          Event(IN_DELETE_SELF, file_wd),
+                          Event(IN_IGNORED, file_wd),
+                      }));
+}
+
+// Watches created with IN_EXCL_UNLINK will stop emitting events on fds for
+// children that have already been unlinked.
+//
+// We need to disable S/R because there are filesystems where we cannot re-open
+// fds to an unlinked file across S/R, e.g. gofer-backed filesytems.
+TEST(Inotify, ExcludeUnlink_NoRandomSave) {
+  const DisableSave ds;
+  // TODO(gvisor.dev/issue/1624): This test fails on VFS1.
+  SKIP_IF(IsRunningWithVFS1());
+
+  const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const TempPath file =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir.path()));
+
+  FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR));
+
+  const FileDescriptor inotify_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+  const int dir_wd = ASSERT_NO_ERRNO_AND_VALUE(InotifyAddWatch(
+      inotify_fd.get(), dir.path(), IN_ALL_EVENTS | IN_EXCL_UNLINK));
+  const int file_wd = ASSERT_NO_ERRNO_AND_VALUE(InotifyAddWatch(
+      inotify_fd.get(), file.path(), IN_ALL_EVENTS | IN_EXCL_UNLINK));
+
+  // Unlink the child, which should cause further operations on the open file
+  // descriptor to be ignored.
+  ASSERT_THAT(unlink(file.path().c_str()), SyscallSucceeds());
+  int val = 0;
+  ASSERT_THAT(write(fd.get(), &val, sizeof(val)), SyscallSucceeds());
+  ASSERT_THAT(read(fd.get(), &val, sizeof(val)), SyscallSucceeds());
+  std::vector<Event> events =
+      ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get()));
+  EXPECT_THAT(events, Are({
+                          Event(IN_ATTRIB, file_wd),
+                          Event(IN_DELETE, dir_wd, Basename(file.path())),
+                      }));
+
+  fd.reset();
+  events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get()));
+  ASSERT_THAT(events, Are({
+                          Event(IN_DELETE_SELF, file_wd),
+                          Event(IN_IGNORED, file_wd),
+                      }));
+}
+
+// We need to disable S/R because there are filesystems where we cannot re-open
+// fds to an unlinked file across S/R, e.g. gofer-backed filesytems.
+TEST(Inotify, ExcludeUnlinkDirectory_NoRandomSave) {
+  // TODO(gvisor.dev/issue/1624): This test fails on VFS1. Remove once VFS1 is
+  // deleted.
+  SKIP_IF(IsRunningWithVFS1());
+
+  const DisableSave ds;
+
+  const TempPath parent = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  TempPath dir =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(parent.path()));
+  std::string dirPath = dir.path();
+  const FileDescriptor inotify_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+
+  FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(dirPath.c_str(), O_RDONLY | O_DIRECTORY));
+  const int parent_wd = ASSERT_NO_ERRNO_AND_VALUE(InotifyAddWatch(
+      inotify_fd.get(), parent.path(), IN_ALL_EVENTS | IN_EXCL_UNLINK));
+  const int self_wd = ASSERT_NO_ERRNO_AND_VALUE(InotifyAddWatch(
+      inotify_fd.get(), dir.path(), IN_ALL_EVENTS | IN_EXCL_UNLINK));
+
+  // Unlink the dir, and then close the open fd.
+  ASSERT_THAT(rmdir(dirPath.c_str()), SyscallSucceeds());
+  dir.reset();
+
+  std::vector<Event> events =
+      ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get()));
+  // No close event should appear.
+  ASSERT_THAT(events,
+              Are({Event(IN_DELETE | IN_ISDIR, parent_wd, Basename(dirPath))}));
+
+  fd.reset();
+  events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get()));
+  ASSERT_THAT(events, Are({
+                          Event(IN_DELETE_SELF, self_wd),
+                          Event(IN_IGNORED, self_wd),
+                      }));
+}
+
+// If "dir/child" and "dir/child2" are links to the same file, and "dir/child"
+// is unlinked, a watch on "dir" with IN_EXCL_UNLINK will exclude future events
+// for fds on "dir/child" but not "dir/child2".
+//
+// We need to disable S/R because there are filesystems where we cannot re-open
+// fds to an unlinked file across S/R, e.g. gofer-backed filesytems.
+TEST(Inotify, ExcludeUnlinkMultipleChildren_NoRandomSave) {
+  const DisableSave ds;
+  // TODO(gvisor.dev/issue/1624): This test fails on VFS1.
+  SKIP_IF(IsRunningWithVFS1());
+
+  const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const TempPath file =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir.path()));
+  std::string path1 = file.path();
+  std::string path2 = NewTempAbsPathInDir(dir.path());
+
+  const int rc = link(path1.c_str(), path2.c_str());
+  // NOTE(b/34861058): link(2) is only supported on tmpfs in the sandbox.
+  SKIP_IF(IsRunningOnGvisor() && rc != 0 &&
+          (errno == EPERM || errno == ENOENT));
+  ASSERT_THAT(rc, SyscallSucceeds());
+  const FileDescriptor fd1 =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(path1.c_str(), O_RDWR));
+  const FileDescriptor fd2 =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(path2.c_str(), O_RDWR));
+
+  const FileDescriptor inotify_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+  const int wd = ASSERT_NO_ERRNO_AND_VALUE(InotifyAddWatch(
+      inotify_fd.get(), dir.path(), IN_ALL_EVENTS | IN_EXCL_UNLINK));
+
+  // After unlinking path1, only events on the fd for path2 should be generated.
+  ASSERT_THAT(unlink(path1.c_str()), SyscallSucceeds());
+  ASSERT_THAT(write(fd1.get(), "x", 1), SyscallSucceeds());
+  ASSERT_THAT(write(fd2.get(), "x", 1), SyscallSucceeds());
+
+  const std::vector<Event> events =
+      ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get()));
+  EXPECT_THAT(events, Are({
+                          Event(IN_DELETE, wd, Basename(path1)),
+                          Event(IN_MODIFY, wd, Basename(path2)),
+                      }));
+}
+
+// On native Linux, actions of data type FSNOTIFY_EVENT_INODE are not affected
+// by IN_EXCL_UNLINK (see
+// fs/notify/inotify/inotify_fsnotify.c:inotify_handle_event). Inode-level
+// events include changes to metadata and extended attributes.
+//
+// We need to disable S/R because there are filesystems where we cannot re-open
+// fds to an unlinked file across S/R, e.g. gofer-backed filesytems.
+TEST(Inotify, ExcludeUnlinkInodeEvents_NoRandomSave) {
+  const DisableSave ds;
+
+  const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const TempPath file =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir.path()));
+
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file.path().c_str(), O_RDWR));
+
+  // NOTE(b/157163751): Create another link before unlinking. This is needed for
+  // the gofer filesystem in gVisor, where open fds will not work once the link
+  // count hits zero. In VFS2, we end up skipping the gofer test anyway, because
+  // hard links are not supported for gofer fs.
+  if (IsRunningOnGvisor()) {
+    std::string link_path = NewTempAbsPath();
+    const int rc = link(file.path().c_str(), link_path.c_str());
+    // NOTE(b/34861058): link(2) is only supported on tmpfs in the sandbox.
+    SKIP_IF(rc != 0 && (errno == EPERM || errno == ENOENT));
+    ASSERT_THAT(rc, SyscallSucceeds());
+  }
+
+  const FileDescriptor inotify_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+  const int dir_wd = ASSERT_NO_ERRNO_AND_VALUE(InotifyAddWatch(
+      inotify_fd.get(), dir.path(), IN_ALL_EVENTS | IN_EXCL_UNLINK));
+  const int file_wd = ASSERT_NO_ERRNO_AND_VALUE(InotifyAddWatch(
+      inotify_fd.get(), file.path(), IN_ALL_EVENTS | IN_EXCL_UNLINK));
+
+  // Even after unlinking, inode-level operations will trigger events regardless
+  // of IN_EXCL_UNLINK.
+  ASSERT_THAT(unlink(file.path().c_str()), SyscallSucceeds());
+
+  // Perform various actions on fd.
+  ASSERT_THAT(ftruncate(fd.get(), 12345), SyscallSucceeds());
+  std::vector<Event> events =
+      ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get()));
+  EXPECT_THAT(events, Are({
+                          Event(IN_ATTRIB, file_wd),
+                          Event(IN_DELETE, dir_wd, Basename(file.path())),
+                          Event(IN_MODIFY, dir_wd, Basename(file.path())),
+                          Event(IN_MODIFY, file_wd),
+                      }));
+
+  const struct timeval times[2] = {{1, 0}, {2, 0}};
+  ASSERT_THAT(futimes(fd.get(), times), SyscallSucceeds());
+  events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get()));
+  EXPECT_THAT(events, Are({
+                          Event(IN_ATTRIB, dir_wd, Basename(file.path())),
+                          Event(IN_ATTRIB, file_wd),
+                      }));
+
+  // S/R is disabled on this entire test due to behavior with unlink; it must
+  // also be disabled after this point because of fchmod.
+  ASSERT_THAT(fchmod(fd.get(), 0777), SyscallSucceeds());
+  events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get()));
+  EXPECT_THAT(events, Are({
+                          Event(IN_ATTRIB, dir_wd, Basename(file.path())),
+                          Event(IN_ATTRIB, file_wd),
+                      }));
+}
+
+TEST(Inotify, OneShot) {
+  // TODO(gvisor.dev/issue/1624): IN_ONESHOT not supported in VFS1.
+  SKIP_IF(IsRunningWithVFS1());
+
+  const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  const FileDescriptor inotify_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+
+  const int wd = ASSERT_NO_ERRNO_AND_VALUE(
+      InotifyAddWatch(inotify_fd.get(), file.path(), IN_MODIFY | IN_ONESHOT));
+
+  // Open an fd, write to it, and then close it.
+  FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_WRONLY));
+  ASSERT_THAT(write(fd.get(), "x", 1), SyscallSucceedsWithValue(1));
+  fd.reset();
+
+  // We should get a single event followed by IN_IGNORED indicating removal
+  // of the one-shot watch. Prior activity (i.e. open) that is not in the mask
+  // should not trigger removal, and activity after removal (i.e. close) should
+  // not generate events.
+  std::vector<Event> events =
+      ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get()));
+  EXPECT_THAT(events, Are({
+                          Event(IN_MODIFY, wd),
+                          Event(IN_IGNORED, wd),
+                      }));
+
+  // The watch should already have been removed.
+  EXPECT_THAT(inotify_rm_watch(inotify_fd.get(), wd),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+// This test helps verify that the lock order of filesystem and inotify locks
+// is respected when inotify instances and watch targets are concurrently being
+// destroyed.
+TEST(InotifyTest, InotifyAndTargetDestructionDoNotDeadlock_NoRandomSave) {
+  const DisableSave ds;  // Too many syscalls.
+
+  // A file descriptor protected by a mutex. This ensures that while a
+  // descriptor is in use, it cannot be closed and reused for a different file
+  // description.
+  struct atomic_fd {
+    int fd;
+    absl::Mutex mu;
+  };
+
+  // Set up initial inotify instances.
+  constexpr int num_fds = 3;
+  std::vector<atomic_fd> fds(num_fds);
+  for (int i = 0; i < num_fds; i++) {
+    int fd;
+    ASSERT_THAT(fd = inotify_init1(IN_NONBLOCK), SyscallSucceeds());
+    fds[i].fd = fd;
+  }
+
+  // Set up initial watch targets.
+  std::vector<std::string> paths;
+  for (int i = 0; i < 3; i++) {
+    paths.push_back(NewTempAbsPath());
+    ASSERT_THAT(mknod(paths[i].c_str(), S_IFREG | 0600, 0), SyscallSucceeds());
+  }
+
+  constexpr absl::Duration runtime = absl::Seconds(4);
+
+  // Constantly replace each inotify instance with a new one.
+  auto replace_fds = [&] {
+    for (auto start = absl::Now(); absl::Now() - start < runtime;) {
+      for (auto& afd : fds) {
+        int new_fd;
+        ASSERT_THAT(new_fd = inotify_init1(IN_NONBLOCK), SyscallSucceeds());
+        absl::MutexLock l(&afd.mu);
+        ASSERT_THAT(close(afd.fd), SyscallSucceeds());
+        afd.fd = new_fd;
+        for (auto& p : paths) {
+          // inotify_add_watch may fail if the file at p was deleted.
+          ASSERT_THAT(inotify_add_watch(afd.fd, p.c_str(), IN_ALL_EVENTS),
+                      AnyOf(SyscallSucceeds(), SyscallFailsWithErrno(ENOENT)));
+        }
+      }
+      sched_yield();
+    }
+  };
+
+  std::list<ScopedThread> ts;
+  for (int i = 0; i < 3; i++) {
+    ts.emplace_back(replace_fds);
+  }
+
+  // Constantly replace each watch target with a new one.
+  for (auto start = absl::Now(); absl::Now() - start < runtime;) {
+    for (auto& p : paths) {
+      ASSERT_THAT(unlink(p.c_str()), SyscallSucceeds());
+      ASSERT_THAT(mknod(p.c_str(), S_IFREG | 0600, 0), SyscallSucceeds());
+    }
+    sched_yield();
+  }
+}
+
+// This test helps verify that the lock order of filesystem and inotify locks
+// is respected when adding/removing watches occurs concurrently with the
+// removal of their targets.
+TEST(InotifyTest, AddRemoveUnlinkDoNotDeadlock_NoRandomSave) {
+  const DisableSave ds;  // Too many syscalls.
+
+  // Set up inotify instances.
+  constexpr int num_fds = 3;
+  std::vector<int> fds(num_fds);
+  for (int i = 0; i < num_fds; i++) {
+    ASSERT_THAT(fds[i] = inotify_init1(IN_NONBLOCK), SyscallSucceeds());
+  }
+
+  // Set up initial watch targets.
+  std::vector<std::string> paths;
+  for (int i = 0; i < 3; i++) {
+    paths.push_back(NewTempAbsPath());
+    ASSERT_THAT(mknod(paths[i].c_str(), S_IFREG | 0600, 0), SyscallSucceeds());
+  }
+
+  constexpr absl::Duration runtime = absl::Seconds(1);
+
+  // Constantly add/remove watches for each inotify instance/watch target pair.
+  auto add_remove_watches = [&] {
+    for (auto start = absl::Now(); absl::Now() - start < runtime;) {
+      for (int fd : fds) {
+        for (auto& p : paths) {
+          // Do not assert on inotify_add_watch and inotify_rm_watch. They may
+          // fail if the file at p was deleted. inotify_add_watch may also fail
+          // if another thread beat us to adding a watch.
+          const int wd = inotify_add_watch(fd, p.c_str(), IN_ALL_EVENTS);
+          if (wd > 0) {
+            inotify_rm_watch(fd, wd);
+          }
+        }
+      }
+      sched_yield();
+    }
+  };
+
+  std::list<ScopedThread> ts;
+  for (int i = 0; i < 15; i++) {
+    ts.emplace_back(add_remove_watches);
+  }
+
+  // Constantly replace each watch target with a new one.
+  for (auto start = absl::Now(); absl::Now() - start < runtime;) {
+    for (auto& p : paths) {
+      ASSERT_THAT(unlink(p.c_str()), SyscallSucceeds());
+      ASSERT_THAT(mknod(p.c_str(), S_IFREG | 0600, 0), SyscallSucceeds());
+    }
+    sched_yield();
+  }
+}
+
+// This test helps verify that the lock order of filesystem and inotify locks
+// is respected when many inotify events and filesystem operations occur
+// simultaneously.
+TEST(InotifyTest, NotifyNoDeadlock_NoRandomSave) {
+  const DisableSave ds;  // Too many syscalls.
+
+  const TempPath parent = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const std::string dir = parent.path();
+
+  // mu protects file, which will change on rename.
+  absl::Mutex mu;
+  std::string file = NewTempAbsPathInDir(dir);
+  ASSERT_THAT(mknod(file.c_str(), 0644 | S_IFREG, 0), SyscallSucceeds());
+
+  const absl::Duration runtime = absl::Milliseconds(300);
+
+  // Add/remove watches on dir and file.
+  ScopedThread add_remove_watches([&] {
+    const FileDescriptor ifd =
+        ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+    int dir_wd = ASSERT_NO_ERRNO_AND_VALUE(
+        InotifyAddWatch(ifd.get(), dir, IN_ALL_EVENTS));
+    int file_wd;
+    {
+      absl::ReaderMutexLock l(&mu);
+      file_wd = ASSERT_NO_ERRNO_AND_VALUE(
+          InotifyAddWatch(ifd.get(), file, IN_ALL_EVENTS));
+    }
+    for (auto start = absl::Now(); absl::Now() - start < runtime;) {
+      ASSERT_THAT(inotify_rm_watch(ifd.get(), file_wd), SyscallSucceeds());
+      ASSERT_THAT(inotify_rm_watch(ifd.get(), dir_wd), SyscallSucceeds());
+      dir_wd = ASSERT_NO_ERRNO_AND_VALUE(
+          InotifyAddWatch(ifd.get(), dir, IN_ALL_EVENTS));
+      {
+        absl::ReaderMutexLock l(&mu);
+        file_wd = ASSERT_NO_ERRNO_AND_VALUE(
+            InotifyAddWatch(ifd.get(), file, IN_ALL_EVENTS));
+      }
+      sched_yield();
+    }
+  });
+
+  // Modify attributes on dir and file.
+  ScopedThread stats([&] {
+    int fd, dir_fd;
+    {
+      absl::ReaderMutexLock l(&mu);
+      ASSERT_THAT(fd = open(file.c_str(), O_RDONLY), SyscallSucceeds());
+    }
+    ASSERT_THAT(dir_fd = open(dir.c_str(), O_RDONLY | O_DIRECTORY),
+                SyscallSucceeds());
+    const struct timeval times[2] = {{1, 0}, {2, 0}};
+
+    for (auto start = absl::Now(); absl::Now() - start < runtime;) {
+      {
+        absl::ReaderMutexLock l(&mu);
+        EXPECT_THAT(utimes(file.c_str(), times), SyscallSucceeds());
+      }
+      EXPECT_THAT(futimes(fd, times), SyscallSucceeds());
+      EXPECT_THAT(utimes(dir.c_str(), times), SyscallSucceeds());
+      EXPECT_THAT(futimes(dir_fd, times), SyscallSucceeds());
+      sched_yield();
+    }
+  });
+
+  // Modify extended attributes on dir and file.
+  ScopedThread xattrs([&] {
+    // TODO(gvisor.dev/issue/1636): Support extended attributes in runsc gofer.
+    if (!IsRunningOnGvisor()) {
+      int fd;
+      {
+        absl::ReaderMutexLock l(&mu);
+        ASSERT_THAT(fd = open(file.c_str(), O_RDONLY), SyscallSucceeds());
+      }
+
+      const char* name = "user.test";
+      int val = 123;
+      for (auto start = absl::Now(); absl::Now() - start < runtime;) {
+        {
+          absl::ReaderMutexLock l(&mu);
+          ASSERT_THAT(
+              setxattr(file.c_str(), name, &val, sizeof(val), /*flags=*/0),
+              SyscallSucceeds());
+          ASSERT_THAT(removexattr(file.c_str(), name), SyscallSucceeds());
+        }
+
+        ASSERT_THAT(fsetxattr(fd, name, &val, sizeof(val), /*flags=*/0),
+                    SyscallSucceeds());
+        ASSERT_THAT(fremovexattr(fd, name), SyscallSucceeds());
+        sched_yield();
+      }
+    }
+  });
+
+  // Read and write file's contents. Read and write dir's entries.
+  ScopedThread read_write([&] {
+    int fd;
+    {
+      absl::ReaderMutexLock l(&mu);
+      ASSERT_THAT(fd = open(file.c_str(), O_RDWR), SyscallSucceeds());
+    }
+    for (auto start = absl::Now(); absl::Now() - start < runtime;) {
+      int val = 123;
+      ASSERT_THAT(write(fd, &val, sizeof(val)), SyscallSucceeds());
+      ASSERT_THAT(read(fd, &val, sizeof(val)), SyscallSucceeds());
+      TempPath new_file =
+          ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir));
+      ASSERT_NO_ERRNO(ListDir(dir, false));
+      new_file.reset();
+      sched_yield();
+    }
+  });
+
+  // Rename file.
+  for (auto start = absl::Now(); absl::Now() - start < runtime;) {
+    const std::string new_path = NewTempAbsPathInDir(dir);
+    {
+      absl::WriterMutexLock l(&mu);
+      ASSERT_THAT(rename(file.c_str(), new_path.c_str()), SyscallSucceeds());
+      file = new_path;
+    }
+    sched_yield();
+  }
+}
+
+}  // namespace
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/ioctl.cc b/test/syscalls/linux/ioctl.cc
new file mode 100644
index 000000000..b0a07a064
--- /dev/null
+++ b/test/syscalls/linux/ioctl.cc
@@ -0,0 +1,406 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <net/if.h>
+#include <netdb.h>
+#include <signal.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/ip_socket_test_util.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/signal_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+bool CheckNonBlocking(int fd) {
+  int ret = fcntl(fd, F_GETFL, 0);
+  TEST_CHECK(ret != -1);
+  return (ret & O_NONBLOCK) == O_NONBLOCK;
+}
+
+bool CheckCloExec(int fd) {
+  int ret = fcntl(fd, F_GETFD, 0);
+  TEST_CHECK(ret != -1);
+  return (ret & FD_CLOEXEC) == FD_CLOEXEC;
+}
+
+class IoctlTest : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    ASSERT_THAT(fd_ = open("/dev/null", O_RDONLY), SyscallSucceeds());
+  }
+
+  void TearDown() override {
+    if (fd_ >= 0) {
+      ASSERT_THAT(close(fd_), SyscallSucceeds());
+      fd_ = -1;
+    }
+  }
+
+  int fd() const { return fd_; }
+
+ private:
+  int fd_ = -1;
+};
+
+TEST_F(IoctlTest, BadFileDescriptor) {
+  EXPECT_THAT(ioctl(-1 /* fd */, 0), SyscallFailsWithErrno(EBADF));
+}
+
+TEST_F(IoctlTest, InvalidControlNumber) {
+  EXPECT_THAT(ioctl(STDOUT_FILENO, 0), SyscallFailsWithErrno(ENOTTY));
+}
+
+TEST_F(IoctlTest, FIONBIOSucceeds) {
+  EXPECT_FALSE(CheckNonBlocking(fd()));
+  int set = 1;
+  EXPECT_THAT(ioctl(fd(), FIONBIO, &set), SyscallSucceeds());
+  EXPECT_TRUE(CheckNonBlocking(fd()));
+  set = 0;
+  EXPECT_THAT(ioctl(fd(), FIONBIO, &set), SyscallSucceeds());
+  EXPECT_FALSE(CheckNonBlocking(fd()));
+}
+
+TEST_F(IoctlTest, FIONBIOFails) {
+  EXPECT_THAT(ioctl(fd(), FIONBIO, nullptr), SyscallFailsWithErrno(EFAULT));
+}
+
+TEST_F(IoctlTest, FIONCLEXSucceeds) {
+  EXPECT_THAT(ioctl(fd(), FIONCLEX), SyscallSucceeds());
+  EXPECT_FALSE(CheckCloExec(fd()));
+}
+
+TEST_F(IoctlTest, FIOCLEXSucceeds) {
+  EXPECT_THAT(ioctl(fd(), FIOCLEX), SyscallSucceeds());
+  EXPECT_TRUE(CheckCloExec(fd()));
+}
+
+TEST_F(IoctlTest, FIOASYNCFails) {
+  EXPECT_THAT(ioctl(fd(), FIOASYNC, nullptr), SyscallFailsWithErrno(EFAULT));
+}
+
+TEST_F(IoctlTest, FIOASYNCSucceeds) {
+  // Not all FDs support FIOASYNC.
+  const FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0));
+
+  int before = -1;
+  ASSERT_THAT(before = fcntl(s.get(), F_GETFL), SyscallSucceeds());
+
+  int set = 1;
+  EXPECT_THAT(ioctl(s.get(), FIOASYNC, &set), SyscallSucceeds());
+
+  int after_set = -1;
+  ASSERT_THAT(after_set = fcntl(s.get(), F_GETFL), SyscallSucceeds());
+  EXPECT_EQ(after_set, before | O_ASYNC) << "before was " << before;
+
+  set = 0;
+  EXPECT_THAT(ioctl(s.get(), FIOASYNC, &set), SyscallSucceeds());
+
+  ASSERT_THAT(fcntl(s.get(), F_GETFL), SyscallSucceedsWithValue(before));
+}
+
+/* Count of the number of SIGIOs handled. */
+static volatile int io_received = 0;
+
+void inc_io_handler(int sig, siginfo_t* siginfo, void* arg) { io_received++; }
+
+TEST_F(IoctlTest, FIOASYNCNoTarget) {
+  auto pair =
+      ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+  // Count SIGIOs received.
+  io_received = 0;
+  struct sigaction sa;
+  sa.sa_sigaction = inc_io_handler;
+  sigfillset(&sa.sa_mask);
+  sa.sa_flags = SA_RESTART;
+  auto sa_cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGIO, sa));
+
+  // Actually allow SIGIO delivery.
+  auto mask_cleanup =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, SIGIO));
+
+  int set = 1;
+  EXPECT_THAT(ioctl(pair->second_fd(), FIOASYNC, &set), SyscallSucceeds());
+
+  constexpr char kData[] = "abc";
+  ASSERT_THAT(WriteFd(pair->first_fd(), kData, sizeof(kData)),
+              SyscallSucceedsWithValue(sizeof(kData)));
+
+  EXPECT_EQ(io_received, 0);
+}
+
+TEST_F(IoctlTest, FIOASYNCSelfTarget) {
+  // FIXME(b/120624367): gVisor erroneously sends SIGIO on close(2), which would
+  // kill the test when pair goes out of scope. Temporarily ignore SIGIO so that
+  // that the close signal is ignored.
+  struct sigaction sa;
+  sa.sa_handler = SIG_IGN;
+  auto early_sa_cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGIO, sa));
+
+  auto pair =
+      ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+  // Count SIGIOs received.
+  io_received = 0;
+  sa.sa_sigaction = inc_io_handler;
+  sigfillset(&sa.sa_mask);
+  sa.sa_flags = SA_RESTART;
+  auto sa_cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGIO, sa));
+
+  // Actually allow SIGIO delivery.
+  auto mask_cleanup =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, SIGIO));
+
+  int set = 1;
+  EXPECT_THAT(ioctl(pair->second_fd(), FIOASYNC, &set), SyscallSucceeds());
+
+  pid_t pid = getpid();
+  EXPECT_THAT(ioctl(pair->second_fd(), FIOSETOWN, &pid), SyscallSucceeds());
+
+  constexpr char kData[] = "abc";
+  ASSERT_THAT(WriteFd(pair->first_fd(), kData, sizeof(kData)),
+              SyscallSucceedsWithValue(sizeof(kData)));
+
+  EXPECT_EQ(io_received, 1);
+}
+
+// Equivalent to FIOASYNCSelfTarget except that FIOSETOWN is called before
+// FIOASYNC.
+TEST_F(IoctlTest, FIOASYNCSelfTarget2) {
+  // FIXME(b/120624367): gVisor erroneously sends SIGIO on close(2), which would
+  // kill the test when pair goes out of scope. Temporarily ignore SIGIO so that
+  // that the close signal is ignored.
+  struct sigaction sa;
+  sa.sa_handler = SIG_IGN;
+  auto early_sa_cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGIO, sa));
+
+  auto pair =
+      ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+  // Count SIGIOs received.
+  io_received = 0;
+  sa.sa_sigaction = inc_io_handler;
+  sigfillset(&sa.sa_mask);
+  sa.sa_flags = SA_RESTART;
+  auto sa_cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGIO, sa));
+
+  // Actually allow SIGIO delivery.
+  auto mask_cleanup =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, SIGIO));
+
+  pid_t pid = -1;
+  EXPECT_THAT(pid = getpid(), SyscallSucceeds());
+  EXPECT_THAT(ioctl(pair->second_fd(), FIOSETOWN, &pid), SyscallSucceeds());
+
+  int set = 1;
+  EXPECT_THAT(ioctl(pair->second_fd(), FIOASYNC, &set), SyscallSucceeds());
+
+  constexpr char kData[] = "abc";
+  ASSERT_THAT(WriteFd(pair->first_fd(), kData, sizeof(kData)),
+              SyscallSucceedsWithValue(sizeof(kData)));
+
+  EXPECT_EQ(io_received, 1);
+}
+
+// Check that closing an FD does not result in an event.
+TEST_F(IoctlTest, FIOASYNCSelfTargetClose) {
+  // Count SIGIOs received.
+  struct sigaction sa;
+  io_received = 0;
+  sa.sa_sigaction = inc_io_handler;
+  sigfillset(&sa.sa_mask);
+  sa.sa_flags = SA_RESTART;
+  auto sa_cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGIO, sa));
+
+  // Actually allow SIGIO delivery.
+  auto mask_cleanup =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, SIGIO));
+
+  for (int i = 0; i < 2; i++) {
+    auto pair = ASSERT_NO_ERRNO_AND_VALUE(
+        UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+    pid_t pid = getpid();
+    EXPECT_THAT(ioctl(pair->second_fd(), FIOSETOWN, &pid), SyscallSucceeds());
+
+    int set = 1;
+    EXPECT_THAT(ioctl(pair->second_fd(), FIOASYNC, &set), SyscallSucceeds());
+  }
+
+  // FIXME(b/120624367): gVisor erroneously sends SIGIO on close.
+  SKIP_IF(IsRunningOnGvisor());
+
+  EXPECT_EQ(io_received, 0);
+}
+
+TEST_F(IoctlTest, FIOASYNCInvalidPID) {
+  auto pair =
+      ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+  int set = 1;
+  ASSERT_THAT(ioctl(pair->second_fd(), FIOASYNC, &set), SyscallSucceeds());
+  pid_t pid = INT_MAX;
+  // This succeeds (with behavior equivalent to a pid of 0) in Linux prior to
+  // f73127356f34 "fs/fcntl: return -ESRCH in f_setown when pid/pgid can't be
+  // found", and fails with EPERM after that commit.
+  EXPECT_THAT(ioctl(pair->second_fd(), FIOSETOWN, &pid),
+              AnyOf(SyscallSucceeds(), SyscallFailsWithErrno(ESRCH)));
+}
+
+TEST_F(IoctlTest, FIOASYNCUnsetTarget) {
+  auto pair =
+      ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+  // Count SIGIOs received.
+  io_received = 0;
+  struct sigaction sa;
+  sa.sa_sigaction = inc_io_handler;
+  sigfillset(&sa.sa_mask);
+  sa.sa_flags = SA_RESTART;
+  auto sa_cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGIO, sa));
+
+  // Actually allow SIGIO delivery.
+  auto mask_cleanup =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, SIGIO));
+
+  int set = 1;
+  EXPECT_THAT(ioctl(pair->second_fd(), FIOASYNC, &set), SyscallSucceeds());
+
+  pid_t pid = getpid();
+  EXPECT_THAT(ioctl(pair->second_fd(), FIOSETOWN, &pid), SyscallSucceeds());
+
+  // Passing a PID of 0 unsets the target.
+  pid = 0;
+  EXPECT_THAT(ioctl(pair->second_fd(), FIOSETOWN, &pid), SyscallSucceeds());
+
+  constexpr char kData[] = "abc";
+  ASSERT_THAT(WriteFd(pair->first_fd(), kData, sizeof(kData)),
+              SyscallSucceedsWithValue(sizeof(kData)));
+
+  EXPECT_EQ(io_received, 0);
+}
+
+using IoctlTestSIOCGIFCONF = SimpleSocketTest;
+
+TEST_P(IoctlTestSIOCGIFCONF, ValidateNoArrayGetsLength) {
+  auto fd = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  // Validate that no array can be used to get the length required.
+  struct ifconf ifconf = {};
+  ASSERT_THAT(ioctl(fd->get(), SIOCGIFCONF, &ifconf), SyscallSucceeds());
+  ASSERT_GT(ifconf.ifc_len, 0);
+}
+
+// This test validates that we will only return a partial array list and not
+// partial ifrreq structs.
+TEST_P(IoctlTestSIOCGIFCONF, ValidateNoPartialIfrsReturned) {
+  auto fd = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  struct ifreq ifr = {};
+  struct ifconf ifconf = {};
+  ifconf.ifc_len = sizeof(ifr) - 1;  // One byte too few.
+  ifconf.ifc_ifcu.ifcu_req = &ifr;
+
+  ASSERT_THAT(ioctl(fd->get(), SIOCGIFCONF, &ifconf), SyscallSucceeds());
+  ASSERT_EQ(ifconf.ifc_len, 0);
+  ASSERT_EQ(ifr.ifr_name[0], '\0');  // Nothing is returned.
+
+  ifconf.ifc_len = sizeof(ifreq);
+  ASSERT_THAT(ioctl(fd->get(), SIOCGIFCONF, &ifconf), SyscallSucceeds());
+  ASSERT_GT(ifconf.ifc_len, 0);
+  ASSERT_NE(ifr.ifr_name[0], '\0');  // An interface can now be returned.
+}
+
+TEST_P(IoctlTestSIOCGIFCONF, ValidateLoopbackIsPresent) {
+  auto fd = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  struct ifconf ifconf = {};
+  struct ifreq ifr[10] = {};  // Storage for up to 10 interfaces.
+
+  ifconf.ifc_req = ifr;
+  ifconf.ifc_len = sizeof(ifr);
+
+  ASSERT_THAT(ioctl(fd->get(), SIOCGIFCONF, &ifconf), SyscallSucceeds());
+  size_t num_if = ifconf.ifc_len / sizeof(struct ifreq);
+
+  // We should have at least one interface.
+  ASSERT_GE(num_if, 1);
+
+  // One of the interfaces should be a loopback.
+  bool found_loopback = false;
+  for (size_t i = 0; i < num_if; ++i) {
+    if (strcmp(ifr[i].ifr_name, "lo") == 0) {
+      // SIOCGIFCONF returns the ipv4 address of the interface, let's check it.
+      ASSERT_EQ(ifr[i].ifr_addr.sa_family, AF_INET);
+
+      // Validate the address is correct for loopback.
+      sockaddr_in* sin = reinterpret_cast<sockaddr_in*>(&ifr[i].ifr_addr);
+      ASSERT_EQ(htonl(sin->sin_addr.s_addr), INADDR_LOOPBACK);
+
+      found_loopback = true;
+      break;
+    }
+  }
+  ASSERT_TRUE(found_loopback);
+}
+
+std::vector<SocketKind> IoctlSocketTypes() {
+  return {SimpleSocket(AF_UNIX, SOCK_STREAM, 0),
+          SimpleSocket(AF_UNIX, SOCK_DGRAM, 0),
+          SimpleSocket(AF_INET, SOCK_STREAM, 0),
+          SimpleSocket(AF_INET6, SOCK_STREAM, 0),
+          SimpleSocket(AF_INET, SOCK_DGRAM, 0),
+          SimpleSocket(AF_INET6, SOCK_DGRAM, 0)};
+}
+
+INSTANTIATE_TEST_SUITE_P(IoctlTest, IoctlTestSIOCGIFCONF,
+                         ::testing::ValuesIn(IoctlSocketTypes()));
+
+}  // namespace
+
+TEST_F(IoctlTest, FIOGETOWNSucceeds) {
+  const FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0));
+
+  int get = -1;
+  ASSERT_THAT(ioctl(s.get(), FIOGETOWN, &get), SyscallSucceeds());
+  EXPECT_EQ(get, 0);
+}
+
+TEST_F(IoctlTest, SIOCGPGRPSucceeds) {
+  const FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0));
+
+  int get = -1;
+  ASSERT_THAT(ioctl(s.get(), SIOCGPGRP, &get), SyscallSucceeds());
+  EXPECT_EQ(get, 0);
+}
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/ip_socket_test_util.cc b/test/syscalls/linux/ip_socket_test_util.cc
new file mode 100644
index 000000000..98d07ae85
--- /dev/null
+++ b/test/syscalls/linux/ip_socket_test_util.cc
@@ -0,0 +1,239 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test/syscalls/linux/ip_socket_test_util.h"
+
+#include <net/if.h>
+#include <netinet/in.h>
+#include <sys/socket.h>
+
+#include <cstring>
+
+namespace gvisor {
+namespace testing {
+
+uint32_t IPFromInetSockaddr(const struct sockaddr* addr) {
+  auto* in_addr = reinterpret_cast<const struct sockaddr_in*>(addr);
+  return in_addr->sin_addr.s_addr;
+}
+
+uint16_t PortFromInetSockaddr(const struct sockaddr* addr) {
+  auto* in_addr = reinterpret_cast<const struct sockaddr_in*>(addr);
+  return ntohs(in_addr->sin_port);
+}
+
+PosixErrorOr<int> InterfaceIndex(std::string name) {
+  int index = if_nametoindex(name.c_str());
+  if (index) {
+    return index;
+  }
+  return PosixError(errno);
+}
+
+namespace {
+
+std::string DescribeSocketType(int type) {
+  return absl::StrCat(((type & SOCK_NONBLOCK) != 0) ? "non-blocking " : "",
+                      ((type & SOCK_CLOEXEC) != 0) ? "close-on-exec " : "");
+}
+
+}  // namespace
+
+SocketPairKind IPv6TCPAcceptBindSocketPair(int type) {
+  std::string description =
+      absl::StrCat(DescribeSocketType(type), "connected IPv6 TCP socket");
+  return SocketPairKind{
+      description, AF_INET6, type | SOCK_STREAM, IPPROTO_TCP,
+      TCPAcceptBindSocketPairCreator(AF_INET6, type | SOCK_STREAM, 0,
+                                     /* dual_stack = */ false)};
+}
+
+SocketPairKind IPv4TCPAcceptBindSocketPair(int type) {
+  std::string description =
+      absl::StrCat(DescribeSocketType(type), "connected IPv4 TCP socket");
+  return SocketPairKind{
+      description, AF_INET, type | SOCK_STREAM, IPPROTO_TCP,
+      TCPAcceptBindSocketPairCreator(AF_INET, type | SOCK_STREAM, 0,
+                                     /* dual_stack = */ false)};
+}
+
+SocketPairKind DualStackTCPAcceptBindSocketPair(int type) {
+  std::string description =
+      absl::StrCat(DescribeSocketType(type), "connected dual stack TCP socket");
+  return SocketPairKind{
+      description, AF_INET6, type | SOCK_STREAM, IPPROTO_TCP,
+      TCPAcceptBindSocketPairCreator(AF_INET6, type | SOCK_STREAM, 0,
+                                     /* dual_stack = */ true)};
+}
+
+SocketPairKind IPv6TCPAcceptBindPersistentListenerSocketPair(int type) {
+  std::string description =
+      absl::StrCat(DescribeSocketType(type), "connected IPv6 TCP socket");
+  return SocketPairKind{description, AF_INET6, type | SOCK_STREAM, IPPROTO_TCP,
+                        TCPAcceptBindPersistentListenerSocketPairCreator(
+                            AF_INET6, type | SOCK_STREAM, 0,
+                            /* dual_stack = */ false)};
+}
+
+SocketPairKind IPv4TCPAcceptBindPersistentListenerSocketPair(int type) {
+  std::string description =
+      absl::StrCat(DescribeSocketType(type), "connected IPv4 TCP socket");
+  return SocketPairKind{description, AF_INET, type | SOCK_STREAM, IPPROTO_TCP,
+                        TCPAcceptBindPersistentListenerSocketPairCreator(
+                            AF_INET, type | SOCK_STREAM, 0,
+                            /* dual_stack = */ false)};
+}
+
+SocketPairKind DualStackTCPAcceptBindPersistentListenerSocketPair(int type) {
+  std::string description =
+      absl::StrCat(DescribeSocketType(type), "connected dual stack TCP socket");
+  return SocketPairKind{description, AF_INET6, type | SOCK_STREAM, IPPROTO_TCP,
+                        TCPAcceptBindPersistentListenerSocketPairCreator(
+                            AF_INET6, type | SOCK_STREAM, 0,
+                            /* dual_stack = */ true)};
+}
+
+SocketPairKind IPv6UDPBidirectionalBindSocketPair(int type) {
+  std::string description =
+      absl::StrCat(DescribeSocketType(type), "connected IPv6 UDP socket");
+  return SocketPairKind{
+      description, AF_INET6, type | SOCK_DGRAM, IPPROTO_UDP,
+      UDPBidirectionalBindSocketPairCreator(AF_INET6, type | SOCK_DGRAM, 0,
+                                            /* dual_stack = */ false)};
+}
+
+SocketPairKind IPv4UDPBidirectionalBindSocketPair(int type) {
+  std::string description =
+      absl::StrCat(DescribeSocketType(type), "connected IPv4 UDP socket");
+  return SocketPairKind{
+      description, AF_INET, type | SOCK_DGRAM, IPPROTO_UDP,
+      UDPBidirectionalBindSocketPairCreator(AF_INET, type | SOCK_DGRAM, 0,
+                                            /* dual_stack = */ false)};
+}
+
+SocketPairKind DualStackUDPBidirectionalBindSocketPair(int type) {
+  std::string description =
+      absl::StrCat(DescribeSocketType(type), "connected dual stack UDP socket");
+  return SocketPairKind{
+      description, AF_INET6, type | SOCK_DGRAM, IPPROTO_UDP,
+      UDPBidirectionalBindSocketPairCreator(AF_INET6, type | SOCK_DGRAM, 0,
+                                            /* dual_stack = */ true)};
+}
+
+SocketPairKind IPv4UDPUnboundSocketPair(int type) {
+  std::string description =
+      absl::StrCat(DescribeSocketType(type), "IPv4 UDP socket");
+  return SocketPairKind{
+      description, AF_INET, type | SOCK_DGRAM, IPPROTO_UDP,
+      UDPUnboundSocketPairCreator(AF_INET, type | SOCK_DGRAM, 0,
+                                  /* dual_stack = */ false)};
+}
+
+SocketKind IPv4UDPUnboundSocket(int type) {
+  std::string description =
+      absl::StrCat(DescribeSocketType(type), "IPv4 UDP socket");
+  return SocketKind{
+      description, AF_INET, type | SOCK_DGRAM, IPPROTO_UDP,
+      UnboundSocketCreator(AF_INET, type | SOCK_DGRAM, IPPROTO_UDP)};
+}
+
+SocketKind IPv6UDPUnboundSocket(int type) {
+  std::string description =
+      absl::StrCat(DescribeSocketType(type), "IPv6 UDP socket");
+  return SocketKind{
+      description, AF_INET6, type | SOCK_DGRAM, IPPROTO_UDP,
+      UnboundSocketCreator(AF_INET6, type | SOCK_DGRAM, IPPROTO_UDP)};
+}
+
+SocketKind IPv4TCPUnboundSocket(int type) {
+  std::string description =
+      absl::StrCat(DescribeSocketType(type), "IPv4 TCP socket");
+  return SocketKind{
+      description, AF_INET, type | SOCK_STREAM, IPPROTO_TCP,
+      UnboundSocketCreator(AF_INET, type | SOCK_STREAM, IPPROTO_TCP)};
+}
+
+SocketKind IPv6TCPUnboundSocket(int type) {
+  std::string description =
+      absl::StrCat(DescribeSocketType(type), "IPv6 TCP socket");
+  return SocketKind{
+      description, AF_INET6, type | SOCK_STREAM, IPPROTO_TCP,
+      UnboundSocketCreator(AF_INET6, type | SOCK_STREAM, IPPROTO_TCP)};
+}
+
+PosixError IfAddrHelper::Load() {
+  Release();
+  RETURN_ERROR_IF_SYSCALL_FAIL(getifaddrs(&ifaddr_));
+  return NoError();
+}
+
+void IfAddrHelper::Release() {
+  if (ifaddr_) {
+    freeifaddrs(ifaddr_);
+    ifaddr_ = nullptr;
+  }
+}
+
+std::vector<std::string> IfAddrHelper::InterfaceList(int family) const {
+  std::vector<std::string> names;
+  for (auto ifa = ifaddr_; ifa != NULL; ifa = ifa->ifa_next) {
+    if (ifa->ifa_addr == NULL || ifa->ifa_addr->sa_family != family) {
+      continue;
+    }
+    names.emplace(names.end(), ifa->ifa_name);
+  }
+  return names;
+}
+
+const sockaddr* IfAddrHelper::GetAddr(int family, std::string name) const {
+  for (auto ifa = ifaddr_; ifa != NULL; ifa = ifa->ifa_next) {
+    if (ifa->ifa_addr == NULL || ifa->ifa_addr->sa_family != family) {
+      continue;
+    }
+    if (name == ifa->ifa_name) {
+      return ifa->ifa_addr;
+    }
+  }
+  return nullptr;
+}
+
+PosixErrorOr<int> IfAddrHelper::GetIndex(std::string name) const {
+  return InterfaceIndex(name);
+}
+
+std::string GetAddr4Str(const in_addr* a) {
+  char str[INET_ADDRSTRLEN];
+  inet_ntop(AF_INET, a, str, sizeof(str));
+  return std::string(str);
+}
+
+std::string GetAddr6Str(const in6_addr* a) {
+  char str[INET6_ADDRSTRLEN];
+  inet_ntop(AF_INET6, a, str, sizeof(str));
+  return std::string(str);
+}
+
+std::string GetAddrStr(const sockaddr* a) {
+  if (a->sa_family == AF_INET) {
+    auto src = &(reinterpret_cast<const sockaddr_in*>(a)->sin_addr);
+    return GetAddr4Str(src);
+  } else if (a->sa_family == AF_INET6) {
+    auto src = &(reinterpret_cast<const sockaddr_in6*>(a)->sin6_addr);
+    return GetAddr6Str(src);
+  }
+  return std::string("<invalid>");
+}
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/ip_socket_test_util.h b/test/syscalls/linux/ip_socket_test_util.h
new file mode 100644
index 000000000..9c3859fcd
--- /dev/null
+++ b/test/syscalls/linux/ip_socket_test_util.h
@@ -0,0 +1,135 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_SYSCALLS_IP_SOCKET_TEST_UTIL_H_
+#define GVISOR_TEST_SYSCALLS_IP_SOCKET_TEST_UTIL_H_
+
+#include <arpa/inet.h>
+#include <ifaddrs.h>
+#include <sys/types.h>
+
+#include <string>
+
+#include "test/syscalls/linux/socket_test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// Extracts the IP address from an inet sockaddr in network byte order.
+uint32_t IPFromInetSockaddr(const struct sockaddr* addr);
+
+// Extracts the port from an inet sockaddr in host byte order.
+uint16_t PortFromInetSockaddr(const struct sockaddr* addr);
+
+// InterfaceIndex returns the index of the named interface.
+PosixErrorOr<int> InterfaceIndex(std::string name);
+
+// IPv6TCPAcceptBindSocketPair returns a SocketPairKind that represents
+// SocketPairs created with bind() and accept() syscalls with AF_INET6 and the
+// given type bound to the IPv6 loopback.
+SocketPairKind IPv6TCPAcceptBindSocketPair(int type);
+
+// IPv4TCPAcceptBindSocketPair returns a SocketPairKind that represents
+// SocketPairs created with bind() and accept() syscalls with AF_INET and the
+// given type bound to the IPv4 loopback.
+SocketPairKind IPv4TCPAcceptBindSocketPair(int type);
+
+// DualStackTCPAcceptBindSocketPair returns a SocketPairKind that represents
+// SocketPairs created with bind() and accept() syscalls with AF_INET6 and the
+// given type bound to the IPv4 loopback.
+SocketPairKind DualStackTCPAcceptBindSocketPair(int type);
+
+// IPv6TCPAcceptBindPersistentListenerSocketPair is like
+// IPv6TCPAcceptBindSocketPair except it uses a persistent listening socket to
+// create all socket pairs.
+SocketPairKind IPv6TCPAcceptBindPersistentListenerSocketPair(int type);
+
+// IPv4TCPAcceptBindPersistentListenerSocketPair is like
+// IPv4TCPAcceptBindSocketPair except it uses a persistent listening socket to
+// create all socket pairs.
+SocketPairKind IPv4TCPAcceptBindPersistentListenerSocketPair(int type);
+
+// DualStackTCPAcceptBindPersistentListenerSocketPair is like
+// DualStackTCPAcceptBindSocketPair except it uses a persistent listening socket
+// to create all socket pairs.
+SocketPairKind DualStackTCPAcceptBindPersistentListenerSocketPair(int type);
+
+// IPv6UDPBidirectionalBindSocketPair returns a SocketPairKind that represents
+// SocketPairs created with bind() and connect() syscalls with AF_INET6 and the
+// given type bound to the IPv6 loopback.
+SocketPairKind IPv6UDPBidirectionalBindSocketPair(int type);
+
+// IPv4UDPBidirectionalBindSocketPair returns a SocketPairKind that represents
+// SocketPairs created with bind() and connect() syscalls with AF_INET and the
+// given type bound to the IPv4 loopback.
+SocketPairKind IPv4UDPBidirectionalBindSocketPair(int type);
+
+// DualStackUDPBidirectionalBindSocketPair returns a SocketPairKind that
+// represents SocketPairs created with bind() and connect() syscalls with
+// AF_INET6 and the given type bound to the IPv4 loopback.
+SocketPairKind DualStackUDPBidirectionalBindSocketPair(int type);
+
+// IPv4UDPUnboundSocketPair returns a SocketPairKind that represents
+// SocketPairs created with AF_INET and the given type.
+SocketPairKind IPv4UDPUnboundSocketPair(int type);
+
+// IPv4UDPUnboundSocket returns a SocketKind that represents a SimpleSocket
+// created with AF_INET, SOCK_DGRAM, and the given type.
+SocketKind IPv4UDPUnboundSocket(int type);
+
+// IPv6UDPUnboundSocket returns a SocketKind that represents a SimpleSocket
+// created with AF_INET6, SOCK_DGRAM, and the given type.
+SocketKind IPv6UDPUnboundSocket(int type);
+
+// IPv4TCPUnboundSocket returns a SocketKind that represents a SimpleSocket
+// created with AF_INET, SOCK_STREAM and the given type.
+SocketKind IPv4TCPUnboundSocket(int type);
+
+// IPv6TCPUnboundSocket returns a SocketKind that represents a SimpleSocket
+// created with AF_INET6, SOCK_STREAM and the given type.
+SocketKind IPv6TCPUnboundSocket(int type);
+
+// IfAddrHelper is a helper class that determines the local interfaces present
+// and provides functions to obtain their names, index numbers, and IP address.
+class IfAddrHelper {
+ public:
+  IfAddrHelper() : ifaddr_(nullptr) {}
+  ~IfAddrHelper() { Release(); }
+
+  PosixError Load();
+  void Release();
+
+  std::vector<std::string> InterfaceList(int family) const;
+
+  const sockaddr* GetAddr(int family, std::string name) const;
+  PosixErrorOr<int> GetIndex(std::string name) const;
+
+ private:
+  struct ifaddrs* ifaddr_;
+};
+
+// GetAddr4Str returns the given IPv4 network address structure as a string.
+std::string GetAddr4Str(const in_addr* a);
+
+// GetAddr6Str returns the given IPv6 network address structure as a string.
+std::string GetAddr6Str(const in6_addr* a);
+
+// GetAddrStr returns the given IPv4 or IPv6 network address structure as a
+// string.
+std::string GetAddrStr(const sockaddr* a);
+
+}  // namespace testing
+}  // namespace gvisor
+
+#endif  // GVISOR_TEST_SYSCALLS_IP_SOCKET_TEST_UTIL_H_
diff --git a/test/syscalls/linux/iptables.cc b/test/syscalls/linux/iptables.cc
new file mode 100644
index 000000000..b8e4ece64
--- /dev/null
+++ b/test/syscalls/linux/iptables.cc
@@ -0,0 +1,204 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test/syscalls/linux/iptables.h"
+
+#include <arpa/inet.h>
+#include <linux/capability.h>
+#include <linux/netfilter/x_tables.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip_icmp.h>
+#include <stdio.h>
+#include <sys/poll.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <algorithm>
+
+#include "gtest/gtest.h"
+#include "test/util/capability_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+constexpr char kNatTablename[] = "nat";
+constexpr char kErrorTarget[] = "ERROR";
+constexpr size_t kEmptyStandardEntrySize =
+    sizeof(struct ipt_entry) + sizeof(struct ipt_standard_target);
+constexpr size_t kEmptyErrorEntrySize =
+    sizeof(struct ipt_entry) + sizeof(struct ipt_error_target);
+
+TEST(IPTablesBasic, CreateSocket) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+  int sock;
+  ASSERT_THAT(sock = socket(AF_INET, SOCK_RAW, IPPROTO_ICMP),
+              SyscallSucceeds());
+
+  ASSERT_THAT(close(sock), SyscallSucceeds());
+}
+
+TEST(IPTablesBasic, FailSockoptNonRaw) {
+  // Even if the user has CAP_NET_RAW, they shouldn't be able to use the
+  // iptables sockopts with a non-raw socket.
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+  int sock;
+  ASSERT_THAT(sock = socket(AF_INET, SOCK_DGRAM, 0), SyscallSucceeds());
+
+  struct ipt_getinfo info = {};
+  snprintf(info.name, XT_TABLE_MAXNAMELEN, "%s", kNatTablename);
+  socklen_t info_size = sizeof(info);
+  EXPECT_THAT(getsockopt(sock, IPPROTO_IP, SO_GET_INFO, &info, &info_size),
+              SyscallFailsWithErrno(ENOPROTOOPT));
+
+  ASSERT_THAT(close(sock), SyscallSucceeds());
+}
+
+// Fixture for iptables tests.
+class IPTablesTest : public ::testing::Test {
+ protected:
+  // Creates a socket to be used in tests.
+  void SetUp() override;
+
+  // Closes the socket created by SetUp().
+  void TearDown() override;
+
+  // The socket via which to manipulate iptables.
+  int s_;
+};
+
+void IPTablesTest::SetUp() {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+  ASSERT_THAT(s_ = socket(AF_INET, SOCK_RAW, IPPROTO_ICMP), SyscallSucceeds());
+}
+
+void IPTablesTest::TearDown() {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+  EXPECT_THAT(close(s_), SyscallSucceeds());
+}
+
+// This tests the initial state of a machine with empty iptables. We don't have
+// a guarantee that the iptables are empty when running in native, but we can
+// test that gVisor has the same initial state that a newly-booted Linux machine
+// would have.
+TEST_F(IPTablesTest, InitialState) {
+  SKIP_IF(!IsRunningOnGvisor());
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+  //
+  // Get info via sockopt.
+  //
+  struct ipt_getinfo info = {};
+  snprintf(info.name, XT_TABLE_MAXNAMELEN, "%s", kNatTablename);
+  socklen_t info_size = sizeof(info);
+  ASSERT_THAT(getsockopt(s_, IPPROTO_IP, SO_GET_INFO, &info, &info_size),
+              SyscallSucceeds());
+
+  // The nat table supports PREROUTING, and OUTPUT.
+  unsigned int valid_hooks = (1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_OUT) |
+                             (1 << NF_IP_POST_ROUTING) | (1 << NF_IP_LOCAL_IN);
+
+  EXPECT_EQ(info.valid_hooks, valid_hooks);
+
+  // Each chain consists of an empty entry with a standard target..
+  EXPECT_EQ(info.hook_entry[NF_IP_PRE_ROUTING], 0);
+  EXPECT_EQ(info.hook_entry[NF_IP_LOCAL_IN], kEmptyStandardEntrySize);
+  EXPECT_EQ(info.hook_entry[NF_IP_LOCAL_OUT], kEmptyStandardEntrySize * 2);
+  EXPECT_EQ(info.hook_entry[NF_IP_POST_ROUTING], kEmptyStandardEntrySize * 3);
+
+  // The underflow points are the same as the entry points.
+  EXPECT_EQ(info.underflow[NF_IP_PRE_ROUTING], 0);
+  EXPECT_EQ(info.underflow[NF_IP_LOCAL_IN], kEmptyStandardEntrySize);
+  EXPECT_EQ(info.underflow[NF_IP_LOCAL_OUT], kEmptyStandardEntrySize * 2);
+  EXPECT_EQ(info.underflow[NF_IP_POST_ROUTING], kEmptyStandardEntrySize * 3);
+
+  // One entry for each chain, plus an error entry at the end.
+  EXPECT_EQ(info.num_entries, 5);
+
+  EXPECT_EQ(info.size, 4 * kEmptyStandardEntrySize + kEmptyErrorEntrySize);
+  EXPECT_EQ(strcmp(info.name, kNatTablename), 0);
+
+  //
+  // Use info to get entries.
+  //
+  socklen_t entries_size = sizeof(struct ipt_get_entries) + info.size;
+  struct ipt_get_entries* entries =
+      static_cast<struct ipt_get_entries*>(malloc(entries_size));
+  snprintf(entries->name, XT_TABLE_MAXNAMELEN, "%s", kNatTablename);
+  entries->size = info.size;
+  ASSERT_THAT(
+      getsockopt(s_, IPPROTO_IP, SO_GET_ENTRIES, entries, &entries_size),
+      SyscallSucceeds());
+
+  // Verify the name and size.
+  ASSERT_EQ(info.size, entries->size);
+  ASSERT_EQ(strcmp(entries->name, kNatTablename), 0);
+
+  // Verify that the entrytable is 4 entries with accept targets and no matches
+  // followed by a single error target.
+  size_t entry_offset = 0;
+  while (entry_offset < entries->size) {
+    struct ipt_entry* entry = reinterpret_cast<struct ipt_entry*>(
+        reinterpret_cast<char*>(entries->entrytable) + entry_offset);
+
+    // ip should be zeroes.
+    struct ipt_ip zeroed = {};
+    EXPECT_EQ(memcmp(static_cast<void*>(&zeroed),
+                     static_cast<void*>(&entry->ip), sizeof(zeroed)),
+              0);
+
+    // target_offset should be zero.
+    EXPECT_EQ(entry->target_offset, sizeof(ipt_entry));
+
+    if (entry_offset < kEmptyStandardEntrySize * 4) {
+      // The first 4 entries are standard targets
+      struct ipt_standard_target* target =
+          reinterpret_cast<struct ipt_standard_target*>(entry->elems);
+      EXPECT_EQ(entry->next_offset, kEmptyStandardEntrySize);
+      EXPECT_EQ(target->target.u.user.target_size, sizeof(*target));
+      EXPECT_EQ(strcmp(target->target.u.user.name, ""), 0);
+      EXPECT_EQ(target->target.u.user.revision, 0);
+      // This is what's returned for an accept verdict. I don't know why.
+      EXPECT_EQ(target->verdict, -NF_ACCEPT - 1);
+    } else {
+      // The last entry is an error target
+      struct ipt_error_target* target =
+          reinterpret_cast<struct ipt_error_target*>(entry->elems);
+      EXPECT_EQ(entry->next_offset, kEmptyErrorEntrySize);
+      EXPECT_EQ(target->target.u.user.target_size, sizeof(*target));
+      EXPECT_EQ(strcmp(target->target.u.user.name, kErrorTarget), 0);
+      EXPECT_EQ(target->target.u.user.revision, 0);
+      EXPECT_EQ(strcmp(target->errorname, kErrorTarget), 0);
+    }
+
+    entry_offset += entry->next_offset;
+  }
+
+  free(entries);
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/iptables.h b/test/syscalls/linux/iptables.h
new file mode 100644
index 000000000..0719c60a4
--- /dev/null
+++ b/test/syscalls/linux/iptables.h
@@ -0,0 +1,198 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// There are a number of structs and values that we can't #include because of a
+// difference between C and C++ (C++ won't let you implicitly cast from void* to
+// struct something*). We re-define them here.
+
+#ifndef GVISOR_TEST_SYSCALLS_IPTABLES_TYPES_H_
+#define GVISOR_TEST_SYSCALLS_IPTABLES_TYPES_H_
+
+// Netfilter headers require some headers to preceed them.
+// clang-format off
+#include <netinet/in.h>
+#include <stddef.h>
+// clang-format on
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_ipv4.h>
+#include <net/if.h>
+#include <netinet/ip.h>
+#include <stdint.h>
+
+#define ipt_standard_target xt_standard_target
+#define ipt_entry_target xt_entry_target
+#define ipt_error_target xt_error_target
+
+enum SockOpts {
+  // For setsockopt.
+  BASE_CTL = 64,
+  SO_SET_REPLACE = BASE_CTL,
+  SO_SET_ADD_COUNTERS,
+  SO_SET_MAX = SO_SET_ADD_COUNTERS,
+
+  // For getsockopt.
+  SO_GET_INFO = BASE_CTL,
+  SO_GET_ENTRIES,
+  SO_GET_REVISION_MATCH,
+  SO_GET_REVISION_TARGET,
+  SO_GET_MAX = SO_GET_REVISION_TARGET
+};
+
+// ipt_ip specifies basic matching criteria that can be applied by examining
+// only the IP header of a packet.
+struct ipt_ip {
+  // Source IP address.
+  struct in_addr src;
+
+  // Destination IP address.
+  struct in_addr dst;
+
+  // Source IP address mask.
+  struct in_addr smsk;
+
+  // Destination IP address mask.
+  struct in_addr dmsk;
+
+  // Input interface.
+  char iniface[IFNAMSIZ];
+
+  // Output interface.
+  char outiface[IFNAMSIZ];
+
+  // Input interface mask.
+  unsigned char iniface_mask[IFNAMSIZ];
+
+  // Output interface mask.
+  unsigned char outiface_mask[IFNAMSIZ];
+
+  // Transport protocol.
+  uint16_t proto;
+
+  // Flags.
+  uint8_t flags;
+
+  // Inverse flags.
+  uint8_t invflags;
+};
+
+// ipt_entry is an iptables rule. It contains information about what packets the
+// rule matches and what action (target) to perform for matching packets.
+struct ipt_entry {
+  // Basic matching information used to match a packet's IP header.
+  struct ipt_ip ip;
+
+  // A caching field that isn't used by userspace.
+  unsigned int nfcache;
+
+  // The number of bytes between the start of this ipt_entry struct and the
+  // rule's target.
+  uint16_t target_offset;
+
+  // The total size of this rule, from the beginning of the entry to the end of
+  // the target.
+  uint16_t next_offset;
+
+  // A return pointer not used by userspace.
+  unsigned int comefrom;
+
+  // Counters for packets and bytes, which we don't yet implement.
+  struct xt_counters counters;
+
+  // The data for all this rules matches followed by the target. This runs
+  // beyond the value of sizeof(struct ipt_entry).
+  unsigned char elems[0];
+};
+
+// Passed to getsockopt(SO_GET_INFO).
+struct ipt_getinfo {
+  // The name of the table. The user only fills this in, the rest is filled in
+  // when returning from getsockopt. Currently "nat" and "mangle" are supported.
+  char name[XT_TABLE_MAXNAMELEN];
+
+  // A bitmap of which hooks apply to the table. For example, a table with hooks
+  // PREROUTING and FORWARD has the value
+  // (1 << NF_IP_PRE_REOUTING) | (1 << NF_IP_FORWARD).
+  unsigned int valid_hooks;
+
+  // The offset into the entry table for each valid hook. The entry table is
+  // returned by getsockopt(SO_GET_ENTRIES).
+  unsigned int hook_entry[NF_IP_NUMHOOKS];
+
+  // For each valid hook, the underflow is the offset into the entry table to
+  // jump to in case traversing the table yields no verdict (although I have no
+  // clue how that could happen - builtin chains always end with a policy, and
+  // user-defined chains always end with a RETURN.
+  //
+  // The entry referred to must be an "unconditional" entry, meaning it has no
+  // matches, specifies no IP criteria, and either DROPs or ACCEPTs packets.  It
+  // basically has to be capable of making a definitive decision no matter what
+  // it's passed.
+  unsigned int underflow[NF_IP_NUMHOOKS];
+
+  // The number of entries in the entry table returned by
+  // getsockopt(SO_GET_ENTRIES).
+  unsigned int num_entries;
+
+  // The size of the entry table returned by getsockopt(SO_GET_ENTRIES).
+  unsigned int size;
+};
+
+// Passed to getsockopt(SO_GET_ENTRIES).
+struct ipt_get_entries {
+  // The name of the table. The user fills this in. Currently "nat" and "mangle"
+  // are supported.
+  char name[XT_TABLE_MAXNAMELEN];
+
+  // The size of the entry table in bytes. The user fills this in with the value
+  // from struct ipt_getinfo.size.
+  unsigned int size;
+
+  // The entries for the given table. This will run past the size defined by
+  // sizeof(struct ipt_get_entries).
+  struct ipt_entry entrytable[0];
+};
+
+// Passed to setsockopt(SO_SET_REPLACE).
+struct ipt_replace {
+  // The name of the table.
+  char name[XT_TABLE_MAXNAMELEN];
+
+  // The same as struct ipt_getinfo.valid_hooks. Users don't change this.
+  unsigned int valid_hooks;
+
+  // The same as struct ipt_getinfo.num_entries.
+  unsigned int num_entries;
+
+  // The same as struct ipt_getinfo.size.
+  unsigned int size;
+
+  // The same as struct ipt_getinfo.hook_entry.
+  unsigned int hook_entry[NF_IP_NUMHOOKS];
+
+  // The same as struct ipt_getinfo.underflow.
+  unsigned int underflow[NF_IP_NUMHOOKS];
+
+  // The number of counters, which should equal the number of entries.
+  unsigned int num_counters;
+
+  // The unchanged values from each ipt_entry's counters.
+  struct xt_counters* counters;
+
+  // The entries to write to the table. This will run past the size defined by
+  // sizeof(srtuct ipt_replace);
+  struct ipt_entry entries[0];
+};
+
+#endif  // GVISOR_TEST_SYSCALLS_IPTABLES_TYPES_H_
diff --git a/test/syscalls/linux/itimer.cc b/test/syscalls/linux/itimer.cc
new file mode 100644
index 000000000..e397d5f57
--- /dev/null
+++ b/test/syscalls/linux/itimer.cc
@@ -0,0 +1,366 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <signal.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <time.h>
+
+#include <atomic>
+#include <functional>
+#include <iostream>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/strings/string_view.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/logging.h"
+#include "test/util/multiprocess_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/signal_util.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+#include "test/util/timer_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+constexpr char kSIGALRMToMainThread[] = "--itimer_sigarlm_to_main_thread";
+constexpr char kSIGPROFFairnessActive[] = "--itimer_sigprof_fairness_active";
+constexpr char kSIGPROFFairnessIdle[] = "--itimer_sigprof_fairness_idle";
+
+// Time period to be set for the itimers.
+constexpr absl::Duration kPeriod = absl::Milliseconds(25);
+// Total amount of time to spend per thread.
+constexpr absl::Duration kTestDuration = absl::Seconds(20);
+// Amount of spin iterations to perform as the minimum work item per thread.
+// Chosen to be sub-millisecond range.
+constexpr int kIterations = 10000000;
+// Allow deviation in the number of samples.
+constexpr double kNumSamplesDeviationRatio = 0.2;
+
+TEST(ItimerTest, ItimervalUpdatedBeforeExpiration) {
+  constexpr int kSleepSecs = 10;
+  constexpr int kAlarmSecs = 15;
+  static_assert(
+      kSleepSecs < kAlarmSecs,
+      "kSleepSecs must be less than kAlarmSecs for the test to be meaningful");
+  constexpr int kMaxRemainingSecs = kAlarmSecs - kSleepSecs;
+
+  // Install a no-op handler for SIGALRM.
+  struct sigaction sa = {};
+  sigfillset(&sa.sa_mask);
+  sa.sa_handler = +[](int signo) {};
+  auto const cleanup_sa =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGALRM, sa));
+
+  // Set an itimer-based alarm for kAlarmSecs from now.
+  struct itimerval itv = {};
+  itv.it_value.tv_sec = kAlarmSecs;
+  auto const cleanup_itimer =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedItimer(ITIMER_REAL, itv));
+
+  // After sleeping for kSleepSecs, the itimer value should reflect the elapsed
+  // time even if it hasn't expired.
+  absl::SleepFor(absl::Seconds(kSleepSecs));
+  ASSERT_THAT(getitimer(ITIMER_REAL, &itv), SyscallSucceeds());
+  EXPECT_TRUE(
+      itv.it_value.tv_sec < kMaxRemainingSecs ||
+      (itv.it_value.tv_sec == kMaxRemainingSecs && itv.it_value.tv_usec == 0))
+      << "Remaining time: " << itv.it_value.tv_sec << " seconds + "
+      << itv.it_value.tv_usec << " microseconds";
+}
+
+ABSL_CONST_INIT static thread_local std::atomic_int signal_test_num_samples =
+    ATOMIC_VAR_INIT(0);
+
+void SignalTestSignalHandler(int /*signum*/) { signal_test_num_samples++; }
+
+struct SignalTestResult {
+  int expected_total;
+  int main_thread_samples;
+  std::vector<int> worker_samples;
+};
+
+std::ostream& operator<<(std::ostream& os, const SignalTestResult& r) {
+  os << "{expected_total: " << r.expected_total
+     << ", main_thread_samples: " << r.main_thread_samples
+     << ", worker_samples: [";
+  bool first = true;
+  for (int sample : r.worker_samples) {
+    if (!first) {
+      os << ", ";
+    }
+    os << sample;
+    first = false;
+  }
+  os << "]}";
+  return os;
+}
+
+// Starts two worker threads and itimer id and measures the number of signal
+// delivered to each thread.
+SignalTestResult ItimerSignalTest(int id, clock_t main_clock,
+                                  clock_t worker_clock, int signal,
+                                  absl::Duration sleep) {
+  signal_test_num_samples = 0;
+
+  struct sigaction sa = {};
+  sa.sa_handler = &SignalTestSignalHandler;
+  sa.sa_flags = SA_RESTART;
+  sigemptyset(&sa.sa_mask);
+  auto sigaction_cleanup = ScopedSigaction(signal, sa).ValueOrDie();
+
+  int socketfds[2];
+  TEST_PCHECK(socketpair(AF_UNIX, SOCK_STREAM, 0, socketfds) == 0);
+
+  // Do the spinning in the workers.
+  std::function<void*(int)> work = [&](int socket_fd) {
+    FileDescriptor fd(socket_fd);
+
+    absl::Time finish = Now(worker_clock) + kTestDuration;
+    while (Now(worker_clock) < finish) {
+      // Blocked on read.
+      char c;
+      RetryEINTR(read)(fd.get(), &c, 1);
+      for (int i = 0; i < kIterations; i++) {
+        // Ensure compiler won't optimize this loop away.
+        asm("");
+      }
+
+      if (sleep != absl::ZeroDuration()) {
+        // Sleep so that the entire process is idle for a while.
+        absl::SleepFor(sleep);
+      }
+
+      // Unblock the other thread.
+      RetryEINTR(write)(fd.get(), &c, 1);
+    }
+
+    return reinterpret_cast<void*>(signal_test_num_samples.load());
+  };
+
+  ScopedThread th1(
+      static_cast<std::function<void*()>>(std::bind(work, socketfds[0])));
+  ScopedThread th2(
+      static_cast<std::function<void*()>>(std::bind(work, socketfds[1])));
+
+  absl::Time start = Now(main_clock);
+  // Start the timer.
+  struct itimerval timer = {};
+  timer.it_value = absl::ToTimeval(kPeriod);
+  timer.it_interval = absl::ToTimeval(kPeriod);
+  auto cleanup_itimer = ScopedItimer(id, timer).ValueOrDie();
+
+  // Unblock th1.
+  //
+  // N.B. th2 owns socketfds[1] but can't close it until it unblocks.
+  char c = 0;
+  TEST_CHECK(write(socketfds[1], &c, 1) == 1);
+
+  SignalTestResult result;
+
+  // Wait for the workers to be done and collect their sample counts.
+  result.worker_samples.push_back(reinterpret_cast<int64_t>(th1.Join()));
+  result.worker_samples.push_back(reinterpret_cast<int64_t>(th2.Join()));
+  cleanup_itimer.Release()();
+  result.expected_total = (Now(main_clock) - start) / kPeriod;
+  result.main_thread_samples = signal_test_num_samples.load();
+
+  return result;
+}
+
+int TestSIGALRMToMainThread() {
+  SignalTestResult result =
+      ItimerSignalTest(ITIMER_REAL, CLOCK_REALTIME, CLOCK_REALTIME, SIGALRM,
+                       absl::ZeroDuration());
+
+  std::cerr << "result: " << result << std::endl;
+
+  // ITIMER_REAL-generated SIGALRMs prefer to deliver to the thread group leader
+  // (but don't guarantee it), so we expect to see most samples on the main
+  // thread.
+  //
+  // The number of SIGALRMs delivered to a worker should not exceed 20%
+  // of the number of total signals expected (this is somewhat arbitrary).
+  const int worker_threshold = result.expected_total / 5;
+
+  //
+  // Linux only guarantees timers will never expire before the requested time.
+  // Thus, we only check the upper bound and also it at least have one sample.
+  TEST_CHECK(result.main_thread_samples <= result.expected_total);
+  TEST_CHECK(result.main_thread_samples > 0);
+  for (int num : result.worker_samples) {
+    TEST_CHECK_MSG(num <= worker_threshold, "worker received too many samples");
+  }
+
+  return 0;
+}
+
+// Random save/restore is disabled as it introduces additional latency and
+// unpredictable distribution patterns.
+TEST(ItimerTest, DeliversSIGALRMToMainThread_NoRandomSave) {
+  pid_t child;
+  int execve_errno;
+  auto kill = ASSERT_NO_ERRNO_AND_VALUE(
+      ForkAndExec("/proc/self/exe", {"/proc/self/exe", kSIGALRMToMainThread},
+                  {}, &child, &execve_errno));
+  EXPECT_EQ(0, execve_errno);
+
+  int status;
+  EXPECT_THAT(RetryEINTR(waitpid)(child, &status, 0),
+              SyscallSucceedsWithValue(child));
+
+  // Not required anymore.
+  kill.Release();
+
+  EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) << status;
+}
+
+// Signals are delivered to threads fairly.
+//
+// sleep indicates how long to sleep worker threads each iteration to make the
+// entire process idle.
+int TestSIGPROFFairness(absl::Duration sleep) {
+  SignalTestResult result =
+      ItimerSignalTest(ITIMER_PROF, CLOCK_PROCESS_CPUTIME_ID,
+                       CLOCK_THREAD_CPUTIME_ID, SIGPROF, sleep);
+
+  std::cerr << "result: " << result << std::endl;
+
+  // The number of samples on the main thread should be very low as it did
+  // nothing.
+  TEST_CHECK(result.main_thread_samples < 80);
+
+  // Both workers should get roughly equal number of samples.
+  TEST_CHECK(result.worker_samples.size() == 2);
+
+  TEST_CHECK(result.expected_total > 0);
+
+  // In an ideal world each thread would get exactly 50% of the signals,
+  // but since that's unlikely to happen we allow for them to get no less than
+  // kNumSamplesDeviationRatio of the total observed samples.
+  TEST_CHECK_MSG(std::abs(result.worker_samples[0] - result.worker_samples[1]) <
+                     ((result.worker_samples[0] + result.worker_samples[1]) *
+                      kNumSamplesDeviationRatio),
+                 "one worker received disproportionate share of samples");
+
+  return 0;
+}
+
+// Random save/restore is disabled as it introduces additional latency and
+// unpredictable distribution patterns.
+TEST(ItimerTest, DeliversSIGPROFToThreadsRoughlyFairlyActive_NoRandomSave) {
+  // On the KVM and ptrace platforms, switches between sentry and application
+  // context are sometimes extremely slow, causing the itimer to send SIGPROF to
+  // a thread that either already has one pending or has had SIGPROF delivered,
+  // but hasn't handled it yet (and thus therefore still has SIGPROF masked). In
+  // either case, since itimer signals are group-directed, signal sending falls
+  // back to notifying the thread group leader. ItimerSignalTest() fails if "too
+  // many" signals are delivered to the thread group leader, so these tests are
+  // flaky on these platforms.
+  //
+  // TODO(b/143247272): Clarify why context switches are so slow on KVM.
+  const auto gvisor_platform = GvisorPlatform();
+  SKIP_IF(gvisor_platform == Platform::kKVM ||
+          gvisor_platform == Platform::kPtrace);
+
+  pid_t child;
+  int execve_errno;
+  auto kill = ASSERT_NO_ERRNO_AND_VALUE(
+      ForkAndExec("/proc/self/exe", {"/proc/self/exe", kSIGPROFFairnessActive},
+                  {}, &child, &execve_errno));
+  EXPECT_EQ(0, execve_errno);
+
+  int status;
+  EXPECT_THAT(RetryEINTR(waitpid)(child, &status, 0),
+              SyscallSucceedsWithValue(child));
+
+  // Not required anymore.
+  kill.Release();
+
+  EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+      << "Exited with code: " << status;
+}
+
+// Random save/restore is disabled as it introduces additional latency and
+// unpredictable distribution patterns.
+TEST(ItimerTest, DeliversSIGPROFToThreadsRoughlyFairlyIdle_NoRandomSave) {
+  // See comment in DeliversSIGPROFToThreadsRoughlyFairlyActive.
+  const auto gvisor_platform = GvisorPlatform();
+  SKIP_IF(gvisor_platform == Platform::kKVM ||
+          gvisor_platform == Platform::kPtrace);
+
+  pid_t child;
+  int execve_errno;
+  auto kill = ASSERT_NO_ERRNO_AND_VALUE(
+      ForkAndExec("/proc/self/exe", {"/proc/self/exe", kSIGPROFFairnessIdle},
+                  {}, &child, &execve_errno));
+  EXPECT_EQ(0, execve_errno);
+
+  int status;
+  EXPECT_THAT(RetryEINTR(waitpid)(child, &status, 0),
+              SyscallSucceedsWithValue(child));
+
+  // Not required anymore.
+  kill.Release();
+
+  EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+      << "Exited with code: " << status;
+}
+
+}  // namespace
+}  // namespace testing
+}  // namespace gvisor
+
+namespace {
+void MaskSIGPIPE() {
+  // Always mask SIGPIPE as it's common and tests aren't expected to handle it.
+  // We don't take the TestInit() path so we must do this manually.
+  struct sigaction sa = {};
+  sa.sa_handler = SIG_IGN;
+  TEST_CHECK(sigaction(SIGPIPE, &sa, nullptr) == 0);
+}
+}  // namespace
+
+int main(int argc, char** argv) {
+  // These tests require no background threads, so check for them before
+  // TestInit.
+  for (int i = 0; i < argc; i++) {
+    absl::string_view arg(argv[i]);
+
+    if (arg == gvisor::testing::kSIGALRMToMainThread) {
+      MaskSIGPIPE();
+      return gvisor::testing::TestSIGALRMToMainThread();
+    }
+    if (arg == gvisor::testing::kSIGPROFFairnessActive) {
+      MaskSIGPIPE();
+      return gvisor::testing::TestSIGPROFFairness(absl::ZeroDuration());
+    }
+    if (arg == gvisor::testing::kSIGPROFFairnessIdle) {
+      MaskSIGPIPE();
+      // Sleep time > ClockTick (10ms) exercises sleeping gVisor's
+      // kernel.cpuClockTicker.
+      return gvisor::testing::TestSIGPROFFairness(absl::Milliseconds(25));
+    }
+  }
+
+  gvisor::testing::TestInit(&argc, &argv);
+  return gvisor::testing::RunAllTests();
+}
diff --git a/test/syscalls/linux/kill.cc b/test/syscalls/linux/kill.cc
new file mode 100644
index 000000000..db29bd59c
--- /dev/null
+++ b/test/syscalls/linux/kill.cc
@@ -0,0 +1,383 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <cerrno>
+#include <csignal>
+
+#include "gtest/gtest.h"
+#include "absl/flags/flag.h"
+#include "absl/synchronization/mutex.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/util/capability_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/logging.h"
+#include "test/util/signal_util.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+ABSL_FLAG(int32_t, scratch_uid, 65534, "scratch UID");
+ABSL_FLAG(int32_t, scratch_gid, 65534, "scratch GID");
+
+using ::testing::Ge;
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(KillTest, CanKillValidPid) {
+  // If pid is positive, then signal sig is sent to the process with the ID
+  // specified by pid.
+  EXPECT_THAT(kill(getpid(), 0), SyscallSucceeds());
+  // If pid equals 0, then sig is sent to every process in the process group of
+  // the calling process.
+  EXPECT_THAT(kill(0, 0), SyscallSucceeds());
+
+  ScopedThread([] { EXPECT_THAT(kill(gettid(), 0), SyscallSucceeds()); });
+}
+
+void SigHandler(int sig, siginfo_t* info, void* context) { _exit(0); }
+
+// If pid equals -1, then sig is sent to every process for which the calling
+// process has permission to send signals, except for process 1 (init).
+TEST(KillTest, CanKillAllPIDs) {
+  int pipe_fds[2];
+  ASSERT_THAT(pipe(pipe_fds), SyscallSucceeds());
+  FileDescriptor read_fd(pipe_fds[0]);
+  FileDescriptor write_fd(pipe_fds[1]);
+
+  pid_t pid = fork();
+  if (pid == 0) {
+    read_fd.reset();
+
+    struct sigaction sa;
+    sa.sa_sigaction = SigHandler;
+    sigfillset(&sa.sa_mask);
+    sa.sa_flags = SA_SIGINFO;
+    TEST_PCHECK(sigaction(SIGWINCH, &sa, nullptr) == 0);
+    MaybeSave();
+
+    // Indicate to the parent that we're ready.
+    write_fd.reset();
+
+    // Wait until we get the signal from the parent.
+    while (true) {
+      pause();
+    }
+  }
+
+  ASSERT_THAT(pid, SyscallSucceeds());
+
+  write_fd.reset();
+
+  // Wait for the child to indicate that it's unmasked the signal by closing
+  // the write end.
+  char buf;
+  ASSERT_THAT(ReadFd(read_fd.get(), &buf, 1), SyscallSucceedsWithValue(0));
+
+  // Signal the child and wait for it to die with status 0, indicating that
+  // it got the expected signal.
+  EXPECT_THAT(kill(-1, SIGWINCH), SyscallSucceeds());
+
+  int status;
+  ASSERT_THAT(RetryEINTR(waitpid)(pid, &status, 0),
+              SyscallSucceedsWithValue(pid));
+  EXPECT_TRUE(WIFEXITED(status));
+  EXPECT_EQ(0, WEXITSTATUS(status));
+}
+
+TEST(KillTest, CannotKillInvalidPID) {
+  // We need an unused pid to verify that kill fails when given one.
+  //
+  // There is no way to guarantee that a PID is unused, but the PID of a
+  // recently exited process likely won't be reused soon.
+  pid_t fake_pid = fork();
+  if (fake_pid == 0) {
+    _exit(0);
+  }
+
+  ASSERT_THAT(fake_pid, SyscallSucceeds());
+
+  int status;
+  ASSERT_THAT(RetryEINTR(waitpid)(fake_pid, &status, 0),
+              SyscallSucceedsWithValue(fake_pid));
+  EXPECT_TRUE(WIFEXITED(status));
+  EXPECT_EQ(0, WEXITSTATUS(status));
+
+  EXPECT_THAT(kill(fake_pid, 0), SyscallFailsWithErrno(ESRCH));
+}
+
+TEST(KillTest, CannotUseInvalidSignal) {
+  EXPECT_THAT(kill(getpid(), 200), SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(KillTest, CanKillRemoteProcess) {
+  pid_t pid = fork();
+  if (pid == 0) {
+    while (true) {
+      pause();
+    }
+  }
+
+  ASSERT_THAT(pid, SyscallSucceeds());
+
+  EXPECT_THAT(kill(pid, SIGKILL), SyscallSucceeds());
+
+  int status;
+  ASSERT_THAT(RetryEINTR(waitpid)(pid, &status, 0),
+              SyscallSucceedsWithValue(pid));
+  EXPECT_TRUE(WIFSIGNALED(status));
+  EXPECT_EQ(SIGKILL, WTERMSIG(status));
+}
+
+TEST(KillTest, CanKillOwnProcess) {
+  EXPECT_THAT(kill(getpid(), 0), SyscallSucceeds());
+}
+
+// Verify that you can kill a process even using a tid from a thread other than
+// the group leader.
+TEST(KillTest, CannotKillTid) {
+  pid_t tid;
+  bool tid_available = false;
+  bool finished = false;
+  absl::Mutex mu;
+  ScopedThread t([&] {
+    mu.Lock();
+    tid = gettid();
+    tid_available = true;
+    mu.Await(absl::Condition(&finished));
+    mu.Unlock();
+  });
+  mu.LockWhen(absl::Condition(&tid_available));
+  EXPECT_THAT(kill(tid, 0), SyscallSucceeds());
+  finished = true;
+  mu.Unlock();
+}
+
+TEST(KillTest, SetPgid) {
+  for (int i = 0; i < 10; i++) {
+    // The following in the normal pattern for creating a new process group.
+    // Both the parent and child process will call setpgid in order to avoid any
+    // race conditions. We do this ten times to catch races.
+    pid_t pid = fork();
+    if (pid == 0) {
+      setpgid(0, 0);
+      while (true) {
+        pause();
+      }
+    }
+
+    ASSERT_THAT(pid, SyscallSucceeds());
+
+    // Set the child's group and exit.
+    ASSERT_THAT(setpgid(pid, pid), SyscallSucceeds());
+    EXPECT_THAT(kill(pid, SIGKILL), SyscallSucceeds());
+
+    int status;
+    EXPECT_THAT(RetryEINTR(waitpid)(-pid, &status, 0),
+                SyscallSucceedsWithValue(pid));
+    EXPECT_TRUE(WIFSIGNALED(status));
+    EXPECT_EQ(SIGKILL, WTERMSIG(status));
+  }
+}
+
+TEST(KillTest, ProcessGroups) {
+  // Fork a new child.
+  //
+  // other_child is used as a placeholder process. We use this PID as our "does
+  // not exist" process group to ensure some amount of safety. (It is still
+  // possible to violate this assumption, but extremely unlikely.)
+  pid_t child = fork();
+  if (child == 0) {
+    while (true) {
+      pause();
+    }
+  }
+  ASSERT_THAT(child, SyscallSucceeds());
+
+  pid_t other_child = fork();
+  if (other_child == 0) {
+    while (true) {
+      pause();
+    }
+  }
+  ASSERT_THAT(other_child, SyscallSucceeds());
+
+  // Ensure the kill does not succeed without the new group.
+  EXPECT_THAT(kill(-child, SIGKILL), SyscallFailsWithErrno(ESRCH));
+
+  // Put the child in its own process group.
+  ASSERT_THAT(setpgid(child, child), SyscallSucceeds());
+
+  // This should be not allowed: you can only create a new group with the same
+  // id or join an existing one. The other_child group should not exist.
+  ASSERT_THAT(setpgid(child, other_child), SyscallFailsWithErrno(EPERM));
+
+  // Done with other_child; kill it.
+  EXPECT_THAT(kill(other_child, SIGKILL), SyscallSucceeds());
+  int status;
+  EXPECT_THAT(RetryEINTR(waitpid)(other_child, &status, 0), SyscallSucceeds());
+
+  // Linux returns success for the no-op call.
+  ASSERT_THAT(setpgid(child, child), SyscallSucceeds());
+
+  // Kill the child's process group.
+  ASSERT_THAT(kill(-child, SIGKILL), SyscallSucceeds());
+
+  // Wait on the process group; ensure that the signal was as expected.
+  EXPECT_THAT(RetryEINTR(waitpid)(-child, &status, 0),
+              SyscallSucceedsWithValue(child));
+  EXPECT_TRUE(WIFSIGNALED(status));
+  EXPECT_EQ(SIGKILL, WTERMSIG(status));
+
+  // Try to kill the process group again; ensure that the wait fails.
+  EXPECT_THAT(kill(-child, SIGKILL), SyscallFailsWithErrno(ESRCH));
+  EXPECT_THAT(RetryEINTR(waitpid)(-child, &status, 0),
+              SyscallFailsWithErrno(ECHILD));
+}
+
+TEST(KillTest, ChildDropsPrivsCannotKill) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SETUID)));
+
+  const int uid = absl::GetFlag(FLAGS_scratch_uid);
+  const int gid = absl::GetFlag(FLAGS_scratch_gid);
+
+  // Create the child that drops privileges and tries to kill the parent.
+  pid_t pid = fork();
+  if (pid == 0) {
+    TEST_PCHECK(setresgid(gid, gid, gid) == 0);
+    MaybeSave();
+
+    TEST_PCHECK(setresuid(uid, uid, uid) == 0);
+    MaybeSave();
+
+    // setresuid should have dropped CAP_KILL. Make sure.
+    TEST_CHECK(!HaveCapability(CAP_KILL).ValueOrDie());
+
+    // Try to kill parent with every signal-sending syscall possible.
+    pid_t parent = getppid();
+
+    TEST_CHECK(kill(parent, SIGKILL) < 0);
+    TEST_PCHECK_MSG(errno == EPERM, "kill failed with wrong errno");
+    MaybeSave();
+
+    TEST_CHECK(tgkill(parent, parent, SIGKILL) < 0);
+    TEST_PCHECK_MSG(errno == EPERM, "tgkill failed with wrong errno");
+    MaybeSave();
+
+    TEST_CHECK(syscall(SYS_tkill, parent, SIGKILL) < 0);
+    TEST_PCHECK_MSG(errno == EPERM, "tkill failed with wrong errno");
+    MaybeSave();
+
+    siginfo_t uinfo;
+    uinfo.si_code = -1;  // SI_QUEUE (allowed).
+
+    TEST_CHECK(syscall(SYS_rt_sigqueueinfo, parent, SIGKILL, &uinfo) < 0);
+    TEST_PCHECK_MSG(errno == EPERM, "rt_sigqueueinfo failed with wrong errno");
+    MaybeSave();
+
+    TEST_CHECK(syscall(SYS_rt_tgsigqueueinfo, parent, parent, SIGKILL, &uinfo) <
+               0);
+    TEST_PCHECK_MSG(errno == EPERM, "rt_sigqueueinfo failed with wrong errno");
+    MaybeSave();
+
+    _exit(0);
+  }
+
+  ASSERT_THAT(pid, SyscallSucceeds());
+
+  int status;
+  EXPECT_THAT(RetryEINTR(waitpid)(pid, &status, 0),
+              SyscallSucceedsWithValue(pid));
+  EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+      << "status = " << status;
+}
+
+TEST(KillTest, CanSIGCONTSameSession) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SETUID)));
+
+  pid_t stopped_child = fork();
+  if (stopped_child == 0) {
+    raise(SIGSTOP);
+    _exit(0);
+  }
+
+  ASSERT_THAT(stopped_child, SyscallSucceeds());
+
+  // Put the child in its own process group. The child and parent process
+  // groups also share a session.
+  ASSERT_THAT(setpgid(stopped_child, stopped_child), SyscallSucceeds());
+
+  // Make sure child stopped.
+  int status;
+  EXPECT_THAT(RetryEINTR(waitpid)(stopped_child, &status, WUNTRACED),
+              SyscallSucceedsWithValue(stopped_child));
+  EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP)
+      << "status " << status;
+
+  const int uid = absl::GetFlag(FLAGS_scratch_uid);
+  const int gid = absl::GetFlag(FLAGS_scratch_gid);
+
+  // Drop privileges only in child process, or else this parent process won't be
+  // able to open some log files after the test ends.
+  pid_t other_child = fork();
+  if (other_child == 0) {
+    // Drop privileges.
+    TEST_PCHECK(setresgid(gid, gid, gid) == 0);
+    MaybeSave();
+
+    TEST_PCHECK(setresuid(uid, uid, uid) == 0);
+    MaybeSave();
+
+    // setresuid should have dropped CAP_KILL.
+    TEST_CHECK(!HaveCapability(CAP_KILL).ValueOrDie());
+
+    // Child 2 and child should now not share a thread group and any UIDs.
+    // Child 2 should have no privileges. That means any signal other than
+    // SIGCONT should fail.
+    TEST_CHECK(kill(stopped_child, SIGKILL) < 0);
+    TEST_PCHECK_MSG(errno == EPERM, "kill failed with wrong errno");
+    MaybeSave();
+
+    TEST_PCHECK(kill(stopped_child, SIGCONT) == 0);
+    MaybeSave();
+
+    _exit(0);
+  }
+
+  ASSERT_THAT(stopped_child, SyscallSucceeds());
+
+  // Make sure child exited normally.
+  EXPECT_THAT(RetryEINTR(waitpid)(stopped_child, &status, 0),
+              SyscallSucceedsWithValue(stopped_child));
+  EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+      << "status " << status;
+
+  // Make sure other_child exited normally.
+  EXPECT_THAT(RetryEINTR(waitpid)(other_child, &status, 0),
+              SyscallSucceedsWithValue(other_child));
+  EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+      << "status " << status;
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/link.cc b/test/syscalls/linux/link.cc
new file mode 100644
index 000000000..544681168
--- /dev/null
+++ b/test/syscalls/linux/link.cc
@@ -0,0 +1,305 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <string>
+
+#include "gtest/gtest.h"
+#include "absl/flags/flag.h"
+#include "absl/strings/str_cat.h"
+#include "test/util/capability_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/fs_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+ABSL_FLAG(int32_t, scratch_uid, 65534, "scratch UID");
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// IsSameFile returns true if both filenames have the same device and inode.
+bool IsSameFile(const std::string& f1, const std::string& f2) {
+  // Use lstat rather than stat, so that symlinks are not followed.
+  struct stat stat1 = {};
+  EXPECT_THAT(lstat(f1.c_str(), &stat1), SyscallSucceeds());
+  struct stat stat2 = {};
+  EXPECT_THAT(lstat(f2.c_str(), &stat2), SyscallSucceeds());
+
+  return stat1.st_dev == stat2.st_dev && stat1.st_ino == stat2.st_ino;
+}
+
+TEST(LinkTest, CanCreateLinkFile) {
+  auto oldfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  const std::string newname = NewTempAbsPath();
+
+  // Get the initial link count.
+  uint64_t initial_link_count =
+      ASSERT_NO_ERRNO_AND_VALUE(Links(oldfile.path()));
+
+  EXPECT_THAT(link(oldfile.path().c_str(), newname.c_str()), SyscallSucceeds());
+
+  EXPECT_TRUE(IsSameFile(oldfile.path(), newname));
+
+  // Link count should be incremented.
+  EXPECT_THAT(Links(oldfile.path()),
+              IsPosixErrorOkAndHolds(initial_link_count + 1));
+
+  // Delete the link.
+  EXPECT_THAT(unlink(newname.c_str()), SyscallSucceeds());
+
+  // Link count should be back to initial.
+  EXPECT_THAT(Links(oldfile.path()),
+              IsPosixErrorOkAndHolds(initial_link_count));
+}
+
+TEST(LinkTest, PermissionDenied) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_FOWNER)));
+
+  // Make the file "unsafe" to link by making it only readable, but not
+  // writable.
+  const auto unwriteable_file =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileMode(0400));
+  const std::string special_path = NewTempAbsPath();
+  ASSERT_THAT(mkfifo(special_path.c_str(), 0666), SyscallSucceeds());
+  const auto setuid_file =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileMode(0666 | S_ISUID));
+
+  const std::string newname = NewTempAbsPath();
+
+  // Do setuid in a separate thread so that after finishing this test, the
+  // process can still open files the test harness created before starting this
+  // test. Otherwise, the files are created by root (UID before the test), but
+  // cannot be opened by the `uid` set below after the test. After calling
+  // setuid(non-zero-UID), there is no way to get root privileges back.
+  ScopedThread([&] {
+    // Use syscall instead of glibc setuid wrapper because we want this setuid
+    // call to only apply to this task. POSIX threads, however, require that all
+    // threads have the same UIDs, so using the setuid wrapper sets all threads'
+    // real UID.
+    // Also drops capabilities.
+    EXPECT_THAT(syscall(SYS_setuid, absl::GetFlag(FLAGS_scratch_uid)),
+                SyscallSucceeds());
+
+    EXPECT_THAT(link(unwriteable_file.path().c_str(), newname.c_str()),
+                SyscallFailsWithErrno(EPERM));
+    EXPECT_THAT(link(special_path.c_str(), newname.c_str()),
+                SyscallFailsWithErrno(EPERM));
+    if (!IsRunningWithVFS1()) {
+      EXPECT_THAT(link(setuid_file.path().c_str(), newname.c_str()),
+                  SyscallFailsWithErrno(EPERM));
+    }
+  });
+}
+
+TEST(LinkTest, CannotLinkDirectory) {
+  auto olddir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const std::string newdir = NewTempAbsPath();
+
+  EXPECT_THAT(link(olddir.path().c_str(), newdir.c_str()),
+              SyscallFailsWithErrno(EPERM));
+
+  EXPECT_THAT(rmdir(olddir.path().c_str()), SyscallSucceeds());
+}
+
+TEST(LinkTest, CannotLinkWithSlash) {
+  auto oldfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  // Put a final "/" on newname.
+  const std::string newname = absl::StrCat(NewTempAbsPath(), "/");
+
+  EXPECT_THAT(link(oldfile.path().c_str(), newname.c_str()),
+              SyscallFailsWithErrno(ENOENT));
+}
+
+TEST(LinkTest, OldnameIsEmpty) {
+  const std::string newname = NewTempAbsPath();
+  EXPECT_THAT(link("", newname.c_str()), SyscallFailsWithErrno(ENOENT));
+}
+
+TEST(LinkTest, OldnameDoesNotExist) {
+  const std::string oldname = NewTempAbsPath();
+  const std::string newname = NewTempAbsPath();
+  EXPECT_THAT(link("", newname.c_str()), SyscallFailsWithErrno(ENOENT));
+}
+
+TEST(LinkTest, NewnameCannotExist) {
+  const std::string newname =
+      JoinPath(GetAbsoluteTestTmpdir(), "thisdoesnotexist", "foo");
+  EXPECT_THAT(link("/thisdoesnotmatter", newname.c_str()),
+              SyscallFailsWithErrno(ENOENT));
+}
+
+TEST(LinkTest, WithOldDirFD) {
+  const std::string oldname_parent = NewTempAbsPath();
+  const std::string oldname_base = "child";
+  const std::string oldname = JoinPath(oldname_parent, oldname_base);
+  const std::string newname = NewTempAbsPath();
+
+  // Create oldname_parent directory, and get an FD.
+  ASSERT_THAT(mkdir(oldname_parent.c_str(), 0777), SyscallSucceeds());
+  const FileDescriptor oldname_parent_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(oldname_parent, O_DIRECTORY | O_RDONLY));
+
+  // Create oldname file.
+  const FileDescriptor oldname_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(oldname, O_CREAT | O_RDWR, 0666));
+
+  // Link oldname to newname, using oldname_parent_fd.
+  EXPECT_THAT(linkat(oldname_parent_fd.get(), oldname_base.c_str(), AT_FDCWD,
+                     newname.c_str(), 0),
+              SyscallSucceeds());
+
+  EXPECT_TRUE(IsSameFile(oldname, newname));
+
+  EXPECT_THAT(unlink(newname.c_str()), SyscallSucceeds());
+  EXPECT_THAT(unlink(oldname.c_str()), SyscallSucceeds());
+  EXPECT_THAT(rmdir(oldname_parent.c_str()), SyscallSucceeds());
+}
+
+TEST(LinkTest, BogusFlags) {
+  ASSERT_THAT(linkat(1, "foo", 2, "bar", 3), SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(LinkTest, WithNewDirFD) {
+  auto oldfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  const std::string newname_parent = NewTempAbsPath();
+  const std::string newname_base = "child";
+  const std::string newname = JoinPath(newname_parent, newname_base);
+
+  // Create newname_parent directory, and get an FD.
+  EXPECT_THAT(mkdir(newname_parent.c_str(), 0777), SyscallSucceeds());
+  const FileDescriptor newname_parent_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(newname_parent, O_DIRECTORY | O_RDONLY));
+
+  // Link newname to oldfile, using newname_parent_fd.
+  EXPECT_THAT(linkat(AT_FDCWD, oldfile.path().c_str(), newname_parent_fd.get(),
+                     newname.c_str(), 0),
+              SyscallSucceeds());
+
+  EXPECT_TRUE(IsSameFile(oldfile.path(), newname));
+
+  EXPECT_THAT(unlink(newname.c_str()), SyscallSucceeds());
+  EXPECT_THAT(rmdir(newname_parent.c_str()), SyscallSucceeds());
+}
+
+TEST(LinkTest, RelPathsWithNonDirFDs) {
+  auto oldfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+
+  // Create a file that will be passed as the directory fd for old/new names.
+  const std::string filename = NewTempAbsPath();
+  const FileDescriptor file_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(filename, O_CREAT | O_RDWR, 0666));
+
+  // Using file_fd as olddirfd will fail.
+  EXPECT_THAT(linkat(file_fd.get(), "foo", AT_FDCWD, "bar", 0),
+              SyscallFailsWithErrno(ENOTDIR));
+
+  // Using file_fd as newdirfd will fail.
+  EXPECT_THAT(linkat(AT_FDCWD, oldfile.path().c_str(), file_fd.get(), "bar", 0),
+              SyscallFailsWithErrno(ENOTDIR));
+}
+
+TEST(LinkTest, AbsPathsWithNonDirFDs) {
+  auto oldfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  const std::string newname = NewTempAbsPath();
+
+  // Create a file that will be passed as the directory fd for old/new names.
+  const std::string filename = NewTempAbsPath();
+  const FileDescriptor file_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(filename, O_CREAT | O_RDWR, 0666));
+
+  // Using file_fd as the dirfds is OK as long as paths are absolute.
+  EXPECT_THAT(linkat(file_fd.get(), oldfile.path().c_str(), file_fd.get(),
+                     newname.c_str(), 0),
+              SyscallSucceeds());
+}
+
+TEST(LinkTest, LinkDoesNotFollowSymlinks) {
+  // Create oldfile, and oldsymlink which points to it.
+  auto oldfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  const std::string oldsymlink = NewTempAbsPath();
+  EXPECT_THAT(symlink(oldfile.path().c_str(), oldsymlink.c_str()),
+              SyscallSucceeds());
+
+  // Now hard link newname to oldsymlink.
+  const std::string newname = NewTempAbsPath();
+  EXPECT_THAT(link(oldsymlink.c_str(), newname.c_str()), SyscallSucceeds());
+
+  // The link should not have resolved the symlink, so newname and oldsymlink
+  // are the same.
+  EXPECT_TRUE(IsSameFile(oldsymlink, newname));
+  EXPECT_FALSE(IsSameFile(oldfile.path(), newname));
+
+  EXPECT_THAT(unlink(oldsymlink.c_str()), SyscallSucceeds());
+  EXPECT_THAT(unlink(newname.c_str()), SyscallSucceeds());
+}
+
+TEST(LinkTest, LinkatDoesNotFollowSymlinkByDefault) {
+  // Create oldfile, and oldsymlink which points to it.
+  auto oldfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  const std::string oldsymlink = NewTempAbsPath();
+  EXPECT_THAT(symlink(oldfile.path().c_str(), oldsymlink.c_str()),
+              SyscallSucceeds());
+
+  // Now hard link newname to oldsymlink.
+  const std::string newname = NewTempAbsPath();
+  EXPECT_THAT(
+      linkat(AT_FDCWD, oldsymlink.c_str(), AT_FDCWD, newname.c_str(), 0),
+      SyscallSucceeds());
+
+  // The link should not have resolved the symlink, so newname and oldsymlink
+  // are the same.
+  EXPECT_TRUE(IsSameFile(oldsymlink, newname));
+  EXPECT_FALSE(IsSameFile(oldfile.path(), newname));
+
+  EXPECT_THAT(unlink(oldsymlink.c_str()), SyscallSucceeds());
+  EXPECT_THAT(unlink(newname.c_str()), SyscallSucceeds());
+}
+
+TEST(LinkTest, LinkatWithSymlinkFollow) {
+  // Create oldfile, and oldsymlink which points to it.
+  auto oldfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  const std::string oldsymlink = NewTempAbsPath();
+  ASSERT_THAT(symlink(oldfile.path().c_str(), oldsymlink.c_str()),
+              SyscallSucceeds());
+
+  // Now hard link newname to oldsymlink, and pass AT_SYMLINK_FOLLOW flag.
+  const std::string newname = NewTempAbsPath();
+  ASSERT_THAT(linkat(AT_FDCWD, oldsymlink.c_str(), AT_FDCWD, newname.c_str(),
+                     AT_SYMLINK_FOLLOW),
+              SyscallSucceeds());
+
+  // The link should have resolved the symlink, so oldfile and newname are the
+  // same.
+  EXPECT_TRUE(IsSameFile(oldfile.path(), newname));
+  EXPECT_FALSE(IsSameFile(oldsymlink, newname));
+
+  EXPECT_THAT(unlink(oldsymlink.c_str()), SyscallSucceeds());
+  EXPECT_THAT(unlink(newname.c_str()), SyscallSucceeds());
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/lseek.cc b/test/syscalls/linux/lseek.cc
new file mode 100644
index 000000000..6ce1e6cc3
--- /dev/null
+++ b/test/syscalls/linux/lseek.cc
@@ -0,0 +1,202 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(LseekTest, InvalidWhence) {
+  const std::string kFileData = "hello world\n";
+  const TempPath path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), kFileData, TempPath::kDefaultFileMode));
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(path.path(), O_RDWR, 0644));
+
+  ASSERT_THAT(lseek(fd.get(), 0, -1), SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(LseekTest, NegativeOffset) {
+  const std::string kFileData = "hello world\n";
+  const TempPath path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), kFileData, TempPath::kDefaultFileMode));
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(path.path(), O_RDWR, 0644));
+
+  EXPECT_THAT(lseek(fd.get(), -(kFileData.length() + 1), SEEK_CUR),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+// A 32-bit off_t is not large enough to represent an offset larger than
+// maximum file size on standard file systems, so it isn't possible to cause
+// overflow.
+#if defined(__x86_64__) || defined(__aarch64__)
+TEST(LseekTest, Overflow) {
+  // HA! Classic Linux. We really should have an EOVERFLOW
+  // here, since we're seeking to something that cannot be
+  // represented.. but instead we are given an EINVAL.
+  const std::string kFileData = "hello world\n";
+  const TempPath path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), kFileData, TempPath::kDefaultFileMode));
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(path.path(), O_RDWR, 0644));
+  EXPECT_THAT(lseek(fd.get(), 0x7fffffffffffffff, SEEK_END),
+              SyscallFailsWithErrno(EINVAL));
+}
+#endif
+
+TEST(LseekTest, Set) {
+  const std::string kFileData = "hello world\n";
+  const TempPath path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), kFileData, TempPath::kDefaultFileMode));
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(path.path(), O_RDWR, 0644));
+
+  char buf = '\0';
+  EXPECT_THAT(lseek(fd.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0));
+  ASSERT_THAT(read(fd.get(), &buf, 1), SyscallSucceedsWithValue(1));
+  EXPECT_EQ(buf, kFileData.c_str()[0]);
+  EXPECT_THAT(lseek(fd.get(), 6, SEEK_SET), SyscallSucceedsWithValue(6));
+  ASSERT_THAT(read(fd.get(), &buf, 1), SyscallSucceedsWithValue(1));
+  EXPECT_EQ(buf, kFileData.c_str()[6]);
+}
+
+TEST(LseekTest, Cur) {
+  const std::string kFileData = "hello world\n";
+  const TempPath path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), kFileData, TempPath::kDefaultFileMode));
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(path.path(), O_RDWR, 0644));
+
+  char buf = '\0';
+  EXPECT_THAT(lseek(fd.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0));
+  ASSERT_THAT(read(fd.get(), &buf, 1), SyscallSucceedsWithValue(1));
+  EXPECT_EQ(buf, kFileData.c_str()[0]);
+  EXPECT_THAT(lseek(fd.get(), 3, SEEK_CUR), SyscallSucceedsWithValue(4));
+  ASSERT_THAT(read(fd.get(), &buf, 1), SyscallSucceedsWithValue(1));
+  EXPECT_EQ(buf, kFileData.c_str()[4]);
+}
+
+TEST(LseekTest, End) {
+  const std::string kFileData = "hello world\n";
+  const TempPath path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), kFileData, TempPath::kDefaultFileMode));
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(path.path(), O_RDWR, 0644));
+
+  char buf = '\0';
+  EXPECT_THAT(lseek(fd.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0));
+  ASSERT_THAT(read(fd.get(), &buf, 1), SyscallSucceedsWithValue(1));
+  EXPECT_EQ(buf, kFileData.c_str()[0]);
+  EXPECT_THAT(lseek(fd.get(), -2, SEEK_END), SyscallSucceedsWithValue(10));
+  ASSERT_THAT(read(fd.get(), &buf, 1), SyscallSucceedsWithValue(1));
+  EXPECT_EQ(buf, kFileData.c_str()[kFileData.length() - 2]);
+}
+
+TEST(LseekTest, InvalidFD) {
+  EXPECT_THAT(lseek(-1, 0, SEEK_SET), SyscallFailsWithErrno(EBADF));
+}
+
+TEST(LseekTest, DirCurEnd) {
+  const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open("/tmp", O_RDONLY));
+  ASSERT_THAT(lseek(fd.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(0));
+}
+
+TEST(LseekTest, ProcDir) {
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/self", O_RDONLY));
+  ASSERT_THAT(lseek(fd.get(), 0, SEEK_CUR), SyscallSucceeds());
+  ASSERT_THAT(lseek(fd.get(), 0, SEEK_END), SyscallSucceeds());
+}
+
+TEST(LseekTest, ProcFile) {
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/meminfo", O_RDONLY));
+  ASSERT_THAT(lseek(fd.get(), 0, SEEK_CUR), SyscallSucceeds());
+  ASSERT_THAT(lseek(fd.get(), 0, SEEK_END), SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(LseekTest, SysDir) {
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open("/sys/devices", O_RDONLY));
+  ASSERT_THAT(lseek(fd.get(), 0, SEEK_CUR), SyscallSucceeds());
+  ASSERT_THAT(lseek(fd.get(), 0, SEEK_END), SyscallSucceeds());
+}
+
+TEST(LseekTest, SeekCurrentDir) {
+  // From include/linux/fs.h.
+  constexpr loff_t MAX_LFS_FILESIZE = 0x7fffffffffffffff;
+
+  char* dir = get_current_dir_name();
+  const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(dir, O_RDONLY));
+
+  ASSERT_THAT(lseek(fd.get(), 0, SEEK_CUR), SyscallSucceeds());
+  ASSERT_THAT(lseek(fd.get(), 0, SEEK_END),
+              // Some filesystems (like ext4) allow lseek(SEEK_END) on a
+              // directory and return MAX_LFS_FILESIZE, others return EINVAL.
+              AnyOf(SyscallSucceedsWithValue(MAX_LFS_FILESIZE),
+                    SyscallFailsWithErrno(EINVAL)));
+  free(dir);
+}
+
+TEST(LseekTest, ProcStatTwice) {
+  const FileDescriptor fd1 =
+      ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/stat", O_RDONLY));
+  const FileDescriptor fd2 =
+      ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/stat", O_RDONLY));
+
+  ASSERT_THAT(lseek(fd1.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(0));
+  ASSERT_THAT(lseek(fd1.get(), 0, SEEK_END), SyscallFailsWithErrno(EINVAL));
+  ASSERT_THAT(lseek(fd1.get(), 1000, SEEK_CUR), SyscallSucceeds());
+  // Check that just because we moved fd1, fd2 doesn't move.
+  ASSERT_THAT(lseek(fd2.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(0));
+
+  const FileDescriptor fd3 =
+      ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/stat", O_RDONLY));
+  ASSERT_THAT(lseek(fd3.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(0));
+}
+
+TEST(LseekTest, EtcPasswdDup) {
+  const FileDescriptor fd1 =
+      ASSERT_NO_ERRNO_AND_VALUE(Open("/etc/passwd", O_RDONLY));
+  const FileDescriptor fd2 = ASSERT_NO_ERRNO_AND_VALUE(fd1.Dup());
+
+  ASSERT_THAT(lseek(fd1.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(0));
+  ASSERT_THAT(lseek(fd2.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(0));
+  ASSERT_THAT(lseek(fd1.get(), 1000, SEEK_CUR), SyscallSucceeds());
+  // Check that just because we moved fd1, fd2 doesn't move.
+  ASSERT_THAT(lseek(fd2.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(1000));
+
+  const FileDescriptor fd3 = ASSERT_NO_ERRNO_AND_VALUE(fd1.Dup());
+  ASSERT_THAT(lseek(fd3.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(1000));
+}
+
+// TODO(magi): Add tests where we have donated in sockets.
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/madvise.cc b/test/syscalls/linux/madvise.cc
new file mode 100644
index 000000000..5a1973f60
--- /dev/null
+++ b/test/syscalls/linux/madvise.cc
@@ -0,0 +1,251 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include <string>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/logging.h"
+#include "test/util/memory_util.h"
+#include "test/util/multiprocess_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+void ExpectAllMappingBytes(Mapping const& m, char c) {
+  auto const v = m.view();
+  for (size_t i = 0; i < kPageSize; i++) {
+    ASSERT_EQ(v[i], c) << "at offset " << i;
+  }
+}
+
+// Equivalent to ExpectAllMappingBytes but async-signal-safe and with less
+// helpful failure messages.
+void CheckAllMappingBytes(Mapping const& m, char c) {
+  auto const v = m.view();
+  for (size_t i = 0; i < kPageSize; i++) {
+    TEST_CHECK_MSG(v[i] == c, "mapping contains wrong value");
+  }
+}
+
+TEST(MadviseDontneedTest, ZerosPrivateAnonPage) {
+  auto m = ASSERT_NO_ERRNO_AND_VALUE(
+      MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE));
+  ExpectAllMappingBytes(m, 0);
+  memset(m.ptr(), 1, m.len());
+  ExpectAllMappingBytes(m, 1);
+  ASSERT_THAT(madvise(m.ptr(), m.len(), MADV_DONTNEED), SyscallSucceeds());
+  ExpectAllMappingBytes(m, 0);
+}
+
+TEST(MadviseDontneedTest, ZerosCOWAnonPageInCallerOnly) {
+  auto m = ASSERT_NO_ERRNO_AND_VALUE(
+      MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE));
+  ExpectAllMappingBytes(m, 0);
+  memset(m.ptr(), 2, m.len());
+  ExpectAllMappingBytes(m, 2);
+
+  // Do madvise in a child process.
+  pid_t pid = fork();
+  CheckAllMappingBytes(m, 2);
+  if (pid == 0) {
+    TEST_PCHECK(madvise(m.ptr(), m.len(), MADV_DONTNEED) == 0);
+    CheckAllMappingBytes(m, 0);
+    _exit(0);
+  }
+
+  ASSERT_THAT(pid, SyscallSucceeds());
+
+  int status = 0;
+  ASSERT_THAT(waitpid(-1, &status, 0), SyscallSucceedsWithValue(pid));
+  EXPECT_TRUE(WIFEXITED(status));
+  EXPECT_EQ(WEXITSTATUS(status), 0);
+  // The child's madvise should not have affected the parent.
+  ExpectAllMappingBytes(m, 2);
+}
+
+TEST(MadviseDontneedTest, DoesNotModifySharedAnonPage) {
+  auto m = ASSERT_NO_ERRNO_AND_VALUE(
+      MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED));
+  ExpectAllMappingBytes(m, 0);
+  memset(m.ptr(), 3, m.len());
+  ExpectAllMappingBytes(m, 3);
+  ASSERT_THAT(madvise(m.ptr(), m.len(), MADV_DONTNEED), SyscallSucceeds());
+  ExpectAllMappingBytes(m, 3);
+}
+
+TEST(MadviseDontneedTest, CleansPrivateFilePage) {
+  TempPath f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      /* parent = */ GetAbsoluteTestTmpdir(),
+      /* content = */ std::string(kPageSize, 4), TempPath::kDefaultFileMode));
+  FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDWR));
+
+  Mapping m = ASSERT_NO_ERRNO_AND_VALUE(Mmap(
+      nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd.get(), 0));
+  ExpectAllMappingBytes(m, 4);
+  memset(m.ptr(), 5, m.len());
+  ExpectAllMappingBytes(m, 5);
+  ASSERT_THAT(madvise(m.ptr(), m.len(), MADV_DONTNEED), SyscallSucceeds());
+  ExpectAllMappingBytes(m, 4);
+}
+
+TEST(MadviseDontneedTest, DoesNotModifySharedFilePage) {
+  TempPath f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      /* parent = */ GetAbsoluteTestTmpdir(),
+      /* content = */ std::string(kPageSize, 6), TempPath::kDefaultFileMode));
+  FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDWR));
+
+  Mapping m = ASSERT_NO_ERRNO_AND_VALUE(Mmap(
+      nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd.get(), 0));
+  ExpectAllMappingBytes(m, 6);
+  memset(m.ptr(), 7, m.len());
+  ExpectAllMappingBytes(m, 7);
+  ASSERT_THAT(madvise(m.ptr(), m.len(), MADV_DONTNEED), SyscallSucceeds());
+  ExpectAllMappingBytes(m, 7);
+}
+
+TEST(MadviseDontneedTest, IgnoresPermissions) {
+  auto m =
+      ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_NONE, MAP_PRIVATE));
+  EXPECT_THAT(madvise(m.ptr(), m.len(), MADV_DONTNEED), SyscallSucceeds());
+}
+
+TEST(MadviseDontforkTest, AddressLength) {
+  auto m =
+      ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_NONE, MAP_PRIVATE));
+  char* addr = static_cast<char*>(m.ptr());
+
+  // Address must be page aligned.
+  EXPECT_THAT(madvise(addr + 1, kPageSize, MADV_DONTFORK),
+              SyscallFailsWithErrno(EINVAL));
+
+  // Zero length madvise always succeeds.
+  EXPECT_THAT(madvise(addr, 0, MADV_DONTFORK), SyscallSucceeds());
+
+  // Length must not roll over after rounding up.
+  size_t badlen = std::numeric_limits<std::size_t>::max() - (kPageSize / 2);
+  EXPECT_THAT(madvise(0, badlen, MADV_DONTFORK), SyscallFailsWithErrno(EINVAL));
+
+  // Length need not be page aligned - it is implicitly rounded up.
+  EXPECT_THAT(madvise(addr, 1, MADV_DONTFORK), SyscallSucceeds());
+  EXPECT_THAT(madvise(addr, kPageSize, MADV_DONTFORK), SyscallSucceeds());
+}
+
+TEST(MadviseDontforkTest, DontforkShared) {
+  // Mmap two shared file-backed pages and MADV_DONTFORK the second page.
+  TempPath f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      /* parent = */ GetAbsoluteTestTmpdir(),
+      /* content = */ std::string(kPageSize * 2, 2),
+      TempPath::kDefaultFileMode));
+  FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDWR));
+
+  Mapping m = ASSERT_NO_ERRNO_AND_VALUE(Mmap(
+      nullptr, kPageSize * 2, PROT_READ | PROT_WRITE, MAP_SHARED, fd.get(), 0));
+
+  const Mapping ms1 = Mapping(reinterpret_cast<void*>(m.addr()), kPageSize);
+  const Mapping ms2 =
+      Mapping(reinterpret_cast<void*>(m.addr() + kPageSize), kPageSize);
+  m.release();
+
+  ASSERT_THAT(madvise(ms2.ptr(), kPageSize, MADV_DONTFORK), SyscallSucceeds());
+
+  const auto rest = [&] {
+    // First page is mapped in child and modifications are visible to parent
+    // via the shared mapping.
+    TEST_CHECK(IsMapped(ms1.addr()));
+    ExpectAllMappingBytes(ms1, 2);
+    memset(ms1.ptr(), 1, kPageSize);
+    ExpectAllMappingBytes(ms1, 1);
+
+    // Second page must not be mapped in child.
+    TEST_CHECK(!IsMapped(ms2.addr()));
+  };
+
+  EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0));
+
+  ExpectAllMappingBytes(ms1, 1);  // page contents modified by child.
+  ExpectAllMappingBytes(ms2, 2);  // page contents unchanged.
+}
+
+TEST(MadviseDontforkTest, DontforkAnonPrivate) {
+  // Mmap three anonymous pages and MADV_DONTFORK the middle page.
+  Mapping m = ASSERT_NO_ERRNO_AND_VALUE(
+      MmapAnon(kPageSize * 3, PROT_READ | PROT_WRITE, MAP_PRIVATE));
+  const Mapping mp1 = Mapping(reinterpret_cast<void*>(m.addr()), kPageSize);
+  const Mapping mp2 =
+      Mapping(reinterpret_cast<void*>(m.addr() + kPageSize), kPageSize);
+  const Mapping mp3 =
+      Mapping(reinterpret_cast<void*>(m.addr() + 2 * kPageSize), kPageSize);
+  m.release();
+
+  ASSERT_THAT(madvise(mp2.ptr(), kPageSize, MADV_DONTFORK), SyscallSucceeds());
+
+  // Verify that all pages are zeroed and memset the first, second and third
+  // pages to 1, 2, and 3 respectively.
+  ExpectAllMappingBytes(mp1, 0);
+  memset(mp1.ptr(), 1, kPageSize);
+
+  ExpectAllMappingBytes(mp2, 0);
+  memset(mp2.ptr(), 2, kPageSize);
+
+  ExpectAllMappingBytes(mp3, 0);
+  memset(mp3.ptr(), 3, kPageSize);
+
+  const auto rest = [&] {
+    // Verify first page is mapped, verify its contents and then modify the
+    // page. The mapping is private so the modifications are not visible to
+    // the parent.
+    TEST_CHECK(IsMapped(mp1.addr()));
+    ExpectAllMappingBytes(mp1, 1);
+    memset(mp1.ptr(), 11, kPageSize);
+    ExpectAllMappingBytes(mp1, 11);
+
+    // Verify second page is not mapped.
+    TEST_CHECK(!IsMapped(mp2.addr()));
+
+    // Verify third page is mapped, verify its contents and then modify the
+    // page. The mapping is private so the modifications are not visible to
+    // the parent.
+    TEST_CHECK(IsMapped(mp3.addr()));
+    ExpectAllMappingBytes(mp3, 3);
+    memset(mp3.ptr(), 13, kPageSize);
+    ExpectAllMappingBytes(mp3, 13);
+  };
+  EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0));
+
+  // The fork and COW by child should not affect the parent mappings.
+  ExpectAllMappingBytes(mp1, 1);
+  ExpectAllMappingBytes(mp2, 2);
+  ExpectAllMappingBytes(mp3, 3);
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/memfd.cc b/test/syscalls/linux/memfd.cc
new file mode 100644
index 000000000..f8b7f7938
--- /dev/null
+++ b/test/syscalls/linux/memfd.cc
@@ -0,0 +1,557 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/magic.h>
+#include <linux/memfd.h>
+#include <linux/unistd.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/statfs.h>
+#include <sys/syscall.h>
+
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/fs_util.h"
+#include "test/util/memory_util.h"
+#include "test/util/multiprocess_util.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+// The header sys/memfd.h isn't available on all systems, so redefining some of
+// the constants here.
+#define F_LINUX_SPECIFIC_BASE 1024
+
+#ifndef F_ADD_SEALS
+#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9)
+#endif /* F_ADD_SEALS */
+
+#ifndef F_GET_SEALS
+#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10)
+#endif /* F_GET_SEALS */
+
+#define F_SEAL_SEAL 0x0001
+#define F_SEAL_SHRINK 0x0002
+#define F_SEAL_GROW 0x0004
+#define F_SEAL_WRITE 0x0008
+
+using ::testing::StartsWith;
+
+const std::string kMemfdName = "some-memfd";
+
+int memfd_create(const std::string& name, unsigned int flags) {
+  return syscall(__NR_memfd_create, name.c_str(), flags);
+}
+
+PosixErrorOr<FileDescriptor> MemfdCreate(const std::string& name,
+                                         uint32_t flags) {
+  int fd = memfd_create(name, flags);
+  if (fd < 0) {
+    return PosixError(
+        errno, absl::StrFormat("memfd_create(\"%s\", %#x)", name, flags));
+  }
+  MaybeSave();
+  return FileDescriptor(fd);
+}
+
+// Procfs entries for memfds display the appropriate name.
+TEST(MemfdTest, Name) {
+  const FileDescriptor memfd =
+      ASSERT_NO_ERRNO_AND_VALUE(MemfdCreate(kMemfdName, 0));
+  const std::string proc_name = ASSERT_NO_ERRNO_AND_VALUE(
+      ReadLink(absl::StrFormat("/proc/self/fd/%d", memfd.get())));
+  EXPECT_THAT(proc_name, StartsWith("/memfd:" + kMemfdName));
+}
+
+// Memfds support read/write syscalls.
+TEST(MemfdTest, WriteRead) {
+  const FileDescriptor memfd =
+      ASSERT_NO_ERRNO_AND_VALUE(MemfdCreate(kMemfdName, 0));
+
+  // Write a random page of data to the memfd via write(2).
+  std::vector<char> buf(kPageSize);
+  RandomizeBuffer(buf.data(), buf.size());
+  ASSERT_THAT(write(memfd.get(), buf.data(), buf.size()),
+              SyscallSucceedsWithValue(kPageSize));
+
+  // Read back the same data and verify.
+  std::vector<char> buf2(kPageSize);
+  ASSERT_THAT(lseek(memfd.get(), 0, SEEK_SET), SyscallSucceeds());
+  EXPECT_THAT(read(memfd.get(), buf2.data(), buf2.size()),
+              SyscallSucceedsWithValue(kPageSize));
+  EXPECT_EQ(buf, buf2);
+}
+
+// Memfds can be mapped and used as usual.
+TEST(MemfdTest, Mmap) {
+  const FileDescriptor memfd =
+      ASSERT_NO_ERRNO_AND_VALUE(MemfdCreate(kMemfdName, 0));
+  const Mapping m1 = ASSERT_NO_ERRNO_AND_VALUE(Mmap(
+      nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, memfd.get(), 0));
+
+  // Write a random page of data to the memfd via mmap m1.
+  std::vector<char> buf(kPageSize);
+  RandomizeBuffer(buf.data(), buf.size());
+  ASSERT_THAT(ftruncate(memfd.get(), kPageSize), SyscallSucceeds());
+  memcpy(m1.ptr(), buf.data(), buf.size());
+
+  // Read the data back via a read syscall on the memfd.
+  std::vector<char> buf2(kPageSize);
+  EXPECT_THAT(read(memfd.get(), buf2.data(), buf2.size()),
+              SyscallSucceedsWithValue(kPageSize));
+  EXPECT_EQ(buf, buf2);
+
+  // The same data should be accessible via a new mapping m2.
+  const Mapping m2 = ASSERT_NO_ERRNO_AND_VALUE(Mmap(
+      nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, memfd.get(), 0));
+  EXPECT_EQ(0, memcmp(m1.ptr(), m2.ptr(), kPageSize));
+}
+
+TEST(MemfdTest, DuplicateFDsShareContent) {
+  const FileDescriptor memfd =
+      ASSERT_NO_ERRNO_AND_VALUE(MemfdCreate(kMemfdName, 0));
+  const Mapping m1 = ASSERT_NO_ERRNO_AND_VALUE(Mmap(
+      nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, memfd.get(), 0));
+  const FileDescriptor memfd2 = ASSERT_NO_ERRNO_AND_VALUE(memfd.Dup());
+
+  // Write a random page of data to the memfd via mmap m1.
+  std::vector<char> buf(kPageSize);
+  RandomizeBuffer(buf.data(), buf.size());
+  ASSERT_THAT(ftruncate(memfd.get(), kPageSize), SyscallSucceeds());
+  memcpy(m1.ptr(), buf.data(), buf.size());
+
+  // Read the data back via a read syscall on a duplicate fd.
+  std::vector<char> buf2(kPageSize);
+  EXPECT_THAT(read(memfd2.get(), buf2.data(), buf2.size()),
+              SyscallSucceedsWithValue(kPageSize));
+  EXPECT_EQ(buf, buf2);
+}
+
+// File seals are disabled by default on memfds.
+TEST(MemfdTest, SealingDisabledByDefault) {
+  const FileDescriptor memfd =
+      ASSERT_NO_ERRNO_AND_VALUE(MemfdCreate(kMemfdName, 0));
+  EXPECT_THAT(fcntl(memfd.get(), F_GET_SEALS),
+              SyscallSucceedsWithValue(F_SEAL_SEAL));
+  // Attempting to set any seal should fail.
+  EXPECT_THAT(fcntl(memfd.get(), F_ADD_SEALS, F_SEAL_WRITE),
+              SyscallFailsWithErrno(EPERM));
+}
+
+// Seals can be retrieved and updated for memfds.
+TEST(MemfdTest, SealsGetSet) {
+  const FileDescriptor memfd =
+      ASSERT_NO_ERRNO_AND_VALUE(MemfdCreate(kMemfdName, MFD_ALLOW_SEALING));
+  int seals;
+  ASSERT_THAT(seals = fcntl(memfd.get(), F_GET_SEALS), SyscallSucceeds());
+  // No seals are set yet.
+  EXPECT_EQ(0, seals);
+
+  // Set a seal and check that we can get it back.
+  ASSERT_THAT(fcntl(memfd.get(), F_ADD_SEALS, F_SEAL_WRITE), SyscallSucceeds());
+  EXPECT_THAT(fcntl(memfd.get(), F_GET_SEALS),
+              SyscallSucceedsWithValue(F_SEAL_WRITE));
+
+  // Set some more seals and verify.
+  ASSERT_THAT(fcntl(memfd.get(), F_ADD_SEALS, F_SEAL_GROW | F_SEAL_SHRINK),
+              SyscallSucceeds());
+  EXPECT_THAT(
+      fcntl(memfd.get(), F_GET_SEALS),
+      SyscallSucceedsWithValue(F_SEAL_WRITE | F_SEAL_GROW | F_SEAL_SHRINK));
+
+  // Attempting to set a seal that is already set is a no-op.
+  ASSERT_THAT(fcntl(memfd.get(), F_ADD_SEALS, F_SEAL_WRITE), SyscallSucceeds());
+  EXPECT_THAT(
+      fcntl(memfd.get(), F_GET_SEALS),
+      SyscallSucceedsWithValue(F_SEAL_WRITE | F_SEAL_GROW | F_SEAL_SHRINK));
+
+  // Add remaining seals and verify.
+  ASSERT_THAT(fcntl(memfd.get(), F_ADD_SEALS, F_SEAL_SEAL), SyscallSucceeds());
+  EXPECT_THAT(fcntl(memfd.get(), F_GET_SEALS),
+              SyscallSucceedsWithValue(F_SEAL_WRITE | F_SEAL_GROW |
+                                       F_SEAL_SHRINK | F_SEAL_SEAL));
+}
+
+// F_SEAL_GROW prevents a memfd from being grown using ftruncate.
+TEST(MemfdTest, SealGrowWithTruncate) {
+  const FileDescriptor memfd =
+      ASSERT_NO_ERRNO_AND_VALUE(MemfdCreate(kMemfdName, MFD_ALLOW_SEALING));
+  ASSERT_THAT(ftruncate(memfd.get(), kPageSize), SyscallSucceeds());
+  ASSERT_THAT(fcntl(memfd.get(), F_ADD_SEALS, F_SEAL_GROW), SyscallSucceeds());
+
+  // Try grow the memfd by 1 page.
+  ASSERT_THAT(ftruncate(memfd.get(), kPageSize * 2),
+              SyscallFailsWithErrno(EPERM));
+
+  // Ftruncate calls that don't actually grow the memfd are allowed.
+  ASSERT_THAT(ftruncate(memfd.get(), kPageSize), SyscallSucceeds());
+  ASSERT_THAT(ftruncate(memfd.get(), kPageSize / 2), SyscallSucceeds());
+
+  // After shrinking, growing back is not allowed.
+  ASSERT_THAT(ftruncate(memfd.get(), kPageSize), SyscallFailsWithErrno(EPERM));
+}
+
+// F_SEAL_GROW prevents a memfd from being grown using the write syscall.
+TEST(MemfdTest, SealGrowWithWrite) {
+  const FileDescriptor memfd =
+      ASSERT_NO_ERRNO_AND_VALUE(MemfdCreate(kMemfdName, MFD_ALLOW_SEALING));
+
+  // Initially, writing to the memfd succeeds.
+  const std::vector<char> buf(kPageSize);
+  EXPECT_THAT(write(memfd.get(), buf.data(), buf.size()),
+              SyscallSucceedsWithValue(kPageSize));
+
+  // Apply F_SEAL_GROW, subsequent writes which extend the memfd should fail.
+  ASSERT_THAT(fcntl(memfd.get(), F_ADD_SEALS, F_SEAL_GROW), SyscallSucceeds());
+  EXPECT_THAT(write(memfd.get(), buf.data(), buf.size()),
+              SyscallFailsWithErrno(EPERM));
+
+  // However, zero-length writes are ok since they don't grow the memfd.
+  EXPECT_THAT(write(memfd.get(), buf.data(), 0), SyscallSucceeds());
+
+  // Writing to existing parts of the memfd is also ok.
+  ASSERT_THAT(lseek(memfd.get(), 0, SEEK_SET), SyscallSucceeds());
+  EXPECT_THAT(write(memfd.get(), buf.data(), buf.size()),
+              SyscallSucceedsWithValue(kPageSize));
+
+  // Returning the end of the file and writing still not allowed.
+  EXPECT_THAT(write(memfd.get(), buf.data(), buf.size()),
+              SyscallFailsWithErrno(EPERM));
+}
+
+// F_SEAL_GROW causes writes which partially extend off the current EOF to
+// partially succeed, up to the page containing the EOF.
+TEST(MemfdTest, SealGrowPartialWriteTruncated) {
+  const FileDescriptor memfd =
+      ASSERT_NO_ERRNO_AND_VALUE(MemfdCreate(kMemfdName, MFD_ALLOW_SEALING));
+  ASSERT_THAT(ftruncate(memfd.get(), kPageSize), SyscallSucceeds());
+  ASSERT_THAT(fcntl(memfd.get(), F_ADD_SEALS, F_SEAL_GROW), SyscallSucceeds());
+
+  // FD offset: 1 page, EOF: 1 page.
+
+  ASSERT_THAT(lseek(memfd.get(), kPageSize * 3 / 4, SEEK_SET),
+              SyscallSucceeds());
+
+  // FD offset: 3/4 page. Writing a full page now should only write 1/4 page
+  // worth of data. This partially succeeds because the first page is entirely
+  // within the file and requires no growth, but attempting to write the final
+  // 3/4 page would require growing the file.
+  const std::vector<char> buf(kPageSize);
+  EXPECT_THAT(write(memfd.get(), buf.data(), buf.size()),
+              SyscallSucceedsWithValue(kPageSize / 4));
+}
+
+// F_SEAL_GROW causes writes which partially extend off the current EOF to fail
+// in its entirety if the only data written would be to the page containing the
+// EOF.
+TEST(MemfdTest, SealGrowPartialWriteTruncatedSamePage) {
+  const FileDescriptor memfd =
+      ASSERT_NO_ERRNO_AND_VALUE(MemfdCreate(kMemfdName, MFD_ALLOW_SEALING));
+  ASSERT_THAT(ftruncate(memfd.get(), kPageSize * 3 / 4), SyscallSucceeds());
+  ASSERT_THAT(fcntl(memfd.get(), F_ADD_SEALS, F_SEAL_GROW), SyscallSucceeds());
+
+  // EOF: 3/4 page, writing 1/2 page starting at 1/2 page would cause the file
+  // to grow. Since this would require only the page containing the EOF to be
+  // modified, the write is rejected entirely.
+  const std::vector<char> buf(kPageSize / 2);
+  EXPECT_THAT(pwrite(memfd.get(), buf.data(), buf.size(), kPageSize / 2),
+              SyscallFailsWithErrno(EPERM));
+
+  // However, writing up to EOF is fine.
+  EXPECT_THAT(pwrite(memfd.get(), buf.data(), buf.size() / 2, kPageSize / 2),
+              SyscallSucceedsWithValue(kPageSize / 4));
+}
+
+// F_SEAL_SHRINK prevents a memfd from being shrunk using ftruncate.
+TEST(MemfdTest, SealShrink) {
+  const FileDescriptor memfd =
+      ASSERT_NO_ERRNO_AND_VALUE(MemfdCreate(kMemfdName, MFD_ALLOW_SEALING));
+  ASSERT_THAT(ftruncate(memfd.get(), kPageSize), SyscallSucceeds());
+  ASSERT_THAT(fcntl(memfd.get(), F_ADD_SEALS, F_SEAL_SHRINK),
+              SyscallSucceeds());
+
+  // Shrink by half a page.
+  ASSERT_THAT(ftruncate(memfd.get(), kPageSize / 2),
+              SyscallFailsWithErrno(EPERM));
+
+  // Ftruncate calls that don't actually shrink the file are allowed.
+  ASSERT_THAT(ftruncate(memfd.get(), kPageSize), SyscallSucceeds());
+  ASSERT_THAT(ftruncate(memfd.get(), kPageSize * 2), SyscallSucceeds());
+
+  // After growing, shrinking is still not allowed.
+  ASSERT_THAT(ftruncate(memfd.get(), kPageSize), SyscallFailsWithErrno(EPERM));
+}
+
+// F_SEAL_WRITE prevents a memfd from being written to through a write
+// syscall.
+TEST(MemfdTest, SealWriteWithWrite) {
+  const FileDescriptor memfd =
+      ASSERT_NO_ERRNO_AND_VALUE(MemfdCreate(kMemfdName, MFD_ALLOW_SEALING));
+  const std::vector<char> buf(kPageSize);
+  ASSERT_THAT(write(memfd.get(), buf.data(), buf.size()),
+              SyscallSucceedsWithValue(kPageSize));
+  ASSERT_THAT(fcntl(memfd.get(), F_ADD_SEALS, F_SEAL_WRITE), SyscallSucceeds());
+
+  // Attemping to write at the end of the file fails.
+  EXPECT_THAT(write(memfd.get(), buf.data(), 1), SyscallFailsWithErrno(EPERM));
+
+  // Attemping to overwrite an existing part of the memfd fails.
+  EXPECT_THAT(pwrite(memfd.get(), buf.data(), 1, 0),
+              SyscallFailsWithErrno(EPERM));
+  EXPECT_THAT(pwrite(memfd.get(), buf.data(), buf.size() / 2, kPageSize / 2),
+              SyscallFailsWithErrno(EPERM));
+  EXPECT_THAT(pwrite(memfd.get(), buf.data(), buf.size(), kPageSize / 2),
+              SyscallFailsWithErrno(EPERM));
+
+  // Zero-length writes however do not fail.
+  EXPECT_THAT(write(memfd.get(), buf.data(), 0), SyscallSucceeds());
+}
+
+// F_SEAL_WRITE prevents a memfd from being written to through an mmap.
+TEST(MemfdTest, SealWriteWithMmap) {
+  const FileDescriptor memfd =
+      ASSERT_NO_ERRNO_AND_VALUE(MemfdCreate(kMemfdName, MFD_ALLOW_SEALING));
+  const std::vector<char> buf(kPageSize);
+  ASSERT_THAT(write(memfd.get(), buf.data(), buf.size()),
+              SyscallSucceedsWithValue(kPageSize));
+  ASSERT_THAT(fcntl(memfd.get(), F_ADD_SEALS, F_SEAL_WRITE), SyscallSucceeds());
+
+  // Can't create a shared mapping with writes sealed.
+  void* ret = mmap(nullptr, kPageSize, PROT_WRITE, MAP_SHARED, memfd.get(), 0);
+  EXPECT_EQ(ret, MAP_FAILED);
+  EXPECT_EQ(errno, EPERM);
+  ret = mmap(nullptr, kPageSize, PROT_READ, MAP_SHARED, memfd.get(), 0);
+  EXPECT_EQ(ret, MAP_FAILED);
+  EXPECT_EQ(errno, EPERM);
+
+  // However, private mappings are ok.
+  EXPECT_NO_ERRNO(Mmap(nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE,
+                       memfd.get(), 0));
+}
+
+// Adding F_SEAL_WRITE fails when there are outstanding writable mappings to a
+// memfd.
+TEST(MemfdTest, SealWriteWithOutstandingWritbleMapping) {
+  const FileDescriptor memfd =
+      ASSERT_NO_ERRNO_AND_VALUE(MemfdCreate(kMemfdName, MFD_ALLOW_SEALING));
+  const std::vector<char> buf(kPageSize);
+  ASSERT_THAT(write(memfd.get(), buf.data(), buf.size()),
+              SyscallSucceedsWithValue(kPageSize));
+
+  // Attempting to add F_SEAL_WRITE with active shared mapping with any set of
+  // permissions fails.
+
+  // Read-only shared mapping.
+  {
+    const Mapping m = ASSERT_NO_ERRNO_AND_VALUE(
+        Mmap(nullptr, kPageSize, PROT_READ, MAP_SHARED, memfd.get(), 0));
+    EXPECT_THAT(fcntl(memfd.get(), F_ADD_SEALS, F_SEAL_WRITE),
+                SyscallFailsWithErrno(EBUSY));
+  }
+
+  // Write-only shared mapping.
+  {
+    const Mapping m = ASSERT_NO_ERRNO_AND_VALUE(
+        Mmap(nullptr, kPageSize, PROT_WRITE, MAP_SHARED, memfd.get(), 0));
+    EXPECT_THAT(fcntl(memfd.get(), F_ADD_SEALS, F_SEAL_WRITE),
+                SyscallFailsWithErrno(EBUSY));
+  }
+
+  // Read-write shared mapping.
+  {
+    const Mapping m = ASSERT_NO_ERRNO_AND_VALUE(
+        Mmap(nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED,
+             memfd.get(), 0));
+    EXPECT_THAT(fcntl(memfd.get(), F_ADD_SEALS, F_SEAL_WRITE),
+                SyscallFailsWithErrno(EBUSY));
+  }
+
+  // F_SEAL_WRITE can be set with private mappings with any permissions.
+  {
+    const Mapping m = ASSERT_NO_ERRNO_AND_VALUE(
+        Mmap(nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE,
+             memfd.get(), 0));
+    EXPECT_THAT(fcntl(memfd.get(), F_ADD_SEALS, F_SEAL_WRITE),
+                SyscallSucceeds());
+  }
+}
+
+// When applying F_SEAL_WRITE fails due to outstanding writable mappings, any
+// additional seals passed to the same add seal call are also rejected.
+TEST(MemfdTest, NoPartialSealApplicationWhenWriteSealRejected) {
+  const FileDescriptor memfd =
+      ASSERT_NO_ERRNO_AND_VALUE(MemfdCreate(kMemfdName, MFD_ALLOW_SEALING));
+  const Mapping m = ASSERT_NO_ERRNO_AND_VALUE(Mmap(
+      nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, memfd.get(), 0));
+
+  // Try add some seals along with F_SEAL_WRITE. The seal application should
+  // fail since there exists an active shared mapping.
+  EXPECT_THAT(fcntl(memfd.get(), F_ADD_SEALS, F_SEAL_WRITE | F_SEAL_GROW),
+              SyscallFailsWithErrno(EBUSY));
+
+  // None of the seals should be applied.
+  EXPECT_THAT(fcntl(memfd.get(), F_GET_SEALS), SyscallSucceedsWithValue(0));
+}
+
+// Seals are inode level properties, and apply to all file descriptors referring
+// to a memfd.
+TEST(MemfdTest, SealsAreInodeLevelProperties) {
+  const FileDescriptor memfd =
+      ASSERT_NO_ERRNO_AND_VALUE(MemfdCreate(kMemfdName, MFD_ALLOW_SEALING));
+  const FileDescriptor memfd2 = ASSERT_NO_ERRNO_AND_VALUE(memfd.Dup());
+
+  // Add seal through the original memfd, and verify that it appears on the
+  // dupped fd.
+  ASSERT_THAT(fcntl(memfd.get(), F_ADD_SEALS, F_SEAL_WRITE), SyscallSucceeds());
+  EXPECT_THAT(fcntl(memfd2.get(), F_GET_SEALS),
+              SyscallSucceedsWithValue(F_SEAL_WRITE));
+
+  // Verify the seal actually applies to both fds.
+  std::vector<char> buf(kPageSize);
+  EXPECT_THAT(write(memfd.get(), buf.data(), buf.size()),
+              SyscallFailsWithErrno(EPERM));
+  EXPECT_THAT(write(memfd2.get(), buf.data(), buf.size()),
+              SyscallFailsWithErrno(EPERM));
+
+  // Seals are enforced on new FDs that are dupped after the seal is already
+  // applied.
+  const FileDescriptor memfd3 = ASSERT_NO_ERRNO_AND_VALUE(memfd2.Dup());
+  EXPECT_THAT(write(memfd3.get(), buf.data(), buf.size()),
+              SyscallFailsWithErrno(EPERM));
+
+  // Try a new seal applied to one of the dupped fds.
+  ASSERT_THAT(fcntl(memfd3.get(), F_ADD_SEALS, F_SEAL_GROW), SyscallSucceeds());
+  EXPECT_THAT(ftruncate(memfd.get(), kPageSize), SyscallFailsWithErrno(EPERM));
+  EXPECT_THAT(ftruncate(memfd2.get(), kPageSize), SyscallFailsWithErrno(EPERM));
+  EXPECT_THAT(ftruncate(memfd3.get(), kPageSize), SyscallFailsWithErrno(EPERM));
+}
+
+PosixErrorOr<bool> IsTmpfs(const std::string& path) {
+  struct statfs stat;
+  if (statfs(path.c_str(), &stat)) {
+    if (errno == ENOENT) {
+      // Nothing at path, don't raise this as an error. Instead, just report no
+      // tmpfs at path.
+      return false;
+    }
+    return PosixError(errno,
+                      absl::StrFormat("statfs(\"%s\", %#p)", path, &stat));
+  }
+  return stat.f_type == TMPFS_MAGIC;
+}
+
+// Tmpfs files also support seals, but are created with F_SEAL_SEAL.
+TEST(MemfdTest, TmpfsFilesHaveSealSeal) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsTmpfs("/tmp")));
+  const TempPath tmpfs_file =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn("/tmp"));
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(tmpfs_file.path(), O_RDWR, 0644));
+  EXPECT_THAT(fcntl(fd.get(), F_GET_SEALS),
+              SyscallSucceedsWithValue(F_SEAL_SEAL));
+}
+
+// Can open a memfd from procfs and use as normal.
+TEST(MemfdTest, CanOpenFromProcfs) {
+  const FileDescriptor memfd =
+      ASSERT_NO_ERRNO_AND_VALUE(MemfdCreate(kMemfdName, MFD_ALLOW_SEALING));
+
+  // Write a random page of data to the memfd via write(2).
+  std::vector<char> buf(kPageSize);
+  RandomizeBuffer(buf.data(), buf.size());
+  ASSERT_THAT(write(memfd.get(), buf.data(), buf.size()),
+              SyscallSucceedsWithValue(kPageSize));
+
+  // Read back the same data from the fd obtained from procfs and verify.
+  const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(
+      Open(absl::StrFormat("/proc/self/fd/%d", memfd.get()), O_RDWR));
+  std::vector<char> buf2(kPageSize);
+  EXPECT_THAT(pread(fd.get(), buf2.data(), buf2.size(), 0),
+              SyscallSucceedsWithValue(kPageSize));
+  EXPECT_EQ(buf, buf2);
+}
+
+// Test that memfd permissions are set up correctly to allow another process to
+// open it from procfs.
+TEST(MemfdTest, OtherProcessCanOpenFromProcfs) {
+  const FileDescriptor memfd =
+      ASSERT_NO_ERRNO_AND_VALUE(MemfdCreate(kMemfdName, MFD_ALLOW_SEALING));
+  const auto memfd_path =
+      absl::StrFormat("/proc/%d/fd/%d", getpid(), memfd.get());
+  const auto rest = [&] {
+    int fd = open(memfd_path.c_str(), O_RDWR);
+    TEST_PCHECK(fd >= 0);
+    TEST_PCHECK(close(fd) >= 0);
+  };
+  EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0));
+}
+
+// Test that only files opened as writable can have seals applied to them.
+// Normally there's no way to specify file permissions on memfds, but we can
+// obtain a read-only memfd by opening the corresponding procfs fd entry as
+// read-only.
+TEST(MemfdTest, MemfdMustBeWritableToModifySeals) {
+  const FileDescriptor memfd =
+      ASSERT_NO_ERRNO_AND_VALUE(MemfdCreate(kMemfdName, MFD_ALLOW_SEALING));
+
+  // Initially adding a seal works.
+  EXPECT_THAT(fcntl(memfd.get(), F_ADD_SEALS, F_SEAL_WRITE), SyscallSucceeds());
+
+  // Re-open the memfd as read-only from procfs.
+  const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(
+      Open(absl::StrFormat("/proc/self/fd/%d", memfd.get()), O_RDONLY));
+
+  // Can't add seals through an unwritable fd.
+  EXPECT_THAT(fcntl(fd.get(), F_ADD_SEALS, F_SEAL_GROW),
+              SyscallFailsWithErrno(EPERM));
+}
+
+// Test that the memfd implementation internally tracks potentially writable
+// maps correctly.
+TEST(MemfdTest, MultipleWritableAndNonWritableRefsToSameFileRegion) {
+  const FileDescriptor memfd =
+      ASSERT_NO_ERRNO_AND_VALUE(MemfdCreate(kMemfdName, 0));
+
+  // Populate with a random page of data.
+  std::vector<char> buf(kPageSize);
+  RandomizeBuffer(buf.data(), buf.size());
+  ASSERT_THAT(write(memfd.get(), buf.data(), buf.size()),
+              SyscallSucceedsWithValue(kPageSize));
+
+  // Read-only map to the page. This should cause an initial mapping to be
+  // created.
+  Mapping m1 = ASSERT_NO_ERRNO_AND_VALUE(
+      Mmap(nullptr, kPageSize, PROT_READ, MAP_PRIVATE, memfd.get(), 0));
+
+  // Create a shared writable map to the page. This should cause the internal
+  // mapping to become potentially writable.
+  Mapping m2 = ASSERT_NO_ERRNO_AND_VALUE(Mmap(
+      nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, memfd.get(), 0));
+
+  // Drop the read-only mapping first. If writable-ness isn't tracked correctly,
+  // this can cause some misaccounting, which can trigger asserts internally.
+  m1.reset();
+  m2.reset();
+}
+
+}  // namespace
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/memory_accounting.cc b/test/syscalls/linux/memory_accounting.cc
new file mode 100644
index 000000000..94aea4077
--- /dev/null
+++ b/test/syscalls/linux/memory_accounting.cc
@@ -0,0 +1,99 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sys/mman.h>
+
+#include <map>
+
+#include "gtest/gtest.h"
+#include "absl/strings/match.h"
+#include "absl/strings/numbers.h"
+#include "absl/strings/str_format.h"
+#include "absl/strings/str_split.h"
+#include "test/util/fs_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+using ::absl::StrFormat;
+
+// AnonUsageFromMeminfo scrapes the current anonymous memory usage from
+// /proc/meminfo and returns it in bytes.
+PosixErrorOr<uint64_t> AnonUsageFromMeminfo() {
+  ASSIGN_OR_RETURN_ERRNO(auto meminfo, GetContents("/proc/meminfo"));
+  std::vector<std::string> lines(absl::StrSplit(meminfo, '\n'));
+
+  // Try to find AnonPages line, the format is AnonPages:\\s+(\\d+) kB\n.
+  for (const auto& line : lines) {
+    if (!absl::StartsWith(line, "AnonPages:")) {
+      continue;
+    }
+
+    std::vector<std::string> parts(
+        absl::StrSplit(line, ' ', absl::SkipEmpty()));
+    if (parts.size() == 3) {
+      // The size is the second field, let's try to parse it as a number.
+      ASSIGN_OR_RETURN_ERRNO(auto anon_kb, Atoi<uint64_t>(parts[1]));
+      return anon_kb * 1024;
+    }
+
+    return PosixError(EINVAL, "AnonPages field in /proc/meminfo was malformed");
+  }
+
+  return PosixError(EINVAL, "AnonPages field not found in /proc/meminfo");
+}
+
+TEST(MemoryAccounting, AnonAccountingPreservedOnSaveRestore) {
+  // This test isn't meaningful on Linux. /proc/meminfo reports system-wide
+  // memory usage, which can change arbitrarily in Linux from other activity on
+  // the machine. In gvisor, this test is the only thing running on the
+  // "machine", so values in /proc/meminfo accurately reflect the memory used by
+  // the test.
+  SKIP_IF(!IsRunningOnGvisor());
+
+  uint64_t anon_initial = ASSERT_NO_ERRNO_AND_VALUE(AnonUsageFromMeminfo());
+
+  // Cause some anonymous memory usage.
+  uint64_t map_bytes = Megabytes(512);
+  char* mem =
+      static_cast<char*>(mmap(nullptr, map_bytes, PROT_READ | PROT_WRITE,
+                              MAP_POPULATE | MAP_ANON | MAP_PRIVATE, -1, 0));
+  ASSERT_NE(mem, MAP_FAILED)
+      << "Map failed, errno: " << errno << " (" << strerror(errno) << ").";
+
+  // Write something to each page to prevent them from being decommited on
+  // S/R. Zero pages are dropped on save.
+  for (uint64_t i = 0; i < map_bytes; i += kPageSize) {
+    mem[i] = 'a';
+  }
+
+  uint64_t anon_after_alloc = ASSERT_NO_ERRNO_AND_VALUE(AnonUsageFromMeminfo());
+  EXPECT_THAT(anon_after_alloc,
+              EquivalentWithin(anon_initial + map_bytes, 0.03));
+
+  // We have many implicit S/R cycles from scraping /proc/meminfo throughout the
+  // test, but throw an explicit S/R in here as well.
+  MaybeSave();
+
+  // Usage should remain the same across S/R.
+  uint64_t anon_after_sr = ASSERT_NO_ERRNO_AND_VALUE(AnonUsageFromMeminfo());
+  EXPECT_THAT(anon_after_sr, EquivalentWithin(anon_after_alloc, 0.03));
+}
+
+}  // namespace
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/mempolicy.cc b/test/syscalls/linux/mempolicy.cc
new file mode 100644
index 000000000..059fad598
--- /dev/null
+++ b/test/syscalls/linux/mempolicy.cc
@@ -0,0 +1,289 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <sys/syscall.h>
+
+#include "gtest/gtest.h"
+#include "absl/memory/memory.h"
+#include "test/util/cleanup.h"
+#include "test/util/memory_util.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+#define BITS_PER_BYTE 8
+
+#define MPOL_F_STATIC_NODES (1 << 15)
+#define MPOL_F_RELATIVE_NODES (1 << 14)
+#define MPOL_DEFAULT 0
+#define MPOL_PREFERRED 1
+#define MPOL_BIND 2
+#define MPOL_INTERLEAVE 3
+#define MPOL_LOCAL 4
+#define MPOL_F_NODE (1 << 0)
+#define MPOL_F_ADDR (1 << 1)
+#define MPOL_F_MEMS_ALLOWED (1 << 2)
+#define MPOL_MF_STRICT (1 << 0)
+#define MPOL_MF_MOVE (1 << 1)
+#define MPOL_MF_MOVE_ALL (1 << 2)
+
+int get_mempolicy(int* policy, uint64_t* nmask, uint64_t maxnode, void* addr,
+                  int flags) {
+  return syscall(SYS_get_mempolicy, policy, nmask, maxnode, addr, flags);
+}
+
+int set_mempolicy(int mode, uint64_t* nmask, uint64_t maxnode) {
+  return syscall(SYS_set_mempolicy, mode, nmask, maxnode);
+}
+
+int mbind(void* addr, unsigned long len, int mode,
+          const unsigned long* nodemask, unsigned long maxnode,
+          unsigned flags) {
+  return syscall(SYS_mbind, addr, len, mode, nodemask, maxnode, flags);
+}
+
+// Creates a cleanup object that resets the calling thread's mempolicy to the
+// system default when the calling scope ends.
+Cleanup ScopedMempolicy() {
+  return Cleanup([] {
+    EXPECT_THAT(set_mempolicy(MPOL_DEFAULT, nullptr, 0), SyscallSucceeds());
+  });
+}
+
+// Temporarily change the memory policy for the calling thread within the
+// caller's scope.
+PosixErrorOr<Cleanup> ScopedSetMempolicy(int mode, uint64_t* nmask,
+                                         uint64_t maxnode) {
+  if (set_mempolicy(mode, nmask, maxnode)) {
+    return PosixError(errno, "set_mempolicy");
+  }
+  return ScopedMempolicy();
+}
+
+TEST(MempolicyTest, CheckDefaultPolicy) {
+  int mode = 0;
+  uint64_t nodemask = 0;
+  ASSERT_THAT(get_mempolicy(&mode, &nodemask, sizeof(nodemask) * BITS_PER_BYTE,
+                            nullptr, 0),
+              SyscallSucceeds());
+
+  EXPECT_EQ(MPOL_DEFAULT, mode);
+  EXPECT_EQ(0x0, nodemask);
+}
+
+TEST(MempolicyTest, PolicyPreservedAfterSetMempolicy) {
+  uint64_t nodemask = 0x1;
+  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSetMempolicy(
+      MPOL_BIND, &nodemask, sizeof(nodemask) * BITS_PER_BYTE));
+
+  int mode = 0;
+  uint64_t nodemask_after = 0x0;
+  ASSERT_THAT(get_mempolicy(&mode, &nodemask_after,
+                            sizeof(nodemask_after) * BITS_PER_BYTE, nullptr, 0),
+              SyscallSucceeds());
+  EXPECT_EQ(MPOL_BIND, mode);
+  EXPECT_EQ(0x1, nodemask_after);
+
+  // Try throw in some mode flags.
+  for (auto mode_flag : {MPOL_F_STATIC_NODES, MPOL_F_RELATIVE_NODES}) {
+    auto cleanup2 = ASSERT_NO_ERRNO_AND_VALUE(
+        ScopedSetMempolicy(MPOL_INTERLEAVE | mode_flag, &nodemask,
+                           sizeof(nodemask) * BITS_PER_BYTE));
+    mode = 0;
+    nodemask_after = 0x0;
+    ASSERT_THAT(
+        get_mempolicy(&mode, &nodemask_after,
+                      sizeof(nodemask_after) * BITS_PER_BYTE, nullptr, 0),
+        SyscallSucceeds());
+    EXPECT_EQ(MPOL_INTERLEAVE | mode_flag, mode);
+    EXPECT_EQ(0x1, nodemask_after);
+  }
+}
+
+TEST(MempolicyTest, SetMempolicyRejectsInvalidInputs) {
+  auto cleanup = ScopedMempolicy();
+  uint64_t nodemask;
+
+  if (IsRunningOnGvisor()) {
+    // Invalid nodemask, we only support a single node on gvisor.
+    nodemask = 0x4;
+    ASSERT_THAT(set_mempolicy(MPOL_DEFAULT, &nodemask,
+                              sizeof(nodemask) * BITS_PER_BYTE),
+                SyscallFailsWithErrno(EINVAL));
+  }
+
+  nodemask = 0x1;
+
+  // Invalid mode.
+  ASSERT_THAT(set_mempolicy(7439, &nodemask, sizeof(nodemask) * BITS_PER_BYTE),
+              SyscallFailsWithErrno(EINVAL));
+
+  // Invalid nodemask size.
+  ASSERT_THAT(set_mempolicy(MPOL_DEFAULT, &nodemask, 0),
+              SyscallFailsWithErrno(EINVAL));
+
+  // Invalid mode flag.
+  ASSERT_THAT(
+      set_mempolicy(MPOL_DEFAULT | MPOL_F_STATIC_NODES | MPOL_F_RELATIVE_NODES,
+                    &nodemask, sizeof(nodemask) * BITS_PER_BYTE),
+      SyscallFailsWithErrno(EINVAL));
+
+  // MPOL_INTERLEAVE with empty nodemask.
+  nodemask = 0x0;
+  ASSERT_THAT(set_mempolicy(MPOL_INTERLEAVE, &nodemask,
+                            sizeof(nodemask) * BITS_PER_BYTE),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+// The manpages specify that the nodemask provided to set_mempolicy are
+// considered empty if the nodemask pointer is null, or if the nodemask size is
+// 0. We use a policy which accepts both empty and non-empty nodemasks
+// (MPOL_PREFERRED), a policy which requires a non-empty nodemask (MPOL_BIND),
+// and a policy which completely ignores the nodemask (MPOL_DEFAULT) to verify
+// argument checking around nodemasks.
+TEST(MempolicyTest, EmptyNodemaskOnSet) {
+  auto cleanup = ScopedMempolicy();
+
+  EXPECT_THAT(set_mempolicy(MPOL_DEFAULT, nullptr, 1), SyscallSucceeds());
+  EXPECT_THAT(set_mempolicy(MPOL_BIND, nullptr, 1),
+              SyscallFailsWithErrno(EINVAL));
+  EXPECT_THAT(set_mempolicy(MPOL_PREFERRED, nullptr, 1), SyscallSucceeds());
+
+  uint64_t nodemask = 0x1;
+  EXPECT_THAT(set_mempolicy(MPOL_DEFAULT, &nodemask, 0),
+              SyscallFailsWithErrno(EINVAL));
+  EXPECT_THAT(set_mempolicy(MPOL_BIND, &nodemask, 0),
+              SyscallFailsWithErrno(EINVAL));
+  EXPECT_THAT(set_mempolicy(MPOL_PREFERRED, &nodemask, 0),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(MempolicyTest, QueryAvailableNodes) {
+  uint64_t nodemask = 0;
+  ASSERT_THAT(
+      get_mempolicy(nullptr, &nodemask, sizeof(nodemask) * BITS_PER_BYTE,
+                    nullptr, MPOL_F_MEMS_ALLOWED),
+      SyscallSucceeds());
+  // We can only be sure there is a single node if running on gvisor.
+  if (IsRunningOnGvisor()) {
+    EXPECT_EQ(0x1, nodemask);
+  }
+
+  // MPOL_F_ADDR and MPOL_F_NODE flags may not be combined with
+  // MPOL_F_MEMS_ALLLOWED.
+  for (auto flags :
+       {MPOL_F_MEMS_ALLOWED | MPOL_F_ADDR, MPOL_F_MEMS_ALLOWED | MPOL_F_NODE,
+        MPOL_F_MEMS_ALLOWED | MPOL_F_ADDR | MPOL_F_NODE}) {
+    ASSERT_THAT(get_mempolicy(nullptr, &nodemask,
+                              sizeof(nodemask) * BITS_PER_BYTE, nullptr, flags),
+                SyscallFailsWithErrno(EINVAL));
+  }
+}
+
+TEST(MempolicyTest, GetMempolicyQueryNodeForAddress) {
+  uint64_t dummy_stack_address;
+  auto dummy_heap_address = absl::make_unique<uint64_t>();
+  int mode;
+
+  for (auto ptr : {&dummy_stack_address, dummy_heap_address.get()}) {
+    mode = -1;
+    ASSERT_THAT(
+        get_mempolicy(&mode, nullptr, 0, ptr, MPOL_F_ADDR | MPOL_F_NODE),
+        SyscallSucceeds());
+    // If we're not running on gvisor, the address may be allocated on a
+    // different numa node.
+    if (IsRunningOnGvisor()) {
+      EXPECT_EQ(0, mode);
+    }
+  }
+
+  void* invalid_address = reinterpret_cast<void*>(-1);
+
+  // Invalid address.
+  ASSERT_THAT(get_mempolicy(&mode, nullptr, 0, invalid_address,
+                            MPOL_F_ADDR | MPOL_F_NODE),
+              SyscallFailsWithErrno(EFAULT));
+
+  // Invalid mode pointer.
+  ASSERT_THAT(get_mempolicy(reinterpret_cast<int*>(invalid_address), nullptr, 0,
+                            &dummy_stack_address, MPOL_F_ADDR | MPOL_F_NODE),
+              SyscallFailsWithErrno(EFAULT));
+}
+
+TEST(MempolicyTest, GetMempolicyCanOmitPointers) {
+  int mode;
+  uint64_t nodemask;
+
+  // Omit nodemask pointer.
+  ASSERT_THAT(get_mempolicy(&mode, nullptr, 0, nullptr, 0), SyscallSucceeds());
+  // Omit mode pointer.
+  ASSERT_THAT(get_mempolicy(nullptr, &nodemask,
+                            sizeof(nodemask) * BITS_PER_BYTE, nullptr, 0),
+              SyscallSucceeds());
+  // Omit both pointers.
+  ASSERT_THAT(get_mempolicy(nullptr, nullptr, 0, nullptr, 0),
+              SyscallSucceeds());
+}
+
+TEST(MempolicyTest, GetMempolicyNextInterleaveNode) {
+  int mode;
+  // Policy for thread not yet set to MPOL_INTERLEAVE, can't query for
+  // the next node which will be used for allocation.
+  ASSERT_THAT(get_mempolicy(&mode, nullptr, 0, nullptr, MPOL_F_NODE),
+              SyscallFailsWithErrno(EINVAL));
+
+  // Set default policy for thread to MPOL_INTERLEAVE.
+  uint64_t nodemask = 0x1;
+  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSetMempolicy(
+      MPOL_INTERLEAVE, &nodemask, sizeof(nodemask) * BITS_PER_BYTE));
+
+  mode = -1;
+  ASSERT_THAT(get_mempolicy(&mode, nullptr, 0, nullptr, MPOL_F_NODE),
+              SyscallSucceeds());
+  EXPECT_EQ(0, mode);
+}
+
+TEST(MempolicyTest, Mbind) {
+  // Temporarily set the thread policy to MPOL_PREFERRED.
+  const auto cleanup_thread_policy =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedSetMempolicy(MPOL_PREFERRED, nullptr, 0));
+
+  const auto mapping = ASSERT_NO_ERRNO_AND_VALUE(
+      MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS));
+
+  // vmas default to MPOL_DEFAULT irrespective of the thread policy (currently
+  // MPOL_PREFERRED).
+  int mode;
+  ASSERT_THAT(get_mempolicy(&mode, nullptr, 0, mapping.ptr(), MPOL_F_ADDR),
+              SyscallSucceeds());
+  EXPECT_EQ(mode, MPOL_DEFAULT);
+
+  // Set MPOL_PREFERRED for the vma and read it back.
+  ASSERT_THAT(
+      mbind(mapping.ptr(), mapping.len(), MPOL_PREFERRED, nullptr, 0, 0),
+      SyscallSucceeds());
+  ASSERT_THAT(get_mempolicy(&mode, nullptr, 0, mapping.ptr(), MPOL_F_ADDR),
+              SyscallSucceeds());
+  EXPECT_EQ(mode, MPOL_PREFERRED);
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/mincore.cc b/test/syscalls/linux/mincore.cc
new file mode 100644
index 000000000..5c1240c89
--- /dev/null
+++ b/test/syscalls/linux/mincore.cc
@@ -0,0 +1,96 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <stdint.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include <algorithm>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "test/util/memory_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+size_t CountSetLSBs(std::vector<unsigned char> const& vec) {
+  return std::count_if(begin(vec), end(vec),
+                       [](unsigned char c) { return (c & 1) != 0; });
+}
+
+TEST(MincoreTest, DirtyAnonPagesAreResident) {
+  constexpr size_t kTestPageCount = 10;
+  auto const kTestMappingBytes = kTestPageCount * kPageSize;
+  auto m = ASSERT_NO_ERRNO_AND_VALUE(
+      MmapAnon(kTestMappingBytes, PROT_READ | PROT_WRITE, MAP_PRIVATE));
+  memset(m.ptr(), 0, m.len());
+
+  std::vector<unsigned char> vec(kTestPageCount, 0);
+  ASSERT_THAT(mincore(m.ptr(), kTestMappingBytes, vec.data()),
+              SyscallSucceeds());
+  EXPECT_EQ(kTestPageCount, CountSetLSBs(vec));
+}
+
+TEST(MincoreTest, UnalignedAddressFails) {
+  // Map and touch two pages, then try to mincore the second half of the first
+  // page + the first half of the second page. Both pages are mapped, but
+  // mincore should return EINVAL due to the misaligned start address.
+  constexpr size_t kTestPageCount = 2;
+  auto const kTestMappingBytes = kTestPageCount * kPageSize;
+  auto m = ASSERT_NO_ERRNO_AND_VALUE(
+      MmapAnon(kTestMappingBytes, PROT_READ | PROT_WRITE, MAP_PRIVATE));
+  memset(m.ptr(), 0, m.len());
+
+  std::vector<unsigned char> vec(kTestPageCount, 0);
+  EXPECT_THAT(mincore(reinterpret_cast<void*>(m.addr() + kPageSize / 2),
+                      kPageSize, vec.data()),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(MincoreTest, UnalignedLengthSucceedsAndIsRoundedUp) {
+  // Map and touch two pages, then try to mincore the first page + the first
+  // half of the second page. mincore should silently round up the length to
+  // include both pages.
+  constexpr size_t kTestPageCount = 2;
+  auto const kTestMappingBytes = kTestPageCount * kPageSize;
+  auto m = ASSERT_NO_ERRNO_AND_VALUE(
+      MmapAnon(kTestMappingBytes, PROT_READ | PROT_WRITE, MAP_PRIVATE));
+  memset(m.ptr(), 0, m.len());
+
+  std::vector<unsigned char> vec(kTestPageCount, 0);
+  ASSERT_THAT(mincore(m.ptr(), kPageSize + kPageSize / 2, vec.data()),
+              SyscallSucceeds());
+  EXPECT_EQ(kTestPageCount, CountSetLSBs(vec));
+}
+
+TEST(MincoreTest, ZeroLengthSucceedsAndAllowsAnyVecBelowTaskSize) {
+  EXPECT_THAT(mincore(nullptr, 0, nullptr), SyscallSucceeds());
+}
+
+TEST(MincoreTest, InvalidLengthFails) {
+  EXPECT_THAT(mincore(nullptr, -1, nullptr), SyscallFailsWithErrno(ENOMEM));
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/mkdir.cc b/test/syscalls/linux/mkdir.cc
new file mode 100644
index 000000000..4036a9275
--- /dev/null
+++ b/test/syscalls/linux/mkdir.cc
@@ -0,0 +1,88 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "test/util/capability_util.h"
+#include "test/util/fs_util.h"
+#include "test/util/temp_path.h"
+#include "test/util/temp_umask.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+class MkdirTest : public ::testing::Test {
+ protected:
+  // SetUp creates various configurations of files.
+  void SetUp() override { dirname_ = NewTempAbsPath(); }
+
+  // TearDown unlinks created files.
+  void TearDown() override {
+    EXPECT_THAT(rmdir(dirname_.c_str()), SyscallSucceeds());
+  }
+
+  std::string dirname_;
+};
+
+TEST_F(MkdirTest, CanCreateWritableDir) {
+  ASSERT_THAT(mkdir(dirname_.c_str(), 0777), SyscallSucceeds());
+  std::string filename = JoinPath(dirname_, "anything");
+  int fd;
+  ASSERT_THAT(fd = open(filename.c_str(), O_RDWR | O_CREAT, 0666),
+              SyscallSucceeds());
+  EXPECT_THAT(close(fd), SyscallSucceeds());
+  ASSERT_THAT(unlink(filename.c_str()), SyscallSucceeds());
+}
+
+TEST_F(MkdirTest, HonorsUmask) {
+  constexpr mode_t kMask = 0111;
+  TempUmask mask(kMask);
+  ASSERT_THAT(mkdir(dirname_.c_str(), 0777), SyscallSucceeds());
+  struct stat statbuf;
+  ASSERT_THAT(stat(dirname_.c_str(), &statbuf), SyscallSucceeds());
+  EXPECT_EQ(0777 & ~kMask, statbuf.st_mode & 0777);
+}
+
+TEST_F(MkdirTest, HonorsUmask2) {
+  constexpr mode_t kMask = 0142;
+  TempUmask mask(kMask);
+  ASSERT_THAT(mkdir(dirname_.c_str(), 0777), SyscallSucceeds());
+  struct stat statbuf;
+  ASSERT_THAT(stat(dirname_.c_str(), &statbuf), SyscallSucceeds());
+  EXPECT_EQ(0777 & ~kMask, statbuf.st_mode & 0777);
+}
+
+TEST_F(MkdirTest, FailsOnDirWithoutWritePerms) {
+  // Drop capabilities that allow us to override file and directory permissions.
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false));
+
+  ASSERT_THAT(mkdir(dirname_.c_str(), 0555), SyscallSucceeds());
+  auto dir = JoinPath(dirname_.c_str(), "foo");
+  EXPECT_THAT(mkdir(dir.c_str(), 0777), SyscallFailsWithErrno(EACCES));
+  EXPECT_THAT(open(JoinPath(dirname_, "file").c_str(), O_RDWR | O_CREAT, 0666),
+              SyscallFailsWithErrno(EACCES));
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/mknod.cc b/test/syscalls/linux/mknod.cc
new file mode 100644
index 000000000..05dfb375a
--- /dev/null
+++ b/test/syscalls/linux/mknod.cc
@@ -0,0 +1,190 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#include <unistd.h>
+
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(MknodTest, RegularFile) {
+  const std::string node0 = NewTempAbsPath();
+  EXPECT_THAT(mknod(node0.c_str(), S_IFREG, 0), SyscallSucceeds());
+
+  const std::string node1 = NewTempAbsPath();
+  EXPECT_THAT(mknod(node1.c_str(), 0, 0), SyscallSucceeds());
+}
+
+TEST(MknodTest, RegularFilePermissions) {
+  const std::string node = NewTempAbsPath();
+  mode_t newUmask = 0077;
+  umask(newUmask);
+
+  // Attempt to open file with mode 0777. Not specifying file type should create
+  // a regualar file.
+  mode_t perms = S_IRWXU | S_IRWXG | S_IRWXO;
+  EXPECT_THAT(mknod(node.c_str(), perms, 0), SyscallSucceeds());
+
+  // In the absence of a default ACL, the permissions of the created node are
+  // (mode & ~umask).  -- mknod(2)
+  mode_t wantPerms = perms & ~newUmask;
+  struct stat st;
+  ASSERT_THAT(stat(node.c_str(), &st), SyscallSucceeds());
+  ASSERT_EQ(st.st_mode & 0777, wantPerms);
+
+  // "Zero file type is equivalent to type S_IFREG." - mknod(2)
+  ASSERT_EQ(st.st_mode & S_IFMT, S_IFREG);
+}
+
+TEST(MknodTest, MknodAtFIFO) {
+  const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const std::string fifo_relpath = NewTempRelPath();
+  const std::string fifo = JoinPath(dir.path(), fifo_relpath);
+
+  const FileDescriptor dirfd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(dir.path().c_str(), O_RDONLY));
+  ASSERT_THAT(mknodat(dirfd.get(), fifo_relpath.c_str(), S_IFIFO | S_IRUSR, 0),
+              SyscallSucceeds());
+
+  struct stat st;
+  ASSERT_THAT(stat(fifo.c_str(), &st), SyscallSucceeds());
+  EXPECT_TRUE(S_ISFIFO(st.st_mode));
+}
+
+TEST(MknodTest, MknodOnExistingPathFails) {
+  const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  const TempPath slink = ASSERT_NO_ERRNO_AND_VALUE(
+      TempPath::CreateSymlinkTo(GetAbsoluteTestTmpdir(), file.path()));
+
+  EXPECT_THAT(mknod(file.path().c_str(), S_IFREG, 0),
+              SyscallFailsWithErrno(EEXIST));
+  EXPECT_THAT(mknod(file.path().c_str(), S_IFIFO, 0),
+              SyscallFailsWithErrno(EEXIST));
+  EXPECT_THAT(mknod(slink.path().c_str(), S_IFREG, 0),
+              SyscallFailsWithErrno(EEXIST));
+  EXPECT_THAT(mknod(slink.path().c_str(), S_IFIFO, 0),
+              SyscallFailsWithErrno(EEXIST));
+}
+
+TEST(MknodTest, UnimplementedTypesReturnError) {
+  const std::string path = NewTempAbsPath();
+
+  if (IsRunningWithVFS1()) {
+    ASSERT_THAT(mknod(path.c_str(), S_IFSOCK, 0),
+                SyscallFailsWithErrno(EOPNOTSUPP));
+  }
+  // These will fail on linux as well since we don't have CAP_MKNOD.
+  ASSERT_THAT(mknod(path.c_str(), S_IFCHR, 0), SyscallFailsWithErrno(EPERM));
+  ASSERT_THAT(mknod(path.c_str(), S_IFBLK, 0), SyscallFailsWithErrno(EPERM));
+}
+
+TEST(MknodTest, Fifo) {
+  const std::string fifo = NewTempAbsPath();
+  ASSERT_THAT(mknod(fifo.c_str(), S_IFIFO | S_IRUSR | S_IWUSR, 0),
+              SyscallSucceeds());
+
+  struct stat st;
+  ASSERT_THAT(stat(fifo.c_str(), &st), SyscallSucceeds());
+  EXPECT_TRUE(S_ISFIFO(st.st_mode));
+
+  std::string msg = "some std::string";
+  std::vector<char> buf(512);
+
+  // Read-end of the pipe.
+  ScopedThread t([&fifo, &buf, &msg]() {
+    FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(fifo.c_str(), O_RDONLY));
+    EXPECT_THAT(ReadFd(fd.get(), buf.data(), buf.size()),
+                SyscallSucceedsWithValue(msg.length()));
+    EXPECT_EQ(msg, std::string(buf.data()));
+  });
+
+  // Write-end of the pipe.
+  FileDescriptor wfd = ASSERT_NO_ERRNO_AND_VALUE(Open(fifo.c_str(), O_WRONLY));
+  EXPECT_THAT(WriteFd(wfd.get(), msg.c_str(), msg.length()),
+              SyscallSucceedsWithValue(msg.length()));
+}
+
+TEST(MknodTest, FifoOtrunc) {
+  const std::string fifo = NewTempAbsPath();
+  ASSERT_THAT(mknod(fifo.c_str(), S_IFIFO | S_IRUSR | S_IWUSR, 0),
+              SyscallSucceeds());
+
+  struct stat st = {};
+  ASSERT_THAT(stat(fifo.c_str(), &st), SyscallSucceeds());
+  EXPECT_TRUE(S_ISFIFO(st.st_mode));
+
+  std::string msg = "some std::string";
+  std::vector<char> buf(512);
+  // Read-end of the pipe.
+  ScopedThread t([&fifo, &buf, &msg]() {
+    FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(fifo.c_str(), O_RDONLY));
+    EXPECT_THAT(ReadFd(fd.get(), buf.data(), buf.size()),
+                SyscallSucceedsWithValue(msg.length()));
+    EXPECT_EQ(msg, std::string(buf.data()));
+  });
+
+  // Write-end of the pipe.
+  FileDescriptor wfd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(fifo.c_str(), O_WRONLY | O_TRUNC));
+  EXPECT_THAT(WriteFd(wfd.get(), msg.c_str(), msg.length()),
+              SyscallSucceedsWithValue(msg.length()));
+}
+
+TEST(MknodTest, FifoTruncNoOp) {
+  const std::string fifo = NewTempAbsPath();
+  ASSERT_THAT(mknod(fifo.c_str(), S_IFIFO | S_IRUSR | S_IWUSR, 0),
+              SyscallSucceeds());
+
+  EXPECT_THAT(truncate(fifo.c_str(), 0), SyscallFailsWithErrno(EINVAL));
+
+  struct stat st = {};
+  ASSERT_THAT(stat(fifo.c_str(), &st), SyscallSucceeds());
+  EXPECT_TRUE(S_ISFIFO(st.st_mode));
+
+  std::string msg = "some std::string";
+  std::vector<char> buf(512);
+  // Read-end of the pipe.
+  ScopedThread t([&fifo, &buf, &msg]() {
+    FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(fifo.c_str(), O_RDONLY));
+    EXPECT_THAT(ReadFd(fd.get(), buf.data(), buf.size()),
+                SyscallSucceedsWithValue(msg.length()));
+    EXPECT_EQ(msg, std::string(buf.data()));
+  });
+
+  FileDescriptor wfd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(fifo.c_str(), O_WRONLY | O_TRUNC));
+  EXPECT_THAT(ftruncate(wfd.get(), 0), SyscallFailsWithErrno(EINVAL));
+  EXPECT_THAT(WriteFd(wfd.get(), msg.c_str(), msg.length()),
+              SyscallSucceedsWithValue(msg.length()));
+  EXPECT_THAT(ftruncate(wfd.get(), 0), SyscallFailsWithErrno(EINVAL));
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/mlock.cc b/test/syscalls/linux/mlock.cc
new file mode 100644
index 000000000..78ac96bed
--- /dev/null
+++ b/test/syscalls/linux/mlock.cc
@@ -0,0 +1,332 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#include <cerrno>
+#include <cstring>
+
+#include "gmock/gmock.h"
+#include "test/util/capability_util.h"
+#include "test/util/cleanup.h"
+#include "test/util/memory_util.h"
+#include "test/util/multiprocess_util.h"
+#include "test/util/rlimit_util.h"
+#include "test/util/test_util.h"
+
+using ::testing::_;
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+PosixErrorOr<bool> CanMlock() {
+  struct rlimit rlim;
+  if (getrlimit(RLIMIT_MEMLOCK, &rlim) < 0) {
+    return PosixError(errno, "getrlimit(RLIMIT_MEMLOCK)");
+  }
+  if (rlim.rlim_cur != 0) {
+    return true;
+  }
+  return HaveCapability(CAP_IPC_LOCK);
+}
+
+// Returns true if the page containing addr is mlocked.
+bool IsPageMlocked(uintptr_t addr) {
+  // This relies on msync(MS_INVALIDATE) interacting correctly with mlocked
+  // pages, which is tested for by the MsyncInvalidate case below.
+  int const rv = msync(reinterpret_cast<void*>(addr & ~(kPageSize - 1)),
+                       kPageSize, MS_ASYNC | MS_INVALIDATE);
+  if (rv == 0) {
+    return false;
+  }
+  // This uses TEST_PCHECK_MSG since it's used in subprocesses.
+  TEST_PCHECK_MSG(errno == EBUSY, "msync failed with unexpected errno");
+  return true;
+}
+
+TEST(MlockTest, Basic) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock()));
+  auto const mapping = ASSERT_NO_ERRNO_AND_VALUE(
+      MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE));
+  EXPECT_FALSE(IsPageMlocked(mapping.addr()));
+  ASSERT_THAT(mlock(mapping.ptr(), mapping.len()), SyscallSucceeds());
+  EXPECT_TRUE(IsPageMlocked(mapping.addr()));
+}
+
+TEST(MlockTest, ProtNone) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock()));
+  auto const mapping =
+      ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_NONE, MAP_PRIVATE));
+  EXPECT_FALSE(IsPageMlocked(mapping.addr()));
+  ASSERT_THAT(mlock(mapping.ptr(), mapping.len()),
+              SyscallFailsWithErrno(ENOMEM));
+  // ENOMEM is returned because mlock can't populate the page, but it's still
+  // considered locked.
+  EXPECT_TRUE(IsPageMlocked(mapping.addr()));
+}
+
+TEST(MlockTest, MadviseDontneed) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock()));
+  auto const mapping = ASSERT_NO_ERRNO_AND_VALUE(
+      MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE));
+  ASSERT_THAT(mlock(mapping.ptr(), mapping.len()), SyscallSucceeds());
+  EXPECT_THAT(madvise(mapping.ptr(), mapping.len(), MADV_DONTNEED),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(MlockTest, MsyncInvalidate) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock()));
+  auto const mapping = ASSERT_NO_ERRNO_AND_VALUE(
+      MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE));
+  ASSERT_THAT(mlock(mapping.ptr(), mapping.len()), SyscallSucceeds());
+  EXPECT_THAT(msync(mapping.ptr(), mapping.len(), MS_ASYNC | MS_INVALIDATE),
+              SyscallFailsWithErrno(EBUSY));
+  EXPECT_THAT(msync(mapping.ptr(), mapping.len(), MS_SYNC | MS_INVALIDATE),
+              SyscallFailsWithErrno(EBUSY));
+}
+
+TEST(MlockTest, Fork) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock()));
+  auto const mapping = ASSERT_NO_ERRNO_AND_VALUE(
+      MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE));
+  EXPECT_FALSE(IsPageMlocked(mapping.addr()));
+  ASSERT_THAT(mlock(mapping.ptr(), mapping.len()), SyscallSucceeds());
+  EXPECT_TRUE(IsPageMlocked(mapping.addr()));
+  EXPECT_THAT(
+      InForkedProcess([&] { TEST_CHECK(!IsPageMlocked(mapping.addr())); }),
+      IsPosixErrorOkAndHolds(0));
+}
+
+TEST(MlockTest, RlimitMemlockZero) {
+  if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_IPC_LOCK))) {
+    ASSERT_NO_ERRNO(SetCapability(CAP_IPC_LOCK, false));
+  }
+  Cleanup reset_rlimit =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedSetSoftRlimit(RLIMIT_MEMLOCK, 0));
+  auto const mapping = ASSERT_NO_ERRNO_AND_VALUE(
+      MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE));
+  EXPECT_FALSE(IsPageMlocked(mapping.addr()));
+  ASSERT_THAT(mlock(mapping.ptr(), mapping.len()),
+              SyscallFailsWithErrno(EPERM));
+}
+
+TEST(MlockTest, RlimitMemlockInsufficient) {
+  if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_IPC_LOCK))) {
+    ASSERT_NO_ERRNO(SetCapability(CAP_IPC_LOCK, false));
+  }
+  Cleanup reset_rlimit =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedSetSoftRlimit(RLIMIT_MEMLOCK, kPageSize));
+  auto const mapping = ASSERT_NO_ERRNO_AND_VALUE(
+      MmapAnon(2 * kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE));
+  EXPECT_FALSE(IsPageMlocked(mapping.addr()));
+  ASSERT_THAT(mlock(mapping.ptr(), mapping.len()),
+              SyscallFailsWithErrno(ENOMEM));
+}
+
+TEST(MunlockTest, Basic) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock()));
+  auto const mapping = ASSERT_NO_ERRNO_AND_VALUE(
+      MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE));
+  EXPECT_FALSE(IsPageMlocked(mapping.addr()));
+  ASSERT_THAT(mlock(mapping.ptr(), mapping.len()), SyscallSucceeds());
+  EXPECT_TRUE(IsPageMlocked(mapping.addr()));
+  ASSERT_THAT(munlock(mapping.ptr(), mapping.len()), SyscallSucceeds());
+  EXPECT_FALSE(IsPageMlocked(mapping.addr()));
+}
+
+TEST(MunlockTest, NotLocked) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock()));
+  auto const mapping = ASSERT_NO_ERRNO_AND_VALUE(
+      MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE));
+  EXPECT_FALSE(IsPageMlocked(mapping.addr()));
+  EXPECT_THAT(munlock(mapping.ptr(), mapping.len()), SyscallSucceeds());
+  EXPECT_FALSE(IsPageMlocked(mapping.addr()));
+}
+
+// There is currently no test for mlockall(MCL_CURRENT) because the default
+// RLIMIT_MEMLOCK of 64 KB is insufficient to actually invoke
+// mlockall(MCL_CURRENT).
+
+TEST(MlockallTest, Future) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock()));
+
+  // Run this test in a separate (single-threaded) subprocess to ensure that a
+  // background thread doesn't try to mmap a large amount of memory, fail due
+  // to hitting RLIMIT_MEMLOCK, and explode the process violently.
+  auto const do_test = [] {
+    auto const mapping =
+        MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE).ValueOrDie();
+    TEST_CHECK(!IsPageMlocked(mapping.addr()));
+    TEST_PCHECK(mlockall(MCL_FUTURE) == 0);
+    // Ensure that mlockall(MCL_FUTURE) is turned off before the end of the
+    // test, as otherwise mmaps may fail unexpectedly.
+    Cleanup do_munlockall([] { TEST_PCHECK(munlockall() == 0); });
+    auto const mapping2 =
+        MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE).ValueOrDie();
+    TEST_CHECK(IsPageMlocked(mapping2.addr()));
+    // Fire munlockall() and check that it disables mlockall(MCL_FUTURE).
+    do_munlockall.Release()();
+    auto const mapping3 =
+        MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE).ValueOrDie();
+    TEST_CHECK(!IsPageMlocked(mapping2.addr()));
+  };
+  EXPECT_THAT(InForkedProcess(do_test), IsPosixErrorOkAndHolds(0));
+}
+
+TEST(MunlockallTest, Basic) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock()));
+  auto const mapping = ASSERT_NO_ERRNO_AND_VALUE(
+      MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_LOCKED));
+  EXPECT_TRUE(IsPageMlocked(mapping.addr()));
+  ASSERT_THAT(munlockall(), SyscallSucceeds());
+  EXPECT_FALSE(IsPageMlocked(mapping.addr()));
+}
+
+#ifndef SYS_mlock2
+#if defined(__x86_64__)
+#define SYS_mlock2 325
+#elif defined(__aarch64__)
+#define SYS_mlock2 284
+#endif
+#endif
+
+#ifndef MLOCK_ONFAULT
+#define MLOCK_ONFAULT 0x01  // Linux: include/uapi/asm-generic/mman-common.h
+#endif
+
+#ifdef SYS_mlock2
+
+int mlock2(void const* addr, size_t len, int flags) {
+  return syscall(SYS_mlock2, addr, len, flags);
+}
+
+TEST(Mlock2Test, NoFlags) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock()));
+  auto const mapping = ASSERT_NO_ERRNO_AND_VALUE(
+      MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE));
+  EXPECT_FALSE(IsPageMlocked(mapping.addr()));
+  ASSERT_THAT(mlock2(mapping.ptr(), mapping.len(), 0), SyscallSucceeds());
+  EXPECT_TRUE(IsPageMlocked(mapping.addr()));
+}
+
+TEST(Mlock2Test, MlockOnfault) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock()));
+  auto const mapping = ASSERT_NO_ERRNO_AND_VALUE(
+      MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE));
+  EXPECT_FALSE(IsPageMlocked(mapping.addr()));
+  ASSERT_THAT(mlock2(mapping.ptr(), mapping.len(), MLOCK_ONFAULT),
+              SyscallSucceeds());
+  EXPECT_TRUE(IsPageMlocked(mapping.addr()));
+}
+
+TEST(Mlock2Test, UnknownFlags) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock()));
+  auto const mapping = ASSERT_NO_ERRNO_AND_VALUE(
+      MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE));
+  EXPECT_THAT(mlock2(mapping.ptr(), mapping.len(), ~0),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+#endif  // defined(SYS_mlock2)
+
+TEST(MapLockedTest, Basic) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock()));
+  auto const mapping = ASSERT_NO_ERRNO_AND_VALUE(
+      MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_LOCKED));
+  EXPECT_TRUE(IsPageMlocked(mapping.addr()));
+  EXPECT_THAT(munlock(mapping.ptr(), mapping.len()), SyscallSucceeds());
+  EXPECT_FALSE(IsPageMlocked(mapping.addr()));
+}
+
+TEST(MapLockedTest, RlimitMemlockZero) {
+  if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_IPC_LOCK))) {
+    ASSERT_NO_ERRNO(SetCapability(CAP_IPC_LOCK, false));
+  }
+  Cleanup reset_rlimit =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedSetSoftRlimit(RLIMIT_MEMLOCK, 0));
+  EXPECT_THAT(
+      MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_LOCKED),
+      PosixErrorIs(EPERM, _));
+}
+
+TEST(MapLockedTest, RlimitMemlockInsufficient) {
+  if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_IPC_LOCK))) {
+    ASSERT_NO_ERRNO(SetCapability(CAP_IPC_LOCK, false));
+  }
+  Cleanup reset_rlimit =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedSetSoftRlimit(RLIMIT_MEMLOCK, kPageSize));
+  EXPECT_THAT(
+      MmapAnon(2 * kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_LOCKED),
+      PosixErrorIs(EAGAIN, _));
+}
+
+TEST(MremapLockedTest, Basic) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock()));
+  auto mapping = ASSERT_NO_ERRNO_AND_VALUE(
+      MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_LOCKED));
+  EXPECT_TRUE(IsPageMlocked(mapping.addr()));
+
+  void* addr = mremap(mapping.ptr(), mapping.len(), 2 * mapping.len(),
+                      MREMAP_MAYMOVE, nullptr);
+  if (addr == MAP_FAILED) {
+    FAIL() << "mremap failed: " << errno << " (" << strerror(errno) << ")";
+  }
+  mapping.release();
+  mapping.reset(addr, 2 * mapping.len());
+  EXPECT_TRUE(IsPageMlocked(reinterpret_cast<uintptr_t>(addr)));
+}
+
+TEST(MremapLockedTest, RlimitMemlockZero) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock()));
+  auto mapping = ASSERT_NO_ERRNO_AND_VALUE(
+      MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_LOCKED));
+  EXPECT_TRUE(IsPageMlocked(mapping.addr()));
+
+  if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_IPC_LOCK))) {
+    ASSERT_NO_ERRNO(SetCapability(CAP_IPC_LOCK, false));
+  }
+  Cleanup reset_rlimit =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedSetSoftRlimit(RLIMIT_MEMLOCK, 0));
+  void* addr = mremap(mapping.ptr(), mapping.len(), 2 * mapping.len(),
+                      MREMAP_MAYMOVE, nullptr);
+  EXPECT_TRUE(addr == MAP_FAILED && errno == EAGAIN)
+      << "addr = " << addr << ", errno = " << errno;
+}
+
+TEST(MremapLockedTest, RlimitMemlockInsufficient) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock()));
+  auto mapping = ASSERT_NO_ERRNO_AND_VALUE(
+      MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_LOCKED));
+  EXPECT_TRUE(IsPageMlocked(mapping.addr()));
+
+  if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_IPC_LOCK))) {
+    ASSERT_NO_ERRNO(SetCapability(CAP_IPC_LOCK, false));
+  }
+  Cleanup reset_rlimit = ASSERT_NO_ERRNO_AND_VALUE(
+      ScopedSetSoftRlimit(RLIMIT_MEMLOCK, mapping.len()));
+  void* addr = mremap(mapping.ptr(), mapping.len(), 2 * mapping.len(),
+                      MREMAP_MAYMOVE, nullptr);
+  EXPECT_TRUE(addr == MAP_FAILED && errno == EAGAIN)
+      << "addr = " << addr << ", errno = " << errno;
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/mmap.cc b/test/syscalls/linux/mmap.cc
new file mode 100644
index 000000000..6d3227ab6
--- /dev/null
+++ b/test/syscalls/linux/mmap.cc
@@ -0,0 +1,1676 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/magic.h>
+#include <linux/unistd.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <sys/statfs.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/strings/escaping.h"
+#include "absl/strings/str_split.h"
+#include "test/util/cleanup.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/fs_util.h"
+#include "test/util/memory_util.h"
+#include "test/util/multiprocess_util.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+using ::testing::Gt;
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+PosixErrorOr<int64_t> VirtualMemorySize() {
+  ASSIGN_OR_RETURN_ERRNO(auto contents, GetContents("/proc/self/statm"));
+  std::vector<std::string> parts = absl::StrSplit(contents, ' ');
+  if (parts.empty()) {
+    return PosixError(EINVAL, "Unable to parse /proc/self/statm");
+  }
+  ASSIGN_OR_RETURN_ERRNO(auto pages, Atoi<int64_t>(parts[0]));
+  return pages * getpagesize();
+}
+
+class MMapTest : public ::testing::Test {
+ protected:
+  // Unmap mapping, if one was made.
+  void TearDown() override {
+    if (addr_) {
+      EXPECT_THAT(Unmap(), SyscallSucceeds());
+    }
+  }
+
+  // Remembers mapping, so it can be automatically unmapped.
+  uintptr_t Map(uintptr_t addr, size_t length, int prot, int flags, int fd,
+                off_t offset) {
+    void* ret =
+        mmap(reinterpret_cast<void*>(addr), length, prot, flags, fd, offset);
+
+    if (ret != MAP_FAILED) {
+      addr_ = ret;
+      length_ = length;
+    }
+
+    return reinterpret_cast<uintptr_t>(ret);
+  }
+
+  // Unmap previous mapping
+  int Unmap() {
+    if (!addr_) {
+      return -1;
+    }
+
+    int ret = munmap(addr_, length_);
+
+    addr_ = nullptr;
+    length_ = 0;
+
+    return ret;
+  }
+
+  // Msync the mapping.
+  int Msync() { return msync(addr_, length_, MS_SYNC); }
+
+  // Mlock the mapping.
+  int Mlock() { return mlock(addr_, length_); }
+
+  // Munlock the mapping.
+  int Munlock() { return munlock(addr_, length_); }
+
+  int Protect(uintptr_t addr, size_t length, int prot) {
+    return mprotect(reinterpret_cast<void*>(addr), length, prot);
+  }
+
+  void* addr_ = nullptr;
+  size_t length_ = 0;
+};
+
+// Matches if arg contains the same contents as string str.
+MATCHER_P(EqualsMemory, str, "") {
+  if (0 == memcmp(arg, str.c_str(), str.size())) {
+    return true;
+  }
+
+  *result_listener << "Memory did not match. Got:\n"
+                   << absl::BytesToHexString(
+                          std::string(static_cast<char*>(arg), str.size()))
+                   << "Want:\n"
+                   << absl::BytesToHexString(str);
+  return false;
+}
+
+// We can't map pipes, but for different reasons.
+TEST_F(MMapTest, MapPipe) {
+  int fds[2];
+  ASSERT_THAT(pipe(fds), SyscallSucceeds());
+  EXPECT_THAT(Map(0, kPageSize, PROT_READ, MAP_PRIVATE, fds[0], 0),
+              SyscallFailsWithErrno(ENODEV));
+  EXPECT_THAT(Map(0, kPageSize, PROT_READ, MAP_PRIVATE, fds[1], 0),
+              SyscallFailsWithErrno(EACCES));
+  ASSERT_THAT(close(fds[0]), SyscallSucceeds());
+  ASSERT_THAT(close(fds[1]), SyscallSucceeds());
+}
+
+// It's very common to mmap /dev/zero because anonymous mappings aren't part
+// of POSIX although they are widely supported. So a zero initialized memory
+// region would actually come from a "file backed" /dev/zero mapping.
+TEST_F(MMapTest, MapDevZeroShared) {
+  // This test will verify that we're able to map a page backed by /dev/zero
+  // as MAP_SHARED.
+  const FileDescriptor dev_zero =
+      ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR));
+
+  // Test that we can create a RW SHARED mapping of /dev/zero.
+  ASSERT_THAT(
+      Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, dev_zero.get(), 0),
+      SyscallSucceeds());
+}
+
+TEST_F(MMapTest, MapDevZeroPrivate) {
+  // This test will verify that we're able to map a page backed by /dev/zero
+  // as MAP_PRIVATE.
+  const FileDescriptor dev_zero =
+      ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR));
+
+  // Test that we can create a RW SHARED mapping of /dev/zero.
+  ASSERT_THAT(
+      Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE, dev_zero.get(), 0),
+      SyscallSucceeds());
+}
+
+TEST_F(MMapTest, MapDevZeroNoPersistence) {
+  // This test will verify that two independent mappings of /dev/zero do not
+  // appear to reference the same "backed file."
+
+  const FileDescriptor dev_zero1 =
+      ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR));
+  const FileDescriptor dev_zero2 =
+      ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR));
+
+  ASSERT_THAT(
+      Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, dev_zero1.get(), 0),
+      SyscallSucceeds());
+
+  // Create a second mapping via the second /dev/zero fd.
+  void* psec_map = mmap(nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED,
+                        dev_zero2.get(), 0);
+  ASSERT_THAT(reinterpret_cast<intptr_t>(psec_map), SyscallSucceeds());
+
+  // Always unmap.
+  auto cleanup_psec_map = Cleanup(
+      [&] { EXPECT_THAT(munmap(psec_map, kPageSize), SyscallSucceeds()); });
+
+  // Verify that we have independently addressed pages.
+  ASSERT_NE(psec_map, addr_);
+
+  std::string buf_zero(kPageSize, 0x00);
+  std::string buf_ones(kPageSize, 0xFF);
+
+  // Verify the first is actually all zeros after mmap.
+  EXPECT_THAT(addr_, EqualsMemory(buf_zero));
+
+  // Let's fill in the first mapping with 0xFF.
+  memcpy(addr_, buf_ones.data(), kPageSize);
+
+  // Verify that the memcpy actually stuck in the page.
+  EXPECT_THAT(addr_, EqualsMemory(buf_ones));
+
+  // Verify that it didn't affect the second page which should be all zeros.
+  EXPECT_THAT(psec_map, EqualsMemory(buf_zero));
+}
+
+TEST_F(MMapTest, MapDevZeroSharedMultiplePages) {
+  // This will test that we're able to map /dev/zero over multiple pages.
+  const FileDescriptor dev_zero =
+      ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR));
+
+  // Test that we can create a RW SHARED mapping of /dev/zero.
+  ASSERT_THAT(Map(0, kPageSize * 2, PROT_READ | PROT_WRITE, MAP_PRIVATE,
+                  dev_zero.get(), 0),
+              SyscallSucceeds());
+
+  std::string buf_zero(kPageSize * 2, 0x00);
+  std::string buf_ones(kPageSize * 2, 0xFF);
+
+  // Verify the two pages are actually all zeros after mmap.
+  EXPECT_THAT(addr_, EqualsMemory(buf_zero));
+
+  // Fill out the pages with all ones.
+  memcpy(addr_, buf_ones.data(), kPageSize * 2);
+
+  // Verify that the memcpy actually stuck in the pages.
+  EXPECT_THAT(addr_, EqualsMemory(buf_ones));
+}
+
+TEST_F(MMapTest, MapDevZeroSharedFdNoPersistence) {
+  // This test will verify that two independent mappings of /dev/zero do not
+  // appear to reference the same "backed file" even when mapped from the
+  // same initial fd.
+  const FileDescriptor dev_zero =
+      ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR));
+
+  ASSERT_THAT(
+      Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, dev_zero.get(), 0),
+      SyscallSucceeds());
+
+  // Create a second mapping via the same fd.
+  void* psec_map = mmap(nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED,
+                        dev_zero.get(), 0);
+  ASSERT_THAT(reinterpret_cast<int64_t>(psec_map), SyscallSucceeds());
+
+  // Always unmap.
+  auto cleanup_psec_map = Cleanup(
+      [&] { ASSERT_THAT(munmap(psec_map, kPageSize), SyscallSucceeds()); });
+
+  // Verify that we have independently addressed pages.
+  ASSERT_NE(psec_map, addr_);
+
+  std::string buf_zero(kPageSize, 0x00);
+  std::string buf_ones(kPageSize, 0xFF);
+
+  // Verify the first is actually all zeros after mmap.
+  EXPECT_THAT(addr_, EqualsMemory(buf_zero));
+
+  // Let's fill in the first mapping with 0xFF.
+  memcpy(addr_, buf_ones.data(), kPageSize);
+
+  // Verify that the memcpy actually stuck in the page.
+  EXPECT_THAT(addr_, EqualsMemory(buf_ones));
+
+  // Verify that it didn't affect the second page which should be all zeros.
+  EXPECT_THAT(psec_map, EqualsMemory(buf_zero));
+}
+
+TEST_F(MMapTest, MapDevZeroSegfaultAfterUnmap) {
+  SetupGvisorDeathTest();
+
+  // This test will verify that we're able to map a page backed by /dev/zero
+  // as MAP_SHARED and after it's unmapped any access results in a SIGSEGV.
+  // This test is redundant but given the special nature of /dev/zero mappings
+  // it doesn't hurt.
+  const FileDescriptor dev_zero =
+      ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR));
+
+  const auto rest = [&] {
+    // Test that we can create a RW SHARED mapping of /dev/zero.
+    TEST_PCHECK(Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED,
+                    dev_zero.get(),
+                    0) != reinterpret_cast<uintptr_t>(MAP_FAILED));
+
+    // Confirm that accesses after the unmap result in a SIGSEGV.
+    //
+    // N.B. We depend on this process being single-threaded to ensure there
+    // can't be another mmap to map addr before the dereference below.
+    void* addr_saved = addr_;  // Unmap resets addr_.
+    TEST_PCHECK(Unmap() == 0);
+    *reinterpret_cast<volatile int*>(addr_saved) = 0xFF;
+  };
+
+  EXPECT_THAT(InForkedProcess(rest),
+              IsPosixErrorOkAndHolds(W_EXITCODE(0, SIGSEGV)));
+}
+
+TEST_F(MMapTest, MapDevZeroUnaligned) {
+  const FileDescriptor dev_zero =
+      ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR));
+  const size_t size = kPageSize + kPageSize / 2;
+  const std::string buf_zero(size, 0x00);
+
+  ASSERT_THAT(
+      Map(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, dev_zero.get(), 0),
+      SyscallSucceeds());
+  EXPECT_THAT(addr_, EqualsMemory(buf_zero));
+  ASSERT_THAT(Unmap(), SyscallSucceeds());
+
+  ASSERT_THAT(
+      Map(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE, dev_zero.get(), 0),
+      SyscallSucceeds());
+  EXPECT_THAT(addr_, EqualsMemory(buf_zero));
+}
+
+// We can't map _some_ character devices.
+TEST_F(MMapTest, MapCharDevice) {
+  const FileDescriptor cdevfd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/random", 0, 0));
+  EXPECT_THAT(Map(0, kPageSize, PROT_READ, MAP_PRIVATE, cdevfd.get(), 0),
+              SyscallFailsWithErrno(ENODEV));
+}
+
+// We can't map directories.
+TEST_F(MMapTest, MapDirectory) {
+  const FileDescriptor dirfd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(GetAbsoluteTestTmpdir(), 0, 0));
+  EXPECT_THAT(Map(0, kPageSize, PROT_READ, MAP_PRIVATE, dirfd.get(), 0),
+              SyscallFailsWithErrno(ENODEV));
+}
+
+// We can map *something*
+TEST_F(MMapTest, MapAnything) {
+  EXPECT_THAT(Map(0, kPageSize, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
+              SyscallSucceedsWithValue(Gt(0)));
+}
+
+// Map length < PageSize allowed
+TEST_F(MMapTest, SmallMap) {
+  EXPECT_THAT(Map(0, 128, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
+              SyscallSucceeds());
+}
+
+// Hint address doesn't break anything.
+// Note: there is no requirement we actually get the hint address
+TEST_F(MMapTest, HintAddress) {
+  EXPECT_THAT(
+      Map(0x30000000, kPageSize, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
+      SyscallSucceeds());
+}
+
+// MAP_FIXED gives us exactly the requested address
+TEST_F(MMapTest, MapFixed) {
+  EXPECT_THAT(Map(0x30000000, kPageSize, PROT_NONE,
+                  MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0),
+              SyscallSucceedsWithValue(0x30000000));
+}
+
+// 64-bit addresses work too
+#if defined(__x86_64__) || defined(__aarch64__)
+TEST_F(MMapTest, MapFixed64) {
+  EXPECT_THAT(Map(0x300000000000, kPageSize, PROT_NONE,
+                  MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0),
+              SyscallSucceedsWithValue(0x300000000000));
+}
+#endif
+
+// MAP_STACK allowed.
+// There isn't a good way to verify it did anything.
+TEST_F(MMapTest, MapStack) {
+  EXPECT_THAT(Map(0, kPageSize, PROT_NONE,
+                  MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0),
+              SyscallSucceeds());
+}
+
+// MAP_LOCKED allowed.
+// There isn't a good way to verify it did anything.
+TEST_F(MMapTest, MapLocked) {
+  EXPECT_THAT(Map(0, kPageSize, PROT_NONE,
+                  MAP_PRIVATE | MAP_ANONYMOUS | MAP_LOCKED, -1, 0),
+              SyscallSucceeds());
+}
+
+// MAP_PRIVATE or MAP_SHARED must be passed
+TEST_F(MMapTest, NotPrivateOrShared) {
+  EXPECT_THAT(Map(0, kPageSize, PROT_NONE, MAP_ANONYMOUS, -1, 0),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+// Only one of MAP_PRIVATE or MAP_SHARED may be passed
+TEST_F(MMapTest, PrivateAndShared) {
+  EXPECT_THAT(Map(0, kPageSize, PROT_NONE,
+                  MAP_PRIVATE | MAP_SHARED | MAP_ANONYMOUS, -1, 0),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_F(MMapTest, FixedAlignment) {
+  // Addr must be page aligned (MAP_FIXED)
+  EXPECT_THAT(Map(0x30000001, kPageSize, PROT_NONE,
+                  MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS, -1, 0),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+// Non-MAP_FIXED address does not need to be page aligned
+TEST_F(MMapTest, NonFixedAlignment) {
+  EXPECT_THAT(
+      Map(0x30000001, kPageSize, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
+      SyscallSucceeds());
+}
+
+// Length = 0 results in EINVAL.
+TEST_F(MMapTest, InvalidLength) {
+  EXPECT_THAT(Map(0, 0, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+// Bad fd not allowed.
+TEST_F(MMapTest, BadFd) {
+  EXPECT_THAT(Map(0, kPageSize, PROT_NONE, MAP_PRIVATE, 999, 0),
+              SyscallFailsWithErrno(EBADF));
+}
+
+// Mappings are writable.
+TEST_F(MMapTest, ProtWrite) {
+  uint64_t addr;
+  constexpr uint8_t kFirstWord[] = {42, 42, 42, 42};
+
+  EXPECT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE,
+                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
+              SyscallSucceeds());
+
+  // This shouldn't cause a SIGSEGV.
+  memset(reinterpret_cast<void*>(addr), 42, kPageSize);
+
+  // The written data should actually be there.
+  EXPECT_EQ(
+      0, memcmp(reinterpret_cast<void*>(addr), kFirstWord, sizeof(kFirstWord)));
+}
+
+// "Write-only" mappings are writable *and* readable.
+TEST_F(MMapTest, ProtWriteOnly) {
+  uint64_t addr;
+  constexpr uint8_t kFirstWord[] = {42, 42, 42, 42};
+
+  EXPECT_THAT(
+      addr = Map(0, kPageSize, PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
+      SyscallSucceeds());
+
+  // This shouldn't cause a SIGSEGV.
+  memset(reinterpret_cast<void*>(addr), 42, kPageSize);
+
+  // The written data should actually be there.
+  EXPECT_EQ(
+      0, memcmp(reinterpret_cast<void*>(addr), kFirstWord, sizeof(kFirstWord)));
+}
+
+// "Write-only" mappings are readable.
+//
+// This is distinct from above to ensure the page is accessible even if the
+// initial fault is a write fault.
+TEST_F(MMapTest, ProtWriteOnlyReadable) {
+  uint64_t addr;
+  constexpr uint64_t kFirstWord = 0;
+
+  EXPECT_THAT(
+      addr = Map(0, kPageSize, PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
+      SyscallSucceeds());
+
+  EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), &kFirstWord,
+                      sizeof(kFirstWord)));
+}
+
+// Mappings are writable after mprotect from PROT_NONE to PROT_READ|PROT_WRITE.
+TEST_F(MMapTest, ProtectProtWrite) {
+  uint64_t addr;
+  constexpr uint8_t kFirstWord[] = {42, 42, 42, 42};
+
+  EXPECT_THAT(
+      addr = Map(0, kPageSize, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
+      SyscallSucceeds());
+
+  ASSERT_THAT(Protect(addr, kPageSize, PROT_READ | PROT_WRITE),
+              SyscallSucceeds());
+
+  // This shouldn't cause a SIGSEGV.
+  memset(reinterpret_cast<void*>(addr), 42, kPageSize);
+
+  // The written data should actually be there.
+  EXPECT_EQ(
+      0, memcmp(reinterpret_cast<void*>(addr), kFirstWord, sizeof(kFirstWord)));
+}
+
+// SIGSEGV raised when reading PROT_NONE memory
+TEST_F(MMapTest, ProtNoneDeath) {
+  SetupGvisorDeathTest();
+
+  uintptr_t addr;
+
+  ASSERT_THAT(
+      addr = Map(0, kPageSize, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
+      SyscallSucceeds());
+
+  EXPECT_EXIT(*reinterpret_cast<volatile int*>(addr),
+              ::testing::KilledBySignal(SIGSEGV), "");
+}
+
+// SIGSEGV raised when writing PROT_READ only memory
+TEST_F(MMapTest, ReadOnlyDeath) {
+  SetupGvisorDeathTest();
+
+  uintptr_t addr;
+
+  ASSERT_THAT(
+      addr = Map(0, kPageSize, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
+      SyscallSucceeds());
+
+  EXPECT_EXIT(*reinterpret_cast<volatile int*>(addr) = 42,
+              ::testing::KilledBySignal(SIGSEGV), "");
+}
+
+// Writable mapping mprotect'd to read-only should not be writable.
+TEST_F(MMapTest, MprotectReadOnlyDeath) {
+  SetupGvisorDeathTest();
+
+  uintptr_t addr;
+
+  ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE,
+                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
+              SyscallSucceeds());
+
+  volatile int* val = reinterpret_cast<int*>(addr);
+
+  // Copy to ensure page is mapped in.
+  *val = 42;
+
+  ASSERT_THAT(Protect(addr, kPageSize, PROT_READ), SyscallSucceeds());
+
+  // Now it shouldn't be writable.
+  EXPECT_EXIT(*val = 0, ::testing::KilledBySignal(SIGSEGV), "");
+}
+
+// Verify that calling mprotect an address that's not page aligned fails.
+TEST_F(MMapTest, MprotectNotPageAligned) {
+  uintptr_t addr;
+
+  ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE,
+                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
+              SyscallSucceeds());
+  ASSERT_THAT(Protect(addr + 1, kPageSize - 1, PROT_READ),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+// Verify that calling mprotect with an absurdly huge length fails.
+TEST_F(MMapTest, MprotectHugeLength) {
+  uintptr_t addr;
+
+  ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE,
+                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
+              SyscallSucceeds());
+  ASSERT_THAT(Protect(addr, static_cast<size_t>(-1), PROT_READ),
+              SyscallFailsWithErrno(ENOMEM));
+}
+
+#if defined(__x86_64__) || defined(__i386__)
+// This code is equivalent in 32 and 64-bit mode
+const uint8_t machine_code[] = {
+    0xb8, 0x2a, 0x00, 0x00, 0x00,  // movl $42, %eax
+    0xc3,                          // retq
+};
+#elif defined(__aarch64__)
+const uint8_t machine_code[] = {
+    0x40, 0x05, 0x80, 0x52,  // mov w0, #42
+    0xc0, 0x03, 0x5f, 0xd6,  // ret
+};
+#endif
+
+// PROT_EXEC allows code execution
+TEST_F(MMapTest, ProtExec) {
+  uintptr_t addr;
+  uint32_t (*func)(void);
+
+  EXPECT_THAT(addr = Map(0, kPageSize, PROT_EXEC | PROT_READ | PROT_WRITE,
+                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
+              SyscallSucceeds());
+
+  memcpy(reinterpret_cast<void*>(addr), machine_code, sizeof(machine_code));
+
+  func = reinterpret_cast<uint32_t (*)(void)>(addr);
+
+  EXPECT_EQ(42, func());
+}
+
+// No PROT_EXEC disallows code execution
+TEST_F(MMapTest, NoProtExecDeath) {
+  SetupGvisorDeathTest();
+
+  uintptr_t addr;
+  uint32_t (*func)(void);
+
+  EXPECT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE,
+                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
+              SyscallSucceeds());
+
+  memcpy(reinterpret_cast<void*>(addr), machine_code, sizeof(machine_code));
+
+  func = reinterpret_cast<uint32_t (*)(void)>(addr);
+
+  EXPECT_EXIT(func(), ::testing::KilledBySignal(SIGSEGV), "");
+}
+
+TEST_F(MMapTest, NoExceedLimitData) {
+  void* prevbrk;
+  void* target_brk;
+  struct rlimit setlim;
+
+  prevbrk = sbrk(0);
+  ASSERT_NE(-1, reinterpret_cast<intptr_t>(prevbrk));
+  target_brk = reinterpret_cast<char*>(prevbrk) + 1;
+
+  setlim.rlim_cur = RLIM_INFINITY;
+  setlim.rlim_max = RLIM_INFINITY;
+  ASSERT_THAT(setrlimit(RLIMIT_DATA, &setlim), SyscallSucceeds());
+  EXPECT_THAT(brk(target_brk), SyscallSucceedsWithValue(0));
+}
+
+TEST_F(MMapTest, ExceedLimitData) {
+  // To unit test this more precisely, we'd need access to the mm's start_brk
+  // and end_brk, which we don't have direct access to :/
+  void* prevbrk;
+  void* target_brk;
+  struct rlimit setlim;
+
+  prevbrk = sbrk(0);
+  ASSERT_NE(-1, reinterpret_cast<intptr_t>(prevbrk));
+  target_brk = reinterpret_cast<char*>(prevbrk) + 8192;
+
+  setlim.rlim_cur = 0;
+  setlim.rlim_max = RLIM_INFINITY;
+  // Set RLIMIT_DATA very low so any subsequent brk() calls fail.
+  // Reset RLIMIT_DATA during teardown step.
+  ASSERT_THAT(setrlimit(RLIMIT_DATA, &setlim), SyscallSucceeds());
+  EXPECT_THAT(brk(target_brk), SyscallFailsWithErrno(ENOMEM));
+  // Teardown step...
+  setlim.rlim_cur = RLIM_INFINITY;
+  ASSERT_THAT(setrlimit(RLIMIT_DATA, &setlim), SyscallSucceeds());
+}
+
+TEST_F(MMapTest, ExceedLimitDataPrlimit) {
+  // To unit test this more precisely, we'd need access to the mm's start_brk
+  // and end_brk, which we don't have direct access to :/
+  void* prevbrk;
+  void* target_brk;
+  struct rlimit setlim;
+
+  prevbrk = sbrk(0);
+  ASSERT_NE(-1, reinterpret_cast<intptr_t>(prevbrk));
+  target_brk = reinterpret_cast<char*>(prevbrk) + 8192;
+
+  setlim.rlim_cur = 0;
+  setlim.rlim_max = RLIM_INFINITY;
+  // Set RLIMIT_DATA very low so any subsequent brk() calls fail.
+  // Reset RLIMIT_DATA during teardown step.
+  ASSERT_THAT(prlimit(0, RLIMIT_DATA, &setlim, nullptr), SyscallSucceeds());
+  EXPECT_THAT(brk(target_brk), SyscallFailsWithErrno(ENOMEM));
+  // Teardown step...
+  setlim.rlim_cur = RLIM_INFINITY;
+  ASSERT_THAT(setrlimit(RLIMIT_DATA, &setlim), SyscallSucceeds());
+}
+
+TEST_F(MMapTest, ExceedLimitDataPrlimitPID) {
+  // To unit test this more precisely, we'd need access to the mm's start_brk
+  // and end_brk, which we don't have direct access to :/
+  void* prevbrk;
+  void* target_brk;
+  struct rlimit setlim;
+
+  prevbrk = sbrk(0);
+  ASSERT_NE(-1, reinterpret_cast<intptr_t>(prevbrk));
+  target_brk = reinterpret_cast<char*>(prevbrk) + 8192;
+
+  setlim.rlim_cur = 0;
+  setlim.rlim_max = RLIM_INFINITY;
+  // Set RLIMIT_DATA very low so any subsequent brk() calls fail.
+  // Reset RLIMIT_DATA during teardown step.
+  ASSERT_THAT(prlimit(syscall(__NR_gettid), RLIMIT_DATA, &setlim, nullptr),
+              SyscallSucceeds());
+  EXPECT_THAT(brk(target_brk), SyscallFailsWithErrno(ENOMEM));
+  // Teardown step...
+  setlim.rlim_cur = RLIM_INFINITY;
+  ASSERT_THAT(setrlimit(RLIMIT_DATA, &setlim), SyscallSucceeds());
+}
+
+TEST_F(MMapTest, NoExceedLimitAS) {
+  constexpr uint64_t kAllocBytes = 200 << 20;
+  // Add some headroom to the AS limit in case of e.g. unexpected stack
+  // expansion.
+  constexpr uint64_t kExtraASBytes = kAllocBytes + (20 << 20);
+  static_assert(kAllocBytes < kExtraASBytes,
+                "test depends on allocation not exceeding AS limit");
+
+  auto vss = ASSERT_NO_ERRNO_AND_VALUE(VirtualMemorySize());
+  struct rlimit setlim;
+  setlim.rlim_cur = vss + kExtraASBytes;
+  setlim.rlim_max = RLIM_INFINITY;
+  ASSERT_THAT(setrlimit(RLIMIT_AS, &setlim), SyscallSucceeds());
+  EXPECT_THAT(
+      Map(0, kAllocBytes, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
+      SyscallSucceedsWithValue(Gt(0)));
+}
+
+TEST_F(MMapTest, ExceedLimitAS) {
+  constexpr uint64_t kAllocBytes = 200 << 20;
+  // Add some headroom to the AS limit in case of e.g. unexpected stack
+  // expansion.
+  constexpr uint64_t kExtraASBytes = 20 << 20;
+  static_assert(kAllocBytes > kExtraASBytes,
+                "test depends on allocation exceeding AS limit");
+
+  auto vss = ASSERT_NO_ERRNO_AND_VALUE(VirtualMemorySize());
+  struct rlimit setlim;
+  setlim.rlim_cur = vss + kExtraASBytes;
+  setlim.rlim_max = RLIM_INFINITY;
+  ASSERT_THAT(setrlimit(RLIMIT_AS, &setlim), SyscallSucceeds());
+  EXPECT_THAT(
+      Map(0, kAllocBytes, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
+      SyscallFailsWithErrno(ENOMEM));
+}
+
+// Tests that setting an anonymous mmap to PROT_NONE doesn't free the memory.
+TEST_F(MMapTest, SettingProtNoneDoesntFreeMemory) {
+  uintptr_t addr;
+  constexpr uint8_t kFirstWord[] = {42, 42, 42, 42};
+
+  EXPECT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE,
+                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
+              SyscallSucceedsWithValue(Gt(0)));
+
+  memset(reinterpret_cast<void*>(addr), 42, kPageSize);
+
+  ASSERT_THAT(Protect(addr, kPageSize, PROT_NONE), SyscallSucceeds());
+  ASSERT_THAT(Protect(addr, kPageSize, PROT_READ | PROT_WRITE),
+              SyscallSucceeds());
+
+  // The written data should still be there.
+  EXPECT_EQ(
+      0, memcmp(reinterpret_cast<void*>(addr), kFirstWord, sizeof(kFirstWord)));
+}
+
+constexpr char kFileContents[] = "Hello World!";
+
+class MMapFileTest : public MMapTest {
+ protected:
+  FileDescriptor fd_;
+  std::string filename_;
+
+  // Open a file for read/write
+  void SetUp() override {
+    MMapTest::SetUp();
+
+    filename_ = NewTempAbsPath();
+    fd_ = ASSERT_NO_ERRNO_AND_VALUE(Open(filename_, O_CREAT | O_RDWR, 0644));
+
+    // Extend file so it can be written once mapped. Deliberately make the file
+    // only half a page in size, so we can test what happens when we access the
+    // second half.
+    // Use ftruncate(2) once the sentry supports it.
+    char zero = 0;
+    size_t count = 0;
+    do {
+      const DisableSave ds;  // saving 2048 times is slow and useless.
+      Write(&zero, 1), SyscallSucceedsWithValue(1);
+    } while (++count < (kPageSize / 2));
+    ASSERT_THAT(lseek(fd_.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0));
+  }
+
+  // Close and delete file
+  void TearDown() override {
+    MMapTest::TearDown();
+    fd_.reset();  // Make sure the files is closed before we unlink it.
+    ASSERT_THAT(unlink(filename_.c_str()), SyscallSucceeds());
+  }
+
+  ssize_t Read(char* buf, size_t count) {
+    ssize_t len = 0;
+    do {
+      ssize_t ret = read(fd_.get(), buf, count);
+      if (ret < 0) {
+        return ret;
+      } else if (ret == 0) {
+        return len;
+      }
+
+      len += ret;
+      buf += ret;
+    } while (len < static_cast<ssize_t>(count));
+
+    return len;
+  }
+
+  ssize_t Write(const char* buf, size_t count) {
+    ssize_t len = 0;
+    do {
+      ssize_t ret = write(fd_.get(), buf, count);
+      if (ret < 0) {
+        return ret;
+      } else if (ret == 0) {
+        return len;
+      }
+
+      len += ret;
+      buf += ret;
+    } while (len < static_cast<ssize_t>(count));
+
+    return len;
+  }
+};
+
+class MMapFileParamTest
+    : public MMapFileTest,
+      public ::testing::WithParamInterface<std::tuple<int, int>> {
+ protected:
+  int prot() const { return std::get<0>(GetParam()); }
+
+  int flags() const { return std::get<1>(GetParam()); }
+};
+
+// MAP_POPULATE allowed.
+// There isn't a good way to verify it actually did anything.
+TEST_P(MMapFileParamTest, MapPopulate) {
+  ASSERT_THAT(Map(0, kPageSize, prot(), flags() | MAP_POPULATE, fd_.get(), 0),
+              SyscallSucceeds());
+}
+
+// MAP_POPULATE on a short file.
+TEST_P(MMapFileParamTest, MapPopulateShort) {
+  ASSERT_THAT(
+      Map(0, 2 * kPageSize, prot(), flags() | MAP_POPULATE, fd_.get(), 0),
+      SyscallSucceeds());
+}
+
+// Read contents from mapped file.
+TEST_F(MMapFileTest, Read) {
+  size_t len = strlen(kFileContents);
+  ASSERT_EQ(len, Write(kFileContents, len));
+
+  uintptr_t addr;
+  ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ, MAP_PRIVATE, fd_.get(), 0),
+              SyscallSucceeds());
+
+  EXPECT_THAT(reinterpret_cast<char*>(addr),
+              EqualsMemory(std::string(kFileContents)));
+}
+
+// Map at an offset.
+TEST_F(MMapFileTest, MapOffset) {
+  ASSERT_THAT(lseek(fd_.get(), kPageSize, SEEK_SET), SyscallSucceeds());
+
+  size_t len = strlen(kFileContents);
+  ASSERT_EQ(len, Write(kFileContents, len));
+
+  uintptr_t addr;
+  ASSERT_THAT(
+      addr = Map(0, kPageSize, PROT_READ, MAP_PRIVATE, fd_.get(), kPageSize),
+      SyscallSucceeds());
+
+  EXPECT_THAT(reinterpret_cast<char*>(addr),
+              EqualsMemory(std::string(kFileContents)));
+}
+
+TEST_F(MMapFileTest, MapOffsetBeyondEnd) {
+  SetupGvisorDeathTest();
+
+  uintptr_t addr;
+  ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE,
+                         fd_.get(), 10 * kPageSize),
+              SyscallSucceeds());
+
+  // Touching the memory causes SIGBUS.
+  size_t len = strlen(kFileContents);
+  EXPECT_EXIT(std::copy(kFileContents, kFileContents + len,
+                        reinterpret_cast<volatile char*>(addr)),
+              ::testing::KilledBySignal(SIGBUS), "");
+}
+
+// Verify mmap fails when sum of length and offset overflows.
+TEST_F(MMapFileTest, MapLengthPlusOffsetOverflows) {
+  const size_t length = static_cast<size_t>(-kPageSize);
+  const off_t offset = kPageSize;
+  ASSERT_THAT(Map(0, length, PROT_READ, MAP_PRIVATE, fd_.get(), offset),
+              SyscallFailsWithErrno(ENOMEM));
+}
+
+// MAP_PRIVATE PROT_WRITE is allowed on read-only FDs.
+TEST_F(MMapFileTest, WritePrivateOnReadOnlyFd) {
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(filename_, O_RDONLY));
+
+  uintptr_t addr;
+  EXPECT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE,
+                         fd.get(), 0),
+              SyscallSucceeds());
+
+  // Touch the page to ensure the kernel didn't lie about writability.
+  size_t len = strlen(kFileContents);
+  std::copy(kFileContents, kFileContents + len,
+            reinterpret_cast<volatile char*>(addr));
+}
+
+// MAP_SHARED PROT_WRITE not allowed on read-only FDs.
+TEST_F(MMapFileTest, WriteSharedOnReadOnlyFd) {
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(filename_, O_RDONLY));
+
+  uintptr_t addr;
+  EXPECT_THAT(
+      addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd.get(), 0),
+      SyscallFailsWithErrno(EACCES));
+}
+
+// The FD must be readable.
+TEST_P(MMapFileParamTest, WriteOnlyFd) {
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(filename_, O_WRONLY));
+
+  uintptr_t addr;
+  EXPECT_THAT(addr = Map(0, kPageSize, prot(), flags(), fd.get(), 0),
+              SyscallFailsWithErrno(EACCES));
+}
+
+// Overwriting the contents of a file mapped MAP_SHARED PROT_READ
+// should cause the new data to be reflected in the mapping.
+TEST_F(MMapFileTest, ReadSharedConsistentWithOverwrite) {
+  // Start from scratch.
+  EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds());
+
+  // Expand the file to two pages and dirty them.
+  std::string bufA(kPageSize, 'a');
+  ASSERT_THAT(Write(bufA.c_str(), bufA.size()),
+              SyscallSucceedsWithValue(bufA.size()));
+  std::string bufB(kPageSize, 'b');
+  ASSERT_THAT(Write(bufB.c_str(), bufB.size()),
+              SyscallSucceedsWithValue(bufB.size()));
+
+  // Map the page.
+  uintptr_t addr;
+  ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0),
+              SyscallSucceeds());
+
+  // Check that the mapping contains the right file data.
+  EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), bufA.c_str(), kPageSize));
+  EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr + kPageSize), bufB.c_str(),
+                      kPageSize));
+
+  // Start at the beginning of the file.
+  ASSERT_THAT(lseek(fd_.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0));
+
+  // Swap the write pattern.
+  ASSERT_THAT(Write(bufB.c_str(), bufB.size()),
+              SyscallSucceedsWithValue(bufB.size()));
+  ASSERT_THAT(Write(bufA.c_str(), bufA.size()),
+              SyscallSucceedsWithValue(bufA.size()));
+
+  // Check that the mapping got updated.
+  EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), bufB.c_str(), kPageSize));
+  EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr + kPageSize), bufA.c_str(),
+                      kPageSize));
+}
+
+// Partially overwriting a file mapped MAP_SHARED PROT_READ should be reflected
+// in the mapping.
+TEST_F(MMapFileTest, ReadSharedConsistentWithPartialOverwrite) {
+  // Start from scratch.
+  EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds());
+
+  // Expand the file to two pages and dirty them.
+  std::string bufA(kPageSize, 'a');
+  ASSERT_THAT(Write(bufA.c_str(), bufA.size()),
+              SyscallSucceedsWithValue(bufA.size()));
+  std::string bufB(kPageSize, 'b');
+  ASSERT_THAT(Write(bufB.c_str(), bufB.size()),
+              SyscallSucceedsWithValue(bufB.size()));
+
+  // Map the page.
+  uintptr_t addr;
+  ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0),
+              SyscallSucceeds());
+
+  // Check that the mapping contains the right file data.
+  EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), bufA.c_str(), kPageSize));
+  EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr + kPageSize), bufB.c_str(),
+                      kPageSize));
+
+  // Start at the beginning of the file.
+  ASSERT_THAT(lseek(fd_.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0));
+
+  // Do a partial overwrite, spanning both pages.
+  std::string bufC(kPageSize + (kPageSize / 2), 'c');
+  ASSERT_THAT(Write(bufC.c_str(), bufC.size()),
+              SyscallSucceedsWithValue(bufC.size()));
+
+  // Check that the mapping got updated.
+  EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), bufC.c_str(),
+                      kPageSize + (kPageSize / 2)));
+  EXPECT_EQ(0,
+            memcmp(reinterpret_cast<void*>(addr + kPageSize + (kPageSize / 2)),
+                   bufB.c_str(), kPageSize / 2));
+}
+
+// Overwriting a file mapped MAP_SHARED PROT_READ should be reflected in the
+// mapping and the file.
+TEST_F(MMapFileTest, ReadSharedConsistentWithWriteAndFile) {
+  // Start from scratch.
+  EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds());
+
+  // Expand the file to two full pages and dirty it.
+  std::string bufA(2 * kPageSize, 'a');
+  ASSERT_THAT(Write(bufA.c_str(), bufA.size()),
+              SyscallSucceedsWithValue(bufA.size()));
+
+  // Map only the first page.
+  uintptr_t addr;
+  ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0),
+              SyscallSucceeds());
+
+  // Prepare to overwrite the file contents.
+  ASSERT_THAT(lseek(fd_.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0));
+
+  // Overwrite everything, beyond the mapped portion.
+  std::string bufB(2 * kPageSize, 'b');
+  ASSERT_THAT(Write(bufB.c_str(), bufB.size()),
+              SyscallSucceedsWithValue(bufB.size()));
+
+  // What the mapped portion should now look like.
+  std::string bufMapped(kPageSize, 'b');
+
+  // Expect that the mapped portion is consistent.
+  EXPECT_EQ(
+      0, memcmp(reinterpret_cast<void*>(addr), bufMapped.c_str(), kPageSize));
+
+  // Prepare to read the entire file contents.
+  ASSERT_THAT(lseek(fd_.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0));
+
+  // Expect that the file was fully updated.
+  std::vector<char> bufFile(2 * kPageSize);
+  ASSERT_THAT(Read(bufFile.data(), bufFile.size()),
+              SyscallSucceedsWithValue(bufFile.size()));
+  // Cast to void* to avoid EXPECT_THAT assuming bufFile.data() is a
+  // NUL-terminated C std::string. EXPECT_THAT will try to print a char* as a C
+  // std::string, possibly overruning the buffer.
+  EXPECT_THAT(reinterpret_cast<void*>(bufFile.data()), EqualsMemory(bufB));
+}
+
+// Write data to mapped file.
+TEST_F(MMapFileTest, WriteShared) {
+  uintptr_t addr;
+  ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED,
+                         fd_.get(), 0),
+              SyscallSucceeds());
+
+  size_t len = strlen(kFileContents);
+  memcpy(reinterpret_cast<void*>(addr), kFileContents, len);
+
+  // The file may not actually be updated until munmap is called.
+  ASSERT_THAT(Unmap(), SyscallSucceeds());
+
+  std::vector<char> buf(len);
+  ASSERT_THAT(Read(buf.data(), buf.size()),
+              SyscallSucceedsWithValue(buf.size()));
+  // Cast to void* to avoid EXPECT_THAT assuming buf.data() is a
+  // NUL-terminated C string. EXPECT_THAT will try to print a char* as a C
+  // string, possibly overruning the buffer.
+  EXPECT_THAT(reinterpret_cast<void*>(buf.data()),
+              EqualsMemory(std::string(kFileContents)));
+}
+
+// Write data to portion of mapped page beyond the end of the file.
+// These writes are not reflected in the file.
+TEST_F(MMapFileTest, WriteSharedBeyondEnd) {
+  // The file is only half of a page. We map an entire page. Writes to the
+  // end of the mapping must not be reflected in the file.
+  uintptr_t addr;
+  ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED,
+                         fd_.get(), 0),
+              SyscallSucceeds());
+
+  // First half; this is reflected in the file.
+  std::string first(kPageSize / 2, 'A');
+  memcpy(reinterpret_cast<void*>(addr), first.c_str(), first.size());
+
+  // Second half; this is not reflected in the file.
+  std::string second(kPageSize / 2, 'B');
+  memcpy(reinterpret_cast<void*>(addr + kPageSize / 2), second.c_str(),
+         second.size());
+
+  // The file may not actually be updated until munmap is called.
+  ASSERT_THAT(Unmap(), SyscallSucceeds());
+
+  // Big enough to fit the entire page, if the writes are mistakenly written to
+  // the file.
+  std::vector<char> buf(kPageSize);
+
+  // Only the first half is in the file.
+  ASSERT_THAT(Read(buf.data(), buf.size()),
+              SyscallSucceedsWithValue(first.size()));
+  // Cast to void* to avoid EXPECT_THAT assuming buf.data() is a
+  // NUL-terminated C string. EXPECT_THAT will try to print a char* as a C
+  // NUL-terminated C std::string. EXPECT_THAT will try to print a char* as a C
+  // std::string, possibly overruning the buffer.
+  EXPECT_THAT(reinterpret_cast<void*>(buf.data()), EqualsMemory(first));
+}
+
+// The portion of a mapped page that becomes part of the file after a truncate
+// is reflected in the file.
+TEST_F(MMapFileTest, WriteSharedTruncateUp) {
+  // The file is only half of a page. We map an entire page. Writes to the
+  // end of the mapping must not be reflected in the file.
+  uintptr_t addr;
+  ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED,
+                         fd_.get(), 0),
+              SyscallSucceeds());
+
+  // First half; this is reflected in the file.
+  std::string first(kPageSize / 2, 'A');
+  memcpy(reinterpret_cast<void*>(addr), first.c_str(), first.size());
+
+  // Second half; this is not reflected in the file now (see
+  // WriteSharedBeyondEnd), but will be after the truncate.
+  std::string second(kPageSize / 2, 'B');
+  memcpy(reinterpret_cast<void*>(addr + kPageSize / 2), second.c_str(),
+         second.size());
+
+  // Extend the file to a full page. The second half of the page will be
+  // reflected in the file.
+  EXPECT_THAT(ftruncate(fd_.get(), kPageSize), SyscallSucceeds());
+
+  // The file may not actually be updated until munmap is called.
+  ASSERT_THAT(Unmap(), SyscallSucceeds());
+
+  // The whole page is in the file.
+  std::vector<char> buf(kPageSize);
+  ASSERT_THAT(Read(buf.data(), buf.size()),
+              SyscallSucceedsWithValue(buf.size()));
+  // Cast to void* to avoid EXPECT_THAT assuming buf.data() is a
+  // NUL-terminated C string. EXPECT_THAT will try to print a char* as a C
+  // string, possibly overruning the buffer.
+  EXPECT_THAT(reinterpret_cast<void*>(buf.data()), EqualsMemory(first));
+  EXPECT_THAT(reinterpret_cast<void*>(buf.data() + kPageSize / 2),
+              EqualsMemory(second));
+}
+
+TEST_F(MMapFileTest, ReadSharedTruncateDownThenUp) {
+  // Start from scratch.
+  EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds());
+
+  // Expand the file to a full page and dirty it.
+  std::string buf(kPageSize, 'a');
+  ASSERT_THAT(Write(buf.c_str(), buf.size()),
+              SyscallSucceedsWithValue(buf.size()));
+
+  // Map the page.
+  uintptr_t addr;
+  ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0),
+              SyscallSucceeds());
+
+  // Check that the memory contains the file data.
+  EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), buf.c_str(), kPageSize));
+
+  // Truncate down, then up.
+  EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds());
+  EXPECT_THAT(ftruncate(fd_.get(), kPageSize), SyscallSucceeds());
+
+  // Check that the memory was zeroed.
+  std::string zeroed(kPageSize, '\0');
+  EXPECT_EQ(0,
+            memcmp(reinterpret_cast<void*>(addr), zeroed.c_str(), kPageSize));
+
+  // The file may not actually be updated until msync is called.
+  ASSERT_THAT(Msync(), SyscallSucceeds());
+
+  // Prepare to read the entire file contents.
+  ASSERT_THAT(lseek(fd_.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0));
+
+  // Expect that the file is fully updated.
+  std::vector<char> bufFile(kPageSize);
+  ASSERT_THAT(Read(bufFile.data(), bufFile.size()),
+              SyscallSucceedsWithValue(bufFile.size()));
+  EXPECT_EQ(0, memcmp(bufFile.data(), zeroed.c_str(), kPageSize));
+}
+
+TEST_F(MMapFileTest, WriteSharedTruncateDownThenUp) {
+  // The file is only half of a page. We map an entire page. Writes to the
+  // end of the mapping must not be reflected in the file.
+  uintptr_t addr;
+  ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED,
+                         fd_.get(), 0),
+              SyscallSucceeds());
+
+  // First half; this will be deleted by truncate(0).
+  std::string first(kPageSize / 2, 'A');
+  memcpy(reinterpret_cast<void*>(addr), first.c_str(), first.size());
+
+  // Truncate down, then up.
+  EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds());
+  EXPECT_THAT(ftruncate(fd_.get(), kPageSize), SyscallSucceeds());
+
+  // The whole page is zeroed in memory.
+  std::string zeroed(kPageSize, '\0');
+  EXPECT_EQ(0,
+            memcmp(reinterpret_cast<void*>(addr), zeroed.c_str(), kPageSize));
+
+  // The file may not actually be updated until munmap is called.
+  ASSERT_THAT(Unmap(), SyscallSucceeds());
+
+  // The whole file is also zeroed.
+  std::vector<char> buf(kPageSize);
+  ASSERT_THAT(Read(buf.data(), buf.size()),
+              SyscallSucceedsWithValue(buf.size()));
+  // Cast to void* to avoid EXPECT_THAT assuming buf.data() is a
+  // NUL-terminated C string. EXPECT_THAT will try to print a char* as a C
+  // string, possibly overruning the buffer.
+  EXPECT_THAT(reinterpret_cast<void*>(buf.data()), EqualsMemory(zeroed));
+}
+
+TEST_F(MMapFileTest, ReadSharedTruncateSIGBUS) {
+  SetupGvisorDeathTest();
+
+  // Start from scratch.
+  EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds());
+
+  // Expand the file to a full page and dirty it.
+  std::string buf(kPageSize, 'a');
+  ASSERT_THAT(Write(buf.c_str(), buf.size()),
+              SyscallSucceedsWithValue(buf.size()));
+
+  // Map the page.
+  uintptr_t addr;
+  ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0),
+              SyscallSucceeds());
+
+  // Check that the mapping contains the file data.
+  EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), buf.c_str(), kPageSize));
+
+  // Truncate down.
+  EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds());
+
+  // Accessing the truncated region should cause a SIGBUS.
+  std::vector<char> in(kPageSize);
+  EXPECT_EXIT(
+      std::copy(reinterpret_cast<volatile char*>(addr),
+                reinterpret_cast<volatile char*>(addr) + kPageSize, in.data()),
+      ::testing::KilledBySignal(SIGBUS), "");
+}
+
+TEST_F(MMapFileTest, WriteSharedTruncateSIGBUS) {
+  SetupGvisorDeathTest();
+
+  uintptr_t addr;
+  ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED,
+                         fd_.get(), 0),
+              SyscallSucceeds());
+
+  // Touch the memory to be sure it really is mapped.
+  size_t len = strlen(kFileContents);
+  memcpy(reinterpret_cast<void*>(addr), kFileContents, len);
+
+  // Truncate down.
+  EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds());
+
+  // Accessing the truncated file should cause a SIGBUS.
+  EXPECT_EXIT(std::copy(kFileContents, kFileContents + len,
+                        reinterpret_cast<volatile char*>(addr)),
+              ::testing::KilledBySignal(SIGBUS), "");
+}
+
+TEST_F(MMapFileTest, ReadSharedTruncatePartialPage) {
+  // Start from scratch.
+  EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds());
+
+  // Dirty the file.
+  std::string buf(kPageSize, 'a');
+  ASSERT_THAT(Write(buf.c_str(), buf.size()),
+              SyscallSucceedsWithValue(buf.size()));
+
+  // Map a page.
+  uintptr_t addr;
+  ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0),
+              SyscallSucceeds());
+
+  // Truncate to half of the page.
+  EXPECT_THAT(ftruncate(fd_.get(), kPageSize / 2), SyscallSucceeds());
+
+  // First half of the page untouched.
+  EXPECT_EQ(0,
+            memcmp(reinterpret_cast<void*>(addr), buf.data(), kPageSize / 2));
+
+  // Second half is zeroed.
+  std::string zeroed(kPageSize / 2, '\0');
+  EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr + kPageSize / 2),
+                      zeroed.c_str(), kPageSize / 2));
+}
+
+// Page can still be accessed and contents are intact after truncating a partial
+// page.
+TEST_F(MMapFileTest, WriteSharedTruncatePartialPage) {
+  // Expand the file to a full page.
+  EXPECT_THAT(ftruncate(fd_.get(), kPageSize), SyscallSucceeds());
+
+  uintptr_t addr;
+  ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED,
+                         fd_.get(), 0),
+              SyscallSucceeds());
+
+  // Fill the entire page.
+  std::string contents(kPageSize, 'A');
+  memcpy(reinterpret_cast<void*>(addr), contents.c_str(), contents.size());
+
+  // Truncate half of the page.
+  EXPECT_THAT(ftruncate(fd_.get(), kPageSize / 2), SyscallSucceeds());
+
+  // First half of the page untouched.
+  EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), contents.c_str(),
+                      kPageSize / 2));
+
+  // Second half zeroed.
+  std::string zeroed(kPageSize / 2, '\0');
+  EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr + kPageSize / 2),
+                      zeroed.c_str(), kPageSize / 2));
+}
+
+// MAP_PRIVATE writes are not carried through to the underlying file.
+TEST_F(MMapFileTest, WritePrivate) {
+  uintptr_t addr;
+  ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE,
+                         fd_.get(), 0),
+              SyscallSucceeds());
+
+  size_t len = strlen(kFileContents);
+  memcpy(reinterpret_cast<void*>(addr), kFileContents, len);
+
+  // The file should not be updated, but if it mistakenly is, it may not be
+  // until after munmap is called.
+  ASSERT_THAT(Unmap(), SyscallSucceeds());
+
+  std::vector<char> buf(len);
+  ASSERT_THAT(Read(buf.data(), buf.size()),
+              SyscallSucceedsWithValue(buf.size()));
+  // Cast to void* to avoid EXPECT_THAT assuming buf.data() is a
+  // NUL-terminated C string. EXPECT_THAT will try to print a char* as a C
+  // string, possibly overruning the buffer.
+  EXPECT_THAT(reinterpret_cast<void*>(buf.data()),
+              EqualsMemory(std::string(len, '\0')));
+}
+
+// SIGBUS raised when reading or writing past end of a mapped file.
+TEST_P(MMapFileParamTest, SigBusDeath) {
+  SetupGvisorDeathTest();
+
+  uintptr_t addr;
+  ASSERT_THAT(addr = Map(0, 2 * kPageSize, prot(), flags(), fd_.get(), 0),
+              SyscallSucceeds());
+
+  auto* start = reinterpret_cast<volatile char*>(addr + kPageSize);
+
+  // MMapFileTest makes a file kPageSize/2 long. The entire first page should be
+  // accessible, but anything beyond it should not.
+  if (prot() & PROT_WRITE) {
+    // Write beyond first page.
+    size_t len = strlen(kFileContents);
+    EXPECT_EXIT(std::copy(kFileContents, kFileContents + len, start),
+                ::testing::KilledBySignal(SIGBUS), "");
+  } else {
+    // Read beyond first page.
+    std::vector<char> in(kPageSize);
+    EXPECT_EXIT(std::copy(start, start + kPageSize, in.data()),
+                ::testing::KilledBySignal(SIGBUS), "");
+  }
+}
+
+// Tests that SIGBUS is not raised when reading or writing to a file-mapped
+// page before EOF, even if part of the mapping extends beyond EOF.
+//
+// See b/27877699.
+TEST_P(MMapFileParamTest, NoSigBusOnPagesBeforeEOF) {
+  uintptr_t addr;
+  ASSERT_THAT(addr = Map(0, 2 * kPageSize, prot(), flags(), fd_.get(), 0),
+              SyscallSucceeds());
+
+  // The test passes if this survives.
+  auto* start = reinterpret_cast<volatile char*>(addr + (kPageSize / 2) + 1);
+  size_t len = strlen(kFileContents);
+  if (prot() & PROT_WRITE) {
+    std::copy(kFileContents, kFileContents + len, start);
+  } else {
+    std::vector<char> in(len);
+    std::copy(start, start + len, in.data());
+  }
+}
+
+// Tests that SIGBUS is not raised when reading or writing from a file-mapped
+// page containing EOF, *after* the EOF.
+TEST_P(MMapFileParamTest, NoSigBusOnPageContainingEOF) {
+  uintptr_t addr;
+  ASSERT_THAT(addr = Map(0, 2 * kPageSize, prot(), flags(), fd_.get(), 0),
+              SyscallSucceeds());
+
+  // The test passes if this survives. (Technically addr+kPageSize/2 is already
+  // beyond EOF, but +1 to check for fencepost errors.)
+  auto* start = reinterpret_cast<volatile char*>(addr + (kPageSize / 2) + 1);
+  size_t len = strlen(kFileContents);
+  if (prot() & PROT_WRITE) {
+    std::copy(kFileContents, kFileContents + len, start);
+  } else {
+    std::vector<char> in(len);
+    std::copy(start, start + len, in.data());
+  }
+}
+
+// Tests that reading from writable shared file-mapped pages succeeds.
+//
+// On most platforms this is trivial, but when the file is mapped via the sentry
+// page cache (which does not yet support writing to shared mappings), a bug
+// caused reads to fail unnecessarily on such mappings. See b/28913513.
+TEST_F(MMapFileTest, ReadingWritableSharedFilePageSucceeds) {
+  uintptr_t addr;
+  size_t len = strlen(kFileContents);
+
+  ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED,
+                         fd_.get(), 0),
+              SyscallSucceeds());
+
+  std::vector<char> buf(kPageSize);
+  // The test passes if this survives.
+  std::copy(reinterpret_cast<volatile char*>(addr),
+            reinterpret_cast<volatile char*>(addr) + len, buf.data());
+}
+
+// Tests that EFAULT is returned when invoking a syscall that requires the OS to
+// read past end of file (resulting in a fault in sentry context in the gVisor
+// case). See b/28913513.
+TEST_F(MMapFileTest, InternalSigBus) {
+  uintptr_t addr;
+  ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE,
+                         fd_.get(), 0),
+              SyscallSucceeds());
+
+  // This depends on the fact that gVisor implements pipes internally.
+  int pipefd[2];
+  ASSERT_THAT(pipe(pipefd), SyscallSucceeds());
+  EXPECT_THAT(
+      write(pipefd[1], reinterpret_cast<void*>(addr + kPageSize), kPageSize),
+      SyscallFailsWithErrno(EFAULT));
+
+  EXPECT_THAT(close(pipefd[0]), SyscallSucceeds());
+  EXPECT_THAT(close(pipefd[1]), SyscallSucceeds());
+}
+
+// Like InternalSigBus, but test the WriteZerosAt path by reading from
+// /dev/zero to a shared mapping (so that the SIGBUS isn't caught during
+// copy-on-write breaking).
+TEST_F(MMapFileTest, InternalSigBusZeroing) {
+  uintptr_t addr;
+  ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED,
+                         fd_.get(), 0),
+              SyscallSucceeds());
+
+  const FileDescriptor dev_zero =
+      ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDONLY));
+  EXPECT_THAT(read(dev_zero.get(), reinterpret_cast<void*>(addr + kPageSize),
+                   kPageSize),
+              SyscallFailsWithErrno(EFAULT));
+}
+
+// Checks that mmaps with a length of uint64_t(-PAGE_SIZE + 1) or greater do not
+// induce a sentry panic (due to "rounding up" to 0).
+TEST_F(MMapTest, HugeLength) {
+  EXPECT_THAT(Map(0, static_cast<uint64_t>(-kPageSize + 1), PROT_NONE,
+                  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
+              SyscallFailsWithErrno(ENOMEM));
+}
+
+// Tests for a specific gVisor MM caching bug.
+TEST_F(MMapTest, AccessCOWInvalidatesCachedSegments) {
+  auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  auto fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDWR));
+  auto zero_fd = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDONLY));
+
+  // Get a two-page private mapping and fill it with 1s.
+  uintptr_t addr;
+  ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ | PROT_WRITE,
+                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
+              SyscallSucceeds());
+  memset(addr_, 1, 2 * kPageSize);
+  MaybeSave();
+
+  // Fork to make the mapping copy-on-write.
+  pid_t const pid = fork();
+  if (pid == 0) {
+    // The child process waits for the parent to SIGKILL it.
+    while (true) {
+      pause();
+    }
+  }
+  ASSERT_THAT(pid, SyscallSucceeds());
+  auto cleanup_child = Cleanup([&] {
+    EXPECT_THAT(kill(pid, SIGKILL), SyscallSucceeds());
+    int status;
+    EXPECT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid));
+  });
+
+  // Induce a read-only Access of the first page of the mapping, which will not
+  // cause a copy. The usermem.Segment should be cached.
+  ASSERT_THAT(PwriteFd(fd.get(), addr_, kPageSize, 0),
+              SyscallSucceedsWithValue(kPageSize));
+
+  // Induce a writable Access of both pages of the mapping. This should
+  // invalidate the cached Segment.
+  ASSERT_THAT(PreadFd(zero_fd.get(), addr_, 2 * kPageSize, 0),
+              SyscallSucceedsWithValue(2 * kPageSize));
+
+  // Induce a read-only Access of the first page of the mapping again. It should
+  // read the 0s that were stored in the mapping by the read from /dev/zero. If
+  // the read failed to invalidate the cached Segment, it will instead read the
+  // 1s in the stale page.
+  ASSERT_THAT(PwriteFd(fd.get(), addr_, kPageSize, 0),
+              SyscallSucceedsWithValue(kPageSize));
+  std::vector<char> buf(kPageSize);
+  ASSERT_THAT(PreadFd(fd.get(), buf.data(), kPageSize, 0),
+              SyscallSucceedsWithValue(kPageSize));
+  for (size_t i = 0; i < kPageSize; i++) {
+    ASSERT_EQ(0, buf[i]) << "at offset " << i;
+  }
+}
+
+TEST_F(MMapTest, NoReserve) {
+  const size_t kSize = 10 * 1 << 20;  // 10M
+  uintptr_t addr;
+  ASSERT_THAT(addr = Map(0, kSize, PROT_READ | PROT_WRITE,
+                         MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0),
+              SyscallSucceeds());
+  EXPECT_GT(addr, 0);
+
+  // Check that every page can be read/written. Technically, writing to memory
+  // could SIGSEGV in case there is no more memory available. In gVisor it
+  // would never happen though because NORESERVE is ignored. In Linux, it's
+  // possible to fail, but allocation is small enough that it's highly likely
+  // to succeed.
+  for (size_t j = 0; j < kSize; j += kPageSize) {
+    EXPECT_EQ(0, reinterpret_cast<char*>(addr)[j]);
+    reinterpret_cast<char*>(addr)[j] = j;
+  }
+}
+
+// Map more than the gVisor page-cache map unit (64k) and ensure that
+// it is consistent with reading from the file.
+TEST_F(MMapFileTest, Bug38498194) {
+  // Choose a sufficiently large map unit.
+  constexpr int kSize = 4 * 1024 * 1024;
+  EXPECT_THAT(ftruncate(fd_.get(), kSize), SyscallSucceeds());
+
+  // Map a large enough region so that multiple internal segments
+  // are created to back the mapping.
+  uintptr_t addr;
+  ASSERT_THAT(
+      addr = Map(0, kSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd_.get(), 0),
+      SyscallSucceeds());
+
+  std::vector<char> expect(kSize, 'a');
+  std::copy(expect.data(), expect.data() + expect.size(),
+            reinterpret_cast<volatile char*>(addr));
+
+  // Trigger writeback for gVisor. In Linux pages stay cached until
+  // it can't hold onto them anymore.
+  ASSERT_THAT(Unmap(), SyscallSucceeds());
+
+  std::vector<char> buf(kSize);
+  ASSERT_THAT(Read(buf.data(), buf.size()),
+              SyscallSucceedsWithValue(buf.size()));
+  EXPECT_EQ(buf, expect) << std::string(buf.data(), buf.size());
+}
+
+// Tests that reading from a file to a memory mapping of the same file does not
+// deadlock. See b/34813270.
+TEST_F(MMapFileTest, SelfRead) {
+  uintptr_t addr;
+  ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED,
+                         fd_.get(), 0),
+              SyscallSucceeds());
+  EXPECT_THAT(Read(reinterpret_cast<char*>(addr), kPageSize / 2),
+              SyscallSucceedsWithValue(kPageSize / 2));
+  // The resulting file contents are poorly-specified and irrelevant.
+}
+
+// Tests that writing to a file from a memory mapping of the same file does not
+// deadlock. Regression test for b/34813270.
+TEST_F(MMapFileTest, SelfWrite) {
+  uintptr_t addr;
+  ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0),
+              SyscallSucceeds());
+  EXPECT_THAT(Write(reinterpret_cast<char*>(addr), kPageSize / 2),
+              SyscallSucceedsWithValue(kPageSize / 2));
+  // The resulting file contents are poorly-specified and irrelevant.
+}
+
+TEST(MMapDeathTest, TruncateAfterCOWBreak) {
+  SetupGvisorDeathTest();
+
+  // Create and map a single-page file.
+  auto const temp_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  auto const fd = ASSERT_NO_ERRNO_AND_VALUE(Open(temp_file.path(), O_RDWR));
+  ASSERT_THAT(ftruncate(fd.get(), kPageSize), SyscallSucceeds());
+  auto const mapping = ASSERT_NO_ERRNO_AND_VALUE(Mmap(
+      nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd.get(), 0));
+
+  // Write to this mapping, causing the page to be copied for write.
+  memset(mapping.ptr(), 'a', mapping.len());
+  MaybeSave();  // Trigger a co-operative save cycle.
+
+  // Truncate the file and expect it to invalidate the copied page.
+  ASSERT_THAT(ftruncate(fd.get(), 0), SyscallSucceeds());
+  EXPECT_EXIT(*reinterpret_cast<volatile char*>(mapping.ptr()),
+              ::testing::KilledBySignal(SIGBUS), "");
+}
+
+// Regression test for #147.
+TEST(MMapNoFixtureTest, MapReadOnlyAfterCreateWriteOnly) {
+  std::string filename = NewTempAbsPath();
+
+  // We have to create the file O_RDONLY to reproduce the bug because
+  // fsgofer.localFile.Create() silently upgrades O_WRONLY to O_RDWR, causing
+  // the cached "write-only" FD to be read/write and therefore usable by mmap().
+  auto const ro_fd = ASSERT_NO_ERRNO_AND_VALUE(
+      Open(filename, O_RDONLY | O_CREAT | O_EXCL, 0666));
+
+  // Get a write-only FD for the same file, which should be ignored by mmap()
+  // (but isn't in #147).
+  auto const wo_fd = ASSERT_NO_ERRNO_AND_VALUE(Open(filename, O_WRONLY));
+  ASSERT_THAT(ftruncate(wo_fd.get(), kPageSize), SyscallSucceeds());
+
+  auto const mapping = ASSERT_NO_ERRNO_AND_VALUE(
+      Mmap(nullptr, kPageSize, PROT_READ, MAP_SHARED, ro_fd.get(), 0));
+  std::vector<char> buf(kPageSize);
+  // The test passes if this survives.
+  std::copy(static_cast<char*>(mapping.ptr()),
+            static_cast<char*>(mapping.endptr()), buf.data());
+}
+
+// Conditional on MAP_32BIT.
+// This flag is supported only on x86-64, for 64-bit programs.
+#ifdef __x86_64__
+
+TEST(MMapNoFixtureTest, Map32Bit) {
+  auto const mapping = ASSERT_NO_ERRNO_AND_VALUE(
+      MmapAnon(kPageSize, PROT_NONE, MAP_PRIVATE | MAP_32BIT));
+  EXPECT_LT(mapping.addr(), static_cast<uintptr_t>(1) << 32);
+  EXPECT_LE(mapping.endaddr(), static_cast<uintptr_t>(1) << 32);
+}
+
+#endif  // defined(__x86_64__)
+
+INSTANTIATE_TEST_SUITE_P(
+    ReadWriteSharedPrivate, MMapFileParamTest,
+    ::testing::Combine(::testing::ValuesIn({
+                           PROT_READ,
+                           PROT_WRITE,
+                           PROT_READ | PROT_WRITE,
+                       }),
+                       ::testing::ValuesIn({MAP_SHARED, MAP_PRIVATE})));
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/mount.cc b/test/syscalls/linux/mount.cc
new file mode 100644
index 000000000..a3e9745cf
--- /dev/null
+++ b/test/syscalls/linux/mount.cc
@@ -0,0 +1,327 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include <functional>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/strings/string_view.h"
+#include "absl/time/time.h"
+#include "test/util/capability_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/fs_util.h"
+#include "test/util/mount_util.h"
+#include "test/util/multiprocess_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(MountTest, MountBadFilesystem) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN)));
+
+  // Linux expects a valid target before it checks the file system name.
+  auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  EXPECT_THAT(mount("", dir.path().c_str(), "foobar", 0, ""),
+              SyscallFailsWithErrno(ENODEV));
+}
+
+TEST(MountTest, MountInvalidTarget) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN)));
+
+  auto const dir = NewTempAbsPath();
+  EXPECT_THAT(mount("", dir.c_str(), "tmpfs", 0, ""),
+              SyscallFailsWithErrno(ENOENT));
+}
+
+TEST(MountTest, MountPermDenied) {
+  // Clear CAP_SYS_ADMIN.
+  if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))) {
+    EXPECT_NO_ERRNO(SetCapability(CAP_SYS_ADMIN, false));
+  }
+
+  // Linux expects a valid target before checking capability.
+  auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  EXPECT_THAT(mount("", dir.path().c_str(), "", 0, ""),
+              SyscallFailsWithErrno(EPERM));
+}
+
+TEST(MountTest, UmountPermDenied) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN)));
+
+  auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  auto const mount =
+      ASSERT_NO_ERRNO_AND_VALUE(Mount("", dir.path(), "tmpfs", 0, "", 0));
+
+  // Drop privileges in another thread, so we can still unmount the mounted
+  // directory.
+  ScopedThread([&]() {
+    EXPECT_NO_ERRNO(SetCapability(CAP_SYS_ADMIN, false));
+    EXPECT_THAT(umount(dir.path().c_str()), SyscallFailsWithErrno(EPERM));
+  });
+}
+
+TEST(MountTest, MountOverBusy) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN)));
+
+  auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  auto const fd = ASSERT_NO_ERRNO_AND_VALUE(
+      Open(JoinPath(dir.path(), "foo"), O_CREAT | O_RDWR, 0777));
+
+  // Should be able to mount over a busy directory.
+  ASSERT_NO_ERRNO_AND_VALUE(Mount("", dir.path(), "tmpfs", 0, "", 0));
+}
+
+TEST(MountTest, OpenFileBusy) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN)));
+
+  auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  auto const mount = ASSERT_NO_ERRNO_AND_VALUE(
+      Mount("", dir.path(), "tmpfs", 0, "mode=0700", 0));
+  auto const fd = ASSERT_NO_ERRNO_AND_VALUE(
+      Open(JoinPath(dir.path(), "foo"), O_CREAT | O_RDWR, 0777));
+
+  // An open file should prevent unmounting.
+  EXPECT_THAT(umount(dir.path().c_str()), SyscallFailsWithErrno(EBUSY));
+}
+
+TEST(MountTest, UmountDetach) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN)));
+
+  // structure:
+  //
+  // dir (mount point)
+  //   subdir
+  //   file
+  //
+  // We show that we can walk around in the mount after detach-unmount dir.
+  //
+  // We show that even though dir is unreachable from outside the mount, we can
+  // still reach dir's (former) parent!
+  auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+
+  const struct stat before = ASSERT_NO_ERRNO_AND_VALUE(Stat(dir.path()));
+  auto mount =
+      ASSERT_NO_ERRNO_AND_VALUE(Mount("", dir.path(), "tmpfs", 0, "mode=0700",
+                                      /* umountflags= */ MNT_DETACH));
+  const struct stat after = ASSERT_NO_ERRNO_AND_VALUE(Stat(dir.path()));
+  EXPECT_NE(before.st_ino, after.st_ino);
+
+  // Create files in the new mount.
+  constexpr char kContents[] = "no no no";
+  auto const subdir =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(dir.path()));
+  auto const file = ASSERT_NO_ERRNO_AND_VALUE(
+      TempPath::CreateFileWith(dir.path(), kContents, 0777));
+
+  auto const dir_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(subdir.path(), O_RDONLY | O_DIRECTORY));
+  auto const fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY));
+
+  // Unmount the tmpfs.
+  mount.Release()();
+
+  const struct stat after2 = ASSERT_NO_ERRNO_AND_VALUE(Stat(dir.path()));
+  EXPECT_EQ(before.st_ino, after2.st_ino);
+
+  // Can still read file after unmounting.
+  std::vector<char> buf(sizeof(kContents));
+  EXPECT_THAT(ReadFd(fd.get(), buf.data(), buf.size()), SyscallSucceeds());
+
+  // Walk to dir.
+  auto const mounted_dir = ASSERT_NO_ERRNO_AND_VALUE(
+      OpenAt(dir_fd.get(), "..", O_DIRECTORY | O_RDONLY));
+  // Walk to dir/file.
+  auto const fd_again = ASSERT_NO_ERRNO_AND_VALUE(
+      OpenAt(mounted_dir.get(), std::string(Basename(file.path())), O_RDONLY));
+
+  std::vector<char> buf2(sizeof(kContents));
+  EXPECT_THAT(ReadFd(fd_again.get(), buf2.data(), buf2.size()),
+              SyscallSucceeds());
+  EXPECT_EQ(buf, buf2);
+
+  // Walking outside the unmounted realm should still work, too!
+  auto const dir_parent = ASSERT_NO_ERRNO_AND_VALUE(
+      OpenAt(mounted_dir.get(), "..", O_DIRECTORY | O_RDONLY));
+}
+
+TEST(MountTest, ActiveSubmountBusy) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN)));
+
+  auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  auto const mount1 = ASSERT_NO_ERRNO_AND_VALUE(
+      Mount("", dir.path(), "tmpfs", 0, "mode=0700", 0));
+
+  auto const dir2 =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(dir.path()));
+  auto const mount2 =
+      ASSERT_NO_ERRNO_AND_VALUE(Mount("", dir2.path(), "tmpfs", 0, "", 0));
+
+  // Since dir now has an active submount, should not be able to unmount.
+  EXPECT_THAT(umount(dir.path().c_str()), SyscallFailsWithErrno(EBUSY));
+}
+
+TEST(MountTest, MountTmpfs) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN)));
+
+  auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+
+  // NOTE(b/129868551): Inode IDs are only stable across S/R if we have an open
+  // FD for that inode. Since we are going to compare inode IDs below, get a
+  // FileDescriptor for this directory here, which will be closed automatically
+  // at the end of the test.
+  auto const fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(dir.path(), O_DIRECTORY, O_RDONLY));
+
+  const struct stat before = ASSERT_NO_ERRNO_AND_VALUE(Stat(dir.path()));
+
+  {
+    auto const mount = ASSERT_NO_ERRNO_AND_VALUE(
+        Mount("", dir.path(), "tmpfs", 0, "mode=0700", 0));
+
+    const struct stat s = ASSERT_NO_ERRNO_AND_VALUE(Stat(dir.path()));
+    EXPECT_EQ(s.st_mode, S_IFDIR | 0700);
+    EXPECT_NE(s.st_ino, before.st_ino);
+
+    EXPECT_NO_ERRNO(Open(JoinPath(dir.path(), "foo"), O_CREAT | O_RDWR, 0777));
+  }
+
+  // Now that dir is unmounted again, we should have the old inode back.
+  const struct stat after = ASSERT_NO_ERRNO_AND_VALUE(Stat(dir.path()));
+  EXPECT_EQ(before.st_ino, after.st_ino);
+}
+
+TEST(MountTest, MountTmpfsMagicValIgnored) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN)));
+
+  auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+
+  auto const mount = ASSERT_NO_ERRNO_AND_VALUE(
+      Mount("", dir.path(), "tmpfs", MS_MGC_VAL, "mode=0700", 0));
+}
+
+// Passing nullptr to data is equivalent to "".
+TEST(MountTest, NullData) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN)));
+
+  auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+
+  EXPECT_THAT(mount("", dir.path().c_str(), "tmpfs", 0, nullptr),
+              SyscallSucceeds());
+  EXPECT_THAT(umount2(dir.path().c_str(), 0), SyscallSucceeds());
+}
+
+TEST(MountTest, MountReadonly) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN)));
+
+  auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  auto const mount = ASSERT_NO_ERRNO_AND_VALUE(
+      Mount("", dir.path(), "tmpfs", MS_RDONLY, "mode=0777", 0));
+
+  const struct stat s = ASSERT_NO_ERRNO_AND_VALUE(Stat(dir.path()));
+  EXPECT_EQ(s.st_mode, S_IFDIR | 0777);
+
+  std::string const filename = JoinPath(dir.path(), "foo");
+  EXPECT_THAT(open(filename.c_str(), O_RDWR | O_CREAT, 0777),
+              SyscallFailsWithErrno(EROFS));
+}
+
+PosixErrorOr<absl::Time> ATime(absl::string_view file) {
+  struct stat s = {};
+  if (stat(std::string(file).c_str(), &s) == -1) {
+    return PosixError(errno, "stat failed");
+  }
+  return absl::TimeFromTimespec(s.st_atim);
+}
+
+TEST(MountTest, MountNoAtime) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN)));
+
+  auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  auto const mount = ASSERT_NO_ERRNO_AND_VALUE(
+      Mount("", dir.path(), "tmpfs", MS_NOATIME, "mode=0777", 0));
+
+  std::string const contents = "No no no, don't follow the instructions!";
+  auto const file = ASSERT_NO_ERRNO_AND_VALUE(
+      TempPath::CreateFileWith(dir.path(), contents, 0777));
+
+  absl::Time const before = ASSERT_NO_ERRNO_AND_VALUE(ATime(file.path()));
+
+  // Reading from the file should change the atime, but the MS_NOATIME flag
+  // should prevent that.
+  auto const fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR));
+  char buf[100];
+  int read_n;
+  ASSERT_THAT(read_n = read(fd.get(), buf, sizeof(buf)), SyscallSucceeds());
+  EXPECT_EQ(std::string(buf, read_n), contents);
+
+  absl::Time const after = ASSERT_NO_ERRNO_AND_VALUE(ATime(file.path()));
+
+  // Expect that atime hasn't changed.
+  EXPECT_EQ(before, after);
+}
+
+TEST(MountTest, MountNoExec) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN)));
+
+  auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  auto const mount = ASSERT_NO_ERRNO_AND_VALUE(
+      Mount("", dir.path(), "tmpfs", MS_NOEXEC, "mode=0777", 0));
+
+  std::string const contents = "No no no, don't follow the instructions!";
+  auto const file = ASSERT_NO_ERRNO_AND_VALUE(
+      TempPath::CreateFileWith(dir.path(), contents, 0777));
+
+  int execve_errno;
+  ASSERT_NO_ERRNO_AND_VALUE(
+      ForkAndExec(file.path(), {}, {}, nullptr, &execve_errno));
+  EXPECT_EQ(execve_errno, EACCES);
+}
+
+TEST(MountTest, RenameRemoveMountPoint) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN)));
+
+  auto const dir_parent = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  auto const dir =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(dir_parent.path()));
+  auto const new_dir = NewTempAbsPath();
+
+  auto const mount =
+      ASSERT_NO_ERRNO_AND_VALUE(Mount("", dir.path(), "tmpfs", 0, "", 0));
+
+  ASSERT_THAT(rename(dir.path().c_str(), new_dir.c_str()),
+              SyscallFailsWithErrno(EBUSY));
+
+  ASSERT_THAT(rmdir(dir.path().c_str()), SyscallFailsWithErrno(EBUSY));
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/mremap.cc b/test/syscalls/linux/mremap.cc
new file mode 100644
index 000000000..f0e5f7d82
--- /dev/null
+++ b/test/syscalls/linux/mremap.cc
@@ -0,0 +1,492 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <string.h>
+#include <sys/mman.h>
+
+#include <string>
+
+#include "gmock/gmock.h"
+#include "absl/strings/string_view.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/logging.h"
+#include "test/util/memory_util.h"
+#include "test/util/multiprocess_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+using ::testing::_;
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// Fixture for mremap tests parameterized by mmap flags.
+using MremapParamTest = ::testing::TestWithParam<int>;
+
+TEST_P(MremapParamTest, Noop) {
+  Mapping const m =
+      ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_NONE, GetParam()));
+
+  ASSERT_THAT(Mremap(m.ptr(), kPageSize, kPageSize, 0, nullptr),
+              IsPosixErrorOkAndHolds(m.ptr()));
+  EXPECT_TRUE(IsMapped(m.addr()));
+}
+
+TEST_P(MremapParamTest, InPlace_ShrinkingWholeVMA) {
+  Mapping const m =
+      ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(2 * kPageSize, PROT_NONE, GetParam()));
+
+  const auto rest = [&] {
+    // N.B. we must be in a single-threaded subprocess to ensure a
+    // background thread doesn't concurrently map the second page.
+    void* addr = mremap(m.ptr(), 2 * kPageSize, kPageSize, 0, nullptr);
+    TEST_PCHECK_MSG(addr != MAP_FAILED, "mremap failed");
+    TEST_CHECK(addr == m.ptr());
+    MaybeSave();
+
+    TEST_CHECK(IsMapped(m.addr()));
+    TEST_CHECK(!IsMapped(m.addr() + kPageSize));
+  };
+
+  EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0));
+}
+
+TEST_P(MremapParamTest, InPlace_ShrinkingPartialVMA) {
+  Mapping const m =
+      ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(3 * kPageSize, PROT_NONE, GetParam()));
+
+  const auto rest = [&] {
+    void* addr = mremap(m.ptr(), 2 * kPageSize, kPageSize, 0, nullptr);
+    TEST_PCHECK_MSG(addr != MAP_FAILED, "mremap failed");
+    TEST_CHECK(addr == m.ptr());
+    MaybeSave();
+
+    TEST_CHECK(IsMapped(m.addr()));
+    TEST_CHECK(!IsMapped(m.addr() + kPageSize));
+    TEST_CHECK(IsMapped(m.addr() + 2 * kPageSize));
+  };
+
+  EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0));
+}
+
+TEST_P(MremapParamTest, InPlace_ShrinkingAcrossVMAs) {
+  Mapping const m =
+      ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(3 * kPageSize, PROT_READ, GetParam()));
+  // Changing permissions on the first page forces it to become a separate vma.
+  ASSERT_THAT(mprotect(m.ptr(), kPageSize, PROT_NONE), SyscallSucceeds());
+
+  const auto rest = [&] {
+    // Both old_size and new_size now span two vmas; mremap
+    // shouldn't care.
+    void* addr = mremap(m.ptr(), 3 * kPageSize, 2 * kPageSize, 0, nullptr);
+    TEST_PCHECK_MSG(addr != MAP_FAILED, "mremap failed");
+    TEST_CHECK(addr == m.ptr());
+    MaybeSave();
+
+    TEST_CHECK(IsMapped(m.addr()));
+    TEST_CHECK(IsMapped(m.addr() + kPageSize));
+    TEST_CHECK(!IsMapped(m.addr() + 2 * kPageSize));
+  };
+
+  EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0));
+}
+
+TEST_P(MremapParamTest, InPlace_ExpansionSuccess) {
+  Mapping const m =
+      ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(2 * kPageSize, PROT_NONE, GetParam()));
+
+  const auto rest = [&] {
+    // Unmap the second page so that the first can be expanded back into it.
+    //
+    // N.B. we must be in a single-threaded subprocess to ensure a
+    // background thread doesn't concurrently map this page.
+    TEST_PCHECK(
+        munmap(reinterpret_cast<void*>(m.addr() + kPageSize), kPageSize) == 0);
+    MaybeSave();
+
+    void* addr = mremap(m.ptr(), kPageSize, 2 * kPageSize, 0, nullptr);
+    TEST_PCHECK_MSG(addr != MAP_FAILED, "mremap failed");
+    TEST_CHECK(addr == m.ptr());
+    MaybeSave();
+
+    TEST_CHECK(IsMapped(m.addr()));
+    TEST_CHECK(IsMapped(m.addr() + kPageSize));
+  };
+
+  EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0));
+}
+
+TEST_P(MremapParamTest, InPlace_ExpansionFailure) {
+  Mapping const m =
+      ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(3 * kPageSize, PROT_NONE, GetParam()));
+
+  const auto rest = [&] {
+    // Unmap the second page, leaving a one-page hole. Trying to expand the
+    // first page to three pages should fail since the original third page
+    // is still mapped.
+    TEST_PCHECK(
+        munmap(reinterpret_cast<void*>(m.addr() + kPageSize), kPageSize) == 0);
+    MaybeSave();
+
+    void* addr = mremap(m.ptr(), kPageSize, 3 * kPageSize, 0, nullptr);
+    TEST_CHECK_MSG(addr == MAP_FAILED, "mremap unexpectedly succeeded");
+    TEST_PCHECK_MSG(errno == ENOMEM, "mremap failed with wrong errno");
+    MaybeSave();
+
+    TEST_CHECK(IsMapped(m.addr()));
+    TEST_CHECK(!IsMapped(m.addr() + kPageSize));
+    TEST_CHECK(IsMapped(m.addr() + 2 * kPageSize));
+  };
+
+  EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0));
+}
+
+TEST_P(MremapParamTest, MayMove_Expansion) {
+  Mapping const m =
+      ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(3 * kPageSize, PROT_NONE, GetParam()));
+
+  const auto rest = [&] {
+    // Unmap the second page, leaving a one-page hole. Trying to expand the
+    // first page to three pages with MREMAP_MAYMOVE should force the
+    // mapping to be relocated since the original third page is still
+    // mapped.
+    TEST_PCHECK(
+        munmap(reinterpret_cast<void*>(m.addr() + kPageSize), kPageSize) == 0);
+    MaybeSave();
+
+    void* addr2 =
+        mremap(m.ptr(), kPageSize, 3 * kPageSize, MREMAP_MAYMOVE, nullptr);
+    TEST_PCHECK_MSG(addr2 != MAP_FAILED, "mremap failed");
+    MaybeSave();
+
+    const Mapping m2 = Mapping(addr2, 3 * kPageSize);
+    TEST_CHECK(m.addr() != m2.addr());
+
+    TEST_CHECK(!IsMapped(m.addr()));
+    TEST_CHECK(!IsMapped(m.addr() + kPageSize));
+    TEST_CHECK(IsMapped(m.addr() + 2 * kPageSize));
+    TEST_CHECK(IsMapped(m2.addr()));
+    TEST_CHECK(IsMapped(m2.addr() + kPageSize));
+    TEST_CHECK(IsMapped(m2.addr() + 2 * kPageSize));
+  };
+
+  EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0));
+}
+
+TEST_P(MremapParamTest, Fixed_SourceAndDestinationCannotOverlap) {
+  Mapping const m =
+      ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_NONE, GetParam()));
+
+  ASSERT_THAT(Mremap(m.ptr(), kPageSize, kPageSize,
+                     MREMAP_MAYMOVE | MREMAP_FIXED, m.ptr()),
+              PosixErrorIs(EINVAL, _));
+  EXPECT_TRUE(IsMapped(m.addr()));
+}
+
+TEST_P(MremapParamTest, Fixed_SameSize) {
+  Mapping const src =
+      ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_NONE, GetParam()));
+  Mapping const dst =
+      ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_NONE, GetParam()));
+
+  const auto rest = [&] {
+    // Unmap dst to create a hole.
+    TEST_PCHECK(munmap(dst.ptr(), kPageSize) == 0);
+    MaybeSave();
+
+    void* addr = mremap(src.ptr(), kPageSize, kPageSize,
+                        MREMAP_MAYMOVE | MREMAP_FIXED, dst.ptr());
+    TEST_PCHECK_MSG(addr != MAP_FAILED, "mremap failed");
+    TEST_CHECK(addr == dst.ptr());
+    MaybeSave();
+
+    TEST_CHECK(!IsMapped(src.addr()));
+    TEST_CHECK(IsMapped(dst.addr()));
+  };
+
+  EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0));
+}
+
+TEST_P(MremapParamTest, Fixed_SameSize_Unmapping) {
+  // Like the Fixed_SameSize case, but expect mremap to unmap the destination
+  // automatically.
+  Mapping const src =
+      ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_NONE, GetParam()));
+  Mapping const dst =
+      ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_NONE, GetParam()));
+
+  const auto rest = [&] {
+    void* addr = mremap(src.ptr(), kPageSize, kPageSize,
+                        MREMAP_MAYMOVE | MREMAP_FIXED, dst.ptr());
+    TEST_PCHECK_MSG(addr != MAP_FAILED, "mremap failed");
+    TEST_CHECK(addr == dst.ptr());
+    MaybeSave();
+
+    TEST_CHECK(!IsMapped(src.addr()));
+    TEST_CHECK(IsMapped(dst.addr()));
+  };
+
+  EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0));
+}
+
+TEST_P(MremapParamTest, Fixed_ShrinkingWholeVMA) {
+  Mapping const src =
+      ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(2 * kPageSize, PROT_NONE, GetParam()));
+  Mapping const dst =
+      ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(2 * kPageSize, PROT_NONE, GetParam()));
+
+  const auto rest = [&] {
+    // Unmap dst so we can check that mremap does not keep the
+    // second page.
+    TEST_PCHECK(munmap(dst.ptr(), 2 * kPageSize) == 0);
+    MaybeSave();
+
+    void* addr = mremap(src.ptr(), 2 * kPageSize, kPageSize,
+                        MREMAP_MAYMOVE | MREMAP_FIXED, dst.ptr());
+    TEST_PCHECK_MSG(addr != MAP_FAILED, "mremap failed");
+    TEST_CHECK(addr == dst.ptr());
+    MaybeSave();
+
+    TEST_CHECK(!IsMapped(src.addr()));
+    TEST_CHECK(!IsMapped(src.addr() + kPageSize));
+    TEST_CHECK(IsMapped(dst.addr()));
+    TEST_CHECK(!IsMapped(dst.addr() + kPageSize));
+  };
+
+  EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0));
+}
+
+TEST_P(MremapParamTest, Fixed_ShrinkingPartialVMA) {
+  Mapping const src =
+      ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(3 * kPageSize, PROT_NONE, GetParam()));
+  Mapping const dst =
+      ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(2 * kPageSize, PROT_NONE, GetParam()));
+
+  const auto rest = [&] {
+    // Unmap dst so we can check that mremap does not keep the
+    // second page.
+    TEST_PCHECK(munmap(dst.ptr(), 2 * kPageSize) == 0);
+    MaybeSave();
+
+    void* addr = mremap(src.ptr(), 2 * kPageSize, kPageSize,
+                        MREMAP_MAYMOVE | MREMAP_FIXED, dst.ptr());
+    TEST_PCHECK_MSG(addr != MAP_FAILED, "mremap failed");
+    TEST_CHECK(addr == dst.ptr());
+    MaybeSave();
+
+    TEST_CHECK(!IsMapped(src.addr()));
+    TEST_CHECK(!IsMapped(src.addr() + kPageSize));
+    TEST_CHECK(IsMapped(src.addr() + 2 * kPageSize));
+    TEST_CHECK(IsMapped(dst.addr()));
+    TEST_CHECK(!IsMapped(dst.addr() + kPageSize));
+  };
+
+  EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0));
+}
+
+TEST_P(MremapParamTest, Fixed_ShrinkingAcrossVMAs) {
+  Mapping const src =
+      ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(3 * kPageSize, PROT_READ, GetParam()));
+  // Changing permissions on the first page forces it to become a separate vma.
+  ASSERT_THAT(mprotect(src.ptr(), kPageSize, PROT_NONE), SyscallSucceeds());
+  Mapping const dst =
+      ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(2 * kPageSize, PROT_NONE, GetParam()));
+
+  const auto rest = [&] {
+    // Unlike flags=0, MREMAP_FIXED requires that [old_address,
+    // old_address+new_size) only spans a single vma.
+    void* addr = mremap(src.ptr(), 3 * kPageSize, 2 * kPageSize,
+                        MREMAP_MAYMOVE | MREMAP_FIXED, dst.ptr());
+    TEST_CHECK_MSG(addr == MAP_FAILED, "mremap unexpectedly succeeded");
+    TEST_PCHECK_MSG(errno == EFAULT, "mremap failed with wrong errno");
+    MaybeSave();
+
+    TEST_CHECK(IsMapped(src.addr()));
+    TEST_CHECK(IsMapped(src.addr() + kPageSize));
+    // Despite failing, mremap should have unmapped [old_address+new_size,
+    // old_address+old_size) (i.e. the third page).
+    TEST_CHECK(!IsMapped(src.addr() + 2 * kPageSize));
+    // Despite failing, mremap should have unmapped the destination pages.
+    TEST_CHECK(!IsMapped(dst.addr()));
+    TEST_CHECK(!IsMapped(dst.addr() + kPageSize));
+  };
+
+  EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0));
+}
+
+TEST_P(MremapParamTest, Fixed_Expansion) {
+  Mapping const src =
+      ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_NONE, GetParam()));
+  Mapping const dst =
+      ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(2 * kPageSize, PROT_NONE, GetParam()));
+
+  const auto rest = [&] {
+    // Unmap dst so we can check that mremap actually maps all pages
+    // at the destination.
+    TEST_PCHECK(munmap(dst.ptr(), 2 * kPageSize) == 0);
+    MaybeSave();
+
+    void* addr = mremap(src.ptr(), kPageSize, 2 * kPageSize,
+                        MREMAP_MAYMOVE | MREMAP_FIXED, dst.ptr());
+    TEST_PCHECK_MSG(addr != MAP_FAILED, "mremap failed");
+    TEST_CHECK(addr == dst.ptr());
+    MaybeSave();
+
+    TEST_CHECK(!IsMapped(src.addr()));
+    TEST_CHECK(IsMapped(dst.addr()));
+    TEST_CHECK(IsMapped(dst.addr() + kPageSize));
+  };
+
+  EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0));
+}
+
+INSTANTIATE_TEST_SUITE_P(PrivateShared, MremapParamTest,
+                         ::testing::Values(MAP_PRIVATE, MAP_SHARED));
+
+// mremap with old_size == 0 only works with MAP_SHARED after Linux 4.14
+// (dba58d3b8c50 "mm/mremap: fail map duplication attempts for private
+// mappings").
+
+TEST(MremapTest, InPlace_Copy) {
+  Mapping const m =
+      ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_NONE, MAP_SHARED));
+  EXPECT_THAT(Mremap(m.ptr(), 0, kPageSize, 0, nullptr),
+              PosixErrorIs(ENOMEM, _));
+}
+
+TEST(MremapTest, MayMove_Copy) {
+  Mapping const m =
+      ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_NONE, MAP_SHARED));
+
+  // Remainder of this test executes in a subprocess to ensure that if mremap
+  // incorrectly removes m, it is not remapped by another thread.
+  const auto rest = [&] {
+    void* ptr = mremap(m.ptr(), 0, kPageSize, MREMAP_MAYMOVE, nullptr);
+    MaybeSave();
+    TEST_PCHECK_MSG(ptr != MAP_FAILED, "mremap failed");
+    TEST_CHECK(ptr != m.ptr());
+    TEST_CHECK(IsMapped(m.addr()));
+    TEST_CHECK(IsMapped(reinterpret_cast<uintptr_t>(ptr)));
+  };
+  EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0));
+}
+
+TEST(MremapTest, MustMove_Copy) {
+  Mapping const src =
+      ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_NONE, MAP_SHARED));
+  Mapping const dst =
+      ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_NONE, MAP_PRIVATE));
+
+  // Remainder of this test executes in a subprocess to ensure that if mremap
+  // incorrectly removes src, it is not remapped by another thread.
+  const auto rest = [&] {
+    void* ptr = mremap(src.ptr(), 0, kPageSize, MREMAP_MAYMOVE | MREMAP_FIXED,
+                       dst.ptr());
+    MaybeSave();
+    TEST_PCHECK_MSG(ptr != MAP_FAILED, "mremap failed");
+    TEST_CHECK(ptr == dst.ptr());
+    TEST_CHECK(IsMapped(src.addr()));
+    TEST_CHECK(IsMapped(dst.addr()));
+  };
+  EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0));
+}
+
+void ExpectAllBytesAre(absl::string_view v, char c) {
+  for (size_t i = 0; i < v.size(); i++) {
+    ASSERT_EQ(v[i], c) << "at offset " << i;
+  }
+}
+
+TEST(MremapTest, ExpansionPreservesCOWPagesAndExposesNewFilePages) {
+  // Create a file with 3 pages. The first is filled with 'a', the second is
+  // filled with 'b', and the third is filled with 'c'.
+  TempPath const file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR));
+  ASSERT_THAT(WriteFd(fd.get(), std::string(kPageSize, 'a').c_str(), kPageSize),
+              SyscallSucceedsWithValue(kPageSize));
+  ASSERT_THAT(WriteFd(fd.get(), std::string(kPageSize, 'b').c_str(), kPageSize),
+              SyscallSucceedsWithValue(kPageSize));
+  ASSERT_THAT(WriteFd(fd.get(), std::string(kPageSize, 'c').c_str(), kPageSize),
+              SyscallSucceedsWithValue(kPageSize));
+
+  // Create a private mapping of the first 2 pages, and fill the second page
+  // with 'd'.
+  Mapping const src = ASSERT_NO_ERRNO_AND_VALUE(Mmap(nullptr, 2 * kPageSize,
+                                                     PROT_READ | PROT_WRITE,
+                                                     MAP_PRIVATE, fd.get(), 0));
+  memset(reinterpret_cast<void*>(src.addr() + kPageSize), 'd', kPageSize);
+  MaybeSave();
+
+  // Move the mapping while expanding it to 3 pages. The resulting mapping
+  // should contain the original first page of the file (filled with 'a'),
+  // followed by the private copy of the second page (filled with 'd'), followed
+  // by the newly-mapped third page of the file (filled with 'c').
+  Mapping const dst = ASSERT_NO_ERRNO_AND_VALUE(
+      MmapAnon(3 * kPageSize, PROT_NONE, MAP_PRIVATE));
+  ASSERT_THAT(Mremap(src.ptr(), 2 * kPageSize, 3 * kPageSize,
+                     MREMAP_MAYMOVE | MREMAP_FIXED, dst.ptr()),
+              IsPosixErrorOkAndHolds(dst.ptr()));
+  auto const v = dst.view();
+  ExpectAllBytesAre(v.substr(0, kPageSize), 'a');
+  ExpectAllBytesAre(v.substr(kPageSize, kPageSize), 'd');
+  ExpectAllBytesAre(v.substr(2 * kPageSize, kPageSize), 'c');
+}
+
+TEST(MremapDeathTest, SharedAnon) {
+  SetupGvisorDeathTest();
+
+  // Reserve 4 pages of address space.
+  Mapping const reserved = ASSERT_NO_ERRNO_AND_VALUE(
+      MmapAnon(4 * kPageSize, PROT_NONE, MAP_PRIVATE));
+
+  // Create a 2-page shared anonymous mapping at the beginning of the
+  // reservation. Fill the first page with 'a' and the second with 'b'.
+  Mapping const m = ASSERT_NO_ERRNO_AND_VALUE(
+      Mmap(reserved.ptr(), 2 * kPageSize, PROT_READ | PROT_WRITE,
+           MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0));
+  memset(m.ptr(), 'a', kPageSize);
+  memset(reinterpret_cast<void*>(m.addr() + kPageSize), 'b', kPageSize);
+  MaybeSave();
+
+  // Shrink the mapping to 1 page in-place.
+  ASSERT_THAT(Mremap(m.ptr(), 2 * kPageSize, kPageSize, 0, m.ptr()),
+              IsPosixErrorOkAndHolds(m.ptr()));
+
+  // Expand the mapping to 3 pages, moving it forward by 1 page in the process
+  // since the old and new mappings can't overlap.
+  void* const new_m = reinterpret_cast<void*>(m.addr() + kPageSize);
+  ASSERT_THAT(Mremap(m.ptr(), kPageSize, 3 * kPageSize,
+                     MREMAP_MAYMOVE | MREMAP_FIXED, new_m),
+              IsPosixErrorOkAndHolds(new_m));
+
+  // The first 2 pages of the mapping should still contain the data we wrote
+  // (i.e. shrinking should not have discarded the second page's data), while
+  // touching the third page should raise SIGBUS.
+  auto const v =
+      absl::string_view(static_cast<char const*>(new_m), 3 * kPageSize);
+  ExpectAllBytesAre(v.substr(0, kPageSize), 'a');
+  ExpectAllBytesAre(v.substr(kPageSize, kPageSize), 'b');
+  EXPECT_EXIT(ExpectAllBytesAre(v.substr(2 * kPageSize, kPageSize), '\0'),
+              ::testing::KilledBySignal(SIGBUS), "");
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/msync.cc b/test/syscalls/linux/msync.cc
new file mode 100644
index 000000000..2b2b6aef9
--- /dev/null
+++ b/test/syscalls/linux/msync.cc
@@ -0,0 +1,151 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include <functional>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "test/util/file_descriptor.h"
+#include "test/util/memory_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// Parameters for msync tests. Use a std::tuple so we can use
+// ::testing::Combine.
+using MsyncTestParam =
+    std::tuple<int,                                    // msync flags
+               std::function<PosixErrorOr<Mapping>()>  // returns mapping to
+                                                       // msync
+               >;
+
+class MsyncParameterizedTest : public ::testing::TestWithParam<MsyncTestParam> {
+ protected:
+  int msync_flags() const { return std::get<0>(GetParam()); }
+
+  PosixErrorOr<Mapping> GetMapping() const { return std::get<1>(GetParam())(); }
+};
+
+// All valid msync(2) flag combinations, not including MS_INVALIDATE. ("Linux
+// permits a call to msync() that specifies neither [MS_SYNC or MS_ASYNC], with
+// semantics that are (currently) equivalent to specifying MS_ASYNC." -
+// msync(2))
+constexpr std::initializer_list<int> kMsyncFlags = {MS_SYNC, MS_ASYNC, 0};
+
+// Returns functions that return mappings that should be successfully
+// msync()able.
+std::vector<std::function<PosixErrorOr<Mapping>()>> SyncableMappings() {
+  std::vector<std::function<PosixErrorOr<Mapping>()>> funcs;
+  for (bool const writable : {false, true}) {
+    for (int const mflags : {MAP_PRIVATE, MAP_SHARED}) {
+      int const prot = PROT_READ | (writable ? PROT_WRITE : 0);
+      int const oflags = O_CREAT | (writable ? O_RDWR : O_RDONLY);
+      funcs.push_back([=] { return MmapAnon(kPageSize, prot, mflags); });
+      funcs.push_back([=]() -> PosixErrorOr<Mapping> {
+        std::string const path = NewTempAbsPath();
+        ASSIGN_OR_RETURN_ERRNO(auto fd, Open(path, oflags, 0644));
+        // Don't unlink the file since that breaks save/restore. Just let the
+        // test infrastructure clean up all of our temporary files when we're
+        // done.
+        return Mmap(nullptr, kPageSize, prot, mflags, fd.get(), 0);
+      });
+    }
+  }
+  return funcs;
+}
+
+PosixErrorOr<Mapping> NoMappings() {
+  return PosixError(EINVAL, "unexpected attempt to create a mapping");
+}
+
+// "Fixture" for msync tests that hold for all valid flags, but do not create
+// mappings.
+using MsyncNoMappingTest = MsyncParameterizedTest;
+
+TEST_P(MsyncNoMappingTest, UnmappedAddressWithZeroLengthSucceeds) {
+  EXPECT_THAT(msync(nullptr, 0, msync_flags()), SyscallSucceeds());
+}
+
+TEST_P(MsyncNoMappingTest, UnmappedAddressWithNonzeroLengthFails) {
+  EXPECT_THAT(msync(nullptr, kPageSize, msync_flags()),
+              SyscallFailsWithErrno(ENOMEM));
+}
+
+INSTANTIATE_TEST_SUITE_P(All, MsyncNoMappingTest,
+                         ::testing::Combine(::testing::ValuesIn(kMsyncFlags),
+                                            ::testing::Values(NoMappings)));
+
+// "Fixture" for msync tests that are not parameterized by msync flags, but do
+// create mappings.
+using MsyncNoFlagsTest = MsyncParameterizedTest;
+
+TEST_P(MsyncNoFlagsTest, BothSyncAndAsyncFails) {
+  auto m = ASSERT_NO_ERRNO_AND_VALUE(GetMapping());
+  EXPECT_THAT(msync(m.ptr(), m.len(), MS_SYNC | MS_ASYNC),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    All, MsyncNoFlagsTest,
+    ::testing::Combine(::testing::Values(0),  // ignored
+                       ::testing::ValuesIn(SyncableMappings())));
+
+// "Fixture" for msync tests parameterized by both msync flags and sources of
+// mappings.
+using MsyncFullParamTest = MsyncParameterizedTest;
+
+TEST_P(MsyncFullParamTest, NormallySucceeds) {
+  auto m = ASSERT_NO_ERRNO_AND_VALUE(GetMapping());
+  EXPECT_THAT(msync(m.ptr(), m.len(), msync_flags()), SyscallSucceeds());
+}
+
+TEST_P(MsyncFullParamTest, UnalignedLengthSucceeds) {
+  auto m = ASSERT_NO_ERRNO_AND_VALUE(GetMapping());
+  EXPECT_THAT(msync(m.ptr(), m.len() - 1, msync_flags()), SyscallSucceeds());
+}
+
+TEST_P(MsyncFullParamTest, UnalignedAddressFails) {
+  auto m = ASSERT_NO_ERRNO_AND_VALUE(GetMapping());
+  EXPECT_THAT(
+      msync(reinterpret_cast<void*>(m.addr() + 1), m.len() - 1, msync_flags()),
+      SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_P(MsyncFullParamTest, InvalidateUnlockedSucceeds) {
+  auto m = ASSERT_NO_ERRNO_AND_VALUE(GetMapping());
+  EXPECT_THAT(msync(m.ptr(), m.len(), msync_flags() | MS_INVALIDATE),
+              SyscallSucceeds());
+}
+
+// The test for MS_INVALIDATE on mlocked pages is in mlock.cc since it requires
+// probing for mlock support.
+
+INSTANTIATE_TEST_SUITE_P(
+    All, MsyncFullParamTest,
+    ::testing::Combine(::testing::ValuesIn(kMsyncFlags),
+                       ::testing::ValuesIn(SyncableMappings())));
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/munmap.cc b/test/syscalls/linux/munmap.cc
new file mode 100644
index 000000000..067241f4d
--- /dev/null
+++ b/test/syscalls/linux/munmap.cc
@@ -0,0 +1,53 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sys/mman.h>
+
+#include "gtest/gtest.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+class MunmapTest : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    m_ = mmap(nullptr, kPageSize, PROT_READ | PROT_WRITE,
+              MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+    ASSERT_NE(MAP_FAILED, m_);
+  }
+
+  void* m_ = nullptr;
+};
+
+TEST_F(MunmapTest, HappyCase) {
+  EXPECT_THAT(munmap(m_, kPageSize), SyscallSucceeds());
+}
+
+TEST_F(MunmapTest, ZeroLength) {
+  EXPECT_THAT(munmap(m_, 0), SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_F(MunmapTest, LastPageRoundUp) {
+  // Attempt to unmap up to and including the last page.
+  EXPECT_THAT(munmap(m_, static_cast<size_t>(-kPageSize + 1)),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/network_namespace.cc b/test/syscalls/linux/network_namespace.cc
new file mode 100644
index 000000000..133fdecf0
--- /dev/null
+++ b/test/syscalls/linux/network_namespace.cc
@@ -0,0 +1,52 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <net/if.h>
+#include <sched.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/capability_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+TEST(NetworkNamespaceTest, LoopbackExists) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN)));
+
+  ScopedThread t([&] {
+    ASSERT_THAT(unshare(CLONE_NEWNET), SyscallSucceedsWithValue(0));
+
+    // TODO(gvisor.dev/issue/1833): Update this to test that only "lo" exists.
+    // Check loopback device exists.
+    int sock = socket(AF_INET, SOCK_DGRAM, 0);
+    ASSERT_THAT(sock, SyscallSucceeds());
+    struct ifreq ifr;
+    strncpy(ifr.ifr_name, "lo", IFNAMSIZ);
+    EXPECT_THAT(ioctl(sock, SIOCGIFINDEX, &ifr), SyscallSucceeds())
+        << "lo cannot be found";
+  });
+}
+
+}  // namespace
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/open.cc b/test/syscalls/linux/open.cc
new file mode 100644
index 000000000..bb7d108e8
--- /dev/null
+++ b/test/syscalls/linux/open.cc
@@ -0,0 +1,451 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/capability.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/memory/memory.h"
+#include "test/syscalls/linux/file_base.h"
+#include "test/util/capability_util.h"
+#include "test/util/cleanup.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/fs_util.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// This test is currently very rudimentary.
+//
+// There are plenty of extra cases to cover once the sentry supports them.
+//
+// Different types of opens:
+// * O_CREAT
+// * O_DIRECTORY
+// * O_NOFOLLOW
+// * O_PATH <- Will we ever support this?
+//
+// Special operations on open:
+// * O_EXCL
+//
+// Special files:
+// * Blocking behavior for a named pipe.
+//
+// Different errors:
+// * EACCES
+// * EEXIST
+// * ENAMETOOLONG
+// * ELOOP
+// * ENOTDIR
+// * EPERM
+class OpenTest : public FileTest {
+  void SetUp() override {
+    FileTest::SetUp();
+
+    ASSERT_THAT(
+        write(test_file_fd_.get(), test_data_.c_str(), test_data_.length()),
+        SyscallSucceedsWithValue(test_data_.length()));
+    EXPECT_THAT(lseek(test_file_fd_.get(), 0, SEEK_SET), SyscallSucceeds());
+  }
+
+ public:
+  const std::string test_data_ = "hello world\n";
+};
+
+TEST_F(OpenTest, OTrunc) {
+  auto dirpath = JoinPath(GetAbsoluteTestTmpdir(), "truncd");
+  ASSERT_THAT(mkdir(dirpath.c_str(), 0777), SyscallSucceeds());
+  ASSERT_THAT(open(dirpath.c_str(), O_TRUNC, 0666),
+              SyscallFailsWithErrno(EISDIR));
+}
+
+TEST_F(OpenTest, OTruncAndReadOnlyDir) {
+  auto dirpath = JoinPath(GetAbsoluteTestTmpdir(), "truncd");
+  ASSERT_THAT(mkdir(dirpath.c_str(), 0777), SyscallSucceeds());
+  ASSERT_THAT(open(dirpath.c_str(), O_TRUNC | O_RDONLY, 0666),
+              SyscallFailsWithErrno(EISDIR));
+}
+
+TEST_F(OpenTest, OTruncAndReadOnlyFile) {
+  auto dirpath = JoinPath(GetAbsoluteTestTmpdir(), "truncfile");
+  const FileDescriptor existing =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(dirpath.c_str(), O_RDWR | O_CREAT, 0666));
+  const FileDescriptor otrunc = ASSERT_NO_ERRNO_AND_VALUE(
+      Open(dirpath.c_str(), O_TRUNC | O_RDONLY, 0666));
+}
+
+TEST_F(OpenTest, ReadOnly) {
+  char buf;
+  const FileDescriptor ro_file =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDONLY));
+
+  EXPECT_THAT(read(ro_file.get(), &buf, 1), SyscallSucceedsWithValue(1));
+  EXPECT_THAT(lseek(ro_file.get(), 0, SEEK_SET), SyscallSucceeds());
+  EXPECT_THAT(write(ro_file.get(), &buf, 1), SyscallFailsWithErrno(EBADF));
+}
+
+TEST_F(OpenTest, WriteOnly) {
+  char buf;
+  const FileDescriptor wo_file =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_WRONLY));
+
+  EXPECT_THAT(read(wo_file.get(), &buf, 1), SyscallFailsWithErrno(EBADF));
+  EXPECT_THAT(lseek(wo_file.get(), 0, SEEK_SET), SyscallSucceeds());
+  EXPECT_THAT(write(wo_file.get(), &buf, 1), SyscallSucceedsWithValue(1));
+}
+
+TEST_F(OpenTest, ReadWrite) {
+  char buf;
+  const FileDescriptor rw_file =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR));
+
+  EXPECT_THAT(read(rw_file.get(), &buf, 1), SyscallSucceedsWithValue(1));
+  EXPECT_THAT(lseek(rw_file.get(), 0, SEEK_SET), SyscallSucceeds());
+  EXPECT_THAT(write(rw_file.get(), &buf, 1), SyscallSucceedsWithValue(1));
+}
+
+TEST_F(OpenTest, RelPath) {
+  auto name = std::string(Basename(test_file_name_));
+
+  ASSERT_THAT(chdir(GetAbsoluteTestTmpdir().c_str()), SyscallSucceeds());
+  const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(name, O_RDONLY));
+}
+
+TEST_F(OpenTest, AbsPath) {
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDONLY));
+}
+
+TEST_F(OpenTest, AtRelPath) {
+  auto name = std::string(Basename(test_file_name_));
+  const FileDescriptor dirfd = ASSERT_NO_ERRNO_AND_VALUE(
+      Open(GetAbsoluteTestTmpdir(), O_RDONLY | O_DIRECTORY));
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(OpenAt(dirfd.get(), name, O_RDONLY));
+}
+
+TEST_F(OpenTest, AtAbsPath) {
+  const FileDescriptor dirfd = ASSERT_NO_ERRNO_AND_VALUE(
+      Open(GetAbsoluteTestTmpdir(), O_RDONLY | O_DIRECTORY));
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(OpenAt(dirfd.get(), test_file_name_, O_RDONLY));
+}
+
+TEST_F(OpenTest, OpenNoFollowSymlink) {
+  const std::string link_path = JoinPath(GetAbsoluteTestTmpdir(), "link");
+  ASSERT_THAT(symlink(test_file_name_.c_str(), link_path.c_str()),
+              SyscallSucceeds());
+  auto cleanup = Cleanup([link_path]() {
+    EXPECT_THAT(unlink(link_path.c_str()), SyscallSucceeds());
+  });
+
+  // Open will succeed without O_NOFOLLOW and fails with O_NOFOLLOW.
+  const FileDescriptor fd2 =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(link_path, O_RDONLY));
+  ASSERT_THAT(open(link_path.c_str(), O_RDONLY | O_NOFOLLOW),
+              SyscallFailsWithErrno(ELOOP));
+}
+
+TEST_F(OpenTest, OpenNoFollowStillFollowsLinksInPath) {
+  // We will create the following structure:
+  // tmp_folder/real_folder/file
+  // tmp_folder/sym_folder -> tmp_folder/real_folder
+  //
+  // We will then open tmp_folder/sym_folder/file with O_NOFOLLOW and it
+  // should succeed as O_NOFOLLOW only applies to the final path component.
+  auto tmp_path =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(GetAbsoluteTestTmpdir()));
+  auto sym_path = ASSERT_NO_ERRNO_AND_VALUE(
+      TempPath::CreateSymlinkTo(GetAbsoluteTestTmpdir(), tmp_path.path()));
+  auto file_path =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(tmp_path.path()));
+
+  auto path_via_symlink = JoinPath(sym_path.path(), Basename(file_path.path()));
+  const FileDescriptor fd2 =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(path_via_symlink, O_RDONLY | O_NOFOLLOW));
+}
+
+// Test that open(2) can follow symlinks that point back to the same tree.
+// Test sets up files as follows:
+//   root/child/symlink => redirects to ../..
+//   root/child/target => regular file
+//
+// open("root/child/symlink/root/child/file")
+TEST_F(OpenTest, SymlinkRecurse) {
+  auto root =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(GetAbsoluteTestTmpdir()));
+  auto child = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(root.path()));
+  auto symlink = ASSERT_NO_ERRNO_AND_VALUE(
+      TempPath::CreateSymlinkTo(child.path(), "../.."));
+  auto target = ASSERT_NO_ERRNO_AND_VALUE(
+      TempPath::CreateFileWith(child.path(), "abc", 0644));
+  auto path_via_symlink =
+      JoinPath(symlink.path(), Basename(root.path()), Basename(child.path()),
+               Basename(target.path()));
+  const auto contents =
+      ASSERT_NO_ERRNO_AND_VALUE(GetContents(path_via_symlink));
+  ASSERT_EQ(contents, "abc");
+}
+
+TEST_F(OpenTest, Fault) {
+  char* totally_not_null = nullptr;
+  ASSERT_THAT(open(totally_not_null, O_RDONLY), SyscallFailsWithErrno(EFAULT));
+}
+
+TEST_F(OpenTest, AppendOnly) {
+  // First write some data to the fresh file.
+  const int64_t kBufSize = 1024;
+  std::vector<char> buf(kBufSize, 'a');
+
+  FileDescriptor fd0 = ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR));
+
+  std::fill(buf.begin(), buf.end(), 'a');
+  EXPECT_THAT(WriteFd(fd0.get(), buf.data(), buf.size()),
+              SyscallSucceedsWithValue(buf.size()));
+  fd0.reset();  // Close the file early.
+
+  // Next get two handles to the same file. We open two files because we want
+  // to make sure that appending is respected between them.
+  const FileDescriptor fd1 =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR | O_APPEND));
+  EXPECT_THAT(lseek(fd1.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(0));
+
+  const FileDescriptor fd2 =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR | O_APPEND));
+  EXPECT_THAT(lseek(fd2.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(0));
+
+  // Then try to write to the first file and make sure the bytes are appended.
+  EXPECT_THAT(WriteFd(fd1.get(), buf.data(), buf.size()),
+              SyscallSucceedsWithValue(buf.size()));
+
+  // Check that the size of the file is correct and that the offset has been
+  // incremented to that size.
+  struct stat s0;
+  EXPECT_THAT(fstat(fd1.get(), &s0), SyscallSucceeds());
+  EXPECT_EQ(s0.st_size, kBufSize * 2);
+  EXPECT_THAT(lseek(fd1.get(), 0, SEEK_CUR),
+              SyscallSucceedsWithValue(kBufSize * 2));
+
+  // Then try to write to the second file and make sure the bytes are appended.
+  EXPECT_THAT(WriteFd(fd2.get(), buf.data(), buf.size()),
+              SyscallSucceedsWithValue(buf.size()));
+
+  // Check that the size of the file is correct and that the offset has been
+  // incremented to that size.
+  struct stat s1;
+  EXPECT_THAT(fstat(fd2.get(), &s1), SyscallSucceeds());
+  EXPECT_EQ(s1.st_size, kBufSize * 3);
+  EXPECT_THAT(lseek(fd2.get(), 0, SEEK_CUR),
+              SyscallSucceedsWithValue(kBufSize * 3));
+}
+
+TEST_F(OpenTest, AppendConcurrentWrite) {
+  constexpr int kThreadCount = 5;
+  constexpr int kBytesPerThread = 10000;
+  std::unique_ptr<ScopedThread> threads[kThreadCount];
+
+  // In case of the uncached policy, we expect that a file system can be changed
+  // externally, so we create a new inode each time when we open a file and we
+  // can't guarantee that writes to files with O_APPEND will work correctly.
+  SKIP_IF(getenv("GVISOR_GOFER_UNCACHED"));
+
+  EXPECT_THAT(truncate(test_file_name_.c_str(), 0), SyscallSucceeds());
+
+  std::string filename = test_file_name_;
+  DisableSave ds;  // Too many syscalls.
+  // Start kThreadCount threads which will write concurrently into the same
+  // file.
+  for (int i = 0; i < kThreadCount; i++) {
+    threads[i] = absl::make_unique<ScopedThread>([filename]() {
+      const FileDescriptor fd =
+          ASSERT_NO_ERRNO_AND_VALUE(Open(filename, O_RDWR | O_APPEND));
+
+      for (int j = 0; j < kBytesPerThread; j++) {
+        EXPECT_THAT(WriteFd(fd.get(), &j, 1), SyscallSucceedsWithValue(1));
+      }
+    });
+  }
+  for (int i = 0; i < kThreadCount; i++) {
+    threads[i]->Join();
+  }
+
+  // Check that the size of the file is correct.
+  struct stat st;
+  EXPECT_THAT(stat(test_file_name_.c_str(), &st), SyscallSucceeds());
+  EXPECT_EQ(st.st_size, kThreadCount * kBytesPerThread);
+}
+
+TEST_F(OpenTest, Truncate) {
+  {
+    // First write some data to the new file and close it.
+    FileDescriptor fd0 =
+        ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_WRONLY));
+    std::vector<char> orig(10, 'a');
+    EXPECT_THAT(WriteFd(fd0.get(), orig.data(), orig.size()),
+                SyscallSucceedsWithValue(orig.size()));
+  }
+
+  // Then open with truncate and verify that offset is set to 0.
+  const FileDescriptor fd1 =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR | O_TRUNC));
+  EXPECT_THAT(lseek(fd1.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(0));
+
+  // Then write less data to the file and ensure the old content is gone.
+  std::vector<char> want(5, 'b');
+  EXPECT_THAT(WriteFd(fd1.get(), want.data(), want.size()),
+              SyscallSucceedsWithValue(want.size()));
+
+  struct stat stat;
+  EXPECT_THAT(fstat(fd1.get(), &stat), SyscallSucceeds());
+  EXPECT_EQ(stat.st_size, want.size());
+  EXPECT_THAT(lseek(fd1.get(), 0, SEEK_CUR),
+              SyscallSucceedsWithValue(want.size()));
+
+  // Read the data and ensure only the latest write is in the file.
+  std::vector<char> got(want.size() + 1, 'c');
+  ASSERT_THAT(pread(fd1.get(), got.data(), got.size(), 0),
+              SyscallSucceedsWithValue(want.size()));
+  EXPECT_EQ(memcmp(want.data(), got.data(), want.size()), 0)
+      << "rbuf=" << got.data();
+  EXPECT_EQ(got.back(), 'c');  // Last byte should not have been modified.
+}
+
+TEST_F(OpenTest, NameTooLong) {
+  char buf[4097] = {};
+  memset(buf, 'a', 4097);
+  EXPECT_THAT(open(buf, O_RDONLY), SyscallFailsWithErrno(ENAMETOOLONG));
+}
+
+TEST_F(OpenTest, DotsFromRoot) {
+  const FileDescriptor rootfd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open("/", O_RDONLY | O_DIRECTORY));
+  const FileDescriptor other_rootfd =
+      ASSERT_NO_ERRNO_AND_VALUE(OpenAt(rootfd.get(), "..", O_RDONLY));
+}
+
+TEST_F(OpenTest, DirectoryWritableFails) {
+  ASSERT_THAT(open(GetAbsoluteTestTmpdir().c_str(), O_RDWR),
+              SyscallFailsWithErrno(EISDIR));
+}
+
+TEST_F(OpenTest, FileNotDirectory) {
+  // Create a file and try to open it with O_DIRECTORY.
+  auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  ASSERT_THAT(open(file.path().c_str(), O_RDONLY | O_DIRECTORY),
+              SyscallFailsWithErrno(ENOTDIR));
+}
+
+TEST_F(OpenTest, Null) {
+  char c = '\0';
+  ASSERT_THAT(open(&c, O_RDONLY), SyscallFailsWithErrno(ENOENT));
+}
+
+// NOTE(b/119785738): While the man pages specify that this behavior should be
+// undefined, Linux truncates the file on opening read only if we have write
+// permission, so we will too.
+TEST_F(OpenTest, CanTruncateReadOnly) {
+  const FileDescriptor fd1 =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDONLY | O_TRUNC));
+
+  struct stat stat;
+  EXPECT_THAT(fstat(fd1.get(), &stat), SyscallSucceeds());
+  EXPECT_EQ(stat.st_size, 0);
+}
+
+// If we don't have read permission on the file, opening with
+// O_TRUNC should fail.
+TEST_F(OpenTest, CanTruncateReadOnlyNoWritePermission_NoRandomSave) {
+  // Drop capabilities that allow us to override file permissions.
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+
+  const DisableSave ds;  // Permissions are dropped.
+  ASSERT_THAT(chmod(test_file_name_.c_str(), S_IRUSR | S_IRGRP),
+              SyscallSucceeds());
+
+  ASSERT_THAT(open(test_file_name_.c_str(), O_RDONLY | O_TRUNC),
+              SyscallFailsWithErrno(EACCES));
+
+  const FileDescriptor fd1 =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDONLY));
+
+  struct stat stat;
+  EXPECT_THAT(fstat(fd1.get(), &stat), SyscallSucceeds());
+  EXPECT_EQ(stat.st_size, test_data_.size());
+}
+
+// If we don't have read permission but have write permission, opening O_WRONLY
+// and O_TRUNC should succeed.
+TEST_F(OpenTest, CanTruncateWriteOnlyNoReadPermission_NoRandomSave) {
+  const DisableSave ds;  // Permissions are dropped.
+
+  EXPECT_THAT(fchmod(test_file_fd_.get(), S_IWUSR | S_IWGRP),
+              SyscallSucceeds());
+
+  const FileDescriptor fd1 =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_WRONLY | O_TRUNC));
+
+  EXPECT_THAT(fchmod(test_file_fd_.get(), S_IRUSR | S_IRGRP),
+              SyscallSucceeds());
+
+  const FileDescriptor fd2 =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDONLY));
+
+  struct stat stat;
+  EXPECT_THAT(fstat(fd2.get(), &stat), SyscallSucceeds());
+  EXPECT_EQ(stat.st_size, 0);
+}
+
+TEST_F(OpenTest, CanTruncateWithStrangePermissions) {
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false));
+  const DisableSave ds;  // Permissions are dropped.
+  std::string path = NewTempAbsPath();
+  int fd;
+  // Create a file without user permissions.
+  EXPECT_THAT(  // SAVE_BELOW
+      fd = open(path.c_str(), O_CREAT | O_TRUNC | O_WRONLY, 055),
+      SyscallSucceeds());
+  EXPECT_THAT(close(fd), SyscallSucceeds());
+
+  // Cannot open file because we are owner and have no permissions set.
+  EXPECT_THAT(open(path.c_str(), O_RDONLY), SyscallFailsWithErrno(EACCES));
+
+  // We *can* chmod the file, because we are the owner.
+  EXPECT_THAT(chmod(path.c_str(), 0755), SyscallSucceeds());
+
+  // Now we can open the file again.
+  EXPECT_THAT(fd = open(path.c_str(), O_RDWR), SyscallSucceeds());
+  EXPECT_THAT(close(fd), SyscallSucceeds());
+}
+
+TEST_F(OpenTest, OpenNonDirectoryWithTrailingSlash) {
+  const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  const std::string bad_path = file.path() + "/";
+  EXPECT_THAT(open(bad_path.c_str(), O_RDONLY), SyscallFailsWithErrno(ENOTDIR));
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/open_create.cc b/test/syscalls/linux/open_create.cc
new file mode 100644
index 000000000..51eacf3f2
--- /dev/null
+++ b/test/syscalls/linux/open_create.cc
@@ -0,0 +1,155 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "test/util/capability_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/fs_util.h"
+#include "test/util/temp_path.h"
+#include "test/util/temp_umask.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+TEST(CreateTest, TmpFile) {
+  int fd;
+  EXPECT_THAT(fd = open(JoinPath(GetAbsoluteTestTmpdir(), "a").c_str(),
+                        O_RDWR | O_CREAT, 0666),
+              SyscallSucceeds());
+  EXPECT_THAT(close(fd), SyscallSucceeds());
+}
+
+TEST(CreateTest, ExistingFile) {
+  int fd;
+  EXPECT_THAT(
+      fd = open(JoinPath(GetAbsoluteTestTmpdir(), "ExistingFile").c_str(),
+                O_RDWR | O_CREAT, 0666),
+      SyscallSucceeds());
+  EXPECT_THAT(close(fd), SyscallSucceeds());
+
+  EXPECT_THAT(
+      fd = open(JoinPath(GetAbsoluteTestTmpdir(), "ExistingFile").c_str(),
+                O_RDWR | O_CREAT, 0666),
+      SyscallSucceeds());
+  EXPECT_THAT(close(fd), SyscallSucceeds());
+}
+
+TEST(CreateTest, CreateAtFile) {
+  int dirfd;
+  EXPECT_THAT(dirfd = open(GetAbsoluteTestTmpdir().c_str(), O_DIRECTORY, 0666),
+              SyscallSucceeds());
+  EXPECT_THAT(openat(dirfd, "CreateAtFile", O_RDWR | O_CREAT, 0666),
+              SyscallSucceeds());
+  EXPECT_THAT(close(dirfd), SyscallSucceeds());
+}
+
+TEST(CreateTest, HonorsUmask_NoRandomSave) {
+  const DisableSave ds;  // file cannot be re-opened as writable.
+  TempUmask mask(0222);
+  int fd;
+  ASSERT_THAT(
+      fd = open(JoinPath(GetAbsoluteTestTmpdir(), "UmaskedFile").c_str(),
+                O_RDWR | O_CREAT, 0666),
+      SyscallSucceeds());
+  struct stat statbuf;
+  ASSERT_THAT(fstat(fd, &statbuf), SyscallSucceeds());
+  EXPECT_EQ(0444, statbuf.st_mode & 0777);
+  EXPECT_THAT(close(fd), SyscallSucceeds());
+}
+
+TEST(CreateTest, CreateExclusively) {
+  std::string filename = NewTempAbsPath();
+
+  int fd;
+  ASSERT_THAT(fd = open(filename.c_str(), O_CREAT | O_RDWR, 0644),
+              SyscallSucceeds());
+  EXPECT_THAT(close(fd), SyscallSucceeds());
+
+  EXPECT_THAT(open(filename.c_str(), O_CREAT | O_EXCL | O_RDWR, 0644),
+              SyscallFailsWithErrno(EEXIST));
+}
+
+TEST(CreateTeast, CreatWithOTrunc) {
+  std::string dirpath = JoinPath(GetAbsoluteTestTmpdir(), "truncd");
+  ASSERT_THAT(mkdir(dirpath.c_str(), 0777), SyscallSucceeds());
+  ASSERT_THAT(open(dirpath.c_str(), O_CREAT | O_TRUNC, 0666),
+              SyscallFailsWithErrno(EISDIR));
+}
+
+TEST(CreateTeast, CreatDirWithOTruncAndReadOnly) {
+  std::string dirpath = JoinPath(GetAbsoluteTestTmpdir(), "truncd");
+  ASSERT_THAT(mkdir(dirpath.c_str(), 0777), SyscallSucceeds());
+  ASSERT_THAT(open(dirpath.c_str(), O_CREAT | O_TRUNC | O_RDONLY, 0666),
+              SyscallFailsWithErrno(EISDIR));
+}
+
+TEST(CreateTeast, CreatFileWithOTruncAndReadOnly) {
+  std::string dirpath = JoinPath(GetAbsoluteTestTmpdir(), "truncfile");
+  int dirfd;
+  ASSERT_THAT(dirfd = open(dirpath.c_str(), O_RDWR | O_CREAT, 0666),
+              SyscallSucceeds());
+  ASSERT_THAT(open(dirpath.c_str(), O_CREAT | O_TRUNC | O_RDONLY, 0666),
+              SyscallSucceeds());
+  ASSERT_THAT(close(dirfd), SyscallSucceeds());
+}
+
+TEST(CreateTest, CreateFailsOnUnpermittedDir) {
+  // Make sure we don't have CAP_DAC_OVERRIDE, since that allows the user to
+  // always override directory permissions.
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+  ASSERT_THAT(open("/foo", O_CREAT | O_RDWR, 0644),
+              SyscallFailsWithErrno(EACCES));
+}
+
+TEST(CreateTest, CreateFailsOnDirWithoutWritePerms) {
+  // Make sure we don't have CAP_DAC_OVERRIDE, since that allows the user to
+  // always override directory permissions.
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+  auto parent = ASSERT_NO_ERRNO_AND_VALUE(
+      TempPath::CreateDirWith(GetAbsoluteTestTmpdir(), 0555));
+  auto file = JoinPath(parent.path(), "foo");
+  ASSERT_THAT(open(file.c_str(), O_CREAT | O_RDWR, 0644),
+              SyscallFailsWithErrno(EACCES));
+}
+
+// A file originally created RW, but opened RO can later be opened RW.
+// Regression test for b/65385065.
+TEST(CreateTest, OpenCreateROThenRW) {
+  TempPath file(NewTempAbsPath());
+
+  // Create a RW file, but only open it RO.
+  FileDescriptor fd1 = ASSERT_NO_ERRNO_AND_VALUE(
+      Open(file.path(), O_CREAT | O_EXCL | O_RDONLY, 0644));
+
+  // Now get a RW FD.
+  FileDescriptor fd2 = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR));
+
+  // fd1 is not writable, but fd2 is.
+  char c = 'a';
+  EXPECT_THAT(WriteFd(fd1.get(), &c, 1), SyscallFailsWithErrno(EBADF));
+  EXPECT_THAT(WriteFd(fd2.get(), &c, 1), SyscallSucceedsWithValue(1));
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/packet_socket.cc b/test/syscalls/linux/packet_socket.cc
new file mode 100644
index 000000000..5ac68feb4
--- /dev/null
+++ b/test/syscalls/linux/packet_socket.cc
@@ -0,0 +1,440 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <arpa/inet.h>
+#include <ifaddrs.h>
+#include <linux/capability.h>
+#include <linux/if_arp.h>
+#include <linux/if_packet.h>
+#include <net/ethernet.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "absl/base/internal/endian.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/capability_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/test_util.h"
+
+// Some of these tests involve sending packets via AF_PACKET sockets and the
+// loopback interface. Because AF_PACKET circumvents so much of the networking
+// stack, Linux sees these packets as "martian", i.e. they claim to be to/from
+// localhost but don't have the usual associated data. Thus Linux drops them by
+// default. You can see where this happens by following the code at:
+//
+// - net/ipv4/ip_input.c:ip_rcv_finish, which calls
+// - net/ipv4/route.c:ip_route_input_noref, which calls
+// - net/ipv4/route.c:ip_route_input_slow, which finds and drops martian
+//   packets.
+//
+// To tell Linux not to drop these packets, you need to tell it to accept our
+// funny packets (which are completely valid and correct, but lack associated
+// in-kernel data because we use AF_PACKET):
+//
+// echo 1 >> /proc/sys/net/ipv4/conf/lo/accept_local
+// echo 1 >> /proc/sys/net/ipv4/conf/lo/route_localnet
+//
+// These tests require CAP_NET_RAW to run.
+
+// TODO(gvisor.dev/issue/173): gVisor support.
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+using ::testing::AnyOf;
+using ::testing::Eq;
+
+constexpr char kMessage[] = "soweoneul malhaebwa";
+constexpr in_port_t kPort = 0x409c;  // htons(40000)
+
+//
+// "Cooked" tests. Cooked AF_PACKET sockets do not contain link layer
+// headers, and provide link layer destination/source information via a
+// returned struct sockaddr_ll.
+//
+
+// Send kMessage via sock to loopback
+void SendUDPMessage(int sock) {
+  struct sockaddr_in dest = {};
+  dest.sin_port = kPort;
+  dest.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+  dest.sin_family = AF_INET;
+  EXPECT_THAT(sendto(sock, kMessage, sizeof(kMessage), 0,
+                     reinterpret_cast<struct sockaddr*>(&dest), sizeof(dest)),
+              SyscallSucceedsWithValue(sizeof(kMessage)));
+}
+
+// Send an IP packet and make sure ETH_P_<something else> doesn't pick it up.
+TEST(BasicCookedPacketTest, WrongType) {
+  if (!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) {
+    ASSERT_THAT(socket(AF_PACKET, SOCK_DGRAM, ETH_P_PUP),
+                SyscallFailsWithErrno(EPERM));
+    GTEST_SKIP();
+  }
+
+  FileDescriptor sock = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(AF_PACKET, SOCK_DGRAM, htons(ETH_P_PUP)));
+
+  // Let's use a simple IP payload: a UDP datagram.
+  FileDescriptor udp_sock =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, 0));
+  SendUDPMessage(udp_sock.get());
+
+  // Wait and make sure the socket never becomes readable.
+  struct pollfd pfd = {};
+  pfd.fd = sock.get();
+  pfd.events = POLLIN;
+  EXPECT_THAT(RetryEINTR(poll)(&pfd, 1, 1000), SyscallSucceedsWithValue(0));
+}
+
+// Tests for "cooked" (SOCK_DGRAM) packet(7) sockets.
+class CookedPacketTest : public ::testing::TestWithParam<int> {
+ protected:
+  // Creates a socket to be used in tests.
+  void SetUp() override;
+
+  // Closes the socket created by SetUp().
+  void TearDown() override;
+
+  // Gets the device index of the loopback device.
+  int GetLoopbackIndex();
+
+  // The socket used for both reading and writing.
+  int socket_;
+};
+
+void CookedPacketTest::SetUp() {
+  if (!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) {
+    ASSERT_THAT(socket(AF_PACKET, SOCK_DGRAM, htons(GetParam())),
+                SyscallFailsWithErrno(EPERM));
+    GTEST_SKIP();
+  }
+
+  if (!IsRunningOnGvisor()) {
+    FileDescriptor acceptLocal = ASSERT_NO_ERRNO_AND_VALUE(
+        Open("/proc/sys/net/ipv4/conf/lo/accept_local", O_RDONLY));
+    FileDescriptor routeLocalnet = ASSERT_NO_ERRNO_AND_VALUE(
+        Open("/proc/sys/net/ipv4/conf/lo/route_localnet", O_RDONLY));
+    char enabled;
+    ASSERT_THAT(read(acceptLocal.get(), &enabled, 1),
+                SyscallSucceedsWithValue(1));
+    ASSERT_EQ(enabled, '1');
+    ASSERT_THAT(read(routeLocalnet.get(), &enabled, 1),
+                SyscallSucceedsWithValue(1));
+    ASSERT_EQ(enabled, '1');
+  }
+
+  ASSERT_THAT(socket_ = socket(AF_PACKET, SOCK_DGRAM, htons(GetParam())),
+              SyscallSucceeds());
+}
+
+void CookedPacketTest::TearDown() {
+  // TearDown will be run even if we skip the test.
+  if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) {
+    EXPECT_THAT(close(socket_), SyscallSucceeds());
+  }
+}
+
+int CookedPacketTest::GetLoopbackIndex() {
+  struct ifreq ifr;
+  snprintf(ifr.ifr_name, IFNAMSIZ, "lo");
+  EXPECT_THAT(ioctl(socket_, SIOCGIFINDEX, &ifr), SyscallSucceeds());
+  EXPECT_NE(ifr.ifr_ifindex, 0);
+  return ifr.ifr_ifindex;
+}
+
+// Receive and verify the message via packet socket on interface.
+void ReceiveMessage(int sock, int ifindex) {
+  // Wait for the socket to become readable.
+  struct pollfd pfd = {};
+  pfd.fd = sock;
+  pfd.events = POLLIN;
+  EXPECT_THAT(RetryEINTR(poll)(&pfd, 1, 2000), SyscallSucceedsWithValue(1));
+
+  // Read and verify the data.
+  constexpr size_t packet_size =
+      sizeof(struct iphdr) + sizeof(struct udphdr) + sizeof(kMessage);
+  char buf[64];
+  struct sockaddr_ll src = {};
+  socklen_t src_len = sizeof(src);
+  ASSERT_THAT(recvfrom(sock, buf, sizeof(buf), 0,
+                       reinterpret_cast<struct sockaddr*>(&src), &src_len),
+              SyscallSucceedsWithValue(packet_size));
+
+  // sockaddr_ll ends with an 8 byte physical address field, but ethernet
+  // addresses only use 6 bytes.  Linux used to return sizeof(sockaddr_ll)-2
+  // here, but since commit b2cf86e1563e33a14a1c69b3e508d15dc12f804c returns
+  // sizeof(sockaddr_ll).
+  ASSERT_THAT(src_len, AnyOf(Eq(sizeof(src)), Eq(sizeof(src) - 2)));
+
+  // TODO(b/129292371): Verify protocol once we return it.
+  // Verify the source address.
+  EXPECT_EQ(src.sll_family, AF_PACKET);
+  EXPECT_EQ(src.sll_ifindex, ifindex);
+  EXPECT_EQ(src.sll_halen, ETH_ALEN);
+  // This came from the loopback device, so the address is all 0s.
+  for (int i = 0; i < src.sll_halen; i++) {
+    EXPECT_EQ(src.sll_addr[i], 0);
+  }
+
+  // Verify the IP header. We memcpy to deal with pointer aligment.
+  struct iphdr ip = {};
+  memcpy(&ip, buf, sizeof(ip));
+  EXPECT_EQ(ip.ihl, 5);
+  EXPECT_EQ(ip.version, 4);
+  EXPECT_EQ(ip.tot_len, htons(packet_size));
+  EXPECT_EQ(ip.protocol, IPPROTO_UDP);
+  EXPECT_EQ(ip.daddr, htonl(INADDR_LOOPBACK));
+  EXPECT_EQ(ip.saddr, htonl(INADDR_LOOPBACK));
+
+  // Verify the UDP header. We memcpy to deal with pointer aligment.
+  struct udphdr udp = {};
+  memcpy(&udp, buf + sizeof(iphdr), sizeof(udp));
+  EXPECT_EQ(udp.dest, kPort);
+  EXPECT_EQ(udp.len, htons(sizeof(udphdr) + sizeof(kMessage)));
+
+  // Verify the payload.
+  char* payload = reinterpret_cast<char*>(buf + sizeof(iphdr) + sizeof(udphdr));
+  EXPECT_EQ(strncmp(payload, kMessage, sizeof(kMessage)), 0);
+}
+
+// Receive via a packet socket.
+TEST_P(CookedPacketTest, Receive) {
+  // Let's use a simple IP payload: a UDP datagram.
+  FileDescriptor udp_sock =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, 0));
+  SendUDPMessage(udp_sock.get());
+
+  // Receive and verify the data.
+  int loopback_index = GetLoopbackIndex();
+  ReceiveMessage(socket_, loopback_index);
+}
+
+// Send via a packet socket.
+TEST_P(CookedPacketTest, Send) {
+  // TODO(b/129292371): Remove once we support packet socket writing.
+  SKIP_IF(IsRunningOnGvisor());
+
+  // Let's send a UDP packet and receive it using a regular UDP socket.
+  FileDescriptor udp_sock =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, 0));
+  struct sockaddr_in bind_addr = {};
+  bind_addr.sin_family = AF_INET;
+  bind_addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+  bind_addr.sin_port = kPort;
+  ASSERT_THAT(
+      bind(udp_sock.get(), reinterpret_cast<struct sockaddr*>(&bind_addr),
+           sizeof(bind_addr)),
+      SyscallSucceeds());
+
+  // Set up the destination physical address.
+  struct sockaddr_ll dest = {};
+  dest.sll_family = AF_PACKET;
+  dest.sll_halen = ETH_ALEN;
+  dest.sll_ifindex = GetLoopbackIndex();
+  dest.sll_protocol = htons(ETH_P_IP);
+  // We're sending to the loopback device, so the address is all 0s.
+  memset(dest.sll_addr, 0x00, ETH_ALEN);
+
+  // Set up the IP header.
+  struct iphdr iphdr = {0};
+  iphdr.ihl = 5;
+  iphdr.version = 4;
+  iphdr.tos = 0;
+  iphdr.tot_len =
+      htons(sizeof(struct iphdr) + sizeof(struct udphdr) + sizeof(kMessage));
+  // Get a pseudo-random ID. If we clash with an in-use ID the test will fail,
+  // but we have no way of getting an ID we know to be good.
+  srand(*reinterpret_cast<unsigned int*>(&iphdr));
+  iphdr.id = rand();
+  // Linux sets this bit ("do not fragment") for small packets.
+  iphdr.frag_off = 1 << 6;
+  iphdr.ttl = 64;
+  iphdr.protocol = IPPROTO_UDP;
+  iphdr.daddr = htonl(INADDR_LOOPBACK);
+  iphdr.saddr = htonl(INADDR_LOOPBACK);
+  iphdr.check = IPChecksum(iphdr);
+
+  // Set up the UDP header.
+  struct udphdr udphdr = {};
+  udphdr.source = kPort;
+  udphdr.dest = kPort;
+  udphdr.len = htons(sizeof(udphdr) + sizeof(kMessage));
+  udphdr.check = UDPChecksum(iphdr, udphdr, kMessage, sizeof(kMessage));
+
+  // Copy both headers and the payload into our packet buffer.
+  char send_buf[sizeof(iphdr) + sizeof(udphdr) + sizeof(kMessage)];
+  memcpy(send_buf, &iphdr, sizeof(iphdr));
+  memcpy(send_buf + sizeof(iphdr), &udphdr, sizeof(udphdr));
+  memcpy(send_buf + sizeof(iphdr) + sizeof(udphdr), kMessage, sizeof(kMessage));
+
+  // Send it.
+  ASSERT_THAT(sendto(socket_, send_buf, sizeof(send_buf), 0,
+                     reinterpret_cast<struct sockaddr*>(&dest), sizeof(dest)),
+              SyscallSucceedsWithValue(sizeof(send_buf)));
+
+  // Wait for the packet to become available on both sockets.
+  struct pollfd pfd = {};
+  pfd.fd = udp_sock.get();
+  pfd.events = POLLIN;
+  ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, 5000), SyscallSucceedsWithValue(1));
+  pfd.fd = socket_;
+  pfd.events = POLLIN;
+  ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, 5000), SyscallSucceedsWithValue(1));
+
+  // Receive on the packet socket.
+  char recv_buf[sizeof(send_buf)];
+  ASSERT_THAT(recv(socket_, recv_buf, sizeof(recv_buf), 0),
+              SyscallSucceedsWithValue(sizeof(recv_buf)));
+  ASSERT_EQ(memcmp(recv_buf, send_buf, sizeof(send_buf)), 0);
+
+  // Receive on the UDP socket.
+  struct sockaddr_in src;
+  socklen_t src_len = sizeof(src);
+  ASSERT_THAT(recvfrom(udp_sock.get(), recv_buf, sizeof(recv_buf), MSG_DONTWAIT,
+                       reinterpret_cast<struct sockaddr*>(&src), &src_len),
+              SyscallSucceedsWithValue(sizeof(kMessage)));
+  // Check src and payload.
+  EXPECT_EQ(strncmp(recv_buf, kMessage, sizeof(kMessage)), 0);
+  EXPECT_EQ(src.sin_family, AF_INET);
+  EXPECT_EQ(src.sin_port, kPort);
+  EXPECT_EQ(src.sin_addr.s_addr, htonl(INADDR_LOOPBACK));
+}
+
+// Bind and receive via packet socket.
+TEST_P(CookedPacketTest, BindReceive) {
+  struct sockaddr_ll bind_addr = {};
+  bind_addr.sll_family = AF_PACKET;
+  bind_addr.sll_protocol = htons(GetParam());
+  bind_addr.sll_ifindex = GetLoopbackIndex();
+
+  ASSERT_THAT(bind(socket_, reinterpret_cast<struct sockaddr*>(&bind_addr),
+                   sizeof(bind_addr)),
+              SyscallSucceeds());
+
+  // Let's use a simple IP payload: a UDP datagram.
+  FileDescriptor udp_sock =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, 0));
+  SendUDPMessage(udp_sock.get());
+
+  // Receive and verify the data.
+  ReceiveMessage(socket_, bind_addr.sll_ifindex);
+}
+
+// Double Bind socket.
+TEST_P(CookedPacketTest, DoubleBind) {
+  struct sockaddr_ll bind_addr = {};
+  bind_addr.sll_family = AF_PACKET;
+  bind_addr.sll_protocol = htons(GetParam());
+  bind_addr.sll_ifindex = GetLoopbackIndex();
+
+  ASSERT_THAT(bind(socket_, reinterpret_cast<struct sockaddr*>(&bind_addr),
+                   sizeof(bind_addr)),
+              SyscallSucceeds());
+
+  // Binding socket again should fail.
+  ASSERT_THAT(
+      bind(socket_, reinterpret_cast<struct sockaddr*>(&bind_addr),
+           sizeof(bind_addr)),
+      // Linux 4.09 returns EINVAL here, but some time before 4.19 it switched
+      // to EADDRINUSE.
+      AnyOf(SyscallFailsWithErrno(EADDRINUSE), SyscallFailsWithErrno(EINVAL)));
+}
+
+// Bind and verify we do not receive data on interface which is not bound
+TEST_P(CookedPacketTest, BindDrop) {
+  // Let's use a simple IP payload: a UDP datagram.
+  FileDescriptor udp_sock =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, 0));
+
+  struct ifaddrs* if_addr_list = nullptr;
+  auto cleanup = Cleanup([&if_addr_list]() { freeifaddrs(if_addr_list); });
+
+  ASSERT_THAT(getifaddrs(&if_addr_list), SyscallSucceeds());
+
+  // Get interface other than loopback.
+  struct ifreq ifr = {};
+  for (struct ifaddrs* i = if_addr_list; i; i = i->ifa_next) {
+    if (strcmp(i->ifa_name, "lo") != 0) {
+      strncpy(ifr.ifr_name, i->ifa_name, sizeof(ifr.ifr_name));
+      break;
+    }
+  }
+
+  // Skip if no interface is available other than loopback.
+  if (strlen(ifr.ifr_name) == 0) {
+    GTEST_SKIP();
+  }
+
+  // Get interface index.
+  EXPECT_THAT(ioctl(socket_, SIOCGIFINDEX, &ifr), SyscallSucceeds());
+  EXPECT_NE(ifr.ifr_ifindex, 0);
+
+  // Bind to packet socket requires only family, protocol and ifindex.
+  struct sockaddr_ll bind_addr = {};
+  bind_addr.sll_family = AF_PACKET;
+  bind_addr.sll_protocol = htons(GetParam());
+  bind_addr.sll_ifindex = ifr.ifr_ifindex;
+
+  ASSERT_THAT(bind(socket_, reinterpret_cast<struct sockaddr*>(&bind_addr),
+                   sizeof(bind_addr)),
+              SyscallSucceeds());
+
+  // Send to loopback interface.
+  struct sockaddr_in dest = {};
+  dest.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+  dest.sin_family = AF_INET;
+  dest.sin_port = kPort;
+  EXPECT_THAT(sendto(udp_sock.get(), kMessage, sizeof(kMessage), 0,
+                     reinterpret_cast<struct sockaddr*>(&dest), sizeof(dest)),
+              SyscallSucceedsWithValue(sizeof(kMessage)));
+
+  // Wait and make sure the socket never receives any data.
+  struct pollfd pfd = {};
+  pfd.fd = socket_;
+  pfd.events = POLLIN;
+  EXPECT_THAT(RetryEINTR(poll)(&pfd, 1, 1000), SyscallSucceedsWithValue(0));
+}
+
+// Bind with invalid address.
+TEST_P(CookedPacketTest, BindFail) {
+  // Null address.
+  ASSERT_THAT(
+      bind(socket_, nullptr, sizeof(struct sockaddr)),
+      AnyOf(SyscallFailsWithErrno(EFAULT), SyscallFailsWithErrno(EINVAL)));
+
+  // Address of size 1.
+  uint8_t addr = 0;
+  ASSERT_THAT(
+      bind(socket_, reinterpret_cast<struct sockaddr*>(&addr), sizeof(addr)),
+      SyscallFailsWithErrno(EINVAL));
+}
+
+INSTANTIATE_TEST_SUITE_P(AllInetTests, CookedPacketTest,
+                         ::testing::Values(ETH_P_IP, ETH_P_ALL));
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/packet_socket_raw.cc b/test/syscalls/linux/packet_socket_raw.cc
new file mode 100644
index 000000000..4093ac813
--- /dev/null
+++ b/test/syscalls/linux/packet_socket_raw.cc
@@ -0,0 +1,565 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <arpa/inet.h>
+#include <linux/capability.h>
+#include <linux/if_arp.h>
+#include <linux/if_packet.h>
+#include <net/ethernet.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/base/internal/endian.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/capability_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/test_util.h"
+
+// Some of these tests involve sending packets via AF_PACKET sockets and the
+// loopback interface. Because AF_PACKET circumvents so much of the networking
+// stack, Linux sees these packets as "martian", i.e. they claim to be to/from
+// localhost but don't have the usual associated data. Thus Linux drops them by
+// default. You can see where this happens by following the code at:
+//
+// - net/ipv4/ip_input.c:ip_rcv_finish, which calls
+// - net/ipv4/route.c:ip_route_input_noref, which calls
+// - net/ipv4/route.c:ip_route_input_slow, which finds and drops martian
+//   packets.
+//
+// To tell Linux not to drop these packets, you need to tell it to accept our
+// funny packets (which are completely valid and correct, but lack associated
+// in-kernel data because we use AF_PACKET):
+//
+// echo 1 >> /proc/sys/net/ipv4/conf/lo/accept_local
+// echo 1 >> /proc/sys/net/ipv4/conf/lo/route_localnet
+//
+// These tests require CAP_NET_RAW to run.
+
+// TODO(gvisor.dev/issue/173): gVisor support.
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+using ::testing::AnyOf;
+using ::testing::Eq;
+
+constexpr char kMessage[] = "soweoneul malhaebwa";
+constexpr in_port_t kPort = 0x409c;  // htons(40000)
+
+// Send kMessage via sock to loopback
+void SendUDPMessage(int sock) {
+  struct sockaddr_in dest = {};
+  dest.sin_port = kPort;
+  dest.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+  dest.sin_family = AF_INET;
+  EXPECT_THAT(sendto(sock, kMessage, sizeof(kMessage), 0,
+                     reinterpret_cast<struct sockaddr*>(&dest), sizeof(dest)),
+              SyscallSucceedsWithValue(sizeof(kMessage)));
+}
+
+//
+// Raw tests. Packets sent with raw AF_PACKET sockets always include link layer
+// headers.
+//
+
+// Tests for "raw" (SOCK_RAW) packet(7) sockets.
+class RawPacketTest : public ::testing::TestWithParam<int> {
+ protected:
+  // Creates a socket to be used in tests.
+  void SetUp() override;
+
+  // Closes the socket created by SetUp().
+  void TearDown() override;
+
+  // Gets the device index of the loopback device.
+  int GetLoopbackIndex();
+
+  // The socket used for both reading and writing.
+  int s_;
+};
+
+void RawPacketTest::SetUp() {
+  if (!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) {
+    ASSERT_THAT(socket(AF_PACKET, SOCK_RAW, htons(GetParam())),
+                SyscallFailsWithErrno(EPERM));
+    GTEST_SKIP();
+  }
+
+  if (!IsRunningOnGvisor()) {
+    // Ensure that looped back packets aren't rejected by the kernel.
+    FileDescriptor acceptLocal = ASSERT_NO_ERRNO_AND_VALUE(
+        Open("/proc/sys/net/ipv4/conf/lo/accept_local", O_RDWR));
+    FileDescriptor routeLocalnet = ASSERT_NO_ERRNO_AND_VALUE(
+        Open("/proc/sys/net/ipv4/conf/lo/route_localnet", O_RDWR));
+    char enabled;
+    ASSERT_THAT(read(acceptLocal.get(), &enabled, 1),
+                SyscallSucceedsWithValue(1));
+    if (enabled != '1') {
+      enabled = '1';
+      ASSERT_THAT(lseek(acceptLocal.get(), 0, SEEK_SET),
+                  SyscallSucceedsWithValue(0));
+      ASSERT_THAT(write(acceptLocal.get(), &enabled, 1),
+                  SyscallSucceedsWithValue(1));
+      ASSERT_THAT(lseek(acceptLocal.get(), 0, SEEK_SET),
+                  SyscallSucceedsWithValue(0));
+      ASSERT_THAT(read(acceptLocal.get(), &enabled, 1),
+                  SyscallSucceedsWithValue(1));
+      ASSERT_EQ(enabled, '1');
+    }
+
+    ASSERT_THAT(read(routeLocalnet.get(), &enabled, 1),
+                SyscallSucceedsWithValue(1));
+    if (enabled != '1') {
+      enabled = '1';
+      ASSERT_THAT(lseek(routeLocalnet.get(), 0, SEEK_SET),
+                  SyscallSucceedsWithValue(0));
+      ASSERT_THAT(write(routeLocalnet.get(), &enabled, 1),
+                  SyscallSucceedsWithValue(1));
+      ASSERT_THAT(lseek(routeLocalnet.get(), 0, SEEK_SET),
+                  SyscallSucceedsWithValue(0));
+      ASSERT_THAT(read(routeLocalnet.get(), &enabled, 1),
+                  SyscallSucceedsWithValue(1));
+      ASSERT_EQ(enabled, '1');
+    }
+  }
+
+  ASSERT_THAT(s_ = socket(AF_PACKET, SOCK_RAW, htons(GetParam())),
+              SyscallSucceeds());
+}
+
+void RawPacketTest::TearDown() {
+  // TearDown will be run even if we skip the test.
+  if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) {
+    EXPECT_THAT(close(s_), SyscallSucceeds());
+  }
+}
+
+int RawPacketTest::GetLoopbackIndex() {
+  struct ifreq ifr;
+  snprintf(ifr.ifr_name, IFNAMSIZ, "lo");
+  EXPECT_THAT(ioctl(s_, SIOCGIFINDEX, &ifr), SyscallSucceeds());
+  EXPECT_NE(ifr.ifr_ifindex, 0);
+  return ifr.ifr_ifindex;
+}
+
+// Receive via a packet socket.
+TEST_P(RawPacketTest, Receive) {
+  // Let's use a simple IP payload: a UDP datagram.
+  FileDescriptor udp_sock =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, 0));
+  SendUDPMessage(udp_sock.get());
+
+  // Wait for the socket to become readable.
+  struct pollfd pfd = {};
+  pfd.fd = s_;
+  pfd.events = POLLIN;
+  EXPECT_THAT(RetryEINTR(poll)(&pfd, 1, 2000), SyscallSucceedsWithValue(1));
+
+  // Read and verify the data.
+  constexpr size_t packet_size = sizeof(struct ethhdr) + sizeof(struct iphdr) +
+                                 sizeof(struct udphdr) + sizeof(kMessage);
+  char buf[64];
+  struct sockaddr_ll src = {};
+  socklen_t src_len = sizeof(src);
+  ASSERT_THAT(recvfrom(s_, buf, sizeof(buf), 0,
+                       reinterpret_cast<struct sockaddr*>(&src), &src_len),
+              SyscallSucceedsWithValue(packet_size));
+  // sockaddr_ll ends with an 8 byte physical address field, but ethernet
+  // addresses only use 6 bytes.  Linux used to return sizeof(sockaddr_ll)-2
+  // here, but since commit b2cf86e1563e33a14a1c69b3e508d15dc12f804c returns
+  // sizeof(sockaddr_ll).
+  ASSERT_THAT(src_len, AnyOf(Eq(sizeof(src)), Eq(sizeof(src) - 2)));
+
+  // TODO(b/129292371): Verify protocol once we return it.
+  // Verify the source address.
+  EXPECT_EQ(src.sll_family, AF_PACKET);
+  EXPECT_EQ(src.sll_ifindex, GetLoopbackIndex());
+  EXPECT_EQ(src.sll_halen, ETH_ALEN);
+  // This came from the loopback device, so the address is all 0s.
+  for (int i = 0; i < src.sll_halen; i++) {
+    EXPECT_EQ(src.sll_addr[i], 0);
+  }
+
+  // Verify the ethernet header. We memcpy to deal with pointer alignment.
+  struct ethhdr eth = {};
+  memcpy(&eth, buf, sizeof(eth));
+  // The destination and source address should be 0, for loopback.
+  for (int i = 0; i < ETH_ALEN; i++) {
+    EXPECT_EQ(eth.h_dest[i], 0);
+    EXPECT_EQ(eth.h_source[i], 0);
+  }
+  EXPECT_EQ(eth.h_proto, htons(ETH_P_IP));
+
+  // Verify the IP header. We memcpy to deal with pointer aligment.
+  struct iphdr ip = {};
+  memcpy(&ip, buf + sizeof(ethhdr), sizeof(ip));
+  EXPECT_EQ(ip.ihl, 5);
+  EXPECT_EQ(ip.version, 4);
+  EXPECT_EQ(ip.tot_len, htons(packet_size - sizeof(eth)));
+  EXPECT_EQ(ip.protocol, IPPROTO_UDP);
+  EXPECT_EQ(ip.daddr, htonl(INADDR_LOOPBACK));
+  EXPECT_EQ(ip.saddr, htonl(INADDR_LOOPBACK));
+
+  // Verify the UDP header. We memcpy to deal with pointer aligment.
+  struct udphdr udp = {};
+  memcpy(&udp, buf + sizeof(eth) + sizeof(iphdr), sizeof(udp));
+  EXPECT_EQ(udp.dest, kPort);
+  EXPECT_EQ(udp.len, htons(sizeof(udphdr) + sizeof(kMessage)));
+
+  // Verify the payload.
+  char* payload = reinterpret_cast<char*>(buf + sizeof(eth) + sizeof(iphdr) +
+                                          sizeof(udphdr));
+  EXPECT_EQ(strncmp(payload, kMessage, sizeof(kMessage)), 0);
+}
+
+// Send via a packet socket.
+TEST_P(RawPacketTest, Send) {
+  // TODO(b/129292371): Remove once we support packet socket writing.
+  SKIP_IF(IsRunningOnGvisor());
+
+  // Let's send a UDP packet and receive it using a regular UDP socket.
+  FileDescriptor udp_sock =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, 0));
+  struct sockaddr_in bind_addr = {};
+  bind_addr.sin_family = AF_INET;
+  bind_addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+  bind_addr.sin_port = kPort;
+  ASSERT_THAT(
+      bind(udp_sock.get(), reinterpret_cast<struct sockaddr*>(&bind_addr),
+           sizeof(bind_addr)),
+      SyscallSucceeds());
+
+  // Set up the destination physical address.
+  struct sockaddr_ll dest = {};
+  dest.sll_family = AF_PACKET;
+  dest.sll_halen = ETH_ALEN;
+  dest.sll_ifindex = GetLoopbackIndex();
+  dest.sll_protocol = htons(ETH_P_IP);
+  // We're sending to the loopback device, so the address is all 0s.
+  memset(dest.sll_addr, 0x00, ETH_ALEN);
+
+  // Set up the ethernet header. The kernel takes care of the footer.
+  // We're sending to and from hardware address 0 (loopback).
+  struct ethhdr eth = {};
+  eth.h_proto = htons(ETH_P_IP);
+
+  // Set up the IP header.
+  struct iphdr iphdr = {};
+  iphdr.ihl = 5;
+  iphdr.version = 4;
+  iphdr.tos = 0;
+  iphdr.tot_len =
+      htons(sizeof(struct iphdr) + sizeof(struct udphdr) + sizeof(kMessage));
+  // Get a pseudo-random ID. If we clash with an in-use ID the test will fail,
+  // but we have no way of getting an ID we know to be good.
+  srand(*reinterpret_cast<unsigned int*>(&iphdr));
+  iphdr.id = rand();
+  // Linux sets this bit ("do not fragment") for small packets.
+  iphdr.frag_off = 1 << 6;
+  iphdr.ttl = 64;
+  iphdr.protocol = IPPROTO_UDP;
+  iphdr.daddr = htonl(INADDR_LOOPBACK);
+  iphdr.saddr = htonl(INADDR_LOOPBACK);
+  iphdr.check = IPChecksum(iphdr);
+
+  // Set up the UDP header.
+  struct udphdr udphdr = {};
+  udphdr.source = kPort;
+  udphdr.dest = kPort;
+  udphdr.len = htons(sizeof(udphdr) + sizeof(kMessage));
+  udphdr.check = UDPChecksum(iphdr, udphdr, kMessage, sizeof(kMessage));
+
+  // Copy both headers and the payload into our packet buffer.
+  char
+      send_buf[sizeof(eth) + sizeof(iphdr) + sizeof(udphdr) + sizeof(kMessage)];
+  memcpy(send_buf, &eth, sizeof(eth));
+  memcpy(send_buf + sizeof(ethhdr), &iphdr, sizeof(iphdr));
+  memcpy(send_buf + sizeof(ethhdr) + sizeof(iphdr), &udphdr, sizeof(udphdr));
+  memcpy(send_buf + sizeof(ethhdr) + sizeof(iphdr) + sizeof(udphdr), kMessage,
+         sizeof(kMessage));
+
+  // Send it.
+  ASSERT_THAT(sendto(s_, send_buf, sizeof(send_buf), 0,
+                     reinterpret_cast<struct sockaddr*>(&dest), sizeof(dest)),
+              SyscallSucceedsWithValue(sizeof(send_buf)));
+
+  // Wait for the packet to become available on both sockets.
+  struct pollfd pfd = {};
+  pfd.fd = udp_sock.get();
+  pfd.events = POLLIN;
+  ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, 5000), SyscallSucceedsWithValue(1));
+  pfd.fd = s_;
+  pfd.events = POLLIN;
+  ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, 5000), SyscallSucceedsWithValue(1));
+
+  // Receive on the packet socket.
+  char recv_buf[sizeof(send_buf)];
+  ASSERT_THAT(recv(s_, recv_buf, sizeof(recv_buf), 0),
+              SyscallSucceedsWithValue(sizeof(recv_buf)));
+  ASSERT_EQ(memcmp(recv_buf, send_buf, sizeof(send_buf)), 0);
+
+  // Receive on the UDP socket.
+  struct sockaddr_in src;
+  socklen_t src_len = sizeof(src);
+  ASSERT_THAT(recvfrom(udp_sock.get(), recv_buf, sizeof(recv_buf), MSG_DONTWAIT,
+                       reinterpret_cast<struct sockaddr*>(&src), &src_len),
+              SyscallSucceedsWithValue(sizeof(kMessage)));
+  // Check src and payload.
+  EXPECT_EQ(strncmp(recv_buf, kMessage, sizeof(kMessage)), 0);
+  EXPECT_EQ(src.sin_family, AF_INET);
+  EXPECT_EQ(src.sin_port, kPort);
+  EXPECT_EQ(src.sin_addr.s_addr, htonl(INADDR_LOOPBACK));
+}
+
+// Check that setting SO_RCVBUF below min is clamped to the minimum
+// receive buffer size.
+TEST_P(RawPacketTest, SetSocketRecvBufBelowMin) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+  // Discover minimum receive buf size by trying to set it to zero.
+  // See:
+  // https://github.com/torvalds/linux/blob/a5dc8300df75e8b8384b4c82225f1e4a0b4d9b55/net/core/sock.c#L820
+  constexpr int kRcvBufSz = 0;
+  ASSERT_THAT(
+      setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)),
+      SyscallSucceeds());
+
+  int min = 0;
+  socklen_t min_len = sizeof(min);
+  ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &min, &min_len),
+              SyscallSucceeds());
+
+  // Linux doubles the value so let's use a value that when doubled will still
+  // be smaller than min.
+  int below_min = min / 2 - 1;
+  ASSERT_THAT(
+      setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &below_min, sizeof(below_min)),
+      SyscallSucceeds());
+
+  int val = 0;
+  socklen_t val_len = sizeof(val);
+  ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &val, &val_len),
+              SyscallSucceeds());
+
+  ASSERT_EQ(min, val);
+}
+
+// Check that setting SO_RCVBUF above max is clamped to the maximum
+// receive buffer size.
+TEST_P(RawPacketTest, SetSocketRecvBufAboveMax) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+  // Discover max buf size by trying to set the largest possible buffer size.
+  constexpr int kRcvBufSz = 0xffffffff;
+  ASSERT_THAT(
+      setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)),
+      SyscallSucceeds());
+
+  int max = 0;
+  socklen_t max_len = sizeof(max);
+  ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &max, &max_len),
+              SyscallSucceeds());
+
+  int above_max = max + 1;
+  ASSERT_THAT(
+      setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &above_max, sizeof(above_max)),
+      SyscallSucceeds());
+
+  int val = 0;
+  socklen_t val_len = sizeof(val);
+  ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &val, &val_len),
+              SyscallSucceeds());
+  ASSERT_EQ(max, val);
+}
+
+// Check that setting SO_RCVBUF min <= kRcvBufSz <= max is honored.
+TEST_P(RawPacketTest, SetSocketRecvBuf) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+  int max = 0;
+  int min = 0;
+  {
+    // Discover max buf size by trying to set a really large buffer size.
+    constexpr int kRcvBufSz = 0xffffffff;
+    ASSERT_THAT(
+        setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)),
+        SyscallSucceeds());
+
+    max = 0;
+    socklen_t max_len = sizeof(max);
+    ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &max, &max_len),
+                SyscallSucceeds());
+  }
+
+  {
+    // Discover minimum buffer size by trying to set a zero size receive buffer
+    // size.
+    // See:
+    // https://github.com/torvalds/linux/blob/a5dc8300df75e8b8384b4c82225f1e4a0b4d9b55/net/core/sock.c#L820
+    constexpr int kRcvBufSz = 0;
+    ASSERT_THAT(
+        setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)),
+        SyscallSucceeds());
+
+    socklen_t min_len = sizeof(min);
+    ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &min, &min_len),
+                SyscallSucceeds());
+  }
+
+  int quarter_sz = min + (max - min) / 4;
+  ASSERT_THAT(
+      setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &quarter_sz, sizeof(quarter_sz)),
+      SyscallSucceeds());
+
+  int val = 0;
+  socklen_t val_len = sizeof(val);
+  ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &val, &val_len),
+              SyscallSucceeds());
+
+  // Linux doubles the value set by SO_SNDBUF/SO_RCVBUF.
+  // TODO(gvisor.dev/issue/2926): Remove when Netstack matches linux behavior.
+  if (!IsRunningOnGvisor()) {
+    quarter_sz *= 2;
+  }
+  ASSERT_EQ(quarter_sz, val);
+}
+
+// Check that setting SO_SNDBUF below min is clamped to the minimum
+// receive buffer size.
+TEST_P(RawPacketTest, SetSocketSendBufBelowMin) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+  // Discover minimum buffer size by trying to set it to zero.
+  constexpr int kSndBufSz = 0;
+  ASSERT_THAT(
+      setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &kSndBufSz, sizeof(kSndBufSz)),
+      SyscallSucceeds());
+
+  int min = 0;
+  socklen_t min_len = sizeof(min);
+  ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &min, &min_len),
+              SyscallSucceeds());
+
+  // Linux doubles the value so let's use a value that when doubled will still
+  // be smaller than min.
+  int below_min = min / 2 - 1;
+  ASSERT_THAT(
+      setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &below_min, sizeof(below_min)),
+      SyscallSucceeds());
+
+  int val = 0;
+  socklen_t val_len = sizeof(val);
+  ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &val, &val_len),
+              SyscallSucceeds());
+
+  ASSERT_EQ(min, val);
+}
+
+// Check that setting SO_SNDBUF above max is clamped to the maximum
+// send buffer size.
+TEST_P(RawPacketTest, SetSocketSendBufAboveMax) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+  // Discover maximum buffer size by trying to set it to a large value.
+  constexpr int kSndBufSz = 0xffffffff;
+  ASSERT_THAT(
+      setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &kSndBufSz, sizeof(kSndBufSz)),
+      SyscallSucceeds());
+
+  int max = 0;
+  socklen_t max_len = sizeof(max);
+  ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &max, &max_len),
+              SyscallSucceeds());
+
+  int above_max = max + 1;
+  ASSERT_THAT(
+      setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &above_max, sizeof(above_max)),
+      SyscallSucceeds());
+
+  int val = 0;
+  socklen_t val_len = sizeof(val);
+  ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &val, &val_len),
+              SyscallSucceeds());
+  ASSERT_EQ(max, val);
+}
+
+// Check that setting SO_SNDBUF min <= kSndBufSz <= max is honored.
+TEST_P(RawPacketTest, SetSocketSendBuf) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+  int max = 0;
+  int min = 0;
+  {
+    // Discover maximum buffer size by trying to set it to a large value.
+    constexpr int kSndBufSz = 0xffffffff;
+    ASSERT_THAT(
+        setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &kSndBufSz, sizeof(kSndBufSz)),
+        SyscallSucceeds());
+
+    max = 0;
+    socklen_t max_len = sizeof(max);
+    ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &max, &max_len),
+                SyscallSucceeds());
+  }
+
+  {
+    // Discover minimum buffer size by trying to set it to zero.
+    constexpr int kSndBufSz = 0;
+    ASSERT_THAT(
+        setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &kSndBufSz, sizeof(kSndBufSz)),
+        SyscallSucceeds());
+
+    socklen_t min_len = sizeof(min);
+    ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &min, &min_len),
+                SyscallSucceeds());
+  }
+
+  int quarter_sz = min + (max - min) / 4;
+  ASSERT_THAT(
+      setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &quarter_sz, sizeof(quarter_sz)),
+      SyscallSucceeds());
+
+  int val = 0;
+  socklen_t val_len = sizeof(val);
+  ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &val, &val_len),
+              SyscallSucceeds());
+
+  // Linux doubles the value set by SO_SNDBUF/SO_RCVBUF.
+  // TODO(gvisor.dev/issue/2926): Remove the gvisor special casing when Netstack
+  // matches linux behavior.
+  if (!IsRunningOnGvisor()) {
+    quarter_sz *= 2;
+  }
+
+  ASSERT_EQ(quarter_sz, val);
+}
+
+INSTANTIATE_TEST_SUITE_P(AllInetTests, RawPacketTest,
+                         ::testing::Values(ETH_P_IP, ETH_P_ALL));
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/partial_bad_buffer.cc b/test/syscalls/linux/partial_bad_buffer.cc
new file mode 100644
index 000000000..df7129acc
--- /dev/null
+++ b/test/syscalls/linux/partial_bad_buffer.cc
@@ -0,0 +1,405 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <fcntl.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "absl/time/clock.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/fs_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+using ::testing::Gt;
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+constexpr char kMessage[] = "hello world";
+
+// PartialBadBufferTest checks the result of various IO syscalls when passed a
+// buffer that does not have the space specified in the syscall (most of it is
+// PROT_NONE). Linux is annoyingly inconsistent among different syscalls, so we
+// test all of them.
+class PartialBadBufferTest : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    // Create and open a directory for getdents cases.
+    directory_ = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+    ASSERT_THAT(
+        directory_fd_ = open(directory_.path().c_str(), O_RDONLY | O_DIRECTORY),
+        SyscallSucceeds());
+
+    // Create and open a normal file, placing it in the directory
+    // so the getdents cases have some dirents.
+    name_ = JoinPath(directory_.path(), "a");
+    ASSERT_THAT(fd_ = open(name_.c_str(), O_RDWR | O_CREAT, 0644),
+                SyscallSucceeds());
+
+    // Write some initial data.
+    size_t size = sizeof(kMessage) - 1;
+    EXPECT_THAT(WriteFd(fd_, &kMessage, size), SyscallSucceedsWithValue(size));
+    ASSERT_THAT(lseek(fd_, 0, SEEK_SET), SyscallSucceeds());
+
+    // Map a useable buffer.
+    addr_ = mmap(0, 2 * kPageSize, PROT_READ | PROT_WRITE,
+                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+    ASSERT_NE(addr_, MAP_FAILED);
+    char* buf = reinterpret_cast<char*>(addr_);
+
+    // Guard page for our read to run into.
+    ASSERT_THAT(mprotect(reinterpret_cast<void*>(buf + kPageSize), kPageSize,
+                         PROT_NONE),
+                SyscallSucceeds());
+
+    // Leave only one free byte in the buffer.
+    bad_buffer_ = buf + kPageSize - 1;
+  }
+
+  off_t Size() {
+    struct stat st;
+    int rc = fstat(fd_, &st);
+    if (rc < 0) {
+      return static_cast<off_t>(rc);
+    }
+    return st.st_size;
+  }
+
+  void TearDown() override {
+    EXPECT_THAT(munmap(addr_, 2 * kPageSize), SyscallSucceeds()) << addr_;
+    EXPECT_THAT(close(fd_), SyscallSucceeds());
+    EXPECT_THAT(unlink(name_.c_str()), SyscallSucceeds());
+    EXPECT_THAT(close(directory_fd_), SyscallSucceeds());
+  }
+
+  // Return buffer with n bytes of free space.
+  // N.B. this is the same buffer used to back bad_buffer_.
+  char* FreeBytes(size_t n) {
+    TEST_CHECK(n <= static_cast<size_t>(4096));
+    return reinterpret_cast<char*>(addr_) + kPageSize - n;
+  }
+
+  std::string name_;
+  int fd_;
+  TempPath directory_;
+  int directory_fd_;
+  void* addr_;
+  char* bad_buffer_;
+};
+
+// We do both "big" and "small" tests to try to hit the "zero copy" and
+// non-"zero copy" paths, which have different code paths for handling faults.
+
+TEST_F(PartialBadBufferTest, ReadBig) {
+  EXPECT_THAT(RetryEINTR(read)(fd_, bad_buffer_, kPageSize),
+              SyscallSucceedsWithValue(1));
+  EXPECT_EQ('h', bad_buffer_[0]);
+}
+
+TEST_F(PartialBadBufferTest, ReadSmall) {
+  EXPECT_THAT(RetryEINTR(read)(fd_, bad_buffer_, 10),
+              SyscallSucceedsWithValue(1));
+  EXPECT_EQ('h', bad_buffer_[0]);
+}
+
+TEST_F(PartialBadBufferTest, PreadBig) {
+  EXPECT_THAT(RetryEINTR(pread)(fd_, bad_buffer_, kPageSize, 0),
+              SyscallSucceedsWithValue(1));
+  EXPECT_EQ('h', bad_buffer_[0]);
+}
+
+TEST_F(PartialBadBufferTest, PreadSmall) {
+  EXPECT_THAT(RetryEINTR(pread)(fd_, bad_buffer_, 10, 0),
+              SyscallSucceedsWithValue(1));
+  EXPECT_EQ('h', bad_buffer_[0]);
+}
+
+TEST_F(PartialBadBufferTest, ReadvBig) {
+  struct iovec vec;
+  vec.iov_base = bad_buffer_;
+  vec.iov_len = kPageSize;
+
+  EXPECT_THAT(RetryEINTR(readv)(fd_, &vec, 1), SyscallSucceedsWithValue(1));
+  EXPECT_EQ('h', bad_buffer_[0]);
+}
+
+TEST_F(PartialBadBufferTest, ReadvSmall) {
+  struct iovec vec;
+  vec.iov_base = bad_buffer_;
+  vec.iov_len = 10;
+
+  EXPECT_THAT(RetryEINTR(readv)(fd_, &vec, 1), SyscallSucceedsWithValue(1));
+  EXPECT_EQ('h', bad_buffer_[0]);
+}
+
+TEST_F(PartialBadBufferTest, PreadvBig) {
+  struct iovec vec;
+  vec.iov_base = bad_buffer_;
+  vec.iov_len = kPageSize;
+
+  EXPECT_THAT(RetryEINTR(preadv)(fd_, &vec, 1, 0), SyscallSucceedsWithValue(1));
+  EXPECT_EQ('h', bad_buffer_[0]);
+}
+
+TEST_F(PartialBadBufferTest, PreadvSmall) {
+  struct iovec vec;
+  vec.iov_base = bad_buffer_;
+  vec.iov_len = 10;
+
+  EXPECT_THAT(RetryEINTR(preadv)(fd_, &vec, 1, 0), SyscallSucceedsWithValue(1));
+  EXPECT_EQ('h', bad_buffer_[0]);
+}
+
+TEST_F(PartialBadBufferTest, WriteBig) {
+  off_t orig_size = Size();
+  int n;
+
+  ASSERT_THAT(lseek(fd_, orig_size, SEEK_SET), SyscallSucceeds());
+  EXPECT_THAT(
+      (n = RetryEINTR(write)(fd_, bad_buffer_, kPageSize)),
+      AnyOf(SyscallFailsWithErrno(EFAULT), SyscallSucceedsWithValue(1)));
+  EXPECT_EQ(Size(), orig_size + (n >= 0 ? n : 0));
+}
+
+TEST_F(PartialBadBufferTest, WriteSmall) {
+  off_t orig_size = Size();
+  int n;
+
+  ASSERT_THAT(lseek(fd_, orig_size, SEEK_SET), SyscallSucceeds());
+  EXPECT_THAT(
+      (n = RetryEINTR(write)(fd_, bad_buffer_, 10)),
+      AnyOf(SyscallFailsWithErrno(EFAULT), SyscallSucceedsWithValue(1)));
+  EXPECT_EQ(Size(), orig_size + (n >= 0 ? n : 0));
+}
+
+TEST_F(PartialBadBufferTest, PwriteBig) {
+  off_t orig_size = Size();
+  int n;
+
+  EXPECT_THAT(
+      (n = RetryEINTR(pwrite)(fd_, bad_buffer_, kPageSize, orig_size)),
+      AnyOf(SyscallFailsWithErrno(EFAULT), SyscallSucceedsWithValue(1)));
+  EXPECT_EQ(Size(), orig_size + (n >= 0 ? n : 0));
+}
+
+TEST_F(PartialBadBufferTest, PwriteSmall) {
+  off_t orig_size = Size();
+  int n;
+
+  EXPECT_THAT(
+      (n = RetryEINTR(pwrite)(fd_, bad_buffer_, 10, orig_size)),
+      AnyOf(SyscallFailsWithErrno(EFAULT), SyscallSucceedsWithValue(1)));
+  EXPECT_EQ(Size(), orig_size + (n >= 0 ? n : 0));
+}
+
+TEST_F(PartialBadBufferTest, WritevBig) {
+  struct iovec vec;
+  vec.iov_base = bad_buffer_;
+  vec.iov_len = kPageSize;
+  off_t orig_size = Size();
+  int n;
+
+  ASSERT_THAT(lseek(fd_, orig_size, SEEK_SET), SyscallSucceeds());
+  EXPECT_THAT(
+      (n = RetryEINTR(writev)(fd_, &vec, 1)),
+      AnyOf(SyscallFailsWithErrno(EFAULT), SyscallSucceedsWithValue(1)));
+  EXPECT_EQ(Size(), orig_size + (n >= 0 ? n : 0));
+}
+
+TEST_F(PartialBadBufferTest, WritevSmall) {
+  struct iovec vec;
+  vec.iov_base = bad_buffer_;
+  vec.iov_len = 10;
+  off_t orig_size = Size();
+  int n;
+
+  ASSERT_THAT(lseek(fd_, orig_size, SEEK_SET), SyscallSucceeds());
+  EXPECT_THAT(
+      (n = RetryEINTR(writev)(fd_, &vec, 1)),
+      AnyOf(SyscallFailsWithErrno(EFAULT), SyscallSucceedsWithValue(1)));
+  EXPECT_EQ(Size(), orig_size + (n >= 0 ? n : 0));
+}
+
+TEST_F(PartialBadBufferTest, PwritevBig) {
+  struct iovec vec;
+  vec.iov_base = bad_buffer_;
+  vec.iov_len = kPageSize;
+  off_t orig_size = Size();
+  int n;
+
+  EXPECT_THAT(
+      (n = RetryEINTR(pwritev)(fd_, &vec, 1, orig_size)),
+      AnyOf(SyscallFailsWithErrno(EFAULT), SyscallSucceedsWithValue(1)));
+  EXPECT_EQ(Size(), orig_size + (n >= 0 ? n : 0));
+}
+
+TEST_F(PartialBadBufferTest, PwritevSmall) {
+  struct iovec vec;
+  vec.iov_base = bad_buffer_;
+  vec.iov_len = 10;
+  off_t orig_size = Size();
+  int n;
+
+  EXPECT_THAT(
+      (n = RetryEINTR(pwritev)(fd_, &vec, 1, orig_size)),
+      AnyOf(SyscallFailsWithErrno(EFAULT), SyscallSucceedsWithValue(1)));
+  EXPECT_EQ(Size(), orig_size + (n >= 0 ? n : 0));
+}
+
+// getdents returns EFAULT when the you claim the buffer is large enough, but
+// it actually isn't.
+TEST_F(PartialBadBufferTest, GetdentsBig) {
+  EXPECT_THAT(RetryEINTR(syscall)(SYS_getdents64, directory_fd_, bad_buffer_,
+                                  kPageSize),
+              SyscallFailsWithErrno(EFAULT));
+}
+
+// getdents returns EINVAL when the you claim the buffer is too small.
+TEST_F(PartialBadBufferTest, GetdentsSmall) {
+  EXPECT_THAT(
+      RetryEINTR(syscall)(SYS_getdents64, directory_fd_, bad_buffer_, 10),
+      SyscallFailsWithErrno(EINVAL));
+}
+
+// getdents will write entries into a buffer if there is space before it faults.
+TEST_F(PartialBadBufferTest, GetdentsOneEntry) {
+  // 30 bytes is enough for one (small) entry.
+  char* buf = FreeBytes(30);
+
+  EXPECT_THAT(
+      RetryEINTR(syscall)(SYS_getdents64, directory_fd_, buf, kPageSize),
+      SyscallSucceedsWithValue(Gt(0)));
+}
+
+PosixErrorOr<sockaddr_storage> InetLoopbackAddr(int family) {
+  struct sockaddr_storage addr;
+  memset(&addr, 0, sizeof(addr));
+  addr.ss_family = family;
+  switch (family) {
+    case AF_INET:
+      reinterpret_cast<struct sockaddr_in*>(&addr)->sin_addr.s_addr =
+          htonl(INADDR_LOOPBACK);
+      break;
+    case AF_INET6:
+      reinterpret_cast<struct sockaddr_in6*>(&addr)->sin6_addr =
+          in6addr_loopback;
+      break;
+    default:
+      return PosixError(EINVAL,
+                        absl::StrCat("unknown socket family: ", family));
+  }
+  return addr;
+}
+
+// SendMsgTCP verifies that calling sendmsg with a bad address returns an
+// EFAULT. It also verifies that passing a buffer which is made up of 2
+// pages one valid and one guard page succeeds as long as the write is
+// for exactly the size of 1 page.
+TEST_F(PartialBadBufferTest, SendMsgTCP) {
+  auto listen_socket =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_STREAM, IPPROTO_TCP));
+
+  // Initialize address to the loopback one.
+  sockaddr_storage addr = ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(AF_INET));
+  socklen_t addrlen = sizeof(addr);
+
+  // Bind to some port then start listening.
+  ASSERT_THAT(bind(listen_socket.get(),
+                   reinterpret_cast<struct sockaddr*>(&addr), addrlen),
+              SyscallSucceeds());
+
+  ASSERT_THAT(listen(listen_socket.get(), SOMAXCONN), SyscallSucceeds());
+
+  // Get the address we're listening on, then connect to it. We need to do this
+  // because we're allowing the stack to pick a port for us.
+  ASSERT_THAT(getsockname(listen_socket.get(),
+                          reinterpret_cast<struct sockaddr*>(&addr), &addrlen),
+              SyscallSucceeds());
+
+  auto send_socket =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_STREAM, IPPROTO_TCP));
+
+  ASSERT_THAT(
+      RetryEINTR(connect)(send_socket.get(),
+                          reinterpret_cast<struct sockaddr*>(&addr), addrlen),
+      SyscallSucceeds());
+
+  // Accept the connection.
+  auto recv_socket =
+      ASSERT_NO_ERRNO_AND_VALUE(Accept(listen_socket.get(), nullptr, nullptr));
+
+  // TODO(gvisor.dev/issue/674): Update this once Netstack matches linux
+  //   behaviour on a setsockopt of SO_RCVBUF/SO_SNDBUF.
+  //
+  // Set SO_SNDBUF for socket to exactly kPageSize+1.
+  //
+  // gVisor does not double the value passed in SO_SNDBUF like linux does so we
+  // just increase it by 1 byte here for gVisor so that we can test writing 1
+  // byte past the valid page and check that it triggers an EFAULT
+  // correctly. Otherwise in gVisor the sendmsg call will just return with no
+  // error with kPageSize bytes written successfully.
+  const uint32_t buf_size = kPageSize + 1;
+  ASSERT_THAT(setsockopt(send_socket.get(), SOL_SOCKET, SO_SNDBUF, &buf_size,
+                         sizeof(buf_size)),
+              SyscallSucceedsWithValue(0));
+
+  struct msghdr hdr = {};
+  struct iovec iov = {};
+  iov.iov_base = bad_buffer_;
+  iov.iov_len = kPageSize;
+  hdr.msg_iov = &iov;
+  hdr.msg_iovlen = 1;
+
+  ASSERT_THAT(RetryEINTR(sendmsg)(send_socket.get(), &hdr, 0),
+              SyscallFailsWithErrno(EFAULT));
+
+  // Now assert that writing kPageSize from addr_ succeeds.
+  iov.iov_base = addr_;
+  ASSERT_THAT(RetryEINTR(sendmsg)(send_socket.get(), &hdr, 0),
+              SyscallSucceedsWithValue(kPageSize));
+  // Read all the data out so that we drain the socket SND_BUF on the sender.
+  std::vector<char> buffer(kPageSize);
+  ASSERT_THAT(RetryEINTR(read)(recv_socket.get(), buffer.data(), kPageSize),
+              SyscallSucceedsWithValue(kPageSize));
+
+  // Sleep for a shortwhile to ensure that we have time to process the
+  // ACKs. This is not strictly required unless running under gotsan which is a
+  // lot slower and can result in the next write to write only 1 byte instead of
+  // our intended kPageSize + 1.
+  absl::SleepFor(absl::Milliseconds(50));
+
+  // Now assert that writing > kPageSize results in EFAULT.
+  iov.iov_len = kPageSize + 1;
+  ASSERT_THAT(RetryEINTR(sendmsg)(send_socket.get(), &hdr, 0),
+              SyscallFailsWithErrno(EFAULT));
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/pause.cc b/test/syscalls/linux/pause.cc
new file mode 100644
index 000000000..8c05efd6f
--- /dev/null
+++ b/test/syscalls/linux/pause.cc
@@ -0,0 +1,88 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <signal.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <atomic>
+
+#include "gtest/gtest.h"
+#include "absl/synchronization/mutex.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/util/signal_util.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+void NoopSignalHandler(int sig, siginfo_t* info, void* context) {}
+
+}  // namespace
+
+TEST(PauseTest, OnlyReturnsWhenSignalHandled) {
+  struct sigaction sa;
+  sigfillset(&sa.sa_mask);
+
+  // Ensure that SIGUSR1 is ignored.
+  sa.sa_handler = SIG_IGN;
+  ASSERT_THAT(sigaction(SIGUSR1, &sa, nullptr), SyscallSucceeds());
+
+  // Register a handler for SIGUSR2.
+  sa.sa_sigaction = NoopSignalHandler;
+  sa.sa_flags = SA_SIGINFO;
+  ASSERT_THAT(sigaction(SIGUSR2, &sa, nullptr), SyscallSucceeds());
+
+  // The child sets their own tid.
+  absl::Mutex mu;
+  pid_t child_tid = 0;
+  bool child_tid_available = false;
+  std::atomic<int> sent_signal{0};
+  std::atomic<int> waking_signal{0};
+  ScopedThread t([&] {
+    mu.Lock();
+    child_tid = gettid();
+    child_tid_available = true;
+    mu.Unlock();
+    EXPECT_THAT(pause(), SyscallFailsWithErrno(EINTR));
+    waking_signal.store(sent_signal.load());
+  });
+  mu.Lock();
+  mu.Await(absl::Condition(&child_tid_available));
+  mu.Unlock();
+
+  // Wait a bit to let the child enter pause().
+  absl::SleepFor(absl::Seconds(3));
+
+  // The child should not be woken by SIGUSR1.
+  sent_signal.store(SIGUSR1);
+  ASSERT_THAT(tgkill(getpid(), child_tid, SIGUSR1), SyscallSucceeds());
+  absl::SleepFor(absl::Seconds(3));
+
+  // The child should be woken by SIGUSR2.
+  sent_signal.store(SIGUSR2);
+  ASSERT_THAT(tgkill(getpid(), child_tid, SIGUSR2), SyscallSucceeds());
+  absl::SleepFor(absl::Seconds(3));
+
+  EXPECT_EQ(SIGUSR2, waking_signal.load());
+}
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/ping_socket.cc b/test/syscalls/linux/ping_socket.cc
new file mode 100644
index 000000000..a9bfdb37b
--- /dev/null
+++ b/test/syscalls/linux/ping_socket.cc
@@ -0,0 +1,91 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip_icmp.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/save_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+class PingSocket : public ::testing::Test {
+ protected:
+  // Creates a socket to be used in tests.
+  void SetUp() override;
+
+  // Closes the socket created by SetUp().
+  void TearDown() override;
+
+  // The loopback address.
+  struct sockaddr_in addr_;
+};
+
+void PingSocket::SetUp() {
+  // On some hosts ping sockets are restricted to specific groups using the
+  // sysctl "ping_group_range".
+  int s = socket(AF_INET, SOCK_RAW, IPPROTO_ICMP);
+  if (s < 0 && errno == EPERM) {
+    GTEST_SKIP();
+  }
+  close(s);
+
+  addr_ = {};
+  // Just a random port as the destination port number is irrelevant for ping
+  // sockets.
+  addr_.sin_port = 12345;
+  addr_.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+  addr_.sin_family = AF_INET;
+}
+
+void PingSocket::TearDown() {}
+
+// Test ICMP port exhaustion returns EAGAIN.
+//
+// We disable both random/cooperative S/R for this test as it makes way too many
+// syscalls.
+TEST_F(PingSocket, ICMPPortExhaustion_NoRandomSave) {
+  DisableSave ds;
+  std::vector<FileDescriptor> sockets;
+  constexpr int kSockets = 65536;
+  addr_.sin_port = 0;
+  for (int i = 0; i < kSockets; i++) {
+    auto s =
+        ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, IPPROTO_ICMP));
+    int ret = connect(s.get(), reinterpret_cast<struct sockaddr*>(&addr_),
+                      sizeof(addr_));
+    if (ret == 0) {
+      sockets.push_back(std::move(s));
+      continue;
+    }
+    ASSERT_THAT(ret, SyscallFailsWithErrno(EAGAIN));
+    break;
+  }
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/pipe.cc b/test/syscalls/linux/pipe.cc
new file mode 100644
index 000000000..34291850d
--- /dev/null
+++ b/test/syscalls/linux/pipe.cc
@@ -0,0 +1,670 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h> /* Obtain O_* constant definitions */
+#include <sys/ioctl.h>
+#include <sys/uio.h>
+#include <unistd.h>
+
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "absl/strings/str_cat.h"
+#include "absl/synchronization/notification.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/fs_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// Used as a non-zero sentinel value, below.
+constexpr int kTestValue = 0x12345678;
+
+// Used for synchronization in race tests.
+const absl::Duration syncDelay = absl::Seconds(2);
+
+struct PipeCreator {
+  std::string name_;
+
+  // void (fds, is_blocking, is_namedpipe).
+  std::function<void(int[2], bool*, bool*)> create_;
+};
+
+class PipeTest : public ::testing::TestWithParam<PipeCreator> {
+ public:
+  static void SetUpTestSuite() {
+    // Tests intentionally generate SIGPIPE.
+    TEST_PCHECK(signal(SIGPIPE, SIG_IGN) != SIG_ERR);
+  }
+
+  // Initializes rfd_ and wfd_ as a blocking pipe.
+  //
+  // The return value indicates success: the test should be skipped otherwise.
+  bool CreateBlocking() { return create(true); }
+
+  // Initializes rfd_ and wfd_ as a non-blocking pipe.
+  //
+  // The return value is per CreateBlocking.
+  bool CreateNonBlocking() { return create(false); }
+
+  // Returns true iff the pipe represents a named pipe.
+  bool IsNamedPipe() const { return named_pipe_; }
+
+  int Size() const {
+    int s1 = fcntl(rfd_.get(), F_GETPIPE_SZ);
+    int s2 = fcntl(wfd_.get(), F_GETPIPE_SZ);
+    EXPECT_GT(s1, 0);
+    EXPECT_GT(s2, 0);
+    EXPECT_EQ(s1, s2);
+    return s1;
+  }
+
+  static void TearDownTestSuite() {
+    TEST_PCHECK(signal(SIGPIPE, SIG_DFL) != SIG_ERR);
+  }
+
+ private:
+  bool create(bool wants_blocking) {
+    // Generate the pipe.
+    int fds[2] = {-1, -1};
+    bool is_blocking = false;
+    GetParam().create_(fds, &is_blocking, &named_pipe_);
+    if (fds[0] < 0 || fds[1] < 0) {
+      return false;
+    }
+
+    // Save descriptors.
+    rfd_.reset(fds[0]);
+    wfd_.reset(fds[1]);
+
+    // Adjust blocking, if needed.
+    if (!is_blocking && wants_blocking) {
+      // Clear the blocking flag.
+      EXPECT_THAT(fcntl(fds[0], F_SETFL, 0), SyscallSucceeds());
+      EXPECT_THAT(fcntl(fds[1], F_SETFL, 0), SyscallSucceeds());
+    } else if (is_blocking && !wants_blocking) {
+      // Set the descriptors to blocking.
+      EXPECT_THAT(fcntl(fds[0], F_SETFL, O_NONBLOCK), SyscallSucceeds());
+      EXPECT_THAT(fcntl(fds[1], F_SETFL, O_NONBLOCK), SyscallSucceeds());
+    }
+
+    return true;
+  }
+
+ protected:
+  FileDescriptor rfd_;
+  FileDescriptor wfd_;
+
+ private:
+  bool named_pipe_ = false;
+};
+
+TEST_P(PipeTest, Inode) {
+  SKIP_IF(!CreateBlocking());
+
+  // Ensure that the inode number is the same for each end.
+  struct stat rst;
+  ASSERT_THAT(fstat(rfd_.get(), &rst), SyscallSucceeds());
+  struct stat wst;
+  ASSERT_THAT(fstat(wfd_.get(), &wst), SyscallSucceeds());
+  EXPECT_EQ(rst.st_ino, wst.st_ino);
+}
+
+TEST_P(PipeTest, Permissions) {
+  SKIP_IF(!CreateBlocking());
+
+  // Attempt bad operations.
+  int buf = kTestValue;
+  ASSERT_THAT(write(rfd_.get(), &buf, sizeof(buf)),
+              SyscallFailsWithErrno(EBADF));
+  EXPECT_THAT(read(wfd_.get(), &buf, sizeof(buf)),
+              SyscallFailsWithErrno(EBADF));
+}
+
+TEST_P(PipeTest, Flags) {
+  SKIP_IF(!CreateBlocking());
+
+  if (IsNamedPipe()) {
+    // May be stubbed to zero; define locally.
+    EXPECT_THAT(fcntl(rfd_.get(), F_GETFL),
+                SyscallSucceedsWithValue(kOLargeFile | O_RDONLY));
+    EXPECT_THAT(fcntl(wfd_.get(), F_GETFL),
+                SyscallSucceedsWithValue(kOLargeFile | O_WRONLY));
+  } else {
+    EXPECT_THAT(fcntl(rfd_.get(), F_GETFL), SyscallSucceedsWithValue(O_RDONLY));
+    EXPECT_THAT(fcntl(wfd_.get(), F_GETFL), SyscallSucceedsWithValue(O_WRONLY));
+  }
+}
+
+TEST_P(PipeTest, Write) {
+  SKIP_IF(!CreateBlocking());
+
+  int wbuf = kTestValue;
+  int rbuf = ~kTestValue;
+  ASSERT_THAT(write(wfd_.get(), &wbuf, sizeof(wbuf)),
+              SyscallSucceedsWithValue(sizeof(wbuf)));
+  ASSERT_THAT(read(rfd_.get(), &rbuf, sizeof(rbuf)),
+              SyscallSucceedsWithValue(sizeof(rbuf)));
+  EXPECT_EQ(wbuf, rbuf);
+}
+
+TEST_P(PipeTest, WritePage) {
+  SKIP_IF(!CreateBlocking());
+
+  std::vector<char> wbuf(kPageSize);
+  RandomizeBuffer(wbuf.data(), wbuf.size());
+  std::vector<char> rbuf(wbuf.size());
+
+  ASSERT_THAT(write(wfd_.get(), wbuf.data(), wbuf.size()),
+              SyscallSucceedsWithValue(wbuf.size()));
+  ASSERT_THAT(read(rfd_.get(), rbuf.data(), rbuf.size()),
+              SyscallSucceedsWithValue(rbuf.size()));
+  EXPECT_EQ(memcmp(rbuf.data(), wbuf.data(), wbuf.size()), 0);
+}
+
+TEST_P(PipeTest, NonBlocking) {
+  SKIP_IF(!CreateNonBlocking());
+
+  int wbuf = kTestValue;
+  int rbuf = ~kTestValue;
+  EXPECT_THAT(read(rfd_.get(), &rbuf, sizeof(rbuf)),
+              SyscallFailsWithErrno(EWOULDBLOCK));
+  ASSERT_THAT(write(wfd_.get(), &wbuf, sizeof(wbuf)),
+              SyscallSucceedsWithValue(sizeof(wbuf)));
+
+  ASSERT_THAT(read(rfd_.get(), &rbuf, sizeof(rbuf)),
+              SyscallSucceedsWithValue(sizeof(rbuf)));
+  EXPECT_EQ(wbuf, rbuf);
+  EXPECT_THAT(read(rfd_.get(), &rbuf, sizeof(rbuf)),
+              SyscallFailsWithErrno(EWOULDBLOCK));
+}
+
+TEST(Pipe2Test, CloExec) {
+  int fds[2];
+  ASSERT_THAT(pipe2(fds, O_CLOEXEC), SyscallSucceeds());
+  EXPECT_THAT(fcntl(fds[0], F_GETFD), SyscallSucceedsWithValue(FD_CLOEXEC));
+  EXPECT_THAT(fcntl(fds[1], F_GETFD), SyscallSucceedsWithValue(FD_CLOEXEC));
+  EXPECT_THAT(close(fds[0]), SyscallSucceeds());
+  EXPECT_THAT(close(fds[1]), SyscallSucceeds());
+}
+
+TEST(Pipe2Test, BadOptions) {
+  int fds[2];
+  EXPECT_THAT(pipe2(fds, 0xDEAD), SyscallFailsWithErrno(EINVAL));
+}
+
+// Tests that opening named pipes with O_TRUNC shouldn't cause an error, but
+// calls to (f)truncate should.
+TEST(NamedPipeTest, Truncate) {
+  const std::string tmp_path = NewTempAbsPath();
+  SKIP_IF(mkfifo(tmp_path.c_str(), 0644) != 0);
+
+  ASSERT_THAT(open(tmp_path.c_str(), O_NONBLOCK | O_RDONLY), SyscallSucceeds());
+  FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(
+      Open(tmp_path.c_str(), O_RDWR | O_NONBLOCK | O_TRUNC));
+
+  ASSERT_THAT(truncate(tmp_path.c_str(), 0), SyscallFailsWithErrno(EINVAL));
+  ASSERT_THAT(ftruncate(fd.get(), 0), SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_P(PipeTest, Seek) {
+  SKIP_IF(!CreateBlocking());
+
+  for (int i = 0; i < 4; i++) {
+    // Attempt absolute seeks.
+    EXPECT_THAT(lseek(rfd_.get(), 0, SEEK_SET), SyscallFailsWithErrno(ESPIPE));
+    EXPECT_THAT(lseek(rfd_.get(), 4, SEEK_SET), SyscallFailsWithErrno(ESPIPE));
+    EXPECT_THAT(lseek(wfd_.get(), 0, SEEK_SET), SyscallFailsWithErrno(ESPIPE));
+    EXPECT_THAT(lseek(wfd_.get(), 4, SEEK_SET), SyscallFailsWithErrno(ESPIPE));
+
+    // Attempt relative seeks.
+    EXPECT_THAT(lseek(rfd_.get(), 0, SEEK_CUR), SyscallFailsWithErrno(ESPIPE));
+    EXPECT_THAT(lseek(rfd_.get(), 4, SEEK_CUR), SyscallFailsWithErrno(ESPIPE));
+    EXPECT_THAT(lseek(wfd_.get(), 0, SEEK_CUR), SyscallFailsWithErrno(ESPIPE));
+    EXPECT_THAT(lseek(wfd_.get(), 4, SEEK_CUR), SyscallFailsWithErrno(ESPIPE));
+
+    // Attempt end-of-file seeks.
+    EXPECT_THAT(lseek(rfd_.get(), 0, SEEK_CUR), SyscallFailsWithErrno(ESPIPE));
+    EXPECT_THAT(lseek(rfd_.get(), -4, SEEK_END), SyscallFailsWithErrno(ESPIPE));
+    EXPECT_THAT(lseek(wfd_.get(), 0, SEEK_CUR), SyscallFailsWithErrno(ESPIPE));
+    EXPECT_THAT(lseek(wfd_.get(), -4, SEEK_END), SyscallFailsWithErrno(ESPIPE));
+
+    // Add some more data to the pipe.
+    int buf = kTestValue;
+    ASSERT_THAT(write(wfd_.get(), &buf, sizeof(buf)),
+                SyscallSucceedsWithValue(sizeof(buf)));
+  }
+}
+
+TEST_P(PipeTest, OffsetCalls) {
+  SKIP_IF(!CreateBlocking());
+
+  int buf;
+  EXPECT_THAT(pread(wfd_.get(), &buf, sizeof(buf), 0),
+              SyscallFailsWithErrno(ESPIPE));
+  EXPECT_THAT(pwrite(rfd_.get(), &buf, sizeof(buf), 0),
+              SyscallFailsWithErrno(ESPIPE));
+
+  struct iovec iov;
+  iov.iov_base = &buf;
+  iov.iov_len = sizeof(buf);
+  EXPECT_THAT(preadv(wfd_.get(), &iov, 1, 0), SyscallFailsWithErrno(ESPIPE));
+  EXPECT_THAT(pwritev(rfd_.get(), &iov, 1, 0), SyscallFailsWithErrno(ESPIPE));
+}
+
+TEST_P(PipeTest, WriterSideCloses) {
+  SKIP_IF(!CreateBlocking());
+
+  ScopedThread t([this]() {
+    int buf = ~kTestValue;
+    ASSERT_THAT(read(rfd_.get(), &buf, sizeof(buf)),
+                SyscallSucceedsWithValue(sizeof(buf)));
+    EXPECT_EQ(buf, kTestValue);
+    // This will return when the close() completes.
+    ASSERT_THAT(read(rfd_.get(), &buf, sizeof(buf)), SyscallSucceeds());
+    // This will return straight away.
+    ASSERT_THAT(read(rfd_.get(), &buf, sizeof(buf)),
+                SyscallSucceedsWithValue(0));
+  });
+
+  // Sleep a bit so the thread can block.
+  absl::SleepFor(syncDelay);
+
+  // Write to unblock.
+  int buf = kTestValue;
+  ASSERT_THAT(write(wfd_.get(), &buf, sizeof(buf)),
+              SyscallSucceedsWithValue(sizeof(buf)));
+
+  // Sleep a bit so the thread can block again.
+  absl::SleepFor(syncDelay);
+
+  // Allow the thread to complete.
+  ASSERT_THAT(close(wfd_.release()), SyscallSucceeds());
+  t.Join();
+}
+
+TEST_P(PipeTest, WriterSideClosesReadDataFirst) {
+  SKIP_IF(!CreateBlocking());
+
+  int wbuf = kTestValue;
+  ASSERT_THAT(write(wfd_.get(), &wbuf, sizeof(wbuf)),
+              SyscallSucceedsWithValue(sizeof(wbuf)));
+  ASSERT_THAT(close(wfd_.release()), SyscallSucceeds());
+
+  int rbuf;
+  ASSERT_THAT(read(rfd_.get(), &rbuf, sizeof(rbuf)),
+              SyscallSucceedsWithValue(sizeof(rbuf)));
+  EXPECT_EQ(wbuf, rbuf);
+  EXPECT_THAT(read(rfd_.get(), &rbuf, sizeof(rbuf)),
+              SyscallSucceedsWithValue(0));
+}
+
+TEST_P(PipeTest, ReaderSideCloses) {
+  SKIP_IF(!CreateBlocking());
+
+  ASSERT_THAT(close(rfd_.release()), SyscallSucceeds());
+  int buf = kTestValue;
+  EXPECT_THAT(write(wfd_.get(), &buf, sizeof(buf)),
+              SyscallFailsWithErrno(EPIPE));
+}
+
+TEST_P(PipeTest, CloseTwice) {
+  SKIP_IF(!CreateBlocking());
+
+  int reader = rfd_.release();
+  int writer = wfd_.release();
+  ASSERT_THAT(close(reader), SyscallSucceeds());
+  ASSERT_THAT(close(writer), SyscallSucceeds());
+  EXPECT_THAT(close(reader), SyscallFailsWithErrno(EBADF));
+  EXPECT_THAT(close(writer), SyscallFailsWithErrno(EBADF));
+}
+
+// Blocking write returns EPIPE when read end is closed if nothing has been
+// written.
+TEST_P(PipeTest, BlockWriteClosed) {
+  SKIP_IF(!CreateBlocking());
+
+  absl::Notification notify;
+  ScopedThread t([this, &notify]() {
+    std::vector<char> buf(Size());
+    // Exactly fill the pipe buffer.
+    ASSERT_THAT(WriteFd(wfd_.get(), buf.data(), buf.size()),
+                SyscallSucceedsWithValue(buf.size()));
+
+    notify.Notify();
+
+    // Attempt to write one more byte. Blocks.
+    // N.B. Don't use WriteFd, we don't want a retry.
+    EXPECT_THAT(write(wfd_.get(), buf.data(), 1), SyscallFailsWithErrno(EPIPE));
+  });
+
+  notify.WaitForNotification();
+  ASSERT_THAT(close(rfd_.release()), SyscallSucceeds());
+  t.Join();
+}
+
+// Blocking write returns EPIPE when read end is closed even if something has
+// been written.
+TEST_P(PipeTest, BlockPartialWriteClosed) {
+  SKIP_IF(!CreateBlocking());
+
+  ScopedThread t([this]() {
+    const int pipe_size = Size();
+    std::vector<char> buf(2 * pipe_size);
+
+    // Write more than fits in the buffer. Blocks then returns partial write
+    // when the other end is closed. The next call returns EPIPE.
+    ASSERT_THAT(write(wfd_.get(), buf.data(), buf.size()),
+                SyscallSucceedsWithValue(pipe_size));
+    EXPECT_THAT(write(wfd_.get(), buf.data(), buf.size()),
+                SyscallFailsWithErrno(EPIPE));
+  });
+
+  // Leave time for write to become blocked.
+  absl::SleepFor(syncDelay);
+
+  // Unblock the above.
+  ASSERT_THAT(close(rfd_.release()), SyscallSucceeds());
+  t.Join();
+}
+
+TEST_P(PipeTest, ReadFromClosedFd_NoRandomSave) {
+  SKIP_IF(!CreateBlocking());
+
+  absl::Notification notify;
+  ScopedThread t([this, &notify]() {
+    notify.Notify();
+    int buf;
+    ASSERT_THAT(read(rfd_.get(), &buf, sizeof(buf)),
+                SyscallSucceedsWithValue(sizeof(buf)));
+    ASSERT_EQ(kTestValue, buf);
+  });
+  notify.WaitForNotification();
+
+  // Make sure that the thread gets to read().
+  absl::SleepFor(syncDelay);
+
+  {
+    // We cannot save/restore here as the read end of pipe is closed but there
+    // is ongoing read() above. We will not be able to restart the read()
+    // successfully in restore run since the read fd is closed.
+    const DisableSave ds;
+    ASSERT_THAT(close(rfd_.release()), SyscallSucceeds());
+    int buf = kTestValue;
+    ASSERT_THAT(write(wfd_.get(), &buf, sizeof(buf)),
+                SyscallSucceedsWithValue(sizeof(buf)));
+    t.Join();
+  }
+}
+
+TEST_P(PipeTest, FionRead) {
+  SKIP_IF(!CreateBlocking());
+
+  int n;
+  ASSERT_THAT(ioctl(rfd_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0));
+  EXPECT_EQ(n, 0);
+  ASSERT_THAT(ioctl(wfd_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0));
+  EXPECT_EQ(n, 0);
+
+  std::vector<char> buf(Size());
+  ASSERT_THAT(write(wfd_.get(), buf.data(), buf.size()),
+              SyscallSucceedsWithValue(buf.size()));
+
+  EXPECT_THAT(ioctl(rfd_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0));
+  EXPECT_EQ(n, buf.size());
+  EXPECT_THAT(ioctl(wfd_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0));
+  EXPECT_EQ(n, buf.size());
+}
+
+// Test that opening an empty anonymous pipe RDONLY via /proc/self/fd/N does not
+// block waiting for a writer.
+TEST_P(PipeTest, OpenViaProcSelfFD) {
+  SKIP_IF(!CreateBlocking());
+  SKIP_IF(IsNamedPipe());
+
+  // Close the write end of the pipe.
+  ASSERT_THAT(close(wfd_.release()), SyscallSucceeds());
+
+  // Open other side via /proc/self/fd.  It should not block.
+  FileDescriptor proc_self_fd = ASSERT_NO_ERRNO_AND_VALUE(
+      Open(absl::StrCat("/proc/self/fd/", rfd_.get()), O_RDONLY));
+}
+
+// Test that opening and reading from an anonymous pipe (with existing writes)
+// RDONLY via /proc/self/fd/N returns the existing data.
+TEST_P(PipeTest, OpenViaProcSelfFDWithWrites) {
+  SKIP_IF(!CreateBlocking());
+  SKIP_IF(IsNamedPipe());
+
+  // Write to the pipe and then close the write fd.
+  int wbuf = kTestValue;
+  ASSERT_THAT(write(wfd_.get(), &wbuf, sizeof(wbuf)),
+              SyscallSucceedsWithValue(sizeof(wbuf)));
+  ASSERT_THAT(close(wfd_.release()), SyscallSucceeds());
+
+  // Open read side via /proc/self/fd, and read from it.
+  FileDescriptor proc_self_fd = ASSERT_NO_ERRNO_AND_VALUE(
+      Open(absl::StrCat("/proc/self/fd/", rfd_.get()), O_RDONLY));
+  int rbuf;
+  ASSERT_THAT(read(proc_self_fd.get(), &rbuf, sizeof(rbuf)),
+              SyscallSucceedsWithValue(sizeof(rbuf)));
+  EXPECT_EQ(wbuf, rbuf);
+}
+
+// Test that accesses of /proc/<PID>/fd correctly decrement the refcount.
+TEST_P(PipeTest, ProcFDReleasesFile) {
+  SKIP_IF(!CreateBlocking());
+
+  // Stat the pipe FD, which shouldn't alter the refcount.
+  struct stat wst;
+  ASSERT_THAT(lstat(absl::StrCat("/proc/self/fd/", wfd_.get()).c_str(), &wst),
+              SyscallSucceeds());
+
+  // Close the write end and ensure that read indicates EOF.
+  wfd_.reset();
+  char buf;
+  ASSERT_THAT(read(rfd_.get(), &buf, 1), SyscallSucceedsWithValue(0));
+}
+
+// Same for /proc/<PID>/fdinfo.
+TEST_P(PipeTest, ProcFDInfoReleasesFile) {
+  SKIP_IF(!CreateBlocking());
+
+  // Stat the pipe FD, which shouldn't alter the refcount.
+  struct stat wst;
+  ASSERT_THAT(
+      lstat(absl::StrCat("/proc/self/fdinfo/", wfd_.get()).c_str(), &wst),
+      SyscallSucceeds());
+
+  // Close the write end and ensure that read indicates EOF.
+  wfd_.reset();
+  char buf;
+  ASSERT_THAT(read(rfd_.get(), &buf, 1), SyscallSucceedsWithValue(0));
+}
+
+TEST_P(PipeTest, SizeChange) {
+  SKIP_IF(!CreateBlocking());
+
+  // Set the minimum possible size.
+  ASSERT_THAT(fcntl(rfd_.get(), F_SETPIPE_SZ, 0), SyscallSucceeds());
+  int min = Size();
+  EXPECT_GT(min, 0);  // Should be rounded up.
+
+  // Set from the read end.
+  ASSERT_THAT(fcntl(rfd_.get(), F_SETPIPE_SZ, min + 1), SyscallSucceeds());
+  int med = Size();
+  EXPECT_GT(med, min);  // Should have grown, may be rounded.
+
+  // Set from the write end.
+  ASSERT_THAT(fcntl(wfd_.get(), F_SETPIPE_SZ, med + 1), SyscallSucceeds());
+  int max = Size();
+  EXPECT_GT(max, med);  // Ditto.
+}
+
+TEST_P(PipeTest, SizeChangeMax) {
+  SKIP_IF(!CreateBlocking());
+
+  // Assert there's some maximum.
+  EXPECT_THAT(fcntl(rfd_.get(), F_SETPIPE_SZ, 0x7fffffffffffffff),
+              SyscallFailsWithErrno(EINVAL));
+  EXPECT_THAT(fcntl(wfd_.get(), F_SETPIPE_SZ, 0x7fffffffffffffff),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_P(PipeTest, SizeChangeFull) {
+  SKIP_IF(!CreateBlocking());
+
+  // Ensure that we adjust to a large enough size to avoid rounding when we
+  // perform the size decrease. If rounding occurs, we may not actually
+  // adjust the size and the call below will return success. It was found via
+  // experimentation that this granularity avoids the rounding for Linux.
+  constexpr int kDelta = 64 * 1024;
+  ASSERT_THAT(fcntl(wfd_.get(), F_SETPIPE_SZ, Size() + kDelta),
+              SyscallSucceeds());
+
+  // Fill the buffer and try to change down.
+  std::vector<char> buf(Size());
+  ASSERT_THAT(write(wfd_.get(), buf.data(), buf.size()),
+              SyscallSucceedsWithValue(buf.size()));
+  EXPECT_THAT(fcntl(wfd_.get(), F_SETPIPE_SZ, Size() - kDelta),
+              SyscallFailsWithErrno(EBUSY));
+}
+
+TEST_P(PipeTest, Streaming) {
+  SKIP_IF(!CreateBlocking());
+
+  // We make too many calls to go through full save cycles.
+  DisableSave ds;
+
+  // Size() requires 2 syscalls, call it once and remember the value.
+  const int pipe_size = Size();
+
+  absl::Notification notify;
+  ScopedThread t([this, &notify, pipe_size]() {
+    // Don't start until it's full.
+    notify.WaitForNotification();
+    for (int i = 0; i < pipe_size; i++) {
+      int rbuf;
+      ASSERT_THAT(read(rfd_.get(), &rbuf, sizeof(rbuf)),
+                  SyscallSucceedsWithValue(sizeof(rbuf)));
+      EXPECT_EQ(rbuf, i);
+    }
+  });
+
+  // Write 4 bytes * pipe_size. It will fill up the pipe once, notify the reader
+  // to start. Then we write pipe size worth 3 more times to ensure the reader
+  // can follow along.
+  ssize_t total = 0;
+  for (int i = 0; i < pipe_size; i++) {
+    ssize_t written = write(wfd_.get(), &i, sizeof(i));
+    ASSERT_THAT(written, SyscallSucceedsWithValue(sizeof(i)));
+    total += written;
+
+    // Is the next write about to fill up the buffer? Wake up the reader once.
+    if (total < pipe_size && (total + written) >= pipe_size) {
+      notify.Notify();
+    }
+  }
+}
+
+std::string PipeCreatorName(::testing::TestParamInfo<PipeCreator> info) {
+  return info.param.name_;  // Use the name specified.
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    Pipes, PipeTest,
+    ::testing::Values(
+        PipeCreator{
+            "pipe",
+            [](int fds[2], bool* is_blocking, bool* is_namedpipe) {
+              ASSERT_THAT(pipe(fds), SyscallSucceeds());
+              *is_blocking = true;
+              *is_namedpipe = false;
+            },
+        },
+        PipeCreator{
+            "pipe2blocking",
+            [](int fds[2], bool* is_blocking, bool* is_namedpipe) {
+              ASSERT_THAT(pipe2(fds, 0), SyscallSucceeds());
+              *is_blocking = true;
+              *is_namedpipe = false;
+            },
+        },
+        PipeCreator{
+            "pipe2nonblocking",
+            [](int fds[2], bool* is_blocking, bool* is_namedpipe) {
+              ASSERT_THAT(pipe2(fds, O_NONBLOCK), SyscallSucceeds());
+              *is_blocking = false;
+              *is_namedpipe = false;
+            },
+        },
+        PipeCreator{
+            "smallbuffer",
+            [](int fds[2], bool* is_blocking, bool* is_namedpipe) {
+              // Set to the minimum available size (will round up).
+              ASSERT_THAT(pipe(fds), SyscallSucceeds());
+              ASSERT_THAT(fcntl(fds[0], F_SETPIPE_SZ, 0), SyscallSucceeds());
+              *is_blocking = true;
+              *is_namedpipe = false;
+            },
+        },
+        PipeCreator{
+            "namednonblocking",
+            [](int fds[2], bool* is_blocking, bool* is_namedpipe) {
+              // Create a new file-based pipe (non-blocking).
+              std::string path;
+              {
+                auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+                path = file.path();
+              }
+              SKIP_IF(mkfifo(path.c_str(), 0644) != 0);
+              fds[0] = open(path.c_str(), O_NONBLOCK | O_RDONLY);
+              fds[1] = open(path.c_str(), O_NONBLOCK | O_WRONLY);
+              MaybeSave();
+              *is_blocking = false;
+              *is_namedpipe = true;
+            },
+        },
+        PipeCreator{
+            "namedblocking",
+            [](int fds[2], bool* is_blocking, bool* is_namedpipe) {
+              // Create a new file-based pipe (blocking).
+              std::string path;
+              {
+                auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+                path = file.path();
+              }
+              SKIP_IF(mkfifo(path.c_str(), 0644) != 0);
+              ScopedThread t(
+                  [&path, &fds]() { fds[1] = open(path.c_str(), O_WRONLY); });
+              fds[0] = open(path.c_str(), O_RDONLY);
+              t.Join();
+              MaybeSave();
+              *is_blocking = true;
+              *is_namedpipe = true;
+            },
+        }),
+    PipeCreatorName);
+
+}  // namespace
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/poll.cc b/test/syscalls/linux/poll.cc
new file mode 100644
index 000000000..7a316427d
--- /dev/null
+++ b/test/syscalls/linux/poll.cc
@@ -0,0 +1,294 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <poll.h>
+#include <sys/resource.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include <algorithm>
+#include <iostream>
+
+#include "gtest/gtest.h"
+#include "absl/synchronization/notification.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/syscalls/linux/base_poll_test.h"
+#include "test/util/eventfd_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/logging.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+class PollTest : public BasePollTest {
+ protected:
+  void SetUp() override { BasePollTest::SetUp(); }
+  void TearDown() override { BasePollTest::TearDown(); }
+};
+
+TEST_F(PollTest, InvalidFds) {
+  // fds is invalid because it's null, but we tell ppoll the length is non-zero.
+  EXPECT_THAT(poll(nullptr, 1, 1), SyscallFailsWithErrno(EFAULT));
+  EXPECT_THAT(poll(nullptr, -1, 1), SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_F(PollTest, NullFds) {
+  EXPECT_THAT(poll(nullptr, 0, 10), SyscallSucceeds());
+}
+
+TEST_F(PollTest, ZeroTimeout) {
+  EXPECT_THAT(poll(nullptr, 0, 0), SyscallSucceeds());
+}
+
+// If random S/R interrupts the poll, SIGALRM may be delivered before poll
+// restarts, causing the poll to hang forever.
+TEST_F(PollTest, NegativeTimeout_NoRandomSave) {
+  // Negative timeout mean wait forever so set a timer.
+  SetTimer(absl::Milliseconds(100));
+  EXPECT_THAT(poll(nullptr, 0, -1), SyscallFailsWithErrno(EINTR));
+  EXPECT_TRUE(TimerFired());
+}
+
+TEST_F(PollTest, NonBlockingEventPOLLIN) {
+  // Create a pipe.
+  int fds[2];
+  ASSERT_THAT(pipe(fds), SyscallSucceeds());
+
+  FileDescriptor fd0(fds[0]);
+  FileDescriptor fd1(fds[1]);
+
+  // Write some data to the pipe.
+  char s[] = "foo\n";
+  ASSERT_THAT(WriteFd(fd1.get(), s, strlen(s) + 1), SyscallSucceeds());
+
+  // Poll on the reader fd with POLLIN event.
+  struct pollfd poll_fd = {fd0.get(), POLLIN, 0};
+  EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, 0), SyscallSucceedsWithValue(1));
+
+  // Should trigger POLLIN event.
+  EXPECT_EQ(poll_fd.revents & POLLIN, POLLIN);
+}
+
+TEST_F(PollTest, BlockingEventPOLLIN) {
+  // Create a pipe.
+  int fds[2];
+  ASSERT_THAT(pipe(fds), SyscallSucceeds());
+
+  FileDescriptor fd0(fds[0]);
+  FileDescriptor fd1(fds[1]);
+
+  // Start a blocking poll on the read fd.
+  absl::Notification notify;
+  ScopedThread t([&fd0, &notify]() {
+    notify.Notify();
+
+    // Poll on the reader fd with POLLIN event.
+    struct pollfd poll_fd = {fd0.get(), POLLIN, 0};
+    EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, -1), SyscallSucceedsWithValue(1));
+
+    // Should trigger POLLIN event.
+    EXPECT_EQ(poll_fd.revents & POLLIN, POLLIN);
+  });
+
+  notify.WaitForNotification();
+  absl::SleepFor(absl::Seconds(1.0));
+
+  // Write some data to the pipe.
+  char s[] = "foo\n";
+  ASSERT_THAT(WriteFd(fd1.get(), s, strlen(s) + 1), SyscallSucceeds());
+}
+
+TEST_F(PollTest, NonBlockingEventPOLLHUP) {
+  // Create a pipe.
+  int fds[2];
+  ASSERT_THAT(pipe(fds), SyscallSucceeds());
+
+  FileDescriptor fd0(fds[0]);
+  FileDescriptor fd1(fds[1]);
+
+  // Close the writer fd.
+  fd1.reset();
+
+  // Poll on the reader fd with POLLIN event.
+  struct pollfd poll_fd = {fd0.get(), POLLIN, 0};
+  EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, 0), SyscallSucceedsWithValue(1));
+
+  // Should trigger POLLHUP event.
+  EXPECT_EQ(poll_fd.revents & POLLHUP, POLLHUP);
+
+  // Should not trigger POLLIN event.
+  EXPECT_EQ(poll_fd.revents & POLLIN, 0);
+}
+
+TEST_F(PollTest, BlockingEventPOLLHUP) {
+  // Create a pipe.
+  int fds[2];
+  ASSERT_THAT(pipe(fds), SyscallSucceeds());
+
+  FileDescriptor fd0(fds[0]);
+  FileDescriptor fd1(fds[1]);
+
+  // Start a blocking poll on the read fd.
+  absl::Notification notify;
+  ScopedThread t([&fd0, &notify]() {
+    notify.Notify();
+
+    // Poll on the reader fd with POLLIN event.
+    struct pollfd poll_fd = {fd0.get(), POLLIN, 0};
+    EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, -1), SyscallSucceedsWithValue(1));
+
+    // Should trigger POLLHUP event.
+    EXPECT_EQ(poll_fd.revents & POLLHUP, POLLHUP);
+
+    // Should not trigger POLLIN event.
+    EXPECT_EQ(poll_fd.revents & POLLIN, 0);
+  });
+
+  notify.WaitForNotification();
+  absl::SleepFor(absl::Seconds(1.0));
+
+  // Write some data and close the writer fd.
+  fd1.reset();
+}
+
+TEST_F(PollTest, NonBlockingEventPOLLERR) {
+  // Create a pipe.
+  int fds[2];
+  ASSERT_THAT(pipe(fds), SyscallSucceeds());
+
+  FileDescriptor fd0(fds[0]);
+  FileDescriptor fd1(fds[1]);
+
+  // Close the reader fd.
+  fd0.reset();
+
+  // Poll on the writer fd with POLLOUT event.
+  struct pollfd poll_fd = {fd1.get(), POLLOUT, 0};
+  EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, 0), SyscallSucceedsWithValue(1));
+
+  // Should trigger POLLERR event.
+  EXPECT_EQ(poll_fd.revents & POLLERR, POLLERR);
+
+  // Should also trigger POLLOUT event.
+  EXPECT_EQ(poll_fd.revents & POLLOUT, POLLOUT);
+}
+
+// This test will validate that if an FD is already ready on some event, whether
+// it's POLLIN or POLLOUT it will not immediately return unless that's actually
+// what the caller was interested in.
+TEST_F(PollTest, ImmediatelyReturnOnlyOnPollEvents) {
+  // Create a pipe.
+  int fds[2];
+  ASSERT_THAT(pipe(fds), SyscallSucceeds());
+
+  FileDescriptor fd0(fds[0]);
+  FileDescriptor fd1(fds[1]);
+
+  // Wait for read related event on the write side of the pipe, since a write
+  // is possible on fds[1] it would mean that POLLOUT would return immediately.
+  // We should make sure that we're not woken up with that state that we didn't
+  // specificially request.
+  constexpr int kTimeoutMs = 100;
+  struct pollfd poll_fd = {fd1.get(), POLLIN | POLLPRI | POLLRDHUP, 0};
+  EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, kTimeoutMs),
+              SyscallSucceedsWithValue(0));  // We should timeout.
+  EXPECT_EQ(poll_fd.revents, 0);  // Nothing should be in returned events.
+
+  // Now let's poll on POLLOUT and we should get back 1 fd as being ready and
+  // it should contain POLLOUT in the revents.
+  poll_fd.events = POLLOUT;
+  EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, kTimeoutMs),
+              SyscallSucceedsWithValue(1));  // 1 fd should have an event.
+  EXPECT_EQ(poll_fd.revents, POLLOUT);       // POLLOUT should be in revents.
+}
+
+// This test validates that poll(2) while data is available immediately returns.
+TEST_F(PollTest, PollLevelTriggered) {
+  int fds[2] = {};
+  ASSERT_THAT(socketpair(AF_UNIX, SOCK_STREAM, /*protocol=*/0, fds),
+              SyscallSucceeds());
+
+  FileDescriptor fd0(fds[0]);
+  FileDescriptor fd1(fds[1]);
+
+  // Write two bytes to the socket.
+  const char* kBuf = "aa";
+  ASSERT_THAT(RetryEINTR(send)(fd0.get(), kBuf, /*len=*/2, /*flags=*/0),
+              SyscallSucceedsWithValue(2));  // 2 bytes should be written.
+
+  // Poll(2) should immediately return as there is data available to read.
+  constexpr int kInfiniteTimeout = -1;
+  struct pollfd poll_fd = {fd1.get(), POLLIN, 0};
+  ASSERT_THAT(RetryEINTR(poll)(&poll_fd, /*nfds=*/1, kInfiniteTimeout),
+              SyscallSucceedsWithValue(1));  // 1 fd should be ready to read.
+  EXPECT_NE(poll_fd.revents & POLLIN, 0);
+
+  // Read a single byte.
+  char read_byte = 0;
+  ASSERT_THAT(RetryEINTR(recv)(fd1.get(), &read_byte, /*len=*/1, /*flags=*/0),
+              SyscallSucceedsWithValue(1));  // 1 byte should be read.
+  ASSERT_EQ(read_byte, 'a');  // We should have read a single 'a'.
+
+  // Create a separate pollfd for our second poll.
+  struct pollfd poll_fd_after = {fd1.get(), POLLIN, 0};
+
+  // Poll(2) should again immediately return since we only read one byte.
+  ASSERT_THAT(RetryEINTR(poll)(&poll_fd_after, /*nfds=*/1, kInfiniteTimeout),
+              SyscallSucceedsWithValue(1));  // 1 fd should be ready to read.
+  EXPECT_NE(poll_fd_after.revents & POLLIN, 0);
+}
+
+TEST_F(PollTest, Nfds) {
+  // Stash value of RLIMIT_NOFILES.
+  struct rlimit rlim;
+  TEST_PCHECK(getrlimit(RLIMIT_NOFILE, &rlim) == 0);
+
+  // gVisor caps the number of FDs that epoll can use beyond RLIMIT_NOFILE.
+  constexpr rlim_t maxFD = 4096;
+  if (rlim.rlim_cur > maxFD) {
+    rlim.rlim_cur = maxFD;
+    TEST_PCHECK(setrlimit(RLIMIT_NOFILE, &rlim) == 0);
+  }
+
+  rlim_t max_fds = rlim.rlim_cur;
+  std::cout << "Using limit: " << max_fds << std::endl;
+
+  // Create an eventfd. Since its value is initially zero, it is writable.
+  FileDescriptor efd = ASSERT_NO_ERRNO_AND_VALUE(NewEventFD());
+
+  // Create the biggest possible pollfd array such that each element is valid.
+  // Each entry in the 'fds' array refers to the eventfd and polls for
+  // "writable" events (events=POLLOUT). This essentially guarantees that the
+  // poll() is a no-op and allows negative testing of the 'nfds' parameter.
+  std::vector<struct pollfd> fds(max_fds + 1,
+                                 {.fd = efd.get(), .events = POLLOUT});
+
+  // Verify that 'nfds' up to RLIMIT_NOFILE are allowed.
+  EXPECT_THAT(RetryEINTR(poll)(fds.data(), 1, 1), SyscallSucceedsWithValue(1));
+  EXPECT_THAT(RetryEINTR(poll)(fds.data(), max_fds / 2, 1),
+              SyscallSucceedsWithValue(max_fds / 2));
+  EXPECT_THAT(RetryEINTR(poll)(fds.data(), max_fds, 1),
+              SyscallSucceedsWithValue(max_fds));
+
+  // If 'nfds' exceeds RLIMIT_NOFILE then it must fail with EINVAL.
+  EXPECT_THAT(poll(fds.data(), max_fds + 1, 1), SyscallFailsWithErrno(EINVAL));
+}
+
+}  // namespace
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/ppoll.cc b/test/syscalls/linux/ppoll.cc
new file mode 100644
index 000000000..8245a11e8
--- /dev/null
+++ b/test/syscalls/linux/ppoll.cc
@@ -0,0 +1,155 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <poll.h>
+#include <signal.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "absl/time/time.h"
+#include "test/syscalls/linux/base_poll_test.h"
+#include "test/util/signal_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+// Linux and glibc have a different idea of the sizeof sigset_t. When calling
+// the syscall directly, use what the kernel expects.
+unsigned kSigsetSize = SIGRTMAX / 8;
+
+// Linux ppoll(2) differs from the glibc wrapper function in that Linux updates
+// the timeout with the amount of time remaining. In order to test this behavior
+// we need to use the syscall directly.
+int syscallPpoll(struct pollfd* fds, nfds_t nfds, struct timespec* timeout_ts,
+                 const sigset_t* sigmask, unsigned mask_size) {
+  return syscall(SYS_ppoll, fds, nfds, timeout_ts, sigmask, mask_size);
+}
+
+class PpollTest : public BasePollTest {
+ protected:
+  void SetUp() override { BasePollTest::SetUp(); }
+  void TearDown() override { BasePollTest::TearDown(); }
+};
+
+TEST_F(PpollTest, InvalidFds) {
+  // fds is invalid because it's null, but we tell ppoll the length is non-zero.
+  struct timespec timeout = {};
+  sigset_t sigmask;
+  TEST_PCHECK(sigemptyset(&sigmask) == 0);
+  EXPECT_THAT(syscallPpoll(nullptr, 1, &timeout, &sigmask, kSigsetSize),
+              SyscallFailsWithErrno(EFAULT));
+  EXPECT_THAT(syscallPpoll(nullptr, -1, &timeout, &sigmask, kSigsetSize),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+// See that when fds is null, ppoll behaves like sleep.
+TEST_F(PpollTest, NullFds) {
+  struct timespec timeout = absl::ToTimespec(absl::Milliseconds(10));
+  ASSERT_THAT(syscallPpoll(nullptr, 0, &timeout, nullptr, 0),
+              SyscallSucceeds());
+  EXPECT_EQ(timeout.tv_sec, 0);
+  EXPECT_EQ(timeout.tv_nsec, 0);
+}
+
+TEST_F(PpollTest, ZeroTimeout) {
+  struct timespec timeout = {};
+  ASSERT_THAT(syscallPpoll(nullptr, 0, &timeout, nullptr, 0),
+              SyscallSucceeds());
+  EXPECT_EQ(timeout.tv_sec, 0);
+  EXPECT_EQ(timeout.tv_nsec, 0);
+}
+
+// If random S/R interrupts the ppoll, SIGALRM may be delivered before ppoll
+// restarts, causing the ppoll to hang forever.
+TEST_F(PpollTest, NoTimeout_NoRandomSave) {
+  // When there's no timeout, ppoll may never return so set a timer.
+  SetTimer(absl::Milliseconds(100));
+  // See that we get interrupted by the timer.
+  ASSERT_THAT(syscallPpoll(nullptr, 0, nullptr, nullptr, 0),
+              SyscallFailsWithErrno(EINTR));
+  EXPECT_TRUE(TimerFired());
+}
+
+TEST_F(PpollTest, InvalidTimeoutNegative) {
+  struct timespec timeout = absl::ToTimespec(absl::Nanoseconds(-1));
+  EXPECT_THAT(syscallPpoll(nullptr, 0, &timeout, nullptr, 0),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_F(PpollTest, InvalidTimeoutNotNormalized) {
+  struct timespec timeout = {0, 1000000001};
+  EXPECT_THAT(syscallPpoll(nullptr, 0, &timeout, nullptr, 0),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_F(PpollTest, InvalidMaskSize) {
+  struct timespec timeout = {};
+  sigset_t sigmask;
+  TEST_PCHECK(sigemptyset(&sigmask) == 0);
+  EXPECT_THAT(syscallPpoll(nullptr, 0, &timeout, &sigmask, 128),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+// Verify that signals blocked by the ppoll mask (that would otherwise be
+// allowed) do not interrupt ppoll.
+TEST_F(PpollTest, SignalMaskBlocksSignal) {
+  absl::Duration duration(absl::Seconds(30));
+  struct timespec timeout = absl::ToTimespec(duration);
+  absl::Duration timer_duration(absl::Seconds(10));
+
+  // Call with a mask that blocks SIGALRM. See that ppoll is not interrupted
+  // (i.e. returns 0) and that upon completion, the timer has fired.
+  sigset_t mask;
+  ASSERT_THAT(sigprocmask(0, nullptr, &mask), SyscallSucceeds());
+  TEST_PCHECK(sigaddset(&mask, SIGALRM) == 0);
+  SetTimer(timer_duration);
+  MaybeSave();
+  ASSERT_FALSE(TimerFired());
+  ASSERT_THAT(syscallPpoll(nullptr, 0, &timeout, &mask, kSigsetSize),
+              SyscallSucceeds());
+  EXPECT_TRUE(TimerFired());
+  EXPECT_EQ(absl::DurationFromTimespec(timeout), absl::Duration());
+}
+
+// Verify that signals allowed by the ppoll mask (that would otherwise be
+// blocked) interrupt ppoll.
+TEST_F(PpollTest, SignalMaskAllowsSignal) {
+  absl::Duration duration(absl::Seconds(30));
+  struct timespec timeout = absl::ToTimespec(duration);
+  absl::Duration timer_duration(absl::Seconds(10));
+
+  sigset_t mask;
+  ASSERT_THAT(sigprocmask(0, nullptr, &mask), SyscallSucceeds());
+
+  // Block SIGALRM.
+  auto cleanup =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_BLOCK, SIGALRM));
+
+  // Call with a mask that unblocks SIGALRM. See that ppoll is interrupted.
+  SetTimer(timer_duration);
+  MaybeSave();
+  ASSERT_FALSE(TimerFired());
+  ASSERT_THAT(syscallPpoll(nullptr, 0, &timeout, &mask, kSigsetSize),
+              SyscallFailsWithErrno(EINTR));
+  EXPECT_TRUE(TimerFired());
+  EXPECT_GT(absl::DurationFromTimespec(timeout), absl::Duration());
+}
+
+}  // namespace
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/prctl.cc b/test/syscalls/linux/prctl.cc
new file mode 100644
index 000000000..04c5161f5
--- /dev/null
+++ b/test/syscalls/linux/prctl.cc
@@ -0,0 +1,230 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sys/prctl.h>
+#include <sys/ptrace.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include <string>
+
+#include "gtest/gtest.h"
+#include "absl/flags/flag.h"
+#include "test/util/capability_util.h"
+#include "test/util/cleanup.h"
+#include "test/util/multiprocess_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+ABSL_FLAG(bool, prctl_no_new_privs_test_child, false,
+          "If true, exit with the return value of prctl(PR_GET_NO_NEW_PRIVS) "
+          "plus an offset (see test source).");
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+#ifndef SUID_DUMP_DISABLE
+#define SUID_DUMP_DISABLE 0
+#endif /* SUID_DUMP_DISABLE */
+#ifndef SUID_DUMP_USER
+#define SUID_DUMP_USER 1
+#endif /* SUID_DUMP_USER */
+#ifndef SUID_DUMP_ROOT
+#define SUID_DUMP_ROOT 2
+#endif /* SUID_DUMP_ROOT */
+
+TEST(PrctlTest, NameInitialized) {
+  const size_t name_length = 20;
+  char name[name_length] = {};
+  ASSERT_THAT(prctl(PR_GET_NAME, name), SyscallSucceeds());
+  ASSERT_NE(std::string(name), "");
+}
+
+TEST(PrctlTest, SetNameLongName) {
+  const size_t name_length = 20;
+  const std::string long_name(name_length, 'A');
+  ASSERT_THAT(prctl(PR_SET_NAME, long_name.c_str()), SyscallSucceeds());
+  char truncated_name[name_length] = {};
+  ASSERT_THAT(prctl(PR_GET_NAME, truncated_name), SyscallSucceeds());
+  const size_t truncated_length = 15;
+  ASSERT_EQ(long_name.substr(0, truncated_length), std::string(truncated_name));
+}
+
+TEST(PrctlTest, ChildProcessName) {
+  constexpr size_t kMaxNameLength = 15;
+
+  char parent_name[kMaxNameLength + 1] = {};
+  memset(parent_name, 'a', kMaxNameLength);
+
+  ASSERT_THAT(prctl(PR_SET_NAME, parent_name), SyscallSucceeds());
+
+  pid_t child_pid = fork();
+  TEST_PCHECK(child_pid >= 0);
+  if (child_pid == 0) {
+    char child_name[kMaxNameLength + 1] = {};
+    TEST_PCHECK(prctl(PR_GET_NAME, child_name) >= 0);
+    TEST_CHECK(memcmp(parent_name, child_name, sizeof(parent_name)) == 0);
+    _exit(0);
+  }
+
+  int status;
+  ASSERT_THAT(waitpid(child_pid, &status, 0),
+              SyscallSucceedsWithValue(child_pid));
+  EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+      << "status =" << status;
+}
+
+// Offset added to exit code from test child to distinguish from other abnormal
+// exits.
+constexpr int kPrctlNoNewPrivsTestChildExitBase = 100;
+
+TEST(PrctlTest, NoNewPrivsPreservedAcrossCloneForkAndExecve) {
+  // Check if no_new_privs is already set. If it is, we can still test that it's
+  // preserved across clone/fork/execve, but we also expect it to still be set
+  // at the end of the test. Otherwise, call prctl(PR_SET_NO_NEW_PRIVS) so as
+  // not to contaminate the original thread.
+  int no_new_privs;
+  ASSERT_THAT(no_new_privs = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0),
+              SyscallSucceeds());
+  ScopedThread([] {
+    ASSERT_THAT(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0), SyscallSucceeds());
+    EXPECT_THAT(prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0),
+                SyscallSucceedsWithValue(1));
+    ScopedThread([] {
+      EXPECT_THAT(prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0),
+                  SyscallSucceedsWithValue(1));
+      // Note that these ASSERT_*s failing will only return from this thread,
+      // but this is the intended behavior.
+      pid_t child_pid = -1;
+      int execve_errno = 0;
+      auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+          ForkAndExec("/proc/self/exe",
+                      {"/proc/self/exe", "--prctl_no_new_privs_test_child"}, {},
+                      nullptr, &child_pid, &execve_errno));
+
+      ASSERT_GT(child_pid, 0);
+      ASSERT_EQ(execve_errno, 0);
+
+      int status = 0;
+      ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0),
+                  SyscallSucceeds());
+      ASSERT_TRUE(WIFEXITED(status));
+      ASSERT_EQ(WEXITSTATUS(status), kPrctlNoNewPrivsTestChildExitBase + 1);
+
+      EXPECT_THAT(prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0),
+                  SyscallSucceedsWithValue(1));
+    });
+    EXPECT_THAT(prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0),
+                SyscallSucceedsWithValue(1));
+  });
+  EXPECT_THAT(prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0),
+              SyscallSucceedsWithValue(no_new_privs));
+}
+
+TEST(PrctlTest, PDeathSig) {
+  pid_t child_pid;
+
+  // Make the new process' parent a separate thread since the parent death
+  // signal fires when the parent *thread* exits.
+  ScopedThread([&] {
+    child_pid = fork();
+    TEST_CHECK(child_pid >= 0);
+    if (child_pid == 0) {
+      // In child process.
+      TEST_CHECK(prctl(PR_SET_PDEATHSIG, SIGKILL) >= 0);
+      int signo;
+      TEST_CHECK(prctl(PR_GET_PDEATHSIG, &signo) >= 0);
+      TEST_CHECK(signo == SIGKILL);
+      // Enable tracing, then raise SIGSTOP and expect our parent to suppress
+      // it.
+      TEST_CHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) >= 0);
+      raise(SIGSTOP);
+      // Sleep until killed by our parent death signal. sleep(3) is
+      // async-signal-safe, absl::SleepFor isn't.
+      while (true) {
+        sleep(10);
+      }
+    }
+    // In parent process.
+
+    // Wait for the child to send itself SIGSTOP and enter signal-delivery-stop.
+    int status;
+    ASSERT_THAT(waitpid(child_pid, &status, 0),
+                SyscallSucceedsWithValue(child_pid));
+    EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP)
+        << "status = " << status;
+
+    // Suppress the SIGSTOP and detach from the child.
+    ASSERT_THAT(ptrace(PTRACE_DETACH, child_pid, 0, 0), SyscallSucceeds());
+  });
+
+  // The child should have been killed by its parent death SIGKILL.
+  int status;
+  ASSERT_THAT(waitpid(child_pid, &status, 0),
+              SyscallSucceedsWithValue(child_pid));
+  EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL)
+      << "status = " << status;
+}
+
+// This test is to validate that calling prctl with PR_SET_MM without the
+// CAP_SYS_RESOURCE returns EPERM.
+TEST(PrctlTest, InvalidPrSetMM) {
+  if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_RESOURCE))) {
+    ASSERT_NO_ERRNO(SetCapability(CAP_SYS_RESOURCE,
+                                  false));  // Drop capability to test below.
+  }
+  ASSERT_THAT(prctl(PR_SET_MM, 0, 0, 0, 0), SyscallFailsWithErrno(EPERM));
+}
+
+// Sanity check that dumpability is remembered.
+TEST(PrctlTest, SetGetDumpability) {
+  int before;
+  ASSERT_THAT(before = prctl(PR_GET_DUMPABLE), SyscallSucceeds());
+  auto cleanup = Cleanup([before] {
+    ASSERT_THAT(prctl(PR_SET_DUMPABLE, before), SyscallSucceeds());
+  });
+
+  EXPECT_THAT(prctl(PR_SET_DUMPABLE, SUID_DUMP_DISABLE), SyscallSucceeds());
+  EXPECT_THAT(prctl(PR_GET_DUMPABLE),
+              SyscallSucceedsWithValue(SUID_DUMP_DISABLE));
+
+  EXPECT_THAT(prctl(PR_SET_DUMPABLE, SUID_DUMP_USER), SyscallSucceeds());
+  EXPECT_THAT(prctl(PR_GET_DUMPABLE), SyscallSucceedsWithValue(SUID_DUMP_USER));
+}
+
+// SUID_DUMP_ROOT cannot be set via PR_SET_DUMPABLE.
+TEST(PrctlTest, RootDumpability) {
+  EXPECT_THAT(prctl(PR_SET_DUMPABLE, SUID_DUMP_ROOT),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
+
+int main(int argc, char** argv) {
+  gvisor::testing::TestInit(&argc, &argv);
+
+  if (absl::GetFlag(FLAGS_prctl_no_new_privs_test_child)) {
+    exit(gvisor::testing::kPrctlNoNewPrivsTestChildExitBase +
+         prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0));
+  }
+
+  return gvisor::testing::RunAllTests();
+}
diff --git a/test/syscalls/linux/prctl_setuid.cc b/test/syscalls/linux/prctl_setuid.cc
new file mode 100644
index 000000000..c4e9cf528
--- /dev/null
+++ b/test/syscalls/linux/prctl_setuid.cc
@@ -0,0 +1,268 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sched.h>
+#include <sys/prctl.h>
+
+#include <string>
+
+#include "gtest/gtest.h"
+#include "absl/flags/flag.h"
+#include "test/util/capability_util.h"
+#include "test/util/logging.h"
+#include "test/util/multiprocess_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+ABSL_FLAG(int32_t, scratch_uid, 65534, "scratch UID");
+// This flag is used to verify that after an exec PR_GET_KEEPCAPS
+// returns 0, the return code will be offset by kPrGetKeepCapsExitBase.
+ABSL_FLAG(bool, prctl_pr_get_keepcaps, false,
+          "If true the test will verify that prctl with pr_get_keepcaps"
+          "returns 0. The test will exit with the result of that check.");
+
+// These tests exist seperately from prctl because we need to start
+// them as root. Setuid() has the behavior that permissions are fully
+// removed if one of the UIDs were 0 before a setuid() call. This
+// behavior can be changed by using PR_SET_KEEPCAPS and that is what
+// is tested here.
+//
+// Reference setuid(2):
+// The setuid() function checks the effective user ID of
+// the caller and if it is the superuser, all process-related user ID's
+// are set to uid.  After this has occurred, it is impossible for the
+// program to regain root privileges.
+//
+// Thus, a set-user-ID-root program wishing to temporarily drop root
+// privileges, assume the identity of an unprivileged user, and then
+// regain root privileges afterward cannot use setuid().  You can
+// accomplish this with seteuid(2).
+namespace gvisor {
+namespace testing {
+
+// Offset added to exit code from test child to distinguish from other abnormal
+// exits.
+constexpr int kPrGetKeepCapsExitBase = 100;
+
+namespace {
+
+class PrctlKeepCapsSetuidTest : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    // PR_GET_KEEPCAPS will only return 0 or 1 (on success).
+    ASSERT_THAT(original_keepcaps_ = prctl(PR_GET_KEEPCAPS, 0, 0, 0, 0),
+                SyscallSucceeds());
+    ASSERT_TRUE(original_keepcaps_ == 0 || original_keepcaps_ == 1);
+  }
+
+  void TearDown() override {
+    // Restore PR_SET_KEEPCAPS.
+    ASSERT_THAT(prctl(PR_SET_KEEPCAPS, original_keepcaps_, 0, 0, 0),
+                SyscallSucceeds());
+
+    // Verify that it was restored.
+    ASSERT_THAT(prctl(PR_GET_KEEPCAPS, 0, 0, 0, 0),
+                SyscallSucceedsWithValue(original_keepcaps_));
+  }
+
+  // The original keep caps value exposed so tests can use it if they need.
+  int original_keepcaps_ = 0;
+};
+
+// This test will verify that a bad value, eg. not 0 or 1 for
+// PR_SET_KEEPCAPS will return EINVAL as required by prctl(2).
+TEST_F(PrctlKeepCapsSetuidTest, PrctlBadArgsToKeepCaps) {
+  ASSERT_THAT(prctl(PR_SET_KEEPCAPS, 2, 0, 0, 0),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+// This test will verify that a setuid(2) without PR_SET_KEEPCAPS will cause
+// all capabilities to be dropped.
+TEST_F(PrctlKeepCapsSetuidTest, SetUidNoKeepCaps) {
+  // getuid(2) never fails.
+  if (getuid() != 0) {
+    SKIP_IF(!IsRunningOnGvisor());
+    FAIL() << "User is not root on gvisor platform.";
+  }
+
+  // Do setuid in a separate thread so that after finishing this test, the
+  // process can still open files the test harness created before starting
+  // this test. Otherwise, the files are created by root (UID before the
+  // test), but cannot be opened by the `uid` set below after the test. After
+  // calling setuid(non-zero-UID), there is no way to get root privileges
+  // back.
+  ScopedThread([] {
+    // Start by verifying we have a capability.
+    TEST_CHECK(HaveCapability(CAP_SYS_ADMIN).ValueOrDie());
+
+    // Verify that PR_GET_KEEPCAPS is disabled.
+    ASSERT_THAT(prctl(PR_GET_KEEPCAPS, 0, 0, 0, 0),
+                SyscallSucceedsWithValue(0));
+
+    // Use syscall instead of glibc setuid wrapper because we want this setuid
+    // call to only apply to this task. POSIX threads, however, require that
+    // all threads have the same UIDs, so using the setuid wrapper sets all
+    // threads' real UID.
+    EXPECT_THAT(syscall(SYS_setuid, absl::GetFlag(FLAGS_scratch_uid)),
+                SyscallSucceeds());
+
+    // Verify that we changed uid.
+    EXPECT_THAT(getuid(),
+                SyscallSucceedsWithValue(absl::GetFlag(FLAGS_scratch_uid)));
+
+    // Verify we lost the capability in the effective set, this always happens.
+    TEST_CHECK(!HaveCapability(CAP_SYS_ADMIN).ValueOrDie());
+
+    // We should have also lost it in the permitted set by the setuid() so
+    // SetCapability should fail when we try to add it back to the effective set
+    ASSERT_FALSE(SetCapability(CAP_SYS_ADMIN, true).ok());
+  });
+}
+
+// This test will verify that a setuid with PR_SET_KEEPCAPS will cause
+// capabilities to be retained after we switch away from the root user.
+TEST_F(PrctlKeepCapsSetuidTest, SetUidKeepCaps) {
+  // getuid(2) never fails.
+  if (getuid() != 0) {
+    SKIP_IF(!IsRunningOnGvisor());
+    FAIL() << "User is not root on gvisor platform.";
+  }
+
+  // Do setuid in a separate thread so that after finishing this test, the
+  // process can still open files the test harness created before starting
+  // this test. Otherwise, the files are created by root (UID before the
+  // test), but cannot be opened by the `uid` set below after the test. After
+  // calling setuid(non-zero-UID), there is no way to get root privileges
+  // back.
+  ScopedThread([] {
+    // Start by verifying we have a capability.
+    TEST_CHECK(HaveCapability(CAP_SYS_ADMIN).ValueOrDie());
+
+    // Set PR_SET_KEEPCAPS.
+    ASSERT_THAT(prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0), SyscallSucceeds());
+
+    // Verify PR_SET_KEEPCAPS was set before we proceed.
+    ASSERT_THAT(prctl(PR_GET_KEEPCAPS, 0, 0, 0, 0),
+                SyscallSucceedsWithValue(1));
+
+    // Use syscall instead of glibc setuid wrapper because we want this setuid
+    // call to only apply to this task. POSIX threads, however, require that
+    // all threads have the same UIDs, so using the setuid wrapper sets all
+    // threads' real UID.
+    EXPECT_THAT(syscall(SYS_setuid, absl::GetFlag(FLAGS_scratch_uid)),
+                SyscallSucceeds());
+
+    // Verify that we changed uid.
+    EXPECT_THAT(getuid(),
+                SyscallSucceedsWithValue(absl::GetFlag(FLAGS_scratch_uid)));
+
+    // Verify we lost the capability in the effective set, this always happens.
+    TEST_CHECK(!HaveCapability(CAP_SYS_ADMIN).ValueOrDie());
+
+    // We lost the capability in the effective set, but it will still
+    // exist in the permitted set so we can elevate the capability.
+    ASSERT_NO_ERRNO(SetCapability(CAP_SYS_ADMIN, true));
+
+    // Verify we got back the capability in the effective set.
+    TEST_CHECK(HaveCapability(CAP_SYS_ADMIN).ValueOrDie());
+  });
+}
+
+// This test will verify that PR_SET_KEEPCAPS is not retained
+// across an execve. According to prctl(2):
+// "The "keep capabilities" value will  be reset to 0 on subsequent
+// calls to execve(2)."
+TEST_F(PrctlKeepCapsSetuidTest, NoKeepCapsAfterExec) {
+  ASSERT_THAT(prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0), SyscallSucceeds());
+
+  // Verify PR_SET_KEEPCAPS was set before we proceed.
+  ASSERT_THAT(prctl(PR_GET_KEEPCAPS, 0, 0, 0, 0), SyscallSucceedsWithValue(1));
+
+  pid_t child_pid = -1;
+  int execve_errno = 0;
+  // Do an exec and then verify that PR_GET_KEEPCAPS returns 0
+  // see the body of main below.
+  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(ForkAndExec(
+      "/proc/self/exe", {"/proc/self/exe", "--prctl_pr_get_keepcaps"}, {},
+      nullptr, &child_pid, &execve_errno));
+
+  ASSERT_GT(child_pid, 0);
+  ASSERT_EQ(execve_errno, 0);
+
+  int status = 0;
+  ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds());
+  ASSERT_TRUE(WIFEXITED(status));
+  // PR_SET_KEEPCAPS should have been cleared by the exec.
+  // Success should return gvisor::testing::kPrGetKeepCapsExitBase + 0
+  ASSERT_EQ(WEXITSTATUS(status), kPrGetKeepCapsExitBase);
+}
+
+TEST_F(PrctlKeepCapsSetuidTest, NoKeepCapsAfterNewUserNamespace) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanCreateUserNamespace()));
+
+  // Fork to avoid changing the user namespace of the original test process.
+  pid_t const child_pid = fork();
+
+  if (child_pid == 0) {
+    // Verify that the keepcaps flag is set to 0 when we change user namespaces.
+    TEST_PCHECK(prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0) == 0);
+    MaybeSave();
+
+    TEST_PCHECK(prctl(PR_GET_KEEPCAPS, 0, 0, 0, 0) == 1);
+    MaybeSave();
+
+    TEST_PCHECK(unshare(CLONE_NEWUSER) == 0);
+    MaybeSave();
+
+    TEST_PCHECK(prctl(PR_GET_KEEPCAPS, 0, 0, 0, 0) == 0);
+    MaybeSave();
+
+    _exit(0);
+  }
+
+  int status;
+  ASSERT_THAT(child_pid, SyscallSucceeds());
+  ASSERT_THAT(waitpid(child_pid, &status, 0),
+              SyscallSucceedsWithValue(child_pid));
+  EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+      << "status = " << status;
+}
+
+// This test will verify that PR_SET_KEEPCAPS and PR_GET_KEEPCAPS work correctly
+TEST_F(PrctlKeepCapsSetuidTest, PrGetKeepCaps) {
+  // Set PR_SET_KEEPCAPS to the negation of the original.
+  ASSERT_THAT(prctl(PR_SET_KEEPCAPS, !original_keepcaps_, 0, 0, 0),
+              SyscallSucceeds());
+
+  // Verify it was set.
+  ASSERT_THAT(prctl(PR_GET_KEEPCAPS, 0, 0, 0, 0),
+              SyscallSucceedsWithValue(!original_keepcaps_));
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
+
+int main(int argc, char** argv) {
+  gvisor::testing::TestInit(&argc, &argv);
+
+  if (absl::GetFlag(FLAGS_prctl_pr_get_keepcaps)) {
+    return gvisor::testing::kPrGetKeepCapsExitBase +
+           prctl(PR_GET_KEEPCAPS, 0, 0, 0, 0);
+  }
+
+  return gvisor::testing::RunAllTests();
+}
diff --git a/test/syscalls/linux/pread64.cc b/test/syscalls/linux/pread64.cc
new file mode 100644
index 000000000..bcdbbb044
--- /dev/null
+++ b/test/syscalls/linux/pread64.cc
@@ -0,0 +1,167 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/unistd.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+class Pread64Test : public ::testing::Test {
+  void SetUp() override {
+    name_ = NewTempAbsPath();
+    ASSERT_NO_ERRNO_AND_VALUE(Open(name_, O_CREAT, 0644));
+  }
+
+  void TearDown() override { unlink(name_.c_str()); }
+
+ public:
+  std::string name_;
+};
+
+TEST(Pread64TestNoTempFile, BadFileDescriptor) {
+  char buf[1024];
+  EXPECT_THAT(pread64(-1, buf, 1024, 0), SyscallFailsWithErrno(EBADF));
+}
+
+TEST_F(Pread64Test, ZeroBuffer) {
+  const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(name_, O_RDWR));
+
+  char msg[] = "hello world";
+  EXPECT_THAT(pwrite64(fd.get(), msg, strlen(msg), 0),
+              SyscallSucceedsWithValue(strlen(msg)));
+
+  char buf[10];
+  EXPECT_THAT(pread64(fd.get(), buf, 0, 0), SyscallSucceedsWithValue(0));
+}
+
+TEST_F(Pread64Test, BadBuffer) {
+  const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(name_, O_RDWR));
+
+  char msg[] = "hello world";
+  EXPECT_THAT(pwrite64(fd.get(), msg, strlen(msg), 0),
+              SyscallSucceedsWithValue(strlen(msg)));
+
+  char* bad_buffer = nullptr;
+  EXPECT_THAT(pread64(fd.get(), bad_buffer, 1024, 0),
+              SyscallFailsWithErrno(EFAULT));
+}
+
+TEST_F(Pread64Test, WriteOnlyNotReadable) {
+  const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(name_, O_WRONLY));
+
+  char buf[1024];
+  EXPECT_THAT(pread64(fd.get(), buf, 1024, 0), SyscallFailsWithErrno(EBADF));
+}
+
+TEST_F(Pread64Test, DirNotReadable) {
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(GetAbsoluteTestTmpdir(), O_RDONLY));
+
+  char buf[1024];
+  EXPECT_THAT(pread64(fd.get(), buf, 1024, 0), SyscallFailsWithErrno(EISDIR));
+}
+
+TEST_F(Pread64Test, BadOffset) {
+  const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(name_, O_RDONLY));
+
+  char buf[1024];
+  EXPECT_THAT(pread64(fd.get(), buf, 1024, -1), SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_F(Pread64Test, OffsetNotIncremented) {
+  const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(name_, O_RDWR));
+
+  char msg[] = "hello world";
+  EXPECT_THAT(write(fd.get(), msg, strlen(msg)),
+              SyscallSucceedsWithValue(strlen(msg)));
+  int offset;
+  EXPECT_THAT(offset = lseek(fd.get(), 0, SEEK_CUR), SyscallSucceeds());
+
+  char buf1[1024];
+  EXPECT_THAT(pread64(fd.get(), buf1, 1024, 0),
+              SyscallSucceedsWithValue(strlen(msg)));
+  EXPECT_THAT(lseek(fd.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(offset));
+
+  char buf2[1024];
+  EXPECT_THAT(pread64(fd.get(), buf2, 1024, 3),
+              SyscallSucceedsWithValue(strlen(msg) - 3));
+  EXPECT_THAT(lseek(fd.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(offset));
+}
+
+TEST_F(Pread64Test, EndOfFile) {
+  const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(name_, O_RDONLY));
+
+  char buf[1024];
+  EXPECT_THAT(pread64(fd.get(), buf, 1024, 0), SyscallSucceedsWithValue(0));
+}
+
+int memfd_create(const std::string& name, unsigned int flags) {
+  return syscall(__NR_memfd_create, name.c_str(), flags);
+}
+
+TEST_F(Pread64Test, Overflow) {
+  int f = memfd_create("negative", 0);
+  const FileDescriptor fd(f);
+
+  EXPECT_THAT(ftruncate(fd.get(), 0x7fffffffffffffffull), SyscallSucceeds());
+
+  char buf[10];
+  EXPECT_THAT(pread64(fd.get(), buf, sizeof(buf), 0x7fffffffffffffffull),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(Pread64TestNoTempFile, CantReadSocketPair_NoRandomSave) {
+  int sock_fds[2];
+  EXPECT_THAT(socketpair(AF_UNIX, SOCK_STREAM, 0, sock_fds), SyscallSucceeds());
+
+  char buf[1024];
+  EXPECT_THAT(pread64(sock_fds[0], buf, 1024, 0),
+              SyscallFailsWithErrno(ESPIPE));
+  EXPECT_THAT(pread64(sock_fds[1], buf, 1024, 0),
+              SyscallFailsWithErrno(ESPIPE));
+
+  EXPECT_THAT(close(sock_fds[0]), SyscallSucceeds());
+  EXPECT_THAT(close(sock_fds[1]), SyscallSucceeds());
+}
+
+TEST(Pread64TestNoTempFile, CantReadPipe) {
+  char buf[1024];
+
+  int pipe_fds[2];
+  EXPECT_THAT(pipe(pipe_fds), SyscallSucceeds());
+
+  EXPECT_THAT(pread64(pipe_fds[0], buf, 1024, 0),
+              SyscallFailsWithErrno(ESPIPE));
+
+  EXPECT_THAT(close(pipe_fds[0]), SyscallSucceeds());
+  EXPECT_THAT(close(pipe_fds[1]), SyscallSucceeds());
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/preadv.cc b/test/syscalls/linux/preadv.cc
new file mode 100644
index 000000000..5b0743fe9
--- /dev/null
+++ b/test/syscalls/linux/preadv.cc
@@ -0,0 +1,95 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include <atomic>
+#include <string>
+
+#include "gtest/gtest.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/logging.h"
+#include "test/util/memory_util.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+#include "test/util/timer_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// Stress copy-on-write. Attempts to reproduce b/38430174.
+TEST(PreadvTest, MMConcurrencyStress) {
+  // Fill a one-page file with zeroes (the contents don't really matter).
+  const auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      /* parent = */ GetAbsoluteTestTmpdir(),
+      /* content = */ std::string(kPageSize, 0), TempPath::kDefaultFileMode));
+  const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDONLY));
+
+  // Get a one-page private mapping to read to.
+  const Mapping m = ASSERT_NO_ERRNO_AND_VALUE(
+      MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE));
+
+  // Repeatedly fork in a separate thread to force the mapping to become
+  // copy-on-write.
+  std::atomic<bool> done(false);
+  const ScopedThread t([&] {
+    while (!done.load()) {
+      const pid_t pid = fork();
+      TEST_CHECK(pid >= 0);
+      if (pid == 0) {
+        // In child. The parent was obviously multithreaded, so it's neither
+        // safe nor necessary to do much more than exit.
+        syscall(SYS_exit_group, 0);
+      }
+      int status;
+      ASSERT_THAT(RetryEINTR(waitpid)(pid, &status, 0),
+                  SyscallSucceedsWithValue(pid));
+      EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+          << "status = " << status;
+    }
+  });
+
+  // Repeatedly read to the mapping.
+  struct iovec iov[2];
+  iov[0].iov_base = m.ptr();
+  iov[0].iov_len = kPageSize / 2;
+  iov[1].iov_base = reinterpret_cast<void*>(m.addr() + kPageSize / 2);
+  iov[1].iov_len = kPageSize / 2;
+  constexpr absl::Duration kTestDuration = absl::Seconds(5);
+  const absl::Time end = absl::Now() + kTestDuration;
+  while (absl::Now() < end) {
+    // Among other causes, save/restore cycles may cause interruptions resulting
+    // in partial reads, so we don't expect any particular return value.
+    EXPECT_THAT(RetryEINTR(preadv)(fd.get(), iov, 2, 0), SyscallSucceeds());
+  }
+
+  // Stop the other thread.
+  done.store(true);
+
+  // The test passes if it neither deadlocks nor crashes the OS.
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/preadv2.cc b/test/syscalls/linux/preadv2.cc
new file mode 100644
index 000000000..4a9acd7ae
--- /dev/null
+++ b/test/syscalls/linux/preadv2.cc
@@ -0,0 +1,280 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+
+#include <string>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "absl/memory/memory.h"
+#include "test/syscalls/linux/file_base.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+#ifndef SYS_preadv2
+#if defined(__x86_64__)
+#define SYS_preadv2 327
+#elif defined(__aarch64__)
+#define SYS_preadv2 286
+#else
+#error "Unknown architecture"
+#endif
+#endif  // SYS_preadv2
+
+#ifndef RWF_HIPRI
+#define RWF_HIPRI 0x1
+#endif  // RWF_HIPRI
+
+constexpr int kBufSize = 1024;
+
+std::string SetContent() {
+  std::string content;
+  for (int i = 0; i < kBufSize; i++) {
+    content += static_cast<char>((i % 10) + '0');
+  }
+  return content;
+}
+
+ssize_t preadv2(unsigned long fd, const struct iovec* iov, unsigned long iovcnt,
+                off_t offset, unsigned long flags) {
+  // syscall on preadv2 does some weird things (see man syscall and search
+  // preadv2), so we insert a 0 to word align the flags argument on native.
+  return syscall(SYS_preadv2, fd, iov, iovcnt, offset, 0, flags);
+}
+
+// This test is the base case where we call preadv (no offset, no flags).
+TEST(Preadv2Test, TestBaseCall) {
+  SKIP_IF(preadv2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS);
+
+  std::string content = SetContent();
+
+  const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), content, TempPath::kDefaultFileMode));
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY));
+
+  std::vector<char> buf(kBufSize);
+  struct iovec iov[2];
+  iov[0].iov_base = buf.data();
+  iov[0].iov_len = buf.size() / 2;
+  iov[1].iov_base = static_cast<char*>(iov[0].iov_base) + (content.size() / 2);
+  iov[1].iov_len = content.size() / 2;
+
+  EXPECT_THAT(preadv2(fd.get(), iov, /*iovcnt*/ 2, /*offset=*/0, /*flags=*/0),
+              SyscallSucceedsWithValue(kBufSize));
+
+  EXPECT_EQ(content, std::string(buf.data(), buf.size()));
+}
+
+// This test is where we call preadv with an offset and no flags.
+TEST(Preadv2Test, TestValidPositiveOffset) {
+  SKIP_IF(preadv2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS);
+
+  std::string content = SetContent();
+  const std::string prefix = "0";
+
+  const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), prefix + content, TempPath::kDefaultFileMode));
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY));
+
+  std::vector<char> buf(kBufSize, '0');
+  struct iovec iov;
+  iov.iov_base = buf.data();
+  iov.iov_len = buf.size();
+
+  EXPECT_THAT(preadv2(fd.get(), &iov, /*iovcnt=*/1, /*offset=*/prefix.size(),
+                      /*flags=*/0),
+              SyscallSucceedsWithValue(kBufSize));
+
+  EXPECT_THAT(lseek(fd.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(0));
+
+  EXPECT_EQ(content, std::string(buf.data(), buf.size()));
+}
+
+// This test is the base case where we call readv by using -1 as the offset. The
+// read should use the file offset, so the test increments it by one prior to
+// calling preadv2.
+TEST(Preadv2Test, TestNegativeOneOffset) {
+  SKIP_IF(preadv2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS);
+
+  std::string content = SetContent();
+  const std::string prefix = "231";
+
+  const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), prefix + content, TempPath::kDefaultFileMode));
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY));
+
+  ASSERT_THAT(lseek(fd.get(), prefix.size(), SEEK_SET),
+              SyscallSucceedsWithValue(prefix.size()));
+
+  std::vector<char> buf(kBufSize, '0');
+  struct iovec iov;
+  iov.iov_base = buf.data();
+  iov.iov_len = buf.size();
+
+  EXPECT_THAT(preadv2(fd.get(), &iov, /*iovcnt=*/1, /*offset=*/-1, /*flags=*/0),
+              SyscallSucceedsWithValue(kBufSize));
+
+  EXPECT_THAT(lseek(fd.get(), 0, SEEK_CUR),
+              SyscallSucceedsWithValue(prefix.size() + buf.size()));
+
+  EXPECT_EQ(content, std::string(buf.data(), buf.size()));
+}
+
+// preadv2 requires if the RWF_HIPRI flag is passed, the fd must be opened with
+// O_DIRECT. This test implements a correct call with the RWF_HIPRI flag.
+TEST(Preadv2Test, TestCallWithRWF_HIPRI) {
+  SKIP_IF(preadv2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS);
+
+  std::string content = SetContent();
+
+  const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), content, TempPath::kDefaultFileMode));
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY));
+
+  EXPECT_THAT(fsync(fd.get()), SyscallSucceeds());
+
+  std::vector<char> buf(kBufSize, '0');
+  struct iovec iov;
+  iov.iov_base = buf.data();
+  iov.iov_len = buf.size();
+
+  EXPECT_THAT(
+      preadv2(fd.get(), &iov, /*iovcnt=*/1, /*offset=*/0, /*flags=*/RWF_HIPRI),
+      SyscallSucceedsWithValue(kBufSize));
+
+  EXPECT_THAT(lseek(fd.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(0));
+
+  EXPECT_EQ(content, std::string(buf.data(), buf.size()));
+}
+// This test calls preadv2 with an invalid flag.
+TEST(Preadv2Test, TestInvalidFlag) {
+  SKIP_IF(preadv2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS);
+
+  const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), "", TempPath::kDefaultFileMode));
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY | O_DIRECT));
+
+  std::vector<char> buf(kBufSize, '0');
+  struct iovec iov;
+  iov.iov_base = buf.data();
+  iov.iov_len = buf.size();
+
+  EXPECT_THAT(preadv2(fd.get(), &iov, /*iovcnt=*/1,
+                      /*offset=*/0, /*flags=*/0xF0),
+              SyscallFailsWithErrno(EOPNOTSUPP));
+}
+
+// This test calls preadv2 with an invalid offset.
+TEST(Preadv2Test, TestInvalidOffset) {
+  SKIP_IF(preadv2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS);
+
+  const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), "", TempPath::kDefaultFileMode));
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY | O_DIRECT));
+
+  auto iov = absl::make_unique<struct iovec[]>(1);
+  iov[0].iov_base = nullptr;
+  iov[0].iov_len = 0;
+
+  EXPECT_THAT(preadv2(fd.get(), iov.get(), /*iovcnt=*/1, /*offset=*/-8,
+                      /*flags=*/0),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+// This test calls preadv with a file set O_WRONLY.
+TEST(Preadv2Test, TestUnreadableFile) {
+  SKIP_IF(preadv2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS);
+
+  const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), "", TempPath::kDefaultFileMode));
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_WRONLY));
+
+  auto iov = absl::make_unique<struct iovec[]>(1);
+  iov[0].iov_base = nullptr;
+  iov[0].iov_len = 0;
+
+  EXPECT_THAT(preadv2(fd.get(), iov.get(), /*iovcnt=*/1,
+                      /*offset=*/0, /*flags=*/0),
+              SyscallFailsWithErrno(EBADF));
+}
+
+// Calling preadv2 with a non-negative offset calls preadv.  Calling preadv with
+// an unseekable file is not allowed. A pipe is used for an unseekable file.
+TEST(Preadv2Test, TestUnseekableFileInvalid) {
+  SKIP_IF(preadv2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS);
+
+  int pipe_fds[2];
+
+  ASSERT_THAT(pipe(pipe_fds), SyscallSucceeds());
+
+  auto iov = absl::make_unique<struct iovec[]>(1);
+  iov[0].iov_base = nullptr;
+  iov[0].iov_len = 0;
+
+  EXPECT_THAT(preadv2(pipe_fds[0], iov.get(), /*iovcnt=*/1,
+                      /*offset=*/2, /*flags=*/0),
+              SyscallFailsWithErrno(ESPIPE));
+
+  EXPECT_THAT(close(pipe_fds[0]), SyscallSucceeds());
+  EXPECT_THAT(close(pipe_fds[1]), SyscallSucceeds());
+}
+
+TEST(Preadv2Test, TestUnseekableFileValid) {
+  SKIP_IF(preadv2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS);
+
+  int pipe_fds[2];
+
+  ASSERT_THAT(pipe(pipe_fds), SyscallSucceeds());
+
+  std::vector<char> content(32, 'X');
+
+  EXPECT_THAT(write(pipe_fds[1], content.data(), content.size()),
+              SyscallSucceedsWithValue(content.size()));
+
+  std::vector<char> buf(content.size());
+  auto iov = absl::make_unique<struct iovec[]>(1);
+  iov[0].iov_base = buf.data();
+  iov[0].iov_len = buf.size();
+
+  EXPECT_THAT(preadv2(pipe_fds[0], iov.get(), /*iovcnt=*/1,
+                      /*offset=*/static_cast<off_t>(-1), /*flags=*/0),
+              SyscallSucceedsWithValue(buf.size()));
+
+  EXPECT_EQ(content, buf);
+
+  EXPECT_THAT(close(pipe_fds[0]), SyscallSucceeds());
+  EXPECT_THAT(close(pipe_fds[1]), SyscallSucceeds());
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/priority.cc b/test/syscalls/linux/priority.cc
new file mode 100644
index 000000000..1d9bdfa70
--- /dev/null
+++ b/test/syscalls/linux/priority.cc
@@ -0,0 +1,216 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sys/resource.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <string>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "absl/strings/numbers.h"
+#include "absl/strings/str_split.h"
+#include "test/util/capability_util.h"
+#include "test/util/fs_util.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// These tests are for both the getpriority(2) and setpriority(2) syscalls
+// These tests are very rudimentary because getpriority and setpriority
+// have not yet been fully implemented.
+
+// Getpriority does something
+TEST(GetpriorityTest, Implemented) {
+  // "getpriority() can legitimately return the value -1, it is necessary to
+  // clear the external variable errno prior to the call"
+  errno = 0;
+  EXPECT_THAT(getpriority(PRIO_PROCESS, /*who=*/0), SyscallSucceeds());
+}
+
+// Invalid which
+TEST(GetpriorityTest, InvalidWhich) {
+  errno = 0;
+  EXPECT_THAT(getpriority(/*which=*/3, /*who=*/0),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+// Process is found when which=PRIO_PROCESS
+TEST(GetpriorityTest, ValidWho) {
+  errno = 0;
+  EXPECT_THAT(getpriority(PRIO_PROCESS, getpid()), SyscallSucceeds());
+}
+
+// Process is not found when which=PRIO_PROCESS
+TEST(GetpriorityTest, InvalidWho) {
+  errno = 0;
+  // Flaky, but it's tough to avoid a race condition when finding an unused pid
+  EXPECT_THAT(getpriority(PRIO_PROCESS, /*who=*/INT_MAX - 1),
+              SyscallFailsWithErrno(ESRCH));
+}
+
+// Setpriority does something
+TEST(SetpriorityTest, Implemented) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_NICE)));
+
+  // No need to clear errno for setpriority():
+  // "The setpriority() call returns 0 if there is no error, or -1 if there is"
+  EXPECT_THAT(setpriority(PRIO_PROCESS, /*who=*/0, /*nice=*/16),
+              SyscallSucceeds());
+}
+
+// Invalid which
+TEST(Setpriority, InvalidWhich) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_NICE)));
+
+  EXPECT_THAT(setpriority(/*which=*/3, /*who=*/0, /*nice=*/16),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+// Process is found when which=PRIO_PROCESS
+TEST(SetpriorityTest, ValidWho) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_NICE)));
+
+  EXPECT_THAT(setpriority(PRIO_PROCESS, getpid(), /*nice=*/16),
+              SyscallSucceeds());
+}
+
+// niceval is within the range [-20, 19]
+TEST(SetpriorityTest, InsideRange) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_NICE)));
+
+  // Set 0 < niceval < 19
+  int nice = 12;
+  EXPECT_THAT(setpriority(PRIO_PROCESS, getpid(), nice), SyscallSucceeds());
+
+  errno = 0;
+  EXPECT_THAT(getpriority(PRIO_PROCESS, getpid()),
+              SyscallSucceedsWithValue(nice));
+
+  // Set -20 < niceval < 0
+  nice = -12;
+  EXPECT_THAT(setpriority(PRIO_PROCESS, getpid(), nice), SyscallSucceeds());
+
+  errno = 0;
+  EXPECT_THAT(getpriority(PRIO_PROCESS, getpid()),
+              SyscallSucceedsWithValue(nice));
+}
+
+// Verify that priority/niceness are exposed via /proc/PID/stat.
+TEST(SetpriorityTest, NicenessExposedViaProcfs) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_NICE)));
+
+  constexpr int kNiceVal = 12;
+  ASSERT_THAT(setpriority(PRIO_PROCESS, getpid(), kNiceVal), SyscallSucceeds());
+
+  errno = 0;
+  ASSERT_THAT(getpriority(PRIO_PROCESS, getpid()),
+              SyscallSucceedsWithValue(kNiceVal));
+
+  // Now verify we can read that same value via /proc/self/stat.
+  std::string proc_stat;
+  ASSERT_NO_ERRNO(GetContents("/proc/self/stat", &proc_stat));
+  std::vector<std::string> pieces = absl::StrSplit(proc_stat, ' ');
+  ASSERT_GT(pieces.size(), 20);
+
+  int niceness_procfs = 0;
+  ASSERT_TRUE(absl::SimpleAtoi(pieces[18], &niceness_procfs));
+  EXPECT_EQ(niceness_procfs, kNiceVal);
+}
+
+// In the kernel's implementation, values outside the range of [-20, 19] are
+// truncated to these minimum and maximum values. See
+// https://elixir.bootlin.com/linux/v4.4/source/kernel/sys.c#L190
+TEST(SetpriorityTest, OutsideRange) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_NICE)));
+
+  // Set niceval > 19
+  EXPECT_THAT(setpriority(PRIO_PROCESS, getpid(), /*nice=*/100),
+              SyscallSucceeds());
+
+  errno = 0;
+  // Test niceval truncated to 19
+  EXPECT_THAT(getpriority(PRIO_PROCESS, getpid()),
+              SyscallSucceedsWithValue(/*maxnice=*/19));
+
+  // Set niceval < -20
+  EXPECT_THAT(setpriority(PRIO_PROCESS, getpid(), /*nice=*/-100),
+              SyscallSucceeds());
+
+  errno = 0;
+  // Test niceval truncated to -20
+  EXPECT_THAT(getpriority(PRIO_PROCESS, getpid()),
+              SyscallSucceedsWithValue(/*minnice=*/-20));
+}
+
+// Process is not found when which=PRIO_PROCESS
+TEST(SetpriorityTest, InvalidWho) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_NICE)));
+
+  // Flaky, but it's tough to avoid a race condition when finding an unused pid
+  EXPECT_THAT(setpriority(PRIO_PROCESS,
+                          /*who=*/INT_MAX - 1,
+                          /*nice=*/16),
+              SyscallFailsWithErrno(ESRCH));
+}
+
+// Nice succeeds, correctly modifies (or in this case does not
+// modify priority of process
+TEST(SetpriorityTest, NiceSucceeds) {
+  errno = 0;
+  const int priority_before = getpriority(PRIO_PROCESS, /*who=*/0);
+  ASSERT_THAT(nice(/*inc=*/0), SyscallSucceeds());
+
+  // nice(0) should not change priority
+  EXPECT_EQ(priority_before, getpriority(PRIO_PROCESS, /*who=*/0));
+}
+
+// Threads resulting from clone() maintain parent's priority
+// Changes to child priority do not affect parent's priority
+TEST(GetpriorityTest, CloneMaintainsPriority) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_NICE)));
+
+  constexpr int kParentPriority = 16;
+  constexpr int kChildPriority = 14;
+  ASSERT_THAT(setpriority(PRIO_PROCESS, getpid(), kParentPriority),
+              SyscallSucceeds());
+
+  ScopedThread th([]() {
+    // Check that priority equals that of parent thread
+    pid_t my_tid;
+    EXPECT_THAT(my_tid = syscall(__NR_gettid), SyscallSucceeds());
+    EXPECT_THAT(getpriority(PRIO_PROCESS, my_tid),
+                SyscallSucceedsWithValue(kParentPriority));
+
+    // Change the child thread's priority
+    EXPECT_THAT(setpriority(PRIO_PROCESS, my_tid, kChildPriority),
+                SyscallSucceeds());
+  });
+  th.Join();
+
+  // Check that parent's priority reemained the same even though
+  // the child's priority was altered
+  EXPECT_EQ(kParentPriority, getpriority(PRIO_PROCESS, syscall(__NR_gettid)));
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/priority_execve.cc b/test/syscalls/linux/priority_execve.cc
new file mode 100644
index 000000000..5cb343bad
--- /dev/null
+++ b/test/syscalls/linux/priority_execve.cc
@@ -0,0 +1,42 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/resource.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+int main(int argc, char** argv, char** envp) {
+  errno = 0;
+  int prio = getpriority(PRIO_PROCESS, getpid());
+
+  // NOTE: getpriority() can legitimately return negative values
+  // in the range [-20, 0). If errno is set, exit with a value that
+  // could not be reached by a valid priority. Valid exit values
+  // for the test are in the range [1, 40], so we'll use 0.
+  if (errno != 0) {
+    printf("getpriority() failed with errno = %d\n", errno);
+    exit(0);
+  }
+
+  // Used by test to verify priority is being maintained through
+  // calls to execve(). Since prio should always be in the range
+  // [-20, 19], we offset by 20 so as not to have negative exit codes.
+  exit(20 - prio);
+
+  return 0;
+}
diff --git a/test/syscalls/linux/proc.cc b/test/syscalls/linux/proc.cc
new file mode 100644
index 000000000..d6b875dbf
--- /dev/null
+++ b/test/syscalls/linux/proc.cc
@@ -0,0 +1,2173 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <elf.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <sched.h>
+#include <signal.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+#include <sys/stat.h>
+#include <sys/utsname.h>
+#include <syscall.h>
+#include <unistd.h>
+
+#include <algorithm>
+#include <atomic>
+#include <functional>
+#include <iostream>
+#include <map>
+#include <memory>
+#include <ostream>
+#include <regex>
+#include <string>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/strings/ascii.h"
+#include "absl/strings/match.h"
+#include "absl/strings/numbers.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/str_split.h"
+#include "absl/strings/string_view.h"
+#include "absl/synchronization/mutex.h"
+#include "absl/synchronization/notification.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/util/capability_util.h"
+#include "test/util/cleanup.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/fs_util.h"
+#include "test/util/memory_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+#include "test/util/time_util.h"
+#include "test/util/timer_util.h"
+
+// NOTE(magi): No, this isn't really a syscall but this is a really simple
+// way to get it tested on both gVisor, PTrace and Linux.
+
+using ::testing::AllOf;
+using ::testing::AnyOf;
+using ::testing::ContainerEq;
+using ::testing::Contains;
+using ::testing::ContainsRegex;
+using ::testing::Eq;
+using ::testing::Gt;
+using ::testing::HasSubstr;
+using ::testing::IsSupersetOf;
+using ::testing::Pair;
+using ::testing::UnorderedElementsAre;
+using ::testing::UnorderedElementsAreArray;
+
+// Exported by glibc.
+extern char** environ;
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+#ifndef SUID_DUMP_DISABLE
+#define SUID_DUMP_DISABLE 0
+#endif /* SUID_DUMP_DISABLE */
+#ifndef SUID_DUMP_USER
+#define SUID_DUMP_USER 1
+#endif /* SUID_DUMP_USER */
+#ifndef SUID_DUMP_ROOT
+#define SUID_DUMP_ROOT 2
+#endif /* SUID_DUMP_ROOT */
+
+#if defined(__x86_64__) || defined(__i386__)
+// This list of "required" fields is taken from reading the file
+// arch/x86/kernel/cpu/proc.c and seeing which fields will be unconditionally
+// printed by the kernel.
+static const char* required_fields[] = {
+    "processor",
+    "vendor_id",
+    "cpu family",
+    "model\t\t:",
+    "model name",
+    "stepping",
+    "cpu MHz",
+    "fpu\t\t:",
+    "fpu_exception",
+    "cpuid level",
+    "wp",
+    "bogomips",
+    "clflush size",
+    "cache_alignment",
+    "address sizes",
+    "power management",
+};
+#elif __aarch64__
+// This list of "required" fields is taken from reading the file
+// arch/arm64/kernel/cpuinfo.c and seeing which fields will be unconditionally
+// printed by the kernel.
+static const char* required_fields[] = {
+    "processor",        "BogoMIPS",    "Features", "CPU implementer",
+    "CPU architecture", "CPU variant", "CPU part", "CPU revision",
+};
+#else
+#error "Unknown architecture"
+#endif
+
+// Takes the subprocess command line and pid.
+// If it returns !OK, WithSubprocess returns immediately.
+using SubprocessCallback = std::function<PosixError(int)>;
+
+std::vector<std::string> saved_argv;  // NOLINT
+
+// Helper function to dump /proc/{pid}/status and check the
+// state data. State should = "Z" for zombied or "RSD" for
+// running, interruptible sleeping (S), or uninterruptible sleep
+// (D).
+void CompareProcessState(absl::string_view state, int pid) {
+  auto status_file = ASSERT_NO_ERRNO_AND_VALUE(
+      GetContents(absl::StrCat("/proc/", pid, "/status")));
+  // N.B. POSIX extended regexes don't support shorthand character classes (\w)
+  // inside of brackets.
+  EXPECT_THAT(status_file,
+              ContainsRegex(absl::StrCat("State:.[", state,
+                                         R"EOL(]\s+\([a-zA-Z ]+\))EOL")));
+}
+
+// Run callbacks while a subprocess is running, zombied, and/or exited.
+PosixError WithSubprocess(SubprocessCallback const& running,
+                          SubprocessCallback const& zombied,
+                          SubprocessCallback const& exited) {
+  int pipe_fds[2] = {};
+  if (pipe(pipe_fds) < 0) {
+    return PosixError(errno, "pipe");
+  }
+
+  int child_pid = fork();
+  if (child_pid < 0) {
+    return PosixError(errno, "fork");
+  }
+
+  if (child_pid == 0) {
+    close(pipe_fds[0]);    // Close the read end.
+    const DisableSave ds;  // Timing issues.
+
+    // Write to the pipe to tell it we're ready.
+    char buf = 'a';
+    int res = 0;
+    res = WriteFd(pipe_fds[1], &buf, sizeof(buf));
+    TEST_CHECK_MSG(res == sizeof(buf), "Write failure in subprocess");
+
+    while (true) {
+      SleepSafe(absl::Milliseconds(100));
+    }
+  }
+
+  close(pipe_fds[1]);  // Close the write end.
+
+  int status = 0;
+  auto wait_cleanup = Cleanup([child_pid, &status] {
+    EXPECT_THAT(waitpid(child_pid, &status, 0), SyscallSucceeds());
+  });
+  auto kill_cleanup = Cleanup([child_pid] {
+    EXPECT_THAT(kill(child_pid, SIGKILL), SyscallSucceeds());
+  });
+
+  // Wait for the child.
+  char buf = 0;
+  int res = ReadFd(pipe_fds[0], &buf, sizeof(buf));
+  if (res < 0) {
+    return PosixError(errno, "Read from pipe");
+  } else if (res == 0) {
+    return PosixError(EPIPE, "Unable to read from pipe: EOF");
+  }
+
+  if (running) {
+    // The first arg, RSD, refers to a "running process", or a process with a
+    // state of Running (R), Interruptable Sleep (S) or Uninterruptable
+    // Sleep (D).
+    CompareProcessState("RSD", child_pid);
+    RETURN_IF_ERRNO(running(child_pid));
+  }
+
+  // Kill the process.
+  kill_cleanup.Release()();
+  siginfo_t info;
+  // Wait until the child process has exited (WEXITED flag) but don't
+  // reap the child (WNOWAIT flag).
+  EXPECT_THAT(waitid(P_PID, child_pid, &info, WNOWAIT | WEXITED),
+              SyscallSucceeds());
+
+  if (zombied) {
+    // Arg of "Z" refers to a Zombied Process.
+    CompareProcessState("Z", child_pid);
+    RETURN_IF_ERRNO(zombied(child_pid));
+  }
+
+  // Wait on the process.
+  wait_cleanup.Release()();
+  // If the process is reaped, then then this should return
+  // with ECHILD.
+  EXPECT_THAT(waitpid(child_pid, &status, WNOHANG),
+              SyscallFailsWithErrno(ECHILD));
+
+  if (exited) {
+    RETURN_IF_ERRNO(exited(child_pid));
+  }
+
+  return NoError();
+}
+
+// Access the file returned by name when a subprocess is running.
+PosixError AccessWhileRunning(std::function<std::string(int pid)> name,
+                              int flags, std::function<void(int fd)> access) {
+  FileDescriptor fd;
+  return WithSubprocess(
+      [&](int pid) -> PosixError {
+        // Running.
+        ASSIGN_OR_RETURN_ERRNO(fd, Open(name(pid), flags));
+
+        access(fd.get());
+        return NoError();
+      },
+      nullptr, nullptr);
+}
+
+// Access the file returned by name when the a subprocess is zombied.
+PosixError AccessWhileZombied(std::function<std::string(int pid)> name,
+                              int flags, std::function<void(int fd)> access) {
+  FileDescriptor fd;
+  return WithSubprocess(
+      [&](int pid) -> PosixError {
+        // Running.
+        ASSIGN_OR_RETURN_ERRNO(fd, Open(name(pid), flags));
+        return NoError();
+      },
+      [&](int pid) -> PosixError {
+        // Zombied.
+        access(fd.get());
+        return NoError();
+      },
+      nullptr);
+}
+
+// Access the file returned by name when the a subprocess is exited.
+PosixError AccessWhileExited(std::function<std::string(int pid)> name,
+                             int flags, std::function<void(int fd)> access) {
+  FileDescriptor fd;
+  return WithSubprocess(
+      [&](int pid) -> PosixError {
+        // Running.
+        ASSIGN_OR_RETURN_ERRNO(fd, Open(name(pid), flags));
+        return NoError();
+      },
+      nullptr,
+      [&](int pid) -> PosixError {
+        // Exited.
+        access(fd.get());
+        return NoError();
+      });
+}
+
+// ReadFd(fd=/proc/PID/basename) while PID is running.
+int ReadWhileRunning(std::string const& basename, void* buf, size_t count) {
+  int ret = 0;
+  int err = 0;
+  EXPECT_NO_ERRNO(AccessWhileRunning(
+      [&](int pid) -> std::string {
+        return absl::StrCat("/proc/", pid, "/", basename);
+      },
+      O_RDONLY,
+      [&](int fd) {
+        ret = ReadFd(fd, buf, count);
+        err = errno;
+      }));
+  errno = err;
+  return ret;
+}
+
+// ReadFd(fd=/proc/PID/basename) while PID is zombied.
+int ReadWhileZombied(std::string const& basename, void* buf, size_t count) {
+  int ret = 0;
+  int err = 0;
+  EXPECT_NO_ERRNO(AccessWhileZombied(
+      [&](int pid) -> std::string {
+        return absl::StrCat("/proc/", pid, "/", basename);
+      },
+      O_RDONLY,
+      [&](int fd) {
+        ret = ReadFd(fd, buf, count);
+        err = errno;
+      }));
+  errno = err;
+  return ret;
+}
+
+// ReadFd(fd=/proc/PID/basename) while PID is exited.
+int ReadWhileExited(std::string const& basename, void* buf, size_t count) {
+  int ret = 0;
+  int err = 0;
+  EXPECT_NO_ERRNO(AccessWhileExited(
+      [&](int pid) -> std::string {
+        return absl::StrCat("/proc/", pid, "/", basename);
+      },
+      O_RDONLY,
+      [&](int fd) {
+        ret = ReadFd(fd, buf, count);
+        err = errno;
+      }));
+  errno = err;
+  return ret;
+}
+
+// readlinkat(fd=/proc/PID/, basename) while PID is running.
+int ReadlinkWhileRunning(std::string const& basename, char* buf, size_t count) {
+  int ret = 0;
+  int err = 0;
+  EXPECT_NO_ERRNO(AccessWhileRunning(
+      [&](int pid) -> std::string { return absl::StrCat("/proc/", pid, "/"); },
+      O_DIRECTORY,
+      [&](int fd) {
+        ret = readlinkat(fd, basename.c_str(), buf, count);
+        err = errno;
+      }));
+  errno = err;
+  return ret;
+}
+
+// readlinkat(fd=/proc/PID/, basename) while PID is zombied.
+int ReadlinkWhileZombied(std::string const& basename, char* buf, size_t count) {
+  int ret = 0;
+  int err = 0;
+  EXPECT_NO_ERRNO(AccessWhileZombied(
+      [&](int pid) -> std::string { return absl::StrCat("/proc/", pid, "/"); },
+      O_DIRECTORY,
+      [&](int fd) {
+        ret = readlinkat(fd, basename.c_str(), buf, count);
+        err = errno;
+      }));
+  errno = err;
+  return ret;
+}
+
+// readlinkat(fd=/proc/PID/, basename) while PID is exited.
+int ReadlinkWhileExited(std::string const& basename, char* buf, size_t count) {
+  int ret = 0;
+  int err = 0;
+  EXPECT_NO_ERRNO(AccessWhileExited(
+      [&](int pid) -> std::string { return absl::StrCat("/proc/", pid, "/"); },
+      O_DIRECTORY,
+      [&](int fd) {
+        ret = readlinkat(fd, basename.c_str(), buf, count);
+        err = errno;
+      }));
+  errno = err;
+  return ret;
+}
+
+TEST(ProcTest, NotFoundInRoot) {
+  struct stat s;
+  EXPECT_THAT(stat("/proc/foobar", &s), SyscallFailsWithErrno(ENOENT));
+}
+
+TEST(ProcSelfTest, IsThreadGroupLeader) {
+  ScopedThread([] {
+    const pid_t tgid = getpid();
+    const pid_t tid = syscall(SYS_gettid);
+    EXPECT_NE(tgid, tid);
+    auto link = ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/self"));
+    EXPECT_EQ(link, absl::StrCat(tgid));
+  });
+}
+
+TEST(ProcThreadSelfTest, Basic) {
+  const pid_t tgid = getpid();
+  const pid_t tid = syscall(SYS_gettid);
+  EXPECT_EQ(tgid, tid);
+  auto link_threadself =
+      ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/thread-self"));
+  EXPECT_EQ(link_threadself, absl::StrCat(tgid, "/task/", tid));
+  // Just read one file inside thread-self to ensure that the link is valid.
+  auto link_threadself_exe =
+      ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/thread-self/exe"));
+  auto link_procself_exe =
+      ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/self/exe"));
+  EXPECT_EQ(link_threadself_exe, link_procself_exe);
+}
+
+TEST(ProcThreadSelfTest, Thread) {
+  ScopedThread([] {
+    const pid_t tgid = getpid();
+    const pid_t tid = syscall(SYS_gettid);
+    EXPECT_NE(tgid, tid);
+    auto link_threadself =
+        ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/thread-self"));
+
+    EXPECT_EQ(link_threadself, absl::StrCat(tgid, "/task/", tid));
+    // Just read one file inside thread-self to ensure that the link is valid.
+    auto link_threadself_exe =
+        ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/thread-self/exe"));
+    auto link_procself_exe =
+        ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/self/exe"));
+    EXPECT_EQ(link_threadself_exe, link_procself_exe);
+    // A thread should not have "/proc/<tid>/task".
+    struct stat s;
+    EXPECT_THAT(stat("/proc/thread-self/task", &s),
+                SyscallFailsWithErrno(ENOENT));
+  });
+}
+
+// Returns the /proc/PID/maps entry for the MAP_PRIVATE | MAP_ANONYMOUS mapping
+// m with start address addr and length len.
+std::string AnonymousMapsEntry(uintptr_t addr, size_t len, int prot) {
+  return absl::StrCat(absl::Hex(addr, absl::PadSpec::kZeroPad8), "-",
+                      absl::Hex(addr + len, absl::PadSpec::kZeroPad8), " ",
+                      prot & PROT_READ ? "r" : "-",
+                      prot & PROT_WRITE ? "w" : "-",
+                      prot & PROT_EXEC ? "x" : "-", "p 00000000 00:00 0 ");
+}
+
+std::string AnonymousMapsEntryForMapping(const Mapping& m, int prot) {
+  return AnonymousMapsEntry(m.addr(), m.len(), prot);
+}
+
+PosixErrorOr<std::map<uint64_t, uint64_t>> ReadProcSelfAuxv() {
+  std::string auxv_file;
+  RETURN_IF_ERRNO(GetContents("/proc/self/auxv", &auxv_file));
+  const Elf64_auxv_t* auxv_data =
+      reinterpret_cast<const Elf64_auxv_t*>(auxv_file.data());
+  std::map<uint64_t, uint64_t> auxv_entries;
+  for (int i = 0; auxv_data[i].a_type != AT_NULL; i++) {
+    auto a_type = auxv_data[i].a_type;
+    EXPECT_EQ(0, auxv_entries.count(a_type)) << "a_type: " << a_type;
+    auxv_entries.emplace(a_type, auxv_data[i].a_un.a_val);
+  }
+  return auxv_entries;
+}
+
+TEST(ProcSelfAuxv, EntryPresence) {
+  auto auxv_entries = ASSERT_NO_ERRNO_AND_VALUE(ReadProcSelfAuxv());
+
+  EXPECT_EQ(auxv_entries.count(AT_ENTRY), 1);
+  EXPECT_EQ(auxv_entries.count(AT_PHDR), 1);
+  EXPECT_EQ(auxv_entries.count(AT_PHENT), 1);
+  EXPECT_EQ(auxv_entries.count(AT_PHNUM), 1);
+  EXPECT_EQ(auxv_entries.count(AT_BASE), 1);
+  EXPECT_EQ(auxv_entries.count(AT_UID), 1);
+  EXPECT_EQ(auxv_entries.count(AT_EUID), 1);
+  EXPECT_EQ(auxv_entries.count(AT_GID), 1);
+  EXPECT_EQ(auxv_entries.count(AT_EGID), 1);
+  EXPECT_EQ(auxv_entries.count(AT_SECURE), 1);
+  EXPECT_EQ(auxv_entries.count(AT_CLKTCK), 1);
+  EXPECT_EQ(auxv_entries.count(AT_RANDOM), 1);
+  EXPECT_EQ(auxv_entries.count(AT_EXECFN), 1);
+  EXPECT_EQ(auxv_entries.count(AT_PAGESZ), 1);
+  EXPECT_EQ(auxv_entries.count(AT_SYSINFO_EHDR), 1);
+}
+
+TEST(ProcSelfAuxv, EntryValues) {
+  auto proc_auxv = ASSERT_NO_ERRNO_AND_VALUE(ReadProcSelfAuxv());
+
+  // We need to find the ELF auxiliary vector. The section of memory pointed to
+  // by envp contains some pointers to non-null pointers, followed by a single
+  // pointer to a null pointer, followed by the auxiliary vector.
+  char** envpi = environ;
+  while (*envpi) {
+    ++envpi;
+  }
+
+  const Elf64_auxv_t* envp_auxv =
+      reinterpret_cast<const Elf64_auxv_t*>(envpi + 1);
+  int i;
+  for (i = 0; envp_auxv[i].a_type != AT_NULL; i++) {
+    auto a_type = envp_auxv[i].a_type;
+    EXPECT_EQ(proc_auxv.count(a_type), 1);
+    EXPECT_EQ(proc_auxv[a_type], envp_auxv[i].a_un.a_val)
+        << "a_type: " << a_type;
+  }
+  EXPECT_EQ(i, proc_auxv.size());
+}
+
+// Just open and read /proc/self/maps, check that we can find [stack]
+TEST(ProcSelfMaps, Basic) {
+  auto proc_self_maps =
+      ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps"));
+
+  std::vector<std::string> strings = absl::StrSplit(proc_self_maps, '\n');
+  std::vector<std::string> stacks;
+  // Make sure there's a stack in there.
+  for (const auto& str : strings) {
+    if (str.find("[stack]") != std::string::npos) {
+      stacks.push_back(str);
+    }
+  }
+  ASSERT_EQ(1, stacks.size()) << "[stack] not found in: " << proc_self_maps;
+  // Linux pads to 73 characters then we add 7.
+  EXPECT_EQ(80, stacks[0].length());
+}
+
+TEST(ProcSelfMaps, Map1) {
+  Mapping mapping =
+      ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_READ, MAP_PRIVATE));
+  auto proc_self_maps =
+      ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps"));
+  std::vector<std::string> strings = absl::StrSplit(proc_self_maps, '\n');
+  std::vector<std::string> addrs;
+  // Make sure if is listed.
+  for (const auto& str : strings) {
+    if (str == AnonymousMapsEntryForMapping(mapping, PROT_READ)) {
+      addrs.push_back(str);
+    }
+  }
+  ASSERT_EQ(1, addrs.size());
+}
+
+TEST(ProcSelfMaps, Map2) {
+  // NOTE(magi): The permissions must be different or the pages will get merged.
+  Mapping map1 = ASSERT_NO_ERRNO_AND_VALUE(
+      MmapAnon(kPageSize, PROT_READ | PROT_EXEC, MAP_PRIVATE));
+  Mapping map2 =
+      ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_WRITE, MAP_PRIVATE));
+
+  auto proc_self_maps =
+      ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps"));
+  std::vector<std::string> strings = absl::StrSplit(proc_self_maps, '\n');
+  std::vector<std::string> addrs;
+  // Make sure if is listed.
+  for (const auto& str : strings) {
+    if (str == AnonymousMapsEntryForMapping(map1, PROT_READ | PROT_EXEC)) {
+      addrs.push_back(str);
+    }
+  }
+  ASSERT_EQ(1, addrs.size());
+  addrs.clear();
+  for (const auto& str : strings) {
+    if (str == AnonymousMapsEntryForMapping(map2, PROT_WRITE)) {
+      addrs.push_back(str);
+    }
+  }
+  ASSERT_EQ(1, addrs.size());
+}
+
+TEST(ProcSelfMaps, MapUnmap) {
+  Mapping map1 = ASSERT_NO_ERRNO_AND_VALUE(
+      MmapAnon(kPageSize, PROT_READ | PROT_EXEC, MAP_PRIVATE));
+  Mapping map2 =
+      ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_WRITE, MAP_PRIVATE));
+
+  auto proc_self_maps =
+      ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps"));
+  std::vector<std::string> strings = absl::StrSplit(proc_self_maps, '\n');
+  std::vector<std::string> addrs;
+  // Make sure if is listed.
+  for (const auto& str : strings) {
+    if (str == AnonymousMapsEntryForMapping(map1, PROT_READ | PROT_EXEC)) {
+      addrs.push_back(str);
+    }
+  }
+  ASSERT_EQ(1, addrs.size()) << proc_self_maps;
+  addrs.clear();
+  for (const auto& str : strings) {
+    if (str == AnonymousMapsEntryForMapping(map2, PROT_WRITE)) {
+      addrs.push_back(str);
+    }
+  }
+  ASSERT_EQ(1, addrs.size());
+
+  map2.reset();
+
+  // Read it again.
+  proc_self_maps = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps"));
+  strings = absl::StrSplit(proc_self_maps, '\n');
+  // First entry should be there.
+  addrs.clear();
+  for (const auto& str : strings) {
+    if (str == AnonymousMapsEntryForMapping(map1, PROT_READ | PROT_EXEC)) {
+      addrs.push_back(str);
+    }
+  }
+  ASSERT_EQ(1, addrs.size());
+  addrs.clear();
+  // But not the second.
+  for (const auto& str : strings) {
+    if (str == AnonymousMapsEntryForMapping(map2, PROT_WRITE)) {
+      addrs.push_back(str);
+    }
+  }
+  ASSERT_EQ(0, addrs.size());
+}
+
+TEST(ProcSelfMaps, Mprotect) {
+  // FIXME(jamieliu): Linux's mprotect() sometimes fails to merge VMAs in this
+  // case.
+  SKIP_IF(!IsRunningOnGvisor());
+
+  // Reserve 5 pages of address space.
+  Mapping m = ASSERT_NO_ERRNO_AND_VALUE(
+      MmapAnon(5 * kPageSize, PROT_NONE, MAP_PRIVATE));
+
+  // Change the permissions on the middle 3 pages. (The first and last pages may
+  // be merged with other vmas on either side, so they aren't tested directly;
+  // they just ensure that the middle 3 pages are bracketed by VMAs with
+  // incompatible permissions.)
+  ASSERT_THAT(mprotect(reinterpret_cast<void*>(m.addr() + kPageSize),
+                       3 * kPageSize, PROT_READ),
+              SyscallSucceeds());
+
+  // Check that the middle 3 pages make up a single VMA.
+  auto proc_self_maps =
+      ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps"));
+  std::vector<std::string> strings = absl::StrSplit(proc_self_maps, '\n');
+  EXPECT_THAT(strings, Contains(AnonymousMapsEntry(m.addr() + kPageSize,
+                                                   3 * kPageSize, PROT_READ)));
+
+  // Change the permissions on the middle page only.
+  ASSERT_THAT(mprotect(reinterpret_cast<void*>(m.addr() + 2 * kPageSize),
+                       kPageSize, PROT_READ | PROT_WRITE),
+              SyscallSucceeds());
+
+  // Check that the single VMA has been split into 3 VMAs.
+  proc_self_maps = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps"));
+  strings = absl::StrSplit(proc_self_maps, '\n');
+  EXPECT_THAT(
+      strings,
+      IsSupersetOf(
+          {AnonymousMapsEntry(m.addr() + kPageSize, kPageSize, PROT_READ),
+           AnonymousMapsEntry(m.addr() + 2 * kPageSize, kPageSize,
+                              PROT_READ | PROT_WRITE),
+           AnonymousMapsEntry(m.addr() + 3 * kPageSize, kPageSize,
+                              PROT_READ)}));
+
+  // Change the permissions on the middle page back.
+  ASSERT_THAT(mprotect(reinterpret_cast<void*>(m.addr() + 2 * kPageSize),
+                       kPageSize, PROT_READ),
+              SyscallSucceeds());
+
+  // Check that the 3 VMAs have been merged back into a single VMA.
+  proc_self_maps = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps"));
+  strings = absl::StrSplit(proc_self_maps, '\n');
+  EXPECT_THAT(strings, Contains(AnonymousMapsEntry(m.addr() + kPageSize,
+                                                   3 * kPageSize, PROT_READ)));
+}
+
+TEST(ProcSelfFd, OpenFd) {
+  int pipe_fds[2];
+  ASSERT_THAT(pipe2(pipe_fds, O_CLOEXEC), SyscallSucceeds());
+
+  // Reopen the write end.
+  const std::string path = absl::StrCat("/proc/self/fd/", pipe_fds[1]);
+  const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(path, O_WRONLY));
+
+  // Ensure that a read/write works.
+  const std::string data = "hello";
+  std::unique_ptr<char[]> buffer(new char[data.size()]);
+  EXPECT_THAT(write(fd.get(), data.c_str(), data.size()),
+              SyscallSucceedsWithValue(5));
+  EXPECT_THAT(read(pipe_fds[0], buffer.get(), data.size()),
+              SyscallSucceedsWithValue(5));
+  EXPECT_EQ(strncmp(buffer.get(), data.c_str(), data.size()), 0);
+
+  // Cleanup.
+  ASSERT_THAT(close(pipe_fds[0]), SyscallSucceeds());
+  ASSERT_THAT(close(pipe_fds[1]), SyscallSucceeds());
+}
+
+TEST(ProcSelfFdInfo, CorrectFds) {
+  // Make sure there is at least one open file.
+  auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDONLY));
+
+  // Get files in /proc/self/fd.
+  auto fd_files = ASSERT_NO_ERRNO_AND_VALUE(ListDir("/proc/self/fd", false));
+
+  // Get files in /proc/self/fdinfo.
+  auto fdinfo_files =
+      ASSERT_NO_ERRNO_AND_VALUE(ListDir("/proc/self/fdinfo", false));
+
+  // They should contain the same fds.
+  EXPECT_THAT(fd_files, UnorderedElementsAreArray(fdinfo_files));
+
+  // Both should contain fd.
+  auto fd_s = absl::StrCat(fd.get());
+  EXPECT_THAT(fd_files, Contains(fd_s));
+}
+
+TEST(ProcSelfFdInfo, Flags) {
+  std::string path = NewTempAbsPath();
+
+  // Create file here with O_CREAT to test that O_CREAT does not appear in
+  // fdinfo flags.
+  int flags = O_CREAT | O_RDWR | O_APPEND | O_CLOEXEC;
+  const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(path, flags, 0644));
+
+  // Automatically delete path.
+  TempPath temp_path(path);
+
+  // O_CREAT does not appear in fdinfo flags.
+  flags &= ~O_CREAT;
+
+  // O_LARGEFILE always appears (on x86_64).
+  flags |= kOLargeFile;
+
+  auto fd_info = ASSERT_NO_ERRNO_AND_VALUE(
+      GetContents(absl::StrCat("/proc/self/fdinfo/", fd.get())));
+  EXPECT_THAT(fd_info, HasSubstr(absl::StrFormat("flags:\t%#o", flags)));
+}
+
+TEST(ProcSelfExe, Absolute) {
+  auto exe = ASSERT_NO_ERRNO_AND_VALUE(
+      ReadLink(absl::StrCat("/proc/", getpid(), "/exe")));
+  EXPECT_EQ(exe[0], '/');
+}
+
+// Sanity check for /proc/cpuinfo fields that must be present.
+TEST(ProcCpuinfo, RequiredFieldsArePresent) {
+  std::string proc_cpuinfo =
+      ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/cpuinfo"));
+  ASSERT_FALSE(proc_cpuinfo.empty());
+  std::vector<std::string> cpuinfo_fields = absl::StrSplit(proc_cpuinfo, '\n');
+
+  // Check that the usual fields are there. We don't really care about the
+  // contents.
+  for (const std::string& field : required_fields) {
+    EXPECT_THAT(proc_cpuinfo, HasSubstr(field));
+  }
+}
+
+TEST(ProcCpuinfo, DeniesWriteNonRoot) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_FOWNER)));
+
+  // Do setuid in a separate thread so that after finishing this test, the
+  // process can still open files the test harness created before starting this
+  // test. Otherwise, the files are created by root (UID before the test), but
+  // cannot be opened by the `uid` set below after the test. After calling
+  // setuid(non-zero-UID), there is no way to get root privileges back.
+  ScopedThread([&] {
+    // Use syscall instead of glibc setuid wrapper because we want this setuid
+    // call to only apply to this task. POSIX threads, however, require that all
+    // threads have the same UIDs, so using the setuid wrapper sets all threads'
+    // real UID.
+    // Also drops capabilities.
+    constexpr int kNobody = 65534;
+    EXPECT_THAT(syscall(SYS_setuid, kNobody), SyscallSucceeds());
+    EXPECT_THAT(open("/proc/cpuinfo", O_WRONLY), SyscallFailsWithErrno(EACCES));
+    // TODO(gvisor.dev/issue/1193): Properly support setting size attributes in
+    // kernfs.
+    if (!IsRunningOnGvisor() || IsRunningWithVFS1()) {
+      EXPECT_THAT(truncate("/proc/cpuinfo", 123),
+                  SyscallFailsWithErrno(EACCES));
+    }
+  });
+}
+
+// With root privileges, it is possible to open /proc/cpuinfo with write mode,
+// but all write operations will return EIO.
+TEST(ProcCpuinfo, DeniesWriteRoot) {
+  // VFS1 does not behave differently for root/non-root.
+  SKIP_IF(IsRunningWithVFS1());
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_FOWNER)));
+
+  int fd;
+  EXPECT_THAT(fd = open("/proc/cpuinfo", O_WRONLY), SyscallSucceeds());
+  if (fd > 0) {
+    EXPECT_THAT(write(fd, "x", 1), SyscallFailsWithErrno(EIO));
+    EXPECT_THAT(pwrite(fd, "x", 1, 123), SyscallFailsWithErrno(EIO));
+  }
+  // TODO(gvisor.dev/issue/1193): Properly support setting size attributes in
+  // kernfs.
+  if (!IsRunningOnGvisor() || IsRunningWithVFS1()) {
+    if (fd > 0) {
+      EXPECT_THAT(ftruncate(fd, 123), SyscallFailsWithErrno(EIO));
+    }
+    EXPECT_THAT(truncate("/proc/cpuinfo", 123), SyscallFailsWithErrno(EIO));
+  }
+}
+
+// Sanity checks that uptime is present.
+TEST(ProcUptime, IsPresent) {
+  std::string proc_uptime =
+      ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/uptime"));
+  ASSERT_FALSE(proc_uptime.empty());
+  std::vector<std::string> uptime_parts = absl::StrSplit(proc_uptime, ' ');
+
+  // Parse once.
+  double uptime0, uptime1, idletime0, idletime1;
+  ASSERT_TRUE(absl::SimpleAtod(uptime_parts[0], &uptime0));
+  ASSERT_TRUE(absl::SimpleAtod(uptime_parts[1], &idletime0));
+
+  // Sleep for one second.
+  absl::SleepFor(absl::Seconds(1));
+
+  // Parse again.
+  proc_uptime = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/uptime"));
+  ASSERT_FALSE(proc_uptime.empty());
+  uptime_parts = absl::StrSplit(proc_uptime, ' ');
+  ASSERT_TRUE(absl::SimpleAtod(uptime_parts[0], &uptime1));
+  ASSERT_TRUE(absl::SimpleAtod(uptime_parts[1], &idletime1));
+
+  // Sanity check.
+  //
+  // We assert that between 0.99 and 59.99 seconds have passed. If more than a
+  // minute has passed, then we must be executing really, really slowly.
+  EXPECT_GE(uptime0, 0.0);
+  EXPECT_GE(idletime0, 0.0);
+  EXPECT_GT(uptime1, uptime0);
+  EXPECT_GE(uptime1, uptime0 + 0.99);
+  EXPECT_LE(uptime1, uptime0 + 59.99);
+  EXPECT_GE(idletime1, idletime0);
+}
+
+TEST(ProcMeminfo, ContainsBasicFields) {
+  std::string proc_meminfo =
+      ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/meminfo"));
+  EXPECT_THAT(proc_meminfo, AllOf(ContainsRegex(R"(MemTotal:\s+[0-9]+ kB)"),
+                                  ContainsRegex(R"(MemFree:\s+[0-9]+ kB)")));
+}
+
+TEST(ProcStat, ContainsBasicFields) {
+  std::string proc_stat = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/stat"));
+
+  std::vector<std::string> names;
+  for (auto const& line : absl::StrSplit(proc_stat, '\n')) {
+    std::vector<std::string> fields =
+        absl::StrSplit(line, ' ', absl::SkipWhitespace());
+    if (fields.empty()) {
+      continue;
+    }
+    names.push_back(fields[0]);
+  }
+
+  EXPECT_THAT(names,
+              IsSupersetOf({"cpu", "intr", "ctxt", "btime", "processes",
+                            "procs_running", "procs_blocked", "softirq"}));
+}
+
+TEST(ProcStat, EndsWithNewline) {
+  std::string proc_stat = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/stat"));
+  EXPECT_EQ(proc_stat.back(), '\n');
+}
+
+TEST(ProcStat, Fields) {
+  std::string proc_stat = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/stat"));
+
+  std::vector<std::string> names;
+  for (auto const& line : absl::StrSplit(proc_stat, '\n')) {
+    std::vector<std::string> fields =
+        absl::StrSplit(line, ' ', absl::SkipWhitespace());
+    if (fields.empty()) {
+      continue;
+    }
+
+    if (absl::StartsWith(fields[0], "cpu")) {
+      // As of Linux 3.11, each CPU entry has 10 fields, plus the name.
+      EXPECT_GE(fields.size(), 11) << proc_stat;
+    } else if (fields[0] == "ctxt") {
+      // Single field.
+      EXPECT_EQ(fields.size(), 2) << proc_stat;
+    } else if (fields[0] == "btime") {
+      // Single field.
+      EXPECT_EQ(fields.size(), 2) << proc_stat;
+    } else if (fields[0] == "itime") {
+      // Single field.
+      ASSERT_EQ(fields.size(), 2) << proc_stat;
+      // This is the only floating point field.
+      double val;
+      EXPECT_TRUE(absl::SimpleAtod(fields[1], &val)) << proc_stat;
+      continue;
+    } else if (fields[0] == "processes") {
+      // Single field.
+      EXPECT_EQ(fields.size(), 2) << proc_stat;
+    } else if (fields[0] == "procs_running") {
+      // Single field.
+      EXPECT_EQ(fields.size(), 2) << proc_stat;
+    } else if (fields[0] == "procs_blocked") {
+      // Single field.
+      EXPECT_EQ(fields.size(), 2) << proc_stat;
+    } else if (fields[0] == "softirq") {
+      // As of Linux 3.11, there are 10 softirqs. 12 fields for name + total.
+      EXPECT_GE(fields.size(), 12) << proc_stat;
+    }
+
+    // All fields besides itime are valid base 10 numbers.
+    for (size_t i = 1; i < fields.size(); i++) {
+      uint64_t val;
+      EXPECT_TRUE(absl::SimpleAtoi(fields[i], &val)) << proc_stat;
+    }
+  }
+}
+
+TEST(ProcLoadavg, EndsWithNewline) {
+  std::string proc_loadvg =
+      ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/loadavg"));
+  EXPECT_EQ(proc_loadvg.back(), '\n');
+}
+
+TEST(ProcLoadavg, Fields) {
+  std::string proc_loadvg =
+      ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/loadavg"));
+  std::vector<std::string> lines = absl::StrSplit(proc_loadvg, '\n');
+
+  // Single line.
+  EXPECT_EQ(lines.size(), 2) << proc_loadvg;
+
+  std::vector<std::string> fields =
+      absl::StrSplit(lines[0], absl::ByAnyChar(" /"), absl::SkipWhitespace());
+
+  // Six fields.
+  EXPECT_EQ(fields.size(), 6) << proc_loadvg;
+
+  double val;
+  uint64_t val2;
+  // First three fields are floating point numbers.
+  EXPECT_TRUE(absl::SimpleAtod(fields[0], &val)) << proc_loadvg;
+  EXPECT_TRUE(absl::SimpleAtod(fields[1], &val)) << proc_loadvg;
+  EXPECT_TRUE(absl::SimpleAtod(fields[2], &val)) << proc_loadvg;
+  // Rest of the fields are valid base 10 numbers.
+  EXPECT_TRUE(absl::SimpleAtoi(fields[3], &val2)) << proc_loadvg;
+  EXPECT_TRUE(absl::SimpleAtoi(fields[4], &val2)) << proc_loadvg;
+  EXPECT_TRUE(absl::SimpleAtoi(fields[5], &val2)) << proc_loadvg;
+}
+
+// NOTE: Tests in priority.cc also check certain priority related fields in
+// /proc/self/stat.
+
+class ProcPidStatTest : public ::testing::TestWithParam<std::string> {};
+
+TEST_P(ProcPidStatTest, HasBasicFields) {
+  std::string proc_pid_stat = ASSERT_NO_ERRNO_AND_VALUE(
+      GetContents(absl::StrCat("/proc/", GetParam(), "/stat")));
+
+  ASSERT_FALSE(proc_pid_stat.empty());
+  std::vector<std::string> fields = absl::StrSplit(proc_pid_stat, ' ');
+  ASSERT_GE(fields.size(), 24);
+  EXPECT_EQ(absl::StrCat(getpid()), fields[0]);
+  // fields[1] is the thread name.
+  EXPECT_EQ("R", fields[2]);  // task state
+  EXPECT_EQ(absl::StrCat(getppid()), fields[3]);
+
+  // If the test starts up quickly, then the process start time and the kernel
+  // boot time will be very close, and the proc starttime field (which is the
+  // delta of the two times) will be 0.  For that unfortunate reason, we can
+  // only check that starttime >= 0, and not that it is strictly > 0.
+  uint64_t starttime;
+  ASSERT_TRUE(absl::SimpleAtoi(fields[21], &starttime));
+  EXPECT_GE(starttime, 0);
+
+  uint64_t vss;
+  ASSERT_TRUE(absl::SimpleAtoi(fields[22], &vss));
+  EXPECT_GT(vss, 0);
+
+  uint64_t rss;
+  ASSERT_TRUE(absl::SimpleAtoi(fields[23], &rss));
+  EXPECT_GT(rss, 0);
+
+  uint64_t rsslim;
+  ASSERT_TRUE(absl::SimpleAtoi(fields[24], &rsslim));
+  EXPECT_GT(rsslim, 0);
+}
+
+INSTANTIATE_TEST_SUITE_P(SelfAndNumericPid, ProcPidStatTest,
+                         ::testing::Values("self", absl::StrCat(getpid())));
+
+using ProcPidStatmTest = ::testing::TestWithParam<std::string>;
+
+TEST_P(ProcPidStatmTest, HasBasicFields) {
+  std::string proc_pid_statm = ASSERT_NO_ERRNO_AND_VALUE(
+      GetContents(absl::StrCat("/proc/", GetParam(), "/statm")));
+  ASSERT_FALSE(proc_pid_statm.empty());
+  std::vector<std::string> fields = absl::StrSplit(proc_pid_statm, ' ');
+  ASSERT_GE(fields.size(), 7);
+
+  uint64_t vss;
+  ASSERT_TRUE(absl::SimpleAtoi(fields[0], &vss));
+  EXPECT_GT(vss, 0);
+
+  uint64_t rss;
+  ASSERT_TRUE(absl::SimpleAtoi(fields[1], &rss));
+  EXPECT_GT(rss, 0);
+}
+
+INSTANTIATE_TEST_SUITE_P(SelfAndNumericPid, ProcPidStatmTest,
+                         ::testing::Values("self", absl::StrCat(getpid())));
+
+PosixErrorOr<uint64_t> CurrentRSS() {
+  ASSIGN_OR_RETURN_ERRNO(auto proc_self_stat, GetContents("/proc/self/stat"));
+  if (proc_self_stat.empty()) {
+    return PosixError(EINVAL, "empty /proc/self/stat");
+  }
+
+  std::vector<std::string> fields = absl::StrSplit(proc_self_stat, ' ');
+  if (fields.size() < 24) {
+    return PosixError(
+        EINVAL,
+        absl::StrCat("/proc/self/stat has too few fields: ", proc_self_stat));
+  }
+
+  uint64_t rss;
+  if (!absl::SimpleAtoi(fields[23], &rss)) {
+    return PosixError(
+        EINVAL, absl::StrCat("/proc/self/stat RSS field is not a number: ",
+                             fields[23]));
+  }
+
+  // RSS is given in number of pages.
+  return rss * kPageSize;
+}
+
+// The size of mapping created by MapPopulateRSS.
+constexpr uint64_t kMappingSize = 100 << 20;
+
+// Tolerance on RSS comparisons to account for background thread mappings,
+// reclaimed pages, newly faulted pages, etc.
+constexpr uint64_t kRSSTolerance = 10 << 20;
+
+// Capture RSS before and after an anonymous mapping with passed prot.
+void MapPopulateRSS(int prot, uint64_t* before, uint64_t* after) {
+  *before = ASSERT_NO_ERRNO_AND_VALUE(CurrentRSS());
+
+  // N.B. The kernel asynchronously accumulates per-task RSS counters into the
+  // mm RSS, which is exposed by /proc/PID/stat. Task exit is a synchronization
+  // point (kernel/exit.c:do_exit -> sync_mm_rss), so perform the mapping on
+  // another thread to ensure it is reflected in RSS after the thread exits.
+  Mapping mapping;
+  ScopedThread t([&mapping, prot] {
+    mapping = ASSERT_NO_ERRNO_AND_VALUE(
+        MmapAnon(kMappingSize, prot, MAP_PRIVATE | MAP_POPULATE));
+  });
+  t.Join();
+
+  *after = ASSERT_NO_ERRNO_AND_VALUE(CurrentRSS());
+}
+
+// TODO(b/73896574): Test for PROT_READ + MAP_POPULATE anonymous mappings. Their
+// semantics are more subtle:
+//
+// Small pages -> Zero page mapped, not counted in RSS
+// (mm/memory.c:do_anonymous_page).
+//
+// Huge pages (THP enabled, use_zero_page=0) -> Pages committed
+// (mm/memory.c:__handle_mm_fault -> create_huge_pmd).
+//
+// Huge pages (THP enabled, use_zero_page=1) -> Zero page mapped, not counted in
+// RSS (mm/huge_memory.c:do_huge_pmd_anonymous_page).
+
+// PROT_WRITE + MAP_POPULATE anonymous mappings are always committed.
+TEST(ProcSelfStat, PopulateWriteRSS) {
+  uint64_t before, after;
+  MapPopulateRSS(PROT_READ | PROT_WRITE, &before, &after);
+
+  // Mapping is committed.
+  EXPECT_NEAR(before + kMappingSize, after, kRSSTolerance);
+}
+
+// PROT_NONE + MAP_POPULATE anonymous mappings are never committed.
+TEST(ProcSelfStat, PopulateNoneRSS) {
+  uint64_t before, after;
+  MapPopulateRSS(PROT_NONE, &before, &after);
+
+  // Mapping not committed.
+  EXPECT_NEAR(before, after, kRSSTolerance);
+}
+
+// Returns the calling thread's name.
+PosixErrorOr<std::string> ThreadName() {
+  // "The buffer should allow space for up to 16 bytes; the returned std::string
+  // will be null-terminated if it is shorter than that." - prctl(2). But we
+  // always want the thread name to be null-terminated.
+  char thread_name[17];
+  int rc = prctl(PR_GET_NAME, thread_name, 0, 0, 0);
+  MaybeSave();
+  if (rc < 0) {
+    return PosixError(errno, "prctl(PR_GET_NAME)");
+  }
+  thread_name[16] = '\0';
+  return std::string(thread_name);
+}
+
+// Parses the contents of a /proc/[pid]/status file into a collection of
+// key-value pairs.
+PosixErrorOr<std::map<std::string, std::string>> ParseProcStatus(
+    absl::string_view status_str) {
+  std::map<std::string, std::string> fields;
+  for (absl::string_view const line :
+       absl::StrSplit(status_str, '\n', absl::SkipWhitespace())) {
+    const std::pair<absl::string_view, absl::string_view> kv =
+        absl::StrSplit(line, absl::MaxSplits(":\t", 1));
+    if (kv.first.empty()) {
+      return PosixError(
+          EINVAL, absl::StrCat("failed to parse key in line \"", line, "\""));
+    }
+    std::string key(kv.first);
+    if (fields.count(key)) {
+      return PosixError(EINVAL,
+                        absl::StrCat("duplicate key \"", kv.first, "\""));
+    }
+    std::string value(kv.second);
+    absl::StripLeadingAsciiWhitespace(&value);
+    fields.emplace(std::move(key), std::move(value));
+  }
+  return fields;
+}
+
+TEST(ParseProcStatusTest, ParsesSimpleStatusFileWithMixedWhitespaceCorrectly) {
+  EXPECT_THAT(
+      ParseProcStatus(
+          "Name:\tinit\nState:\tS (sleeping)\nCapEff:\t 0000001fffffffff\n"),
+      IsPosixErrorOkAndHolds(UnorderedElementsAre(
+          Pair("Name", "init"), Pair("State", "S (sleeping)"),
+          Pair("CapEff", "0000001fffffffff"))));
+}
+
+TEST(ParseProcStatusTest, DetectsDuplicateKeys) {
+  auto proc_status_or = ParseProcStatus("Name:\tfoo\nName:\tfoo\n");
+  EXPECT_THAT(proc_status_or,
+              PosixErrorIs(EINVAL, ::testing::StrEq("duplicate key \"Name\"")));
+}
+
+TEST(ParseProcStatusTest, DetectsMissingTabs) {
+  EXPECT_THAT(ParseProcStatus("Name:foo\nPid: 1\n"),
+              IsPosixErrorOkAndHolds(UnorderedElementsAre(Pair("Name:foo", ""),
+                                                          Pair("Pid: 1", ""))));
+}
+
+TEST(ProcPidStatusTest, HasBasicFields) {
+  // Do this on a separate thread since we want tgid != tid.
+  ScopedThread([] {
+    const pid_t tgid = getpid();
+    const pid_t tid = syscall(SYS_gettid);
+    EXPECT_NE(tgid, tid);
+    const auto thread_name = ASSERT_NO_ERRNO_AND_VALUE(ThreadName());
+
+    std::string status_str = ASSERT_NO_ERRNO_AND_VALUE(
+        GetContents(absl::StrCat("/proc/", tid, "/status")));
+
+    ASSERT_FALSE(status_str.empty());
+    const auto status = ASSERT_NO_ERRNO_AND_VALUE(ParseProcStatus(status_str));
+    EXPECT_THAT(status, IsSupersetOf({Pair("Name", thread_name),
+                                      Pair("Tgid", absl::StrCat(tgid)),
+                                      Pair("Pid", absl::StrCat(tid)),
+                                      Pair("PPid", absl::StrCat(getppid()))}));
+  });
+}
+
+TEST(ProcPidStatusTest, StateRunning) {
+  // Task must be running when reading the file.
+  const pid_t tid = syscall(SYS_gettid);
+  std::string status_str = ASSERT_NO_ERRNO_AND_VALUE(
+      GetContents(absl::StrCat("/proc/", tid, "/status")));
+
+  EXPECT_THAT(ParseProcStatus(status_str),
+              IsPosixErrorOkAndHolds(Contains(Pair("State", "R (running)"))));
+}
+
+TEST(ProcPidStatusTest, StateSleeping_NoRandomSave) {
+  // Starts a child process that blocks and checks that State is sleeping.
+  auto res = WithSubprocess(
+      [&](int pid) -> PosixError {
+        // Because this test is timing based we will disable cooperative saving
+        // and the test itself also has random saving disabled.
+        const DisableSave ds;
+        // Try multiple times in case the child isn't sleeping when status file
+        // is read.
+        MonotonicTimer timer;
+        timer.Start();
+        for (;;) {
+          ASSIGN_OR_RETURN_ERRNO(
+              std::string status_str,
+              GetContents(absl::StrCat("/proc/", pid, "/status")));
+          ASSIGN_OR_RETURN_ERRNO(auto map, ParseProcStatus(status_str));
+          if (map["State"] == std::string("S (sleeping)")) {
+            // Test passed!
+            return NoError();
+          }
+          if (timer.Duration() > absl::Seconds(10)) {
+            return PosixError(ETIMEDOUT, "Timeout waiting for child to sleep");
+          }
+          absl::SleepFor(absl::Milliseconds(10));
+        }
+      },
+      nullptr, nullptr);
+  ASSERT_NO_ERRNO(res);
+}
+
+TEST(ProcPidStatusTest, ValuesAreTabDelimited) {
+  std::string status_str =
+      ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/status"));
+  ASSERT_FALSE(status_str.empty());
+  for (absl::string_view const line :
+       absl::StrSplit(status_str, '\n', absl::SkipWhitespace())) {
+    EXPECT_NE(std::string::npos, line.find(":\t"));
+  }
+}
+
+// Threads properly counts running threads.
+//
+// TODO(mpratt): Test zombied threads while the thread group leader is still
+// running with generalized fork and clone children from the wait test.
+TEST(ProcPidStatusTest, Threads) {
+  char buf[4096] = {};
+  EXPECT_THAT(ReadWhileRunning("status", buf, sizeof(buf) - 1),
+              SyscallSucceedsWithValue(Gt(0)));
+
+  auto status = ASSERT_NO_ERRNO_AND_VALUE(ParseProcStatus(buf));
+  auto it = status.find("Threads");
+  ASSERT_NE(it, status.end());
+  int threads = -1;
+  EXPECT_TRUE(absl::SimpleAtoi(it->second, &threads))
+      << "Threads value " << it->second << " is not a number";
+  // Don't make assumptions about the exact number of threads, as it may not be
+  // constant.
+  EXPECT_GE(threads, 1);
+
+  memset(buf, 0, sizeof(buf));
+  EXPECT_THAT(ReadWhileZombied("status", buf, sizeof(buf) - 1),
+              SyscallSucceedsWithValue(Gt(0)));
+
+  status = ASSERT_NO_ERRNO_AND_VALUE(ParseProcStatus(buf));
+  it = status.find("Threads");
+  ASSERT_NE(it, status.end());
+  threads = -1;
+  EXPECT_TRUE(absl::SimpleAtoi(it->second, &threads))
+      << "Threads value " << it->second << " is not a number";
+  // There must be only the thread group leader remaining, zombied.
+  EXPECT_EQ(threads, 1);
+}
+
+// Returns true if all characters in s are digits.
+bool IsDigits(absl::string_view s) {
+  return std::all_of(s.begin(), s.end(), absl::ascii_isdigit);
+}
+
+TEST(ProcPidStatTest, VmStats) {
+  std::string status_str =
+      ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/status"));
+  ASSERT_FALSE(status_str.empty());
+  auto status = ASSERT_NO_ERRNO_AND_VALUE(ParseProcStatus(status_str));
+
+  const auto vss_it = status.find("VmSize");
+  ASSERT_NE(vss_it, status.end());
+
+  absl::string_view vss_str(vss_it->second);
+
+  // Room for the " kB" suffix plus at least one digit.
+  ASSERT_GT(vss_str.length(), 3);
+  EXPECT_TRUE(absl::EndsWith(vss_str, " kB"));
+  // Everything else is part of a number.
+  EXPECT_TRUE(IsDigits(vss_str.substr(0, vss_str.length() - 3))) << vss_str;
+  // ... which is not 0.
+  EXPECT_NE('0', vss_str[0]);
+
+  const auto rss_it = status.find("VmRSS");
+  ASSERT_NE(rss_it, status.end());
+
+  absl::string_view rss_str(rss_it->second);
+
+  // Room for the " kB" suffix plus at least one digit.
+  ASSERT_GT(rss_str.length(), 3);
+  EXPECT_TRUE(absl::EndsWith(rss_str, " kB"));
+  // Everything else is part of a number.
+  EXPECT_TRUE(IsDigits(rss_str.substr(0, rss_str.length() - 3))) << rss_str;
+  // ... which is not 0.
+  EXPECT_NE('0', rss_str[0]);
+
+  const auto data_it = status.find("VmData");
+  ASSERT_NE(data_it, status.end());
+
+  absl::string_view data_str(data_it->second);
+
+  // Room for the " kB" suffix plus at least one digit.
+  ASSERT_GT(data_str.length(), 3);
+  EXPECT_TRUE(absl::EndsWith(data_str, " kB"));
+  // Everything else is part of a number.
+  EXPECT_TRUE(IsDigits(data_str.substr(0, data_str.length() - 3))) << data_str;
+  // ... which is not 0.
+  EXPECT_NE('0', data_str[0]);
+}
+
+// Parse an array of NUL-terminated char* arrays, returning a vector of
+// strings.
+std::vector<std::string> ParseNulTerminatedStrings(std::string contents) {
+  EXPECT_EQ('\0', contents.back());
+  // The split will leave an empty string if the NUL-byte remains, so pop
+  // it.
+  contents.pop_back();
+
+  return absl::StrSplit(contents, '\0');
+}
+
+TEST(ProcPidCmdline, MatchesArgv) {
+  std::vector<std::string> proc_cmdline = ParseNulTerminatedStrings(
+      ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/cmdline")));
+  EXPECT_THAT(saved_argv, ContainerEq(proc_cmdline));
+}
+
+TEST(ProcPidEnviron, MatchesEnviron) {
+  std::vector<std::string> proc_environ = ParseNulTerminatedStrings(
+      ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/environ")));
+  // Get the environment from the environ variable, which we will compare with
+  // /proc/self/environ.
+  std::vector<std::string> env;
+  for (char** v = environ; *v; v++) {
+    env.push_back(*v);
+  }
+  EXPECT_THAT(env, ContainerEq(proc_environ));
+}
+
+TEST(ProcPidCmdline, SubprocessForkSameCmdline) {
+  std::vector<std::string> proc_cmdline_parent;
+  std::vector<std::string> proc_cmdline;
+  proc_cmdline_parent = ParseNulTerminatedStrings(
+      ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/cmdline")));
+  auto res = WithSubprocess(
+      [&](int pid) -> PosixError {
+        ASSIGN_OR_RETURN_ERRNO(
+            auto raw_cmdline,
+            GetContents(absl::StrCat("/proc/", pid, "/cmdline")));
+        proc_cmdline = ParseNulTerminatedStrings(raw_cmdline);
+        return NoError();
+      },
+      nullptr, nullptr);
+  ASSERT_NO_ERRNO(res);
+
+  for (size_t i = 0; i < proc_cmdline_parent.size(); i++) {
+    EXPECT_EQ(proc_cmdline_parent[i], proc_cmdline[i]);
+  }
+}
+
+// Test whether /proc/PID/ symlinks can be read for a running process.
+TEST(ProcPidSymlink, SubprocessRunning) {
+  char buf[1];
+
+  EXPECT_THAT(ReadlinkWhileRunning("exe", buf, sizeof(buf)),
+              SyscallSucceedsWithValue(sizeof(buf)));
+
+  EXPECT_THAT(ReadlinkWhileRunning("ns/net", buf, sizeof(buf)),
+              SyscallSucceedsWithValue(sizeof(buf)));
+
+  EXPECT_THAT(ReadlinkWhileRunning("ns/pid", buf, sizeof(buf)),
+              SyscallSucceedsWithValue(sizeof(buf)));
+
+  EXPECT_THAT(ReadlinkWhileRunning("ns/user", buf, sizeof(buf)),
+              SyscallSucceedsWithValue(sizeof(buf)));
+}
+
+TEST(ProcPidSymlink, SubprocessZombied) {
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false));
+
+  char buf[1];
+
+  int want = EACCES;
+  if (!IsRunningOnGvisor()) {
+    auto version = ASSERT_NO_ERRNO_AND_VALUE(GetKernelVersion());
+    if (version.major > 4 || (version.major == 4 && version.minor > 3)) {
+      want = ENOENT;
+    }
+  }
+
+  EXPECT_THAT(ReadlinkWhileZombied("exe", buf, sizeof(buf)),
+              SyscallFailsWithErrno(want));
+
+  if (!IsRunningOnGvisor()) {
+    EXPECT_THAT(ReadlinkWhileZombied("ns/net", buf, sizeof(buf)),
+                SyscallFailsWithErrno(want));
+  }
+
+  // FIXME(gvisor.dev/issue/164): Inconsistent behavior between linux on proc
+  // files.
+  //
+  // ~4.3: Syscall fails with EACCES.
+  // 4.17: Syscall succeeds and returns 1.
+  //
+  if (!IsRunningOnGvisor()) {
+    return;
+  }
+
+  EXPECT_THAT(ReadlinkWhileZombied("ns/pid", buf, sizeof(buf)),
+              SyscallFailsWithErrno(want));
+
+  EXPECT_THAT(ReadlinkWhileZombied("ns/user", buf, sizeof(buf)),
+              SyscallFailsWithErrno(want));
+}
+
+// Test whether /proc/PID/ symlinks can be read for an exited process.
+TEST(ProcPidSymlink, SubprocessExited) {
+  char buf[1];
+
+  EXPECT_THAT(ReadlinkWhileExited("exe", buf, sizeof(buf)),
+              SyscallFailsWithErrno(ESRCH));
+
+  EXPECT_THAT(ReadlinkWhileExited("ns/net", buf, sizeof(buf)),
+              SyscallFailsWithErrno(ESRCH));
+
+  EXPECT_THAT(ReadlinkWhileExited("ns/pid", buf, sizeof(buf)),
+              SyscallFailsWithErrno(ESRCH));
+
+  EXPECT_THAT(ReadlinkWhileExited("ns/user", buf, sizeof(buf)),
+              SyscallFailsWithErrno(ESRCH));
+}
+
+// /proc/PID/exe points to the correct binary.
+TEST(ProcPidExe, Subprocess) {
+  auto link = ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/self/exe"));
+  auto expected_absolute_path =
+      ASSERT_NO_ERRNO_AND_VALUE(MakeAbsolute(link, ""));
+
+  char actual[PATH_MAX + 1] = {};
+  ASSERT_THAT(ReadlinkWhileRunning("exe", actual, sizeof(actual)),
+              SyscallSucceedsWithValue(Gt(0)));
+  EXPECT_EQ(actual, expected_absolute_path);
+}
+
+// Test whether /proc/PID/ files can be read for a running process.
+TEST(ProcPidFile, SubprocessRunning) {
+  char buf[1];
+
+  EXPECT_THAT(ReadWhileRunning("auxv", buf, sizeof(buf)),
+              SyscallSucceedsWithValue(sizeof(buf)));
+
+  EXPECT_THAT(ReadWhileRunning("cmdline", buf, sizeof(buf)),
+              SyscallSucceedsWithValue(sizeof(buf)));
+
+  EXPECT_THAT(ReadWhileRunning("comm", buf, sizeof(buf)),
+              SyscallSucceedsWithValue(sizeof(buf)));
+
+  EXPECT_THAT(ReadWhileRunning("gid_map", buf, sizeof(buf)),
+              SyscallSucceedsWithValue(sizeof(buf)));
+
+  EXPECT_THAT(ReadWhileRunning("io", buf, sizeof(buf)),
+              SyscallSucceedsWithValue(sizeof(buf)));
+
+  EXPECT_THAT(ReadWhileRunning("maps", buf, sizeof(buf)),
+              SyscallSucceedsWithValue(sizeof(buf)));
+
+  EXPECT_THAT(ReadWhileRunning("stat", buf, sizeof(buf)),
+              SyscallSucceedsWithValue(sizeof(buf)));
+
+  EXPECT_THAT(ReadWhileRunning("status", buf, sizeof(buf)),
+              SyscallSucceedsWithValue(sizeof(buf)));
+
+  EXPECT_THAT(ReadWhileRunning("uid_map", buf, sizeof(buf)),
+              SyscallSucceedsWithValue(sizeof(buf)));
+
+  EXPECT_THAT(ReadWhileRunning("oom_score", buf, sizeof(buf)),
+              SyscallSucceedsWithValue(sizeof(buf)));
+
+  EXPECT_THAT(ReadWhileRunning("oom_score_adj", buf, sizeof(buf)),
+              SyscallSucceedsWithValue(sizeof(buf)));
+}
+
+// Test whether /proc/PID/ files can be read for a zombie process.
+TEST(ProcPidFile, SubprocessZombie) {
+  char buf[1];
+
+  // FIXME(gvisor.dev/issue/164): Loosen requirement due to inconsistent
+  // behavior on different kernels.
+  //
+  // ~4.3: Succeds and returns 0.
+  // 4.17: Succeeds and returns 1.
+  // gVisor: Succeeds and returns 0.
+  EXPECT_THAT(ReadWhileZombied("auxv", buf, sizeof(buf)), SyscallSucceeds());
+
+  EXPECT_THAT(ReadWhileZombied("cmdline", buf, sizeof(buf)),
+              SyscallSucceedsWithValue(0));
+
+  EXPECT_THAT(ReadWhileZombied("comm", buf, sizeof(buf)),
+              SyscallSucceedsWithValue(sizeof(buf)));
+
+  EXPECT_THAT(ReadWhileZombied("gid_map", buf, sizeof(buf)),
+              SyscallSucceedsWithValue(sizeof(buf)));
+
+  EXPECT_THAT(ReadWhileZombied("maps", buf, sizeof(buf)),
+              SyscallSucceedsWithValue(0));
+
+  EXPECT_THAT(ReadWhileZombied("stat", buf, sizeof(buf)),
+              SyscallSucceedsWithValue(sizeof(buf)));
+
+  EXPECT_THAT(ReadWhileZombied("status", buf, sizeof(buf)),
+              SyscallSucceedsWithValue(sizeof(buf)));
+
+  EXPECT_THAT(ReadWhileZombied("uid_map", buf, sizeof(buf)),
+              SyscallSucceedsWithValue(sizeof(buf)));
+
+  EXPECT_THAT(ReadWhileZombied("oom_score", buf, sizeof(buf)),
+              SyscallSucceedsWithValue(sizeof(buf)));
+
+  EXPECT_THAT(ReadWhileZombied("oom_score_adj", buf, sizeof(buf)),
+              SyscallSucceedsWithValue(sizeof(buf)));
+
+  // FIXME(gvisor.dev/issue/164): Inconsistent behavior between gVisor and linux
+  // on proc files.
+  //
+  // ~4.3: Fails and returns EACCES.
+  // gVisor & 4.17: Succeeds and returns 1.
+  //
+  // EXPECT_THAT(ReadWhileZombied("io", buf, sizeof(buf)),
+  //          SyscallFailsWithErrno(EACCES));
+}
+
+// Test whether /proc/PID/ files can be read for an exited process.
+TEST(ProcPidFile, SubprocessExited) {
+  char buf[1];
+
+  // FIXME(gvisor.dev/issue/164): Inconsistent behavior between kernels.
+  //
+  // ~4.3: Fails and returns ESRCH.
+  // gVisor: Fails with ESRCH.
+  // 4.17: Succeeds and returns 1.
+  //
+  // EXPECT_THAT(ReadWhileExited("auxv", buf, sizeof(buf)),
+  //            SyscallFailsWithErrno(ESRCH));
+
+  EXPECT_THAT(ReadWhileExited("cmdline", buf, sizeof(buf)),
+              SyscallFailsWithErrno(ESRCH));
+
+  if (!IsRunningOnGvisor()) {
+    // FIXME(gvisor.dev/issue/164): Succeeds on gVisor.
+    EXPECT_THAT(ReadWhileExited("comm", buf, sizeof(buf)),
+                SyscallFailsWithErrno(ESRCH));
+  }
+
+  EXPECT_THAT(ReadWhileExited("gid_map", buf, sizeof(buf)),
+              SyscallSucceedsWithValue(sizeof(buf)));
+
+  if (!IsRunningOnGvisor()) {
+    // FIXME(gvisor.dev/issue/164): Succeeds on gVisor.
+    EXPECT_THAT(ReadWhileExited("io", buf, sizeof(buf)),
+                SyscallFailsWithErrno(ESRCH));
+  }
+
+  if (!IsRunningOnGvisor()) {
+    // FIXME(gvisor.dev/issue/164): Returns EOF on gVisor.
+    EXPECT_THAT(ReadWhileExited("maps", buf, sizeof(buf)),
+                SyscallFailsWithErrno(ESRCH));
+  }
+
+  if (!IsRunningOnGvisor()) {
+    // FIXME(gvisor.dev/issue/164): Succeeds on gVisor.
+    EXPECT_THAT(ReadWhileExited("stat", buf, sizeof(buf)),
+                SyscallFailsWithErrno(ESRCH));
+  }
+
+  if (!IsRunningOnGvisor()) {
+    // FIXME(gvisor.dev/issue/164): Succeeds on gVisor.
+    EXPECT_THAT(ReadWhileExited("status", buf, sizeof(buf)),
+                SyscallFailsWithErrno(ESRCH));
+  }
+
+  EXPECT_THAT(ReadWhileExited("uid_map", buf, sizeof(buf)),
+              SyscallSucceedsWithValue(sizeof(buf)));
+
+  if (!IsRunningOnGvisor()) {
+    // FIXME(gvisor.dev/issue/164): Succeeds on gVisor.
+    EXPECT_THAT(ReadWhileExited("oom_score", buf, sizeof(buf)),
+                SyscallFailsWithErrno(ESRCH));
+  }
+
+  EXPECT_THAT(ReadWhileExited("oom_score_adj", buf, sizeof(buf)),
+              SyscallFailsWithErrno(ESRCH));
+}
+
+PosixError DirContainsImpl(absl::string_view path,
+                           const std::vector<std::string>& targets,
+                           bool strict) {
+  ASSIGN_OR_RETURN_ERRNO(auto listing, ListDir(path, false));
+  bool success = true;
+
+  for (auto& expected_entry : targets) {
+    auto cursor = std::find(listing.begin(), listing.end(), expected_entry);
+    if (cursor == listing.end()) {
+      success = false;
+    }
+  }
+
+  if (!success) {
+    return PosixError(
+        ENOENT,
+        absl::StrCat("Failed to find one or more paths in '", path, "'"));
+  }
+
+  if (strict) {
+    if (targets.size() != listing.size()) {
+      return PosixError(
+          EINVAL,
+          absl::StrCat("Expected to find ", targets.size(), " elements in '",
+                       path, "', but found ", listing.size()));
+    }
+  }
+
+  return NoError();
+}
+
+PosixError DirContains(absl::string_view path,
+                       const std::vector<std::string>& targets) {
+  return DirContainsImpl(path, targets, false);
+}
+
+PosixError DirContainsExactly(absl::string_view path,
+                              const std::vector<std::string>& targets) {
+  return DirContainsImpl(path, targets, true);
+}
+
+PosixError EventuallyDirContainsExactly(
+    absl::string_view path, const std::vector<std::string>& targets) {
+  constexpr int kRetryCount = 100;
+  const absl::Duration kRetryDelay = absl::Milliseconds(100);
+
+  for (int i = 0; i < kRetryCount; ++i) {
+    auto res = DirContainsExactly(path, targets);
+    if (res.ok()) {
+      return res;
+    } else if (i < kRetryCount - 1) {
+      // Sleep if this isn't the final iteration.
+      absl::SleepFor(kRetryDelay);
+    }
+  }
+  return PosixError(ETIMEDOUT,
+                    "Timed out while waiting for directory to contain files ");
+}
+
+TEST(ProcTask, Basic) {
+  EXPECT_NO_ERRNO(
+      DirContains("/proc/self/task", {".", "..", absl::StrCat(getpid())}));
+}
+
+std::vector<std::string> TaskFiles(
+    const std::vector<std::string>& initial_contents,
+    const std::vector<pid_t>& pids) {
+  return VecCat<std::string>(
+      initial_contents,
+      ApplyVec<std::string>([](const pid_t p) { return absl::StrCat(p); },
+                            pids));
+}
+
+std::vector<std::string> TaskFiles(const std::vector<pid_t>& pids) {
+  return TaskFiles({".", "..", absl::StrCat(getpid())}, pids);
+}
+
+// Helper class for creating a new task in the current thread group.
+class BlockingChild {
+ public:
+  BlockingChild() : thread_([=] { Start(); }) {}
+  ~BlockingChild() { Join(); }
+
+  pid_t Tid() const {
+    absl::MutexLock ml(&mu_);
+    mu_.Await(absl::Condition(&tid_ready_));
+    return tid_;
+  }
+
+  void Join() { Stop(); }
+
+ private:
+  void Start() {
+    absl::MutexLock ml(&mu_);
+    tid_ = syscall(__NR_gettid);
+    tid_ready_ = true;
+    mu_.Await(absl::Condition(&stop_));
+  }
+
+  void Stop() {
+    absl::MutexLock ml(&mu_);
+    stop_ = true;
+  }
+
+  mutable absl::Mutex mu_;
+  bool stop_ ABSL_GUARDED_BY(mu_) = false;
+  pid_t tid_;
+  bool tid_ready_ ABSL_GUARDED_BY(mu_) = false;
+
+  // Must be last to ensure that the destructor for the thread is run before
+  // any other member of the object is destroyed.
+  ScopedThread thread_;
+};
+
+TEST(ProcTask, NewThreadAppears) {
+  auto initial = ASSERT_NO_ERRNO_AND_VALUE(ListDir("/proc/self/task", false));
+  BlockingChild child1;
+  EXPECT_NO_ERRNO(DirContainsExactly("/proc/self/task",
+                                     TaskFiles(initial, {child1.Tid()})));
+}
+
+TEST(ProcTask, KilledThreadsDisappear) {
+  auto initial = ASSERT_NO_ERRNO_AND_VALUE(ListDir("/proc/self/task/", false));
+
+  BlockingChild child1;
+  EXPECT_NO_ERRNO(DirContainsExactly("/proc/self/task",
+                                     TaskFiles(initial, {child1.Tid()})));
+
+  // Stat child1's task file. Regression test for b/32097707.
+  struct stat statbuf;
+  const std::string child1_task_file =
+      absl::StrCat("/proc/self/task/", child1.Tid());
+  EXPECT_THAT(stat(child1_task_file.c_str(), &statbuf), SyscallSucceeds());
+
+  BlockingChild child2;
+  EXPECT_NO_ERRNO(DirContainsExactly(
+      "/proc/self/task", TaskFiles(initial, {child1.Tid(), child2.Tid()})));
+
+  BlockingChild child3;
+  BlockingChild child4;
+  BlockingChild child5;
+  EXPECT_NO_ERRNO(DirContainsExactly(
+      "/proc/self/task",
+      TaskFiles(initial, {child1.Tid(), child2.Tid(), child3.Tid(),
+                          child4.Tid(), child5.Tid()})));
+
+  child2.Join();
+  EXPECT_NO_ERRNO(EventuallyDirContainsExactly(
+      "/proc/self/task", TaskFiles(initial, {child1.Tid(), child3.Tid(),
+                                             child4.Tid(), child5.Tid()})));
+
+  child1.Join();
+  child4.Join();
+  EXPECT_NO_ERRNO(EventuallyDirContainsExactly(
+      "/proc/self/task", TaskFiles(initial, {child3.Tid(), child5.Tid()})));
+
+  // Stat child1's task file again.  This time it should fail. See b/32097707.
+  EXPECT_THAT(stat(child1_task_file.c_str(), &statbuf),
+              SyscallFailsWithErrno(ENOENT));
+
+  child3.Join();
+  child5.Join();
+  EXPECT_NO_ERRNO(EventuallyDirContainsExactly("/proc/self/task", initial));
+}
+
+TEST(ProcTask, ChildTaskDir) {
+  BlockingChild child1;
+  EXPECT_NO_ERRNO(DirContains("/proc/self/task", TaskFiles({child1.Tid()})));
+  EXPECT_NO_ERRNO(DirContains(absl::StrCat("/proc/", child1.Tid(), "/task"),
+                              TaskFiles({child1.Tid()})));
+}
+
+PosixError VerifyPidDir(std::string path) {
+  return DirContains(path, {"exe", "fd", "io", "maps", "ns", "stat", "status"});
+}
+
+TEST(ProcTask, VerifyTaskDir) {
+  EXPECT_NO_ERRNO(VerifyPidDir("/proc/self"));
+
+  EXPECT_NO_ERRNO(VerifyPidDir(absl::StrCat("/proc/self/task/", getpid())));
+  BlockingChild child1;
+  EXPECT_NO_ERRNO(VerifyPidDir(absl::StrCat("/proc/self/task/", child1.Tid())));
+
+  // Only the first level of task directories should contain the 'task'
+  // directory. That is:
+  //
+  // /proc/1234/task           <- should exist
+  // /proc/1234/task/1234/task <- should not exist
+  // /proc/1234/task/1235/task <- should not exist (where 1235 is in the same
+  //                                                thread group as 1234).
+  EXPECT_FALSE(
+      DirContains(absl::StrCat("/proc/self/task/", getpid()), {"task"}).ok())
+      << "Found 'task' directory in an inner directory.";
+}
+
+TEST(ProcTask, TaskDirCannotBeDeleted) {
+  // Drop capabilities that allow us to override file and directory permissions.
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+
+  EXPECT_THAT(rmdir("/proc/self/task"), SyscallFails());
+  EXPECT_THAT(rmdir(absl::StrCat("/proc/self/task/", getpid()).c_str()),
+              SyscallFailsWithErrno(EACCES));
+}
+
+TEST(ProcTask, TaskDirHasCorrectMetadata) {
+  struct stat st;
+  EXPECT_THAT(stat("/proc/self/task", &st), SyscallSucceeds());
+  EXPECT_TRUE(S_ISDIR(st.st_mode));
+
+  // Verify file is readable and executable by everyone.
+  mode_t expected_permissions =
+      S_IRUSR | S_IXUSR | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH;
+  mode_t permissions = st.st_mode & (S_IRWXU | S_IRWXG | S_IRWXO);
+  EXPECT_EQ(expected_permissions, permissions);
+}
+
+TEST(ProcTask, TaskDirCanSeekToEnd) {
+  const FileDescriptor dirfd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/self/task", O_RDONLY));
+  EXPECT_THAT(lseek(dirfd.get(), 0, SEEK_END), SyscallSucceeds());
+}
+
+TEST(ProcTask, VerifyTaskDirNlinks) {
+  // A task directory will have 3 links if the taskgroup has a single
+  // thread. For example, the following shows where the links to
+  // '/proc/12345/task comes' from for a single threaded process with pid 12345:
+  //
+  //   /proc/12345/task  <-- 1 link for the directory itself
+  //     .               <-- link from "."
+  //     ..
+  //     12345
+  //       .
+  //       ..            <-- link from ".." to parent.
+  //       <other contents of a task dir>
+  //
+  // We can't assert an absolute number of links since we don't control how many
+  // threads the test framework spawns. Instead, we'll ensure creating a new
+  // thread increases the number of links as expected.
+
+  // Once we reach the test body, we can count on the thread count being stable
+  // unless we spawn a new one.
+  uint64_t initial_links = ASSERT_NO_ERRNO_AND_VALUE(Links("/proc/self/task"));
+  ASSERT_GE(initial_links, 3);
+
+  // For each new subtask, we should gain a new link.
+  BlockingChild child1;
+  EXPECT_THAT(Links("/proc/self/task"),
+              IsPosixErrorOkAndHolds(initial_links + 1));
+  BlockingChild child2;
+  EXPECT_THAT(Links("/proc/self/task"),
+              IsPosixErrorOkAndHolds(initial_links + 2));
+}
+
+TEST(ProcTask, CommContainsThreadNameAndTrailingNewline) {
+  constexpr char kThreadName[] = "TestThread12345";
+  ASSERT_THAT(prctl(PR_SET_NAME, kThreadName), SyscallSucceeds());
+
+  auto thread_name = ASSERT_NO_ERRNO_AND_VALUE(
+      GetContents(JoinPath("/proc", absl::StrCat(getpid()), "task",
+                           absl::StrCat(syscall(SYS_gettid)), "comm")));
+  EXPECT_EQ(absl::StrCat(kThreadName, "\n"), thread_name);
+}
+
+TEST(ProcTaskNs, NsDirExistsAndHasCorrectMetadata) {
+  EXPECT_NO_ERRNO(DirContains("/proc/self/ns", {"net", "pid", "user"}));
+
+  // Let's just test the 'pid' entry, all of them are very similar.
+  struct stat st;
+  EXPECT_THAT(lstat("/proc/self/ns/pid", &st), SyscallSucceeds());
+  EXPECT_TRUE(S_ISLNK(st.st_mode));
+
+  auto link = ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/self/ns/pid"));
+  EXPECT_THAT(link, ::testing::StartsWith("pid:["));
+}
+
+TEST(ProcTaskNs, AccessOnNsNodeSucceeds) {
+  EXPECT_THAT(access("/proc/self/ns/pid", F_OK), SyscallSucceeds());
+}
+
+TEST(ProcSysKernelHostname, Exists) {
+  EXPECT_THAT(open("/proc/sys/kernel/hostname", O_RDONLY), SyscallSucceeds());
+}
+
+TEST(ProcSysKernelHostname, MatchesUname) {
+  struct utsname buf;
+  EXPECT_THAT(uname(&buf), SyscallSucceeds());
+  const std::string hostname = absl::StrCat(buf.nodename, "\n");
+  auto procfs_hostname =
+      ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/sys/kernel/hostname"));
+  EXPECT_EQ(procfs_hostname, hostname);
+}
+
+TEST(ProcSysVmMmapMinAddr, HasNumericValue) {
+  const std::string mmap_min_addr_str =
+      ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/sys/vm/mmap_min_addr"));
+  uintptr_t mmap_min_addr;
+  EXPECT_TRUE(absl::SimpleAtoi(mmap_min_addr_str, &mmap_min_addr))
+      << "/proc/sys/vm/mmap_min_addr does not contain a numeric value: "
+      << mmap_min_addr_str;
+}
+
+TEST(ProcSysVmOvercommitMemory, HasNumericValue) {
+  const std::string overcommit_memory_str =
+      ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/sys/vm/overcommit_memory"));
+  uintptr_t overcommit_memory;
+  EXPECT_TRUE(absl::SimpleAtoi(overcommit_memory_str, &overcommit_memory))
+      << "/proc/sys/vm/overcommit_memory does not contain a numeric value: "
+      << overcommit_memory;
+}
+
+// Check that link for proc fd entries point the target node, not the
+// symlink itself. Regression test for b/31155070.
+TEST(ProcTaskFd, FstatatFollowsSymlink) {
+  const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY));
+
+  struct stat sproc = {};
+  EXPECT_THAT(
+      fstatat(-1, absl::StrCat("/proc/self/fd/", fd.get()).c_str(), &sproc, 0),
+      SyscallSucceeds());
+
+  struct stat sfile = {};
+  EXPECT_THAT(fstatat(-1, file.path().c_str(), &sfile, 0), SyscallSucceeds());
+
+  // If fstatat follows the fd symlink, the device and inode numbers should
+  // match at a minimum.
+  EXPECT_EQ(sproc.st_dev, sfile.st_dev);
+  EXPECT_EQ(sproc.st_ino, sfile.st_ino);
+  EXPECT_EQ(0, memcmp(&sfile, &sproc, sizeof(sfile)));
+}
+
+TEST(ProcFilesystems, Bug65172365) {
+  std::string proc_filesystems =
+      ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/filesystems"));
+  ASSERT_FALSE(proc_filesystems.empty());
+}
+
+TEST(ProcFilesystems, PresenceOfShmMaxMniAll) {
+  uint64_t shmmax = 0;
+  uint64_t shmall = 0;
+  uint64_t shmmni = 0;
+  std::string proc_file;
+  proc_file = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/sys/kernel/shmmax"));
+  ASSERT_FALSE(proc_file.empty());
+  ASSERT_TRUE(absl::SimpleAtoi(proc_file, &shmmax));
+  proc_file = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/sys/kernel/shmall"));
+  ASSERT_FALSE(proc_file.empty());
+  ASSERT_TRUE(absl::SimpleAtoi(proc_file, &shmall));
+  proc_file = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/sys/kernel/shmmni"));
+  ASSERT_FALSE(proc_file.empty());
+  ASSERT_TRUE(absl::SimpleAtoi(proc_file, &shmmni));
+
+  ASSERT_GT(shmmax, 0);
+  ASSERT_GT(shmall, 0);
+  ASSERT_GT(shmmni, 0);
+  ASSERT_LE(shmall, shmmax);
+
+  // These values should never be higher than this by default, for more
+  // information see uapi/linux/shm.h
+  ASSERT_LE(shmmax, ULONG_MAX - (1UL << 24));
+  ASSERT_LE(shmall, ULONG_MAX - (1UL << 24));
+}
+
+// Check that /proc/mounts is a symlink to self/mounts.
+TEST(ProcMounts, IsSymlink) {
+  auto link = ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/mounts"));
+  EXPECT_EQ(link, "self/mounts");
+}
+
+TEST(ProcSelfMountinfo, RequiredFieldsArePresent) {
+  auto mountinfo =
+      ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/mountinfo"));
+  EXPECT_THAT(
+      mountinfo,
+      AllOf(
+          // Root mount.
+          ContainsRegex(
+              R"([0-9]+ [0-9]+ [0-9]+:[0-9]+ /\S* / (rw|ro).*- \S+ \S+ (rw|ro)\S*)"),
+          // Proc mount - always rw.
+          ContainsRegex(
+              R"([0-9]+ [0-9]+ [0-9]+:[0-9]+ / /proc rw.*- \S+ \S+ rw\S*)")));
+}
+
+// Check that /proc/self/mounts looks something like a real mounts file.
+TEST(ProcSelfMounts, RequiredFieldsArePresent) {
+  auto mounts = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/mounts"));
+  EXPECT_THAT(mounts,
+              AllOf(
+                  // Root mount.
+                  ContainsRegex(R"(\S+ / \S+ (rw|ro)\S* [0-9]+ [0-9]+\s)"),
+                  // Root mount.
+                  ContainsRegex(R"(\S+ /proc \S+ rw\S* [0-9]+ [0-9]+\s)")));
+}
+
+void CheckDuplicatesRecursively(std::string path) {
+  std::vector<std::string> child_dirs;
+
+  // There is the known issue of the linux procfs, that two consequent calls of
+  // readdir can return the same entry twice if between these calls one or more
+  // entries have been removed from this directory.
+  int max_attempts = 5;
+  for (int i = 0; i < max_attempts; i++) {
+    child_dirs.clear();
+    errno = 0;
+    bool success = true;
+    DIR* dir = opendir(path.c_str());
+    if (dir == nullptr) {
+      // Ignore any directories we can't read or missing directories as the
+      // directory could have been deleted/mutated from the time the parent
+      // directory contents were read.
+      return;
+    }
+    auto dir_closer = Cleanup([&dir]() { closedir(dir); });
+    std::unordered_set<std::string> children;
+    while (true) {
+      // Readdir(3): If the end of the directory stream is reached, NULL is
+      // returned and errno is not changed.  If an error occurs, NULL is
+      // returned and errno is set appropriately.  To distinguish end of stream
+      // and from an error, set errno to zero before calling readdir() and then
+      // check the value of errno if NULL is returned.
+      errno = 0;
+      struct dirent* dp = readdir(dir);
+      if (dp == nullptr) {
+        // Linux will return EINVAL when calling getdents on a /proc/tid/net
+        // file corresponding to a zombie task.
+        // See fs/proc/proc_net.c:proc_tgid_net_readdir().
+        //
+        // We just ignore the directory in this case.
+        if (errno == EINVAL && absl::StartsWith(path, "/proc/") &&
+            absl::EndsWith(path, "/net")) {
+          break;
+        }
+
+        // Otherwise, no errors are allowed.
+        ASSERT_EQ(errno, 0) << path;
+        break;  // We're done.
+      }
+
+      const std::string name = dp->d_name;
+
+      if (name == "." || name == "..") {
+        continue;
+      }
+
+      // Ignore a duplicate entry if it isn't the last attempt.
+      if (i == max_attempts - 1) {
+        ASSERT_EQ(children.find(name), children.end())
+            << absl::StrCat(path, "/", name);
+      } else if (children.find(name) != children.end()) {
+        std::cerr << "Duplicate entry: " << i << ":"
+                  << absl::StrCat(path, "/", name) << std::endl;
+        success = false;
+        break;
+      }
+      children.insert(name);
+
+      if (dp->d_type == DT_DIR) {
+        child_dirs.push_back(name);
+      }
+    }
+    if (success) {
+      break;
+    }
+  }
+  for (auto dname = child_dirs.begin(); dname != child_dirs.end(); dname++) {
+    CheckDuplicatesRecursively(absl::StrCat(path, "/", *dname));
+  }
+}
+
+TEST(Proc, NoDuplicates) { CheckDuplicatesRecursively("/proc"); }
+
+// Most /proc/PID files are owned by the task user with SUID_DUMP_USER.
+TEST(ProcPid, UserDumpableOwner) {
+  int before;
+  ASSERT_THAT(before = prctl(PR_GET_DUMPABLE), SyscallSucceeds());
+  auto cleanup = Cleanup([before] {
+    ASSERT_THAT(prctl(PR_SET_DUMPABLE, before), SyscallSucceeds());
+  });
+
+  EXPECT_THAT(prctl(PR_SET_DUMPABLE, SUID_DUMP_USER), SyscallSucceeds());
+
+  // This applies to the task directory itself and files inside.
+  struct stat st;
+  ASSERT_THAT(stat("/proc/self/", &st), SyscallSucceeds());
+  EXPECT_EQ(st.st_uid, geteuid());
+  EXPECT_EQ(st.st_gid, getegid());
+
+  ASSERT_THAT(stat("/proc/self/stat", &st), SyscallSucceeds());
+  EXPECT_EQ(st.st_uid, geteuid());
+  EXPECT_EQ(st.st_gid, getegid());
+}
+
+// /proc/PID files are owned by root with SUID_DUMP_DISABLE.
+TEST(ProcPid, RootDumpableOwner) {
+  int before;
+  ASSERT_THAT(before = prctl(PR_GET_DUMPABLE), SyscallSucceeds());
+  auto cleanup = Cleanup([before] {
+    ASSERT_THAT(prctl(PR_SET_DUMPABLE, before), SyscallSucceeds());
+  });
+
+  EXPECT_THAT(prctl(PR_SET_DUMPABLE, SUID_DUMP_DISABLE), SyscallSucceeds());
+
+  // This *does not* applies to the task directory itself (or other 0555
+  // directories), but does to files inside.
+  struct stat st;
+  ASSERT_THAT(stat("/proc/self/", &st), SyscallSucceeds());
+  EXPECT_EQ(st.st_uid, geteuid());
+  EXPECT_EQ(st.st_gid, getegid());
+
+  // This file is owned by root. Also allow nobody in case this test is running
+  // in a userns without root mapped.
+  ASSERT_THAT(stat("/proc/self/stat", &st), SyscallSucceeds());
+  EXPECT_THAT(st.st_uid, AnyOf(Eq(0), Eq(65534)));
+  EXPECT_THAT(st.st_gid, AnyOf(Eq(0), Eq(65534)));
+}
+
+TEST(Proc, GetdentsEnoent) {
+  FileDescriptor fd;
+  ASSERT_NO_ERRNO(WithSubprocess(
+      [&](int pid) -> PosixError {
+        // Running.
+        ASSIGN_OR_RETURN_ERRNO(fd, Open(absl::StrCat("/proc/", pid, "/task"),
+                                        O_RDONLY | O_DIRECTORY));
+
+        return NoError();
+      },
+      nullptr, nullptr));
+  char buf[1024];
+  ASSERT_THAT(syscall(SYS_getdents64, fd.get(), buf, sizeof(buf)),
+              SyscallFailsWithErrno(ENOENT));
+}
+
+void CheckSyscwFromIOFile(const std::string& path, const std::string& regex) {
+  std::string output;
+  ASSERT_NO_ERRNO(GetContents(path, &output));
+  ASSERT_THAT(output, ContainsRegex(absl::StrCat("syscw:\\s+", regex, "\n")));
+}
+
+// Checks that there is variable accounting of IO between threads/tasks.
+TEST(Proc, PidTidIOAccounting) {
+  absl::Notification notification;
+
+  // Run a thread with a bunch of writes. Check that io account records exactly
+  // the number of write calls. File open/close is there to prevent buffering.
+  ScopedThread writer([&notification] {
+    const int num_writes = 100;
+    for (int i = 0; i < num_writes; i++) {
+      auto path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+      ASSERT_NO_ERRNO(SetContents(path.path(), "a"));
+    }
+    notification.Notify();
+    const std::string& writer_dir =
+        absl::StrCat("/proc/", getpid(), "/task/", gettid(), "/io");
+
+    CheckSyscwFromIOFile(writer_dir, std::to_string(num_writes));
+  });
+
+  // Run a thread and do no writes. Check that no writes are recorded.
+  ScopedThread noop([&notification] {
+    notification.WaitForNotification();
+    const std::string& noop_dir =
+        absl::StrCat("/proc/", getpid(), "/task/", gettid(), "/io");
+
+    CheckSyscwFromIOFile(noop_dir, "0");
+  });
+
+  writer.Join();
+  noop.Join();
+}
+
+}  // namespace
+}  // namespace testing
+}  // namespace gvisor
+
+int main(int argc, char** argv) {
+  for (int i = 0; i < argc; ++i) {
+    gvisor::testing::saved_argv.emplace_back(std::string(argv[i]));
+  }
+
+  gvisor::testing::TestInit(&argc, &argv);
+  return gvisor::testing::RunAllTests();
+}
diff --git a/test/syscalls/linux/proc_net.cc b/test/syscalls/linux/proc_net.cc
new file mode 100644
index 000000000..3377b65cf
--- /dev/null
+++ b/test/syscalls/linux/proc_net.cc
@@ -0,0 +1,482 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <netinet/in.h>
+#include <poll.h>
+#include <sys/socket.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "absl/strings/numbers.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/str_split.h"
+#include "absl/strings/string_view.h"
+#include "absl/time/clock.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/capability_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/fs_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+constexpr const char kProcNet[] = "/proc/net";
+
+TEST(ProcNetSymlinkTarget, FileMode) {
+  struct stat s;
+  ASSERT_THAT(stat(kProcNet, &s), SyscallSucceeds());
+  EXPECT_EQ(s.st_mode & S_IFMT, S_IFDIR);
+  EXPECT_EQ(s.st_mode & 0777, 0555);
+}
+
+TEST(ProcNetSymlink, FileMode) {
+  struct stat s;
+  ASSERT_THAT(lstat(kProcNet, &s), SyscallSucceeds());
+  EXPECT_EQ(s.st_mode & S_IFMT, S_IFLNK);
+  EXPECT_EQ(s.st_mode & 0777, 0777);
+}
+
+TEST(ProcNetSymlink, Contents) {
+  char buf[40] = {};
+  int n = readlink(kProcNet, buf, sizeof(buf));
+  ASSERT_THAT(n, SyscallSucceeds());
+
+  buf[n] = 0;
+  EXPECT_STREQ(buf, "self/net");
+}
+
+TEST(ProcNetIfInet6, Format) {
+  auto ifinet6 = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/net/if_inet6"));
+  EXPECT_THAT(ifinet6,
+              ::testing::MatchesRegex(
+                  // Ex: "00000000000000000000000000000001 01 80 10 80 lo\n"
+                  "^([a-f0-9]{32}( [a-f0-9]{2}){4} +[a-z][a-z0-9]*\n)+$"));
+}
+
+TEST(ProcSysNetIpv4Sack, Exists) {
+  EXPECT_THAT(open("/proc/sys/net/ipv4/tcp_sack", O_RDONLY), SyscallSucceeds());
+}
+
+TEST(ProcSysNetIpv4Sack, CanReadAndWrite) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability((CAP_DAC_OVERRIDE))));
+
+  auto const fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/sys/net/ipv4/tcp_sack", O_RDWR));
+
+  char buf;
+  EXPECT_THAT(PreadFd(fd.get(), &buf, sizeof(buf), 0),
+              SyscallSucceedsWithValue(sizeof(buf)));
+
+  EXPECT_TRUE(buf == '0' || buf == '1') << "unexpected tcp_sack: " << buf;
+
+  char to_write = (buf == '1') ? '0' : '1';
+  EXPECT_THAT(PwriteFd(fd.get(), &to_write, sizeof(to_write), 0),
+              SyscallSucceedsWithValue(sizeof(to_write)));
+
+  buf = 0;
+  EXPECT_THAT(PreadFd(fd.get(), &buf, sizeof(buf), 0),
+              SyscallSucceedsWithValue(sizeof(buf)));
+  EXPECT_EQ(buf, to_write);
+}
+
+// DeviceEntry is an entry in /proc/net/dev
+struct DeviceEntry {
+  std::string name;
+  uint64_t stats[16];
+};
+
+PosixErrorOr<std::vector<DeviceEntry>> GetDeviceMetricsFromProc(
+    const std::string dev) {
+  std::vector<std::string> lines = absl::StrSplit(dev, '\n');
+  std::vector<DeviceEntry> entries;
+
+  // /proc/net/dev prints 2 lines of headers followed by a line of metrics for
+  // each network interface.
+  for (unsigned i = 2; i < lines.size(); i++) {
+    // Ignore empty lines.
+    if (lines[i].empty()) {
+      continue;
+    }
+
+    std::vector<std::string> values =
+        absl::StrSplit(lines[i], ' ', absl::SkipWhitespace());
+
+    // Interface name + 16 values.
+    if (values.size() != 17) {
+      return PosixError(EINVAL, "invalid line: " + lines[i]);
+    }
+
+    DeviceEntry entry;
+    entry.name = values[0];
+    // Skip the interface name and read only the values.
+    for (unsigned j = 1; j < 17; j++) {
+      uint64_t num;
+      if (!absl::SimpleAtoi(values[j], &num)) {
+        return PosixError(EINVAL, "invalid value: " + values[j]);
+      }
+      entry.stats[j - 1] = num;
+    }
+
+    entries.push_back(entry);
+  }
+
+  return entries;
+}
+
+// TEST(ProcNetDev, Format) tests that /proc/net/dev is parsable and
+// contains at least one entry.
+TEST(ProcNetDev, Format) {
+  auto dev = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/net/dev"));
+  auto entries = ASSERT_NO_ERRNO_AND_VALUE(GetDeviceMetricsFromProc(dev));
+
+  EXPECT_GT(entries.size(), 0);
+}
+
+PosixErrorOr<uint64_t> GetSNMPMetricFromProc(const std::string snmp,
+                                             const std::string& type,
+                                             const std::string& item) {
+  std::vector<std::string> snmp_vec = absl::StrSplit(snmp, '\n');
+
+  // /proc/net/snmp prints a line of headers followed by a line of metrics.
+  // Only search the headers.
+  for (unsigned i = 0; i < snmp_vec.size(); i = i + 2) {
+    if (!absl::StartsWith(snmp_vec[i], type)) continue;
+
+    std::vector<std::string> fields =
+        absl::StrSplit(snmp_vec[i], ' ', absl::SkipWhitespace());
+
+    EXPECT_TRUE((i + 1) < snmp_vec.size());
+    std::vector<std::string> values =
+        absl::StrSplit(snmp_vec[i + 1], ' ', absl::SkipWhitespace());
+
+    EXPECT_TRUE(!fields.empty() && fields.size() == values.size());
+
+    // Metrics start at the first index.
+    for (unsigned j = 1; j < fields.size(); j++) {
+      if (fields[j] == item) {
+        uint64_t val;
+        if (!absl::SimpleAtoi(values[j], &val)) {
+          return PosixError(EINVAL,
+                            absl::StrCat("field is not a number: ", values[j]));
+        }
+
+        return val;
+      }
+    }
+  }
+  // We should never get here.
+  return PosixError(
+      EINVAL, absl::StrCat("failed to find ", type, "/", item, " in:", snmp));
+}
+
+TEST(ProcNetSnmp, TcpReset_NoRandomSave) {
+  // TODO(gvisor.dev/issue/866): epsocket metrics are not savable.
+  DisableSave ds;
+
+  uint64_t oldAttemptFails;
+  uint64_t oldActiveOpens;
+  uint64_t oldOutRsts;
+  auto snmp = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/net/snmp"));
+  oldActiveOpens = ASSERT_NO_ERRNO_AND_VALUE(
+      GetSNMPMetricFromProc(snmp, "Tcp", "ActiveOpens"));
+  oldOutRsts =
+      ASSERT_NO_ERRNO_AND_VALUE(GetSNMPMetricFromProc(snmp, "Tcp", "OutRsts"));
+  oldAttemptFails = ASSERT_NO_ERRNO_AND_VALUE(
+      GetSNMPMetricFromProc(snmp, "Tcp", "AttemptFails"));
+
+  FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_STREAM, 0));
+
+  struct sockaddr_in sin = {
+      .sin_family = AF_INET,
+      .sin_port = htons(1234),
+  };
+
+  ASSERT_EQ(inet_pton(AF_INET, "127.0.0.1", &(sin.sin_addr)), 1);
+  ASSERT_THAT(connect(s.get(), (struct sockaddr*)&sin, sizeof(sin)),
+              SyscallFailsWithErrno(ECONNREFUSED));
+
+  uint64_t newAttemptFails;
+  uint64_t newActiveOpens;
+  uint64_t newOutRsts;
+  snmp = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/net/snmp"));
+  newActiveOpens = ASSERT_NO_ERRNO_AND_VALUE(
+      GetSNMPMetricFromProc(snmp, "Tcp", "ActiveOpens"));
+  newOutRsts =
+      ASSERT_NO_ERRNO_AND_VALUE(GetSNMPMetricFromProc(snmp, "Tcp", "OutRsts"));
+  newAttemptFails = ASSERT_NO_ERRNO_AND_VALUE(
+      GetSNMPMetricFromProc(snmp, "Tcp", "AttemptFails"));
+
+  EXPECT_EQ(oldActiveOpens, newActiveOpens - 1);
+  EXPECT_EQ(oldOutRsts, newOutRsts - 1);
+  EXPECT_EQ(oldAttemptFails, newAttemptFails - 1);
+}
+
+TEST(ProcNetSnmp, TcpEstab_NoRandomSave) {
+  // TODO(gvisor.dev/issue/866): epsocket metrics are not savable.
+  DisableSave ds;
+
+  uint64_t oldEstabResets;
+  uint64_t oldActiveOpens;
+  uint64_t oldPassiveOpens;
+  uint64_t oldCurrEstab;
+  auto snmp = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/net/snmp"));
+  oldActiveOpens = ASSERT_NO_ERRNO_AND_VALUE(
+      GetSNMPMetricFromProc(snmp, "Tcp", "ActiveOpens"));
+  oldPassiveOpens = ASSERT_NO_ERRNO_AND_VALUE(
+      GetSNMPMetricFromProc(snmp, "Tcp", "PassiveOpens"));
+  oldCurrEstab = ASSERT_NO_ERRNO_AND_VALUE(
+      GetSNMPMetricFromProc(snmp, "Tcp", "CurrEstab"));
+  oldEstabResets = ASSERT_NO_ERRNO_AND_VALUE(
+      GetSNMPMetricFromProc(snmp, "Tcp", "EstabResets"));
+
+  FileDescriptor s_listen =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_STREAM, 0));
+  struct sockaddr_in sin = {
+      .sin_family = AF_INET,
+      .sin_port = 0,
+  };
+
+  ASSERT_EQ(inet_pton(AF_INET, "127.0.0.1", &(sin.sin_addr)), 1);
+  ASSERT_THAT(bind(s_listen.get(), (struct sockaddr*)&sin, sizeof(sin)),
+              SyscallSucceeds());
+  ASSERT_THAT(listen(s_listen.get(), 1), SyscallSucceeds());
+
+  // Get the port bound by the listening socket.
+  socklen_t addrlen = sizeof(sin);
+  ASSERT_THAT(
+      getsockname(s_listen.get(), reinterpret_cast<sockaddr*>(&sin), &addrlen),
+      SyscallSucceeds());
+
+  FileDescriptor s_connect =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_STREAM, 0));
+  ASSERT_THAT(connect(s_connect.get(), (struct sockaddr*)&sin, sizeof(sin)),
+              SyscallSucceeds());
+
+  auto s_accept =
+      ASSERT_NO_ERRNO_AND_VALUE(Accept(s_listen.get(), nullptr, nullptr));
+
+  uint64_t newEstabResets;
+  uint64_t newActiveOpens;
+  uint64_t newPassiveOpens;
+  uint64_t newCurrEstab;
+  snmp = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/net/snmp"));
+  newActiveOpens = ASSERT_NO_ERRNO_AND_VALUE(
+      GetSNMPMetricFromProc(snmp, "Tcp", "ActiveOpens"));
+  newPassiveOpens = ASSERT_NO_ERRNO_AND_VALUE(
+      GetSNMPMetricFromProc(snmp, "Tcp", "PassiveOpens"));
+  newCurrEstab = ASSERT_NO_ERRNO_AND_VALUE(
+      GetSNMPMetricFromProc(snmp, "Tcp", "CurrEstab"));
+
+  EXPECT_EQ(oldActiveOpens, newActiveOpens - 1);
+  EXPECT_EQ(oldPassiveOpens, newPassiveOpens - 1);
+  EXPECT_EQ(oldCurrEstab, newCurrEstab - 2);
+
+  // Send 1 byte from client to server.
+  ASSERT_THAT(send(s_connect.get(), "a", 1, 0), SyscallSucceedsWithValue(1));
+
+  constexpr int kPollTimeoutMs = 20000;  // Wait up to 20 seconds for the data.
+
+  // Wait until server-side fd sees the data on its side but don't read it.
+  struct pollfd poll_fd = {s_accept.get(), POLLIN, 0};
+  ASSERT_THAT(RetryEINTR(poll)(&poll_fd, 1, kPollTimeoutMs),
+              SyscallSucceedsWithValue(1));
+
+  // Now close server-side fd without reading the data which leads to a RST
+  // packet sent to client side.
+  s_accept.reset(-1);
+
+  // Wait until client-side fd sees RST packet.
+  struct pollfd poll_fd1 = {s_connect.get(), POLLIN, 0};
+  ASSERT_THAT(RetryEINTR(poll)(&poll_fd1, 1, kPollTimeoutMs),
+              SyscallSucceedsWithValue(1));
+
+  // Now close client-side fd.
+  s_connect.reset(-1);
+
+  // Wait until the process of the netstack.
+  absl::SleepFor(absl::Seconds(1));
+
+  snmp = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/net/snmp"));
+  newCurrEstab = ASSERT_NO_ERRNO_AND_VALUE(
+      GetSNMPMetricFromProc(snmp, "Tcp", "CurrEstab"));
+  newEstabResets = ASSERT_NO_ERRNO_AND_VALUE(
+      GetSNMPMetricFromProc(snmp, "Tcp", "EstabResets"));
+
+  EXPECT_EQ(oldCurrEstab, newCurrEstab);
+  EXPECT_EQ(oldEstabResets, newEstabResets - 2);
+}
+
+TEST(ProcNetSnmp, UdpNoPorts_NoRandomSave) {
+  // TODO(gvisor.dev/issue/866): epsocket metrics are not savable.
+  DisableSave ds;
+
+  uint64_t oldOutDatagrams;
+  uint64_t oldNoPorts;
+  auto snmp = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/net/snmp"));
+  oldOutDatagrams = ASSERT_NO_ERRNO_AND_VALUE(
+      GetSNMPMetricFromProc(snmp, "Udp", "OutDatagrams"));
+  oldNoPorts =
+      ASSERT_NO_ERRNO_AND_VALUE(GetSNMPMetricFromProc(snmp, "Udp", "NoPorts"));
+
+  FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, 0));
+
+  struct sockaddr_in sin = {
+      .sin_family = AF_INET,
+      .sin_port = htons(4444),
+  };
+  ASSERT_EQ(inet_pton(AF_INET, "127.0.0.1", &(sin.sin_addr)), 1);
+  ASSERT_THAT(sendto(s.get(), "a", 1, 0, (struct sockaddr*)&sin, sizeof(sin)),
+              SyscallSucceedsWithValue(1));
+
+  uint64_t newOutDatagrams;
+  uint64_t newNoPorts;
+  snmp = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/net/snmp"));
+  newOutDatagrams = ASSERT_NO_ERRNO_AND_VALUE(
+      GetSNMPMetricFromProc(snmp, "Udp", "OutDatagrams"));
+  newNoPorts =
+      ASSERT_NO_ERRNO_AND_VALUE(GetSNMPMetricFromProc(snmp, "Udp", "NoPorts"));
+
+  EXPECT_EQ(oldOutDatagrams, newOutDatagrams - 1);
+  EXPECT_EQ(oldNoPorts, newNoPorts - 1);
+}
+
+TEST(ProcNetSnmp, UdpIn_NoRandomSave) {
+  // TODO(gvisor.dev/issue/866): epsocket metrics are not savable.
+  const DisableSave ds;
+
+  uint64_t oldOutDatagrams;
+  uint64_t oldInDatagrams;
+  auto snmp = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/net/snmp"));
+  oldOutDatagrams = ASSERT_NO_ERRNO_AND_VALUE(
+      GetSNMPMetricFromProc(snmp, "Udp", "OutDatagrams"));
+  oldInDatagrams = ASSERT_NO_ERRNO_AND_VALUE(
+      GetSNMPMetricFromProc(snmp, "Udp", "InDatagrams"));
+
+  std::cerr << "snmp: " << std::endl << snmp << std::endl;
+  FileDescriptor server =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, 0));
+  struct sockaddr_in sin = {
+      .sin_family = AF_INET,
+      .sin_port = htons(0),
+  };
+  ASSERT_EQ(inet_pton(AF_INET, "127.0.0.1", &(sin.sin_addr)), 1);
+  ASSERT_THAT(bind(server.get(), (struct sockaddr*)&sin, sizeof(sin)),
+              SyscallSucceeds());
+  // Get the port bound by the server socket.
+  socklen_t addrlen = sizeof(sin);
+  ASSERT_THAT(
+      getsockname(server.get(), reinterpret_cast<sockaddr*>(&sin), &addrlen),
+      SyscallSucceeds());
+
+  FileDescriptor client =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, 0));
+  ASSERT_THAT(
+      sendto(client.get(), "a", 1, 0, (struct sockaddr*)&sin, sizeof(sin)),
+      SyscallSucceedsWithValue(1));
+
+  char buf[128];
+  ASSERT_THAT(recvfrom(server.get(), buf, sizeof(buf), 0, NULL, NULL),
+              SyscallSucceedsWithValue(1));
+
+  uint64_t newOutDatagrams;
+  uint64_t newInDatagrams;
+  snmp = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/net/snmp"));
+  std::cerr << "new snmp: " << std::endl << snmp << std::endl;
+  newOutDatagrams = ASSERT_NO_ERRNO_AND_VALUE(
+      GetSNMPMetricFromProc(snmp, "Udp", "OutDatagrams"));
+  newInDatagrams = ASSERT_NO_ERRNO_AND_VALUE(
+      GetSNMPMetricFromProc(snmp, "Udp", "InDatagrams"));
+
+  EXPECT_EQ(oldOutDatagrams, newOutDatagrams - 1);
+  EXPECT_EQ(oldInDatagrams, newInDatagrams - 1);
+}
+
+TEST(ProcNetSnmp, CheckNetStat) {
+  // TODO(b/155123175): SNMP and netstat don't work on gVisor.
+  SKIP_IF(IsRunningOnGvisor());
+
+  std::string contents =
+      ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/net/netstat"));
+
+  int name_count = 0;
+  int value_count = 0;
+  std::vector<absl::string_view> lines = absl::StrSplit(contents, '\n');
+  for (int i = 0; i + 1 < lines.size(); i += 2) {
+    std::vector<absl::string_view> names =
+        absl::StrSplit(lines[i], absl::ByAnyChar("\t "));
+    std::vector<absl::string_view> values =
+        absl::StrSplit(lines[i + 1], absl::ByAnyChar("\t "));
+    EXPECT_EQ(names.size(), values.size()) << " mismatch in lines '" << lines[i]
+                                           << "' and '" << lines[i + 1] << "'";
+    for (int j = 0; j < names.size() && j < values.size(); ++j) {
+      if (names[j] == "TCPOrigDataSent" || names[j] == "TCPSynRetrans" ||
+          names[j] == "TCPDSACKRecv" || names[j] == "TCPDSACKOfoRecv") {
+        ++name_count;
+        int64_t val;
+        if (absl::SimpleAtoi(values[j], &val)) {
+          ++value_count;
+        }
+      }
+    }
+  }
+  EXPECT_EQ(name_count, 4);
+  EXPECT_EQ(value_count, 4);
+}
+
+TEST(ProcNetSnmp, Stat) {
+  struct stat st = {};
+  ASSERT_THAT(stat("/proc/net/snmp", &st), SyscallSucceeds());
+}
+
+TEST(ProcNetSnmp, CheckSnmp) {
+  // TODO(b/155123175): SNMP and netstat don't work on gVisor.
+  SKIP_IF(IsRunningOnGvisor());
+
+  std::string contents =
+      ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/net/snmp"));
+
+  int name_count = 0;
+  int value_count = 0;
+  std::vector<absl::string_view> lines = absl::StrSplit(contents, '\n');
+  for (int i = 0; i + 1 < lines.size(); i += 2) {
+    std::vector<absl::string_view> names =
+        absl::StrSplit(lines[i], absl::ByAnyChar("\t "));
+    std::vector<absl::string_view> values =
+        absl::StrSplit(lines[i + 1], absl::ByAnyChar("\t "));
+    EXPECT_EQ(names.size(), values.size()) << " mismatch in lines '" << lines[i]
+                                           << "' and '" << lines[i + 1] << "'";
+    for (int j = 0; j < names.size() && j < values.size(); ++j) {
+      if (names[j] == "RetransSegs") {
+        ++name_count;
+        int64_t val;
+        if (absl::SimpleAtoi(values[j], &val)) {
+          ++value_count;
+        }
+      }
+    }
+  }
+  EXPECT_EQ(name_count, 1);
+  EXPECT_EQ(value_count, 1);
+}
+
+}  // namespace
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/proc_net_tcp.cc b/test/syscalls/linux/proc_net_tcp.cc
new file mode 100644
index 000000000..5b6e3e3cd
--- /dev/null
+++ b/test/syscalls/linux/proc_net_tcp.cc
@@ -0,0 +1,496 @@
+// Copyright 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <netinet/tcp.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "absl/strings/numbers.h"
+#include "absl/strings/str_join.h"
+#include "absl/strings/str_split.h"
+#include "test/syscalls/linux/ip_socket_test_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+using absl::StrCat;
+using absl::StrSplit;
+
+constexpr char kProcNetTCPHeader[] =
+    "  sl  local_address rem_address   st tx_queue rx_queue tr tm->when "
+    "retrnsmt   uid  timeout inode                                             "
+    "        ";
+
+// TCPEntry represents a single entry from /proc/net/tcp.
+struct TCPEntry {
+  uint32_t local_addr;
+  uint16_t local_port;
+
+  uint32_t remote_addr;
+  uint16_t remote_port;
+
+  uint64_t state;
+  uint64_t uid;
+  uint64_t inode;
+};
+
+// Finds the first entry in 'entries' for which 'predicate' returns true.
+// Returns true on match, and sets 'match' to a copy of the matching entry. If
+// 'match' is null, it's ignored.
+bool FindBy(const std::vector<TCPEntry>& entries, TCPEntry* match,
+            std::function<bool(const TCPEntry&)> predicate) {
+  for (const TCPEntry& entry : entries) {
+    if (predicate(entry)) {
+      if (match != nullptr) {
+        *match = entry;
+      }
+      return true;
+    }
+  }
+  return false;
+}
+
+bool FindByLocalAddr(const std::vector<TCPEntry>& entries, TCPEntry* match,
+                     const struct sockaddr* addr) {
+  uint32_t host = IPFromInetSockaddr(addr);
+  uint16_t port = PortFromInetSockaddr(addr);
+  return FindBy(entries, match, [host, port](const TCPEntry& e) {
+    return (e.local_addr == host && e.local_port == port);
+  });
+}
+
+bool FindByRemoteAddr(const std::vector<TCPEntry>& entries, TCPEntry* match,
+                      const struct sockaddr* addr) {
+  uint32_t host = IPFromInetSockaddr(addr);
+  uint16_t port = PortFromInetSockaddr(addr);
+  return FindBy(entries, match, [host, port](const TCPEntry& e) {
+    return (e.remote_addr == host && e.remote_port == port);
+  });
+}
+
+// Returns a parsed representation of /proc/net/tcp entries.
+PosixErrorOr<std::vector<TCPEntry>> ProcNetTCPEntries() {
+  std::string content;
+  RETURN_IF_ERRNO(GetContents("/proc/net/tcp", &content));
+
+  bool found_header = false;
+  std::vector<TCPEntry> entries;
+  std::vector<std::string> lines = StrSplit(content, '\n');
+  std::cerr << "<contents of /proc/net/tcp>" << std::endl;
+  for (const std::string& line : lines) {
+    std::cerr << line << std::endl;
+
+    if (!found_header) {
+      EXPECT_EQ(line, kProcNetTCPHeader);
+      found_header = true;
+      continue;
+    }
+    if (line.empty()) {
+      continue;
+    }
+
+    // Parse a single entry from /proc/net/tcp.
+    //
+    // Example entries:
+    //
+    // clang-format off
+    //
+    //  sl  local_address rem_address   st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode
+    //   0: 00000000:006F 00000000:0000 0A 00000000:00000000 00:00000000 00000000     0        0 1968 1 0000000000000000 100 0 0 10 0
+    //   1: 0100007F:7533 00000000:0000 0A 00000000:00000000 00:00000000 00000000   120        0 10684 1 0000000000000000 100 0 0 10 0
+    //   ^     ^       ^     ^       ^   ^     ^       ^      ^     ^        ^       ^         ^   ^   ^      ^            ^  ^ ^  ^ ^
+    //   0     1       2     3       4   5     6       7      8     9       10       11       12  13  14     15           16 17 18 19 20
+    //
+    // clang-format on
+
+    TCPEntry entry;
+    std::vector<std::string> fields =
+        StrSplit(line, absl::ByAnyChar(": "), absl::SkipEmpty());
+
+    ASSIGN_OR_RETURN_ERRNO(entry.local_addr, AtoiBase(fields[1], 16));
+    ASSIGN_OR_RETURN_ERRNO(entry.local_port, AtoiBase(fields[2], 16));
+
+    ASSIGN_OR_RETURN_ERRNO(entry.remote_addr, AtoiBase(fields[3], 16));
+    ASSIGN_OR_RETURN_ERRNO(entry.remote_port, AtoiBase(fields[4], 16));
+
+    ASSIGN_OR_RETURN_ERRNO(entry.state, AtoiBase(fields[5], 16));
+    ASSIGN_OR_RETURN_ERRNO(entry.uid, Atoi<uint64_t>(fields[11]));
+    ASSIGN_OR_RETURN_ERRNO(entry.inode, Atoi<uint64_t>(fields[13]));
+
+    entries.push_back(entry);
+  }
+  std::cerr << "<end of /proc/net/tcp>" << std::endl;
+
+  return entries;
+}
+
+TEST(ProcNetTCP, Exists) {
+  const std::string content =
+      ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/net/tcp"));
+  const std::string header_line = StrCat(kProcNetTCPHeader, "\n");
+  if (IsRunningOnGvisor()) {
+    // Should be just the header since we don't have any tcp sockets yet.
+    EXPECT_EQ(content, header_line);
+  } else {
+    // On a general linux machine, we could have abitrary sockets on the system,
+    // so just check the header.
+    EXPECT_THAT(content, ::testing::StartsWith(header_line));
+  }
+}
+
+TEST(ProcNetTCP, EntryUID) {
+  auto sockets =
+      ASSERT_NO_ERRNO_AND_VALUE(IPv4TCPAcceptBindSocketPair(0).Create());
+  std::vector<TCPEntry> entries =
+      ASSERT_NO_ERRNO_AND_VALUE(ProcNetTCPEntries());
+  TCPEntry e;
+  ASSERT_TRUE(FindByLocalAddr(entries, &e, sockets->first_addr()));
+  EXPECT_EQ(e.uid, geteuid());
+  ASSERT_TRUE(FindByRemoteAddr(entries, &e, sockets->first_addr()));
+  EXPECT_EQ(e.uid, geteuid());
+}
+
+TEST(ProcNetTCP, BindAcceptConnect) {
+  auto sockets =
+      ASSERT_NO_ERRNO_AND_VALUE(IPv4TCPAcceptBindSocketPair(0).Create());
+  std::vector<TCPEntry> entries =
+      ASSERT_NO_ERRNO_AND_VALUE(ProcNetTCPEntries());
+  // We can only make assertions about the total number of entries if we control
+  // the entire "machine".
+  if (IsRunningOnGvisor()) {
+    EXPECT_EQ(entries.size(), 2);
+  }
+
+  EXPECT_TRUE(FindByLocalAddr(entries, nullptr, sockets->first_addr()));
+  EXPECT_TRUE(FindByRemoteAddr(entries, nullptr, sockets->first_addr()));
+}
+
+TEST(ProcNetTCP, InodeReasonable) {
+  auto sockets =
+      ASSERT_NO_ERRNO_AND_VALUE(IPv4TCPAcceptBindSocketPair(0).Create());
+  std::vector<TCPEntry> entries =
+      ASSERT_NO_ERRNO_AND_VALUE(ProcNetTCPEntries());
+
+  TCPEntry accepted_entry;
+  ASSERT_TRUE(FindByLocalAddr(entries, &accepted_entry, sockets->first_addr()));
+  EXPECT_NE(accepted_entry.inode, 0);
+
+  TCPEntry client_entry;
+  ASSERT_TRUE(FindByRemoteAddr(entries, &client_entry, sockets->first_addr()));
+  EXPECT_NE(client_entry.inode, 0);
+  EXPECT_NE(accepted_entry.inode, client_entry.inode);
+}
+
+TEST(ProcNetTCP, State) {
+  std::unique_ptr<FileDescriptor> server =
+      ASSERT_NO_ERRNO_AND_VALUE(IPv4TCPUnboundSocket(0).Create());
+
+  auto test_addr = V4Loopback();
+  ASSERT_THAT(
+      bind(server->get(), reinterpret_cast<struct sockaddr*>(&test_addr.addr),
+           test_addr.addr_len),
+      SyscallSucceeds());
+
+  struct sockaddr addr;
+  socklen_t addrlen = sizeof(struct sockaddr);
+  ASSERT_THAT(getsockname(server->get(), &addr, &addrlen), SyscallSucceeds());
+  ASSERT_EQ(addrlen, sizeof(struct sockaddr));
+
+  ASSERT_THAT(listen(server->get(), 10), SyscallSucceeds());
+  std::vector<TCPEntry> entries =
+      ASSERT_NO_ERRNO_AND_VALUE(ProcNetTCPEntries());
+  TCPEntry listen_entry;
+  ASSERT_TRUE(FindByLocalAddr(entries, &listen_entry, &addr));
+  EXPECT_EQ(listen_entry.state, TCP_LISTEN);
+
+  std::unique_ptr<FileDescriptor> client =
+      ASSERT_NO_ERRNO_AND_VALUE(IPv4TCPUnboundSocket(0).Create());
+  ASSERT_THAT(RetryEINTR(connect)(client->get(), &addr, addrlen),
+              SyscallSucceeds());
+  entries = ASSERT_NO_ERRNO_AND_VALUE(ProcNetTCPEntries());
+  ASSERT_TRUE(FindByLocalAddr(entries, &listen_entry, &addr));
+  EXPECT_EQ(listen_entry.state, TCP_LISTEN);
+  TCPEntry client_entry;
+  ASSERT_TRUE(FindByRemoteAddr(entries, &client_entry, &addr));
+  EXPECT_EQ(client_entry.state, TCP_ESTABLISHED);
+
+  FileDescriptor accepted =
+      ASSERT_NO_ERRNO_AND_VALUE(Accept(server->get(), nullptr, nullptr));
+
+  const uint32_t accepted_local_host = IPFromInetSockaddr(&addr);
+  const uint16_t accepted_local_port = PortFromInetSockaddr(&addr);
+
+  entries = ASSERT_NO_ERRNO_AND_VALUE(ProcNetTCPEntries());
+  TCPEntry accepted_entry;
+  ASSERT_TRUE(FindBy(entries, &accepted_entry,
+                     [client_entry, accepted_local_host,
+                      accepted_local_port](const TCPEntry& e) {
+                       return e.local_addr == accepted_local_host &&
+                              e.local_port == accepted_local_port &&
+                              e.remote_addr == client_entry.local_addr &&
+                              e.remote_port == client_entry.local_port;
+                     }));
+  EXPECT_EQ(accepted_entry.state, TCP_ESTABLISHED);
+}
+
+constexpr char kProcNetTCP6Header[] =
+    "  sl  local_address                         remote_address"
+    "                        st tx_queue rx_queue tr tm->when retrnsmt"
+    "   uid  timeout inode";
+
+// TCP6Entry represents a single entry from /proc/net/tcp6.
+struct TCP6Entry {
+  struct in6_addr local_addr;
+  uint16_t local_port;
+
+  struct in6_addr remote_addr;
+  uint16_t remote_port;
+
+  uint64_t state;
+  uint64_t uid;
+  uint64_t inode;
+};
+
+bool IPv6AddrEqual(const struct in6_addr* a1, const struct in6_addr* a2) {
+  return memcmp(a1, a2, sizeof(struct in6_addr)) == 0;
+}
+
+// Finds the first entry in 'entries' for which 'predicate' returns true.
+// Returns true on match, and sets 'match' to a copy of the matching entry. If
+// 'match' is null, it's ignored.
+bool FindBy6(const std::vector<TCP6Entry>& entries, TCP6Entry* match,
+             std::function<bool(const TCP6Entry&)> predicate) {
+  for (const TCP6Entry& entry : entries) {
+    if (predicate(entry)) {
+      if (match != nullptr) {
+        *match = entry;
+      }
+      return true;
+    }
+  }
+  return false;
+}
+
+const struct in6_addr* IP6FromInetSockaddr(const struct sockaddr* addr) {
+  auto* addr6 = reinterpret_cast<const struct sockaddr_in6*>(addr);
+  return &addr6->sin6_addr;
+}
+
+bool FindByLocalAddr6(const std::vector<TCP6Entry>& entries, TCP6Entry* match,
+                      const struct sockaddr* addr) {
+  const struct in6_addr* local = IP6FromInetSockaddr(addr);
+  uint16_t port = PortFromInetSockaddr(addr);
+  return FindBy6(entries, match, [local, port](const TCP6Entry& e) {
+    return (IPv6AddrEqual(&e.local_addr, local) && e.local_port == port);
+  });
+}
+
+bool FindByRemoteAddr6(const std::vector<TCP6Entry>& entries, TCP6Entry* match,
+                       const struct sockaddr* addr) {
+  const struct in6_addr* remote = IP6FromInetSockaddr(addr);
+  uint16_t port = PortFromInetSockaddr(addr);
+  return FindBy6(entries, match, [remote, port](const TCP6Entry& e) {
+    return (IPv6AddrEqual(&e.remote_addr, remote) && e.remote_port == port);
+  });
+}
+
+void ReadIPv6Address(std::string s, struct in6_addr* addr) {
+  uint32_t a0, a1, a2, a3;
+  const char* fmt = "%08X%08X%08X%08X";
+  EXPECT_EQ(sscanf(s.c_str(), fmt, &a0, &a1, &a2, &a3), 4);
+
+  uint8_t* b = addr->s6_addr;
+  *((uint32_t*)&b[0]) = a0;
+  *((uint32_t*)&b[4]) = a1;
+  *((uint32_t*)&b[8]) = a2;
+  *((uint32_t*)&b[12]) = a3;
+}
+
+// Returns a parsed representation of /proc/net/tcp6 entries.
+PosixErrorOr<std::vector<TCP6Entry>> ProcNetTCP6Entries() {
+  std::string content;
+  RETURN_IF_ERRNO(GetContents("/proc/net/tcp6", &content));
+
+  bool found_header = false;
+  std::vector<TCP6Entry> entries;
+  std::vector<std::string> lines = StrSplit(content, '\n');
+  std::cerr << "<contents of /proc/net/tcp6>" << std::endl;
+  for (const std::string& line : lines) {
+    std::cerr << line << std::endl;
+
+    if (!found_header) {
+      EXPECT_EQ(line, kProcNetTCP6Header);
+      found_header = true;
+      continue;
+    }
+    if (line.empty()) {
+      continue;
+    }
+
+    // Parse a single entry from /proc/net/tcp6.
+    //
+    // Example entries:
+    //
+    // clang-format off
+    //
+    //  sl  local_address                         remote_address                        st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode
+    //   0: 00000000000000000000000000000000:1F90 00000000000000000000000000000000:0000 0A 00000000:00000000 00:00000000 00000000     0        0 876340 1 ffff8803da9c9380 100 0 0 10 0
+    //   1: 00000000000000000000000000000000:C350 00000000000000000000000000000000:0000 0A 00000000:00000000 00:00000000 00000000     0        0 876987 1 ffff8803ec408000 100 0 0 10 0
+    //   ^                  ^                  ^                  ^                  ^   ^     ^       ^      ^     ^        ^       ^         ^    ^   ^      ^            ^  ^ ^  ^ ^
+    //   0                  1                  2                  3                  4   5     6       7      8     9       10       11       12   13  14     15           16 17 18 19 20
+    //
+    // clang-format on
+
+    TCP6Entry entry;
+    std::vector<std::string> fields =
+        StrSplit(line, absl::ByAnyChar(": "), absl::SkipEmpty());
+
+    ReadIPv6Address(fields[1], &entry.local_addr);
+    ASSIGN_OR_RETURN_ERRNO(entry.local_port, AtoiBase(fields[2], 16));
+    ReadIPv6Address(fields[3], &entry.remote_addr);
+    ASSIGN_OR_RETURN_ERRNO(entry.remote_port, AtoiBase(fields[4], 16));
+    ASSIGN_OR_RETURN_ERRNO(entry.state, AtoiBase(fields[5], 16));
+    ASSIGN_OR_RETURN_ERRNO(entry.uid, Atoi<uint64_t>(fields[11]));
+    ASSIGN_OR_RETURN_ERRNO(entry.inode, Atoi<uint64_t>(fields[13]));
+
+    entries.push_back(entry);
+  }
+  std::cerr << "<end of /proc/net/tcp6>" << std::endl;
+
+  return entries;
+}
+
+TEST(ProcNetTCP6, Exists) {
+  const std::string content =
+      ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/net/tcp6"));
+  const std::string header_line = StrCat(kProcNetTCP6Header, "\n");
+  if (IsRunningOnGvisor()) {
+    // Should be just the header since we don't have any tcp sockets yet.
+    EXPECT_EQ(content, header_line);
+  } else {
+    // On a general linux machine, we could have abitrary sockets on the system,
+    // so just check the header.
+    EXPECT_THAT(content, ::testing::StartsWith(header_line));
+  }
+}
+
+TEST(ProcNetTCP6, EntryUID) {
+  auto sockets =
+      ASSERT_NO_ERRNO_AND_VALUE(IPv6TCPAcceptBindSocketPair(0).Create());
+  std::vector<TCP6Entry> entries =
+      ASSERT_NO_ERRNO_AND_VALUE(ProcNetTCP6Entries());
+  TCP6Entry e;
+
+  ASSERT_TRUE(FindByLocalAddr6(entries, &e, sockets->first_addr()));
+  EXPECT_EQ(e.uid, geteuid());
+  ASSERT_TRUE(FindByRemoteAddr6(entries, &e, sockets->first_addr()));
+  EXPECT_EQ(e.uid, geteuid());
+}
+
+TEST(ProcNetTCP6, BindAcceptConnect) {
+  auto sockets =
+      ASSERT_NO_ERRNO_AND_VALUE(IPv6TCPAcceptBindSocketPair(0).Create());
+  std::vector<TCP6Entry> entries =
+      ASSERT_NO_ERRNO_AND_VALUE(ProcNetTCP6Entries());
+  // We can only make assertions about the total number of entries if we control
+  // the entire "machine".
+  if (IsRunningOnGvisor()) {
+    EXPECT_EQ(entries.size(), 2);
+  }
+
+  EXPECT_TRUE(FindByLocalAddr6(entries, nullptr, sockets->first_addr()));
+  EXPECT_TRUE(FindByRemoteAddr6(entries, nullptr, sockets->first_addr()));
+}
+
+TEST(ProcNetTCP6, InodeReasonable) {
+  auto sockets =
+      ASSERT_NO_ERRNO_AND_VALUE(IPv6TCPAcceptBindSocketPair(0).Create());
+  std::vector<TCP6Entry> entries =
+      ASSERT_NO_ERRNO_AND_VALUE(ProcNetTCP6Entries());
+
+  TCP6Entry accepted_entry;
+
+  ASSERT_TRUE(
+      FindByLocalAddr6(entries, &accepted_entry, sockets->first_addr()));
+  EXPECT_NE(accepted_entry.inode, 0);
+
+  TCP6Entry client_entry;
+  ASSERT_TRUE(FindByRemoteAddr6(entries, &client_entry, sockets->first_addr()));
+  EXPECT_NE(client_entry.inode, 0);
+  EXPECT_NE(accepted_entry.inode, client_entry.inode);
+}
+
+TEST(ProcNetTCP6, State) {
+  std::unique_ptr<FileDescriptor> server =
+      ASSERT_NO_ERRNO_AND_VALUE(IPv6TCPUnboundSocket(0).Create());
+
+  auto test_addr = V6Loopback();
+  ASSERT_THAT(
+      bind(server->get(), reinterpret_cast<struct sockaddr*>(&test_addr.addr),
+           test_addr.addr_len),
+      SyscallSucceeds());
+
+  struct sockaddr_in6 addr6;
+  socklen_t addrlen = sizeof(struct sockaddr_in6);
+  auto* addr = reinterpret_cast<struct sockaddr*>(&addr6);
+  ASSERT_THAT(getsockname(server->get(), addr, &addrlen), SyscallSucceeds());
+  ASSERT_EQ(addrlen, sizeof(struct sockaddr_in6));
+
+  ASSERT_THAT(listen(server->get(), 10), SyscallSucceeds());
+  std::vector<TCP6Entry> entries =
+      ASSERT_NO_ERRNO_AND_VALUE(ProcNetTCP6Entries());
+  TCP6Entry listen_entry;
+
+  ASSERT_TRUE(FindByLocalAddr6(entries, &listen_entry, addr));
+  EXPECT_EQ(listen_entry.state, TCP_LISTEN);
+
+  std::unique_ptr<FileDescriptor> client =
+      ASSERT_NO_ERRNO_AND_VALUE(IPv6TCPUnboundSocket(0).Create());
+  ASSERT_THAT(RetryEINTR(connect)(client->get(), addr, addrlen),
+              SyscallSucceeds());
+  entries = ASSERT_NO_ERRNO_AND_VALUE(ProcNetTCP6Entries());
+  ASSERT_TRUE(FindByLocalAddr6(entries, &listen_entry, addr));
+  EXPECT_EQ(listen_entry.state, TCP_LISTEN);
+  TCP6Entry client_entry;
+  ASSERT_TRUE(FindByRemoteAddr6(entries, &client_entry, addr));
+  EXPECT_EQ(client_entry.state, TCP_ESTABLISHED);
+
+  FileDescriptor accepted =
+      ASSERT_NO_ERRNO_AND_VALUE(Accept(server->get(), nullptr, nullptr));
+
+  const struct in6_addr* local = IP6FromInetSockaddr(addr);
+  const uint16_t accepted_local_port = PortFromInetSockaddr(addr);
+
+  entries = ASSERT_NO_ERRNO_AND_VALUE(ProcNetTCP6Entries());
+  TCP6Entry accepted_entry;
+  ASSERT_TRUE(FindBy6(
+      entries, &accepted_entry,
+      [client_entry, local, accepted_local_port](const TCP6Entry& e) {
+        return IPv6AddrEqual(&e.local_addr, local) &&
+               e.local_port == accepted_local_port &&
+               IPv6AddrEqual(&e.remote_addr, &client_entry.local_addr) &&
+               e.remote_port == client_entry.local_port;
+      }));
+  EXPECT_EQ(accepted_entry.state, TCP_ESTABLISHED);
+}
+
+}  // namespace
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/proc_net_udp.cc b/test/syscalls/linux/proc_net_udp.cc
new file mode 100644
index 000000000..786b4b4af
--- /dev/null
+++ b/test/syscalls/linux/proc_net_udp.cc
@@ -0,0 +1,309 @@
+// Copyright 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <netinet/tcp.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "absl/strings/numbers.h"
+#include "absl/strings/str_join.h"
+#include "absl/strings/str_split.h"
+#include "test/syscalls/linux/ip_socket_test_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+using absl::StrCat;
+using absl::StrFormat;
+using absl::StrSplit;
+
+constexpr char kProcNetUDPHeader[] =
+    "  sl  local_address rem_address   st tx_queue rx_queue tr tm->when "
+    "retrnsmt   uid  timeout inode ref pointer drops             ";
+
+// UDPEntry represents a single entry from /proc/net/udp.
+struct UDPEntry {
+  uint32_t local_addr;
+  uint16_t local_port;
+
+  uint32_t remote_addr;
+  uint16_t remote_port;
+
+  uint64_t state;
+  uint64_t uid;
+  uint64_t inode;
+};
+
+std::string DescribeFirstInetSocket(const SocketPair& sockets) {
+  const struct sockaddr* addr = sockets.first_addr();
+  return StrFormat("First test socket: fd:%d %8X:%4X", sockets.first_fd(),
+                   IPFromInetSockaddr(addr), PortFromInetSockaddr(addr));
+}
+
+std::string DescribeSecondInetSocket(const SocketPair& sockets) {
+  const struct sockaddr* addr = sockets.second_addr();
+  return StrFormat("Second test socket fd:%d %8X:%4X", sockets.second_fd(),
+                   IPFromInetSockaddr(addr), PortFromInetSockaddr(addr));
+}
+
+// Finds the first entry in 'entries' for which 'predicate' returns true.
+// Returns true on match, and set 'match' to a copy of the matching entry. If
+// 'match' is null, it's ignored.
+bool FindBy(const std::vector<UDPEntry>& entries, UDPEntry* match,
+            std::function<bool(const UDPEntry&)> predicate) {
+  for (const UDPEntry& entry : entries) {
+    if (predicate(entry)) {
+      if (match != nullptr) {
+        *match = entry;
+      }
+      return true;
+    }
+  }
+  return false;
+}
+
+bool FindByLocalAddr(const std::vector<UDPEntry>& entries, UDPEntry* match,
+                     const struct sockaddr* addr) {
+  uint32_t host = IPFromInetSockaddr(addr);
+  uint16_t port = PortFromInetSockaddr(addr);
+  return FindBy(entries, match, [host, port](const UDPEntry& e) {
+    return (e.local_addr == host && e.local_port == port);
+  });
+}
+
+bool FindByRemoteAddr(const std::vector<UDPEntry>& entries, UDPEntry* match,
+                      const struct sockaddr* addr) {
+  uint32_t host = IPFromInetSockaddr(addr);
+  uint16_t port = PortFromInetSockaddr(addr);
+  return FindBy(entries, match, [host, port](const UDPEntry& e) {
+    return (e.remote_addr == host && e.remote_port == port);
+  });
+}
+
+PosixErrorOr<uint64_t> InodeFromSocketFD(int fd) {
+  ASSIGN_OR_RETURN_ERRNO(struct stat s, Fstat(fd));
+  if (!S_ISSOCK(s.st_mode)) {
+    return PosixError(EINVAL, StrFormat("FD %d is not a socket", fd));
+  }
+  return s.st_ino;
+}
+
+PosixErrorOr<bool> FindByFD(const std::vector<UDPEntry>& entries,
+                            UDPEntry* match, int fd) {
+  ASSIGN_OR_RETURN_ERRNO(uint64_t inode, InodeFromSocketFD(fd));
+  return FindBy(entries, match,
+                [inode](const UDPEntry& e) { return (e.inode == inode); });
+}
+
+// Returns a parsed representation of /proc/net/udp entries.
+PosixErrorOr<std::vector<UDPEntry>> ProcNetUDPEntries() {
+  std::string content;
+  RETURN_IF_ERRNO(GetContents("/proc/net/udp", &content));
+
+  bool found_header = false;
+  std::vector<UDPEntry> entries;
+  std::vector<std::string> lines = StrSplit(content, '\n');
+  std::cerr << "<contents of /proc/net/udp>" << std::endl;
+  for (const std::string& line : lines) {
+    std::cerr << line << std::endl;
+
+    if (!found_header) {
+      EXPECT_EQ(line, kProcNetUDPHeader);
+      found_header = true;
+      continue;
+    }
+    if (line.empty()) {
+      continue;
+    }
+
+    // Parse a single entry from /proc/net/udp.
+    //
+    // Example entries:
+    //
+    // clang-format off
+    //
+    //  sl  local_address rem_address   st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode ref pointer drops
+    // 3503: 0100007F:0035 00000000:0000 07 00000000:00000000 00:00000000 00000000     0        0 33317 2 0000000000000000 0
+    // 3518: 00000000:0044 00000000:0000 07 00000000:00000000 00:00000000 00000000     0        0 40394 2 0000000000000000 0
+    //   ^     ^       ^     ^       ^   ^     ^       ^      ^     ^        ^         ^        ^   ^   ^      ^           ^
+    //   0     1       2     3       4   5     6       7      8     9       10        11       12  13  14     15           16
+    //
+    // clang-format on
+
+    UDPEntry entry;
+    std::vector<std::string> fields =
+        StrSplit(line, absl::ByAnyChar(": "), absl::SkipEmpty());
+
+    ASSIGN_OR_RETURN_ERRNO(entry.local_addr, AtoiBase(fields[1], 16));
+    ASSIGN_OR_RETURN_ERRNO(entry.local_port, AtoiBase(fields[2], 16));
+
+    ASSIGN_OR_RETURN_ERRNO(entry.remote_addr, AtoiBase(fields[3], 16));
+    ASSIGN_OR_RETURN_ERRNO(entry.remote_port, AtoiBase(fields[4], 16));
+
+    ASSIGN_OR_RETURN_ERRNO(entry.state, AtoiBase(fields[5], 16));
+    ASSIGN_OR_RETURN_ERRNO(entry.uid, Atoi<uint64_t>(fields[11]));
+    ASSIGN_OR_RETURN_ERRNO(entry.inode, Atoi<uint64_t>(fields[13]));
+
+    // Linux shares internal data structures between TCP and UDP sockets. The
+    // proc entries for UDP sockets share some fields with TCP sockets, but
+    // these fields should always be zero as they're not meaningful for UDP
+    // sockets.
+    EXPECT_EQ(fields[8], "00") << StrFormat("sl:%s, tr", fields[0]);
+    EXPECT_EQ(fields[9], "00000000") << StrFormat("sl:%s, tm->when", fields[0]);
+    EXPECT_EQ(fields[10], "00000000")
+        << StrFormat("sl:%s, retrnsmt", fields[0]);
+    EXPECT_EQ(fields[12], "0") << StrFormat("sl:%s, timeout", fields[0]);
+
+    entries.push_back(entry);
+  }
+  std::cerr << "<end of /proc/net/udp>" << std::endl;
+
+  return entries;
+}
+
+TEST(ProcNetUDP, Exists) {
+  const std::string content =
+      ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/net/udp"));
+  const std::string header_line = StrCat(kProcNetUDPHeader, "\n");
+  EXPECT_THAT(content, ::testing::StartsWith(header_line));
+}
+
+TEST(ProcNetUDP, EntryUID) {
+  auto sockets =
+      ASSERT_NO_ERRNO_AND_VALUE(IPv4UDPBidirectionalBindSocketPair(0).Create());
+  std::vector<UDPEntry> entries =
+      ASSERT_NO_ERRNO_AND_VALUE(ProcNetUDPEntries());
+  UDPEntry e;
+  ASSERT_TRUE(FindByLocalAddr(entries, &e, sockets->first_addr()))
+      << DescribeFirstInetSocket(*sockets);
+  EXPECT_EQ(e.uid, geteuid());
+  ASSERT_TRUE(FindByRemoteAddr(entries, &e, sockets->first_addr()))
+      << DescribeSecondInetSocket(*sockets);
+  EXPECT_EQ(e.uid, geteuid());
+}
+
+TEST(ProcNetUDP, FindMutualEntries) {
+  auto sockets =
+      ASSERT_NO_ERRNO_AND_VALUE(IPv4UDPBidirectionalBindSocketPair(0).Create());
+  std::vector<UDPEntry> entries =
+      ASSERT_NO_ERRNO_AND_VALUE(ProcNetUDPEntries());
+
+  EXPECT_TRUE(FindByLocalAddr(entries, nullptr, sockets->first_addr()))
+      << DescribeFirstInetSocket(*sockets);
+  EXPECT_TRUE(FindByRemoteAddr(entries, nullptr, sockets->first_addr()))
+      << DescribeSecondInetSocket(*sockets);
+
+  EXPECT_TRUE(FindByLocalAddr(entries, nullptr, sockets->second_addr()))
+      << DescribeSecondInetSocket(*sockets);
+  EXPECT_TRUE(FindByRemoteAddr(entries, nullptr, sockets->second_addr()))
+      << DescribeFirstInetSocket(*sockets);
+}
+
+TEST(ProcNetUDP, EntriesRemovedOnClose) {
+  auto sockets =
+      ASSERT_NO_ERRNO_AND_VALUE(IPv4UDPBidirectionalBindSocketPair(0).Create());
+  std::vector<UDPEntry> entries =
+      ASSERT_NO_ERRNO_AND_VALUE(ProcNetUDPEntries());
+
+  EXPECT_TRUE(FindByLocalAddr(entries, nullptr, sockets->first_addr()))
+      << DescribeFirstInetSocket(*sockets);
+  EXPECT_TRUE(FindByLocalAddr(entries, nullptr, sockets->second_addr()))
+      << DescribeSecondInetSocket(*sockets);
+
+  EXPECT_THAT(close(sockets->release_first_fd()), SyscallSucceeds());
+  entries = ASSERT_NO_ERRNO_AND_VALUE(ProcNetUDPEntries());
+  // First socket's entry should be gone, but the second socket's entry should
+  // still exist.
+  EXPECT_FALSE(FindByLocalAddr(entries, nullptr, sockets->first_addr()))
+      << DescribeFirstInetSocket(*sockets);
+  EXPECT_TRUE(FindByLocalAddr(entries, nullptr, sockets->second_addr()))
+      << DescribeSecondInetSocket(*sockets);
+
+  EXPECT_THAT(close(sockets->release_second_fd()), SyscallSucceeds());
+  entries = ASSERT_NO_ERRNO_AND_VALUE(ProcNetUDPEntries());
+  // Both entries should be gone.
+  EXPECT_FALSE(FindByLocalAddr(entries, nullptr, sockets->first_addr()))
+      << DescribeFirstInetSocket(*sockets);
+  EXPECT_FALSE(FindByLocalAddr(entries, nullptr, sockets->second_addr()))
+      << DescribeSecondInetSocket(*sockets);
+}
+
+PosixErrorOr<std::unique_ptr<FileDescriptor>> BoundUDPSocket() {
+  ASSIGN_OR_RETURN_ERRNO(std::unique_ptr<FileDescriptor> socket,
+                         IPv4UDPUnboundSocket(0).Create());
+  struct sockaddr_in addr;
+  addr.sin_family = AF_INET;
+  addr.sin_addr.s_addr = htonl(INADDR_ANY);
+  addr.sin_port = 0;
+
+  int res = bind(socket->get(), reinterpret_cast<const struct sockaddr*>(&addr),
+                 sizeof(addr));
+  if (res) {
+    return PosixError(errno, "bind()");
+  }
+  return socket;
+}
+
+TEST(ProcNetUDP, BoundEntry) {
+  std::unique_ptr<FileDescriptor> socket =
+      ASSERT_NO_ERRNO_AND_VALUE(BoundUDPSocket());
+  struct sockaddr addr;
+  socklen_t len = sizeof(addr);
+  ASSERT_THAT(getsockname(socket->get(), &addr, &len), SyscallSucceeds());
+  uint16_t port = PortFromInetSockaddr(&addr);
+
+  std::vector<UDPEntry> entries =
+      ASSERT_NO_ERRNO_AND_VALUE(ProcNetUDPEntries());
+  UDPEntry e;
+  ASSERT_TRUE(ASSERT_NO_ERRNO_AND_VALUE(FindByFD(entries, &e, socket->get())));
+  EXPECT_EQ(e.local_port, port);
+  EXPECT_EQ(e.remote_addr, 0);
+  EXPECT_EQ(e.remote_port, 0);
+}
+
+TEST(ProcNetUDP, BoundSocketStateClosed) {
+  std::unique_ptr<FileDescriptor> socket =
+      ASSERT_NO_ERRNO_AND_VALUE(BoundUDPSocket());
+  std::vector<UDPEntry> entries =
+      ASSERT_NO_ERRNO_AND_VALUE(ProcNetUDPEntries());
+  UDPEntry e;
+  ASSERT_TRUE(ASSERT_NO_ERRNO_AND_VALUE(FindByFD(entries, &e, socket->get())));
+  EXPECT_EQ(e.state, TCP_CLOSE);
+}
+
+TEST(ProcNetUDP, ConnectedSocketStateEstablished) {
+  auto sockets =
+      ASSERT_NO_ERRNO_AND_VALUE(IPv4UDPBidirectionalBindSocketPair(0).Create());
+  std::vector<UDPEntry> entries =
+      ASSERT_NO_ERRNO_AND_VALUE(ProcNetUDPEntries());
+
+  UDPEntry e;
+  ASSERT_TRUE(FindByLocalAddr(entries, &e, sockets->first_addr()))
+      << DescribeFirstInetSocket(*sockets);
+  EXPECT_EQ(e.state, TCP_ESTABLISHED);
+
+  ASSERT_TRUE(FindByLocalAddr(entries, &e, sockets->second_addr()))
+      << DescribeSecondInetSocket(*sockets);
+  EXPECT_EQ(e.state, TCP_ESTABLISHED);
+}
+
+}  // namespace
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/proc_net_unix.cc b/test/syscalls/linux/proc_net_unix.cc
new file mode 100644
index 000000000..a63067586
--- /dev/null
+++ b/test/syscalls/linux/proc_net_unix.cc
@@ -0,0 +1,443 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "gtest/gtest.h"
+#include "absl/strings/numbers.h"
+#include "absl/strings/str_format.h"
+#include "absl/strings/str_join.h"
+#include "absl/strings/str_split.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/fs_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+using absl::StrCat;
+using absl::StreamFormat;
+using absl::StrFormat;
+
+constexpr char kProcNetUnixHeader[] =
+    "Num       RefCount Protocol Flags    Type St Inode Path";
+
+// Possible values of the "st" field in a /proc/net/unix entry. Source: Linux
+// kernel, include/uapi/linux/net.h.
+enum {
+  SS_FREE = 0,      // Not allocated
+  SS_UNCONNECTED,   // Unconnected to any socket
+  SS_CONNECTING,    // In process of connecting
+  SS_CONNECTED,     // Connected to socket
+  SS_DISCONNECTING  // In process of disconnecting
+};
+
+// UnixEntry represents a single entry from /proc/net/unix.
+struct UnixEntry {
+  uintptr_t addr;
+  uint64_t refs;
+  uint64_t protocol;
+  uint64_t flags;
+  uint64_t type;
+  uint64_t state;
+  uint64_t inode;
+  std::string path;
+};
+
+// Abstract socket paths can have either trailing null bytes or '@'s as padding
+// at the end, depending on the linux version. This function strips any such
+// padding.
+void StripAbstractPathPadding(std::string* s) {
+  const char pad_char = s->back();
+  if (pad_char != '\0' && pad_char != '@') {
+    return;
+  }
+
+  const auto last_pos = s->find_last_not_of(pad_char);
+  if (last_pos != std::string::npos) {
+    s->resize(last_pos + 1);
+  }
+}
+
+// Precondition: addr must be a unix socket address (i.e. sockaddr_un) and
+// addr->sun_path must be null-terminated. This is always the case if addr comes
+// from Linux:
+//
+// Per man unix(7):
+//
+// "When the address of a pathname socket is returned (by [getsockname(2)]), its
+//  length is
+//
+//     offsetof(struct sockaddr_un, sun_path) + strlen(sun_path) + 1
+//
+//  and sun_path contains the null-terminated pathname."
+std::string ExtractPath(const struct sockaddr* addr) {
+  const char* path =
+      reinterpret_cast<const struct sockaddr_un*>(addr)->sun_path;
+  // Note: sockaddr_un.sun_path is an embedded character array of length
+  // UNIX_PATH_MAX, so we can always safely dereference the first 2 bytes below.
+  //
+  // We also rely on the path being null-terminated.
+  if (path[0] == 0) {
+    std::string abstract_path = StrCat("@", &path[1]);
+    StripAbstractPathPadding(&abstract_path);
+    return abstract_path;
+  }
+  return std::string(path);
+}
+
+// Returns a parsed representation of /proc/net/unix entries.
+PosixErrorOr<std::vector<UnixEntry>> ProcNetUnixEntries() {
+  std::string content;
+  RETURN_IF_ERRNO(GetContents("/proc/net/unix", &content));
+
+  bool skipped_header = false;
+  std::vector<UnixEntry> entries;
+  std::vector<std::string> lines = absl::StrSplit(content, '\n');
+  std::cerr << "<contents of /proc/net/unix>" << std::endl;
+  for (const std::string& line : lines) {
+    // Emit the proc entry to the test output to provide context for the test
+    // results.
+    std::cerr << line << std::endl;
+
+    if (!skipped_header) {
+      EXPECT_EQ(line, kProcNetUnixHeader);
+      skipped_header = true;
+      continue;
+    }
+    if (line.empty()) {
+      continue;
+    }
+
+    // Parse a single entry from /proc/net/unix.
+    //
+    // Sample file:
+    //
+    // clang-format off
+    //
+    // Num       RefCount Protocol Flags    Type St Inode Path"
+    // ffffa130e7041c00: 00000002 00000000 00010000 0001 01 1299413685 /tmp/control_server/13293772586877554487
+    // ffffa14f547dc400: 00000002 00000000 00010000 0001 01  3793 @remote_coredump
+    //
+    // clang-format on
+    //
+    // Note that from the second entry, the inode number can be padded using
+    // spaces, so we need to handle it separately during parsing. See
+    // net/unix/af_unix.c:unix_seq_show() for how these entries are produced. In
+    // particular, only the inode field is padded with spaces.
+    UnixEntry entry;
+
+    // Process the first 6 fields, up to but not including "Inode".
+    std::vector<std::string> fields =
+        absl::StrSplit(line, absl::MaxSplits(' ', 6));
+
+    if (fields.size() < 7) {
+      return PosixError(EINVAL, StrFormat("Invalid entry: '%s'\n", line));
+    }
+
+    // AtoiBase can't handle the ':' in the "Num" field, so strip it out.
+    std::vector<std::string> addr = absl::StrSplit(fields[0], ':');
+    ASSIGN_OR_RETURN_ERRNO(entry.addr, AtoiBase(addr[0], 16));
+
+    ASSIGN_OR_RETURN_ERRNO(entry.refs, AtoiBase(fields[1], 16));
+    ASSIGN_OR_RETURN_ERRNO(entry.protocol, AtoiBase(fields[2], 16));
+    ASSIGN_OR_RETURN_ERRNO(entry.flags, AtoiBase(fields[3], 16));
+    ASSIGN_OR_RETURN_ERRNO(entry.type, AtoiBase(fields[4], 16));
+    ASSIGN_OR_RETURN_ERRNO(entry.state, AtoiBase(fields[5], 16));
+
+    absl::string_view rest = absl::StripAsciiWhitespace(fields[6]);
+    fields = absl::StrSplit(rest, absl::MaxSplits(' ', 1));
+    if (fields.empty()) {
+      return PosixError(
+          EINVAL, StrFormat("Invalid entry, missing 'Inode': '%s'\n", line));
+    }
+    ASSIGN_OR_RETURN_ERRNO(entry.inode, AtoiBase(fields[0], 10));
+
+    entry.path = "";
+    if (fields.size() > 1) {
+      entry.path = fields[1];
+      StripAbstractPathPadding(&entry.path);
+    }
+
+    entries.push_back(entry);
+  }
+  std::cerr << "<end of /proc/net/unix>" << std::endl;
+
+  return entries;
+}
+
+// Finds the first entry in 'entries' for which 'predicate' returns true.
+// Returns true on match, and sets 'match' to point to the matching entry.
+bool FindBy(std::vector<UnixEntry> entries, UnixEntry* match,
+            std::function<bool(const UnixEntry&)> predicate) {
+  for (int i = 0; i < entries.size(); ++i) {
+    if (predicate(entries[i])) {
+      *match = entries[i];
+      return true;
+    }
+  }
+  return false;
+}
+
+bool FindByPath(std::vector<UnixEntry> entries, UnixEntry* match,
+                const std::string& path) {
+  return FindBy(entries, match,
+                [path](const UnixEntry& e) { return e.path == path; });
+}
+
+TEST(ProcNetUnix, Exists) {
+  const std::string content =
+      ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/net/unix"));
+  const std::string header_line = StrCat(kProcNetUnixHeader, "\n");
+  if (IsRunningOnGvisor()) {
+    // Should be just the header since we don't have any unix domain sockets
+    // yet.
+    EXPECT_EQ(content, header_line);
+  } else {
+    // However, on a general linux machine, we could have abitrary sockets on
+    // the system, so just check the header.
+    EXPECT_THAT(content, ::testing::StartsWith(header_line));
+  }
+}
+
+TEST(ProcNetUnix, FilesystemBindAcceptConnect) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(
+      FilesystemBoundUnixDomainSocketPair(SOCK_STREAM).Create());
+
+  std::string path1 = ExtractPath(sockets->first_addr());
+  std::string path2 = ExtractPath(sockets->second_addr());
+  std::cerr << StreamFormat("Server socket address (path1): %s\n", path1);
+  std::cerr << StreamFormat("Client socket address (path2): %s\n", path2);
+
+  std::vector<UnixEntry> entries =
+      ASSERT_NO_ERRNO_AND_VALUE(ProcNetUnixEntries());
+  if (IsRunningOnGvisor()) {
+    EXPECT_EQ(entries.size(), 2);
+  }
+
+  // The server-side socket's path is listed in the socket entry...
+  UnixEntry s1;
+  EXPECT_TRUE(FindByPath(entries, &s1, path1));
+
+  // ... but the client-side socket's path is not.
+  UnixEntry s2;
+  EXPECT_FALSE(FindByPath(entries, &s2, path2));
+}
+
+TEST(ProcNetUnix, AbstractBindAcceptConnect) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(
+      AbstractBoundUnixDomainSocketPair(SOCK_STREAM).Create());
+
+  std::string path1 = ExtractPath(sockets->first_addr());
+  std::string path2 = ExtractPath(sockets->second_addr());
+  std::cerr << StreamFormat("Server socket address (path1): '%s'\n", path1);
+  std::cerr << StreamFormat("Client socket address (path2): '%s'\n", path2);
+
+  std::vector<UnixEntry> entries =
+      ASSERT_NO_ERRNO_AND_VALUE(ProcNetUnixEntries());
+  if (IsRunningOnGvisor()) {
+    EXPECT_EQ(entries.size(), 2);
+  }
+
+  // The server-side socket's path is listed in the socket entry...
+  UnixEntry s1;
+  EXPECT_TRUE(FindByPath(entries, &s1, path1));
+
+  // ... but the client-side socket's path is not.
+  UnixEntry s2;
+  EXPECT_FALSE(FindByPath(entries, &s2, path2));
+}
+
+TEST(ProcNetUnix, SocketPair) {
+  // Under gvisor, ensure a socketpair() syscall creates exactly 2 new
+  // entries. We have no way to verify this under Linux, as we have no control
+  // over socket creation on a general Linux machine.
+  SKIP_IF(!IsRunningOnGvisor());
+
+  std::vector<UnixEntry> entries =
+      ASSERT_NO_ERRNO_AND_VALUE(ProcNetUnixEntries());
+  ASSERT_EQ(entries.size(), 0);
+
+  auto sockets =
+      ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_STREAM).Create());
+
+  entries = ASSERT_NO_ERRNO_AND_VALUE(ProcNetUnixEntries());
+  EXPECT_EQ(entries.size(), 2);
+}
+
+TEST(ProcNetUnix, StreamSocketStateUnconnectedOnBind) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(
+      AbstractUnboundUnixDomainSocketPair(SOCK_STREAM).Create());
+
+  ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+                   sockets->first_addr_size()),
+              SyscallSucceeds());
+
+  std::vector<UnixEntry> entries =
+      ASSERT_NO_ERRNO_AND_VALUE(ProcNetUnixEntries());
+
+  const std::string address = ExtractPath(sockets->first_addr());
+  UnixEntry bind_entry;
+  ASSERT_TRUE(FindByPath(entries, &bind_entry, address));
+  EXPECT_EQ(bind_entry.state, SS_UNCONNECTED);
+}
+
+TEST(ProcNetUnix, StreamSocketStateStateUnconnectedOnListen) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(
+      AbstractUnboundUnixDomainSocketPair(SOCK_STREAM).Create());
+
+  ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+                   sockets->first_addr_size()),
+              SyscallSucceeds());
+
+  std::vector<UnixEntry> entries =
+      ASSERT_NO_ERRNO_AND_VALUE(ProcNetUnixEntries());
+
+  const std::string address = ExtractPath(sockets->first_addr());
+  UnixEntry bind_entry;
+  ASSERT_TRUE(FindByPath(entries, &bind_entry, address));
+  EXPECT_EQ(bind_entry.state, SS_UNCONNECTED);
+
+  ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds());
+
+  entries = ASSERT_NO_ERRNO_AND_VALUE(ProcNetUnixEntries());
+  UnixEntry listen_entry;
+  ASSERT_TRUE(
+      FindByPath(entries, &listen_entry, ExtractPath(sockets->first_addr())));
+  EXPECT_EQ(listen_entry.state, SS_UNCONNECTED);
+  // The bind and listen entries should refer to the same socket.
+  EXPECT_EQ(listen_entry.inode, bind_entry.inode);
+}
+
+TEST(ProcNetUnix, StreamSocketStateStateConnectedOnAccept) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(
+      AbstractUnboundUnixDomainSocketPair(SOCK_STREAM).Create());
+  const std::string address = ExtractPath(sockets->first_addr());
+  ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+                   sockets->first_addr_size()),
+              SyscallSucceeds());
+  ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds());
+  std::vector<UnixEntry> entries =
+      ASSERT_NO_ERRNO_AND_VALUE(ProcNetUnixEntries());
+  UnixEntry listen_entry;
+  ASSERT_TRUE(
+      FindByPath(entries, &listen_entry, ExtractPath(sockets->first_addr())));
+
+  ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(),
+                      sockets->first_addr_size()),
+              SyscallSucceeds());
+
+  int clientfd;
+  ASSERT_THAT(clientfd = accept(sockets->first_fd(), nullptr, nullptr),
+              SyscallSucceeds());
+
+  // Find the entry for the accepted socket. UDS proc entries don't have a
+  // remote address, so we distinguish the accepted socket from the listen
+  // socket by checking for a different inode.
+  entries = ASSERT_NO_ERRNO_AND_VALUE(ProcNetUnixEntries());
+  UnixEntry accept_entry;
+  ASSERT_TRUE(FindBy(
+      entries, &accept_entry, [address, listen_entry](const UnixEntry& e) {
+        return e.path == address && e.inode != listen_entry.inode;
+      }));
+  EXPECT_EQ(accept_entry.state, SS_CONNECTED);
+  // Listen entry should still be in SS_UNCONNECTED state.
+  ASSERT_TRUE(FindBy(entries, &listen_entry,
+                     [&sockets, listen_entry](const UnixEntry& e) {
+                       return e.path == ExtractPath(sockets->first_addr()) &&
+                              e.inode == listen_entry.inode;
+                     }));
+  EXPECT_EQ(listen_entry.state, SS_UNCONNECTED);
+}
+
+TEST(ProcNetUnix, DgramSocketStateDisconnectingOnBind) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(
+      AbstractUnboundUnixDomainSocketPair(SOCK_DGRAM).Create());
+
+  std::vector<UnixEntry> entries =
+      ASSERT_NO_ERRNO_AND_VALUE(ProcNetUnixEntries());
+
+  // On gVisor, the only two UDS on the system are the ones we just created and
+  // we rely on this to locate the test socket entries in the remainder of the
+  // test. On a generic Linux system, we have no easy way to locate the
+  // corresponding entries, as they don't have an address yet.
+  if (IsRunningOnGvisor()) {
+    ASSERT_EQ(entries.size(), 2);
+    for (const auto& e : entries) {
+      ASSERT_EQ(e.state, SS_DISCONNECTING);
+    }
+  }
+
+  ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+                   sockets->first_addr_size()),
+              SyscallSucceeds());
+
+  entries = ASSERT_NO_ERRNO_AND_VALUE(ProcNetUnixEntries());
+  const std::string address = ExtractPath(sockets->first_addr());
+  UnixEntry bind_entry;
+  ASSERT_TRUE(FindByPath(entries, &bind_entry, address));
+  EXPECT_EQ(bind_entry.state, SS_UNCONNECTED);
+}
+
+TEST(ProcNetUnix, DgramSocketStateConnectingOnConnect) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(
+      AbstractUnboundUnixDomainSocketPair(SOCK_DGRAM).Create());
+
+  std::vector<UnixEntry> entries =
+      ASSERT_NO_ERRNO_AND_VALUE(ProcNetUnixEntries());
+
+  // On gVisor, the only two UDS on the system are the ones we just created and
+  // we rely on this to locate the test socket entries in the remainder of the
+  // test. On a generic Linux system, we have no easy way to locate the
+  // corresponding entries, as they don't have an address yet.
+  if (IsRunningOnGvisor()) {
+    ASSERT_EQ(entries.size(), 2);
+    for (const auto& e : entries) {
+      ASSERT_EQ(e.state, SS_DISCONNECTING);
+    }
+  }
+
+  ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+                   sockets->first_addr_size()),
+              SyscallSucceeds());
+
+  entries = ASSERT_NO_ERRNO_AND_VALUE(ProcNetUnixEntries());
+  const std::string address = ExtractPath(sockets->first_addr());
+  UnixEntry bind_entry;
+  ASSERT_TRUE(FindByPath(entries, &bind_entry, address));
+
+  ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(),
+                      sockets->first_addr_size()),
+              SyscallSucceeds());
+
+  entries = ASSERT_NO_ERRNO_AND_VALUE(ProcNetUnixEntries());
+
+  // Once again, we have no easy way to identify the connecting socket as it has
+  // no listed address. We can only identify the entry as the "non-bind socket
+  // entry" on gVisor, where we're guaranteed to have only the two entries we
+  // create during this test.
+  if (IsRunningOnGvisor()) {
+    ASSERT_EQ(entries.size(), 2);
+    UnixEntry connect_entry;
+    ASSERT_TRUE(
+        FindBy(entries, &connect_entry, [bind_entry](const UnixEntry& e) {
+          return e.inode != bind_entry.inode;
+        }));
+    EXPECT_EQ(connect_entry.state, SS_CONNECTING);
+  }
+}
+
+}  // namespace
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/proc_pid_oomscore.cc b/test/syscalls/linux/proc_pid_oomscore.cc
new file mode 100644
index 000000000..707821a3f
--- /dev/null
+++ b/test/syscalls/linux/proc_pid_oomscore.cc
@@ -0,0 +1,72 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+
+#include <exception>
+#include <iostream>
+#include <string>
+
+#include "test/util/fs_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+PosixErrorOr<int> ReadProcNumber(std::string path) {
+  ASSIGN_OR_RETURN_ERRNO(std::string contents, GetContents(path));
+  EXPECT_EQ(contents[contents.length() - 1], '\n');
+
+  int num;
+  if (!absl::SimpleAtoi(contents, &num)) {
+    return PosixError(EINVAL, "invalid value: " + contents);
+  }
+
+  return num;
+}
+
+TEST(ProcPidOomscoreTest, BasicRead) {
+  auto const oom_score =
+      ASSERT_NO_ERRNO_AND_VALUE(ReadProcNumber("/proc/self/oom_score"));
+  EXPECT_LE(oom_score, 1000);
+  EXPECT_GE(oom_score, -1000);
+}
+
+TEST(ProcPidOomscoreAdjTest, BasicRead) {
+  auto const oom_score =
+      ASSERT_NO_ERRNO_AND_VALUE(ReadProcNumber("/proc/self/oom_score_adj"));
+
+  // oom_score_adj defaults to 0.
+  EXPECT_EQ(oom_score, 0);
+}
+
+TEST(ProcPidOomscoreAdjTest, BasicWrite) {
+  constexpr int test_value = 7;
+  FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/self/oom_score_adj", O_WRONLY));
+  ASSERT_THAT(
+      RetryEINTR(write)(fd.get(), std::to_string(test_value).c_str(), 1),
+      SyscallSucceeds());
+
+  auto const oom_score =
+      ASSERT_NO_ERRNO_AND_VALUE(ReadProcNumber("/proc/self/oom_score_adj"));
+  EXPECT_EQ(oom_score, test_value);
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/proc_pid_smaps.cc b/test/syscalls/linux/proc_pid_smaps.cc
new file mode 100644
index 000000000..9fb1b3a2c
--- /dev/null
+++ b/test/syscalls/linux/proc_pid_smaps.cc
@@ -0,0 +1,468 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <iostream>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "absl/container/flat_hash_set.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/str_format.h"
+#include "absl/strings/str_split.h"
+#include "absl/strings/string_view.h"
+#include "absl/types/optional.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/fs_util.h"
+#include "test/util/memory_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/proc_util.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+using ::testing::Contains;
+using ::testing::ElementsAreArray;
+using ::testing::IsSupersetOf;
+using ::testing::Not;
+using ::testing::Optional;
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+struct ProcPidSmapsEntry {
+  ProcMapsEntry maps_entry;
+
+  // These fields should always exist, as they were included in e070ad49f311
+  // "[PATCH] add /proc/pid/smaps".
+  size_t size_kb;
+  size_t rss_kb;
+  size_t shared_clean_kb;
+  size_t shared_dirty_kb;
+  size_t private_clean_kb;
+  size_t private_dirty_kb;
+
+  // These fields were added later and may not be present.
+  absl::optional<size_t> pss_kb;
+  absl::optional<size_t> referenced_kb;
+  absl::optional<size_t> anonymous_kb;
+  absl::optional<size_t> anon_huge_pages_kb;
+  absl::optional<size_t> shared_hugetlb_kb;
+  absl::optional<size_t> private_hugetlb_kb;
+  absl::optional<size_t> swap_kb;
+  absl::optional<size_t> swap_pss_kb;
+  absl::optional<size_t> kernel_page_size_kb;
+  absl::optional<size_t> mmu_page_size_kb;
+  absl::optional<size_t> locked_kb;
+
+  // Caution: "Note that there is no guarantee that every flag and associated
+  // mnemonic will be present in all further kernel releases. Things get
+  // changed, the flags may be vanished or the reverse -- new added." - Linux
+  // Documentation/filesystems/proc.txt, on VmFlags. Avoid checking for any
+  // flags that are not extremely well-established.
+  absl::optional<std::vector<std::string>> vm_flags;
+};
+
+// Given the value part of a /proc/[pid]/smaps field containing a value in kB
+// (for example, "    4 kB", returns the value in kB (in this example, 4).
+PosixErrorOr<size_t> SmapsValueKb(absl::string_view value) {
+  // TODO(jamieliu): let us use RE2 or <regex>
+  std::pair<absl::string_view, absl::string_view> parts =
+      absl::StrSplit(value, ' ', absl::SkipEmpty());
+  if (parts.second != "kB") {
+    return PosixError(EINVAL,
+                      absl::StrCat("invalid smaps field value: ", value));
+  }
+  ASSIGN_OR_RETURN_ERRNO(auto val_kb, Atoi<size_t>(parts.first));
+  return val_kb;
+}
+
+PosixErrorOr<std::vector<ProcPidSmapsEntry>> ParseProcPidSmaps(
+    absl::string_view contents) {
+  std::vector<ProcPidSmapsEntry> entries;
+  absl::optional<ProcPidSmapsEntry> entry;
+  bool have_size_kb = false;
+  bool have_rss_kb = false;
+  bool have_shared_clean_kb = false;
+  bool have_shared_dirty_kb = false;
+  bool have_private_clean_kb = false;
+  bool have_private_dirty_kb = false;
+
+  auto const finish_entry = [&] {
+    if (entry) {
+      if (!have_size_kb) {
+        return PosixError(EINVAL, "smaps entry is missing Size");
+      }
+      if (!have_rss_kb) {
+        return PosixError(EINVAL, "smaps entry is missing Rss");
+      }
+      if (!have_shared_clean_kb) {
+        return PosixError(EINVAL, "smaps entry is missing Shared_Clean");
+      }
+      if (!have_shared_dirty_kb) {
+        return PosixError(EINVAL, "smaps entry is missing Shared_Dirty");
+      }
+      if (!have_private_clean_kb) {
+        return PosixError(EINVAL, "smaps entry is missing Private_Clean");
+      }
+      if (!have_private_dirty_kb) {
+        return PosixError(EINVAL, "smaps entry is missing Private_Dirty");
+      }
+      // std::move(entry.value()) instead of std::move(entry).value(), because
+      // otherwise tools may report a "use-after-move" warning, which is
+      // spurious because entry.emplace() below resets entry to a new
+      // ProcPidSmapsEntry.
+      entries.emplace_back(std::move(entry.value()));
+    }
+    entry.emplace();
+    have_size_kb = false;
+    have_rss_kb = false;
+    have_shared_clean_kb = false;
+    have_shared_dirty_kb = false;
+    have_private_clean_kb = false;
+    have_private_dirty_kb = false;
+    return NoError();
+  };
+
+  // Holds key/value pairs from smaps field lines. Declared here so it can be
+  // captured by reference by the following lambdas.
+  std::vector<absl::string_view> key_value;
+
+  auto const on_required_field_kb = [&](size_t* field, bool* have_field) {
+    if (*have_field) {
+      return PosixError(
+          EINVAL,
+          absl::StrFormat("smaps entry has duplicate %s line", key_value[0]));
+    }
+    ASSIGN_OR_RETURN_ERRNO(*field, SmapsValueKb(key_value[1]));
+    *have_field = true;
+    return NoError();
+  };
+
+  auto const on_optional_field_kb = [&](absl::optional<size_t>* field) {
+    if (*field) {
+      return PosixError(
+          EINVAL,
+          absl::StrFormat("smaps entry has duplicate %s line", key_value[0]));
+    }
+    ASSIGN_OR_RETURN_ERRNO(*field, SmapsValueKb(key_value[1]));
+    return NoError();
+  };
+
+  absl::flat_hash_set<std::string> unknown_fields;
+  auto const on_unknown_field = [&] {
+    absl::string_view key = key_value[0];
+    // Don't mention unknown fields more than once.
+    if (unknown_fields.count(key)) {
+      return;
+    }
+    unknown_fields.insert(std::string(key));
+    std::cerr << "skipping unknown smaps field " << key << std::endl;
+  };
+
+  auto lines = absl::StrSplit(contents, '\n', absl::SkipEmpty());
+  for (absl::string_view l : lines) {
+    // Is this line a valid /proc/[pid]/maps entry?
+    auto maybe_maps_entry = ParseProcMapsLine(l);
+    if (maybe_maps_entry.ok()) {
+      // This marks the beginning of a new /proc/[pid]/smaps entry.
+      RETURN_IF_ERRNO(finish_entry());
+      entry->maps_entry = std::move(maybe_maps_entry).ValueOrDie();
+      continue;
+    }
+    // Otherwise it's a field in an existing /proc/[pid]/smaps entry of the form
+    // "key:value" (where value in practice will be preceded by a variable
+    // amount of whitespace).
+    if (!entry) {
+      std::cerr << "smaps line not considered a maps line: "
+                << maybe_maps_entry.error_message() << std::endl;
+      return PosixError(
+          EINVAL,
+          absl::StrCat("smaps field line without preceding maps line: ", l));
+    }
+    key_value = absl::StrSplit(l, absl::MaxSplits(':', 1));
+    if (key_value.size() != 2) {
+      return PosixError(EINVAL, absl::StrCat("invalid smaps field line: ", l));
+    }
+    absl::string_view const key = key_value[0];
+    if (key == "Size") {
+      RETURN_IF_ERRNO(on_required_field_kb(&entry->size_kb, &have_size_kb));
+    } else if (key == "Rss") {
+      RETURN_IF_ERRNO(on_required_field_kb(&entry->rss_kb, &have_rss_kb));
+    } else if (key == "Shared_Clean") {
+      RETURN_IF_ERRNO(
+          on_required_field_kb(&entry->shared_clean_kb, &have_shared_clean_kb));
+    } else if (key == "Shared_Dirty") {
+      RETURN_IF_ERRNO(
+          on_required_field_kb(&entry->shared_dirty_kb, &have_shared_dirty_kb));
+    } else if (key == "Private_Clean") {
+      RETURN_IF_ERRNO(on_required_field_kb(&entry->private_clean_kb,
+                                           &have_private_clean_kb));
+    } else if (key == "Private_Dirty") {
+      RETURN_IF_ERRNO(on_required_field_kb(&entry->private_dirty_kb,
+                                           &have_private_dirty_kb));
+    } else if (key == "Pss") {
+      RETURN_IF_ERRNO(on_optional_field_kb(&entry->pss_kb));
+    } else if (key == "Referenced") {
+      RETURN_IF_ERRNO(on_optional_field_kb(&entry->referenced_kb));
+    } else if (key == "Anonymous") {
+      RETURN_IF_ERRNO(on_optional_field_kb(&entry->anonymous_kb));
+    } else if (key == "AnonHugePages") {
+      RETURN_IF_ERRNO(on_optional_field_kb(&entry->anon_huge_pages_kb));
+    } else if (key == "Shared_Hugetlb") {
+      RETURN_IF_ERRNO(on_optional_field_kb(&entry->shared_hugetlb_kb));
+    } else if (key == "Private_Hugetlb") {
+      RETURN_IF_ERRNO(on_optional_field_kb(&entry->private_hugetlb_kb));
+    } else if (key == "Swap") {
+      RETURN_IF_ERRNO(on_optional_field_kb(&entry->swap_kb));
+    } else if (key == "SwapPss") {
+      RETURN_IF_ERRNO(on_optional_field_kb(&entry->swap_pss_kb));
+    } else if (key == "KernelPageSize") {
+      RETURN_IF_ERRNO(on_optional_field_kb(&entry->kernel_page_size_kb));
+    } else if (key == "MMUPageSize") {
+      RETURN_IF_ERRNO(on_optional_field_kb(&entry->mmu_page_size_kb));
+    } else if (key == "Locked") {
+      RETURN_IF_ERRNO(on_optional_field_kb(&entry->locked_kb));
+    } else if (key == "VmFlags") {
+      if (entry->vm_flags) {
+        return PosixError(EINVAL, "duplicate VmFlags line");
+      }
+      entry->vm_flags = absl::StrSplit(key_value[1], ' ', absl::SkipEmpty());
+    } else {
+      on_unknown_field();
+    }
+  }
+  RETURN_IF_ERRNO(finish_entry());
+  return entries;
+};
+
+TEST(ParseProcPidSmapsTest, Correctness) {
+  auto entries = ASSERT_NO_ERRNO_AND_VALUE(
+      ParseProcPidSmaps("0-10000 rw-s 00000000 00:00 0 "
+                        "                   /dev/zero (deleted)\n"
+                        "Size:                  0 kB\n"
+                        "Rss:                   1 kB\n"
+                        "Pss:                   2 kB\n"
+                        "Shared_Clean:          3 kB\n"
+                        "Shared_Dirty:          4 kB\n"
+                        "Private_Clean:         5 kB\n"
+                        "Private_Dirty:         6 kB\n"
+                        "Referenced:            7 kB\n"
+                        "Anonymous:             8 kB\n"
+                        "AnonHugePages:         9 kB\n"
+                        "Shared_Hugetlb:       10 kB\n"
+                        "Private_Hugetlb:      11 kB\n"
+                        "Swap:                 12 kB\n"
+                        "SwapPss:              13 kB\n"
+                        "KernelPageSize:       14 kB\n"
+                        "MMUPageSize:          15 kB\n"
+                        "Locked:               16 kB\n"
+                        "FutureUnknownKey:     17 kB\n"
+                        "VmFlags: rd wr sh mr mw me ms lo ?? sd \n"));
+  ASSERT_EQ(entries.size(), 1);
+  auto& entry = entries[0];
+  EXPECT_EQ(entry.maps_entry.filename, "/dev/zero (deleted)");
+  EXPECT_EQ(entry.size_kb, 0);
+  EXPECT_EQ(entry.rss_kb, 1);
+  EXPECT_THAT(entry.pss_kb, Optional(2));
+  EXPECT_EQ(entry.shared_clean_kb, 3);
+  EXPECT_EQ(entry.shared_dirty_kb, 4);
+  EXPECT_EQ(entry.private_clean_kb, 5);
+  EXPECT_EQ(entry.private_dirty_kb, 6);
+  EXPECT_THAT(entry.referenced_kb, Optional(7));
+  EXPECT_THAT(entry.anonymous_kb, Optional(8));
+  EXPECT_THAT(entry.anon_huge_pages_kb, Optional(9));
+  EXPECT_THAT(entry.shared_hugetlb_kb, Optional(10));
+  EXPECT_THAT(entry.private_hugetlb_kb, Optional(11));
+  EXPECT_THAT(entry.swap_kb, Optional(12));
+  EXPECT_THAT(entry.swap_pss_kb, Optional(13));
+  EXPECT_THAT(entry.kernel_page_size_kb, Optional(14));
+  EXPECT_THAT(entry.mmu_page_size_kb, Optional(15));
+  EXPECT_THAT(entry.locked_kb, Optional(16));
+  EXPECT_THAT(entry.vm_flags,
+              Optional(ElementsAreArray({"rd", "wr", "sh", "mr", "mw", "me",
+                                         "ms", "lo", "??", "sd"})));
+}
+
+// Returns the unique entry in entries containing the given address.
+PosixErrorOr<ProcPidSmapsEntry> FindUniqueSmapsEntry(
+    std::vector<ProcPidSmapsEntry> const& entries, uintptr_t addr) {
+  auto const pred = [&](ProcPidSmapsEntry const& entry) {
+    return entry.maps_entry.start <= addr && addr < entry.maps_entry.end;
+  };
+  auto const it = std::find_if(entries.begin(), entries.end(), pred);
+  if (it == entries.end()) {
+    return PosixError(EINVAL,
+                      absl::StrFormat("no entry contains address %#x", addr));
+  }
+  auto const it2 = std::find_if(it + 1, entries.end(), pred);
+  if (it2 != entries.end()) {
+    return PosixError(
+        EINVAL,
+        absl::StrFormat("overlapping entries [%#x-%#x) and [%#x-%#x) both "
+                        "contain address %#x",
+                        it->maps_entry.start, it->maps_entry.end,
+                        it2->maps_entry.start, it2->maps_entry.end, addr));
+  }
+  return *it;
+}
+
+PosixErrorOr<std::vector<ProcPidSmapsEntry>> ReadProcSelfSmaps() {
+  ASSIGN_OR_RETURN_ERRNO(std::string contents, GetContents("/proc/self/smaps"));
+  return ParseProcPidSmaps(contents);
+}
+
+TEST(ProcPidSmapsTest, SharedAnon) {
+  // Map with MAP_POPULATE so we get some RSS.
+  Mapping const m = ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(
+      2 * kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE));
+  auto const entries = ASSERT_NO_ERRNO_AND_VALUE(ReadProcSelfSmaps());
+  auto const entry =
+      ASSERT_NO_ERRNO_AND_VALUE(FindUniqueSmapsEntry(entries, m.addr()));
+
+  EXPECT_EQ(entry.size_kb, m.len() / 1024);
+  // It's possible that populated pages have been swapped out, so RSS might be
+  // less than size.
+  EXPECT_LE(entry.rss_kb, entry.size_kb);
+
+  if (entry.pss_kb) {
+    // PSS should be exactly equal to RSS since no other address spaces should
+    // be sharing our new mapping.
+    EXPECT_EQ(entry.pss_kb.value(), entry.rss_kb);
+  }
+
+  // "Shared" and "private" in smaps refers to whether or not *physical pages*
+  // are shared; thus all pages in our MAP_SHARED mapping should nevertheless
+  // be private.
+  EXPECT_EQ(entry.shared_clean_kb, 0);
+  EXPECT_EQ(entry.shared_dirty_kb, 0);
+  EXPECT_EQ(entry.private_clean_kb + entry.private_dirty_kb, entry.rss_kb)
+      << "Private_Clean = " << entry.private_clean_kb
+      << " kB, Private_Dirty = " << entry.private_dirty_kb << " kB";
+
+  // Shared anonymous mappings are implemented as a shmem file, so their pages
+  // are not PageAnon.
+  if (entry.anonymous_kb) {
+    EXPECT_EQ(entry.anonymous_kb.value(), 0);
+  }
+
+  if (entry.vm_flags) {
+    EXPECT_THAT(entry.vm_flags.value(),
+                IsSupersetOf({"rd", "wr", "sh", "mr", "mw", "me", "ms"}));
+    EXPECT_THAT(entry.vm_flags.value(), Not(Contains("ex")));
+  }
+}
+
+TEST(ProcPidSmapsTest, PrivateAnon) {
+  // Map with MAP_POPULATE so we get some RSS.
+  Mapping const m = ASSERT_NO_ERRNO_AND_VALUE(
+      MmapAnon(2 * kPageSize, PROT_WRITE, MAP_PRIVATE | MAP_POPULATE));
+  auto const entries = ASSERT_NO_ERRNO_AND_VALUE(ReadProcSelfSmaps());
+  auto const entry =
+      ASSERT_NO_ERRNO_AND_VALUE(FindUniqueSmapsEntry(entries, m.addr()));
+
+  // It's possible that our mapping was merged with another vma, so the smaps
+  // entry might be bigger than our original mapping.
+  EXPECT_GE(entry.size_kb, m.len() / 1024);
+  EXPECT_LE(entry.rss_kb, entry.size_kb);
+  if (entry.pss_kb) {
+    EXPECT_LE(entry.pss_kb.value(), entry.rss_kb);
+  }
+
+  if (entry.anonymous_kb) {
+    EXPECT_EQ(entry.anonymous_kb.value(), entry.rss_kb);
+  }
+
+  if (entry.vm_flags) {
+    EXPECT_THAT(entry.vm_flags.value(), IsSupersetOf({"wr", "mr", "mw", "me"}));
+    // We passed PROT_WRITE to mmap. On at least x86, the mapping is in
+    // practice readable because there is no way to configure the MMU to make
+    // pages writable but not readable. However, VmFlags should reflect the
+    // flags set on the VMA, so "rd" (VM_READ) should not appear in VmFlags.
+    EXPECT_THAT(entry.vm_flags.value(), Not(Contains("rd")));
+    EXPECT_THAT(entry.vm_flags.value(), Not(Contains("ex")));
+    EXPECT_THAT(entry.vm_flags.value(), Not(Contains("sh")));
+    EXPECT_THAT(entry.vm_flags.value(), Not(Contains("ms")));
+  }
+}
+
+TEST(ProcPidSmapsTest, SharedReadOnlyFile) {
+  size_t const kFileSize = kPageSize;
+
+  auto const temp_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  ASSERT_THAT(truncate(temp_file.path().c_str(), kFileSize), SyscallSucceeds());
+  auto const fd = ASSERT_NO_ERRNO_AND_VALUE(Open(temp_file.path(), O_RDONLY));
+
+  auto const m = ASSERT_NO_ERRNO_AND_VALUE(Mmap(
+      nullptr, kFileSize, PROT_READ, MAP_SHARED | MAP_POPULATE, fd.get(), 0));
+  auto const entries = ASSERT_NO_ERRNO_AND_VALUE(ReadProcSelfSmaps());
+  auto const entry =
+      ASSERT_NO_ERRNO_AND_VALUE(FindUniqueSmapsEntry(entries, m.addr()));
+
+  // Most of the same logic as the SharedAnon case applies.
+  EXPECT_EQ(entry.size_kb, kFileSize / 1024);
+  EXPECT_LE(entry.rss_kb, entry.size_kb);
+  if (entry.pss_kb) {
+    EXPECT_EQ(entry.pss_kb.value(), entry.rss_kb);
+  }
+  EXPECT_EQ(entry.shared_clean_kb, 0);
+  EXPECT_EQ(entry.shared_dirty_kb, 0);
+  EXPECT_EQ(entry.private_clean_kb + entry.private_dirty_kb, entry.rss_kb)
+      << "Private_Clean = " << entry.private_clean_kb
+      << " kB, Private_Dirty = " << entry.private_dirty_kb << " kB";
+  if (entry.anonymous_kb) {
+    EXPECT_EQ(entry.anonymous_kb.value(), 0);
+  }
+
+  if (entry.vm_flags) {
+    EXPECT_THAT(entry.vm_flags.value(), IsSupersetOf({"rd", "mr", "me", "ms"}));
+    EXPECT_THAT(entry.vm_flags.value(), Not(Contains("wr")));
+    EXPECT_THAT(entry.vm_flags.value(), Not(Contains("ex")));
+    // Because the mapped file was opened O_RDONLY, the VMA is !VM_MAYWRITE and
+    // also !VM_SHARED.
+    EXPECT_THAT(entry.vm_flags.value(), Not(Contains("sh")));
+    EXPECT_THAT(entry.vm_flags.value(), Not(Contains("mw")));
+  }
+}
+
+// Tests that gVisor's /proc/[pid]/smaps provides all of the fields we expect it
+// to, which as of this writing is all fields provided by Linux 4.4.
+TEST(ProcPidSmapsTest, GvisorFields) {
+  SKIP_IF(!IsRunningOnGvisor());
+  auto const entries = ASSERT_NO_ERRNO_AND_VALUE(ReadProcSelfSmaps());
+  for (auto const& entry : entries) {
+    EXPECT_TRUE(entry.pss_kb);
+    EXPECT_TRUE(entry.referenced_kb);
+    EXPECT_TRUE(entry.anonymous_kb);
+    EXPECT_TRUE(entry.anon_huge_pages_kb);
+    EXPECT_TRUE(entry.shared_hugetlb_kb);
+    EXPECT_TRUE(entry.private_hugetlb_kb);
+    EXPECT_TRUE(entry.swap_kb);
+    EXPECT_TRUE(entry.swap_pss_kb);
+    EXPECT_THAT(entry.kernel_page_size_kb, Optional(kPageSize / 1024));
+    EXPECT_THAT(entry.mmu_page_size_kb, Optional(kPageSize / 1024));
+    EXPECT_TRUE(entry.locked_kb);
+    EXPECT_TRUE(entry.vm_flags);
+  }
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/proc_pid_uid_gid_map.cc b/test/syscalls/linux/proc_pid_uid_gid_map.cc
new file mode 100644
index 000000000..748f7be58
--- /dev/null
+++ b/test/syscalls/linux/proc_pid_uid_gid_map.cc
@@ -0,0 +1,311 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <sched.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <functional>
+#include <string>
+#include <tuple>
+#include <utility>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "absl/strings/ascii.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/str_split.h"
+#include "test/util/capability_util.h"
+#include "test/util/cleanup.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/fs_util.h"
+#include "test/util/logging.h"
+#include "test/util/multiprocess_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/save_util.h"
+#include "test/util/test_util.h"
+#include "test/util/time_util.h"
+
+namespace gvisor {
+namespace testing {
+
+PosixErrorOr<int> InNewUserNamespace(const std::function<void()>& fn) {
+  return InForkedProcess([&] {
+    TEST_PCHECK(unshare(CLONE_NEWUSER) == 0);
+    MaybeSave();
+    fn();
+  });
+}
+
+PosixErrorOr<std::tuple<pid_t, Cleanup>> CreateProcessInNewUserNamespace() {
+  int pipefd[2];
+  if (pipe(pipefd) < 0) {
+    return PosixError(errno, "pipe failed");
+  }
+  const auto cleanup_pipe_read =
+      Cleanup([&] { EXPECT_THAT(close(pipefd[0]), SyscallSucceeds()); });
+  auto cleanup_pipe_write =
+      Cleanup([&] { EXPECT_THAT(close(pipefd[1]), SyscallSucceeds()); });
+  pid_t child_pid = fork();
+  if (child_pid < 0) {
+    return PosixError(errno, "fork failed");
+  }
+  if (child_pid == 0) {
+    // Close our copy of the pipe's read end, which doesn't really matter.
+    TEST_PCHECK(close(pipefd[0]) >= 0);
+    TEST_PCHECK(unshare(CLONE_NEWUSER) == 0);
+    MaybeSave();
+    // Indicate that we've switched namespaces by unblocking the parent's read.
+    TEST_PCHECK(close(pipefd[1]) >= 0);
+    while (true) {
+      SleepSafe(absl::Minutes(1));
+    }
+  }
+  auto cleanup_child = Cleanup([child_pid] {
+    EXPECT_THAT(kill(child_pid, SIGKILL), SyscallSucceeds());
+    int status;
+    ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0),
+                SyscallSucceedsWithValue(child_pid));
+    EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL)
+        << "status = " << status;
+  });
+  // Close our copy of the pipe's write end, then wait for the child to close
+  // its copy, indicating that it's switched namespaces.
+  cleanup_pipe_write.Release()();
+  char buf;
+  if (RetryEINTR(read)(pipefd[0], &buf, 1) < 0) {
+    return PosixError(errno, "reading from pipe failed");
+  }
+  MaybeSave();
+  return std::make_tuple(child_pid, std::move(cleanup_child));
+}
+
+// TEST_CHECK-fails on error, since this function is used in contexts that
+// require async-signal-safety.
+void DenySetgroupsByPath(const char* path) {
+  int fd = open(path, O_WRONLY);
+  if (fd < 0 && errno == ENOENT) {
+    // On kernels where this file doesn't exist, writing "deny" to it isn't
+    // necessary to write to gid_map.
+    return;
+  }
+  TEST_PCHECK(fd >= 0);
+  MaybeSave();
+  char deny[] = "deny";
+  TEST_PCHECK(write(fd, deny, sizeof(deny)) == sizeof(deny));
+  MaybeSave();
+  TEST_PCHECK(close(fd) == 0);
+}
+
+void DenySelfSetgroups() { DenySetgroupsByPath("/proc/self/setgroups"); }
+
+void DenyPidSetgroups(pid_t pid) {
+  DenySetgroupsByPath(absl::StrCat("/proc/", pid, "/setgroups").c_str());
+}
+
+// Returns a valid UID/GID that isn't id.
+uint32_t another_id(uint32_t id) { return (id + 1) % 65535; }
+
+struct TestParam {
+  std::string desc;
+  int cap;
+  std::function<std::string(absl::string_view)> get_map_filename;
+  std::function<uint32_t()> get_current_id;
+};
+
+std::string DescribeTestParam(const ::testing::TestParamInfo<TestParam>& info) {
+  return info.param.desc;
+}
+
+std::vector<TestParam> UidGidMapTestParams() {
+  return {TestParam{"UID", CAP_SETUID,
+                    [](absl::string_view pid) {
+                      return absl::StrCat("/proc/", pid, "/uid_map");
+                    },
+                    []() -> uint32_t { return getuid(); }},
+          TestParam{"GID", CAP_SETGID,
+                    [](absl::string_view pid) {
+                      return absl::StrCat("/proc/", pid, "/gid_map");
+                    },
+                    []() -> uint32_t { return getgid(); }}};
+}
+
+class ProcUidGidMapTest : public ::testing::TestWithParam<TestParam> {
+ protected:
+  uint32_t CurrentID() { return GetParam().get_current_id(); }
+};
+
+class ProcSelfUidGidMapTest : public ProcUidGidMapTest {
+ protected:
+  PosixErrorOr<int> InNewUserNamespaceWithMapFD(
+      const std::function<void(int)>& fn) {
+    std::string map_filename = GetParam().get_map_filename("self");
+    return InNewUserNamespace([&] {
+      int fd = open(map_filename.c_str(), O_RDWR);
+      TEST_PCHECK(fd >= 0);
+      MaybeSave();
+      fn(fd);
+      TEST_PCHECK(close(fd) == 0);
+    });
+  }
+};
+
+class ProcPidUidGidMapTest : public ProcUidGidMapTest {
+ protected:
+  PosixErrorOr<bool> HaveSetIDCapability() {
+    return HaveCapability(GetParam().cap);
+  }
+
+  // Returns true if the caller is running in a user namespace with all IDs
+  // mapped. This matters for tests that expect to successfully map arbitrary
+  // IDs into a child user namespace, since even with CAP_SET*ID this is only
+  // possible if those IDs are mapped into the current one.
+  PosixErrorOr<bool> AllIDsMapped() {
+    ASSIGN_OR_RETURN_ERRNO(std::string id_map,
+                           GetContents(GetParam().get_map_filename("self")));
+    absl::StripTrailingAsciiWhitespace(&id_map);
+    std::vector<std::string> id_map_parts =
+        absl::StrSplit(id_map, ' ', absl::SkipEmpty());
+    return id_map_parts == std::vector<std::string>({"0", "0", "4294967295"});
+  }
+
+  PosixErrorOr<FileDescriptor> OpenMapFile(pid_t pid) {
+    return Open(GetParam().get_map_filename(absl::StrCat(pid)), O_RDWR);
+  }
+};
+
+TEST_P(ProcSelfUidGidMapTest, IsInitiallyEmpty) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanCreateUserNamespace()));
+  EXPECT_THAT(InNewUserNamespaceWithMapFD([](int fd) {
+                char buf[64];
+                TEST_PCHECK(read(fd, buf, sizeof(buf)) == 0);
+              }),
+              IsPosixErrorOkAndHolds(0));
+}
+
+TEST_P(ProcSelfUidGidMapTest, IdentityMapOwnID) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanCreateUserNamespace()));
+  uint32_t id = CurrentID();
+  std::string line = absl::StrCat(id, " ", id, " 1");
+  EXPECT_THAT(
+      InNewUserNamespaceWithMapFD([&](int fd) {
+        DenySelfSetgroups();
+        TEST_PCHECK(write(fd, line.c_str(), line.size()) == line.size());
+      }),
+      IsPosixErrorOkAndHolds(0));
+}
+
+TEST_P(ProcSelfUidGidMapTest, TrailingNewlineAndNULIgnored) {
+  // This is identical to IdentityMapOwnID, except that a trailing newline, NUL,
+  // and an invalid (incomplete) map entry are appended to the valid entry. The
+  // newline should be accepted, and everything after the NUL should be ignored.
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanCreateUserNamespace()));
+  uint32_t id = CurrentID();
+  std::string line = absl::StrCat(id, " ", id, " 1\n\0 4 3");
+  EXPECT_THAT(
+      InNewUserNamespaceWithMapFD([&](int fd) {
+        DenySelfSetgroups();
+        // The write should return the full size of the write, even though
+        // characters after the NUL were ignored.
+        TEST_PCHECK(write(fd, line.c_str(), line.size()) == line.size());
+      }),
+      IsPosixErrorOkAndHolds(0));
+}
+
+TEST_P(ProcSelfUidGidMapTest, NonIdentityMapOwnID) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanCreateUserNamespace()));
+  uint32_t id = CurrentID();
+  uint32_t id2 = another_id(id);
+  std::string line = absl::StrCat(id2, " ", id, " 1");
+  EXPECT_THAT(
+      InNewUserNamespaceWithMapFD([&](int fd) {
+        DenySelfSetgroups();
+        TEST_PCHECK(write(fd, line.c_str(), line.size()) == line.size());
+      }),
+      IsPosixErrorOkAndHolds(0));
+}
+
+TEST_P(ProcSelfUidGidMapTest, MapOtherID) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanCreateUserNamespace()));
+  // Whether or not we have CAP_SET*ID is irrelevant: the process running in the
+  // new (child) user namespace won't have any capabilities in the current
+  // (parent) user namespace, which is needed.
+  uint32_t id = CurrentID();
+  uint32_t id2 = another_id(id);
+  std::string line = absl::StrCat(id, " ", id2, " 1");
+  EXPECT_THAT(InNewUserNamespaceWithMapFD([&](int fd) {
+                DenySelfSetgroups();
+                TEST_PCHECK(write(fd, line.c_str(), line.size()) < 0);
+                TEST_CHECK(errno == EPERM);
+              }),
+              IsPosixErrorOkAndHolds(0));
+}
+
+INSTANTIATE_TEST_SUITE_P(All, ProcSelfUidGidMapTest,
+                         ::testing::ValuesIn(UidGidMapTestParams()),
+                         DescribeTestParam);
+
+TEST_P(ProcPidUidGidMapTest, MapOtherIDPrivileged) {
+  // Like ProcSelfUidGidMapTest_MapOtherID, but since we have CAP_SET*ID in the
+  // parent user namespace (this one), we can map IDs that aren't ours.
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanCreateUserNamespace()));
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveSetIDCapability()));
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(AllIDsMapped()));
+
+  pid_t child_pid;
+  Cleanup cleanup_child;
+  std::tie(child_pid, cleanup_child) =
+      ASSERT_NO_ERRNO_AND_VALUE(CreateProcessInNewUserNamespace());
+
+  uint32_t id = CurrentID();
+  uint32_t id2 = another_id(id);
+  std::string line = absl::StrCat(id, " ", id2, " 1");
+  DenyPidSetgroups(child_pid);
+  auto fd = ASSERT_NO_ERRNO_AND_VALUE(OpenMapFile(child_pid));
+  EXPECT_THAT(write(fd.get(), line.c_str(), line.size()),
+              SyscallSucceedsWithValue(line.size()));
+}
+
+TEST_P(ProcPidUidGidMapTest, MapAnyIDsPrivileged) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanCreateUserNamespace()));
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveSetIDCapability()));
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(AllIDsMapped()));
+
+  pid_t child_pid;
+  Cleanup cleanup_child;
+  std::tie(child_pid, cleanup_child) =
+      ASSERT_NO_ERRNO_AND_VALUE(CreateProcessInNewUserNamespace());
+
+  // Test all of:
+  //
+  // - Mapping ranges of length > 1
+  //
+  // - Mapping multiple ranges
+  //
+  // - Non-identity mappings
+  char entries[] = "2 0 2\n4 6 2";
+  DenyPidSetgroups(child_pid);
+  auto fd = ASSERT_NO_ERRNO_AND_VALUE(OpenMapFile(child_pid));
+  EXPECT_THAT(write(fd.get(), entries, sizeof(entries)),
+              SyscallSucceedsWithValue(sizeof(entries)));
+}
+
+INSTANTIATE_TEST_SUITE_P(All, ProcPidUidGidMapTest,
+                         ::testing::ValuesIn(UidGidMapTestParams()),
+                         DescribeTestParam);
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/pselect.cc b/test/syscalls/linux/pselect.cc
new file mode 100644
index 000000000..4e43c4d7f
--- /dev/null
+++ b/test/syscalls/linux/pselect.cc
@@ -0,0 +1,190 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <signal.h>
+#include <sys/select.h>
+
+#include "gtest/gtest.h"
+#include "absl/time/time.h"
+#include "test/syscalls/linux/base_poll_test.h"
+#include "test/util/signal_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+struct MaskWithSize {
+  sigset_t* mask;
+  size_t mask_size;
+};
+
+// Linux and glibc have a different idea of the sizeof sigset_t. When calling
+// the syscall directly, use what the kernel expects.
+unsigned kSigsetSize = SIGRTMAX / 8;
+
+// Linux pselect(2) differs from the glibc wrapper function in that Linux
+// updates the timeout with the amount of time remaining. In order to test this
+// behavior we need to use the syscall directly.
+int syscallPselect6(int nfds, fd_set* readfds, fd_set* writefds,
+                    fd_set* exceptfds, struct timespec* timeout,
+                    const MaskWithSize* mask_with_size) {
+  return syscall(SYS_pselect6, nfds, readfds, writefds, exceptfds, timeout,
+                 mask_with_size);
+}
+
+class PselectTest : public BasePollTest {
+ protected:
+  void SetUp() override { BasePollTest::SetUp(); }
+  void TearDown() override { BasePollTest::TearDown(); }
+};
+
+// See that when there are no FD sets, pselect behaves like sleep.
+TEST_F(PselectTest, NullFds) {
+  struct timespec timeout = absl::ToTimespec(absl::Milliseconds(10));
+  ASSERT_THAT(syscallPselect6(0, nullptr, nullptr, nullptr, &timeout, nullptr),
+              SyscallSucceeds());
+  EXPECT_EQ(timeout.tv_sec, 0);
+  EXPECT_EQ(timeout.tv_nsec, 0);
+
+  timeout = absl::ToTimespec(absl::Milliseconds(10));
+  ASSERT_THAT(syscallPselect6(1, nullptr, nullptr, nullptr, &timeout, nullptr),
+              SyscallSucceeds());
+  EXPECT_EQ(timeout.tv_sec, 0);
+  EXPECT_EQ(timeout.tv_nsec, 0);
+}
+
+TEST_F(PselectTest, ClosedFds) {
+  fd_set read_set;
+  FD_ZERO(&read_set);
+  int fd;
+  ASSERT_THAT(fd = dup(1), SyscallSucceeds());
+  ASSERT_THAT(close(fd), SyscallSucceeds());
+  FD_SET(fd, &read_set);
+  struct timespec timeout = absl::ToTimespec(absl::Milliseconds(10));
+  EXPECT_THAT(
+      syscallPselect6(fd + 1, &read_set, nullptr, nullptr, &timeout, nullptr),
+      SyscallFailsWithErrno(EBADF));
+}
+
+TEST_F(PselectTest, ZeroTimeout) {
+  struct timespec timeout = {};
+  ASSERT_THAT(syscallPselect6(1, nullptr, nullptr, nullptr, &timeout, nullptr),
+              SyscallSucceeds());
+  EXPECT_EQ(timeout.tv_sec, 0);
+  EXPECT_EQ(timeout.tv_nsec, 0);
+}
+
+// If random S/R interrupts the pselect, SIGALRM may be delivered before pselect
+// restarts, causing the pselect to hang forever.
+TEST_F(PselectTest, NoTimeout_NoRandomSave) {
+  // When there's no timeout, pselect may never return so set a timer.
+  SetTimer(absl::Milliseconds(100));
+  // See that we get interrupted by the timer.
+  ASSERT_THAT(syscallPselect6(1, nullptr, nullptr, nullptr, nullptr, nullptr),
+              SyscallFailsWithErrno(EINTR));
+  EXPECT_TRUE(TimerFired());
+}
+
+TEST_F(PselectTest, InvalidTimeoutNegative) {
+  struct timespec timeout = absl::ToTimespec(absl::Seconds(-1));
+  ASSERT_THAT(syscallPselect6(1, nullptr, nullptr, nullptr, &timeout, nullptr),
+              SyscallFailsWithErrno(EINVAL));
+  EXPECT_EQ(timeout.tv_sec, -1);
+  EXPECT_EQ(timeout.tv_nsec, 0);
+}
+
+TEST_F(PselectTest, InvalidTimeoutNotNormalized) {
+  struct timespec timeout = {0, 1000000001};
+  ASSERT_THAT(syscallPselect6(1, nullptr, nullptr, nullptr, &timeout, nullptr),
+              SyscallFailsWithErrno(EINVAL));
+  EXPECT_EQ(timeout.tv_sec, 0);
+  EXPECT_EQ(timeout.tv_nsec, 1000000001);
+}
+
+TEST_F(PselectTest, EmptySigMaskInvalidMaskSize) {
+  struct timespec timeout = {};
+  MaskWithSize invalid = {nullptr, 7};
+  EXPECT_THAT(syscallPselect6(0, nullptr, nullptr, nullptr, &timeout, &invalid),
+              SyscallSucceeds());
+}
+
+TEST_F(PselectTest, EmptySigMaskValidMaskSize) {
+  struct timespec timeout = {};
+  MaskWithSize invalid = {nullptr, 8};
+  EXPECT_THAT(syscallPselect6(0, nullptr, nullptr, nullptr, &timeout, &invalid),
+              SyscallSucceeds());
+}
+
+TEST_F(PselectTest, InvalidMaskSize) {
+  struct timespec timeout = {};
+  sigset_t sigmask;
+  ASSERT_THAT(sigemptyset(&sigmask), SyscallSucceeds());
+  MaskWithSize invalid = {&sigmask, 7};
+  EXPECT_THAT(syscallPselect6(1, nullptr, nullptr, nullptr, &timeout, &invalid),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+// Verify that signals blocked by the pselect mask (that would otherwise be
+// allowed) do not interrupt pselect.
+TEST_F(PselectTest, SignalMaskBlocksSignal) {
+  absl::Duration duration(absl::Seconds(30));
+  struct timespec timeout = absl::ToTimespec(duration);
+  absl::Duration timer_duration(absl::Seconds(10));
+
+  // Call with a mask that blocks SIGALRM. See that pselect is not interrupted
+  // (i.e. returns 0) and that upon completion, the timer has fired.
+  sigset_t mask;
+  ASSERT_THAT(sigprocmask(0, nullptr, &mask), SyscallSucceeds());
+  ASSERT_THAT(sigaddset(&mask, SIGALRM), SyscallSucceeds());
+  MaskWithSize mask_with_size = {&mask, kSigsetSize};
+  SetTimer(timer_duration);
+  MaybeSave();
+  ASSERT_FALSE(TimerFired());
+  ASSERT_THAT(
+      syscallPselect6(1, nullptr, nullptr, nullptr, &timeout, &mask_with_size),
+      SyscallSucceeds());
+  EXPECT_TRUE(TimerFired());
+  EXPECT_EQ(absl::DurationFromTimespec(timeout), absl::Duration());
+}
+
+// Verify that signals allowed by the pselect mask (that would otherwise be
+// blocked) interrupt pselect.
+TEST_F(PselectTest, SignalMaskAllowsSignal) {
+  absl::Duration duration = absl::Seconds(30);
+  struct timespec timeout = absl::ToTimespec(duration);
+  absl::Duration timer_duration = absl::Seconds(10);
+
+  sigset_t mask;
+  ASSERT_THAT(sigprocmask(0, nullptr, &mask), SyscallSucceeds());
+
+  // Block SIGALRM.
+  auto cleanup =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_BLOCK, SIGALRM));
+
+  // Call with a mask that unblocks SIGALRM. See that pselect is interrupted.
+  MaskWithSize mask_with_size = {&mask, kSigsetSize};
+  SetTimer(timer_duration);
+  MaybeSave();
+  ASSERT_FALSE(TimerFired());
+  ASSERT_THAT(
+      syscallPselect6(1, nullptr, nullptr, nullptr, &timeout, &mask_with_size),
+      SyscallFailsWithErrno(EINTR));
+  EXPECT_TRUE(TimerFired());
+  EXPECT_GT(absl::DurationFromTimespec(timeout), absl::Duration());
+}
+
+}  // namespace
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/ptrace.cc b/test/syscalls/linux/ptrace.cc
new file mode 100644
index 000000000..926690eb8
--- /dev/null
+++ b/test/syscalls/linux/ptrace.cc
@@ -0,0 +1,1229 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <elf.h>
+#include <signal.h>
+#include <stddef.h>
+#include <sys/ptrace.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/user.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include <iostream>
+#include <utility>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/flags/flag.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/util/logging.h"
+#include "test/util/multiprocess_util.h"
+#include "test/util/platform_util.h"
+#include "test/util/signal_util.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+#include "test/util/time_util.h"
+
+ABSL_FLAG(bool, ptrace_test_execve_child, false,
+          "If true, run the "
+          "PtraceExecveTest_Execve_GetRegs_PeekUser_SIGKILL_TraceClone_"
+          "TraceExit child workload.");
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// PTRACE_GETSIGMASK and PTRACE_SETSIGMASK are not defined until glibc 2.23
+// (fb53a27c5741 "Add new header definitions from Linux 4.4 (plus older ptrace
+// definitions)").
+constexpr auto kPtraceGetSigMask = static_cast<__ptrace_request>(0x420a);
+constexpr auto kPtraceSetSigMask = static_cast<__ptrace_request>(0x420b);
+
+// PTRACE_SYSEMU is not defined until glibc 2.27 (c48831d0eebf "linux/x86: sync
+// sys/ptrace.h with Linux 4.14 [BZ #22433]").
+constexpr auto kPtraceSysemu = static_cast<__ptrace_request>(31);
+
+// PTRACE_EVENT_STOP is not defined until glibc 2.26 (3f67d1a7021e "Add Linux
+// PTRACE_EVENT_STOP").
+constexpr int kPtraceEventStop = 128;
+
+// Sends sig to the current process with tgkill(2).
+//
+// glibc's raise(2) may change the signal mask before sending the signal. These
+// extra syscalls make tests of syscall, signal interception, etc. difficult to
+// write.
+void RaiseSignal(int sig) {
+  pid_t pid = getpid();
+  TEST_PCHECK(pid > 0);
+  pid_t tid = gettid();
+  TEST_PCHECK(tid > 0);
+  TEST_PCHECK(tgkill(pid, tid, sig) == 0);
+}
+
+// Returns the Yama ptrace scope.
+PosixErrorOr<int> YamaPtraceScope() {
+  constexpr char kYamaPtraceScopePath[] = "/proc/sys/kernel/yama/ptrace_scope";
+
+  ASSIGN_OR_RETURN_ERRNO(bool exists, Exists(kYamaPtraceScopePath));
+  if (!exists) {
+    // File doesn't exist means no Yama, so the scope is disabled -> 0.
+    return 0;
+  }
+
+  std::string contents;
+  RETURN_IF_ERRNO(GetContents(kYamaPtraceScopePath, &contents));
+
+  int scope;
+  if (!absl::SimpleAtoi(contents, &scope)) {
+    return PosixError(EINVAL, absl::StrCat(contents, ": not a valid number"));
+  }
+
+  return scope;
+}
+
+TEST(PtraceTest, AttachSelf) {
+  EXPECT_THAT(ptrace(PTRACE_ATTACH, gettid(), 0, 0),
+              SyscallFailsWithErrno(EPERM));
+}
+
+TEST(PtraceTest, AttachSameThreadGroup) {
+  pid_t const tid = gettid();
+  ScopedThread([&] {
+    EXPECT_THAT(ptrace(PTRACE_ATTACH, tid, 0, 0), SyscallFailsWithErrno(EPERM));
+  });
+}
+
+TEST(PtraceTest, AttachParent_PeekData_PokeData_SignalSuppression) {
+  // Yama prevents attaching to a parent. Skip the test if the scope is anything
+  // except disabled.
+  SKIP_IF(ASSERT_NO_ERRNO_AND_VALUE(YamaPtraceScope()) > 0);
+
+  constexpr long kBeforePokeDataValue = 10;
+  constexpr long kAfterPokeDataValue = 20;
+
+  volatile long word = kBeforePokeDataValue;
+
+  pid_t const child_pid = fork();
+  if (child_pid == 0) {
+    // In child process.
+
+    // Attach to the parent.
+    pid_t const parent_pid = getppid();
+    TEST_PCHECK(ptrace(PTRACE_ATTACH, parent_pid, 0, 0) == 0);
+    MaybeSave();
+
+    // Block until the parent enters signal-delivery-stop as a result of the
+    // SIGSTOP sent by PTRACE_ATTACH.
+    int status;
+    TEST_PCHECK(waitpid(parent_pid, &status, 0) == parent_pid);
+    MaybeSave();
+    TEST_CHECK(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP);
+
+    // Replace the value of word in the parent process with kAfterPokeDataValue.
+    long const parent_word = ptrace(PTRACE_PEEKDATA, parent_pid, &word, 0);
+    MaybeSave();
+    TEST_CHECK(parent_word == kBeforePokeDataValue);
+    TEST_PCHECK(
+        ptrace(PTRACE_POKEDATA, parent_pid, &word, kAfterPokeDataValue) == 0);
+    MaybeSave();
+
+    // Detach from the parent and suppress the SIGSTOP. If the SIGSTOP is not
+    // suppressed, the parent will hang in group-stop, causing the test to time
+    // out.
+    TEST_PCHECK(ptrace(PTRACE_DETACH, parent_pid, 0, 0) == 0);
+    MaybeSave();
+    _exit(0);
+  }
+  // In parent process.
+  ASSERT_THAT(child_pid, SyscallSucceeds());
+
+  // Wait for the child to complete.
+  int status;
+  ASSERT_THAT(waitpid(child_pid, &status, 0),
+              SyscallSucceedsWithValue(child_pid));
+  EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+      << " status " << status;
+
+  // Check that the child's PTRACE_POKEDATA was effective.
+  EXPECT_EQ(kAfterPokeDataValue, word);
+}
+
+TEST(PtraceTest, GetSigMask) {
+  // glibc and the Linux kernel define a sigset_t with different sizes. To avoid
+  // creating a kernel_sigset_t and recreating all the modification functions
+  // (sigemptyset, etc), we just hardcode the kernel sigset size.
+  constexpr int kSizeofKernelSigset = 8;
+  constexpr int kBlockSignal = SIGUSR1;
+  sigset_t blocked;
+  sigemptyset(&blocked);
+  sigaddset(&blocked, kBlockSignal);
+
+  pid_t const child_pid = fork();
+  if (child_pid == 0) {
+    // In child process.
+
+    // Install a signal handler for kBlockSignal to avoid termination and block
+    // it.
+    TEST_PCHECK(signal(
+                    kBlockSignal, +[](int signo) {}) != SIG_ERR);
+    MaybeSave();
+    TEST_PCHECK(sigprocmask(SIG_SETMASK, &blocked, nullptr) == 0);
+    MaybeSave();
+
+    // Enable tracing.
+    TEST_PCHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) == 0);
+    MaybeSave();
+
+    // This should be blocked.
+    RaiseSignal(kBlockSignal);
+
+    // This should be suppressed by parent, who will change signal mask in the
+    // meantime, which means kBlockSignal should be delivered once this resumes.
+    RaiseSignal(SIGSTOP);
+
+    _exit(0);
+  }
+  // In parent process.
+  ASSERT_THAT(child_pid, SyscallSucceeds());
+
+  // Wait for the child to send itself SIGSTOP and enter signal-delivery-stop.
+  int status;
+  ASSERT_THAT(waitpid(child_pid, &status, 0),
+              SyscallSucceedsWithValue(child_pid));
+  EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP)
+      << " status " << status;
+
+  // Get current signal mask.
+  sigset_t set;
+  EXPECT_THAT(ptrace(kPtraceGetSigMask, child_pid, kSizeofKernelSigset, &set),
+              SyscallSucceeds());
+  EXPECT_THAT(blocked, EqualsSigset(set));
+
+  // Try to get current signal mask with bad size argument.
+  EXPECT_THAT(ptrace(kPtraceGetSigMask, child_pid, 0, nullptr),
+              SyscallFailsWithErrno(EINVAL));
+
+  // Try to set bad signal mask.
+  sigset_t* bad_addr = reinterpret_cast<sigset_t*>(-1);
+  EXPECT_THAT(
+      ptrace(kPtraceSetSigMask, child_pid, kSizeofKernelSigset, bad_addr),
+      SyscallFailsWithErrno(EFAULT));
+
+  // Set signal mask to empty set.
+  sigset_t set1;
+  sigemptyset(&set1);
+  EXPECT_THAT(ptrace(kPtraceSetSigMask, child_pid, kSizeofKernelSigset, &set1),
+              SyscallSucceeds());
+
+  // Suppress SIGSTOP and resume the child. It should re-enter
+  // signal-delivery-stop for kBlockSignal.
+  ASSERT_THAT(ptrace(PTRACE_CONT, child_pid, 0, 0), SyscallSucceeds());
+  ASSERT_THAT(waitpid(child_pid, &status, 0),
+              SyscallSucceedsWithValue(child_pid));
+  EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == kBlockSignal)
+      << " status " << status;
+
+  ASSERT_THAT(ptrace(PTRACE_CONT, child_pid, 0, 0), SyscallSucceeds());
+  ASSERT_THAT(waitpid(child_pid, &status, 0),
+              SyscallSucceedsWithValue(child_pid));
+  // Let's see that process exited normally.
+  EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+      << " status " << status;
+}
+
+TEST(PtraceTest, GetSiginfo_SetSiginfo_SignalInjection) {
+  constexpr int kOriginalSigno = SIGUSR1;
+  constexpr int kInjectedSigno = SIGUSR2;
+
+  pid_t const child_pid = fork();
+  if (child_pid == 0) {
+    // In child process.
+
+    // Override all signal handlers.
+    struct sigaction sa = {};
+    sa.sa_handler = +[](int signo) { _exit(signo); };
+    TEST_PCHECK(sigfillset(&sa.sa_mask) == 0);
+    for (int signo = 1; signo < 32; signo++) {
+      if (signo == SIGKILL || signo == SIGSTOP) {
+        continue;
+      }
+      TEST_PCHECK(sigaction(signo, &sa, nullptr) == 0);
+    }
+    for (int signo = SIGRTMIN; signo <= SIGRTMAX; signo++) {
+      TEST_PCHECK(sigaction(signo, &sa, nullptr) == 0);
+    }
+
+    // Unblock all signals.
+    TEST_PCHECK(sigprocmask(SIG_UNBLOCK, &sa.sa_mask, nullptr) == 0);
+    MaybeSave();
+
+    // Send ourselves kOriginalSignal while ptraced and exit with the signal we
+    // actually receive via the signal handler, if any, or 0 if we don't receive
+    // a signal.
+    TEST_PCHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) == 0);
+    MaybeSave();
+    RaiseSignal(kOriginalSigno);
+    _exit(0);
+  }
+  // In parent process.
+  ASSERT_THAT(child_pid, SyscallSucceeds());
+
+  // Wait for the child to send itself kOriginalSigno and enter
+  // signal-delivery-stop.
+  int status;
+  ASSERT_THAT(waitpid(child_pid, &status, 0),
+              SyscallSucceedsWithValue(child_pid));
+  EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == kOriginalSigno)
+      << " status " << status;
+
+  siginfo_t siginfo = {};
+  ASSERT_THAT(ptrace(PTRACE_GETSIGINFO, child_pid, 0, &siginfo),
+              SyscallSucceeds());
+  EXPECT_EQ(kOriginalSigno, siginfo.si_signo);
+  EXPECT_EQ(SI_TKILL, siginfo.si_code);
+
+  // Replace the signal with kInjectedSigno, and check that the child exits
+  // with kInjectedSigno, indicating that signal injection was successful.
+  siginfo.si_signo = kInjectedSigno;
+  ASSERT_THAT(ptrace(PTRACE_SETSIGINFO, child_pid, 0, &siginfo),
+              SyscallSucceeds());
+  ASSERT_THAT(ptrace(PTRACE_DETACH, child_pid, 0, kInjectedSigno),
+              SyscallSucceeds());
+  ASSERT_THAT(waitpid(child_pid, &status, 0),
+              SyscallSucceedsWithValue(child_pid));
+  EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == kInjectedSigno)
+      << " status " << status;
+}
+
+TEST(PtraceTest, SIGKILLDoesNotCauseSignalDeliveryStop) {
+  pid_t const child_pid = fork();
+  if (child_pid == 0) {
+    // In child process.
+    TEST_PCHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) == 0);
+    MaybeSave();
+    RaiseSignal(SIGKILL);
+    TEST_CHECK_MSG(false, "Survived SIGKILL?");
+    _exit(1);
+  }
+  // In parent process.
+  ASSERT_THAT(child_pid, SyscallSucceeds());
+
+  // Expect the child to die to SIGKILL without entering signal-delivery-stop.
+  int status;
+  ASSERT_THAT(waitpid(child_pid, &status, 0),
+              SyscallSucceedsWithValue(child_pid));
+  EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL)
+      << " status " << status;
+}
+
+TEST(PtraceTest, PtraceKill) {
+  constexpr int kOriginalSigno = SIGUSR1;
+
+  pid_t const child_pid = fork();
+  if (child_pid == 0) {
+    // In child process.
+    TEST_PCHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) == 0);
+    MaybeSave();
+
+    // PTRACE_KILL only works if tracee has entered signal-delivery-stop.
+    RaiseSignal(kOriginalSigno);
+    TEST_CHECK_MSG(false, "Failed to kill the process?");
+    _exit(0);
+  }
+  // In parent process.
+  ASSERT_THAT(child_pid, SyscallSucceeds());
+
+  // Wait for the child to send itself kOriginalSigno and enter
+  // signal-delivery-stop.
+  int status;
+  ASSERT_THAT(waitpid(child_pid, &status, 0),
+              SyscallSucceedsWithValue(child_pid));
+  EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == kOriginalSigno)
+      << " status " << status;
+
+  ASSERT_THAT(ptrace(PTRACE_KILL, child_pid, 0, 0), SyscallSucceeds());
+
+  // Expect the child to die with SIGKILL.
+  ASSERT_THAT(waitpid(child_pid, &status, 0),
+              SyscallSucceedsWithValue(child_pid));
+  EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL)
+      << " status " << status;
+}
+
+TEST(PtraceTest, GetRegSet) {
+  pid_t const child_pid = fork();
+  if (child_pid == 0) {
+    // In child process.
+
+    // Enable tracing.
+    TEST_PCHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) == 0);
+    MaybeSave();
+
+    // Use kill explicitly because we check the syscall argument register below.
+    kill(getpid(), SIGSTOP);
+
+    _exit(0);
+  }
+  // In parent process.
+  ASSERT_THAT(child_pid, SyscallSucceeds());
+
+  // Wait for the child to send itself SIGSTOP and enter signal-delivery-stop.
+  int status;
+  ASSERT_THAT(waitpid(child_pid, &status, 0),
+              SyscallSucceedsWithValue(child_pid));
+  EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP)
+      << " status " << status;
+
+  // Get the general registers.
+  struct user_regs_struct regs;
+  struct iovec iov;
+  iov.iov_base = &regs;
+  iov.iov_len = sizeof(regs);
+  EXPECT_THAT(ptrace(PTRACE_GETREGSET, child_pid, NT_PRSTATUS, &iov),
+              SyscallSucceeds());
+
+  // Read exactly the full register set.
+  EXPECT_EQ(iov.iov_len, sizeof(regs));
+
+#if defined(__x86_64__)
+  // Child called kill(2), with SIGSTOP as arg 2.
+  EXPECT_EQ(regs.rsi, SIGSTOP);
+#elif defined(__aarch64__)
+  EXPECT_EQ(regs.regs[1], SIGSTOP);
+#endif
+
+  // Suppress SIGSTOP and resume the child.
+  ASSERT_THAT(ptrace(PTRACE_CONT, child_pid, 0, 0), SyscallSucceeds());
+  ASSERT_THAT(waitpid(child_pid, &status, 0),
+              SyscallSucceedsWithValue(child_pid));
+  // Let's see that process exited normally.
+  EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+      << " status " << status;
+}
+
+TEST(PtraceTest, AttachingConvertsGroupStopToPtraceStop) {
+  pid_t const child_pid = fork();
+  if (child_pid == 0) {
+    // In child process.
+    while (true) {
+      pause();
+    }
+  }
+  // In parent process.
+  ASSERT_THAT(child_pid, SyscallSucceeds());
+
+  // SIGSTOP the child and wait for it to stop.
+  ASSERT_THAT(kill(child_pid, SIGSTOP), SyscallSucceeds());
+  int status;
+  ASSERT_THAT(waitpid(child_pid, &status, WUNTRACED),
+              SyscallSucceedsWithValue(child_pid));
+  EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP)
+      << " status " << status;
+
+  // Attach to the child and expect it to re-enter a traced group-stop despite
+  // already being stopped.
+  ASSERT_THAT(ptrace(PTRACE_ATTACH, child_pid, 0, 0), SyscallSucceeds());
+  ASSERT_THAT(waitpid(child_pid, &status, 0),
+              SyscallSucceedsWithValue(child_pid));
+  EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP)
+      << " status " << status;
+
+  // Verify that the child is ptrace-stopped by checking that it can receive
+  // ptrace commands requiring a ptrace-stop.
+  EXPECT_THAT(ptrace(PTRACE_SETOPTIONS, child_pid, 0, 0), SyscallSucceeds());
+
+  // Group-stop is distinguished from signal-delivery-stop by PTRACE_GETSIGINFO
+  // failing with EINVAL.
+  siginfo_t siginfo = {};
+  EXPECT_THAT(ptrace(PTRACE_GETSIGINFO, child_pid, 0, &siginfo),
+              SyscallFailsWithErrno(EINVAL));
+
+  // Detach from the child and expect it to stay stopped without a notification.
+  ASSERT_THAT(ptrace(PTRACE_DETACH, child_pid, 0, 0), SyscallSucceeds());
+  ASSERT_THAT(waitpid(child_pid, &status, WUNTRACED | WNOHANG),
+              SyscallSucceedsWithValue(0));
+
+  // Sending it SIGCONT should cause it to leave its stop.
+  ASSERT_THAT(kill(child_pid, SIGCONT), SyscallSucceeds());
+  ASSERT_THAT(waitpid(child_pid, &status, WCONTINUED),
+              SyscallSucceedsWithValue(child_pid));
+  EXPECT_TRUE(WIFCONTINUED(status)) << " status " << status;
+
+  // Clean up the child.
+  ASSERT_THAT(kill(child_pid, SIGKILL), SyscallSucceeds());
+  ASSERT_THAT(waitpid(child_pid, &status, 0),
+              SyscallSucceedsWithValue(child_pid));
+  EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL)
+      << " status " << status;
+}
+
+// Fixture for tests parameterized by whether or not to use PTRACE_O_TRACEEXEC.
+class PtraceExecveTest : public ::testing::TestWithParam<bool> {
+ protected:
+  bool TraceExec() const { return GetParam(); }
+};
+
+TEST_P(PtraceExecveTest, Execve_GetRegs_PeekUser_SIGKILL_TraceClone_TraceExit) {
+  ExecveArray const owned_child_argv = {"/proc/self/exe",
+                                        "--ptrace_test_execve_child"};
+  char* const* const child_argv = owned_child_argv.get();
+
+  pid_t const child_pid = fork();
+  if (child_pid == 0) {
+    // In child process. The test relies on calling execve() in a non-leader
+    // thread; pthread_create() isn't async-signal-safe, so the safest way to
+    // do this is to execve() first, then enable tracing and run the expected
+    // child process behavior in the new subprocess.
+    execve(child_argv[0], child_argv, /* envp = */ nullptr);
+    TEST_PCHECK_MSG(false, "Survived execve to test child");
+  }
+  // In parent process.
+  ASSERT_THAT(child_pid, SyscallSucceeds());
+
+  // Wait for the child to send itself SIGSTOP and enter signal-delivery-stop.
+  int status;
+  ASSERT_THAT(waitpid(child_pid, &status, 0),
+              SyscallSucceedsWithValue(child_pid));
+  EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP)
+      << " status " << status;
+
+  // Enable PTRACE_O_TRACECLONE so we can get the ID of the child's non-leader
+  // thread, PTRACE_O_TRACEEXIT so we can observe the leader's death, and
+  // PTRACE_O_TRACEEXEC if required by the test. (The leader doesn't call
+  // execve, but options should be inherited across clone.)
+  long opts = PTRACE_O_TRACECLONE | PTRACE_O_TRACEEXIT;
+  if (TraceExec()) {
+    opts |= PTRACE_O_TRACEEXEC;
+  }
+  ASSERT_THAT(ptrace(PTRACE_SETOPTIONS, child_pid, 0, opts), SyscallSucceeds());
+
+  // Suppress the SIGSTOP and wait for the child's leader thread to report
+  // PTRACE_EVENT_CLONE. Get the new thread's ID from the event.
+  ASSERT_THAT(ptrace(PTRACE_CONT, child_pid, 0, 0), SyscallSucceeds());
+  ASSERT_THAT(waitpid(child_pid, &status, 0),
+              SyscallSucceedsWithValue(child_pid));
+  EXPECT_EQ(SIGTRAP | (PTRACE_EVENT_CLONE << 8), status >> 8);
+  unsigned long eventmsg;
+  ASSERT_THAT(ptrace(PTRACE_GETEVENTMSG, child_pid, 0, &eventmsg),
+              SyscallSucceeds());
+  pid_t const nonleader_tid = eventmsg;
+  pid_t const leader_tid = child_pid;
+
+  // The new thread should be ptraced and in signal-delivery-stop by SIGSTOP due
+  // to PTRACE_O_TRACECLONE.
+  //
+  // Before bf959931ddb88c4e4366e96dd22e68fa0db9527c "wait/ptrace: assume __WALL
+  // if the child is traced" (4.7) , waiting on it requires __WCLONE since, as a
+  // non-leader, its termination signal is 0. After, a standard wait is
+  // sufficient.
+  ASSERT_THAT(waitpid(nonleader_tid, &status, __WCLONE),
+              SyscallSucceedsWithValue(nonleader_tid));
+  EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP)
+      << " status " << status;
+
+  // Resume both child threads.
+  for (pid_t const tid : {leader_tid, nonleader_tid}) {
+    ASSERT_THAT(ptrace(PTRACE_CONT, tid, 0, 0), SyscallSucceeds());
+  }
+
+  // The non-leader child thread should call execve, causing the leader thread
+  // to enter PTRACE_EVENT_EXIT with an apparent exit code of 0. At this point,
+  // the leader has not yet exited, so the non-leader should be blocked in
+  // execve.
+  ASSERT_THAT(waitpid(leader_tid, &status, 0),
+              SyscallSucceedsWithValue(leader_tid));
+  EXPECT_EQ(SIGTRAP | (PTRACE_EVENT_EXIT << 8), status >> 8);
+  ASSERT_THAT(ptrace(PTRACE_GETEVENTMSG, leader_tid, 0, &eventmsg),
+              SyscallSucceeds());
+  EXPECT_TRUE(WIFEXITED(eventmsg) && WEXITSTATUS(eventmsg) == 0)
+      << " eventmsg " << eventmsg;
+  EXPECT_THAT(waitpid(nonleader_tid, &status, __WCLONE | WNOHANG),
+              SyscallSucceedsWithValue(0));
+
+  // Allow the leader to continue exiting. This should allow the non-leader to
+  // complete its execve, causing the original leader to be reaped without
+  // further notice and the non-leader to steal its ID.
+  ASSERT_THAT(ptrace(PTRACE_CONT, leader_tid, 0, 0), SyscallSucceeds());
+  ASSERT_THAT(waitpid(leader_tid, &status, 0),
+              SyscallSucceedsWithValue(leader_tid));
+  if (TraceExec()) {
+    // If PTRACE_O_TRACEEXEC was enabled, the execing thread should be in
+    // PTRACE_EVENT_EXEC-stop, with the event message set to its old thread ID.
+    EXPECT_EQ(SIGTRAP | (PTRACE_EVENT_EXEC << 8), status >> 8);
+    ASSERT_THAT(ptrace(PTRACE_GETEVENTMSG, leader_tid, 0, &eventmsg),
+                SyscallSucceeds());
+    EXPECT_EQ(nonleader_tid, eventmsg);
+  } else {
+    // Otherwise, the execing thread should have received SIGTRAP and should now
+    // be in signal-delivery-stop.
+    EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP)
+        << " status " << status;
+  }
+
+#ifdef __x86_64__
+  {
+    // CS should be 0x33, indicating an 64-bit binary.
+    constexpr uint64_t kAMD64UserCS = 0x33;
+    EXPECT_THAT(ptrace(PTRACE_PEEKUSER, leader_tid,
+                       offsetof(struct user_regs_struct, cs), 0),
+                SyscallSucceedsWithValue(kAMD64UserCS));
+    struct user_regs_struct regs = {};
+    ASSERT_THAT(ptrace(PTRACE_GETREGS, leader_tid, 0, &regs),
+                SyscallSucceeds());
+    EXPECT_EQ(kAMD64UserCS, regs.cs);
+  }
+#endif  // defined(__x86_64__)
+
+  // PTRACE_O_TRACEEXIT should have been inherited across execve. Send SIGKILL,
+  // which should end the PTRACE_EVENT_EXEC-stop or signal-delivery-stop and
+  // leave the child in PTRACE_EVENT_EXIT-stop.
+  ASSERT_THAT(kill(leader_tid, SIGKILL), SyscallSucceeds());
+  ASSERT_THAT(waitpid(leader_tid, &status, 0),
+              SyscallSucceedsWithValue(leader_tid));
+  EXPECT_EQ(SIGTRAP | (PTRACE_EVENT_EXIT << 8), status >> 8);
+  ASSERT_THAT(ptrace(PTRACE_GETEVENTMSG, leader_tid, 0, &eventmsg),
+              SyscallSucceeds());
+  EXPECT_TRUE(WIFSIGNALED(eventmsg) && WTERMSIG(eventmsg) == SIGKILL)
+      << " eventmsg " << eventmsg;
+
+  // End the PTRACE_EVENT_EXIT stop, allowing the child to exit.
+  ASSERT_THAT(ptrace(PTRACE_CONT, leader_tid, 0, 0), SyscallSucceeds());
+  ASSERT_THAT(waitpid(leader_tid, &status, 0),
+              SyscallSucceedsWithValue(leader_tid));
+  EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL)
+      << " status " << status;
+}
+
+[[noreturn]] void RunExecveChild() {
+  // Enable tracing, then raise SIGSTOP and expect our parent to suppress it.
+  TEST_PCHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) == 0);
+  MaybeSave();
+  RaiseSignal(SIGSTOP);
+  MaybeSave();
+
+  // Call execve() in a non-leader thread. As long as execve() succeeds, what
+  // exactly we execve() shouldn't really matter, since the tracer should kill
+  // us after execve() completes.
+  ScopedThread t([&] {
+    ExecveArray const owned_child_argv = {"/proc/self/exe",
+                                          "--this_flag_shouldnt_exist"};
+    char* const* const child_argv = owned_child_argv.get();
+    execve(child_argv[0], child_argv, /* envp = */ nullptr);
+    TEST_PCHECK_MSG(false, "Survived execve? (thread)");
+  });
+  t.Join();
+  TEST_CHECK_MSG(false, "Survived execve? (main)");
+  _exit(1);
+}
+
+INSTANTIATE_TEST_SUITE_P(TraceExec, PtraceExecveTest, ::testing::Bool());
+
+// This test has expectations on when syscall-enter/exit-stops occur that are
+// violated if saving occurs, since saving interrupts all syscalls, causing
+// premature syscall-exit.
+TEST(PtraceTest,
+     ExitWhenParentIsNotTracer_Syscall_TraceVfork_TraceVforkDone_NoRandomSave) {
+  constexpr int kExitTraceeExitCode = 99;
+
+  pid_t const child_pid = fork();
+  if (child_pid == 0) {
+    // In child process.
+
+    // Block SIGCHLD so it doesn't interrupt wait4.
+    sigset_t mask;
+    TEST_PCHECK(sigemptyset(&mask) == 0);
+    TEST_PCHECK(sigaddset(&mask, SIGCHLD) == 0);
+    TEST_PCHECK(sigprocmask(SIG_SETMASK, &mask, nullptr) == 0);
+    MaybeSave();
+
+    // Enable tracing, then raise SIGSTOP and expect our parent to suppress it.
+    TEST_PCHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) == 0);
+    MaybeSave();
+    RaiseSignal(SIGSTOP);
+    MaybeSave();
+
+    // Spawn a vfork child that exits immediately, and reap it. Don't save
+    // after vfork since the parent expects to see wait4 as the next syscall.
+    pid_t const pid = vfork();
+    if (pid == 0) {
+      _exit(kExitTraceeExitCode);
+    }
+    TEST_PCHECK_MSG(pid > 0, "vfork failed");
+
+    int status;
+    TEST_PCHECK(wait4(pid, &status, 0, nullptr) > 0);
+    MaybeSave();
+    TEST_CHECK(WIFEXITED(status) && WEXITSTATUS(status) == kExitTraceeExitCode);
+    _exit(0);
+  }
+  // In parent process.
+  ASSERT_THAT(child_pid, SyscallSucceeds());
+
+  // Wait for the child to send itself SIGSTOP and enter signal-delivery-stop.
+  int status;
+  ASSERT_THAT(child_pid, SyscallSucceeds());
+  ASSERT_THAT(waitpid(child_pid, &status, 0),
+              SyscallSucceedsWithValue(child_pid));
+  EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP)
+      << " status " << status;
+
+  // Enable PTRACE_O_TRACEVFORK so we can get the ID of the grandchild,
+  // PTRACE_O_TRACEVFORKDONE so we can observe PTRACE_EVENT_VFORK_DONE, and
+  // PTRACE_O_TRACESYSGOOD so syscall-enter/exit-stops are unambiguously
+  // indicated by a stop signal of SIGTRAP|0x80 rather than just SIGTRAP.
+  ASSERT_THAT(ptrace(PTRACE_SETOPTIONS, child_pid, 0,
+                     PTRACE_O_TRACEVFORK | PTRACE_O_TRACEVFORKDONE |
+                         PTRACE_O_TRACESYSGOOD),
+              SyscallSucceeds());
+
+  // Suppress the SIGSTOP and wait for the child to report PTRACE_EVENT_VFORK.
+  // Get the new process' ID from the event.
+  ASSERT_THAT(ptrace(PTRACE_CONT, child_pid, 0, 0), SyscallSucceeds());
+  ASSERT_THAT(waitpid(child_pid, &status, 0),
+              SyscallSucceedsWithValue(child_pid));
+  EXPECT_EQ(SIGTRAP | (PTRACE_EVENT_VFORK << 8), status >> 8);
+  unsigned long eventmsg;
+  ASSERT_THAT(ptrace(PTRACE_GETEVENTMSG, child_pid, 0, &eventmsg),
+              SyscallSucceeds());
+  pid_t const grandchild_pid = eventmsg;
+
+  // The grandchild should be traced by us and in signal-delivery-stop by
+  // SIGSTOP due to PTRACE_O_TRACEVFORK. This allows us to wait on it even
+  // though we're not its parent.
+  ASSERT_THAT(waitpid(grandchild_pid, &status, 0),
+              SyscallSucceedsWithValue(grandchild_pid));
+  EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP)
+      << " status " << status;
+
+  // Resume the child with PTRACE_SYSCALL. Since the grandchild is still in
+  // signal-delivery-stop, the child should remain in vfork() waiting for the
+  // grandchild to exec or exit.
+  ASSERT_THAT(ptrace(PTRACE_SYSCALL, child_pid, 0, 0), SyscallSucceeds());
+  absl::SleepFor(absl::Seconds(1));
+  ASSERT_THAT(waitpid(child_pid, &status, WNOHANG),
+              SyscallSucceedsWithValue(0));
+
+  // Suppress the grandchild's SIGSTOP and wait for the grandchild to exit. Pass
+  // WNOWAIT to waitid() so that we don't acknowledge the grandchild's exit yet.
+  ASSERT_THAT(ptrace(PTRACE_CONT, grandchild_pid, 0, 0), SyscallSucceeds());
+  siginfo_t siginfo = {};
+  ASSERT_THAT(waitid(P_PID, grandchild_pid, &siginfo, WEXITED | WNOWAIT),
+              SyscallSucceeds());
+  EXPECT_EQ(SIGCHLD, siginfo.si_signo);
+  EXPECT_EQ(CLD_EXITED, siginfo.si_code);
+  EXPECT_EQ(kExitTraceeExitCode, siginfo.si_status);
+  EXPECT_EQ(grandchild_pid, siginfo.si_pid);
+  EXPECT_EQ(getuid(), siginfo.si_uid);
+
+  // The child should now be in PTRACE_EVENT_VFORK_DONE stop. The event
+  // message should still be the grandchild's PID.
+  ASSERT_THAT(waitpid(child_pid, &status, 0),
+              SyscallSucceedsWithValue(child_pid));
+  EXPECT_EQ(SIGTRAP | (PTRACE_EVENT_VFORK_DONE << 8), status >> 8);
+  ASSERT_THAT(ptrace(PTRACE_GETEVENTMSG, child_pid, 0, &eventmsg),
+              SyscallSucceeds());
+  EXPECT_EQ(grandchild_pid, eventmsg);
+
+  // Resume the child with PTRACE_SYSCALL again and expect it to enter
+  // syscall-exit-stop for vfork() or clone(), either of which should return the
+  // grandchild's PID from the syscall. Aside from PTRACE_O_TRACESYSGOOD,
+  // syscall-stops are distinguished from signal-delivery-stop by
+  // PTRACE_GETSIGINFO returning a siginfo for which si_code == SIGTRAP or
+  // SIGTRAP|0x80.
+  ASSERT_THAT(ptrace(PTRACE_SYSCALL, child_pid, 0, 0), SyscallSucceeds());
+  ASSERT_THAT(waitpid(child_pid, &status, 0),
+              SyscallSucceedsWithValue(child_pid));
+  EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == (SIGTRAP | 0x80))
+      << " status " << status;
+  ASSERT_THAT(ptrace(PTRACE_GETSIGINFO, child_pid, 0, &siginfo),
+              SyscallSucceeds());
+  EXPECT_TRUE(siginfo.si_code == SIGTRAP || siginfo.si_code == (SIGTRAP | 0x80))
+      << "si_code = " << siginfo.si_code;
+
+  {
+    struct user_regs_struct regs = {};
+    struct iovec iov;
+    iov.iov_base = &regs;
+    iov.iov_len = sizeof(regs);
+    EXPECT_THAT(ptrace(PTRACE_GETREGSET, child_pid, NT_PRSTATUS, &iov),
+                SyscallSucceeds());
+#if defined(__x86_64__)
+    EXPECT_TRUE(regs.orig_rax == SYS_vfork || regs.orig_rax == SYS_clone)
+        << "orig_rax = " << regs.orig_rax;
+    EXPECT_EQ(grandchild_pid, regs.rax);
+#elif defined(__aarch64__)
+    EXPECT_TRUE(regs.regs[8] == SYS_clone) << "regs[8] = " << regs.regs[8];
+    EXPECT_EQ(grandchild_pid, regs.regs[0]);
+#endif  // defined(__x86_64__)
+  }
+
+  // After this point, the child will be making wait4 syscalls that will be
+  // interrupted by saving, so saving is not permitted. Note that this is
+  // explicitly released below once the grandchild exits.
+  DisableSave ds;
+
+  // Resume the child with PTRACE_SYSCALL again and expect it to enter
+  // syscall-enter-stop for wait4().
+  ASSERT_THAT(ptrace(PTRACE_SYSCALL, child_pid, 0, 0), SyscallSucceeds());
+  ASSERT_THAT(waitpid(child_pid, &status, 0),
+              SyscallSucceedsWithValue(child_pid));
+  EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == (SIGTRAP | 0x80))
+      << " status " << status;
+  ASSERT_THAT(ptrace(PTRACE_GETSIGINFO, child_pid, 0, &siginfo),
+              SyscallSucceeds());
+  EXPECT_TRUE(siginfo.si_code == SIGTRAP || siginfo.si_code == (SIGTRAP | 0x80))
+      << "si_code = " << siginfo.si_code;
+#ifdef __x86_64__
+  {
+    EXPECT_THAT(ptrace(PTRACE_PEEKUSER, child_pid,
+                       offsetof(struct user_regs_struct, orig_rax), 0),
+                SyscallSucceedsWithValue(SYS_wait4));
+  }
+#endif  // defined(__x86_64__)
+
+  // Resume the child with PTRACE_SYSCALL again. Since the grandchild is
+  // waiting for the tracer (us) to acknowledge its exit first, wait4 should
+  // block.
+  ASSERT_THAT(ptrace(PTRACE_SYSCALL, child_pid, 0, 0), SyscallSucceeds());
+  absl::SleepFor(absl::Seconds(1));
+  ASSERT_THAT(waitpid(child_pid, &status, WNOHANG),
+              SyscallSucceedsWithValue(0));
+
+  // Acknowledge the grandchild's exit.
+  ASSERT_THAT(waitpid(grandchild_pid, &status, 0),
+              SyscallSucceedsWithValue(grandchild_pid));
+  ds.reset();
+
+  // Now the child should enter syscall-exit-stop for wait4, returning with the
+  // grandchild's PID.
+  ASSERT_THAT(waitpid(child_pid, &status, 0),
+              SyscallSucceedsWithValue(child_pid));
+  EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == (SIGTRAP | 0x80))
+      << " status " << status;
+  {
+    struct user_regs_struct regs = {};
+    struct iovec iov;
+    iov.iov_base = &regs;
+    iov.iov_len = sizeof(regs);
+    EXPECT_THAT(ptrace(PTRACE_GETREGSET, child_pid, NT_PRSTATUS, &iov),
+                SyscallSucceeds());
+#if defined(__x86_64__)
+    EXPECT_EQ(SYS_wait4, regs.orig_rax);
+    EXPECT_EQ(grandchild_pid, regs.rax);
+#elif defined(__aarch64__)
+    EXPECT_EQ(SYS_wait4, regs.regs[8]);
+    EXPECT_EQ(grandchild_pid, regs.regs[0]);
+#endif  // defined(__x86_64__)
+  }
+
+  // Detach from the child and wait for it to exit.
+  ASSERT_THAT(ptrace(PTRACE_DETACH, child_pid, 0, 0), SyscallSucceeds());
+  ASSERT_THAT(waitpid(child_pid, &status, 0),
+              SyscallSucceedsWithValue(child_pid));
+  EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+      << " status " << status;
+}
+
+// These tests requires knowledge of architecture-specific syscall convention.
+#ifdef __x86_64__
+TEST(PtraceTest, Int3) {
+  SKIP_IF(PlatformSupportInt3() == PlatformSupport::NotSupported);
+
+  pid_t const child_pid = fork();
+  if (child_pid == 0) {
+    // In child process.
+
+    // Enable tracing.
+    TEST_PCHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) == 0);
+
+    // Interrupt 3 - trap to debugger
+    asm("int3");
+
+    _exit(56);
+  }
+  // In parent process.
+  ASSERT_THAT(child_pid, SyscallSucceeds());
+
+  int status;
+  ASSERT_THAT(waitpid(child_pid, &status, 0),
+              SyscallSucceedsWithValue(child_pid));
+  EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP)
+      << " status " << status;
+
+  ASSERT_THAT(ptrace(PTRACE_CONT, child_pid, 0, 0), SyscallSucceeds());
+
+  // The child should validate the injected return value and then exit normally.
+  ASSERT_THAT(waitpid(child_pid, &status, 0),
+              SyscallSucceedsWithValue(child_pid));
+  EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 56)
+      << " status " << status;
+}
+
+TEST(PtraceTest, Sysemu_PokeUser) {
+  constexpr int kSysemuHelperFirstExitCode = 126;
+  constexpr uint64_t kSysemuInjectedExitGroupReturn = 42;
+
+  pid_t const child_pid = fork();
+  if (child_pid == 0) {
+    // In child process.
+
+    // Enable tracing, then raise SIGSTOP and expect our parent to suppress it.
+    TEST_PCHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) == 0);
+    RaiseSignal(SIGSTOP);
+
+    // Try to exit_group, expecting the tracer to skip the syscall and set its
+    // own return value.
+    int const rv = syscall(SYS_exit_group, kSysemuHelperFirstExitCode);
+    TEST_PCHECK_MSG(rv == kSysemuInjectedExitGroupReturn,
+                    "exit_group returned incorrect value");
+
+    _exit(0);
+  }
+  // In parent process.
+  ASSERT_THAT(child_pid, SyscallSucceeds());
+
+  // Wait for the child to send itself SIGSTOP and enter signal-delivery-stop.
+  int status;
+  ASSERT_THAT(waitpid(child_pid, &status, 0),
+              SyscallSucceedsWithValue(child_pid));
+  EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP)
+      << " status " << status;
+
+  // Suppress the SIGSTOP and wait for the child to enter syscall-enter-stop
+  // for its first exit_group syscall.
+  ASSERT_THAT(ptrace(kPtraceSysemu, child_pid, 0, 0), SyscallSucceeds());
+  ASSERT_THAT(waitpid(child_pid, &status, 0),
+              SyscallSucceedsWithValue(child_pid));
+  EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP)
+      << " status " << status;
+
+  struct user_regs_struct regs = {};
+  ASSERT_THAT(ptrace(PTRACE_GETREGS, child_pid, 0, &regs), SyscallSucceeds());
+  EXPECT_EQ(SYS_exit_group, regs.orig_rax);
+  EXPECT_EQ(-ENOSYS, regs.rax);
+  EXPECT_EQ(kSysemuHelperFirstExitCode, regs.rdi);
+
+  // Replace the exit_group return value, then resume the child, which should
+  // automatically skip the syscall.
+  ASSERT_THAT(
+      ptrace(PTRACE_POKEUSER, child_pid, offsetof(struct user_regs_struct, rax),
+             kSysemuInjectedExitGroupReturn),
+      SyscallSucceeds());
+  ASSERT_THAT(ptrace(PTRACE_DETACH, child_pid, 0, 0), SyscallSucceeds());
+
+  // The child should validate the injected return value and then exit normally.
+  ASSERT_THAT(waitpid(child_pid, &status, 0),
+              SyscallSucceedsWithValue(child_pid));
+  EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+      << " status " << status;
+}
+
+// This test also cares about syscall-exit-stop.
+TEST(PtraceTest, ERESTART_NoRandomSave) {
+  constexpr int kSigno = SIGUSR1;
+
+  pid_t const child_pid = fork();
+  if (child_pid == 0) {
+    // In child process.
+
+    // Ignore, but unblock, kSigno.
+    struct sigaction sa = {};
+    sa.sa_handler = SIG_IGN;
+    TEST_PCHECK(sigfillset(&sa.sa_mask) == 0);
+    TEST_PCHECK(sigaction(kSigno, &sa, nullptr) == 0);
+    MaybeSave();
+    TEST_PCHECK(sigprocmask(SIG_UNBLOCK, &sa.sa_mask, nullptr) == 0);
+    MaybeSave();
+
+    // Enable tracing, then raise SIGSTOP and expect our parent to suppress it.
+    TEST_PCHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) == 0);
+    RaiseSignal(SIGSTOP);
+
+    // Invoke the pause syscall, which normally should not return until we
+    // receive a signal that "either terminates the process or causes the
+    // invocation of a signal-catching function".
+    pause();
+
+    _exit(0);
+  }
+  ASSERT_THAT(child_pid, SyscallSucceeds());
+
+  // Wait for the child to send itself SIGSTOP and enter signal-delivery-stop.
+  int status;
+  ASSERT_THAT(waitpid(child_pid, &status, 0),
+              SyscallSucceedsWithValue(child_pid));
+  EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP)
+      << " status " << status;
+
+  // After this point, the child's pause syscall will be interrupted by saving,
+  // so saving is not permitted. Note that this is explicitly released below
+  // once the child is stopped.
+  DisableSave ds;
+
+  // Suppress the SIGSTOP and wait for the child to enter syscall-enter-stop for
+  // its pause syscall.
+  ASSERT_THAT(ptrace(PTRACE_SYSCALL, child_pid, 0, 0), SyscallSucceeds());
+  ASSERT_THAT(waitpid(child_pid, &status, 0),
+              SyscallSucceedsWithValue(child_pid));
+  EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP)
+      << " status " << status;
+
+  struct user_regs_struct regs = {};
+  ASSERT_THAT(ptrace(PTRACE_GETREGS, child_pid, 0, &regs), SyscallSucceeds());
+  EXPECT_EQ(SYS_pause, regs.orig_rax);
+  EXPECT_EQ(-ENOSYS, regs.rax);
+
+  // Resume the child with PTRACE_SYSCALL and expect it to block in the pause
+  // syscall.
+  ASSERT_THAT(ptrace(PTRACE_SYSCALL, child_pid, 0, 0), SyscallSucceeds());
+  absl::SleepFor(absl::Seconds(1));
+  ASSERT_THAT(waitpid(child_pid, &status, WNOHANG),
+              SyscallSucceedsWithValue(0));
+
+  // Send the child kSigno, causing it to return ERESTARTNOHAND and enter
+  // syscall-exit-stop from the pause syscall.
+  constexpr int ERESTARTNOHAND = 514;
+  ASSERT_THAT(kill(child_pid, kSigno), SyscallSucceeds());
+  ASSERT_THAT(waitpid(child_pid, &status, 0),
+              SyscallSucceedsWithValue(child_pid));
+  EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP)
+      << " status " << status;
+  ds.reset();
+
+  ASSERT_THAT(ptrace(PTRACE_GETREGS, child_pid, 0, &regs), SyscallSucceeds());
+  EXPECT_EQ(SYS_pause, regs.orig_rax);
+  EXPECT_EQ(-ERESTARTNOHAND, regs.rax);
+
+  // Replace the return value from pause with 0, causing pause to not be
+  // restarted despite kSigno being ignored.
+  ASSERT_THAT(ptrace(PTRACE_POKEUSER, child_pid,
+                     offsetof(struct user_regs_struct, rax), 0),
+              SyscallSucceeds());
+
+  // Detach from the child and wait for it to exit.
+  ASSERT_THAT(ptrace(PTRACE_DETACH, child_pid, 0, 0), SyscallSucceeds());
+  ASSERT_THAT(waitpid(child_pid, &status, 0),
+              SyscallSucceedsWithValue(child_pid));
+  EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+      << " status " << status;
+}
+#endif  // defined(__x86_64__)
+
+TEST(PtraceTest, Seize_Interrupt_Listen) {
+  volatile long child_should_spin = 1;
+  pid_t const child_pid = fork();
+  if (child_pid == 0) {
+    // In child process.
+    while (child_should_spin) {
+      SleepSafe(absl::Seconds(1));
+    }
+    _exit(1);
+  }
+
+  // In parent process.
+  ASSERT_THAT(child_pid, SyscallSucceeds());
+
+  // Attach to the child with PTRACE_SEIZE; doing so should not stop the child.
+  ASSERT_THAT(ptrace(PTRACE_SEIZE, child_pid, 0, 0), SyscallSucceeds());
+  int status;
+  EXPECT_THAT(waitpid(child_pid, &status, WNOHANG),
+              SyscallSucceedsWithValue(0));
+
+  // Stop the child with PTRACE_INTERRUPT.
+  ASSERT_THAT(ptrace(PTRACE_INTERRUPT, child_pid, 0, 0), SyscallSucceeds());
+  ASSERT_THAT(waitpid(child_pid, &status, 0),
+              SyscallSucceedsWithValue(child_pid));
+  EXPECT_EQ(SIGTRAP | (kPtraceEventStop << 8), status >> 8);
+
+  // Unset child_should_spin to verify that the child never leaves the spin
+  // loop.
+  ASSERT_THAT(ptrace(PTRACE_POKEDATA, child_pid, &child_should_spin, 0),
+              SyscallSucceeds());
+
+  // Send SIGSTOP to the child, then resume it, allowing it to proceed to
+  // signal-delivery-stop.
+  ASSERT_THAT(kill(child_pid, SIGSTOP), SyscallSucceeds());
+  ASSERT_THAT(ptrace(PTRACE_CONT, child_pid, 0, 0), SyscallSucceeds());
+  ASSERT_THAT(waitpid(child_pid, &status, 0),
+              SyscallSucceedsWithValue(child_pid));
+  EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP)
+      << " status " << status;
+
+  // Release the child from signal-delivery-stop without suppressing the
+  // SIGSTOP, causing it to enter group-stop.
+  ASSERT_THAT(ptrace(PTRACE_CONT, child_pid, 0, SIGSTOP), SyscallSucceeds());
+  ASSERT_THAT(waitpid(child_pid, &status, 0),
+              SyscallSucceedsWithValue(child_pid));
+  EXPECT_EQ(SIGSTOP | (kPtraceEventStop << 8), status >> 8);
+
+  // "The state of the tracee after PTRACE_LISTEN is somewhat of a gray area: it
+  // is not in any ptrace-stop (ptrace commands won't work on it, and it will
+  // deliver waitpid(2) notifications), but it also may be considered 'stopped'
+  // because it is not executing instructions (is not scheduled), and if it was
+  // in group-stop before PTRACE_LISTEN, it will not respond to signals until
+  // SIGCONT is received." - ptrace(2).
+  ASSERT_THAT(ptrace(PTRACE_LISTEN, child_pid, 0, 0), SyscallSucceeds());
+  EXPECT_THAT(ptrace(PTRACE_CONT, child_pid, 0, 0),
+              SyscallFailsWithErrno(ESRCH));
+  EXPECT_THAT(waitpid(child_pid, &status, WNOHANG),
+              SyscallSucceedsWithValue(0));
+  EXPECT_THAT(kill(child_pid, SIGTERM), SyscallSucceeds());
+  absl::SleepFor(absl::Seconds(1));
+  EXPECT_THAT(waitpid(child_pid, &status, WNOHANG),
+              SyscallSucceedsWithValue(0));
+
+  // Send SIGCONT to the child, causing it to leave group-stop and re-trap due
+  // to PTRACE_LISTEN.
+  EXPECT_THAT(kill(child_pid, SIGCONT), SyscallSucceeds());
+  ASSERT_THAT(waitpid(child_pid, &status, 0),
+              SyscallSucceedsWithValue(child_pid));
+  EXPECT_EQ(SIGTRAP | (kPtraceEventStop << 8), status >> 8);
+
+  // Detach the child and expect it to exit due to the SIGTERM we sent while
+  // it was stopped by PTRACE_LISTEN.
+  ASSERT_THAT(ptrace(PTRACE_DETACH, child_pid, 0, 0), SyscallSucceeds());
+  ASSERT_THAT(waitpid(child_pid, &status, 0),
+              SyscallSucceedsWithValue(child_pid));
+  EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGTERM)
+      << " status " << status;
+}
+
+TEST(PtraceTest, Interrupt_Listen_RequireSeize) {
+  pid_t const child_pid = fork();
+  if (child_pid == 0) {
+    // In child process.
+    TEST_PCHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) == 0);
+    MaybeSave();
+    raise(SIGSTOP);
+    _exit(0);
+  }
+  // In parent process.
+  ASSERT_THAT(child_pid, SyscallSucceeds());
+
+  // Wait for the child to send itself SIGSTOP and enter signal-delivery-stop.
+  int status;
+  ASSERT_THAT(waitpid(child_pid, &status, 0),
+              SyscallSucceedsWithValue(child_pid));
+  EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP)
+      << " status " << status;
+
+  // PTRACE_INTERRUPT and PTRACE_LISTEN should fail since the child wasn't
+  // attached with PTRACE_SEIZE, leaving the child in signal-delivery-stop.
+  EXPECT_THAT(ptrace(PTRACE_INTERRUPT, child_pid, 0, 0),
+              SyscallFailsWithErrno(EIO));
+  EXPECT_THAT(ptrace(PTRACE_LISTEN, child_pid, 0, 0),
+              SyscallFailsWithErrno(EIO));
+
+  // Suppress SIGSTOP and detach from the child, expecting it to exit normally.
+  ASSERT_THAT(ptrace(PTRACE_DETACH, child_pid, 0, 0), SyscallSucceeds());
+  ASSERT_THAT(waitpid(child_pid, &status, 0),
+              SyscallSucceedsWithValue(child_pid));
+  EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+      << " status " << status;
+}
+
+TEST(PtraceTest, SeizeSetOptions) {
+  pid_t const child_pid = fork();
+  if (child_pid == 0) {
+    // In child process.
+    while (true) {
+      SleepSafe(absl::Seconds(1));
+    }
+  }
+
+  // In parent process.
+  ASSERT_THAT(child_pid, SyscallSucceeds());
+
+  // Attach to the child with PTRACE_SEIZE while setting PTRACE_O_TRACESYSGOOD.
+  ASSERT_THAT(ptrace(PTRACE_SEIZE, child_pid, 0, PTRACE_O_TRACESYSGOOD),
+              SyscallSucceeds());
+
+  // Stop the child with PTRACE_INTERRUPT.
+  ASSERT_THAT(ptrace(PTRACE_INTERRUPT, child_pid, 0, 0), SyscallSucceeds());
+  int status;
+  ASSERT_THAT(waitpid(child_pid, &status, 0),
+              SyscallSucceedsWithValue(child_pid));
+  EXPECT_EQ(SIGTRAP | (kPtraceEventStop << 8), status >> 8);
+
+  // Resume the child with PTRACE_SYSCALL and wait for it to enter
+  // syscall-enter-stop. The stop signal status from the syscall stop should be
+  // SIGTRAP|0x80, reflecting PTRACE_O_TRACESYSGOOD.
+  ASSERT_THAT(ptrace(PTRACE_SYSCALL, child_pid, 0, 0), SyscallSucceeds());
+  ASSERT_THAT(waitpid(child_pid, &status, 0),
+              SyscallSucceedsWithValue(child_pid));
+  EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == (SIGTRAP | 0x80))
+      << " status " << status;
+
+  // Clean up the child.
+  ASSERT_THAT(kill(child_pid, SIGKILL), SyscallSucceeds());
+  ASSERT_THAT(waitpid(child_pid, &status, 0),
+              SyscallSucceedsWithValue(child_pid));
+  if (WIFSTOPPED(status) && WSTOPSIG(status) == (SIGTRAP | 0x80)) {
+    // "SIGKILL kills even within system calls (syscall-exit-stop is not
+    // generated prior to death by SIGKILL). The net effect is that SIGKILL
+    // always kills the process (all its threads), even if some threads of the
+    // process are ptraced." - ptrace(2). This is technically true, but...
+    //
+    // When we send SIGKILL to the child, kernel/signal.c:complete_signal() =>
+    // signal_wake_up(resume=1) kicks the tracee out of the syscall-enter-stop.
+    // The pending SIGKILL causes the syscall to be skipped, but the child
+    // thread still reports syscall-exit before checking for pending signals; in
+    // current kernels, this is
+    // arch/x86/entry/common.c:syscall_return_slowpath() =>
+    // syscall_slow_exit_work() =>
+    // include/linux/tracehook.h:tracehook_report_syscall_exit() =>
+    // ptrace_report_syscall() => kernel/signal.c:ptrace_notify() =>
+    // ptrace_do_notify() => ptrace_stop().
+    //
+    // ptrace_stop() sets the task's state to TASK_TRACED and the task's
+    // exit_code to SIGTRAP|0x80 (passed by ptrace_report_syscall()), then calls
+    // freezable_schedule(). freezable_schedule() eventually reaches
+    // __schedule(), which detects signal_pending_state() due to the pending
+    // SIGKILL, sets the task's state back to TASK_RUNNING, and returns without
+    // descheduling. Thus, the task never enters syscall-exit-stop. However, if
+    // our wait4() => kernel/exit.c:wait_task_stopped() racily observes the
+    // TASK_TRACED state and the non-zero exit code set by ptrace_stop() before
+    // __schedule() sets the state back to TASK_RUNNING, it will return the
+    // task's exit_code as status W_STOPCODE(SIGTRAP|0x80). So we get a spurious
+    // syscall-exit-stop notification, and need to wait4() again for task exit.
+    //
+    // gVisor is not susceptible to this race because
+    // kernel.Task.waitCollectTraceeStopLocked() checks specifically for an
+    // active ptraceStop, which is not initiated if SIGKILL is pending.
+    std::cout << "Observed syscall-exit after SIGKILL" << std::endl;
+    ASSERT_THAT(waitpid(child_pid, &status, 0),
+                SyscallSucceedsWithValue(child_pid));
+  }
+  EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL)
+      << " status " << status;
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
+
+int main(int argc, char** argv) {
+  gvisor::testing::TestInit(&argc, &argv);
+
+  if (absl::GetFlag(FLAGS_ptrace_test_execve_child)) {
+    gvisor::testing::RunExecveChild();
+  }
+
+  return gvisor::testing::RunAllTests();
+}
diff --git a/test/syscalls/linux/pty.cc b/test/syscalls/linux/pty.cc
new file mode 100644
index 000000000..f9392b9e0
--- /dev/null
+++ b/test/syscalls/linux/pty.cc
@@ -0,0 +1,1627 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <linux/capability.h>
+#include <linux/major.h>
+#include <poll.h>
+#include <sched.h>
+#include <signal.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/sysmacros.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <termios.h>
+#include <unistd.h>
+
+#include <iostream>
+
+#include "gtest/gtest.h"
+#include "absl/base/macros.h"
+#include "absl/strings/str_cat.h"
+#include "absl/synchronization/notification.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/util/capability_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/posix_error.h"
+#include "test/util/pty_util.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+using ::testing::AnyOf;
+using ::testing::Contains;
+using ::testing::Eq;
+using ::testing::Not;
+
+// Tests Unix98 pseudoterminals.
+//
+// These tests assume that /dev/ptmx exists and is associated with a devpts
+// filesystem mounted at /dev/pts/. While a Linux distribution could
+// theoretically place those anywhere, glibc expects those locations, so they
+// are effectively fixed.
+
+// Minor device number for an unopened ptmx file.
+constexpr int kPtmxMinor = 2;
+
+// The timeout when polling for data from a pty. When data is written to one end
+// of a pty, Linux asynchronously makes it available to the other end, so we
+// have to wait.
+constexpr absl::Duration kTimeout = absl::Seconds(20);
+
+// The maximum line size in bytes returned per read from a pty file.
+constexpr int kMaxLineSize = 4096;
+
+constexpr char kMasterPath[] = "/dev/ptmx";
+
+// glibc defines its own, different, version of struct termios. We care about
+// what the kernel does, not glibc.
+#define KERNEL_NCCS 19
+struct kernel_termios {
+  tcflag_t c_iflag;
+  tcflag_t c_oflag;
+  tcflag_t c_cflag;
+  tcflag_t c_lflag;
+  cc_t c_line;
+  cc_t c_cc[KERNEL_NCCS];
+};
+
+bool operator==(struct kernel_termios const& a,
+                struct kernel_termios const& b) {
+  return memcmp(&a, &b, sizeof(a)) == 0;
+}
+
+// Returns the termios-style control character for the passed character.
+//
+// e.g., for Ctrl-C, i.e., ^C, call ControlCharacter('C').
+//
+// Standard control characters are ASCII bytes 0 through 31.
+constexpr char ControlCharacter(char c) {
+  // A is 1, B is 2, etc.
+  return c - 'A' + 1;
+}
+
+// Returns the printable character the given control character represents.
+constexpr char FromControlCharacter(char c) { return c + 'A' - 1; }
+
+// Returns true if c is a control character.
+//
+// Standard control characters are ASCII bytes 0 through 31.
+constexpr bool IsControlCharacter(char c) { return c <= 31; }
+
+struct Field {
+  const char* name;
+  uint64_t mask;
+  uint64_t value;
+};
+
+// ParseFields returns a string representation of value, using the names in
+// fields.
+std::string ParseFields(const Field* fields, size_t len, uint64_t value) {
+  bool first = true;
+  std::string s;
+  for (size_t i = 0; i < len; i++) {
+    const Field f = fields[i];
+    if ((value & f.mask) == f.value) {
+      if (!first) {
+        s += "|";
+      }
+      s += f.name;
+      first = false;
+      value &= ~f.mask;
+    }
+  }
+
+  if (value) {
+    if (!first) {
+      s += "|";
+    }
+    absl::StrAppend(&s, value);
+  }
+
+  return s;
+}
+
+const Field kIflagFields[] = {
+    {"IGNBRK", IGNBRK, IGNBRK}, {"BRKINT", BRKINT, BRKINT},
+    {"IGNPAR", IGNPAR, IGNPAR}, {"PARMRK", PARMRK, PARMRK},
+    {"INPCK", INPCK, INPCK},    {"ISTRIP", ISTRIP, ISTRIP},
+    {"INLCR", INLCR, INLCR},    {"IGNCR", IGNCR, IGNCR},
+    {"ICRNL", ICRNL, ICRNL},    {"IUCLC", IUCLC, IUCLC},
+    {"IXON", IXON, IXON},       {"IXANY", IXANY, IXANY},
+    {"IXOFF", IXOFF, IXOFF},    {"IMAXBEL", IMAXBEL, IMAXBEL},
+    {"IUTF8", IUTF8, IUTF8},
+};
+
+const Field kOflagFields[] = {
+    {"OPOST", OPOST, OPOST}, {"OLCUC", OLCUC, OLCUC},
+    {"ONLCR", ONLCR, ONLCR}, {"OCRNL", OCRNL, OCRNL},
+    {"ONOCR", ONOCR, ONOCR}, {"ONLRET", ONLRET, ONLRET},
+    {"OFILL", OFILL, OFILL}, {"OFDEL", OFDEL, OFDEL},
+    {"NL0", NLDLY, NL0},     {"NL1", NLDLY, NL1},
+    {"CR0", CRDLY, CR0},     {"CR1", CRDLY, CR1},
+    {"CR2", CRDLY, CR2},     {"CR3", CRDLY, CR3},
+    {"TAB0", TABDLY, TAB0},  {"TAB1", TABDLY, TAB1},
+    {"TAB2", TABDLY, TAB2},  {"TAB3", TABDLY, TAB3},
+    {"BS0", BSDLY, BS0},     {"BS1", BSDLY, BS1},
+    {"FF0", FFDLY, FF0},     {"FF1", FFDLY, FF1},
+    {"VT0", VTDLY, VT0},     {"VT1", VTDLY, VT1},
+    {"XTABS", XTABS, XTABS},
+};
+
+#ifndef IBSHIFT
+// Shift from CBAUD to CIBAUD.
+#define IBSHIFT 16
+#endif
+
+const Field kCflagFields[] = {
+    {"B0", CBAUD, B0},
+    {"B50", CBAUD, B50},
+    {"B75", CBAUD, B75},
+    {"B110", CBAUD, B110},
+    {"B134", CBAUD, B134},
+    {"B150", CBAUD, B150},
+    {"B200", CBAUD, B200},
+    {"B300", CBAUD, B300},
+    {"B600", CBAUD, B600},
+    {"B1200", CBAUD, B1200},
+    {"B1800", CBAUD, B1800},
+    {"B2400", CBAUD, B2400},
+    {"B4800", CBAUD, B4800},
+    {"B9600", CBAUD, B9600},
+    {"B19200", CBAUD, B19200},
+    {"B38400", CBAUD, B38400},
+    {"CS5", CSIZE, CS5},
+    {"CS6", CSIZE, CS6},
+    {"CS7", CSIZE, CS7},
+    {"CS8", CSIZE, CS8},
+    {"CSTOPB", CSTOPB, CSTOPB},
+    {"CREAD", CREAD, CREAD},
+    {"PARENB", PARENB, PARENB},
+    {"PARODD", PARODD, PARODD},
+    {"HUPCL", HUPCL, HUPCL},
+    {"CLOCAL", CLOCAL, CLOCAL},
+    {"B57600", CBAUD, B57600},
+    {"B115200", CBAUD, B115200},
+    {"B230400", CBAUD, B230400},
+    {"B460800", CBAUD, B460800},
+    {"B500000", CBAUD, B500000},
+    {"B576000", CBAUD, B576000},
+    {"B921600", CBAUD, B921600},
+    {"B1000000", CBAUD, B1000000},
+    {"B1152000", CBAUD, B1152000},
+    {"B1500000", CBAUD, B1500000},
+    {"B2000000", CBAUD, B2000000},
+    {"B2500000", CBAUD, B2500000},
+    {"B3000000", CBAUD, B3000000},
+    {"B3500000", CBAUD, B3500000},
+    {"B4000000", CBAUD, B4000000},
+    {"CMSPAR", CMSPAR, CMSPAR},
+    {"CRTSCTS", CRTSCTS, CRTSCTS},
+    {"IB0", CIBAUD, B0 << IBSHIFT},
+    {"IB50", CIBAUD, B50 << IBSHIFT},
+    {"IB75", CIBAUD, B75 << IBSHIFT},
+    {"IB110", CIBAUD, B110 << IBSHIFT},
+    {"IB134", CIBAUD, B134 << IBSHIFT},
+    {"IB150", CIBAUD, B150 << IBSHIFT},
+    {"IB200", CIBAUD, B200 << IBSHIFT},
+    {"IB300", CIBAUD, B300 << IBSHIFT},
+    {"IB600", CIBAUD, B600 << IBSHIFT},
+    {"IB1200", CIBAUD, B1200 << IBSHIFT},
+    {"IB1800", CIBAUD, B1800 << IBSHIFT},
+    {"IB2400", CIBAUD, B2400 << IBSHIFT},
+    {"IB4800", CIBAUD, B4800 << IBSHIFT},
+    {"IB9600", CIBAUD, B9600 << IBSHIFT},
+    {"IB19200", CIBAUD, B19200 << IBSHIFT},
+    {"IB38400", CIBAUD, B38400 << IBSHIFT},
+    {"IB57600", CIBAUD, B57600 << IBSHIFT},
+    {"IB115200", CIBAUD, B115200 << IBSHIFT},
+    {"IB230400", CIBAUD, B230400 << IBSHIFT},
+    {"IB460800", CIBAUD, B460800 << IBSHIFT},
+    {"IB500000", CIBAUD, B500000 << IBSHIFT},
+    {"IB576000", CIBAUD, B576000 << IBSHIFT},
+    {"IB921600", CIBAUD, B921600 << IBSHIFT},
+    {"IB1000000", CIBAUD, B1000000 << IBSHIFT},
+    {"IB1152000", CIBAUD, B1152000 << IBSHIFT},
+    {"IB1500000", CIBAUD, B1500000 << IBSHIFT},
+    {"IB2000000", CIBAUD, B2000000 << IBSHIFT},
+    {"IB2500000", CIBAUD, B2500000 << IBSHIFT},
+    {"IB3000000", CIBAUD, B3000000 << IBSHIFT},
+    {"IB3500000", CIBAUD, B3500000 << IBSHIFT},
+    {"IB4000000", CIBAUD, B4000000 << IBSHIFT},
+};
+
+const Field kLflagFields[] = {
+    {"ISIG", ISIG, ISIG},          {"ICANON", ICANON, ICANON},
+    {"XCASE", XCASE, XCASE},       {"ECHO", ECHO, ECHO},
+    {"ECHOE", ECHOE, ECHOE},       {"ECHOK", ECHOK, ECHOK},
+    {"ECHONL", ECHONL, ECHONL},    {"NOFLSH", NOFLSH, NOFLSH},
+    {"TOSTOP", TOSTOP, TOSTOP},    {"ECHOCTL", ECHOCTL, ECHOCTL},
+    {"ECHOPRT", ECHOPRT, ECHOPRT}, {"ECHOKE", ECHOKE, ECHOKE},
+    {"FLUSHO", FLUSHO, FLUSHO},    {"PENDIN", PENDIN, PENDIN},
+    {"IEXTEN", IEXTEN, IEXTEN},    {"EXTPROC", EXTPROC, EXTPROC},
+};
+
+std::string FormatCC(char c) {
+  if (isgraph(c)) {
+    return std::string(1, c);
+  } else if (c == ' ') {
+    return " ";
+  } else if (c == '\t') {
+    return "\\t";
+  } else if (c == '\r') {
+    return "\\r";
+  } else if (c == '\n') {
+    return "\\n";
+  } else if (c == '\0') {
+    return "\\0";
+  } else if (IsControlCharacter(c)) {
+    return absl::StrCat("^", std::string(1, FromControlCharacter(c)));
+  }
+  return absl::StrCat("\\x", absl::Hex(c));
+}
+
+std::ostream& operator<<(std::ostream& os, struct kernel_termios const& a) {
+  os << "{ c_iflag = "
+     << ParseFields(kIflagFields, ABSL_ARRAYSIZE(kIflagFields), a.c_iflag);
+  os << ", c_oflag = "
+     << ParseFields(kOflagFields, ABSL_ARRAYSIZE(kOflagFields), a.c_oflag);
+  os << ", c_cflag = "
+     << ParseFields(kCflagFields, ABSL_ARRAYSIZE(kCflagFields), a.c_cflag);
+  os << ", c_lflag = "
+     << ParseFields(kLflagFields, ABSL_ARRAYSIZE(kLflagFields), a.c_lflag);
+  os << ", c_line = " << a.c_line;
+  os << ", c_cc = { [VINTR] = '" << FormatCC(a.c_cc[VINTR]);
+  os << "', [VQUIT] = '" << FormatCC(a.c_cc[VQUIT]);
+  os << "', [VERASE] = '" << FormatCC(a.c_cc[VERASE]);
+  os << "', [VKILL] = '" << FormatCC(a.c_cc[VKILL]);
+  os << "', [VEOF] = '" << FormatCC(a.c_cc[VEOF]);
+  os << "', [VTIME] = '" << static_cast<int>(a.c_cc[VTIME]);
+  os << "', [VMIN] = " << static_cast<int>(a.c_cc[VMIN]);
+  os << ", [VSWTC] = '" << FormatCC(a.c_cc[VSWTC]);
+  os << "', [VSTART] = '" << FormatCC(a.c_cc[VSTART]);
+  os << "', [VSTOP] = '" << FormatCC(a.c_cc[VSTOP]);
+  os << "', [VSUSP] = '" << FormatCC(a.c_cc[VSUSP]);
+  os << "', [VEOL] = '" << FormatCC(a.c_cc[VEOL]);
+  os << "', [VREPRINT] = '" << FormatCC(a.c_cc[VREPRINT]);
+  os << "', [VDISCARD] = '" << FormatCC(a.c_cc[VDISCARD]);
+  os << "', [VWERASE] = '" << FormatCC(a.c_cc[VWERASE]);
+  os << "', [VLNEXT] = '" << FormatCC(a.c_cc[VLNEXT]);
+  os << "', [VEOL2] = '" << FormatCC(a.c_cc[VEOL2]);
+  os << "'}";
+  return os;
+}
+
+// Return the default termios settings for a new terminal.
+struct kernel_termios DefaultTermios() {
+  struct kernel_termios t = {};
+  t.c_iflag = IXON | ICRNL;
+  t.c_oflag = OPOST | ONLCR;
+  t.c_cflag = B38400 | CSIZE | CS8 | CREAD;
+  t.c_lflag = ISIG | ICANON | ECHO | ECHOE | ECHOK | ECHOCTL | ECHOKE | IEXTEN;
+  t.c_line = 0;
+  t.c_cc[VINTR] = ControlCharacter('C');
+  t.c_cc[VQUIT] = ControlCharacter('\\');
+  t.c_cc[VERASE] = '\x7f';
+  t.c_cc[VKILL] = ControlCharacter('U');
+  t.c_cc[VEOF] = ControlCharacter('D');
+  t.c_cc[VTIME] = '\0';
+  t.c_cc[VMIN] = 1;
+  t.c_cc[VSWTC] = '\0';
+  t.c_cc[VSTART] = ControlCharacter('Q');
+  t.c_cc[VSTOP] = ControlCharacter('S');
+  t.c_cc[VSUSP] = ControlCharacter('Z');
+  t.c_cc[VEOL] = '\0';
+  t.c_cc[VREPRINT] = ControlCharacter('R');
+  t.c_cc[VDISCARD] = ControlCharacter('O');
+  t.c_cc[VWERASE] = ControlCharacter('W');
+  t.c_cc[VLNEXT] = ControlCharacter('V');
+  t.c_cc[VEOL2] = '\0';
+  return t;
+}
+
+// PollAndReadFd tries to read count bytes from buf within timeout.
+//
+// Returns a partial read if some bytes were read.
+//
+// fd must be non-blocking.
+PosixErrorOr<size_t> PollAndReadFd(int fd, void* buf, size_t count,
+                                   absl::Duration timeout) {
+  absl::Time end = absl::Now() + timeout;
+
+  size_t completed = 0;
+  absl::Duration remaining;
+  while ((remaining = end - absl::Now()) > absl::ZeroDuration()) {
+    struct pollfd pfd = {fd, POLLIN, 0};
+    int ret = RetryEINTR(poll)(&pfd, 1, absl::ToInt64Milliseconds(remaining));
+    if (ret < 0) {
+      return PosixError(errno, "poll failed");
+    } else if (ret == 0) {
+      // Timed out.
+      continue;
+    } else if (ret != 1) {
+      return PosixError(EINVAL, absl::StrCat("Bad poll ret ", ret));
+    }
+
+    ssize_t n =
+        ReadFd(fd, static_cast<char*>(buf) + completed, count - completed);
+    if (n < 0) {
+      if (errno == EAGAIN) {
+        // Linux sometimes returns EAGAIN from this read, despite the fact that
+        // poll returned success. Let's just do what do as we are told and try
+        // again.
+        continue;
+      }
+      return PosixError(errno, "read failed");
+    }
+    completed += n;
+    if (completed >= count) {
+      return completed;
+    }
+  }
+
+  if (completed) {
+    return completed;
+  }
+  return PosixError(ETIMEDOUT, "Poll timed out");
+}
+
+TEST(PtyTrunc, Truncate) {
+  // Opening PTYs with O_TRUNC shouldn't cause an error, but calls to
+  // (f)truncate should.
+  FileDescriptor master =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(kMasterPath, O_RDWR | O_TRUNC));
+  int n = ASSERT_NO_ERRNO_AND_VALUE(SlaveID(master));
+  std::string spath = absl::StrCat("/dev/pts/", n);
+  FileDescriptor slave =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(spath, O_RDWR | O_NONBLOCK | O_TRUNC));
+
+  EXPECT_THAT(truncate(kMasterPath, 0), SyscallFailsWithErrno(EINVAL));
+  EXPECT_THAT(truncate(spath.c_str(), 0), SyscallFailsWithErrno(EINVAL));
+  EXPECT_THAT(ftruncate(master.get(), 0), SyscallFailsWithErrno(EINVAL));
+  EXPECT_THAT(ftruncate(slave.get(), 0), SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(BasicPtyTest, StatUnopenedMaster) {
+  struct stat s;
+  ASSERT_THAT(stat(kMasterPath, &s), SyscallSucceeds());
+
+  EXPECT_EQ(s.st_rdev, makedev(TTYAUX_MAJOR, kPtmxMinor));
+  EXPECT_EQ(s.st_size, 0);
+  EXPECT_EQ(s.st_blocks, 0);
+
+  // ptmx attached to a specific devpts mount uses block size 1024. See
+  // fs/devpts/inode.c:devpts_fill_super.
+  //
+  // The global ptmx device uses the block size of the filesystem it is created
+  // on (which is usually 4096 for disk filesystems).
+  EXPECT_THAT(s.st_blksize, AnyOf(Eq(1024), Eq(4096)));
+}
+
+// Waits for count bytes to be readable from fd. Unlike poll, which can return
+// before all data is moved into a pty's read buffer, this function waits for
+// all count bytes to become readable.
+PosixErrorOr<int> WaitUntilReceived(int fd, int count) {
+  int buffered = -1;
+  absl::Duration remaining;
+  absl::Time end = absl::Now() + kTimeout;
+  while ((remaining = end - absl::Now()) > absl::ZeroDuration()) {
+    if (ioctl(fd, FIONREAD, &buffered) < 0) {
+      return PosixError(errno, "failed FIONREAD ioctl");
+    }
+    if (buffered >= count) {
+      return buffered;
+    }
+    absl::SleepFor(absl::Milliseconds(500));
+  }
+  return PosixError(
+      ETIMEDOUT,
+      absl::StrFormat(
+          "FIONREAD timed out, receiving only %d of %d expected bytes",
+          buffered, count));
+}
+
+// Verifies that there is nothing left to read from fd.
+void ExpectFinished(const FileDescriptor& fd) {
+  // Nothing more to read.
+  char c;
+  EXPECT_THAT(ReadFd(fd.get(), &c, 1), SyscallFailsWithErrno(EAGAIN));
+}
+
+// Verifies that we can read expected bytes from fd into buf.
+void ExpectReadable(const FileDescriptor& fd, int expected, char* buf) {
+  size_t n = ASSERT_NO_ERRNO_AND_VALUE(
+      PollAndReadFd(fd.get(), buf, expected, kTimeout));
+  EXPECT_EQ(expected, n);
+}
+
+TEST(BasicPtyTest, OpenMasterSlave) {
+  FileDescriptor master = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR));
+  FileDescriptor slave = ASSERT_NO_ERRNO_AND_VALUE(OpenSlave(master));
+}
+
+// The slave entry in /dev/pts/ disappears when the master is closed, even if
+// the slave is still open.
+TEST(BasicPtyTest, SlaveEntryGoneAfterMasterClose) {
+  FileDescriptor master = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR));
+  FileDescriptor slave = ASSERT_NO_ERRNO_AND_VALUE(OpenSlave(master));
+
+  // Get pty index.
+  int index = -1;
+  ASSERT_THAT(ioctl(master.get(), TIOCGPTN, &index), SyscallSucceeds());
+
+  std::string path = absl::StrCat("/dev/pts/", index);
+
+  struct stat st;
+  EXPECT_THAT(stat(path.c_str(), &st), SyscallSucceeds());
+
+  master.reset();
+
+  EXPECT_THAT(stat(path.c_str(), &st), SyscallFailsWithErrno(ENOENT));
+}
+
+TEST(BasicPtyTest, Getdents) {
+  FileDescriptor master1 = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR));
+  int index1 = -1;
+  ASSERT_THAT(ioctl(master1.get(), TIOCGPTN, &index1), SyscallSucceeds());
+  FileDescriptor slave1 = ASSERT_NO_ERRNO_AND_VALUE(OpenSlave(master1));
+
+  FileDescriptor master2 = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR));
+  int index2 = -1;
+  ASSERT_THAT(ioctl(master2.get(), TIOCGPTN, &index2), SyscallSucceeds());
+  FileDescriptor slave2 = ASSERT_NO_ERRNO_AND_VALUE(OpenSlave(master2));
+
+  // The directory contains ptmx, index1, and index2. (Plus any additional PTYs
+  // unrelated to this test.)
+
+  std::vector<std::string> contents =
+      ASSERT_NO_ERRNO_AND_VALUE(ListDir("/dev/pts/", true));
+  EXPECT_THAT(contents, Contains(absl::StrCat(index1)));
+  EXPECT_THAT(contents, Contains(absl::StrCat(index2)));
+
+  master2.reset();
+
+  // The directory contains ptmx and index1, but not index2 since the master is
+  // closed. (Plus any additional PTYs unrelated to this test.)
+
+  contents = ASSERT_NO_ERRNO_AND_VALUE(ListDir("/dev/pts/", true));
+  EXPECT_THAT(contents, Contains(absl::StrCat(index1)));
+  EXPECT_THAT(contents, Not(Contains(absl::StrCat(index2))));
+
+  // N.B. devpts supports legacy "single-instance" mode and new "multi-instance"
+  // mode. In legacy mode, devpts does not contain a "ptmx" device (the distro
+  // must use mknod to create it somewhere, presumably /dev/ptmx).
+  // Multi-instance mode does include a "ptmx" device tied to that mount.
+  //
+  // We don't check for the presence or absence of "ptmx", as distros vary in
+  // their usage of the two modes.
+}
+
+class PtyTest : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    master_ = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR | O_NONBLOCK));
+    slave_ = ASSERT_NO_ERRNO_AND_VALUE(OpenSlave(master_));
+  }
+
+  void DisableCanonical() {
+    struct kernel_termios t = {};
+    EXPECT_THAT(ioctl(slave_.get(), TCGETS, &t), SyscallSucceeds());
+    t.c_lflag &= ~ICANON;
+    EXPECT_THAT(ioctl(slave_.get(), TCSETS, &t), SyscallSucceeds());
+  }
+
+  void EnableCanonical() {
+    struct kernel_termios t = {};
+    EXPECT_THAT(ioctl(slave_.get(), TCGETS, &t), SyscallSucceeds());
+    t.c_lflag |= ICANON;
+    EXPECT_THAT(ioctl(slave_.get(), TCSETS, &t), SyscallSucceeds());
+  }
+
+  // Master and slave ends of the PTY. Non-blocking.
+  FileDescriptor master_;
+  FileDescriptor slave_;
+};
+
+// Master to slave sanity test.
+TEST_F(PtyTest, WriteMasterToSlave) {
+  // N.B. by default, the slave reads nothing until the master writes a newline.
+  constexpr char kBuf[] = "hello\n";
+
+  EXPECT_THAT(WriteFd(master_.get(), kBuf, sizeof(kBuf) - 1),
+              SyscallSucceedsWithValue(sizeof(kBuf) - 1));
+
+  // Linux moves data from the master to the slave via async work scheduled via
+  // tty_flip_buffer_push. Since it is asynchronous, the data may not be
+  // available for reading immediately. Instead we must poll and assert that it
+  // becomes available "soon".
+
+  char buf[sizeof(kBuf)] = {};
+  ExpectReadable(slave_, sizeof(buf) - 1, buf);
+
+  EXPECT_EQ(memcmp(buf, kBuf, sizeof(kBuf)), 0);
+}
+
+// Slave to master sanity test.
+TEST_F(PtyTest, WriteSlaveToMaster) {
+  // N.B. by default, the master reads nothing until the slave writes a newline,
+  // and the master gets a carriage return.
+  constexpr char kInput[] = "hello\n";
+  constexpr char kExpected[] = "hello\r\n";
+
+  EXPECT_THAT(WriteFd(slave_.get(), kInput, sizeof(kInput) - 1),
+              SyscallSucceedsWithValue(sizeof(kInput) - 1));
+
+  // Linux moves data from the master to the slave via async work scheduled via
+  // tty_flip_buffer_push. Since it is asynchronous, the data may not be
+  // available for reading immediately. Instead we must poll and assert that it
+  // becomes available "soon".
+
+  char buf[sizeof(kExpected)] = {};
+  ExpectReadable(master_, sizeof(buf) - 1, buf);
+
+  EXPECT_EQ(memcmp(buf, kExpected, sizeof(kExpected)), 0);
+}
+
+TEST_F(PtyTest, WriteInvalidUTF8) {
+  char c = 0xff;
+  ASSERT_THAT(syscall(__NR_write, master_.get(), &c, sizeof(c)),
+              SyscallSucceedsWithValue(sizeof(c)));
+}
+
+// Both the master and slave report the standard default termios settings.
+//
+// Note that TCGETS on the master actually redirects to the slave (see comment
+// on MasterTermiosUnchangable).
+TEST_F(PtyTest, DefaultTermios) {
+  struct kernel_termios t = {};
+  EXPECT_THAT(ioctl(slave_.get(), TCGETS, &t), SyscallSucceeds());
+  EXPECT_EQ(t, DefaultTermios());
+
+  EXPECT_THAT(ioctl(master_.get(), TCGETS, &t), SyscallSucceeds());
+  EXPECT_EQ(t, DefaultTermios());
+}
+
+// Changing termios from the master actually affects the slave.
+//
+// TCSETS on the master actually redirects to the slave (see comment on
+// MasterTermiosUnchangable).
+TEST_F(PtyTest, TermiosAffectsSlave) {
+  struct kernel_termios master_termios = {};
+  EXPECT_THAT(ioctl(master_.get(), TCGETS, &master_termios), SyscallSucceeds());
+  master_termios.c_lflag ^= ICANON;
+  EXPECT_THAT(ioctl(master_.get(), TCSETS, &master_termios), SyscallSucceeds());
+
+  struct kernel_termios slave_termios = {};
+  EXPECT_THAT(ioctl(slave_.get(), TCGETS, &slave_termios), SyscallSucceeds());
+  EXPECT_EQ(master_termios, slave_termios);
+}
+
+// The master end of the pty has termios:
+//
+// struct kernel_termios t = {
+//   .c_iflag = 0;
+//   .c_oflag = 0;
+//   .c_cflag = B38400 | CS8 | CREAD;
+//   .c_lflag = 0;
+//   .c_cc = /* same as DefaultTermios */
+// }
+//
+// (From drivers/tty/pty.c:unix98_pty_init)
+//
+// All termios control ioctls on the master actually redirect to the slave
+// (drivers/tty/tty_ioctl.c:tty_mode_ioctl), making it impossible to change the
+// master termios.
+//
+// Verify this by setting ICRNL (which rewrites input \r to \n) and verify that
+// it has no effect on the master.
+TEST_F(PtyTest, MasterTermiosUnchangable) {
+  struct kernel_termios master_termios = {};
+  EXPECT_THAT(ioctl(master_.get(), TCGETS, &master_termios), SyscallSucceeds());
+  master_termios.c_lflag |= ICRNL;
+  EXPECT_THAT(ioctl(master_.get(), TCSETS, &master_termios), SyscallSucceeds());
+
+  char c = '\r';
+  ASSERT_THAT(WriteFd(slave_.get(), &c, 1), SyscallSucceedsWithValue(1));
+
+  ExpectReadable(master_, 1, &c);
+  EXPECT_EQ(c, '\r');  // ICRNL had no effect!
+
+  ExpectFinished(master_);
+}
+
+// ICRNL rewrites input \r to \n.
+TEST_F(PtyTest, TermiosICRNL) {
+  struct kernel_termios t = DefaultTermios();
+  t.c_iflag |= ICRNL;
+  t.c_lflag &= ~ICANON;  // for byte-by-byte reading.
+  ASSERT_THAT(ioctl(slave_.get(), TCSETS, &t), SyscallSucceeds());
+
+  char c = '\r';
+  ASSERT_THAT(WriteFd(master_.get(), &c, 1), SyscallSucceedsWithValue(1));
+
+  ExpectReadable(slave_, 1, &c);
+  EXPECT_EQ(c, '\n');
+
+  ExpectFinished(slave_);
+}
+
+// ONLCR rewrites output \n to \r\n.
+TEST_F(PtyTest, TermiosONLCR) {
+  struct kernel_termios t = DefaultTermios();
+  t.c_oflag |= ONLCR;
+  t.c_lflag &= ~ICANON;  // for byte-by-byte reading.
+  ASSERT_THAT(ioctl(slave_.get(), TCSETS, &t), SyscallSucceeds());
+
+  char c = '\n';
+  ASSERT_THAT(WriteFd(slave_.get(), &c, 1), SyscallSucceedsWithValue(1));
+
+  // Extra byte for NUL for EXPECT_STREQ.
+  char buf[3] = {};
+  ExpectReadable(master_, 2, buf);
+  EXPECT_STREQ(buf, "\r\n");
+
+  ExpectFinished(slave_);
+}
+
+TEST_F(PtyTest, TermiosIGNCR) {
+  struct kernel_termios t = DefaultTermios();
+  t.c_iflag |= IGNCR;
+  t.c_lflag &= ~ICANON;  // for byte-by-byte reading.
+  ASSERT_THAT(ioctl(slave_.get(), TCSETS, &t), SyscallSucceeds());
+
+  char c = '\r';
+  ASSERT_THAT(WriteFd(master_.get(), &c, 1), SyscallSucceedsWithValue(1));
+
+  // Nothing to read.
+  ASSERT_THAT(PollAndReadFd(slave_.get(), &c, 1, kTimeout),
+              PosixErrorIs(ETIMEDOUT, ::testing::StrEq("Poll timed out")));
+}
+
+// Test that we can successfully poll for readable data from the slave.
+TEST_F(PtyTest, TermiosPollSlave) {
+  struct kernel_termios t = DefaultTermios();
+  t.c_iflag |= IGNCR;
+  t.c_lflag &= ~ICANON;  // for byte-by-byte reading.
+  ASSERT_THAT(ioctl(slave_.get(), TCSETS, &t), SyscallSucceeds());
+
+  absl::Notification notify;
+  int sfd = slave_.get();
+  ScopedThread th([sfd, &notify]() {
+    notify.Notify();
+
+    // Poll on the reader fd with POLLIN event.
+    struct pollfd poll_fd = {sfd, POLLIN, 0};
+    EXPECT_THAT(
+        RetryEINTR(poll)(&poll_fd, 1, absl::ToInt64Milliseconds(kTimeout)),
+        SyscallSucceedsWithValue(1));
+
+    // Should trigger POLLIN event.
+    EXPECT_EQ(poll_fd.revents & POLLIN, POLLIN);
+  });
+
+  notify.WaitForNotification();
+  // Sleep ensures that poll begins waiting before we write to the FD.
+  absl::SleepFor(absl::Seconds(1));
+
+  char s[] = "foo\n";
+  ASSERT_THAT(WriteFd(master_.get(), s, strlen(s) + 1), SyscallSucceeds());
+}
+
+// Test that we can successfully poll for readable data from the master.
+TEST_F(PtyTest, TermiosPollMaster) {
+  struct kernel_termios t = DefaultTermios();
+  t.c_iflag |= IGNCR;
+  t.c_lflag &= ~ICANON;  // for byte-by-byte reading.
+  ASSERT_THAT(ioctl(master_.get(), TCSETS, &t), SyscallSucceeds());
+
+  absl::Notification notify;
+  int mfd = master_.get();
+  ScopedThread th([mfd, &notify]() {
+    notify.Notify();
+
+    // Poll on the reader fd with POLLIN event.
+    struct pollfd poll_fd = {mfd, POLLIN, 0};
+    EXPECT_THAT(
+        RetryEINTR(poll)(&poll_fd, 1, absl::ToInt64Milliseconds(kTimeout)),
+        SyscallSucceedsWithValue(1));
+
+    // Should trigger POLLIN event.
+    EXPECT_EQ(poll_fd.revents & POLLIN, POLLIN);
+  });
+
+  notify.WaitForNotification();
+  // Sleep ensures that poll begins waiting before we write to the FD.
+  absl::SleepFor(absl::Seconds(1));
+
+  char s[] = "foo\n";
+  ASSERT_THAT(WriteFd(slave_.get(), s, strlen(s) + 1), SyscallSucceeds());
+}
+
+TEST_F(PtyTest, TermiosINLCR) {
+  struct kernel_termios t = DefaultTermios();
+  t.c_iflag |= INLCR;
+  t.c_lflag &= ~ICANON;  // for byte-by-byte reading.
+  ASSERT_THAT(ioctl(slave_.get(), TCSETS, &t), SyscallSucceeds());
+
+  char c = '\n';
+  ASSERT_THAT(WriteFd(master_.get(), &c, 1), SyscallSucceedsWithValue(1));
+
+  ExpectReadable(slave_, 1, &c);
+  EXPECT_EQ(c, '\r');
+
+  ExpectFinished(slave_);
+}
+
+TEST_F(PtyTest, TermiosONOCR) {
+  struct kernel_termios t = DefaultTermios();
+  t.c_oflag |= ONOCR;
+  t.c_lflag &= ~ICANON;  // for byte-by-byte reading.
+  ASSERT_THAT(ioctl(slave_.get(), TCSETS, &t), SyscallSucceeds());
+
+  // The terminal is at column 0, so there should be no CR to read.
+  char c = '\r';
+  ASSERT_THAT(WriteFd(slave_.get(), &c, 1), SyscallSucceedsWithValue(1));
+
+  // Nothing to read.
+  ASSERT_THAT(PollAndReadFd(master_.get(), &c, 1, kTimeout),
+              PosixErrorIs(ETIMEDOUT, ::testing::StrEq("Poll timed out")));
+
+  // This time the column is greater than 0, so we should be able to read the CR
+  // out of the other end.
+  constexpr char kInput[] = "foo\r";
+  constexpr int kInputSize = sizeof(kInput) - 1;
+  ASSERT_THAT(WriteFd(slave_.get(), kInput, kInputSize),
+              SyscallSucceedsWithValue(kInputSize));
+
+  char buf[kInputSize] = {};
+  ExpectReadable(master_, kInputSize, buf);
+
+  EXPECT_EQ(memcmp(buf, kInput, kInputSize), 0);
+
+  ExpectFinished(master_);
+
+  // Terminal should be at column 0 again, so no CR can be read.
+  ASSERT_THAT(WriteFd(slave_.get(), &c, 1), SyscallSucceedsWithValue(1));
+
+  // Nothing to read.
+  ASSERT_THAT(PollAndReadFd(master_.get(), &c, 1, kTimeout),
+              PosixErrorIs(ETIMEDOUT, ::testing::StrEq("Poll timed out")));
+}
+
+TEST_F(PtyTest, TermiosOCRNL) {
+  struct kernel_termios t = DefaultTermios();
+  t.c_oflag |= OCRNL;
+  t.c_lflag &= ~ICANON;  // for byte-by-byte reading.
+  ASSERT_THAT(ioctl(slave_.get(), TCSETS, &t), SyscallSucceeds());
+
+  // The terminal is at column 0, so there should be no CR to read.
+  char c = '\r';
+  ASSERT_THAT(WriteFd(slave_.get(), &c, 1), SyscallSucceedsWithValue(1));
+
+  ExpectReadable(master_, 1, &c);
+  EXPECT_EQ(c, '\n');
+
+  ExpectFinished(master_);
+}
+
+// Tests that VEOL is disabled when we start, and that we can set it to enable
+// it.
+TEST_F(PtyTest, VEOLTermination) {
+  // Write a few bytes ending with '\0', and confirm that we can't read.
+  constexpr char kInput[] = "hello";
+  ASSERT_THAT(WriteFd(master_.get(), kInput, sizeof(kInput)),
+              SyscallSucceedsWithValue(sizeof(kInput)));
+  char buf[sizeof(kInput)] = {};
+  ASSERT_THAT(PollAndReadFd(slave_.get(), buf, sizeof(kInput), kTimeout),
+              PosixErrorIs(ETIMEDOUT, ::testing::StrEq("Poll timed out")));
+
+  // Set the EOL character to '=' and write it.
+  constexpr char delim = '=';
+  struct kernel_termios t = DefaultTermios();
+  t.c_cc[VEOL] = delim;
+  ASSERT_THAT(ioctl(slave_.get(), TCSETS, &t), SyscallSucceeds());
+  ASSERT_THAT(WriteFd(master_.get(), &delim, 1), SyscallSucceedsWithValue(1));
+
+  // Now we can read, as sending EOL caused the line to become available.
+  ExpectReadable(slave_, sizeof(kInput), buf);
+  EXPECT_EQ(memcmp(buf, kInput, sizeof(kInput)), 0);
+
+  ExpectReadable(slave_, 1, buf);
+  EXPECT_EQ(buf[0], '=');
+
+  ExpectFinished(slave_);
+}
+
+// Tests that we can write more than the 4096 character limit, then a
+// terminating character, then read out just the first 4095 bytes plus the
+// terminator.
+TEST_F(PtyTest, CanonBigWrite) {
+  constexpr int kWriteLen = kMaxLineSize + 4;
+  char input[kWriteLen];
+  memset(input, 'M', kWriteLen - 1);
+  input[kWriteLen - 1] = '\n';
+  ASSERT_THAT(WriteFd(master_.get(), input, kWriteLen),
+              SyscallSucceedsWithValue(kWriteLen));
+
+  // We can read the line.
+  char buf[kMaxLineSize] = {};
+  ExpectReadable(slave_, kMaxLineSize, buf);
+
+  ExpectFinished(slave_);
+}
+
+// Tests that data written in canonical mode can be read immediately once
+// switched to noncanonical mode.
+TEST_F(PtyTest, SwitchCanonToNoncanon) {
+  // Write a few bytes without a terminating character, switch to noncanonical
+  // mode, and read them.
+  constexpr char kInput[] = "hello";
+  ASSERT_THAT(WriteFd(master_.get(), kInput, sizeof(kInput)),
+              SyscallSucceedsWithValue(sizeof(kInput)));
+
+  // Nothing available yet.
+  char buf[sizeof(kInput)] = {};
+  ASSERT_THAT(PollAndReadFd(slave_.get(), buf, sizeof(kInput), kTimeout),
+              PosixErrorIs(ETIMEDOUT, ::testing::StrEq("Poll timed out")));
+
+  DisableCanonical();
+
+  ExpectReadable(slave_, sizeof(kInput), buf);
+  EXPECT_STREQ(buf, kInput);
+
+  ExpectFinished(slave_);
+}
+
+TEST_F(PtyTest, SwitchCanonToNonCanonNewline) {
+  // Write a few bytes with a terminating character.
+  constexpr char kInput[] = "hello\n";
+  ASSERT_THAT(WriteFd(master_.get(), kInput, sizeof(kInput)),
+              SyscallSucceedsWithValue(sizeof(kInput)));
+
+  DisableCanonical();
+
+  // We can read the line.
+  char buf[sizeof(kInput)] = {};
+  ExpectReadable(slave_, sizeof(kInput), buf);
+  EXPECT_STREQ(buf, kInput);
+
+  ExpectFinished(slave_);
+}
+
+TEST_F(PtyTest, SwitchNoncanonToCanonNewlineBig) {
+  DisableCanonical();
+
+  // Write more than the maximum line size, then write a delimiter.
+  constexpr int kWriteLen = 4100;
+  char input[kWriteLen];
+  memset(input, 'M', kWriteLen);
+  ASSERT_THAT(WriteFd(master_.get(), input, kWriteLen),
+              SyscallSucceedsWithValue(kWriteLen));
+  // Wait for the input queue to fill.
+  ASSERT_NO_ERRNO(WaitUntilReceived(slave_.get(), kMaxLineSize - 1));
+  constexpr char delim = '\n';
+  ASSERT_THAT(WriteFd(master_.get(), &delim, 1), SyscallSucceedsWithValue(1));
+
+  EnableCanonical();
+
+  // We can read the line.
+  char buf[kMaxLineSize] = {};
+  ExpectReadable(slave_, kMaxLineSize - 1, buf);
+
+  // We can also read the remaining characters.
+  ExpectReadable(slave_, 6, buf);
+
+  ExpectFinished(slave_);
+}
+
+TEST_F(PtyTest, SwitchNoncanonToCanonNoNewline) {
+  DisableCanonical();
+
+  // Write a few bytes without a terminating character.
+  // mode, and read them.
+  constexpr char kInput[] = "hello";
+  ASSERT_THAT(WriteFd(master_.get(), kInput, sizeof(kInput) - 1),
+              SyscallSucceedsWithValue(sizeof(kInput) - 1));
+
+  ASSERT_NO_ERRNO(WaitUntilReceived(slave_.get(), sizeof(kInput) - 1));
+  EnableCanonical();
+
+  // We can read the line.
+  char buf[sizeof(kInput)] = {};
+  ExpectReadable(slave_, sizeof(kInput) - 1, buf);
+  EXPECT_STREQ(buf, kInput);
+
+  ExpectFinished(slave_);
+}
+
+TEST_F(PtyTest, SwitchNoncanonToCanonNoNewlineBig) {
+  DisableCanonical();
+
+  // Write a few bytes without a terminating character.
+  // mode, and read them.
+  constexpr int kWriteLen = 4100;
+  char input[kWriteLen];
+  memset(input, 'M', kWriteLen);
+  ASSERT_THAT(WriteFd(master_.get(), input, kWriteLen),
+              SyscallSucceedsWithValue(kWriteLen));
+
+  ASSERT_NO_ERRNO(WaitUntilReceived(slave_.get(), kMaxLineSize - 1));
+  EnableCanonical();
+
+  // We can read the line.
+  char buf[kMaxLineSize] = {};
+  ExpectReadable(slave_, kMaxLineSize - 1, buf);
+
+  ExpectFinished(slave_);
+}
+
+// Tests that we can write over the 4095 noncanonical limit, then read out
+// everything.
+TEST_F(PtyTest, NoncanonBigWrite) {
+  DisableCanonical();
+
+  // Write well over the 4095 internal buffer limit.
+  constexpr char kInput = 'M';
+  constexpr int kInputSize = kMaxLineSize * 2;
+  for (int i = 0; i < kInputSize; i++) {
+    // This makes too many syscalls for save/restore.
+    const DisableSave ds;
+    ASSERT_THAT(WriteFd(master_.get(), &kInput, sizeof(kInput)),
+                SyscallSucceedsWithValue(sizeof(kInput)));
+  }
+
+  // We should be able to read out everything. Sleep a bit so that Linux has a
+  // chance to move data from the master to the slave.
+  ASSERT_NO_ERRNO(WaitUntilReceived(slave_.get(), kMaxLineSize - 1));
+  for (int i = 0; i < kInputSize; i++) {
+    // This makes too many syscalls for save/restore.
+    const DisableSave ds;
+    char c;
+    ExpectReadable(slave_, 1, &c);
+    ASSERT_EQ(c, kInput);
+  }
+
+  ExpectFinished(slave_);
+}
+
+// ICANON doesn't make input available until a line delimiter is typed.
+//
+// Test newline.
+TEST_F(PtyTest, TermiosICANONNewline) {
+  char input[3] = {'a', 'b', 'c'};
+  ASSERT_THAT(WriteFd(master_.get(), input, sizeof(input)),
+              SyscallSucceedsWithValue(sizeof(input)));
+
+  // Extra bytes for newline (written later) and NUL for EXPECT_STREQ.
+  char buf[5] = {};
+
+  // Nothing available yet.
+  ASSERT_THAT(PollAndReadFd(slave_.get(), buf, sizeof(input), kTimeout),
+              PosixErrorIs(ETIMEDOUT, ::testing::StrEq("Poll timed out")));
+
+  char delim = '\n';
+  ASSERT_THAT(WriteFd(master_.get(), &delim, 1), SyscallSucceedsWithValue(1));
+
+  // Now it is available.
+  ASSERT_NO_ERRNO(WaitUntilReceived(slave_.get(), sizeof(input) + 1));
+  ExpectReadable(slave_, sizeof(input) + 1, buf);
+  EXPECT_STREQ(buf, "abc\n");
+
+  ExpectFinished(slave_);
+}
+
+// ICANON doesn't make input available until a line delimiter is typed.
+//
+// Test EOF (^D).
+TEST_F(PtyTest, TermiosICANONEOF) {
+  char input[3] = {'a', 'b', 'c'};
+  ASSERT_THAT(WriteFd(master_.get(), input, sizeof(input)),
+              SyscallSucceedsWithValue(sizeof(input)));
+
+  // Extra byte for NUL for EXPECT_STREQ.
+  char buf[4] = {};
+
+  // Nothing available yet.
+  ASSERT_THAT(PollAndReadFd(slave_.get(), buf, sizeof(input), kTimeout),
+              PosixErrorIs(ETIMEDOUT, ::testing::StrEq("Poll timed out")));
+  char delim = ControlCharacter('D');
+  ASSERT_THAT(WriteFd(master_.get(), &delim, 1), SyscallSucceedsWithValue(1));
+
+  // Now it is available. Note that ^D is not included.
+  ExpectReadable(slave_, sizeof(input), buf);
+  EXPECT_STREQ(buf, "abc");
+
+  ExpectFinished(slave_);
+}
+
+// ICANON limits us to 4096 bytes including a terminating character. Anything
+// after and 4095th character is discarded (although still processed for
+// signals and echoing).
+TEST_F(PtyTest, CanonDiscard) {
+  constexpr char kInput = 'M';
+  constexpr int kInputSize = 4100;
+  constexpr int kIter = 3;
+
+  // A few times write more than the 4096 character maximum, then a newline.
+  constexpr char delim = '\n';
+  for (int i = 0; i < kIter; i++) {
+    // This makes too many syscalls for save/restore.
+    const DisableSave ds;
+    for (int i = 0; i < kInputSize; i++) {
+      ASSERT_THAT(WriteFd(master_.get(), &kInput, sizeof(kInput)),
+                  SyscallSucceedsWithValue(sizeof(kInput)));
+    }
+    ASSERT_THAT(WriteFd(master_.get(), &delim, 1), SyscallSucceedsWithValue(1));
+  }
+
+  // There should be multiple truncated lines available to read.
+  for (int i = 0; i < kIter; i++) {
+    char buf[kInputSize] = {};
+    ExpectReadable(slave_, kMaxLineSize, buf);
+    EXPECT_EQ(buf[kMaxLineSize - 1], delim);
+    EXPECT_EQ(buf[kMaxLineSize - 2], kInput);
+  }
+
+  ExpectFinished(slave_);
+}
+
+TEST_F(PtyTest, CanonMultiline) {
+  constexpr char kInput1[] = "GO\n";
+  constexpr char kInput2[] = "BLUE\n";
+
+  // Write both lines.
+  ASSERT_THAT(WriteFd(master_.get(), kInput1, sizeof(kInput1) - 1),
+              SyscallSucceedsWithValue(sizeof(kInput1) - 1));
+  ASSERT_THAT(WriteFd(master_.get(), kInput2, sizeof(kInput2) - 1),
+              SyscallSucceedsWithValue(sizeof(kInput2) - 1));
+
+  // Get the first line.
+  char line1[8] = {};
+  ExpectReadable(slave_, sizeof(kInput1) - 1, line1);
+  EXPECT_STREQ(line1, kInput1);
+
+  // Get the second line.
+  char line2[8] = {};
+  ExpectReadable(slave_, sizeof(kInput2) - 1, line2);
+  EXPECT_STREQ(line2, kInput2);
+
+  ExpectFinished(slave_);
+}
+
+TEST_F(PtyTest, SwitchNoncanonToCanonMultiline) {
+  DisableCanonical();
+
+  constexpr char kInput1[] = "GO\n";
+  constexpr char kInput2[] = "BLUE\n";
+  constexpr char kExpected[] = "GO\nBLUE\n";
+
+  // Write both lines.
+  ASSERT_THAT(WriteFd(master_.get(), kInput1, sizeof(kInput1) - 1),
+              SyscallSucceedsWithValue(sizeof(kInput1) - 1));
+  ASSERT_THAT(WriteFd(master_.get(), kInput2, sizeof(kInput2) - 1),
+              SyscallSucceedsWithValue(sizeof(kInput2) - 1));
+
+  ASSERT_NO_ERRNO(
+      WaitUntilReceived(slave_.get(), sizeof(kInput1) + sizeof(kInput2) - 2));
+  EnableCanonical();
+
+  // Get all together as one line.
+  char line[9] = {};
+  ExpectReadable(slave_, 8, line);
+  EXPECT_STREQ(line, kExpected);
+
+  ExpectFinished(slave_);
+}
+
+TEST_F(PtyTest, SwitchTwiceMultiline) {
+  std::string kInputs[] = {"GO\n", "BLUE\n", "!"};
+  std::string kExpected = "GO\nBLUE\n!";
+
+  // Write each line.
+  for (const std::string& input : kInputs) {
+    ASSERT_THAT(WriteFd(master_.get(), input.c_str(), input.size()),
+                SyscallSucceedsWithValue(input.size()));
+  }
+
+  DisableCanonical();
+  // All written characters have to make it into the input queue before
+  // canonical mode is re-enabled. If the final '!' character hasn't been
+  // enqueued before canonical mode is re-enabled, it won't be readable.
+  ASSERT_NO_ERRNO(WaitUntilReceived(slave_.get(), kExpected.size()));
+  EnableCanonical();
+
+  // Get all together as one line.
+  char line[10] = {};
+  ExpectReadable(slave_, 9, line);
+  EXPECT_STREQ(line, kExpected.c_str());
+
+  ExpectFinished(slave_);
+}
+
+TEST_F(PtyTest, QueueSize) {
+  // Write the line.
+  constexpr char kInput1[] = "GO\n";
+  ASSERT_THAT(WriteFd(master_.get(), kInput1, sizeof(kInput1) - 1),
+              SyscallSucceedsWithValue(sizeof(kInput1) - 1));
+  ASSERT_NO_ERRNO(WaitUntilReceived(slave_.get(), sizeof(kInput1) - 1));
+
+  // Ensure that writing more (beyond what is readable) does not impact the
+  // readable size.
+  char input[kMaxLineSize];
+  memset(input, 'M', kMaxLineSize);
+  ASSERT_THAT(WriteFd(master_.get(), input, kMaxLineSize),
+              SyscallSucceedsWithValue(kMaxLineSize));
+  int inputBufSize = ASSERT_NO_ERRNO_AND_VALUE(
+      WaitUntilReceived(slave_.get(), sizeof(kInput1) - 1));
+  EXPECT_EQ(inputBufSize, sizeof(kInput1) - 1);
+}
+
+TEST_F(PtyTest, PartialBadBuffer) {
+  // Allocate 2 pages.
+  void* addr = mmap(nullptr, 2 * kPageSize, PROT_READ | PROT_WRITE,
+                    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+  ASSERT_NE(addr, MAP_FAILED);
+  char* buf = reinterpret_cast<char*>(addr);
+
+  // Guard the 2nd page for our read to run into.
+  ASSERT_THAT(
+      mprotect(reinterpret_cast<void*>(buf + kPageSize), kPageSize, PROT_NONE),
+      SyscallSucceeds());
+
+  // Leave only one free byte in the buffer.
+  char* bad_buffer = buf + kPageSize - 1;
+
+  // Write to the master.
+  constexpr char kBuf[] = "hello\n";
+  constexpr size_t size = sizeof(kBuf) - 1;
+  EXPECT_THAT(WriteFd(master_.get(), kBuf, size),
+              SyscallSucceedsWithValue(size));
+
+  // Read from the slave into bad_buffer.
+  ASSERT_NO_ERRNO(WaitUntilReceived(slave_.get(), size));
+  EXPECT_THAT(ReadFd(slave_.get(), bad_buffer, size),
+              SyscallFailsWithErrno(EFAULT));
+
+  EXPECT_THAT(munmap(addr, 2 * kPageSize), SyscallSucceeds()) << addr;
+}
+
+TEST_F(PtyTest, SimpleEcho) {
+  constexpr char kInput[] = "Mr. Eko";
+  EXPECT_THAT(WriteFd(master_.get(), kInput, strlen(kInput)),
+              SyscallSucceedsWithValue(strlen(kInput)));
+
+  char buf[100] = {};
+  ExpectReadable(master_, strlen(kInput), buf);
+
+  EXPECT_STREQ(buf, kInput);
+  ExpectFinished(master_);
+}
+
+TEST_F(PtyTest, GetWindowSize) {
+  struct winsize ws;
+  ASSERT_THAT(ioctl(slave_.get(), TIOCGWINSZ, &ws), SyscallSucceeds());
+  EXPECT_EQ(ws.ws_row, 0);
+  EXPECT_EQ(ws.ws_col, 0);
+}
+
+TEST_F(PtyTest, SetSlaveWindowSize) {
+  constexpr uint16_t kRows = 343;
+  constexpr uint16_t kCols = 2401;
+  struct winsize ws = {.ws_row = kRows, .ws_col = kCols};
+  ASSERT_THAT(ioctl(slave_.get(), TIOCSWINSZ, &ws), SyscallSucceeds());
+
+  struct winsize retrieved_ws = {};
+  ASSERT_THAT(ioctl(master_.get(), TIOCGWINSZ, &retrieved_ws),
+              SyscallSucceeds());
+  EXPECT_EQ(retrieved_ws.ws_row, kRows);
+  EXPECT_EQ(retrieved_ws.ws_col, kCols);
+}
+
+TEST_F(PtyTest, SetMasterWindowSize) {
+  constexpr uint16_t kRows = 343;
+  constexpr uint16_t kCols = 2401;
+  struct winsize ws = {.ws_row = kRows, .ws_col = kCols};
+  ASSERT_THAT(ioctl(master_.get(), TIOCSWINSZ, &ws), SyscallSucceeds());
+
+  struct winsize retrieved_ws = {};
+  ASSERT_THAT(ioctl(slave_.get(), TIOCGWINSZ, &retrieved_ws),
+              SyscallSucceeds());
+  EXPECT_EQ(retrieved_ws.ws_row, kRows);
+  EXPECT_EQ(retrieved_ws.ws_col, kCols);
+}
+
+class JobControlTest : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    master_ = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR | O_NONBLOCK));
+    slave_ = ASSERT_NO_ERRNO_AND_VALUE(OpenSlave(master_));
+
+    // Make this a session leader, which also drops the controlling terminal.
+    // In the gVisor test environment, this test will be run as the session
+    // leader already (as the sentry init process).
+    if (!IsRunningOnGvisor()) {
+      ASSERT_THAT(setsid(), SyscallSucceeds());
+    }
+  }
+
+  // Master and slave ends of the PTY. Non-blocking.
+  FileDescriptor master_;
+  FileDescriptor slave_;
+};
+
+TEST_F(JobControlTest, SetTTYMaster) {
+  ASSERT_THAT(ioctl(master_.get(), TIOCSCTTY, 0), SyscallSucceeds());
+}
+
+TEST_F(JobControlTest, SetTTY) {
+  ASSERT_THAT(ioctl(slave_.get(), TIOCSCTTY, 0), SyscallSucceeds());
+}
+
+TEST_F(JobControlTest, SetTTYNonLeader) {
+  // Fork a process that won't be the session leader.
+  pid_t child = fork();
+  if (!child) {
+    // We shouldn't be able to set the terminal.
+    TEST_PCHECK(ioctl(slave_.get(), TIOCSCTTY, 0));
+    _exit(0);
+  }
+
+  int wstatus;
+  ASSERT_THAT(waitpid(child, &wstatus, 0), SyscallSucceedsWithValue(child));
+  ASSERT_EQ(wstatus, 0);
+}
+
+TEST_F(JobControlTest, SetTTYBadArg) {
+  // Despite the man page saying arg should be 0 here, Linux doesn't actually
+  // check.
+  ASSERT_THAT(ioctl(slave_.get(), TIOCSCTTY, 1), SyscallSucceeds());
+}
+
+TEST_F(JobControlTest, SetTTYDifferentSession) {
+  SKIP_IF(ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN)));
+
+  ASSERT_THAT(ioctl(slave_.get(), TIOCSCTTY, 0), SyscallSucceeds());
+
+  // Fork, join a new session, and try to steal the parent's controlling
+  // terminal, which should fail.
+  pid_t child = fork();
+  if (!child) {
+    TEST_PCHECK(setsid() >= 0);
+    // We shouldn't be able to steal the terminal.
+    TEST_PCHECK(ioctl(slave_.get(), TIOCSCTTY, 1));
+    _exit(0);
+  }
+
+  int wstatus;
+  ASSERT_THAT(waitpid(child, &wstatus, 0), SyscallSucceedsWithValue(child));
+  ASSERT_EQ(wstatus, 0);
+}
+
+TEST_F(JobControlTest, ReleaseTTY) {
+  ASSERT_THAT(ioctl(slave_.get(), TIOCSCTTY, 0), SyscallSucceeds());
+
+  // Make sure we're ignoring SIGHUP, which will be sent to this process once we
+  // disconnect they TTY.
+  struct sigaction sa = {};
+  sa.sa_handler = SIG_IGN;
+  sa.sa_flags = 0;
+  sigemptyset(&sa.sa_mask);
+  struct sigaction old_sa;
+  EXPECT_THAT(sigaction(SIGHUP, &sa, &old_sa), SyscallSucceeds());
+  EXPECT_THAT(ioctl(slave_.get(), TIOCNOTTY), SyscallSucceeds());
+  EXPECT_THAT(sigaction(SIGHUP, &old_sa, NULL), SyscallSucceeds());
+}
+
+TEST_F(JobControlTest, ReleaseUnsetTTY) {
+  ASSERT_THAT(ioctl(slave_.get(), TIOCNOTTY), SyscallFailsWithErrno(ENOTTY));
+}
+
+TEST_F(JobControlTest, ReleaseWrongTTY) {
+  ASSERT_THAT(ioctl(slave_.get(), TIOCSCTTY, 0), SyscallSucceeds());
+
+  ASSERT_THAT(ioctl(master_.get(), TIOCNOTTY), SyscallFailsWithErrno(ENOTTY));
+}
+
+TEST_F(JobControlTest, ReleaseTTYNonLeader) {
+  ASSERT_THAT(ioctl(slave_.get(), TIOCSCTTY, 0), SyscallSucceeds());
+
+  pid_t child = fork();
+  if (!child) {
+    TEST_PCHECK(!ioctl(slave_.get(), TIOCNOTTY));
+    _exit(0);
+  }
+
+  int wstatus;
+  ASSERT_THAT(waitpid(child, &wstatus, 0), SyscallSucceedsWithValue(child));
+  ASSERT_EQ(wstatus, 0);
+}
+
+TEST_F(JobControlTest, ReleaseTTYDifferentSession) {
+  ASSERT_THAT(ioctl(slave_.get(), TIOCSCTTY, 0), SyscallSucceeds());
+
+  pid_t child = fork();
+  if (!child) {
+    // Join a new session, then try to disconnect.
+    TEST_PCHECK(setsid() >= 0);
+    TEST_PCHECK(ioctl(slave_.get(), TIOCNOTTY));
+    _exit(0);
+  }
+
+  int wstatus;
+  ASSERT_THAT(waitpid(child, &wstatus, 0), SyscallSucceedsWithValue(child));
+  ASSERT_EQ(wstatus, 0);
+}
+
+// Used by the child process spawned in ReleaseTTYSignals to track received
+// signals.
+static int received;
+
+void sig_handler(int signum) { received |= signum; }
+
+// When the session leader releases its controlling terminal, the foreground
+// process group gets SIGHUP, then SIGCONT. This test:
+// - Spawns 2 threads
+// - Has thread 1 return 0 if it gets both SIGHUP and SIGCONT
+// - Has thread 2 leave the foreground process group, and return non-zero if it
+//   receives any signals.
+// - Has the parent thread release its controlling terminal
+// - Checks that thread 1 got both signals
+// - Checks that thread 2 didn't get any signals.
+TEST_F(JobControlTest, ReleaseTTYSignals) {
+  ASSERT_THAT(ioctl(slave_.get(), TIOCSCTTY, 0), SyscallSucceeds());
+
+  received = 0;
+  struct sigaction sa = {};
+  sa.sa_handler = sig_handler;
+  sa.sa_flags = 0;
+  sigemptyset(&sa.sa_mask);
+  sigaddset(&sa.sa_mask, SIGHUP);
+  sigaddset(&sa.sa_mask, SIGCONT);
+  sigprocmask(SIG_BLOCK, &sa.sa_mask, NULL);
+
+  pid_t same_pgrp_child = fork();
+  if (!same_pgrp_child) {
+    // The child will wait for SIGHUP and SIGCONT, then return 0. It begins with
+    // SIGHUP and SIGCONT blocked. We install signal handlers for those signals,
+    // then use sigsuspend to wait for those specific signals.
+    TEST_PCHECK(!sigaction(SIGHUP, &sa, NULL));
+    TEST_PCHECK(!sigaction(SIGCONT, &sa, NULL));
+    sigset_t mask;
+    sigfillset(&mask);
+    sigdelset(&mask, SIGHUP);
+    sigdelset(&mask, SIGCONT);
+    while (received != (SIGHUP | SIGCONT)) {
+      sigsuspend(&mask);
+    }
+    _exit(0);
+  }
+
+  // We don't want to block these anymore.
+  sigprocmask(SIG_UNBLOCK, &sa.sa_mask, NULL);
+
+  // This child will return non-zero if either SIGHUP or SIGCONT are received.
+  pid_t diff_pgrp_child = fork();
+  if (!diff_pgrp_child) {
+    TEST_PCHECK(!setpgid(0, 0));
+    TEST_PCHECK(pause());
+    _exit(1);
+  }
+
+  EXPECT_THAT(setpgid(diff_pgrp_child, diff_pgrp_child), SyscallSucceeds());
+
+  // Make sure we're ignoring SIGHUP, which will be sent to this process once we
+  // disconnect they TTY.
+  struct sigaction sighup_sa = {};
+  sighup_sa.sa_handler = SIG_IGN;
+  sighup_sa.sa_flags = 0;
+  sigemptyset(&sighup_sa.sa_mask);
+  struct sigaction old_sa;
+  EXPECT_THAT(sigaction(SIGHUP, &sighup_sa, &old_sa), SyscallSucceeds());
+
+  // Release the controlling terminal, sending SIGHUP and SIGCONT to all other
+  // processes in this process group.
+  EXPECT_THAT(ioctl(slave_.get(), TIOCNOTTY), SyscallSucceeds());
+
+  EXPECT_THAT(sigaction(SIGHUP, &old_sa, NULL), SyscallSucceeds());
+
+  // The child in the same process group will get signaled.
+  int wstatus;
+  EXPECT_THAT(waitpid(same_pgrp_child, &wstatus, 0),
+              SyscallSucceedsWithValue(same_pgrp_child));
+  EXPECT_EQ(wstatus, 0);
+
+  // The other child will not get signaled.
+  EXPECT_THAT(waitpid(diff_pgrp_child, &wstatus, WNOHANG),
+              SyscallSucceedsWithValue(0));
+  EXPECT_THAT(kill(diff_pgrp_child, SIGKILL), SyscallSucceeds());
+}
+
+TEST_F(JobControlTest, GetForegroundProcessGroup) {
+  ASSERT_THAT(ioctl(slave_.get(), TIOCSCTTY, 0), SyscallSucceeds());
+  pid_t foreground_pgid;
+  pid_t pid;
+  ASSERT_THAT(ioctl(slave_.get(), TIOCGPGRP, &foreground_pgid),
+              SyscallSucceeds());
+  ASSERT_THAT(pid = getpid(), SyscallSucceeds());
+
+  ASSERT_EQ(foreground_pgid, pid);
+}
+
+TEST_F(JobControlTest, GetForegroundProcessGroupNonControlling) {
+  // At this point there's no controlling terminal, so TIOCGPGRP should fail.
+  pid_t foreground_pgid;
+  ASSERT_THAT(ioctl(slave_.get(), TIOCGPGRP, &foreground_pgid),
+              SyscallFailsWithErrno(ENOTTY));
+}
+
+// This test:
+// - sets itself as the foreground process group
+// - creates a child process in a new process group
+// - sets that child as the foreground process group
+// - kills its child and sets itself as the foreground process group.
+TEST_F(JobControlTest, SetForegroundProcessGroup) {
+  ASSERT_THAT(ioctl(slave_.get(), TIOCSCTTY, 0), SyscallSucceeds());
+
+  // Ignore SIGTTOU so that we don't stop ourself when calling tcsetpgrp.
+  struct sigaction sa = {};
+  sa.sa_handler = SIG_IGN;
+  sa.sa_flags = 0;
+  sigemptyset(&sa.sa_mask);
+  sigaction(SIGTTOU, &sa, NULL);
+
+  // Set ourself as the foreground process group.
+  ASSERT_THAT(tcsetpgrp(slave_.get(), getpgid(0)), SyscallSucceeds());
+
+  // Create a new process that just waits to be signaled.
+  pid_t child = fork();
+  if (!child) {
+    TEST_PCHECK(!pause());
+    // We should never reach this.
+    _exit(1);
+  }
+
+  // Make the child its own process group, then make it the controlling process
+  // group of the terminal.
+  ASSERT_THAT(setpgid(child, child), SyscallSucceeds());
+  ASSERT_THAT(tcsetpgrp(slave_.get(), child), SyscallSucceeds());
+
+  // Sanity check - we're still the controlling session.
+  ASSERT_EQ(getsid(0), getsid(child));
+
+  // Signal the child, wait for it to exit, then retake the terminal.
+  ASSERT_THAT(kill(child, SIGTERM), SyscallSucceeds());
+  int wstatus;
+  ASSERT_THAT(waitpid(child, &wstatus, 0), SyscallSucceedsWithValue(child));
+  ASSERT_TRUE(WIFSIGNALED(wstatus));
+  ASSERT_EQ(WTERMSIG(wstatus), SIGTERM);
+
+  // Set ourself as the foreground process.
+  pid_t pgid;
+  ASSERT_THAT(pgid = getpgid(0), SyscallSucceeds());
+  ASSERT_THAT(tcsetpgrp(slave_.get(), pgid), SyscallSucceeds());
+}
+
+TEST_F(JobControlTest, SetForegroundProcessGroupWrongTTY) {
+  pid_t pid = getpid();
+  ASSERT_THAT(ioctl(slave_.get(), TIOCSPGRP, &pid),
+              SyscallFailsWithErrno(ENOTTY));
+}
+
+TEST_F(JobControlTest, SetForegroundProcessGroupNegPgid) {
+  ASSERT_THAT(ioctl(slave_.get(), TIOCSCTTY, 0), SyscallSucceeds());
+
+  pid_t pid = -1;
+  ASSERT_THAT(ioctl(slave_.get(), TIOCSPGRP, &pid),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_F(JobControlTest, SetForegroundProcessGroupEmptyProcessGroup) {
+  ASSERT_THAT(ioctl(slave_.get(), TIOCSCTTY, 0), SyscallSucceeds());
+
+  // Create a new process, put it in a new process group, make that group the
+  // foreground process group, then have the process wait.
+  pid_t child = fork();
+  if (!child) {
+    TEST_PCHECK(!setpgid(0, 0));
+    _exit(0);
+  }
+
+  // Wait for the child to exit.
+  int wstatus;
+  EXPECT_THAT(waitpid(child, &wstatus, 0), SyscallSucceedsWithValue(child));
+  // The child's process group doesn't exist anymore - this should fail.
+  ASSERT_THAT(ioctl(slave_.get(), TIOCSPGRP, &child),
+              SyscallFailsWithErrno(ESRCH));
+}
+
+TEST_F(JobControlTest, SetForegroundProcessGroupDifferentSession) {
+  ASSERT_THAT(ioctl(slave_.get(), TIOCSCTTY, 0), SyscallSucceeds());
+
+  int sync_setsid[2];
+  int sync_exit[2];
+  ASSERT_THAT(pipe(sync_setsid), SyscallSucceeds());
+  ASSERT_THAT(pipe(sync_exit), SyscallSucceeds());
+
+  // Create a new process and put it in a new session.
+  pid_t child = fork();
+  if (!child) {
+    TEST_PCHECK(setsid() >= 0);
+    // Tell the parent we're in a new session.
+    char c = 'c';
+    TEST_PCHECK(WriteFd(sync_setsid[1], &c, 1) == 1);
+    TEST_PCHECK(ReadFd(sync_exit[0], &c, 1) == 1);
+    _exit(0);
+  }
+
+  // Wait for the child to tell us it's in a new session.
+  char c = 'c';
+  ASSERT_THAT(ReadFd(sync_setsid[0], &c, 1), SyscallSucceedsWithValue(1));
+
+  // Child is in a new session, so we can't make it the foregroup process group.
+  EXPECT_THAT(ioctl(slave_.get(), TIOCSPGRP, &child),
+              SyscallFailsWithErrno(EPERM));
+
+  EXPECT_THAT(WriteFd(sync_exit[1], &c, 1), SyscallSucceedsWithValue(1));
+
+  int wstatus;
+  EXPECT_THAT(waitpid(child, &wstatus, 0), SyscallSucceedsWithValue(child));
+  EXPECT_TRUE(WIFEXITED(wstatus));
+  EXPECT_EQ(WEXITSTATUS(wstatus), 0);
+}
+
+// Verify that we don't hang when creating a new session from an orphaned
+// process group (b/139968068). Calling setsid() creates an orphaned process
+// group, as process groups that contain the session's leading process are
+// orphans.
+//
+// We create 2 sessions in this test. The init process in gVisor is considered
+// not to be an orphan (see sessions.go), so we have to create a session from
+// which to create a session. The latter session is being created from an
+// orphaned process group.
+TEST_F(JobControlTest, OrphanRegression) {
+  pid_t session_2_leader = fork();
+  if (!session_2_leader) {
+    TEST_PCHECK(setsid() >= 0);
+
+    pid_t session_3_leader = fork();
+    if (!session_3_leader) {
+      TEST_PCHECK(setsid() >= 0);
+
+      _exit(0);
+    }
+
+    int wstatus;
+    TEST_PCHECK(waitpid(session_3_leader, &wstatus, 0) == session_3_leader);
+    TEST_PCHECK(wstatus == 0);
+
+    _exit(0);
+  }
+
+  int wstatus;
+  ASSERT_THAT(waitpid(session_2_leader, &wstatus, 0),
+              SyscallSucceedsWithValue(session_2_leader));
+  ASSERT_EQ(wstatus, 0);
+}
+
+}  // namespace
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/pty_root.cc b/test/syscalls/linux/pty_root.cc
new file mode 100644
index 000000000..1d7dbefdb
--- /dev/null
+++ b/test/syscalls/linux/pty_root.cc
@@ -0,0 +1,78 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sys/ioctl.h>
+#include <termios.h>
+
+#include "gtest/gtest.h"
+#include "absl/base/macros.h"
+#include "test/util/capability_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/posix_error.h"
+#include "test/util/pty_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// StealTTY tests whether privileged processes can steal controlling terminals.
+// If the stealing process has CAP_SYS_ADMIN in the root user namespace, the
+// test ensures that stealing works. If it has non-root CAP_SYS_ADMIN, it
+// ensures stealing fails.
+TEST(JobControlRootTest, StealTTY) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN)));
+
+  bool true_root = true;
+  if (!IsRunningOnGvisor()) {
+    // If running in Linux, we may only have CAP_SYS_ADMIN in a non-root user
+    // namespace (i.e. we are not truly root). We use init_module as a proxy for
+    // whether we are true root, as it returns EPERM immediately.
+    ASSERT_THAT(syscall(SYS_init_module, nullptr, 0, nullptr), SyscallFails());
+    true_root = errno != EPERM;
+
+    // Make this a session leader, which also drops the controlling terminal.
+    // In the gVisor test environment, this test will be run as the session
+    // leader already (as the sentry init process).
+    ASSERT_THAT(setsid(), SyscallSucceeds());
+  }
+
+  FileDescriptor master =
+      ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR | O_NONBLOCK));
+  FileDescriptor slave = ASSERT_NO_ERRNO_AND_VALUE(OpenSlave(master));
+
+  // Make slave the controlling terminal.
+  ASSERT_THAT(ioctl(slave.get(), TIOCSCTTY, 0), SyscallSucceeds());
+
+  // Fork, join a new session, and try to steal the parent's controlling
+  // terminal, which should succeed when we have CAP_SYS_ADMIN and pass an arg
+  // of 1.
+  pid_t child = fork();
+  if (!child) {
+    ASSERT_THAT(setsid(), SyscallSucceeds());
+    // We shouldn't be able to steal the terminal with the wrong arg value.
+    TEST_PCHECK(ioctl(slave.get(), TIOCSCTTY, 0));
+    // We should be able to steal it if we are true root.
+    TEST_PCHECK(true_root == !ioctl(slave.get(), TIOCSCTTY, 1));
+    _exit(0);
+  }
+
+  int wstatus;
+  ASSERT_THAT(waitpid(child, &wstatus, 0), SyscallSucceedsWithValue(child));
+  ASSERT_EQ(wstatus, 0);
+}
+
+}  // namespace
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/pwrite64.cc b/test/syscalls/linux/pwrite64.cc
new file mode 100644
index 000000000..e69794910
--- /dev/null
+++ b/test/syscalls/linux/pwrite64.cc
@@ -0,0 +1,83 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/unistd.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// TODO(gvisor.dev/issue/2370): This test is currently very rudimentary.
+class Pwrite64 : public ::testing::Test {
+  void SetUp() override {
+    name_ = NewTempAbsPath();
+    int fd;
+    ASSERT_THAT(fd = open(name_.c_str(), O_CREAT, 0644), SyscallSucceeds());
+    EXPECT_THAT(close(fd), SyscallSucceeds());
+  }
+
+  void TearDown() override { unlink(name_.c_str()); }
+
+ public:
+  std::string name_;
+};
+
+TEST_F(Pwrite64, AppendOnly) {
+  int fd;
+  ASSERT_THAT(fd = open(name_.c_str(), O_APPEND | O_RDWR), SyscallSucceeds());
+  constexpr int64_t kBufSize = 1024;
+  std::vector<char> buf(kBufSize);
+  std::fill(buf.begin(), buf.end(), 'a');
+  EXPECT_THAT(PwriteFd(fd, buf.data(), buf.size(), 0),
+              SyscallSucceedsWithValue(buf.size()));
+  EXPECT_THAT(lseek(fd, 0, SEEK_CUR), SyscallSucceedsWithValue(0));
+  EXPECT_THAT(close(fd), SyscallSucceeds());
+}
+
+TEST_F(Pwrite64, InvalidArgs) {
+  int fd;
+  ASSERT_THAT(fd = open(name_.c_str(), O_APPEND | O_RDWR), SyscallSucceeds());
+  constexpr int64_t kBufSize = 1024;
+  std::vector<char> buf(kBufSize);
+  std::fill(buf.begin(), buf.end(), 'a');
+  EXPECT_THAT(PwriteFd(fd, buf.data(), buf.size(), -1),
+              SyscallFailsWithErrno(EINVAL));
+  EXPECT_THAT(close(fd), SyscallSucceeds());
+}
+
+TEST_F(Pwrite64, Overflow) {
+  int fd;
+  ASSERT_THAT(fd = open(name_.c_str(), O_APPEND | O_RDWR), SyscallSucceeds());
+  constexpr int64_t kBufSize = 1024;
+  std::vector<char> buf(kBufSize);
+  std::fill(buf.begin(), buf.end(), 'a');
+  EXPECT_THAT(PwriteFd(fd, buf.data(), buf.size(), 0x7fffffffffffffffull),
+              SyscallFailsWithErrno(EINVAL));
+  EXPECT_THAT(close(fd), SyscallSucceeds());
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/pwritev2.cc b/test/syscalls/linux/pwritev2.cc
new file mode 100644
index 000000000..63b686c62
--- /dev/null
+++ b/test/syscalls/linux/pwritev2.cc
@@ -0,0 +1,307 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+
+#include <string>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/file_base.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+#ifndef SYS_pwritev2
+#if defined(__x86_64__)
+#define SYS_pwritev2 328
+#elif defined(__aarch64__)
+#define SYS_pwritev2 287
+#else
+#error "Unknown architecture"
+#endif
+#endif  // SYS_pwrite2
+
+#ifndef RWF_HIPRI
+#define RWF_HIPRI 0x1
+#endif  // RWF_HIPRI
+
+#ifndef RWF_DSYNC
+#define RWF_DSYNC 0x2
+#endif  // RWF_DSYNC
+
+#ifndef RWF_SYNC
+#define RWF_SYNC 0x4
+#endif  // RWF_SYNC
+
+constexpr int kBufSize = 1024;
+
+void SetContent(std::vector<char>& content) {
+  for (uint i = 0; i < content.size(); i++) {
+    content[i] = static_cast<char>((i % 10) + '0');
+  }
+}
+
+ssize_t pwritev2(unsigned long fd, const struct iovec* iov,
+                 unsigned long iovcnt, off_t offset, unsigned long flags) {
+  // syscall on pwritev2 does some weird things (see man syscall and search
+  // pwritev2), so we insert a 0 to word align the flags argument on native.
+  return syscall(SYS_pwritev2, fd, iov, iovcnt, offset, 0, flags);
+}
+
+// This test is the base case where we call pwritev (no offset, no flags).
+TEST(Writev2Test, BaseCall) {
+  SKIP_IF(pwritev2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS);
+
+  const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), "", TempPath::kDefaultFileMode));
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR));
+
+  std::vector<char> content(kBufSize);
+  SetContent(content);
+  struct iovec iov[2];
+  iov[0].iov_base = content.data();
+  iov[0].iov_len = content.size() / 2;
+  iov[1].iov_base = static_cast<char*>(iov[0].iov_base) + (content.size() / 2);
+  iov[1].iov_len = content.size() / 2;
+
+  ASSERT_THAT(pwritev2(fd.get(), iov, /*iovcnt=*/2,
+                       /*offset=*/0, /*flags=*/0),
+              SyscallSucceedsWithValue(kBufSize));
+
+  std::vector<char> buf(kBufSize);
+  EXPECT_THAT(read(fd.get(), buf.data(), kBufSize),
+              SyscallSucceedsWithValue(kBufSize));
+
+  EXPECT_EQ(content, buf);
+}
+
+// This test is where we call pwritev2 with a positive offset and no flags.
+TEST(Pwritev2Test, ValidPositiveOffset) {
+  SKIP_IF(pwritev2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS);
+
+  std::string prefix(kBufSize, '0');
+
+  const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), prefix, TempPath::kDefaultFileMode));
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR));
+
+  std::vector<char> content(kBufSize);
+  SetContent(content);
+  struct iovec iov;
+  iov.iov_base = content.data();
+  iov.iov_len = content.size();
+
+  ASSERT_THAT(pwritev2(fd.get(), &iov, /*iovcnt=*/1,
+                       /*offset=*/prefix.size(), /*flags=*/0),
+              SyscallSucceedsWithValue(content.size()));
+
+  std::vector<char> buf(prefix.size() + content.size());
+  EXPECT_THAT(read(fd.get(), buf.data(), buf.size()),
+              SyscallSucceedsWithValue(buf.size()));
+
+  std::vector<char> want(prefix.begin(), prefix.end());
+  want.insert(want.end(), content.begin(), content.end());
+  EXPECT_EQ(want, buf);
+}
+
+// This test is the base case where we call writev by using -1 as the offset.
+// The write should use the file offset, so the test increments the file offset
+// prior to call pwritev2.
+TEST(Pwritev2Test, NegativeOneOffset) {
+  SKIP_IF(pwritev2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS);
+
+  const std::string prefix = "00";
+  const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), prefix.data(), TempPath::kDefaultFileMode));
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR));
+  ASSERT_THAT(lseek(fd.get(), prefix.size(), SEEK_SET),
+              SyscallSucceedsWithValue(prefix.size()));
+
+  std::vector<char> content(kBufSize);
+  SetContent(content);
+  struct iovec iov;
+  iov.iov_base = content.data();
+  iov.iov_len = content.size();
+
+  ASSERT_THAT(pwritev2(fd.get(), &iov, /*iovcnt*/ 1,
+                       /*offset=*/static_cast<off_t>(-1), /*flags=*/0),
+              SyscallSucceedsWithValue(content.size()));
+
+  ASSERT_THAT(lseek(fd.get(), 0, SEEK_CUR),
+              SyscallSucceedsWithValue(prefix.size() + content.size()));
+
+  std::vector<char> buf(prefix.size() + content.size());
+  EXPECT_THAT(pread(fd.get(), buf.data(), buf.size(), /*offset=*/0),
+              SyscallSucceedsWithValue(buf.size()));
+
+  std::vector<char> want(prefix.begin(), prefix.end());
+  want.insert(want.end(), content.begin(), content.end());
+  EXPECT_EQ(want, buf);
+}
+
+// pwritev2 requires if the RWF_HIPRI flag is passed, the fd must be opened with
+// O_DIRECT. This test implements a correct call with the RWF_HIPRI flag.
+TEST(Pwritev2Test, CallWithRWF_HIPRI) {
+  SKIP_IF(pwritev2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS);
+
+  const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), "", TempPath::kDefaultFileMode));
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR));
+
+  std::vector<char> content(kBufSize);
+  SetContent(content);
+  struct iovec iov;
+  iov.iov_base = content.data();
+  iov.iov_len = content.size();
+
+  EXPECT_THAT(pwritev2(fd.get(), &iov, /*iovcnt=*/1,
+                       /*offset=*/0, /*flags=*/RWF_HIPRI),
+              SyscallSucceedsWithValue(kBufSize));
+
+  std::vector<char> buf(content.size());
+  EXPECT_THAT(read(fd.get(), buf.data(), buf.size()),
+              SyscallSucceedsWithValue(buf.size()));
+
+  EXPECT_EQ(buf, content);
+}
+
+// This test calls pwritev2 with a bad file descriptor.
+TEST(Writev2Test, BadFile) {
+  SKIP_IF(pwritev2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS);
+  ASSERT_THAT(pwritev2(/*fd=*/-1, /*iov=*/nullptr, /*iovcnt=*/0,
+                       /*offset=*/0, /*flags=*/0),
+              SyscallFailsWithErrno(EBADF));
+}
+
+// This test calls pwrite2 with an invalid offset.
+TEST(Pwritev2Test, InvalidOffset) {
+  SKIP_IF(pwritev2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS);
+
+  const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), "", TempPath::kDefaultFileMode));
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR));
+
+  char buf[16];
+  struct iovec iov;
+  iov.iov_base = buf;
+  iov.iov_len = sizeof(buf);
+
+  EXPECT_THAT(pwritev2(fd.get(), &iov, /*iovcnt=*/1,
+                       /*offset=*/static_cast<off_t>(-8), /*flags=*/0),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(Pwritev2Test, UnseekableFileValid) {
+  SKIP_IF(pwritev2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS);
+
+  int pipe_fds[2];
+
+  ASSERT_THAT(pipe(pipe_fds), SyscallSucceeds());
+
+  std::vector<char> content(32, '0');
+  SetContent(content);
+  struct iovec iov;
+  iov.iov_base = content.data();
+  iov.iov_len = content.size();
+
+  EXPECT_THAT(pwritev2(pipe_fds[1], &iov, /*iovcnt=*/1,
+                       /*offset=*/static_cast<off_t>(-1), /*flags=*/0),
+              SyscallSucceedsWithValue(content.size()));
+
+  std::vector<char> buf(content.size());
+  EXPECT_THAT(read(pipe_fds[0], buf.data(), buf.size()),
+              SyscallSucceedsWithValue(buf.size()));
+
+  EXPECT_EQ(content, buf);
+
+  EXPECT_THAT(close(pipe_fds[0]), SyscallSucceeds());
+  EXPECT_THAT(close(pipe_fds[1]), SyscallSucceeds());
+}
+
+// Calling pwritev2 with a non-negative offset calls pwritev.  Calling pwritev
+// with an unseekable file is not allowed. A pipe is used for an unseekable
+// file.
+TEST(Pwritev2Test, UnseekableFileInvalid) {
+  SKIP_IF(pwritev2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS);
+
+  int pipe_fds[2];
+  char buf[16];
+  struct iovec iov;
+  iov.iov_base = buf;
+  iov.iov_len = sizeof(buf);
+
+  ASSERT_THAT(pipe(pipe_fds), SyscallSucceeds());
+
+  EXPECT_THAT(pwritev2(pipe_fds[1], &iov, /*iovcnt=*/1,
+                       /*offset=*/2, /*flags=*/0),
+              SyscallFailsWithErrno(ESPIPE));
+
+  EXPECT_THAT(close(pipe_fds[0]), SyscallSucceeds());
+  EXPECT_THAT(close(pipe_fds[1]), SyscallSucceeds());
+}
+
+TEST(Pwritev2Test, ReadOnlyFile) {
+  SKIP_IF(pwritev2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS);
+
+  const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), "", TempPath::kDefaultFileMode));
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY));
+
+  char buf[16];
+  struct iovec iov;
+  iov.iov_base = buf;
+  iov.iov_len = sizeof(buf);
+
+  EXPECT_THAT(pwritev2(fd.get(), &iov, /*iovcnt=*/1,
+                       /*offset=*/0, /*flags=*/0),
+              SyscallFailsWithErrno(EBADF));
+}
+
+// This test calls pwritev2 with an invalid flag.
+TEST(Pwritev2Test, InvalidFlag) {
+  SKIP_IF(pwritev2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS);
+
+  const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), "", TempPath::kDefaultFileMode));
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR | O_DIRECT));
+
+  char buf[16];
+  struct iovec iov;
+  iov.iov_base = buf;
+  iov.iov_len = sizeof(buf);
+
+  EXPECT_THAT(pwritev2(fd.get(), &iov, /*iovcnt=*/1,
+                       /*offset=*/0, /*flags=*/0xF0),
+              SyscallFailsWithErrno(EOPNOTSUPP));
+}
+
+}  // namespace
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/raw_socket.cc b/test/syscalls/linux/raw_socket.cc
new file mode 100644
index 000000000..05c4ed03f
--- /dev/null
+++ b/test/syscalls/linux/raw_socket.cc
@@ -0,0 +1,819 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <linux/capability.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/ip_icmp.h>
+#include <poll.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <algorithm>
+
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/capability_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/test_util.h"
+
+// Note: in order to run these tests, /proc/sys/net/ipv4/ping_group_range will
+// need to be configured to let the superuser create ping sockets (see icmp(7)).
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// Fixture for tests parameterized by protocol.
+class RawSocketTest : public ::testing::TestWithParam<std::tuple<int, int>> {
+ protected:
+  // Creates a socket to be used in tests.
+  void SetUp() override;
+
+  // Closes the socket created by SetUp().
+  void TearDown() override;
+
+  // Sends buf via s_.
+  void SendBuf(const char* buf, int buf_len);
+
+  // Reads from s_ into recv_buf.
+  void ReceiveBuf(char* recv_buf, size_t recv_buf_len);
+
+  void ReceiveBufFrom(int sock, char* recv_buf, size_t recv_buf_len);
+
+  int Protocol() { return std::get<0>(GetParam()); }
+
+  int Family() { return std::get<1>(GetParam()); }
+
+  socklen_t AddrLen() {
+    if (Family() == AF_INET) {
+      return sizeof(sockaddr_in);
+    }
+    return sizeof(sockaddr_in6);
+  }
+
+  int HdrLen() {
+    if (Family() == AF_INET) {
+      return sizeof(struct iphdr);
+    }
+    // IPv6 raw sockets don't include the header.
+    return 0;
+  }
+
+  // The socket used for both reading and writing.
+  int s_;
+
+  // The loopback address.
+  struct sockaddr_storage addr_;
+};
+
+void RawSocketTest::SetUp() {
+  if (!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) {
+    ASSERT_THAT(socket(Family(), SOCK_RAW, Protocol()),
+                SyscallFailsWithErrno(EPERM));
+    GTEST_SKIP();
+  }
+
+  ASSERT_THAT(s_ = socket(Family(), SOCK_RAW, Protocol()), SyscallSucceeds());
+
+  addr_ = {};
+
+  // We don't set ports because raw sockets don't have a notion of ports.
+  if (Family() == AF_INET) {
+    struct sockaddr_in* sin = reinterpret_cast<struct sockaddr_in*>(&addr_);
+    sin->sin_family = AF_INET;
+    sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+  } else {
+    struct sockaddr_in6* sin6 = reinterpret_cast<struct sockaddr_in6*>(&addr_);
+    sin6->sin6_family = AF_INET6;
+    sin6->sin6_addr = in6addr_loopback;
+  }
+}
+
+void RawSocketTest::TearDown() {
+  // TearDown will be run even if we skip the test.
+  if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) {
+    EXPECT_THAT(close(s_), SyscallSucceeds());
+  }
+}
+
+// We should be able to create multiple raw sockets for the same protocol.
+// BasicRawSocket::Setup creates the first one, so we only have to create one
+// more here.
+TEST_P(RawSocketTest, MultipleCreation) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+  int s2;
+  ASSERT_THAT(s2 = socket(Family(), SOCK_RAW, Protocol()), SyscallSucceeds());
+
+  ASSERT_THAT(close(s2), SyscallSucceeds());
+}
+
+// Test that shutting down an unconnected socket fails.
+TEST_P(RawSocketTest, FailShutdownWithoutConnect) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+  ASSERT_THAT(shutdown(s_, SHUT_WR), SyscallFailsWithErrno(ENOTCONN));
+  ASSERT_THAT(shutdown(s_, SHUT_RD), SyscallFailsWithErrno(ENOTCONN));
+}
+
+// Shutdown is a no-op for raw sockets (and datagram sockets in general).
+TEST_P(RawSocketTest, ShutdownWriteNoop) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+  ASSERT_THAT(
+      connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()),
+      SyscallSucceeds());
+  ASSERT_THAT(shutdown(s_, SHUT_WR), SyscallSucceeds());
+
+  // Arbitrary.
+  constexpr char kBuf[] = "noop";
+  ASSERT_THAT(RetryEINTR(write)(s_, kBuf, sizeof(kBuf)),
+              SyscallSucceedsWithValue(sizeof(kBuf)));
+}
+
+// Shutdown is a no-op for raw sockets (and datagram sockets in general).
+TEST_P(RawSocketTest, ShutdownReadNoop) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+  ASSERT_THAT(
+      connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()),
+      SyscallSucceeds());
+  ASSERT_THAT(shutdown(s_, SHUT_RD), SyscallSucceeds());
+
+  // Arbitrary.
+  constexpr char kBuf[] = "gdg";
+  ASSERT_NO_FATAL_FAILURE(SendBuf(kBuf, sizeof(kBuf)));
+
+  std::vector<char> c(sizeof(kBuf) + HdrLen());
+  ASSERT_THAT(read(s_, c.data(), c.size()), SyscallSucceedsWithValue(c.size()));
+}
+
+// Test that listen() fails.
+TEST_P(RawSocketTest, FailListen) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+  ASSERT_THAT(listen(s_, 1), SyscallFailsWithErrno(ENOTSUP));
+}
+
+// Test that accept() fails.
+TEST_P(RawSocketTest, FailAccept) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+  struct sockaddr saddr;
+  socklen_t addrlen;
+  ASSERT_THAT(accept(s_, &saddr, &addrlen), SyscallFailsWithErrno(ENOTSUP));
+}
+
+// Test that getpeername() returns nothing before connect().
+TEST_P(RawSocketTest, FailGetPeerNameBeforeConnect) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+  struct sockaddr saddr;
+  socklen_t addrlen = sizeof(saddr);
+  ASSERT_THAT(getpeername(s_, &saddr, &addrlen),
+              SyscallFailsWithErrno(ENOTCONN));
+}
+
+// Test that getpeername() returns something after connect().
+TEST_P(RawSocketTest, GetPeerName) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+  ASSERT_THAT(
+      connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()),
+      SyscallSucceeds());
+  struct sockaddr saddr;
+  socklen_t addrlen = sizeof(saddr);
+  ASSERT_THAT(getpeername(s_, &saddr, &addrlen),
+              SyscallFailsWithErrno(ENOTCONN));
+  ASSERT_GT(addrlen, 0);
+}
+
+// Test that the socket is writable immediately.
+TEST_P(RawSocketTest, PollWritableImmediately) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+  struct pollfd pfd = {};
+  pfd.fd = s_;
+  pfd.events = POLLOUT;
+  ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, 10000), SyscallSucceedsWithValue(1));
+}
+
+// Test that the socket isn't readable before receiving anything.
+TEST_P(RawSocketTest, PollNotReadableInitially) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+  // Try to receive data with MSG_DONTWAIT, which returns immediately if there's
+  // nothing to be read.
+  char buf[117];
+  ASSERT_THAT(RetryEINTR(recv)(s_, buf, sizeof(buf), MSG_DONTWAIT),
+              SyscallFailsWithErrno(EAGAIN));
+}
+
+// Test that the socket becomes readable once something is written to it.
+TEST_P(RawSocketTest, PollTriggeredOnWrite) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+  // Write something so that there's data to be read.
+  // Arbitrary.
+  constexpr char kBuf[] = "JP5";
+  ASSERT_NO_FATAL_FAILURE(SendBuf(kBuf, sizeof(kBuf)));
+
+  struct pollfd pfd = {};
+  pfd.fd = s_;
+  pfd.events = POLLIN;
+  ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, 10000), SyscallSucceedsWithValue(1));
+}
+
+// Test that we can connect() to a valid IP (loopback).
+TEST_P(RawSocketTest, ConnectToLoopback) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+  ASSERT_THAT(
+      connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()),
+      SyscallSucceeds());
+}
+
+// Test that calling send() without connect() fails.
+TEST_P(RawSocketTest, SendWithoutConnectFails) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+  // Arbitrary.
+  constexpr char kBuf[] = "Endgame was good";
+  ASSERT_THAT(send(s_, kBuf, sizeof(kBuf), 0),
+              SyscallFailsWithErrno(EDESTADDRREQ));
+}
+
+// Bind to localhost.
+TEST_P(RawSocketTest, BindToLocalhost) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+  ASSERT_THAT(
+      bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()),
+      SyscallSucceeds());
+}
+
+// Bind to a different address.
+TEST_P(RawSocketTest, BindToInvalid) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+  struct sockaddr_storage bind_addr = addr_;
+  if (Family() == AF_INET) {
+    struct sockaddr_in* sin = reinterpret_cast<struct sockaddr_in*>(&bind_addr);
+    sin->sin_addr = {1};  // 1.0.0.0 - An address that we can't bind to.
+  } else {
+    struct sockaddr_in6* sin6 =
+        reinterpret_cast<struct sockaddr_in6*>(&bind_addr);
+    memset(&sin6->sin6_addr.s6_addr, 0, sizeof(sin6->sin6_addr.s6_addr));
+    sin6->sin6_addr.s6_addr[0] = 1;  // 1: - An address that we can't bind to.
+  }
+  ASSERT_THAT(bind(s_, reinterpret_cast<struct sockaddr*>(&bind_addr),
+                   AddrLen()), SyscallFailsWithErrno(EADDRNOTAVAIL));
+}
+
+// Send and receive an packet.
+TEST_P(RawSocketTest, SendAndReceive) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+  // Arbitrary.
+  constexpr char kBuf[] = "TB12";
+  ASSERT_NO_FATAL_FAILURE(SendBuf(kBuf, sizeof(kBuf)));
+
+  // Receive the packet and make sure it's identical.
+  std::vector<char> recv_buf(sizeof(kBuf) + HdrLen());
+  ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size()));
+  EXPECT_EQ(memcmp(recv_buf.data() + HdrLen(), kBuf, sizeof(kBuf)), 0);
+}
+
+// We should be able to create multiple raw sockets for the same protocol and
+// receive the same packet on both.
+TEST_P(RawSocketTest, MultipleSocketReceive) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+  int s2;
+  ASSERT_THAT(s2 = socket(Family(), SOCK_RAW, Protocol()), SyscallSucceeds());
+
+  // Arbitrary.
+  constexpr char kBuf[] = "TB10";
+  ASSERT_NO_FATAL_FAILURE(SendBuf(kBuf, sizeof(kBuf)));
+
+  // Receive it on socket 1.
+  std::vector<char> recv_buf1(sizeof(kBuf) + HdrLen());
+  ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf1.data(), recv_buf1.size()));
+
+  // Receive it on socket 2.
+  std::vector<char> recv_buf2(sizeof(kBuf) + HdrLen());
+  ASSERT_NO_FATAL_FAILURE(ReceiveBufFrom(s2, recv_buf2.data(),
+                                         recv_buf2.size()));
+
+  EXPECT_EQ(memcmp(recv_buf1.data() + HdrLen(),
+                   recv_buf2.data() + HdrLen(), sizeof(kBuf)),
+            0);
+
+  ASSERT_THAT(close(s2), SyscallSucceeds());
+}
+
+// Test that connect sends packets to the right place.
+TEST_P(RawSocketTest, SendAndReceiveViaConnect) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+  ASSERT_THAT(
+      connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()),
+      SyscallSucceeds());
+
+  // Arbitrary.
+  constexpr char kBuf[] = "JH4";
+  ASSERT_THAT(send(s_, kBuf, sizeof(kBuf), 0),
+              SyscallSucceedsWithValue(sizeof(kBuf)));
+
+  // Receive the packet and make sure it's identical.
+  std::vector<char> recv_buf(sizeof(kBuf) + HdrLen());
+  ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size()));
+  EXPECT_EQ(memcmp(recv_buf.data() + HdrLen(), kBuf, sizeof(kBuf)), 0);
+}
+
+// Bind to localhost, then send and receive packets.
+TEST_P(RawSocketTest, BindSendAndReceive) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+  ASSERT_THAT(
+      bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()),
+      SyscallSucceeds());
+
+  // Arbitrary.
+  constexpr char kBuf[] = "DR16";
+  ASSERT_NO_FATAL_FAILURE(SendBuf(kBuf, sizeof(kBuf)));
+
+  // Receive the packet and make sure it's identical.
+  std::vector<char> recv_buf(sizeof(kBuf) + HdrLen());
+  ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size()));
+  EXPECT_EQ(memcmp(recv_buf.data() + HdrLen(), kBuf, sizeof(kBuf)), 0);
+}
+
+// Bind and connect to localhost and send/receive packets.
+TEST_P(RawSocketTest, BindConnectSendAndReceive) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+  ASSERT_THAT(
+      bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()),
+      SyscallSucceeds());
+  ASSERT_THAT(
+      connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()),
+      SyscallSucceeds());
+
+  // Arbitrary.
+  constexpr char kBuf[] = "DG88";
+  ASSERT_NO_FATAL_FAILURE(SendBuf(kBuf, sizeof(kBuf)));
+
+  // Receive the packet and make sure it's identical.
+  std::vector<char> recv_buf(sizeof(kBuf) + HdrLen());
+  ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size()));
+  EXPECT_EQ(memcmp(recv_buf.data() + HdrLen(), kBuf, sizeof(kBuf)), 0);
+}
+
+// Check that setting SO_RCVBUF below min is clamped to the minimum
+// receive buffer size.
+TEST_P(RawSocketTest, SetSocketRecvBufBelowMin) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+  // Discover minimum receive buf size by trying to set it to zero.
+  // See:
+  // https://github.com/torvalds/linux/blob/a5dc8300df75e8b8384b4c82225f1e4a0b4d9b55/net/core/sock.c#L820
+  constexpr int kRcvBufSz = 0;
+  ASSERT_THAT(
+      setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)),
+      SyscallSucceeds());
+
+  int min = 0;
+  socklen_t min_len = sizeof(min);
+  ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &min, &min_len),
+              SyscallSucceeds());
+
+  // Linux doubles the value so let's use a value that when doubled will still
+  // be smaller than min.
+  int below_min = min / 2 - 1;
+  ASSERT_THAT(
+      setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &below_min, sizeof(below_min)),
+      SyscallSucceeds());
+
+  int val = 0;
+  socklen_t val_len = sizeof(val);
+  ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &val, &val_len),
+              SyscallSucceeds());
+
+  ASSERT_EQ(min, val);
+}
+
+// Check that setting SO_RCVBUF above max is clamped to the maximum
+// receive buffer size.
+TEST_P(RawSocketTest, SetSocketRecvBufAboveMax) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+  // Discover max buf size by trying to set the largest possible buffer size.
+  constexpr int kRcvBufSz = 0xffffffff;
+  ASSERT_THAT(
+      setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)),
+      SyscallSucceeds());
+
+  int max = 0;
+  socklen_t max_len = sizeof(max);
+  ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &max, &max_len),
+              SyscallSucceeds());
+
+  int above_max = max + 1;
+  ASSERT_THAT(
+      setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &above_max, sizeof(above_max)),
+      SyscallSucceeds());
+
+  int val = 0;
+  socklen_t val_len = sizeof(val);
+  ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &val, &val_len),
+              SyscallSucceeds());
+  ASSERT_EQ(max, val);
+}
+
+// Check that setting SO_RCVBUF min <= kRcvBufSz <= max is honored.
+TEST_P(RawSocketTest, SetSocketRecvBuf) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+  int max = 0;
+  int min = 0;
+  {
+    // Discover max buf size by trying to set a really large buffer size.
+    constexpr int kRcvBufSz = 0xffffffff;
+    ASSERT_THAT(
+        setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)),
+        SyscallSucceeds());
+
+    max = 0;
+    socklen_t max_len = sizeof(max);
+    ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &max, &max_len),
+                SyscallSucceeds());
+  }
+
+  {
+    // Discover minimum buffer size by trying to set a zero size receive buffer
+    // size.
+    // See:
+    // https://github.com/torvalds/linux/blob/a5dc8300df75e8b8384b4c82225f1e4a0b4d9b55/net/core/sock.c#L820
+    constexpr int kRcvBufSz = 0;
+    ASSERT_THAT(
+        setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)),
+        SyscallSucceeds());
+
+    socklen_t min_len = sizeof(min);
+    ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &min, &min_len),
+                SyscallSucceeds());
+  }
+
+  int quarter_sz = min + (max - min) / 4;
+  ASSERT_THAT(
+      setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &quarter_sz, sizeof(quarter_sz)),
+      SyscallSucceeds());
+
+  int val = 0;
+  socklen_t val_len = sizeof(val);
+  ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &val, &val_len),
+              SyscallSucceeds());
+
+  // Linux doubles the value set by SO_SNDBUF/SO_RCVBUF.
+  // TODO(gvisor.dev/issue/2926): Remove when Netstack matches linux behavior.
+  if (!IsRunningOnGvisor()) {
+    quarter_sz *= 2;
+  }
+  ASSERT_EQ(quarter_sz, val);
+}
+
+// Check that setting SO_SNDBUF below min is clamped to the minimum
+// receive buffer size.
+TEST_P(RawSocketTest, SetSocketSendBufBelowMin) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+  // Discover minimum buffer size by trying to set it to zero.
+  constexpr int kSndBufSz = 0;
+  ASSERT_THAT(
+      setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &kSndBufSz, sizeof(kSndBufSz)),
+      SyscallSucceeds());
+
+  int min = 0;
+  socklen_t min_len = sizeof(min);
+  ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &min, &min_len),
+              SyscallSucceeds());
+
+  // Linux doubles the value so let's use a value that when doubled will still
+  // be smaller than min.
+  int below_min = min / 2 - 1;
+  ASSERT_THAT(
+      setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &below_min, sizeof(below_min)),
+      SyscallSucceeds());
+
+  int val = 0;
+  socklen_t val_len = sizeof(val);
+  ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &val, &val_len),
+              SyscallSucceeds());
+
+  ASSERT_EQ(min, val);
+}
+
+// Check that setting SO_SNDBUF above max is clamped to the maximum
+// send buffer size.
+TEST_P(RawSocketTest, SetSocketSendBufAboveMax) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+  // Discover maximum buffer size by trying to set it to a large value.
+  constexpr int kSndBufSz = 0xffffffff;
+  ASSERT_THAT(
+      setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &kSndBufSz, sizeof(kSndBufSz)),
+      SyscallSucceeds());
+
+  int max = 0;
+  socklen_t max_len = sizeof(max);
+  ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &max, &max_len),
+              SyscallSucceeds());
+
+  int above_max = max + 1;
+  ASSERT_THAT(
+      setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &above_max, sizeof(above_max)),
+      SyscallSucceeds());
+
+  int val = 0;
+  socklen_t val_len = sizeof(val);
+  ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &val, &val_len),
+              SyscallSucceeds());
+  ASSERT_EQ(max, val);
+}
+
+// Check that setting SO_SNDBUF min <= kSndBufSz <= max is honored.
+TEST_P(RawSocketTest, SetSocketSendBuf) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+  int max = 0;
+  int min = 0;
+  {
+    // Discover maximum buffer size by trying to set it to a large value.
+    constexpr int kSndBufSz = 0xffffffff;
+    ASSERT_THAT(
+        setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &kSndBufSz, sizeof(kSndBufSz)),
+        SyscallSucceeds());
+
+    max = 0;
+    socklen_t max_len = sizeof(max);
+    ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &max, &max_len),
+                SyscallSucceeds());
+  }
+
+  {
+    // Discover minimum buffer size by trying to set it to zero.
+    constexpr int kSndBufSz = 0;
+    ASSERT_THAT(
+        setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &kSndBufSz, sizeof(kSndBufSz)),
+        SyscallSucceeds());
+
+    socklen_t min_len = sizeof(min);
+    ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &min, &min_len),
+                SyscallSucceeds());
+  }
+
+  int quarter_sz = min + (max - min) / 4;
+  ASSERT_THAT(
+      setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &quarter_sz, sizeof(quarter_sz)),
+      SyscallSucceeds());
+
+  int val = 0;
+  socklen_t val_len = sizeof(val);
+  ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &val, &val_len),
+              SyscallSucceeds());
+
+  // Linux doubles the value set by SO_SNDBUF/SO_RCVBUF.
+  // TODO(gvisor.dev/issue/2926): Remove the gvisor special casing when Netstack
+  // matches linux behavior.
+  if (!IsRunningOnGvisor()) {
+    quarter_sz *= 2;
+  }
+
+  ASSERT_EQ(quarter_sz, val);
+}
+
+// Test that receive buffer limits are not enforced when the recv buffer is
+// empty.
+TEST_P(RawSocketTest, RecvBufLimitsEmptyRecvBuffer) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+  ASSERT_THAT(
+      bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()),
+      SyscallSucceeds());
+  ASSERT_THAT(
+      connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()),
+      SyscallSucceeds());
+
+  int min = 0;
+  {
+    // Discover minimum buffer size by trying to set it to zero.
+    constexpr int kRcvBufSz = 0;
+    ASSERT_THAT(
+        setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)),
+        SyscallSucceeds());
+
+    socklen_t min_len = sizeof(min);
+    ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &min, &min_len),
+                SyscallSucceeds());
+  }
+
+  {
+    // Send data of size min and verify that it's received.
+    std::vector<char> buf(min);
+    RandomizeBuffer(buf.data(), buf.size());
+    ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size()));
+
+    // Receive the packet and make sure it's identical.
+    std::vector<char> recv_buf(buf.size() + HdrLen());
+    ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size()));
+    EXPECT_EQ(
+        memcmp(recv_buf.data() + HdrLen(), buf.data(), buf.size()),
+        0);
+  }
+
+  {
+    // Send data of size min + 1 and verify that its received. Both linux and
+    // Netstack accept a dgram that exceeds rcvBuf limits if the receive buffer
+    // is currently empty.
+    std::vector<char> buf(min + 1);
+    RandomizeBuffer(buf.data(), buf.size());
+    ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size()));
+    // Receive the packet and make sure it's identical.
+    std::vector<char> recv_buf(buf.size() + HdrLen());
+    ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size()));
+    EXPECT_EQ(
+        memcmp(recv_buf.data() + HdrLen(), buf.data(), buf.size()),
+        0);
+  }
+}
+
+TEST_P(RawSocketTest, RecvBufLimits) {
+  // TCP stack generates RSTs for unknown endpoints and it complicates the test
+  // as we have to deal with the RST packets as well. For testing the raw socket
+  // endpoints buffer limit enforcement we can just test for UDP.
+  //
+  // We don't use SKIP_IF here because root_test_runner explicitly fails if a
+  // test is skipped.
+  if (Protocol() == IPPROTO_TCP) {
+    return;
+  }
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+  ASSERT_THAT(
+      bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()),
+      SyscallSucceeds());
+  ASSERT_THAT(
+      connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()),
+      SyscallSucceeds());
+
+  int min = 0;
+  {
+    // Discover minimum buffer size by trying to set it to zero.
+    constexpr int kRcvBufSz = 0;
+    ASSERT_THAT(
+        setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)),
+        SyscallSucceeds());
+
+    socklen_t min_len = sizeof(min);
+    ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &min, &min_len),
+                SyscallSucceeds());
+  }
+
+  // Now set the limit to min * 2.
+  int new_rcv_buf_sz = min * 4;
+  if (!IsRunningOnGvisor()) {
+    // Linux doubles the value specified so just set to min.
+    new_rcv_buf_sz = min * 2;
+  }
+
+  ASSERT_THAT(setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &new_rcv_buf_sz,
+                         sizeof(new_rcv_buf_sz)),
+              SyscallSucceeds());
+  int rcv_buf_sz = 0;
+  {
+    socklen_t rcv_buf_len = sizeof(rcv_buf_sz);
+    ASSERT_THAT(
+        getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &rcv_buf_sz, &rcv_buf_len),
+        SyscallSucceeds());
+  }
+
+  // Set a receive timeout so that we don't block forever on reads if the test
+  // fails.
+  struct timeval tv {
+    .tv_sec = 1, .tv_usec = 0,
+  };
+  ASSERT_THAT(setsockopt(s_, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)),
+              SyscallSucceeds());
+
+  {
+    std::vector<char> buf(min);
+    RandomizeBuffer(buf.data(), buf.size());
+
+    ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size()));
+    ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size()));
+    ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size()));
+    ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size()));
+    int sent = 4;
+    if (IsRunningOnGvisor()) {
+      // Linux seems to drop the 4th packet even though technically it should
+      // fit in the receive buffer.
+      ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size()));
+      sent++;
+    }
+
+    // Verify that the expected number of packets are available to be read.
+    for (int i = 0; i < sent - 1; i++) {
+      // Receive the packet and make sure it's identical.
+      std::vector<char> recv_buf(buf.size() + HdrLen());
+      ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size()));
+      EXPECT_EQ(memcmp(recv_buf.data() + HdrLen(), buf.data(),
+                       buf.size()),
+                0);
+    }
+
+    // Assert that the last packet is dropped because the receive buffer should
+    // be full after the first four packets.
+    std::vector<char> recv_buf(buf.size() + HdrLen());
+    struct iovec iov = {};
+    iov.iov_base = static_cast<void*>(const_cast<char*>(recv_buf.data()));
+    iov.iov_len = buf.size();
+    struct msghdr msg = {};
+    msg.msg_iov = &iov;
+    msg.msg_iovlen = 1;
+    msg.msg_control = NULL;
+    msg.msg_controllen = 0;
+    msg.msg_flags = 0;
+    ASSERT_THAT(RetryEINTR(recvmsg)(s_, &msg, MSG_DONTWAIT),
+                SyscallFailsWithErrno(EAGAIN));
+  }
+}
+
+void RawSocketTest::SendBuf(const char* buf, int buf_len) {
+  // It's safe to use const_cast here because sendmsg won't modify the iovec or
+  // address.
+  struct iovec iov = {};
+  iov.iov_base = static_cast<void*>(const_cast<char*>(buf));
+  iov.iov_len = static_cast<size_t>(buf_len);
+  struct msghdr msg = {};
+  msg.msg_name = static_cast<void*>(&addr_);
+  msg.msg_namelen = AddrLen();
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+  msg.msg_control = NULL;
+  msg.msg_controllen = 0;
+  msg.msg_flags = 0;
+  ASSERT_THAT(sendmsg(s_, &msg, 0), SyscallSucceedsWithValue(buf_len));
+}
+
+void RawSocketTest::ReceiveBuf(char* recv_buf, size_t recv_buf_len) {
+  ASSERT_NO_FATAL_FAILURE(ReceiveBufFrom(s_, recv_buf, recv_buf_len));
+}
+
+void RawSocketTest::ReceiveBufFrom(int sock, char* recv_buf,
+                                   size_t recv_buf_len) {
+  ASSERT_NO_FATAL_FAILURE(RecvNoCmsg(sock, recv_buf, recv_buf_len));
+}
+
+INSTANTIATE_TEST_SUITE_P(AllInetTests, RawSocketTest,
+                         ::testing::Combine(
+                             ::testing::Values(IPPROTO_TCP, IPPROTO_UDP),
+                             ::testing::Values(AF_INET, AF_INET6)));
+
+// AF_INET6+SOCK_RAW+IPPROTO_RAW sockets can be created, but not written to.
+TEST(RawSocketTest, IPv6ProtoRaw) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+  int sock;
+  ASSERT_THAT(sock = socket(AF_INET6, SOCK_RAW, IPPROTO_RAW),
+              SyscallSucceeds());
+
+  // Verify that writing yields EINVAL.
+  char buf[] = "This is such a weird little edge case";
+  struct sockaddr_in6 sin6 = {};
+  sin6.sin6_family = AF_INET6;
+  sin6.sin6_addr = in6addr_loopback;
+  ASSERT_THAT(sendto(sock, buf, sizeof(buf), 0 /* flags */,
+                     reinterpret_cast<struct sockaddr*>(&sin6), sizeof(sin6)),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/raw_socket_hdrincl.cc b/test/syscalls/linux/raw_socket_hdrincl.cc
new file mode 100644
index 000000000..5bb14d57c
--- /dev/null
+++ b/test/syscalls/linux/raw_socket_hdrincl.cc
@@ -0,0 +1,406 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <linux/capability.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip_icmp.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <algorithm>
+#include <cstring>
+
+#include "gtest/gtest.h"
+#include "absl/base/internal/endian.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/capability_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// Tests for IPPROTO_RAW raw sockets, which implies IP_HDRINCL.
+class RawHDRINCL : public ::testing::Test {
+ protected:
+  // Creates a socket to be used in tests.
+  void SetUp() override;
+
+  // Closes the socket created by SetUp().
+  void TearDown() override;
+
+  // Returns a valid looback IP header with no payload.
+  struct iphdr LoopbackHeader();
+
+  // Fills in buf with an IP header, UDP header, and payload. Returns false if
+  // buf_size isn't large enough to hold everything.
+  bool FillPacket(char* buf, size_t buf_size, int port, const char* payload,
+                  uint16_t payload_size);
+
+  // The socket used for both reading and writing.
+  int socket_;
+
+  // The loopback address.
+  struct sockaddr_in addr_;
+};
+
+void RawHDRINCL::SetUp() {
+  if (!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) {
+    ASSERT_THAT(socket(AF_INET, SOCK_RAW, IPPROTO_RAW),
+                SyscallFailsWithErrno(EPERM));
+    GTEST_SKIP();
+  }
+
+  ASSERT_THAT(socket_ = socket(AF_INET, SOCK_RAW, IPPROTO_RAW),
+              SyscallSucceeds());
+
+  addr_ = {};
+
+  addr_.sin_port = IPPROTO_IP;
+  addr_.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+  addr_.sin_family = AF_INET;
+}
+
+void RawHDRINCL::TearDown() {
+  // TearDown will be run even if we skip the test.
+  if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) {
+    EXPECT_THAT(close(socket_), SyscallSucceeds());
+  }
+}
+
+struct iphdr RawHDRINCL::LoopbackHeader() {
+  struct iphdr hdr = {};
+  hdr.ihl = 5;
+  hdr.version = 4;
+  hdr.tos = 0;
+  hdr.tot_len = absl::gbswap_16(sizeof(hdr));
+  hdr.id = 0;
+  hdr.frag_off = 0;
+  hdr.ttl = 7;
+  hdr.protocol = 1;
+  hdr.daddr = htonl(INADDR_LOOPBACK);
+  // hdr.check is set by the network stack.
+  // hdr.tot_len is set by the network stack.
+  // hdr.saddr is set by the network stack.
+  return hdr;
+}
+
+bool RawHDRINCL::FillPacket(char* buf, size_t buf_size, int port,
+                            const char* payload, uint16_t payload_size) {
+  if (buf_size < sizeof(struct iphdr) + sizeof(struct udphdr) + payload_size) {
+    return false;
+  }
+
+  struct iphdr ip = LoopbackHeader();
+  ip.protocol = IPPROTO_UDP;
+
+  struct udphdr udp = {};
+  udp.source = absl::gbswap_16(port);
+  udp.dest = absl::gbswap_16(port);
+  udp.len = absl::gbswap_16(sizeof(udp) + payload_size);
+  udp.check = 0;
+
+  memcpy(buf, reinterpret_cast<char*>(&ip), sizeof(ip));
+  memcpy(buf + sizeof(ip), reinterpret_cast<char*>(&udp), sizeof(udp));
+  memcpy(buf + sizeof(ip) + sizeof(udp), payload, payload_size);
+
+  return true;
+}
+
+// We should be able to create multiple IPPROTO_RAW sockets. RawHDRINCL::Setup
+// creates the first one, so we only have to create one more here.
+TEST_F(RawHDRINCL, MultipleCreation) {
+  int s2;
+  ASSERT_THAT(s2 = socket(AF_INET, SOCK_RAW, IPPROTO_RAW), SyscallSucceeds());
+
+  ASSERT_THAT(close(s2), SyscallSucceeds());
+}
+
+// Test that shutting down an unconnected socket fails.
+TEST_F(RawHDRINCL, FailShutdownWithoutConnect) {
+  ASSERT_THAT(shutdown(socket_, SHUT_WR), SyscallFailsWithErrno(ENOTCONN));
+  ASSERT_THAT(shutdown(socket_, SHUT_RD), SyscallFailsWithErrno(ENOTCONN));
+}
+
+// Test that listen() fails.
+TEST_F(RawHDRINCL, FailListen) {
+  ASSERT_THAT(listen(socket_, 1), SyscallFailsWithErrno(ENOTSUP));
+}
+
+// Test that accept() fails.
+TEST_F(RawHDRINCL, FailAccept) {
+  struct sockaddr saddr;
+  socklen_t addrlen;
+  ASSERT_THAT(accept(socket_, &saddr, &addrlen),
+              SyscallFailsWithErrno(ENOTSUP));
+}
+
+// Test that the socket is writable immediately.
+TEST_F(RawHDRINCL, PollWritableImmediately) {
+  struct pollfd pfd = {};
+  pfd.fd = socket_;
+  pfd.events = POLLOUT;
+  ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, 0), SyscallSucceedsWithValue(1));
+}
+
+// Test that the socket isn't readable.
+TEST_F(RawHDRINCL, NotReadable) {
+  // Try to receive data with MSG_DONTWAIT, which returns immediately if there's
+  // nothing to be read.
+  char buf[117];
+  ASSERT_THAT(RetryEINTR(recv)(socket_, buf, sizeof(buf), MSG_DONTWAIT),
+              SyscallFailsWithErrno(EAGAIN));
+}
+
+// Test that we can connect() to a valid IP (loopback).
+TEST_F(RawHDRINCL, ConnectToLoopback) {
+  ASSERT_THAT(connect(socket_, reinterpret_cast<struct sockaddr*>(&addr_),
+                      sizeof(addr_)),
+              SyscallSucceeds());
+}
+
+TEST_F(RawHDRINCL, SendWithoutConnectSucceeds) {
+  struct iphdr hdr = LoopbackHeader();
+  ASSERT_THAT(send(socket_, &hdr, sizeof(hdr), 0),
+              SyscallSucceedsWithValue(sizeof(hdr)));
+}
+
+// HDRINCL implies write-only. Verify that we can't read a packet sent to
+// loopback.
+TEST_F(RawHDRINCL, NotReadableAfterWrite) {
+  ASSERT_THAT(connect(socket_, reinterpret_cast<struct sockaddr*>(&addr_),
+                      sizeof(addr_)),
+              SyscallSucceeds());
+
+  // Construct a packet with an IP header, UDP header, and payload.
+  constexpr char kPayload[] = "odst";
+  char packet[sizeof(struct iphdr) + sizeof(struct udphdr) + sizeof(kPayload)];
+  ASSERT_TRUE(FillPacket(packet, sizeof(packet), 40000 /* port */, kPayload,
+                         sizeof(kPayload)));
+
+  socklen_t addrlen = sizeof(addr_);
+  ASSERT_NO_FATAL_FAILURE(
+      sendto(socket_, reinterpret_cast<void*>(&packet), sizeof(packet), 0,
+             reinterpret_cast<struct sockaddr*>(&addr_), addrlen));
+
+  struct pollfd pfd = {};
+  pfd.fd = socket_;
+  pfd.events = POLLIN;
+  ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, 1000), SyscallSucceedsWithValue(0));
+}
+
+TEST_F(RawHDRINCL, WriteTooSmall) {
+  ASSERT_THAT(connect(socket_, reinterpret_cast<struct sockaddr*>(&addr_),
+                      sizeof(addr_)),
+              SyscallSucceeds());
+
+  // This is smaller than the size of an IP header.
+  constexpr char kBuf[] = "JP5";
+  ASSERT_THAT(send(socket_, kBuf, sizeof(kBuf), 0),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+// Bind to localhost.
+TEST_F(RawHDRINCL, BindToLocalhost) {
+  ASSERT_THAT(
+      bind(socket_, reinterpret_cast<struct sockaddr*>(&addr_), sizeof(addr_)),
+      SyscallSucceeds());
+}
+
+// Bind to a different address.
+TEST_F(RawHDRINCL, BindToInvalid) {
+  struct sockaddr_in bind_addr = {};
+  bind_addr.sin_family = AF_INET;
+  bind_addr.sin_addr = {1};  // 1.0.0.0 - An address that we can't bind to.
+  ASSERT_THAT(bind(socket_, reinterpret_cast<struct sockaddr*>(&bind_addr),
+                   sizeof(bind_addr)),
+              SyscallFailsWithErrno(EADDRNOTAVAIL));
+}
+
+// Send and receive a packet.
+TEST_F(RawHDRINCL, SendAndReceive) {
+  int port = 40000;
+  if (!IsRunningOnGvisor()) {
+    port = static_cast<short>(ASSERT_NO_ERRNO_AND_VALUE(
+        PortAvailable(0, AddressFamily::kIpv4, SocketType::kUdp, false)));
+  }
+
+  // IPPROTO_RAW sockets are write-only. We'll have to open another socket to
+  // read what we write.
+  FileDescriptor udp_sock =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_RAW, IPPROTO_UDP));
+
+  // Construct a packet with an IP header, UDP header, and payload.
+  constexpr char kPayload[] = "toto";
+  char packet[sizeof(struct iphdr) + sizeof(struct udphdr) + sizeof(kPayload)];
+  ASSERT_TRUE(
+      FillPacket(packet, sizeof(packet), port, kPayload, sizeof(kPayload)));
+
+  socklen_t addrlen = sizeof(addr_);
+  ASSERT_NO_FATAL_FAILURE(sendto(socket_, &packet, sizeof(packet), 0,
+                                 reinterpret_cast<struct sockaddr*>(&addr_),
+                                 addrlen));
+
+  // Receive the payload.
+  char recv_buf[sizeof(packet)];
+  struct sockaddr_in src;
+  socklen_t src_size = sizeof(src);
+  ASSERT_THAT(recvfrom(udp_sock.get(), recv_buf, sizeof(recv_buf), 0,
+                       reinterpret_cast<struct sockaddr*>(&src), &src_size),
+              SyscallSucceedsWithValue(sizeof(packet)));
+  EXPECT_EQ(
+      memcmp(kPayload, recv_buf + sizeof(struct iphdr) + sizeof(struct udphdr),
+             sizeof(kPayload)),
+      0);
+  // The network stack should have set the source address.
+  EXPECT_EQ(src.sin_family, AF_INET);
+  EXPECT_EQ(absl::gbswap_32(src.sin_addr.s_addr), INADDR_LOOPBACK);
+  // The packet ID should not be 0, as the packet has DF=0.
+  struct iphdr* iphdr = reinterpret_cast<struct iphdr*>(recv_buf);
+  EXPECT_NE(iphdr->id, 0);
+}
+
+// Send and receive a packet where the sendto address is not the same as the
+// provided destination.
+TEST_F(RawHDRINCL, SendAndReceiveDifferentAddress) {
+  int port = 40000;
+  if (!IsRunningOnGvisor()) {
+    port = static_cast<short>(ASSERT_NO_ERRNO_AND_VALUE(
+        PortAvailable(0, AddressFamily::kIpv4, SocketType::kUdp, false)));
+  }
+
+  // IPPROTO_RAW sockets are write-only. We'll have to open another socket to
+  // read what we write.
+  FileDescriptor udp_sock =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_RAW, IPPROTO_UDP));
+
+  // Construct a packet with an IP header, UDP header, and payload.
+  constexpr char kPayload[] = "toto";
+  char packet[sizeof(struct iphdr) + sizeof(struct udphdr) + sizeof(kPayload)];
+  ASSERT_TRUE(
+      FillPacket(packet, sizeof(packet), port, kPayload, sizeof(kPayload)));
+  // Overwrite the IP destination address with an IP we can't get to.
+  struct iphdr iphdr = {};
+  memcpy(&iphdr, packet, sizeof(iphdr));
+  iphdr.daddr = 42;
+  memcpy(packet, &iphdr, sizeof(iphdr));
+
+  socklen_t addrlen = sizeof(addr_);
+  ASSERT_NO_FATAL_FAILURE(sendto(socket_, &packet, sizeof(packet), 0,
+                                 reinterpret_cast<struct sockaddr*>(&addr_),
+                                 addrlen));
+
+  // Receive the payload, since sendto should replace the bad destination with
+  // localhost.
+  char recv_buf[sizeof(packet)];
+  struct sockaddr_in src;
+  socklen_t src_size = sizeof(src);
+  ASSERT_THAT(recvfrom(udp_sock.get(), recv_buf, sizeof(recv_buf), 0,
+                       reinterpret_cast<struct sockaddr*>(&src), &src_size),
+              SyscallSucceedsWithValue(sizeof(packet)));
+  EXPECT_EQ(
+      memcmp(kPayload, recv_buf + sizeof(struct iphdr) + sizeof(struct udphdr),
+             sizeof(kPayload)),
+      0);
+  // The network stack should have set the source address.
+  EXPECT_EQ(src.sin_family, AF_INET);
+  EXPECT_EQ(absl::gbswap_32(src.sin_addr.s_addr), INADDR_LOOPBACK);
+  // The packet ID should not be 0, as the packet has DF=0.
+  struct iphdr recv_iphdr = {};
+  memcpy(&recv_iphdr, recv_buf, sizeof(recv_iphdr));
+  EXPECT_NE(recv_iphdr.id, 0);
+  // The destination address should be localhost, not the bad IP we set
+  // initially.
+  EXPECT_EQ(absl::gbswap_32(recv_iphdr.daddr), INADDR_LOOPBACK);
+}
+
+// Send and receive a packet w/ the IP_HDRINCL option set.
+TEST_F(RawHDRINCL, SendAndReceiveIPHdrIncl) {
+  int port = 40000;
+  if (!IsRunningOnGvisor()) {
+    port = static_cast<short>(ASSERT_NO_ERRNO_AND_VALUE(
+        PortAvailable(0, AddressFamily::kIpv4, SocketType::kUdp, false)));
+  }
+
+  FileDescriptor recv_sock =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_RAW, IPPROTO_UDP));
+
+  FileDescriptor send_sock =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_RAW, IPPROTO_UDP));
+
+  // Enable IP_HDRINCL option so that we can build and send w/ an IP
+  // header.
+  constexpr int kSockOptOn = 1;
+  ASSERT_THAT(setsockopt(send_sock.get(), SOL_IP, IP_HDRINCL, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceeds());
+  // This is not strictly required but we do it to make sure that setting
+  // IP_HDRINCL on a non IPPROTO_RAW socket does not prevent it from receiving
+  // packets.
+  ASSERT_THAT(setsockopt(recv_sock.get(), SOL_IP, IP_HDRINCL, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceeds());
+
+  // Construct a packet with an IP header, UDP header, and payload.
+  constexpr char kPayload[] = "toto";
+  char packet[sizeof(struct iphdr) + sizeof(struct udphdr) + sizeof(kPayload)];
+  ASSERT_TRUE(
+      FillPacket(packet, sizeof(packet), port, kPayload, sizeof(kPayload)));
+
+  socklen_t addrlen = sizeof(addr_);
+  ASSERT_NO_FATAL_FAILURE(sendto(send_sock.get(), &packet, sizeof(packet), 0,
+                                 reinterpret_cast<struct sockaddr*>(&addr_),
+                                 addrlen));
+
+  // Receive the payload.
+  char recv_buf[sizeof(packet)];
+  struct sockaddr_in src;
+  socklen_t src_size = sizeof(src);
+  ASSERT_THAT(recvfrom(recv_sock.get(), recv_buf, sizeof(recv_buf), 0,
+                       reinterpret_cast<struct sockaddr*>(&src), &src_size),
+              SyscallSucceedsWithValue(sizeof(packet)));
+  EXPECT_EQ(
+      memcmp(kPayload, recv_buf + sizeof(struct iphdr) + sizeof(struct udphdr),
+             sizeof(kPayload)),
+      0);
+  // The network stack should have set the source address.
+  EXPECT_EQ(src.sin_family, AF_INET);
+  EXPECT_EQ(absl::gbswap_32(src.sin_addr.s_addr), INADDR_LOOPBACK);
+  struct iphdr iphdr = {};
+  memcpy(&iphdr, recv_buf, sizeof(iphdr));
+  EXPECT_NE(iphdr.id, 0);
+
+  // Also verify that the packet we just sent was not delivered to the
+  // IPPROTO_RAW socket.
+  {
+    char recv_buf[sizeof(packet)];
+    struct sockaddr_in src;
+    socklen_t src_size = sizeof(src);
+    ASSERT_THAT(recvfrom(socket_, recv_buf, sizeof(recv_buf), MSG_DONTWAIT,
+                         reinterpret_cast<struct sockaddr*>(&src), &src_size),
+                SyscallFailsWithErrno(EAGAIN));
+  }
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/raw_socket_icmp.cc b/test/syscalls/linux/raw_socket_icmp.cc
new file mode 100644
index 000000000..3de898df7
--- /dev/null
+++ b/test/syscalls/linux/raw_socket_icmp.cc
@@ -0,0 +1,514 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <linux/capability.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip_icmp.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <algorithm>
+#include <cstdint>
+
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/capability_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// The size of an empty ICMP packet and IP header together.
+constexpr size_t kEmptyICMPSize = 28;
+
+// ICMP raw sockets get their own special tests because Linux automatically
+// responds to ICMP echo requests, and thus a single echo request sent via
+// loopback leads to 2 received ICMP packets.
+
+class RawSocketICMPTest : public ::testing::Test {
+ protected:
+  // Creates a socket to be used in tests.
+  void SetUp() override;
+
+  // Closes the socket created by SetUp().
+  void TearDown() override;
+
+  // Checks that both an ICMP echo request and reply are received. Calls should
+  // be wrapped in ASSERT_NO_FATAL_FAILURE.
+  void ExpectICMPSuccess(const struct icmphdr& icmp);
+
+  // Sends icmp via s_.
+  void SendEmptyICMP(const struct icmphdr& icmp);
+
+  // Sends icmp via s_ to the given address.
+  void SendEmptyICMPTo(int sock, const struct sockaddr_in& addr,
+                       const struct icmphdr& icmp);
+
+  // Reads from s_ into recv_buf.
+  void ReceiveICMP(char* recv_buf, size_t recv_buf_len, size_t expected_size,
+                   struct sockaddr_in* src);
+
+  // Reads from sock into recv_buf.
+  void ReceiveICMPFrom(char* recv_buf, size_t recv_buf_len,
+                       size_t expected_size, struct sockaddr_in* src, int sock);
+
+  // The socket used for both reading and writing.
+  int s_;
+
+  // The loopback address.
+  struct sockaddr_in addr_;
+};
+
+void RawSocketICMPTest::SetUp() {
+  if (!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) {
+    ASSERT_THAT(socket(AF_INET, SOCK_RAW, IPPROTO_ICMP),
+                SyscallFailsWithErrno(EPERM));
+    GTEST_SKIP();
+  }
+
+  ASSERT_THAT(s_ = socket(AF_INET, SOCK_RAW, IPPROTO_ICMP), SyscallSucceeds());
+
+  addr_ = {};
+
+  // "On raw sockets sin_port is set to the IP protocol." - ip(7).
+  addr_.sin_port = IPPROTO_IP;
+  addr_.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+  addr_.sin_family = AF_INET;
+}
+
+void RawSocketICMPTest::TearDown() {
+  // TearDown will be run even if we skip the test.
+  if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) {
+    EXPECT_THAT(close(s_), SyscallSucceeds());
+  }
+}
+
+// We'll only read an echo in this case, as the kernel won't respond to the
+// malformed ICMP checksum.
+TEST_F(RawSocketICMPTest, SendAndReceiveBadChecksum) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+  // Prepare and send an ICMP packet. Use arbitrary junk for checksum, sequence,
+  // and ID. None of that should matter for raw sockets - the kernel should
+  // still give us the packet.
+  struct icmphdr icmp;
+  icmp.type = ICMP_ECHO;
+  icmp.code = 0;
+  icmp.checksum = 0;
+  icmp.un.echo.sequence = 2012;
+  icmp.un.echo.id = 2014;
+  ASSERT_NO_FATAL_FAILURE(SendEmptyICMP(icmp));
+
+  // Veryify that we get the echo, then that there's nothing else to read.
+  char recv_buf[kEmptyICMPSize];
+  struct sockaddr_in src;
+  ASSERT_NO_FATAL_FAILURE(
+      ReceiveICMP(recv_buf, sizeof(recv_buf), sizeof(struct icmphdr), &src));
+  EXPECT_EQ(memcmp(&src, &addr_, sizeof(src)), 0);
+  // The packet should be identical to what we sent.
+  EXPECT_EQ(memcmp(recv_buf + sizeof(struct iphdr), &icmp, sizeof(icmp)), 0);
+
+  // And there should be nothing left to read.
+  EXPECT_THAT(RetryEINTR(recv)(s_, recv_buf, sizeof(recv_buf), MSG_DONTWAIT),
+              SyscallFailsWithErrno(EAGAIN));
+}
+
+// Send and receive an ICMP packet.
+TEST_F(RawSocketICMPTest, SendAndReceive) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+  // Prepare and send an ICMP packet. Use arbitrary junk for sequence and ID.
+  // None of that should matter for raw sockets - the kernel should still give
+  // us the packet.
+  struct icmphdr icmp;
+  icmp.type = ICMP_ECHO;
+  icmp.code = 0;
+  icmp.checksum = 0;
+  icmp.un.echo.sequence = 2012;
+  icmp.un.echo.id = 2014;
+  icmp.checksum = ICMPChecksum(icmp, NULL, 0);
+  ASSERT_NO_FATAL_FAILURE(SendEmptyICMP(icmp));
+
+  ASSERT_NO_FATAL_FAILURE(ExpectICMPSuccess(icmp));
+}
+
+// We should be able to create multiple raw sockets for the same protocol and
+// receive the same packet on both.
+TEST_F(RawSocketICMPTest, MultipleSocketReceive) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+  FileDescriptor s2 =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_RAW, IPPROTO_ICMP));
+
+  // Prepare and send an ICMP packet. Use arbitrary junk for sequence and ID.
+  // None of that should matter for raw sockets - the kernel should still give
+  // us the packet.
+  struct icmphdr icmp;
+  icmp.type = ICMP_ECHO;
+  icmp.code = 0;
+  icmp.checksum = 0;
+  icmp.un.echo.sequence = 2016;
+  icmp.un.echo.id = 2018;
+  icmp.checksum = ICMPChecksum(icmp, NULL, 0);
+  ASSERT_NO_FATAL_FAILURE(SendEmptyICMP(icmp));
+
+  // Both sockets will receive the echo request and reply in indeterminate
+  // order, so we'll need to read 2 packets from each.
+
+  // Receive on socket 1.
+  constexpr int kBufSize = kEmptyICMPSize;
+  char recv_buf1[2][kBufSize];
+  struct sockaddr_in src;
+  for (int i = 0; i < 2; i++) {
+    ASSERT_NO_FATAL_FAILURE(ReceiveICMP(recv_buf1[i],
+                                        ABSL_ARRAYSIZE(recv_buf1[i]),
+                                        sizeof(struct icmphdr), &src));
+    EXPECT_EQ(memcmp(&src, &addr_, sizeof(src)), 0);
+  }
+
+  // Receive on socket 2.
+  char recv_buf2[2][kBufSize];
+  for (int i = 0; i < 2; i++) {
+    ASSERT_NO_FATAL_FAILURE(
+        ReceiveICMPFrom(recv_buf2[i], ABSL_ARRAYSIZE(recv_buf2[i]),
+                        sizeof(struct icmphdr), &src, s2.get()));
+    EXPECT_EQ(memcmp(&src, &addr_, sizeof(src)), 0);
+  }
+
+  // Ensure both sockets receive identical packets.
+  int types[] = {ICMP_ECHO, ICMP_ECHOREPLY};
+  for (int type : types) {
+    auto match_type = [=](char buf[kBufSize]) {
+      struct icmphdr* icmp =
+          reinterpret_cast<struct icmphdr*>(buf + sizeof(struct iphdr));
+      return icmp->type == type;
+    };
+    auto icmp1_it =
+        std::find_if(std::begin(recv_buf1), std::end(recv_buf1), match_type);
+    auto icmp2_it =
+        std::find_if(std::begin(recv_buf2), std::end(recv_buf2), match_type);
+    ASSERT_NE(icmp1_it, std::end(recv_buf1));
+    ASSERT_NE(icmp2_it, std::end(recv_buf2));
+    EXPECT_EQ(memcmp(*icmp1_it + sizeof(struct iphdr),
+                     *icmp2_it + sizeof(struct iphdr), sizeof(icmp)),
+              0);
+  }
+}
+
+// A raw ICMP socket and ping socket should both receive the ICMP packets
+// intended for the ping socket.
+TEST_F(RawSocketICMPTest, RawAndPingSockets) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+  FileDescriptor ping_sock =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, IPPROTO_ICMP));
+
+  // Ping sockets take care of the ICMP ID and checksum.
+  struct icmphdr icmp;
+  icmp.type = ICMP_ECHO;
+  icmp.code = 0;
+  icmp.un.echo.sequence = *static_cast<unsigned short*>(&icmp.un.echo.sequence);
+  ASSERT_THAT(RetryEINTR(sendto)(ping_sock.get(), &icmp, sizeof(icmp), 0,
+                                 reinterpret_cast<struct sockaddr*>(&addr_),
+                                 sizeof(addr_)),
+              SyscallSucceedsWithValue(sizeof(icmp)));
+
+  // Receive on socket 1, which receives the echo request and reply in
+  // indeterminate order.
+  constexpr int kBufSize = kEmptyICMPSize;
+  char recv_buf1[2][kBufSize];
+  struct sockaddr_in src;
+  for (int i = 0; i < 2; i++) {
+    ASSERT_NO_FATAL_FAILURE(
+        ReceiveICMP(recv_buf1[i], kBufSize, sizeof(struct icmphdr), &src));
+    EXPECT_EQ(memcmp(&src, &addr_, sizeof(src)), 0);
+  }
+
+  // Receive on socket 2. Ping sockets only get the echo reply, not the initial
+  // echo.
+  char ping_recv_buf[kBufSize];
+  ASSERT_THAT(RetryEINTR(recv)(ping_sock.get(), ping_recv_buf, kBufSize, 0),
+              SyscallSucceedsWithValue(sizeof(struct icmphdr)));
+
+  // Ensure both sockets receive identical echo reply packets.
+  auto match_type_raw = [=](char buf[kBufSize]) {
+    struct icmphdr* icmp =
+        reinterpret_cast<struct icmphdr*>(buf + sizeof(struct iphdr));
+    return icmp->type == ICMP_ECHOREPLY;
+  };
+  auto raw_reply_it =
+      std::find_if(std::begin(recv_buf1), std::end(recv_buf1), match_type_raw);
+  ASSERT_NE(raw_reply_it, std::end(recv_buf1));
+  EXPECT_EQ(
+      memcmp(*raw_reply_it + sizeof(struct iphdr), ping_recv_buf, sizeof(icmp)),
+      0);
+}
+
+// A raw ICMP socket should be able to send a malformed short ICMP Echo Request,
+// while ping socket should not.
+// Neither should be able to receieve a short malformed packet.
+TEST_F(RawSocketICMPTest, ShortEchoRawAndPingSockets) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+  FileDescriptor ping_sock =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, IPPROTO_ICMP));
+
+  struct icmphdr icmp;
+  icmp.type = ICMP_ECHO;
+  icmp.code = 0;
+  icmp.un.echo.sequence = 0;
+  icmp.un.echo.id = 6789;
+  icmp.checksum = 0;
+  icmp.checksum = ICMPChecksum(icmp, NULL, 0);
+
+  // Omit 2 bytes from ICMP packet.
+  constexpr int kShortICMPSize = sizeof(icmp) - 2;
+
+  // Sending a malformed short ICMP message to a ping socket should fail.
+  ASSERT_THAT(RetryEINTR(sendto)(ping_sock.get(), &icmp, kShortICMPSize, 0,
+                                 reinterpret_cast<struct sockaddr*>(&addr_),
+                                 sizeof(addr_)),
+              SyscallFailsWithErrno(EINVAL));
+
+  // Sending a malformed short ICMP message to a raw socket should not fail.
+  ASSERT_THAT(RetryEINTR(sendto)(s_, &icmp, kShortICMPSize, 0,
+                                 reinterpret_cast<struct sockaddr*>(&addr_),
+                                 sizeof(addr_)),
+              SyscallSucceedsWithValue(kShortICMPSize));
+
+  // Neither Ping nor Raw socket should have anything to read.
+  char recv_buf[kEmptyICMPSize];
+  EXPECT_THAT(RetryEINTR(recv)(ping_sock.get(), recv_buf, sizeof(recv_buf),
+                               MSG_DONTWAIT),
+              SyscallFailsWithErrno(EAGAIN));
+  EXPECT_THAT(RetryEINTR(recv)(s_, recv_buf, sizeof(recv_buf), MSG_DONTWAIT),
+              SyscallFailsWithErrno(EAGAIN));
+}
+
+// A raw ICMP socket should be able to send a malformed short ICMP Echo Reply,
+// while ping socket should not.
+// Neither should be able to receieve a short malformed packet.
+TEST_F(RawSocketICMPTest, ShortEchoReplyRawAndPingSockets) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+  FileDescriptor ping_sock =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, IPPROTO_ICMP));
+
+  struct icmphdr icmp;
+  icmp.type = ICMP_ECHOREPLY;
+  icmp.code = 0;
+  icmp.un.echo.sequence = 0;
+  icmp.un.echo.id = 6789;
+  icmp.checksum = 0;
+  icmp.checksum = ICMPChecksum(icmp, NULL, 0);
+
+  // Omit 2 bytes from ICMP packet.
+  constexpr int kShortICMPSize = sizeof(icmp) - 2;
+
+  // Sending a malformed short ICMP message to a ping socket should fail.
+  ASSERT_THAT(RetryEINTR(sendto)(ping_sock.get(), &icmp, kShortICMPSize, 0,
+                                 reinterpret_cast<struct sockaddr*>(&addr_),
+                                 sizeof(addr_)),
+              SyscallFailsWithErrno(EINVAL));
+
+  // Sending a malformed short ICMP message to a raw socket should not fail.
+  ASSERT_THAT(RetryEINTR(sendto)(s_, &icmp, kShortICMPSize, 0,
+                                 reinterpret_cast<struct sockaddr*>(&addr_),
+                                 sizeof(addr_)),
+              SyscallSucceedsWithValue(kShortICMPSize));
+
+  // Neither Ping nor Raw socket should have anything to read.
+  char recv_buf[kEmptyICMPSize];
+  EXPECT_THAT(RetryEINTR(recv)(ping_sock.get(), recv_buf, sizeof(recv_buf),
+                               MSG_DONTWAIT),
+              SyscallFailsWithErrno(EAGAIN));
+  EXPECT_THAT(RetryEINTR(recv)(s_, recv_buf, sizeof(recv_buf), MSG_DONTWAIT),
+              SyscallFailsWithErrno(EAGAIN));
+}
+
+// Test that connect() sends packets to the right place.
+TEST_F(RawSocketICMPTest, SendAndReceiveViaConnect) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+  ASSERT_THAT(
+      connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), sizeof(addr_)),
+      SyscallSucceeds());
+
+  // Prepare and send an ICMP packet. Use arbitrary junk for sequence and ID.
+  // None of that should matter for raw sockets - the kernel should still give
+  // us the packet.
+  struct icmphdr icmp;
+  icmp.type = ICMP_ECHO;
+  icmp.code = 0;
+  icmp.checksum = 0;
+  icmp.un.echo.sequence = 2003;
+  icmp.un.echo.id = 2004;
+  icmp.checksum = ICMPChecksum(icmp, NULL, 0);
+  ASSERT_THAT(send(s_, &icmp, sizeof(icmp), 0),
+              SyscallSucceedsWithValue(sizeof(icmp)));
+
+  ASSERT_NO_FATAL_FAILURE(ExpectICMPSuccess(icmp));
+}
+
+// Bind to localhost, then send and receive packets.
+TEST_F(RawSocketICMPTest, BindSendAndReceive) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+  ASSERT_THAT(
+      bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), sizeof(addr_)),
+      SyscallSucceeds());
+
+  // Prepare and send an ICMP packet. Use arbitrary junk for checksum, sequence,
+  // and ID. None of that should matter for raw sockets - the kernel should
+  // still give us the packet.
+  struct icmphdr icmp;
+  icmp.type = ICMP_ECHO;
+  icmp.code = 0;
+  icmp.checksum = 0;
+  icmp.un.echo.sequence = 2004;
+  icmp.un.echo.id = 2007;
+  icmp.checksum = ICMPChecksum(icmp, NULL, 0);
+  ASSERT_NO_FATAL_FAILURE(SendEmptyICMP(icmp));
+
+  ASSERT_NO_FATAL_FAILURE(ExpectICMPSuccess(icmp));
+}
+
+// Bind and connect to localhost and send/receive packets.
+TEST_F(RawSocketICMPTest, BindConnectSendAndReceive) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+  ASSERT_THAT(
+      bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), sizeof(addr_)),
+      SyscallSucceeds());
+  ASSERT_THAT(
+      connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), sizeof(addr_)),
+      SyscallSucceeds());
+
+  // Prepare and send an ICMP packet. Use arbitrary junk for checksum, sequence,
+  // and ID. None of that should matter for raw sockets - the kernel should
+  // still give us the packet.
+  struct icmphdr icmp;
+  icmp.type = ICMP_ECHO;
+  icmp.code = 0;
+  icmp.checksum = 0;
+  icmp.un.echo.sequence = 2010;
+  icmp.un.echo.id = 7;
+  icmp.checksum = ICMPChecksum(icmp, NULL, 0);
+  ASSERT_NO_FATAL_FAILURE(SendEmptyICMP(icmp));
+
+  ASSERT_NO_FATAL_FAILURE(ExpectICMPSuccess(icmp));
+}
+
+void RawSocketICMPTest::ExpectICMPSuccess(const struct icmphdr& icmp) {
+  // We're going to receive both the echo request and reply, but the order is
+  // indeterminate.
+  char recv_buf[kEmptyICMPSize];
+  struct sockaddr_in src;
+  bool received_request = false;
+  bool received_reply = false;
+
+  for (int i = 0; i < 2; i++) {
+    // Receive the packet.
+    ASSERT_NO_FATAL_FAILURE(ReceiveICMP(recv_buf, ABSL_ARRAYSIZE(recv_buf),
+                                        sizeof(struct icmphdr), &src));
+    EXPECT_EQ(memcmp(&src, &addr_, sizeof(src)), 0);
+    struct icmphdr* recvd_icmp =
+        reinterpret_cast<struct icmphdr*>(recv_buf + sizeof(struct iphdr));
+    switch (recvd_icmp->type) {
+      case ICMP_ECHO:
+        EXPECT_FALSE(received_request);
+        received_request = true;
+        // The packet should be identical to what we sent.
+        EXPECT_EQ(memcmp(recv_buf + sizeof(struct iphdr), &icmp, sizeof(icmp)),
+                  0);
+        break;
+
+      case ICMP_ECHOREPLY:
+        EXPECT_FALSE(received_reply);
+        received_reply = true;
+        // Most fields should be the same.
+        EXPECT_EQ(recvd_icmp->code, icmp.code);
+        EXPECT_EQ(recvd_icmp->un.echo.sequence, icmp.un.echo.sequence);
+        EXPECT_EQ(recvd_icmp->un.echo.id, icmp.un.echo.id);
+        // A couple are different.
+        EXPECT_EQ(recvd_icmp->type, ICMP_ECHOREPLY);
+        // The checksum computed over the reply should still be valid.
+        EXPECT_EQ(ICMPChecksum(*recvd_icmp, NULL, 0), 0);
+        break;
+    }
+  }
+
+  ASSERT_TRUE(received_request);
+  ASSERT_TRUE(received_reply);
+}
+
+void RawSocketICMPTest::SendEmptyICMP(const struct icmphdr& icmp) {
+  ASSERT_NO_FATAL_FAILURE(SendEmptyICMPTo(s_, addr_, icmp));
+}
+
+void RawSocketICMPTest::SendEmptyICMPTo(int sock,
+                                        const struct sockaddr_in& addr,
+                                        const struct icmphdr& icmp) {
+  // It's safe to use const_cast here because sendmsg won't modify the iovec or
+  // address.
+  struct iovec iov = {};
+  iov.iov_base = static_cast<void*>(const_cast<struct icmphdr*>(&icmp));
+  iov.iov_len = sizeof(icmp);
+  struct msghdr msg = {};
+  msg.msg_name = static_cast<void*>(const_cast<struct sockaddr_in*>(&addr));
+  msg.msg_namelen = sizeof(addr);
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+  msg.msg_control = NULL;
+  msg.msg_controllen = 0;
+  msg.msg_flags = 0;
+  ASSERT_THAT(sendmsg(sock, &msg, 0), SyscallSucceedsWithValue(sizeof(icmp)));
+}
+
+void RawSocketICMPTest::ReceiveICMP(char* recv_buf, size_t recv_buf_len,
+                                    size_t expected_size,
+                                    struct sockaddr_in* src) {
+  ASSERT_NO_FATAL_FAILURE(
+      ReceiveICMPFrom(recv_buf, recv_buf_len, expected_size, src, s_));
+}
+
+void RawSocketICMPTest::ReceiveICMPFrom(char* recv_buf, size_t recv_buf_len,
+                                        size_t expected_size,
+                                        struct sockaddr_in* src, int sock) {
+  struct iovec iov = {};
+  iov.iov_base = recv_buf;
+  iov.iov_len = recv_buf_len;
+  struct msghdr msg = {};
+  msg.msg_name = src;
+  msg.msg_namelen = sizeof(*src);
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+  msg.msg_control = NULL;
+  msg.msg_controllen = 0;
+  msg.msg_flags = 0;
+  // We should receive the ICMP packet plus 20 bytes of IP header.
+  ASSERT_THAT(recvmsg(sock, &msg, 0),
+              SyscallSucceedsWithValue(expected_size + sizeof(struct iphdr)));
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/read.cc b/test/syscalls/linux/read.cc
new file mode 100644
index 000000000..2633ba31b
--- /dev/null
+++ b/test/syscalls/linux/read.cc
@@ -0,0 +1,118 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <unistd.h>
+
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+class ReadTest : public ::testing::Test {
+  void SetUp() override {
+    name_ = NewTempAbsPath();
+    int fd;
+    ASSERT_THAT(fd = open(name_.c_str(), O_CREAT, 0644), SyscallSucceeds());
+    ASSERT_THAT(close(fd), SyscallSucceeds());
+  }
+
+  void TearDown() override { unlink(name_.c_str()); }
+
+ public:
+  std::string name_;
+};
+
+TEST_F(ReadTest, ZeroBuffer) {
+  int fd;
+  ASSERT_THAT(fd = open(name_.c_str(), O_RDWR), SyscallSucceeds());
+
+  char msg[] = "hello world";
+  EXPECT_THAT(PwriteFd(fd, msg, strlen(msg), 0),
+              SyscallSucceedsWithValue(strlen(msg)));
+
+  char buf[10];
+  EXPECT_THAT(ReadFd(fd, buf, 0), SyscallSucceedsWithValue(0));
+  EXPECT_THAT(close(fd), SyscallSucceeds());
+}
+
+TEST_F(ReadTest, EmptyFileReturnsZeroAtEOF) {
+  int fd;
+  ASSERT_THAT(fd = open(name_.c_str(), O_RDWR), SyscallSucceeds());
+
+  char eof_buf[10];
+  EXPECT_THAT(ReadFd(fd, eof_buf, 10), SyscallSucceedsWithValue(0));
+  EXPECT_THAT(close(fd), SyscallSucceeds());
+}
+
+TEST_F(ReadTest, EofAfterRead) {
+  int fd;
+  ASSERT_THAT(fd = open(name_.c_str(), O_RDWR), SyscallSucceeds());
+
+  // Write some bytes to be read.
+  constexpr char kMessage[] = "hello world";
+  EXPECT_THAT(PwriteFd(fd, kMessage, sizeof(kMessage), 0),
+              SyscallSucceedsWithValue(sizeof(kMessage)));
+
+  // Read all of the bytes at once.
+  char buf[sizeof(kMessage)];
+  EXPECT_THAT(ReadFd(fd, buf, sizeof(kMessage)),
+              SyscallSucceedsWithValue(sizeof(kMessage)));
+
+  // Read again with a non-zero buffer and expect EOF.
+  char eof_buf[10];
+  EXPECT_THAT(ReadFd(fd, eof_buf, 10), SyscallSucceedsWithValue(0));
+  EXPECT_THAT(close(fd), SyscallSucceeds());
+}
+
+TEST_F(ReadTest, DevNullReturnsEof) {
+  int fd;
+  ASSERT_THAT(fd = open("/dev/null", O_RDONLY), SyscallSucceeds());
+  std::vector<char> buf(1);
+  EXPECT_THAT(ReadFd(fd, buf.data(), 1), SyscallSucceedsWithValue(0));
+  EXPECT_THAT(close(fd), SyscallSucceeds());
+}
+
+const int kReadSize = 128 * 1024;
+
+// Do not allow random save as it could lead to partial reads.
+TEST_F(ReadTest, CanReadFullyFromDevZero_NoRandomSave) {
+  int fd;
+  ASSERT_THAT(fd = open("/dev/zero", O_RDONLY), SyscallSucceeds());
+
+  std::vector<char> buf(kReadSize, 1);
+  EXPECT_THAT(ReadFd(fd, buf.data(), kReadSize),
+              SyscallSucceedsWithValue(kReadSize));
+  EXPECT_THAT(close(fd), SyscallSucceeds());
+  EXPECT_EQ(std::vector<char>(kReadSize, 0), buf);
+}
+
+TEST_F(ReadTest, ReadDirectoryFails) {
+  const FileDescriptor file =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(GetAbsoluteTestTmpdir(), O_RDONLY));
+  std::vector<char> buf(1);
+  EXPECT_THAT(ReadFd(file.get(), buf.data(), 1), SyscallFailsWithErrno(EISDIR));
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/readahead.cc b/test/syscalls/linux/readahead.cc
new file mode 100644
index 000000000..09703b5c1
--- /dev/null
+++ b/test/syscalls/linux/readahead.cc
@@ -0,0 +1,91 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <fcntl.h>
+
+#include "gtest/gtest.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(ReadaheadTest, InvalidFD) {
+  EXPECT_THAT(readahead(-1, 1, 1), SyscallFailsWithErrno(EBADF));
+}
+
+TEST(ReadaheadTest, InvalidOffset) {
+  const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDWR));
+  EXPECT_THAT(readahead(fd.get(), -1, 1), SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(ReadaheadTest, ValidOffset) {
+  constexpr char kData[] = "123";
+  const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), kData, TempPath::kDefaultFileMode));
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDWR));
+
+  // N.B. The implementation of readahead is filesystem-specific, and a file
+  // backed by ram may return EINVAL because there is nothing to be read.
+  EXPECT_THAT(readahead(fd.get(), 1, 1), AnyOf(SyscallSucceedsWithValue(0),
+                                               SyscallFailsWithErrno(EINVAL)));
+}
+
+TEST(ReadaheadTest, PastEnd) {
+  constexpr char kData[] = "123";
+  const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), kData, TempPath::kDefaultFileMode));
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDWR));
+  // See above.
+  EXPECT_THAT(readahead(fd.get(), 2, 2), AnyOf(SyscallSucceedsWithValue(0),
+                                               SyscallFailsWithErrno(EINVAL)));
+}
+
+TEST(ReadaheadTest, CrossesEnd) {
+  constexpr char kData[] = "123";
+  const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), kData, TempPath::kDefaultFileMode));
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDWR));
+  // See above.
+  EXPECT_THAT(readahead(fd.get(), 4, 2), AnyOf(SyscallSucceedsWithValue(0),
+                                               SyscallFailsWithErrno(EINVAL)));
+}
+
+TEST(ReadaheadTest, WriteOnly) {
+  const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_WRONLY));
+  EXPECT_THAT(readahead(fd.get(), 0, 1), SyscallFailsWithErrno(EBADF));
+}
+
+TEST(ReadaheadTest, InvalidSize) {
+  const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDWR));
+  EXPECT_THAT(readahead(fd.get(), 0, -1), SyscallFailsWithErrno(EINVAL));
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/readv.cc b/test/syscalls/linux/readv.cc
new file mode 100644
index 000000000..baaf9f757
--- /dev/null
+++ b/test/syscalls/linux/readv.cc
@@ -0,0 +1,294 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/file_base.h"
+#include "test/syscalls/linux/readv_common.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+#include "test/util/timer_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+class ReadvTest : public FileTest {
+  void SetUp() override {
+    FileTest::SetUp();
+
+    ASSERT_THAT(write(test_file_fd_.get(), kReadvTestData, kReadvTestDataSize),
+                SyscallSucceedsWithValue(kReadvTestDataSize));
+    ASSERT_THAT(lseek(test_file_fd_.get(), 0, SEEK_SET),
+                SyscallSucceedsWithValue(0));
+    ASSERT_THAT(write(test_pipe_[1], kReadvTestData, kReadvTestDataSize),
+                SyscallSucceedsWithValue(kReadvTestDataSize));
+  }
+};
+
+TEST_F(ReadvTest, ReadOneBufferPerByte_File) {
+  ReadOneBufferPerByte(test_file_fd_.get());
+}
+
+TEST_F(ReadvTest, ReadOneBufferPerByte_Pipe) {
+  ReadOneBufferPerByte(test_pipe_[0]);
+}
+
+TEST_F(ReadvTest, ReadOneHalfAtATime_File) {
+  ReadOneHalfAtATime(test_file_fd_.get());
+}
+
+TEST_F(ReadvTest, ReadOneHalfAtATime_Pipe) {
+  ReadOneHalfAtATime(test_pipe_[0]);
+}
+
+TEST_F(ReadvTest, ReadAllOneBuffer_File) {
+  ReadAllOneBuffer(test_file_fd_.get());
+}
+
+TEST_F(ReadvTest, ReadAllOneBuffer_Pipe) { ReadAllOneBuffer(test_pipe_[0]); }
+
+TEST_F(ReadvTest, ReadAllOneLargeBuffer_File) {
+  ReadAllOneLargeBuffer(test_file_fd_.get());
+}
+
+TEST_F(ReadvTest, ReadAllOneLargeBuffer_Pipe) {
+  ReadAllOneLargeBuffer(test_pipe_[0]);
+}
+
+TEST_F(ReadvTest, ReadBuffersOverlapping_File) {
+  ReadBuffersOverlapping(test_file_fd_.get());
+}
+
+TEST_F(ReadvTest, ReadBuffersOverlapping_Pipe) {
+  ReadBuffersOverlapping(test_pipe_[0]);
+}
+
+TEST_F(ReadvTest, ReadBuffersDiscontinuous_File) {
+  ReadBuffersDiscontinuous(test_file_fd_.get());
+}
+
+TEST_F(ReadvTest, ReadBuffersDiscontinuous_Pipe) {
+  ReadBuffersDiscontinuous(test_pipe_[0]);
+}
+
+TEST_F(ReadvTest, ReadIovecsCompletelyFilled_File) {
+  ReadIovecsCompletelyFilled(test_file_fd_.get());
+}
+
+TEST_F(ReadvTest, ReadIovecsCompletelyFilled_Pipe) {
+  ReadIovecsCompletelyFilled(test_pipe_[0]);
+}
+
+TEST_F(ReadvTest, BadFileDescriptor) {
+  char buffer[1024];
+  struct iovec iov[1];
+  iov[0].iov_base = buffer;
+  iov[0].iov_len = 1024;
+
+  ASSERT_THAT(readv(-1, iov, 1024), SyscallFailsWithErrno(EBADF));
+}
+
+TEST_F(ReadvTest, BadIovecsPointer_File) {
+  ASSERT_THAT(readv(test_file_fd_.get(), nullptr, 1),
+              SyscallFailsWithErrno(EFAULT));
+}
+
+TEST_F(ReadvTest, BadIovecsPointer_Pipe) {
+  ASSERT_THAT(readv(test_pipe_[0], nullptr, 1), SyscallFailsWithErrno(EFAULT));
+}
+
+TEST_F(ReadvTest, BadIovecBase_File) {
+  struct iovec iov[1];
+  iov[0].iov_base = nullptr;
+  iov[0].iov_len = 1024;
+  ASSERT_THAT(readv(test_file_fd_.get(), iov, 1),
+              SyscallFailsWithErrno(EFAULT));
+}
+
+TEST_F(ReadvTest, BadIovecBase_Pipe) {
+  struct iovec iov[1];
+  iov[0].iov_base = nullptr;
+  iov[0].iov_len = 1024;
+  ASSERT_THAT(readv(test_pipe_[0], iov, 1), SyscallFailsWithErrno(EFAULT));
+}
+
+TEST_F(ReadvTest, ZeroIovecs_File) {
+  struct iovec iov[1];
+  iov[0].iov_base = 0;
+  iov[0].iov_len = 0;
+  ASSERT_THAT(readv(test_file_fd_.get(), iov, 1), SyscallSucceeds());
+}
+
+TEST_F(ReadvTest, ZeroIovecs_Pipe) {
+  struct iovec iov[1];
+  iov[0].iov_base = 0;
+  iov[0].iov_len = 0;
+  ASSERT_THAT(readv(test_pipe_[0], iov, 1), SyscallSucceeds());
+}
+
+TEST_F(ReadvTest, NotReadable_File) {
+  char buffer[1024];
+  struct iovec iov[1];
+  iov[0].iov_base = buffer;
+  iov[0].iov_len = 1024;
+
+  std::string wronly_file = NewTempAbsPath();
+  FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(
+      Open(wronly_file, O_CREAT | O_WRONLY, S_IRUSR | S_IWUSR));
+  ASSERT_THAT(readv(fd.get(), iov, 1), SyscallFailsWithErrno(EBADF));
+  fd.reset();  // Close before unlinking.
+  ASSERT_THAT(unlink(wronly_file.c_str()), SyscallSucceeds());
+}
+
+TEST_F(ReadvTest, NotReadable_Pipe) {
+  char buffer[1024];
+  struct iovec iov[1];
+  iov[0].iov_base = buffer;
+  iov[0].iov_len = 1024;
+  ASSERT_THAT(readv(test_pipe_[1], iov, 1), SyscallFailsWithErrno(EBADF));
+}
+
+TEST_F(ReadvTest, DirNotReadable) {
+  char buffer[1024];
+  struct iovec iov[1];
+  iov[0].iov_base = buffer;
+  iov[0].iov_len = 1024;
+
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(GetAbsoluteTestTmpdir(), O_RDONLY));
+  ASSERT_THAT(readv(fd.get(), iov, 1), SyscallFailsWithErrno(EISDIR));
+}
+
+TEST_F(ReadvTest, OffsetIncremented) {
+  char* buffer = reinterpret_cast<char*>(malloc(kReadvTestDataSize));
+  struct iovec iov[1];
+  iov[0].iov_base = buffer;
+  iov[0].iov_len = kReadvTestDataSize;
+
+  ASSERT_THAT(readv(test_file_fd_.get(), iov, 1),
+              SyscallSucceedsWithValue(kReadvTestDataSize));
+  ASSERT_THAT(lseek(test_file_fd_.get(), 0, SEEK_CUR),
+              SyscallSucceedsWithValue(kReadvTestDataSize));
+
+  free(buffer);
+}
+
+TEST_F(ReadvTest, EndOfFile) {
+  char* buffer = reinterpret_cast<char*>(malloc(kReadvTestDataSize));
+  struct iovec iov[1];
+  iov[0].iov_base = buffer;
+  iov[0].iov_len = kReadvTestDataSize;
+  ASSERT_THAT(readv(test_file_fd_.get(), iov, 1),
+              SyscallSucceedsWithValue(kReadvTestDataSize));
+  free(buffer);
+
+  buffer = reinterpret_cast<char*>(malloc(kReadvTestDataSize));
+  iov[0].iov_base = buffer;
+  iov[0].iov_len = kReadvTestDataSize;
+  ASSERT_THAT(readv(test_file_fd_.get(), iov, 1), SyscallSucceedsWithValue(0));
+  free(buffer);
+}
+
+TEST_F(ReadvTest, WouldBlock_Pipe) {
+  struct iovec iov[1];
+  iov[0].iov_base = reinterpret_cast<char*>(malloc(kReadvTestDataSize));
+  iov[0].iov_len = kReadvTestDataSize;
+  ASSERT_THAT(readv(test_pipe_[0], iov, 1),
+              SyscallSucceedsWithValue(kReadvTestDataSize));
+  free(iov[0].iov_base);
+
+  iov[0].iov_base = reinterpret_cast<char*>(malloc(kReadvTestDataSize));
+  ASSERT_THAT(readv(test_pipe_[0], iov, 1), SyscallFailsWithErrno(EAGAIN));
+  free(iov[0].iov_base);
+}
+
+TEST_F(ReadvTest, ZeroBuffer) {
+  char buf[10];
+  struct iovec iov[1];
+  iov[0].iov_base = buf;
+  iov[0].iov_len = 0;
+  ASSERT_THAT(readv(test_pipe_[0], iov, 1), SyscallSucceedsWithValue(0));
+}
+
+TEST_F(ReadvTest, NullIovecInNonemptyArray) {
+  std::vector<char> buf(kReadvTestDataSize);
+  struct iovec iov[2];
+  iov[0].iov_base = nullptr;
+  iov[0].iov_len = 0;
+  iov[1].iov_base = buf.data();
+  iov[1].iov_len = kReadvTestDataSize;
+  ASSERT_THAT(readv(test_file_fd_.get(), iov, 2),
+              SyscallSucceedsWithValue(kReadvTestDataSize));
+}
+
+TEST_F(ReadvTest, IovecOutsideTaskAddressRangeInNonemptyArray) {
+  std::vector<char> buf(kReadvTestDataSize);
+  struct iovec iov[2];
+  iov[0].iov_base = reinterpret_cast<void*>(~static_cast<uintptr_t>(0));
+  iov[0].iov_len = 0;
+  iov[1].iov_base = buf.data();
+  iov[1].iov_len = kReadvTestDataSize;
+  ASSERT_THAT(readv(test_file_fd_.get(), iov, 2),
+              SyscallFailsWithErrno(EFAULT));
+}
+
+// This test depends on the maximum extent of a single readv() syscall, so
+// we can't tolerate interruption from saving.
+TEST(ReadvTestNoFixture, TruncatedAtMax_NoRandomSave) {
+  // Ensure that we won't be interrupted by ITIMER_PROF. This is particularly
+  // important in environments where automated profiling tools may start
+  // ITIMER_PROF automatically.
+  struct itimerval itv = {};
+  auto const cleanup_itimer =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedItimer(ITIMER_PROF, itv));
+
+  // From Linux's include/linux/fs.h.
+  size_t const MAX_RW_COUNT = INT_MAX & ~(kPageSize - 1);
+
+  // Create an iovec array with 3 segments pointing to consecutive parts of a
+  // buffer. The first covers all but the last three pages, and should be
+  // written to in its entirety. The second covers the last page before
+  // MAX_RW_COUNT and the first page after; only the first page should be
+  // written to. The third covers the last page of the buffer, and should be
+  // skipped entirely.
+  size_t const kBufferSize = MAX_RW_COUNT + 2 * kPageSize;
+  size_t const kFirstOffset = MAX_RW_COUNT - kPageSize;
+  size_t const kSecondOffset = MAX_RW_COUNT + kPageSize;
+  // The buffer is too big to fit on the stack.
+  std::vector<char> buf(kBufferSize);
+  struct iovec iov[3];
+  iov[0].iov_base = buf.data();
+  iov[0].iov_len = kFirstOffset;
+  iov[1].iov_base = buf.data() + kFirstOffset;
+  iov[1].iov_len = kSecondOffset - kFirstOffset;
+  iov[2].iov_base = buf.data() + kSecondOffset;
+  iov[2].iov_len = kBufferSize - kSecondOffset;
+
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDONLY));
+  EXPECT_THAT(readv(fd.get(), iov, 3), SyscallSucceedsWithValue(MAX_RW_COUNT));
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/readv_common.cc b/test/syscalls/linux/readv_common.cc
new file mode 100644
index 000000000..2694dc64f
--- /dev/null
+++ b/test/syscalls/linux/readv_common.cc
@@ -0,0 +1,220 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// MatchesStringLength checks that a tuple argument of (struct iovec *, int)
+// corresponding to an iovec array and its length, contains data that matches
+// the string length strlen.
+MATCHER_P(MatchesStringLength, strlen, "") {
+  struct iovec* iovs = arg.first;
+  int niov = arg.second;
+  int offset = 0;
+  for (int i = 0; i < niov; i++) {
+    offset += iovs[i].iov_len;
+  }
+  if (offset != static_cast<int>(strlen)) {
+    *result_listener << offset;
+    return false;
+  }
+  return true;
+}
+
+// MatchesStringValue checks that a tuple argument of (struct iovec *, int)
+// corresponding to an iovec array and its length, contains data that matches
+// the string value str.
+MATCHER_P(MatchesStringValue, str, "") {
+  struct iovec* iovs = arg.first;
+  int len = strlen(str);
+  int niov = arg.second;
+  int offset = 0;
+  for (int i = 0; i < niov; i++) {
+    struct iovec iov = iovs[i];
+    if (len < offset) {
+      *result_listener << "strlen " << len << " < offset " << offset;
+      return false;
+    }
+    if (strncmp(static_cast<char*>(iov.iov_base), &str[offset], iov.iov_len)) {
+      absl::string_view iovec_string(static_cast<char*>(iov.iov_base),
+                                     iov.iov_len);
+      *result_listener << iovec_string << " @offset " << offset;
+      return false;
+    }
+    offset += iov.iov_len;
+  }
+  return true;
+}
+
+extern const char kReadvTestData[] =
+    "127.0.0.1      localhost"
+    ""
+    "# The following lines are desirable for IPv6 capable hosts"
+    "::1     ip6-localhost ip6-loopback"
+    "fe00::0 ip6-localnet"
+    "ff00::0 ip6-mcastprefix"
+    "ff02::1 ip6-allnodes"
+    "ff02::2 ip6-allrouters"
+    "ff02::3 ip6-allhosts"
+    "192.168.1.100 a"
+    "93.184.216.34          foo.bar.example.com xcpu";
+extern const size_t kReadvTestDataSize = sizeof(kReadvTestData);
+
+static void ReadAllOneProvidedBuffer(int fd, std::vector<char>* buffer) {
+  struct iovec iovs[1];
+  iovs[0].iov_base = buffer->data();
+  iovs[0].iov_len = kReadvTestDataSize;
+
+  ASSERT_THAT(readv(fd, iovs, 1), SyscallSucceedsWithValue(kReadvTestDataSize));
+
+  std::pair<struct iovec*, int> iovec_desc(iovs, 1);
+  EXPECT_THAT(iovec_desc, MatchesStringLength(kReadvTestDataSize));
+  EXPECT_THAT(iovec_desc, MatchesStringValue(kReadvTestData));
+}
+
+void ReadAllOneBuffer(int fd) {
+  std::vector<char> buffer(kReadvTestDataSize);
+  ReadAllOneProvidedBuffer(fd, &buffer);
+}
+
+void ReadAllOneLargeBuffer(int fd) {
+  std::vector<char> buffer(10 * kReadvTestDataSize);
+  ReadAllOneProvidedBuffer(fd, &buffer);
+}
+
+void ReadOneHalfAtATime(int fd) {
+  int len0 = kReadvTestDataSize / 2;
+  int len1 = kReadvTestDataSize - len0;
+  std::vector<char> buffer0(len0);
+  std::vector<char> buffer1(len1);
+
+  struct iovec iovs[2];
+  iovs[0].iov_base = buffer0.data();
+  iovs[0].iov_len = len0;
+  iovs[1].iov_base = buffer1.data();
+  iovs[1].iov_len = len1;
+
+  ASSERT_THAT(readv(fd, iovs, 2), SyscallSucceedsWithValue(kReadvTestDataSize));
+
+  std::pair<struct iovec*, int> iovec_desc(iovs, 2);
+  EXPECT_THAT(iovec_desc, MatchesStringLength(kReadvTestDataSize));
+  EXPECT_THAT(iovec_desc, MatchesStringValue(kReadvTestData));
+}
+
+void ReadOneBufferPerByte(int fd) {
+  std::vector<char> buffer(kReadvTestDataSize);
+  std::vector<struct iovec> iovs(kReadvTestDataSize);
+  char* buffer_ptr = buffer.data();
+  struct iovec* iovs_ptr = iovs.data();
+
+  for (int i = 0; i < static_cast<int>(kReadvTestDataSize); i++) {
+    struct iovec iov = {
+        .iov_base = &buffer_ptr[i],
+        .iov_len = 1,
+    };
+    iovs_ptr[i] = iov;
+  }
+
+  ASSERT_THAT(readv(fd, iovs_ptr, kReadvTestDataSize),
+              SyscallSucceedsWithValue(kReadvTestDataSize));
+
+  std::pair<struct iovec*, int> iovec_desc(iovs.data(), kReadvTestDataSize);
+  EXPECT_THAT(iovec_desc, MatchesStringLength(kReadvTestDataSize));
+  EXPECT_THAT(iovec_desc, MatchesStringValue(kReadvTestData));
+}
+
+void ReadBuffersOverlapping(int fd) {
+  // overlap the first overlap_bytes.
+  int overlap_bytes = 8;
+  std::vector<char> buffer(kReadvTestDataSize);
+
+  // overlapping causes us to get more data.
+  int expected_size = kReadvTestDataSize + overlap_bytes;
+  std::vector<char> expected(expected_size);
+  char* expected_ptr = expected.data();
+  memcpy(expected_ptr, &kReadvTestData[overlap_bytes], overlap_bytes);
+  memcpy(&expected_ptr[overlap_bytes], &kReadvTestData[overlap_bytes],
+         kReadvTestDataSize - overlap_bytes);
+
+  struct iovec iovs[2];
+  iovs[0].iov_base = buffer.data();
+  iovs[0].iov_len = overlap_bytes;
+  iovs[1].iov_base = buffer.data();
+  iovs[1].iov_len = kReadvTestDataSize;
+
+  ASSERT_THAT(readv(fd, iovs, 2), SyscallSucceedsWithValue(kReadvTestDataSize));
+
+  std::pair<struct iovec*, int> iovec_desc(iovs, 2);
+  EXPECT_THAT(iovec_desc, MatchesStringLength(expected_size));
+  EXPECT_THAT(iovec_desc, MatchesStringValue(expected_ptr));
+}
+
+void ReadBuffersDiscontinuous(int fd) {
+  // Each iov is 1 byte separated by 1 byte.
+  std::vector<char> buffer(kReadvTestDataSize * 2);
+  std::vector<struct iovec> iovs(kReadvTestDataSize);
+
+  char* buffer_ptr = buffer.data();
+  struct iovec* iovs_ptr = iovs.data();
+
+  for (int i = 0; i < static_cast<int>(kReadvTestDataSize); i++) {
+    struct iovec iov = {
+        .iov_base = &buffer_ptr[i * 2],
+        .iov_len = 1,
+    };
+    iovs_ptr[i] = iov;
+  }
+
+  ASSERT_THAT(readv(fd, iovs_ptr, kReadvTestDataSize),
+              SyscallSucceedsWithValue(kReadvTestDataSize));
+
+  std::pair<struct iovec*, int> iovec_desc(iovs.data(), kReadvTestDataSize);
+  EXPECT_THAT(iovec_desc, MatchesStringLength(kReadvTestDataSize));
+  EXPECT_THAT(iovec_desc, MatchesStringValue(kReadvTestData));
+}
+
+void ReadIovecsCompletelyFilled(int fd) {
+  int half = kReadvTestDataSize / 2;
+  std::vector<char> buffer(kReadvTestDataSize);
+  char* buffer_ptr = buffer.data();
+  memset(buffer.data(), '\0', kReadvTestDataSize);
+
+  struct iovec iovs[2];
+  iovs[0].iov_base = buffer.data();
+  iovs[0].iov_len = half;
+  iovs[1].iov_base = &buffer_ptr[half];
+  iovs[1].iov_len = half;
+
+  ASSERT_THAT(readv(fd, iovs, 2), SyscallSucceedsWithValue(half * 2));
+
+  std::pair<struct iovec*, int> iovec_desc(iovs, 2);
+  EXPECT_THAT(iovec_desc, MatchesStringLength(half * 2));
+  EXPECT_THAT(iovec_desc, MatchesStringValue(kReadvTestData));
+
+  char* str = static_cast<char*>(iovs[0].iov_base);
+  str[iovs[0].iov_len - 1] = '\0';
+  ASSERT_EQ(half - 1, strlen(str));
+}
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/readv_common.h b/test/syscalls/linux/readv_common.h
new file mode 100644
index 000000000..2fa40c35f
--- /dev/null
+++ b/test/syscalls/linux/readv_common.h
@@ -0,0 +1,61 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_SYSCALLS_READV_COMMON_H_
+#define GVISOR_TEST_SYSCALLS_READV_COMMON_H_
+
+#include <stddef.h>
+
+namespace gvisor {
+namespace testing {
+
+// A NUL-terminated string containing the data used by tests using the following
+// test helpers.
+extern const char kReadvTestData[];
+
+// The size of kReadvTestData, including the terminating NUL.
+extern const size_t kReadvTestDataSize;
+
+// ReadAllOneBuffer asserts that it can read kReadvTestData from an fd using
+// exactly one iovec.
+void ReadAllOneBuffer(int fd);
+
+// ReadAllOneLargeBuffer asserts that it can read kReadvTestData from an fd
+// using exactly one iovec containing an overly large buffer.
+void ReadAllOneLargeBuffer(int fd);
+
+// ReadOneHalfAtATime asserts that it can read test_data_from an fd using
+// exactly two iovecs that are roughly equivalent in size.
+void ReadOneHalfAtATime(int fd);
+
+// ReadOneBufferPerByte asserts that it can read kReadvTestData from an fd
+// using one iovec per byte.
+void ReadOneBufferPerByte(int fd);
+
+// ReadBuffersOverlapping asserts that it can read kReadvTestData from an fd
+// where two iovecs are overlapping.
+void ReadBuffersOverlapping(int fd);
+
+// ReadBuffersDiscontinuous asserts that it can read kReadvTestData from an fd
+// where each iovec is discontinuous from the next by 1 byte.
+void ReadBuffersDiscontinuous(int fd);
+
+// ReadIovecsCompletelyFilled asserts that the previous iovec is completely
+// filled before moving onto the next.
+void ReadIovecsCompletelyFilled(int fd);
+
+}  // namespace testing
+}  // namespace gvisor
+
+#endif  // GVISOR_TEST_SYSCALLS_READV_COMMON_H_
diff --git a/test/syscalls/linux/readv_socket.cc b/test/syscalls/linux/readv_socket.cc
new file mode 100644
index 000000000..dd6fb7008
--- /dev/null
+++ b/test/syscalls/linux/readv_socket.cc
@@ -0,0 +1,212 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/readv_common.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+class ReadvSocketTest : public ::testing::Test {
+ public:
+  void SetUp() override {
+    test_unix_stream_socket_[0] = -1;
+    test_unix_stream_socket_[1] = -1;
+    test_unix_dgram_socket_[0] = -1;
+    test_unix_dgram_socket_[1] = -1;
+    test_unix_seqpacket_socket_[0] = -1;
+    test_unix_seqpacket_socket_[1] = -1;
+
+    ASSERT_THAT(socketpair(AF_UNIX, SOCK_STREAM, 0, test_unix_stream_socket_),
+                SyscallSucceeds());
+    ASSERT_THAT(fcntl(test_unix_stream_socket_[0], F_SETFL, O_NONBLOCK),
+                SyscallSucceeds());
+    ASSERT_THAT(socketpair(AF_UNIX, SOCK_DGRAM, 0, test_unix_dgram_socket_),
+                SyscallSucceeds());
+    ASSERT_THAT(fcntl(test_unix_dgram_socket_[0], F_SETFL, O_NONBLOCK),
+                SyscallSucceeds());
+    ASSERT_THAT(
+        socketpair(AF_UNIX, SOCK_SEQPACKET, 0, test_unix_seqpacket_socket_),
+        SyscallSucceeds());
+    ASSERT_THAT(fcntl(test_unix_seqpacket_socket_[0], F_SETFL, O_NONBLOCK),
+                SyscallSucceeds());
+
+    ASSERT_THAT(
+        write(test_unix_stream_socket_[1], kReadvTestData, kReadvTestDataSize),
+        SyscallSucceedsWithValue(kReadvTestDataSize));
+    ASSERT_THAT(
+        write(test_unix_dgram_socket_[1], kReadvTestData, kReadvTestDataSize),
+        SyscallSucceedsWithValue(kReadvTestDataSize));
+    ASSERT_THAT(write(test_unix_seqpacket_socket_[1], kReadvTestData,
+                      kReadvTestDataSize),
+                SyscallSucceedsWithValue(kReadvTestDataSize));
+  }
+
+  void TearDown() override {
+    close(test_unix_stream_socket_[0]);
+    close(test_unix_stream_socket_[1]);
+
+    close(test_unix_dgram_socket_[0]);
+    close(test_unix_dgram_socket_[1]);
+
+    close(test_unix_seqpacket_socket_[0]);
+    close(test_unix_seqpacket_socket_[1]);
+  }
+
+  int test_unix_stream_socket_[2];
+  int test_unix_dgram_socket_[2];
+  int test_unix_seqpacket_socket_[2];
+};
+
+TEST_F(ReadvSocketTest, ReadOneBufferPerByte_StreamSocket) {
+  ReadOneBufferPerByte(test_unix_stream_socket_[0]);
+}
+
+TEST_F(ReadvSocketTest, ReadOneBufferPerByte_DgramSocket) {
+  ReadOneBufferPerByte(test_unix_dgram_socket_[0]);
+}
+
+TEST_F(ReadvSocketTest, ReadOneBufferPerByte_SeqPacketSocket) {
+  ReadOneBufferPerByte(test_unix_seqpacket_socket_[0]);
+}
+
+TEST_F(ReadvSocketTest, ReadOneHalfAtATime_StreamSocket) {
+  ReadOneHalfAtATime(test_unix_stream_socket_[0]);
+}
+
+TEST_F(ReadvSocketTest, ReadOneHalfAtATime_DgramSocket) {
+  ReadOneHalfAtATime(test_unix_dgram_socket_[0]);
+}
+
+TEST_F(ReadvSocketTest, ReadAllOneBuffer_StreamSocket) {
+  ReadAllOneBuffer(test_unix_stream_socket_[0]);
+}
+
+TEST_F(ReadvSocketTest, ReadAllOneBuffer_DgramSocket) {
+  ReadAllOneBuffer(test_unix_dgram_socket_[0]);
+}
+
+TEST_F(ReadvSocketTest, ReadAllOneLargeBuffer_StreamSocket) {
+  ReadAllOneLargeBuffer(test_unix_stream_socket_[0]);
+}
+
+TEST_F(ReadvSocketTest, ReadAllOneLargeBuffer_DgramSocket) {
+  ReadAllOneLargeBuffer(test_unix_dgram_socket_[0]);
+}
+
+TEST_F(ReadvSocketTest, ReadBuffersOverlapping_StreamSocket) {
+  ReadBuffersOverlapping(test_unix_stream_socket_[0]);
+}
+
+TEST_F(ReadvSocketTest, ReadBuffersOverlapping_DgramSocket) {
+  ReadBuffersOverlapping(test_unix_dgram_socket_[0]);
+}
+
+TEST_F(ReadvSocketTest, ReadBuffersDiscontinuous_StreamSocket) {
+  ReadBuffersDiscontinuous(test_unix_stream_socket_[0]);
+}
+
+TEST_F(ReadvSocketTest, ReadBuffersDiscontinuous_DgramSocket) {
+  ReadBuffersDiscontinuous(test_unix_dgram_socket_[0]);
+}
+
+TEST_F(ReadvSocketTest, ReadIovecsCompletelyFilled_StreamSocket) {
+  ReadIovecsCompletelyFilled(test_unix_stream_socket_[0]);
+}
+
+TEST_F(ReadvSocketTest, ReadIovecsCompletelyFilled_DgramSocket) {
+  ReadIovecsCompletelyFilled(test_unix_dgram_socket_[0]);
+}
+
+TEST_F(ReadvSocketTest, BadIovecsPointer_StreamSocket) {
+  ASSERT_THAT(readv(test_unix_stream_socket_[0], nullptr, 1),
+              SyscallFailsWithErrno(EFAULT));
+}
+
+TEST_F(ReadvSocketTest, BadIovecsPointer_DgramSocket) {
+  ASSERT_THAT(readv(test_unix_dgram_socket_[0], nullptr, 1),
+              SyscallFailsWithErrno(EFAULT));
+}
+
+TEST_F(ReadvSocketTest, BadIovecBase_StreamSocket) {
+  struct iovec iov[1];
+  iov[0].iov_base = nullptr;
+  iov[0].iov_len = 1024;
+  ASSERT_THAT(readv(test_unix_stream_socket_[0], iov, 1),
+              SyscallFailsWithErrno(EFAULT));
+}
+
+TEST_F(ReadvSocketTest, BadIovecBase_DgramSocket) {
+  struct iovec iov[1];
+  iov[0].iov_base = nullptr;
+  iov[0].iov_len = 1024;
+  ASSERT_THAT(readv(test_unix_dgram_socket_[0], iov, 1),
+              SyscallFailsWithErrno(EFAULT));
+}
+
+TEST_F(ReadvSocketTest, ZeroIovecs_StreamSocket) {
+  struct iovec iov[1];
+  iov[0].iov_base = 0;
+  iov[0].iov_len = 0;
+  ASSERT_THAT(readv(test_unix_stream_socket_[0], iov, 1), SyscallSucceeds());
+}
+
+TEST_F(ReadvSocketTest, ZeroIovecs_DgramSocket) {
+  struct iovec iov[1];
+  iov[0].iov_base = 0;
+  iov[0].iov_len = 0;
+  ASSERT_THAT(readv(test_unix_dgram_socket_[0], iov, 1), SyscallSucceeds());
+}
+
+TEST_F(ReadvSocketTest, WouldBlock_StreamSocket) {
+  struct iovec iov[1];
+  iov[0].iov_base = reinterpret_cast<char*>(malloc(kReadvTestDataSize));
+  iov[0].iov_len = kReadvTestDataSize;
+  ASSERT_THAT(readv(test_unix_stream_socket_[0], iov, 1),
+              SyscallSucceedsWithValue(kReadvTestDataSize));
+  free(iov[0].iov_base);
+
+  iov[0].iov_base = reinterpret_cast<char*>(malloc(kReadvTestDataSize));
+  ASSERT_THAT(readv(test_unix_stream_socket_[0], iov, 1),
+              SyscallFailsWithErrno(EAGAIN));
+  free(iov[0].iov_base);
+}
+
+TEST_F(ReadvSocketTest, WouldBlock_DgramSocket) {
+  struct iovec iov[1];
+  iov[0].iov_base = reinterpret_cast<char*>(malloc(kReadvTestDataSize));
+  iov[0].iov_len = kReadvTestDataSize;
+  ASSERT_THAT(readv(test_unix_dgram_socket_[0], iov, 1),
+              SyscallSucceedsWithValue(kReadvTestDataSize));
+  free(iov[0].iov_base);
+
+  iov[0].iov_base = reinterpret_cast<char*>(malloc(kReadvTestDataSize));
+  ASSERT_THAT(readv(test_unix_dgram_socket_[0], iov, 1),
+              SyscallFailsWithErrno(EAGAIN));
+  free(iov[0].iov_base);
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/rename.cc b/test/syscalls/linux/rename.cc
new file mode 100644
index 000000000..833c0dc4f
--- /dev/null
+++ b/test/syscalls/linux/rename.cc
@@ -0,0 +1,394 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <stdio.h>
+
+#include <string>
+
+#include "gtest/gtest.h"
+#include "absl/strings/string_view.h"
+#include "test/util/capability_util.h"
+#include "test/util/cleanup.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/fs_util.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(RenameTest, RootToAnything) {
+  ASSERT_THAT(rename("/", "/bin"), SyscallFailsWithErrno(EBUSY));
+}
+
+TEST(RenameTest, AnythingToRoot) {
+  ASSERT_THAT(rename("/bin", "/"), SyscallFailsWithErrno(EBUSY));
+}
+
+TEST(RenameTest, SourceIsAncestorOfTarget) {
+  auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  auto subdir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(dir.path()));
+  ASSERT_THAT(rename(dir.path().c_str(), subdir.path().c_str()),
+              SyscallFailsWithErrno(EINVAL));
+
+  // Try an even deeper directory.
+  auto deep_subdir =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(subdir.path()));
+  ASSERT_THAT(rename(dir.path().c_str(), deep_subdir.path().c_str()),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(RenameTest, TargetIsAncestorOfSource) {
+  auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  auto subdir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(dir.path()));
+  ASSERT_THAT(rename(subdir.path().c_str(), dir.path().c_str()),
+              SyscallFailsWithErrno(ENOTEMPTY));
+
+  // Try an even deeper directory.
+  auto deep_subdir =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(subdir.path()));
+  ASSERT_THAT(rename(deep_subdir.path().c_str(), dir.path().c_str()),
+              SyscallFailsWithErrno(ENOTEMPTY));
+}
+
+TEST(RenameTest, FileToSelf) {
+  auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  EXPECT_THAT(rename(f.path().c_str(), f.path().c_str()), SyscallSucceeds());
+}
+
+TEST(RenameTest, DirectoryToSelf) {
+  auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  EXPECT_THAT(rename(f.path().c_str(), f.path().c_str()), SyscallSucceeds());
+}
+
+TEST(RenameTest, FileToSameDirectory) {
+  auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  std::string const newpath = NewTempAbsPath();
+  ASSERT_THAT(rename(f.path().c_str(), newpath.c_str()), SyscallSucceeds());
+  std::string const oldpath = f.release();
+  f.reset(newpath);
+  EXPECT_THAT(Exists(oldpath), IsPosixErrorOkAndHolds(false));
+  EXPECT_THAT(Exists(newpath), IsPosixErrorOkAndHolds(true));
+}
+
+TEST(RenameTest, DirectoryToSameDirectory) {
+  auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  std::string const newpath = NewTempAbsPath();
+  ASSERT_THAT(rename(dir.path().c_str(), newpath.c_str()), SyscallSucceeds());
+  std::string const oldpath = dir.release();
+  dir.reset(newpath);
+  EXPECT_THAT(Exists(oldpath), IsPosixErrorOkAndHolds(false));
+  EXPECT_THAT(Exists(newpath), IsPosixErrorOkAndHolds(true));
+}
+
+TEST(RenameTest, FileToParentDirectory) {
+  auto dir1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  auto dir2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(dir1.path()));
+  auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir2.path()));
+  std::string const newpath = NewTempAbsPathInDir(dir1.path());
+  ASSERT_THAT(rename(f.path().c_str(), newpath.c_str()), SyscallSucceeds());
+  std::string const oldpath = f.release();
+  f.reset(newpath);
+  EXPECT_THAT(Exists(oldpath), IsPosixErrorOkAndHolds(false));
+  EXPECT_THAT(Exists(newpath), IsPosixErrorOkAndHolds(true));
+}
+
+TEST(RenameTest, DirectoryToParentDirectory) {
+  auto dir1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  auto dir2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(dir1.path()));
+  auto dir3 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(dir2.path()));
+  EXPECT_THAT(IsDirectory(dir3.path()), IsPosixErrorOkAndHolds(true));
+  std::string const newpath = NewTempAbsPathInDir(dir1.path());
+  ASSERT_THAT(rename(dir3.path().c_str(), newpath.c_str()), SyscallSucceeds());
+  std::string const oldpath = dir3.release();
+  dir3.reset(newpath);
+  EXPECT_THAT(Exists(oldpath), IsPosixErrorOkAndHolds(false));
+  EXPECT_THAT(Exists(newpath), IsPosixErrorOkAndHolds(true));
+  EXPECT_THAT(IsDirectory(newpath), IsPosixErrorOkAndHolds(true));
+}
+
+TEST(RenameTest, FileToChildDirectory) {
+  auto dir1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  auto dir2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(dir1.path()));
+  auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir1.path()));
+  std::string const newpath = NewTempAbsPathInDir(dir2.path());
+  ASSERT_THAT(rename(f.path().c_str(), newpath.c_str()), SyscallSucceeds());
+  std::string const oldpath = f.release();
+  f.reset(newpath);
+  EXPECT_THAT(Exists(oldpath), IsPosixErrorOkAndHolds(false));
+  EXPECT_THAT(Exists(newpath), IsPosixErrorOkAndHolds(true));
+}
+
+TEST(RenameTest, DirectoryToChildDirectory) {
+  auto dir1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  auto dir2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(dir1.path()));
+  auto dir3 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(dir1.path()));
+  std::string const newpath = NewTempAbsPathInDir(dir2.path());
+  ASSERT_THAT(rename(dir3.path().c_str(), newpath.c_str()), SyscallSucceeds());
+  std::string const oldpath = dir3.release();
+  dir3.reset(newpath);
+  EXPECT_THAT(Exists(oldpath), IsPosixErrorOkAndHolds(false));
+  EXPECT_THAT(Exists(newpath), IsPosixErrorOkAndHolds(true));
+  EXPECT_THAT(IsDirectory(newpath), IsPosixErrorOkAndHolds(true));
+}
+
+TEST(RenameTest, DirectoryToOwnChildDirectory) {
+  auto dir1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  auto dir2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(dir1.path()));
+  std::string const newpath = NewTempAbsPathInDir(dir2.path());
+  ASSERT_THAT(rename(dir1.path().c_str(), newpath.c_str()),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(RenameTest, FileOverwritesFile) {
+  auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  auto f1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      dir.path(), "first", TempPath::kDefaultFileMode));
+  auto f2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      dir.path(), "second", TempPath::kDefaultFileMode));
+  ASSERT_THAT(rename(f1.path().c_str(), f2.path().c_str()), SyscallSucceeds());
+  EXPECT_THAT(Exists(f1.path()), IsPosixErrorOkAndHolds(false));
+
+  f1.release();
+  std::string f2_contents;
+  ASSERT_NO_ERRNO(GetContents(f2.path(), &f2_contents));
+  EXPECT_EQ("first", f2_contents);
+}
+
+TEST(RenameTest, DirectoryOverwritesDirectoryLinkCount) {
+  auto parent1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  EXPECT_THAT(Links(parent1.path()), IsPosixErrorOkAndHolds(2));
+
+  auto parent2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  EXPECT_THAT(Links(parent2.path()), IsPosixErrorOkAndHolds(2));
+
+  auto dir1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(parent1.path()));
+  auto dir2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(parent2.path()));
+
+  EXPECT_THAT(Links(parent1.path()), IsPosixErrorOkAndHolds(3));
+  EXPECT_THAT(Links(parent2.path()), IsPosixErrorOkAndHolds(3));
+
+  ASSERT_THAT(rename(dir1.path().c_str(), dir2.path().c_str()),
+              SyscallSucceeds());
+
+  EXPECT_THAT(Links(parent1.path()), IsPosixErrorOkAndHolds(2));
+  EXPECT_THAT(Links(parent2.path()), IsPosixErrorOkAndHolds(3));
+}
+
+TEST(RenameTest, FileDoesNotExist) {
+  auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const std::string source = JoinPath(dir.path(), "source");
+  const std::string dest = JoinPath(dir.path(), "dest");
+  ASSERT_THAT(rename(source.c_str(), dest.c_str()),
+              SyscallFailsWithErrno(ENOENT));
+}
+
+TEST(RenameTest, FileDoesNotOverwriteDirectory) {
+  auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  ASSERT_THAT(rename(f.path().c_str(), dir.path().c_str()),
+              SyscallFailsWithErrno(EISDIR));
+}
+
+TEST(RenameTest, DirectoryDoesNotOverwriteFile) {
+  auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  ASSERT_THAT(rename(dir.path().c_str(), f.path().c_str()),
+              SyscallFailsWithErrno(ENOTDIR));
+}
+
+TEST(RenameTest, DirectoryOverwritesEmptyDirectory) {
+  auto dir1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir1.path()));
+  auto dir2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  EXPECT_THAT(rename(dir1.path().c_str(), dir2.path().c_str()),
+              SyscallSucceeds());
+  EXPECT_THAT(Exists(dir1.path()), IsPosixErrorOkAndHolds(false));
+  dir1.release();
+  EXPECT_THAT(Exists(JoinPath(dir2.path(), Basename(f.path()))),
+              IsPosixErrorOkAndHolds(true));
+  f.release();
+}
+
+TEST(RenameTest, FailsWithDots) {
+  auto dir1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  auto dir2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  auto dir1_dot = absl::StrCat(dir1.path(), "/.");
+  auto dir2_dot = absl::StrCat(dir2.path(), "/.");
+  auto dir1_dot_dot = absl::StrCat(dir1.path(), "/..");
+  auto dir2_dot_dot = absl::StrCat(dir2.path(), "/..");
+
+  // Try with dot paths in the first argument
+  EXPECT_THAT(rename(dir1_dot.c_str(), dir2.path().c_str()),
+              SyscallFailsWithErrno(EBUSY));
+  EXPECT_THAT(rename(dir1_dot_dot.c_str(), dir2.path().c_str()),
+              SyscallFailsWithErrno(EBUSY));
+
+  // Try with dot paths in the second argument
+  EXPECT_THAT(rename(dir1.path().c_str(), dir2_dot.c_str()),
+              SyscallFailsWithErrno(EBUSY));
+  EXPECT_THAT(rename(dir1.path().c_str(), dir2_dot_dot.c_str()),
+              SyscallFailsWithErrno(EBUSY));
+}
+
+TEST(RenameTest, DirectoryDoesNotOverwriteNonemptyDirectory) {
+  auto dir1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  auto f1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir1.path()));
+  auto dir2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  auto f2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir2.path()));
+  ASSERT_THAT(rename(dir1.path().c_str(), dir2.path().c_str()),
+              SyscallFailsWithErrno(ENOTEMPTY));
+}
+
+TEST(RenameTest, FailsWhenOldParentNotWritable) {
+  // Drop capabilities that allow us to override file and directory permissions.
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false));
+
+  auto dir1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  auto f1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir1.path()));
+  auto dir2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  // dir1 is not writable.
+  ASSERT_THAT(chmod(dir1.path().c_str(), 0555), SyscallSucceeds());
+
+  std::string const newpath = NewTempAbsPathInDir(dir2.path());
+  EXPECT_THAT(rename(f1.path().c_str(), newpath.c_str()),
+              SyscallFailsWithErrno(EACCES));
+}
+
+TEST(RenameTest, FailsWhenNewParentNotWritable) {
+  // Drop capabilities that allow us to override file and directory permissions.
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false));
+
+  auto dir1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  auto f1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir1.path()));
+  // dir2 is not writable.
+  auto dir2 = ASSERT_NO_ERRNO_AND_VALUE(
+      TempPath::CreateDirWith(GetAbsoluteTestTmpdir(), 0555));
+
+  std::string const newpath = NewTempAbsPathInDir(dir2.path());
+  EXPECT_THAT(rename(f1.path().c_str(), newpath.c_str()),
+              SyscallFailsWithErrno(EACCES));
+}
+
+// Equivalent to FailsWhenNewParentNotWritable, but with a destination file
+// to overwrite.
+TEST(RenameTest, OverwriteFailsWhenNewParentNotWritable) {
+  // Drop capabilities that allow us to override file and directory permissions.
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false));
+
+  auto dir1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  auto f1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir1.path()));
+
+  // dir2 is not writable.
+  auto dir2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  auto f2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir2.path()));
+  ASSERT_THAT(chmod(dir2.path().c_str(), 0555), SyscallSucceeds());
+
+  EXPECT_THAT(rename(f1.path().c_str(), f2.path().c_str()),
+              SyscallFailsWithErrno(EACCES));
+}
+
+// If the parent directory of source is not accessible, rename returns EACCES
+// because the user cannot determine if source exists.
+TEST(RenameTest, FileDoesNotExistWhenNewParentNotExecutable) {
+  // Drop capabilities that allow us to override file and directory permissions.
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false));
+
+  // No execute permission.
+  auto dir = ASSERT_NO_ERRNO_AND_VALUE(
+      TempPath::CreateDirWith(GetAbsoluteTestTmpdir(), 0400));
+
+  const std::string source = JoinPath(dir.path(), "source");
+  const std::string dest = JoinPath(dir.path(), "dest");
+  ASSERT_THAT(rename(source.c_str(), dest.c_str()),
+              SyscallFailsWithErrno(EACCES));
+}
+
+TEST(RenameTest, DirectoryWithOpenFdOverwritesEmptyDirectory) {
+  auto dir1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir1.path()));
+  auto dir2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+
+  // Get an fd on dir1
+  int fd;
+  ASSERT_THAT(fd = open(dir1.path().c_str(), O_DIRECTORY), SyscallSucceeds());
+  auto close_f = Cleanup([fd] {
+    // Close the fd on f.
+    EXPECT_THAT(close(fd), SyscallSucceeds());
+  });
+
+  EXPECT_THAT(rename(dir1.path().c_str(), dir2.path().c_str()),
+              SyscallSucceeds());
+
+  const std::string new_f_path = JoinPath(dir2.path(), Basename(f.path()));
+
+  auto remove_f = Cleanup([&] {
+    // Delete f in its new location.
+    ASSERT_NO_ERRNO(Delete(new_f_path));
+    f.release();
+  });
+
+  EXPECT_THAT(Exists(dir1.path()), IsPosixErrorOkAndHolds(false));
+  dir1.release();
+  EXPECT_THAT(Exists(new_f_path), IsPosixErrorOkAndHolds(true));
+}
+
+TEST(RenameTest, FileWithOpenFd) {
+  TempPath root_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  TempPath dir1 =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(root_dir.path()));
+  TempPath dir2 =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(root_dir.path()));
+  TempPath dir3 =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(root_dir.path()));
+
+  // Create file in dir1.
+  constexpr char kContents[] = "foo";
+  auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      dir1.path(), kContents, TempPath::kDefaultFileMode));
+
+  // Get fd on file.
+  const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDWR));
+
+  // Move f to dir2.
+  const std::string path2 = NewTempAbsPathInDir(dir2.path());
+  ASSERT_THAT(rename(f.path().c_str(), path2.c_str()), SyscallSucceeds());
+
+  // Read f's kContents.
+  char buf[sizeof(kContents)];
+  EXPECT_THAT(PreadFd(fd.get(), &buf, sizeof(kContents), 0),
+              SyscallSucceedsWithValue(sizeof(kContents) - 1));
+  EXPECT_EQ(absl::string_view(buf, sizeof(buf) - 1), kContents);
+
+  // Move f to dir3.
+  const std::string path3 = NewTempAbsPathInDir(dir3.path());
+  ASSERT_THAT(rename(path2.c_str(), path3.c_str()), SyscallSucceeds());
+
+  // Read f's kContents.
+  EXPECT_THAT(PreadFd(fd.get(), &buf, sizeof(kContents), 0),
+              SyscallSucceedsWithValue(sizeof(kContents) - 1));
+  EXPECT_EQ(absl::string_view(buf, sizeof(buf) - 1), kContents);
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/rlimits.cc b/test/syscalls/linux/rlimits.cc
new file mode 100644
index 000000000..860f0f688
--- /dev/null
+++ b/test/syscalls/linux/rlimits.cc
@@ -0,0 +1,75 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sys/resource.h>
+#include <sys/time.h>
+
+#include "test/util/capability_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(RlimitTest, SetRlimitHigher) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_RESOURCE)));
+
+  struct rlimit rl = {};
+  EXPECT_THAT(getrlimit(RLIMIT_NOFILE, &rl), SyscallSucceeds());
+
+  // Lower the rlimit first, as it may be equal to /proc/sys/fs/nr_open, in
+  // which case even users with CAP_SYS_RESOURCE can't raise it.
+  rl.rlim_cur--;
+  rl.rlim_max--;
+  ASSERT_THAT(setrlimit(RLIMIT_NOFILE, &rl), SyscallSucceeds());
+
+  rl.rlim_max++;
+  EXPECT_THAT(setrlimit(RLIMIT_NOFILE, &rl), SyscallSucceeds());
+}
+
+TEST(RlimitTest, UnprivilegedSetRlimit) {
+  // Drop privileges if necessary.
+  if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_RESOURCE))) {
+    EXPECT_NO_ERRNO(SetCapability(CAP_SYS_RESOURCE, false));
+  }
+
+  struct rlimit rl = {};
+  rl.rlim_cur = 1000;
+  rl.rlim_max = 20000;
+  EXPECT_THAT(setrlimit(RLIMIT_NOFILE, &rl), SyscallSucceeds());
+
+  struct rlimit rl2 = {};
+  EXPECT_THAT(getrlimit(RLIMIT_NOFILE, &rl2), SyscallSucceeds());
+  EXPECT_EQ(rl.rlim_cur, rl2.rlim_cur);
+  EXPECT_EQ(rl.rlim_max, rl2.rlim_max);
+
+  rl.rlim_max = 100000;
+  EXPECT_THAT(setrlimit(RLIMIT_NOFILE, &rl), SyscallFailsWithErrno(EPERM));
+}
+
+TEST(RlimitTest, SetSoftRlimitAboveHard) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_RESOURCE)));
+
+  struct rlimit rl = {};
+  EXPECT_THAT(getrlimit(RLIMIT_NOFILE, &rl), SyscallSucceeds());
+
+  rl.rlim_cur = rl.rlim_max + 1;
+  EXPECT_THAT(setrlimit(RLIMIT_NOFILE, &rl), SyscallFailsWithErrno(EINVAL));
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/rseq.cc b/test/syscalls/linux/rseq.cc
new file mode 100644
index 000000000..4bfb1ff56
--- /dev/null
+++ b/test/syscalls/linux/rseq.cc
@@ -0,0 +1,198 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <signal.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/rseq/test.h"
+#include "test/syscalls/linux/rseq/uapi.h"
+#include "test/util/logging.h"
+#include "test/util/multiprocess_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// Syscall test for rseq (restartable sequences).
+//
+// We must be very careful about how these tests are written. Each thread may
+// only have one struct rseq registration, which may be done automatically at
+// thread start (as of 2019-11-13, glibc does *not* support rseq and thus does
+// not do so, but other libraries do).
+//
+// Testing of rseq is thus done primarily in a child process with no
+// registration. This means exec'ing a nostdlib binary, as rseq registration can
+// only be cleared by execve (or knowing the old rseq address), and glibc (based
+// on the current unmerged patches) register rseq before calling main()).
+
+int RSeq(struct rseq* rseq, uint32_t rseq_len, int flags, uint32_t sig) {
+  return syscall(kRseqSyscall, rseq, rseq_len, flags, sig);
+}
+
+// Returns true if this kernel supports the rseq syscall.
+PosixErrorOr<bool> RSeqSupported() {
+  // We have to be careful here, there are three possible cases:
+  //
+  // 1. rseq is not supported -> ENOSYS
+  // 2. rseq is supported and not registered -> success, but we should
+  //    unregister.
+  // 3. rseq is supported and registered -> EINVAL (most likely).
+
+  // The only validation done on new registrations is that rseq is aligned and
+  // writable.
+  rseq rseq = {};
+  int ret = RSeq(&rseq, sizeof(rseq), 0, 0);
+  if (ret == 0) {
+    // Successfully registered, rseq is supported. Unregister.
+    ret = RSeq(&rseq, sizeof(rseq), kRseqFlagUnregister, 0);
+    if (ret != 0) {
+      return PosixError(errno);
+    }
+    return true;
+  }
+
+  switch (errno) {
+    case ENOSYS:
+      // Not supported.
+      return false;
+    case EINVAL:
+      // Supported, but already registered. EINVAL returned because we provided
+      // a different address.
+      return true;
+    default:
+      // Unknown error.
+      return PosixError(errno);
+  }
+}
+
+constexpr char kRseqBinary[] = "test/syscalls/linux/rseq/rseq";
+
+void RunChildTest(std::string test_case, int want_status) {
+  std::string path = RunfilePath(kRseqBinary);
+
+  pid_t child_pid = -1;
+  int execve_errno = 0;
+  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+      ForkAndExec(path, {path, test_case}, {}, &child_pid, &execve_errno));
+
+  ASSERT_GT(child_pid, 0);
+  ASSERT_EQ(execve_errno, 0);
+
+  int status = 0;
+  ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds());
+  ASSERT_EQ(status, want_status);
+}
+
+// Test that rseq must be aligned.
+TEST(RseqTest, Unaligned) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(RSeqSupported()));
+
+  RunChildTest(kRseqTestUnaligned, 0);
+}
+
+// Sanity test that registration works.
+TEST(RseqTest, Register) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(RSeqSupported()));
+
+  RunChildTest(kRseqTestRegister, 0);
+}
+
+// Registration can't be done twice.
+TEST(RseqTest, DoubleRegister) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(RSeqSupported()));
+
+  RunChildTest(kRseqTestDoubleRegister, 0);
+}
+
+// Registration can be done again after unregister.
+TEST(RseqTest, RegisterUnregister) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(RSeqSupported()));
+
+  RunChildTest(kRseqTestRegisterUnregister, 0);
+}
+
+// The pointer to rseq must match on register/unregister.
+TEST(RseqTest, UnregisterDifferentPtr) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(RSeqSupported()));
+
+  RunChildTest(kRseqTestUnregisterDifferentPtr, 0);
+}
+
+// The signature must match on register/unregister.
+TEST(RseqTest, UnregisterDifferentSignature) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(RSeqSupported()));
+
+  RunChildTest(kRseqTestUnregisterDifferentSignature, 0);
+}
+
+// The CPU ID is initialized.
+TEST(RseqTest, CPU) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(RSeqSupported()));
+
+  RunChildTest(kRseqTestCPU, 0);
+}
+
+// Critical section is eventually aborted.
+TEST(RseqTest, Abort) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(RSeqSupported()));
+
+  RunChildTest(kRseqTestAbort, 0);
+}
+
+// Abort may be before the critical section.
+TEST(RseqTest, AbortBefore) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(RSeqSupported()));
+
+  RunChildTest(kRseqTestAbortBefore, 0);
+}
+
+// Signature must match.
+TEST(RseqTest, AbortSignature) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(RSeqSupported()));
+
+  RunChildTest(kRseqTestAbortSignature, SIGSEGV);
+}
+
+// Abort must not be in the critical section.
+TEST(RseqTest, AbortPreCommit) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(RSeqSupported()));
+
+  RunChildTest(kRseqTestAbortPreCommit, SIGSEGV);
+}
+
+// rseq.rseq_cs is cleared on abort.
+TEST(RseqTest, AbortClearsCS) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(RSeqSupported()));
+
+  RunChildTest(kRseqTestAbortClearsCS, 0);
+}
+
+// rseq.rseq_cs is cleared on abort outside of critical section.
+TEST(RseqTest, InvalidAbortClearsCS) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(RSeqSupported()));
+
+  RunChildTest(kRseqTestInvalidAbortClearsCS, 0);
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/rseq/BUILD b/test/syscalls/linux/rseq/BUILD
new file mode 100644
index 000000000..853258b04
--- /dev/null
+++ b/test/syscalls/linux/rseq/BUILD
@@ -0,0 +1,61 @@
+# This package contains a standalone rseq test binary. This binary must not
+# depend on libc, which might use rseq itself.
+
+load("//tools:defs.bzl", "cc_flags_supplier", "cc_library", "cc_toolchain", "select_arch")
+
+package(licenses = ["notice"])
+
+genrule(
+    name = "rseq_binary",
+    srcs = [
+        "critical.h",
+        "critical_amd64.S",
+        "critical_arm64.S",
+        "rseq.cc",
+        "syscalls.h",
+        "start_amd64.S",
+        "start_arm64.S",
+        "test.h",
+        "types.h",
+        "uapi.h",
+    ],
+    outs = ["rseq"],
+    cmd = "$(CC) " +
+          "$(CC_FLAGS) " +
+          "-I. " +
+          "-Wall " +
+          "-Werror " +
+          "-O2 " +
+          "-std=c++17 " +
+          "-static " +
+          "-nostdlib " +
+          "-ffreestanding " +
+          "-o " +
+          "$(location rseq) " +
+          select_arch(
+              amd64 = "$(location critical_amd64.S) $(location start_amd64.S) ",
+              arm64 = "$(location critical_arm64.S) $(location start_arm64.S) ",
+              no_match_error = "unsupported architecture",
+          ) +
+          "$(location rseq.cc)",
+    toolchains = [
+        cc_toolchain,
+        ":no_pie_cc_flags",
+    ],
+    visibility = ["//:sandbox"],
+)
+
+cc_flags_supplier(
+    name = "no_pie_cc_flags",
+    features = ["-pie"],
+)
+
+cc_library(
+    name = "lib",
+    testonly = 1,
+    hdrs = [
+        "test.h",
+        "uapi.h",
+    ],
+    visibility = ["//:sandbox"],
+)
diff --git a/test/syscalls/linux/rseq/critical.h b/test/syscalls/linux/rseq/critical.h
new file mode 100644
index 000000000..ac987a25e
--- /dev/null
+++ b/test/syscalls/linux/rseq/critical.h
@@ -0,0 +1,39 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_SYSCALLS_LINUX_RSEQ_CRITICAL_H_
+#define GVISOR_TEST_SYSCALLS_LINUX_RSEQ_CRITICAL_H_
+
+#include "test/syscalls/linux/rseq/types.h"
+#include "test/syscalls/linux/rseq/uapi.h"
+
+constexpr uint32_t kRseqSignature = 0x90909090;
+
+extern "C" {
+
+extern void rseq_loop(struct rseq* r, struct rseq_cs* cs);
+extern void* rseq_loop_early_abort;
+extern void* rseq_loop_start;
+extern void* rseq_loop_pre_commit;
+extern void* rseq_loop_post_commit;
+extern void* rseq_loop_abort;
+
+extern int rseq_getpid(struct rseq* r, struct rseq_cs* cs);
+extern void* rseq_getpid_start;
+extern void* rseq_getpid_post_commit;
+extern void* rseq_getpid_abort;
+
+}  // extern "C"
+
+#endif  // GVISOR_TEST_SYSCALLS_LINUX_RSEQ_CRITICAL_H_
diff --git a/test/syscalls/linux/rseq/critical_amd64.S b/test/syscalls/linux/rseq/critical_amd64.S
new file mode 100644
index 000000000..8c0687e6d
--- /dev/null
+++ b/test/syscalls/linux/rseq/critical_amd64.S
@@ -0,0 +1,66 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Restartable sequences critical sections.
+
+// Loops continuously until aborted.
+//
+// void rseq_loop(struct rseq* r, struct rseq_cs* cs)
+
+  .text
+  .globl  rseq_loop
+  .type   rseq_loop, @function
+
+rseq_loop:
+  jmp begin
+
+  // Abort block before the critical section.
+  // Abort signature is 4 nops for simplicity.
+  .byte 0x90, 0x90, 0x90, 0x90
+  .globl  rseq_loop_early_abort
+rseq_loop_early_abort:
+  ret
+
+begin:
+  // r->rseq_cs = cs
+  movq %rsi, 8(%rdi)
+
+  // N.B. rseq_cs will be cleared by any preempt, even outside the critical
+  // section. Thus it must be set in or immediately before the critical section
+  // to ensure it is not cleared before the section begins.
+  .globl  rseq_loop_start
+rseq_loop_start:
+  jmp rseq_loop_start
+
+  // "Pre-commit": extra instructions inside the critical section.  These are
+  // used as the abort point in TestAbortPreCommit, which is not valid.
+  .globl  rseq_loop_pre_commit
+rseq_loop_pre_commit:
+  // Extra abort signature + nop for TestAbortPostCommit.
+  .byte 0x90, 0x90, 0x90, 0x90
+  nop
+
+  // "Post-commit": never reached in this case.
+  .globl  rseq_loop_post_commit
+rseq_loop_post_commit:
+
+  // Abort signature is 4 nops for simplicity.
+  .byte 0x90, 0x90, 0x90, 0x90
+
+  .globl  rseq_loop_abort
+rseq_loop_abort:
+  ret
+
+  .size  rseq_loop,.-rseq_loop
+  .section  .note.GNU-stack,"",@progbits
diff --git a/test/syscalls/linux/rseq/critical_arm64.S b/test/syscalls/linux/rseq/critical_arm64.S
new file mode 100644
index 000000000..bfe7e8307
--- /dev/null
+++ b/test/syscalls/linux/rseq/critical_arm64.S
@@ -0,0 +1,66 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Restartable sequences critical sections.
+
+// Loops continuously until aborted.
+//
+// void rseq_loop(struct rseq* r, struct rseq_cs* cs)
+
+  .text
+  .globl  rseq_loop
+  .type   rseq_loop, @function
+
+rseq_loop:
+  b begin
+
+  // Abort block before the critical section.
+  // Abort signature.
+  .byte 0x90, 0x90, 0x90, 0x90
+  .globl  rseq_loop_early_abort
+rseq_loop_early_abort:
+  ret
+
+begin:
+  // r->rseq_cs = cs
+  str x1, [x0, #8]
+
+  // N.B. rseq_cs will be cleared by any preempt, even outside the critical
+  // section. Thus it must be set in or immediately before the critical section
+  // to ensure it is not cleared before the section begins.
+  .globl  rseq_loop_start
+rseq_loop_start:
+  b rseq_loop_start
+
+  // "Pre-commit": extra instructions inside the critical section.  These are
+  // used as the abort point in TestAbortPreCommit, which is not valid.
+  .globl  rseq_loop_pre_commit
+rseq_loop_pre_commit:
+  // Extra abort signature + nop for TestAbortPostCommit.
+  .byte 0x90, 0x90, 0x90, 0x90
+  nop
+
+  // "Post-commit": never reached in this case.
+  .globl  rseq_loop_post_commit
+rseq_loop_post_commit:
+
+  // Abort signature.
+  .byte 0x90, 0x90, 0x90, 0x90
+
+  .globl  rseq_loop_abort
+rseq_loop_abort:
+  ret
+
+  .size  rseq_loop,.-rseq_loop
+  .section  .note.GNU-stack,"",@progbits
diff --git a/test/syscalls/linux/rseq/rseq.cc b/test/syscalls/linux/rseq/rseq.cc
new file mode 100644
index 000000000..f036db26d
--- /dev/null
+++ b/test/syscalls/linux/rseq/rseq.cc
@@ -0,0 +1,366 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test/syscalls/linux/rseq/critical.h"
+#include "test/syscalls/linux/rseq/syscalls.h"
+#include "test/syscalls/linux/rseq/test.h"
+#include "test/syscalls/linux/rseq/types.h"
+#include "test/syscalls/linux/rseq/uapi.h"
+
+namespace gvisor {
+namespace testing {
+
+extern "C" int main(int argc, char** argv, char** envp);
+
+// Standalone initialization before calling main().
+extern "C" void __init(uintptr_t* sp) {
+  int argc = sp[0];
+  char** argv = reinterpret_cast<char**>(&sp[1]);
+  char** envp = &argv[argc + 1];
+
+  // Call main() and exit.
+  sys_exit_group(main(argc, argv, envp));
+
+  // sys_exit_group does not return
+}
+
+int strcmp(const char* s1, const char* s2) {
+  const unsigned char* p1 = reinterpret_cast<const unsigned char*>(s1);
+  const unsigned char* p2 = reinterpret_cast<const unsigned char*>(s2);
+
+  while (*p1 == *p2) {
+    if (!*p1) {
+      return 0;
+    }
+    ++p1;
+    ++p2;
+  }
+  return static_cast<int>(*p1) - static_cast<int>(*p2);
+}
+
+int sys_rseq(struct rseq* rseq, uint32_t rseq_len, int flags, uint32_t sig) {
+  return raw_syscall(kRseqSyscall, rseq, rseq_len, flags, sig);
+}
+
+// Test that rseq must be aligned.
+int TestUnaligned() {
+  constexpr uintptr_t kRequiredAlignment = alignof(rseq);
+
+  char buf[2 * kRequiredAlignment] = {};
+  uintptr_t ptr = reinterpret_cast<uintptr_t>(&buf[0]);
+  if ((ptr & (kRequiredAlignment - 1)) == 0) {
+    // buf is already aligned. Misalign it.
+    ptr++;
+  }
+
+  int ret = sys_rseq(reinterpret_cast<rseq*>(ptr), sizeof(rseq), 0, 0);
+  if (sys_errno(ret) != EINVAL) {
+    return 1;
+  }
+  return 0;
+}
+
+// Sanity test that registration works.
+int TestRegister() {
+  struct rseq r = {};
+  if (int ret = sys_rseq(&r, sizeof(r), 0, 0); sys_errno(ret) != 0) {
+    return 1;
+  }
+  return 0;
+};
+
+// Registration can't be done twice.
+int TestDoubleRegister() {
+  struct rseq r = {};
+  if (int ret = sys_rseq(&r, sizeof(r), 0, 0); sys_errno(ret) != 0) {
+    return 1;
+  }
+
+  if (int ret = sys_rseq(&r, sizeof(r), 0, 0); sys_errno(ret) != EBUSY) {
+    return 1;
+  }
+
+  return 0;
+};
+
+// Registration can be done again after unregister.
+int TestRegisterUnregister() {
+  struct rseq r = {};
+  if (int ret = sys_rseq(&r, sizeof(r), 0, 0); sys_errno(ret) != 0) {
+    return 1;
+  }
+
+  if (int ret = sys_rseq(&r, sizeof(r), kRseqFlagUnregister, 0);
+      sys_errno(ret) != 0) {
+    return 1;
+  }
+
+  if (int ret = sys_rseq(&r, sizeof(r), 0, 0); sys_errno(ret) != 0) {
+    return 1;
+  }
+
+  return 0;
+};
+
+// The pointer to rseq must match on register/unregister.
+int TestUnregisterDifferentPtr() {
+  struct rseq r = {};
+  if (int ret = sys_rseq(&r, sizeof(r), 0, 0); sys_errno(ret) != 0) {
+    return 1;
+  }
+
+  struct rseq r2 = {};
+  if (int ret = sys_rseq(&r2, sizeof(r2), kRseqFlagUnregister, 0);
+      sys_errno(ret) != EINVAL) {
+    return 1;
+  }
+
+  return 0;
+};
+
+// The signature must match on register/unregister.
+int TestUnregisterDifferentSignature() {
+  constexpr int kSignature = 0;
+
+  struct rseq r = {};
+  if (int ret = sys_rseq(&r, sizeof(r), 0, kSignature); sys_errno(ret) != 0) {
+    return 1;
+  }
+
+  if (int ret = sys_rseq(&r, sizeof(r), kRseqFlagUnregister, kSignature + 1);
+      sys_errno(ret) != EPERM) {
+    return 1;
+  }
+
+  return 0;
+};
+
+// The CPU ID is initialized.
+int TestCPU() {
+  struct rseq r = {};
+  r.cpu_id = kRseqCPUIDUninitialized;
+
+  if (int ret = sys_rseq(&r, sizeof(r), 0, 0); sys_errno(ret) != 0) {
+    return 1;
+  }
+
+  if (__atomic_load_n(&r.cpu_id, __ATOMIC_RELAXED) < 0) {
+    return 1;
+  }
+  if (__atomic_load_n(&r.cpu_id_start, __ATOMIC_RELAXED) < 0) {
+    return 1;
+  }
+
+  return 0;
+};
+
+// Critical section is eventually aborted.
+int TestAbort() {
+  struct rseq r = {};
+  if (int ret = sys_rseq(&r, sizeof(r), 0, kRseqSignature);
+      sys_errno(ret) != 0) {
+    return 1;
+  }
+
+  struct rseq_cs cs = {};
+  cs.version = 0;
+  cs.flags = 0;
+  cs.start_ip = reinterpret_cast<uint64_t>(&rseq_loop_start);
+  cs.post_commit_offset = reinterpret_cast<uint64_t>(&rseq_loop_post_commit) -
+                          reinterpret_cast<uint64_t>(&rseq_loop_start);
+  cs.abort_ip = reinterpret_cast<uint64_t>(&rseq_loop_abort);
+
+  // Loops until abort. If this returns then abort occurred.
+  rseq_loop(&r, &cs);
+
+  return 0;
+};
+
+// Abort may be before the critical section.
+int TestAbortBefore() {
+  struct rseq r = {};
+  if (int ret = sys_rseq(&r, sizeof(r), 0, kRseqSignature);
+      sys_errno(ret) != 0) {
+    return 1;
+  }
+
+  struct rseq_cs cs = {};
+  cs.version = 0;
+  cs.flags = 0;
+  cs.start_ip = reinterpret_cast<uint64_t>(&rseq_loop_start);
+  cs.post_commit_offset = reinterpret_cast<uint64_t>(&rseq_loop_post_commit) -
+                          reinterpret_cast<uint64_t>(&rseq_loop_start);
+  cs.abort_ip = reinterpret_cast<uint64_t>(&rseq_loop_early_abort);
+
+  // Loops until abort. If this returns then abort occurred.
+  rseq_loop(&r, &cs);
+
+  return 0;
+};
+
+// Signature must match.
+int TestAbortSignature() {
+  struct rseq r = {};
+  if (int ret = sys_rseq(&r, sizeof(r), 0, kRseqSignature + 1);
+      sys_errno(ret) != 0) {
+    return 1;
+  }
+
+  struct rseq_cs cs = {};
+  cs.version = 0;
+  cs.flags = 0;
+  cs.start_ip = reinterpret_cast<uint64_t>(&rseq_loop_start);
+  cs.post_commit_offset = reinterpret_cast<uint64_t>(&rseq_loop_post_commit) -
+                          reinterpret_cast<uint64_t>(&rseq_loop_start);
+  cs.abort_ip = reinterpret_cast<uint64_t>(&rseq_loop_abort);
+
+  // Loops until abort. This should SIGSEGV on abort.
+  rseq_loop(&r, &cs);
+
+  return 1;
+};
+
+// Abort must not be in the critical section.
+int TestAbortPreCommit() {
+  struct rseq r = {};
+  if (int ret = sys_rseq(&r, sizeof(r), 0, kRseqSignature + 1);
+      sys_errno(ret) != 0) {
+    return 1;
+  }
+
+  struct rseq_cs cs = {};
+  cs.version = 0;
+  cs.flags = 0;
+  cs.start_ip = reinterpret_cast<uint64_t>(&rseq_loop_start);
+  cs.post_commit_offset = reinterpret_cast<uint64_t>(&rseq_loop_post_commit) -
+                          reinterpret_cast<uint64_t>(&rseq_loop_start);
+  cs.abort_ip = reinterpret_cast<uint64_t>(&rseq_loop_pre_commit);
+
+  // Loops until abort. This should SIGSEGV on abort.
+  rseq_loop(&r, &cs);
+
+  return 1;
+};
+
+// rseq.rseq_cs is cleared on abort.
+int TestAbortClearsCS() {
+  struct rseq r = {};
+  if (int ret = sys_rseq(&r, sizeof(r), 0, kRseqSignature);
+      sys_errno(ret) != 0) {
+    return 1;
+  }
+
+  struct rseq_cs cs = {};
+  cs.version = 0;
+  cs.flags = 0;
+  cs.start_ip = reinterpret_cast<uint64_t>(&rseq_loop_start);
+  cs.post_commit_offset = reinterpret_cast<uint64_t>(&rseq_loop_post_commit) -
+                          reinterpret_cast<uint64_t>(&rseq_loop_start);
+  cs.abort_ip = reinterpret_cast<uint64_t>(&rseq_loop_abort);
+
+  // Loops until abort. If this returns then abort occurred.
+  rseq_loop(&r, &cs);
+
+  if (__atomic_load_n(&r.rseq_cs, __ATOMIC_RELAXED)) {
+    return 1;
+  }
+
+  return 0;
+};
+
+// rseq.rseq_cs is cleared on abort outside of critical section.
+int TestInvalidAbortClearsCS() {
+  struct rseq r = {};
+  if (int ret = sys_rseq(&r, sizeof(r), 0, kRseqSignature);
+      sys_errno(ret) != 0) {
+    return 1;
+  }
+
+  struct rseq_cs cs = {};
+  cs.version = 0;
+  cs.flags = 0;
+  cs.start_ip = reinterpret_cast<uint64_t>(&rseq_loop_start);
+  cs.post_commit_offset = reinterpret_cast<uint64_t>(&rseq_loop_post_commit) -
+                          reinterpret_cast<uint64_t>(&rseq_loop_start);
+  cs.abort_ip = reinterpret_cast<uint64_t>(&rseq_loop_abort);
+
+  __atomic_store_n(&r.rseq_cs, &cs, __ATOMIC_RELAXED);
+
+  // When the next abort condition occurs, the kernel will clear cs once it
+  // determines we aren't in the critical section.
+  while (1) {
+    if (!__atomic_load_n(&r.rseq_cs, __ATOMIC_RELAXED)) {
+      break;
+    }
+  }
+
+  return 0;
+};
+
+// Exit codes:
+//  0 - Pass
+//  1 - Fail
+//  2 - Missing argument
+//  3 - Unknown test case
+extern "C" int main(int argc, char** argv, char** envp) {
+  if (argc != 2) {
+    // Usage: rseq <test case>
+    return 2;
+  }
+
+  if (strcmp(argv[1], kRseqTestUnaligned) == 0) {
+    return TestUnaligned();
+  }
+  if (strcmp(argv[1], kRseqTestRegister) == 0) {
+    return TestRegister();
+  }
+  if (strcmp(argv[1], kRseqTestDoubleRegister) == 0) {
+    return TestDoubleRegister();
+  }
+  if (strcmp(argv[1], kRseqTestRegisterUnregister) == 0) {
+    return TestRegisterUnregister();
+  }
+  if (strcmp(argv[1], kRseqTestUnregisterDifferentPtr) == 0) {
+    return TestUnregisterDifferentPtr();
+  }
+  if (strcmp(argv[1], kRseqTestUnregisterDifferentSignature) == 0) {
+    return TestUnregisterDifferentSignature();
+  }
+  if (strcmp(argv[1], kRseqTestCPU) == 0) {
+    return TestCPU();
+  }
+  if (strcmp(argv[1], kRseqTestAbort) == 0) {
+    return TestAbort();
+  }
+  if (strcmp(argv[1], kRseqTestAbortBefore) == 0) {
+    return TestAbortBefore();
+  }
+  if (strcmp(argv[1], kRseqTestAbortSignature) == 0) {
+    return TestAbortSignature();
+  }
+  if (strcmp(argv[1], kRseqTestAbortPreCommit) == 0) {
+    return TestAbortPreCommit();
+  }
+  if (strcmp(argv[1], kRseqTestAbortClearsCS) == 0) {
+    return TestAbortClearsCS();
+  }
+  if (strcmp(argv[1], kRseqTestInvalidAbortClearsCS) == 0) {
+    return TestInvalidAbortClearsCS();
+  }
+
+  return 3;
+}
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/rseq/start_amd64.S b/test/syscalls/linux/rseq/start_amd64.S
new file mode 100644
index 000000000..b9611b276
--- /dev/null
+++ b/test/syscalls/linux/rseq/start_amd64.S
@@ -0,0 +1,45 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+  .text
+  .align 4
+  .type  _start,@function
+  .globl  _start
+
+_start:
+  movq  %rsp,%rdi
+  call  __init
+  hlt
+
+  .size  _start,.-_start
+  .section  .note.GNU-stack,"",@progbits
+
+  .text
+  .globl  raw_syscall
+  .type   raw_syscall, @function
+
+raw_syscall:
+  mov  %rdi,%rax      // syscall #
+  mov  %rsi,%rdi      // arg0
+  mov  %rdx,%rsi      // arg1
+  mov  %rcx,%rdx      // arg2
+  mov  %r8,%r10       // arg3 (goes in r10 instead of rcx for system calls)
+  mov  %r9,%r8        // arg4
+  mov  0x8(%rsp),%r9  // arg5
+  syscall
+  ret
+
+  .size  raw_syscall,.-raw_syscall
+  .section  .note.GNU-stack,"",@progbits
diff --git a/test/syscalls/linux/rseq/start_arm64.S b/test/syscalls/linux/rseq/start_arm64.S
new file mode 100644
index 000000000..693c1c6eb
--- /dev/null
+++ b/test/syscalls/linux/rseq/start_arm64.S
@@ -0,0 +1,45 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+  .text
+  .align 4
+  .type  _start,@function
+  .globl  _start
+
+_start:
+  mov  x29, sp
+  bl   __init
+  wfi
+
+  .size  _start,.-_start
+  .section  .note.GNU-stack,"",@progbits
+
+  .text
+  .globl  raw_syscall
+  .type   raw_syscall, @function
+
+raw_syscall:
+  mov  x8,x0   // syscall #
+  mov  x0,x1   // arg0
+  mov  x1,x2   // arg1
+  mov  x2,x3   // arg2
+  mov  x3,x4   // arg3
+  mov  x4,x5   // arg4
+  mov  x5,x6   // arg5
+  svc  #0
+  ret
+
+  .size  raw_syscall,.-raw_syscall
+  .section  .note.GNU-stack,"",@progbits
diff --git a/test/syscalls/linux/rseq/syscalls.h b/test/syscalls/linux/rseq/syscalls.h
new file mode 100644
index 000000000..c4118e6c5
--- /dev/null
+++ b/test/syscalls/linux/rseq/syscalls.h
@@ -0,0 +1,69 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_SYSCALLS_LINUX_RSEQ_SYSCALLS_H_
+#define GVISOR_TEST_SYSCALLS_LINUX_RSEQ_SYSCALLS_H_
+
+#include "test/syscalls/linux/rseq/types.h"
+
+// Syscall numbers.
+#if defined(__x86_64__)
+constexpr int kGetpid = 39;
+constexpr int kExitGroup = 231;
+#elif defined(__aarch64__)
+constexpr int kGetpid = 172;
+constexpr int kExitGroup = 94;
+#else
+#error "Unknown architecture"
+#endif
+
+namespace gvisor {
+namespace testing {
+
+// Standalone system call interfaces.
+// Note that these are all "raw" system call interfaces which encode
+// errors by setting the return value to a small negative number.
+// Use sys_errno() to check system call return values for errors.
+
+// Maximum Linux error number.
+constexpr int kMaxErrno = 4095;
+
+// Errno values.
+#define EPERM 1
+#define EFAULT 14
+#define EBUSY 16
+#define EINVAL 22
+
+// Get the error number from a raw system call return value.
+// Returns a positive error number or 0 if there was no error.
+static inline int sys_errno(uintptr_t rval) {
+  if (rval >= static_cast<uintptr_t>(-kMaxErrno)) {
+    return -static_cast<int>(rval);
+  }
+  return 0;
+}
+
+extern "C" uintptr_t raw_syscall(int number, ...);
+
+static inline void sys_exit_group(int status) {
+  raw_syscall(kExitGroup, status);
+}
+static inline int sys_getpid() {
+  return static_cast<int>(raw_syscall(kGetpid));
+}
+
+}  // namespace testing
+}  // namespace gvisor
+
+#endif  // GVISOR_TEST_SYSCALLS_LINUX_RSEQ_SYSCALLS_H_
diff --git a/test/syscalls/linux/rseq/test.h b/test/syscalls/linux/rseq/test.h
new file mode 100644
index 000000000..3b7bb74b1
--- /dev/null
+++ b/test/syscalls/linux/rseq/test.h
@@ -0,0 +1,43 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_SYSCALLS_LINUX_RSEQ_TEST_H_
+#define GVISOR_TEST_SYSCALLS_LINUX_RSEQ_TEST_H_
+
+namespace gvisor {
+namespace testing {
+
+// Test cases supported by rseq binary.
+
+inline constexpr char kRseqTestUnaligned[] = "unaligned";
+inline constexpr char kRseqTestRegister[] = "register";
+inline constexpr char kRseqTestDoubleRegister[] = "double-register";
+inline constexpr char kRseqTestRegisterUnregister[] = "register-unregister";
+inline constexpr char kRseqTestUnregisterDifferentPtr[] =
+    "unregister-different-ptr";
+inline constexpr char kRseqTestUnregisterDifferentSignature[] =
+    "unregister-different-signature";
+inline constexpr char kRseqTestCPU[] = "cpu";
+inline constexpr char kRseqTestAbort[] = "abort";
+inline constexpr char kRseqTestAbortBefore[] = "abort-before";
+inline constexpr char kRseqTestAbortSignature[] = "abort-signature";
+inline constexpr char kRseqTestAbortPreCommit[] = "abort-precommit";
+inline constexpr char kRseqTestAbortClearsCS[] = "abort-clears-cs";
+inline constexpr char kRseqTestInvalidAbortClearsCS[] =
+    "invalid-abort-clears-cs";
+
+}  // namespace testing
+}  // namespace gvisor
+
+#endif  // GVISOR_TEST_SYSCALLS_LINUX_RSEQ_TEST_H_
diff --git a/test/syscalls/linux/rseq/types.h b/test/syscalls/linux/rseq/types.h
new file mode 100644
index 000000000..b6afe9817
--- /dev/null
+++ b/test/syscalls/linux/rseq/types.h
@@ -0,0 +1,31 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_SYSCALLS_LINUX_RSEQ_TYPES_H_
+#define GVISOR_TEST_SYSCALLS_LINUX_RSEQ_TYPES_H_
+
+using size_t = __SIZE_TYPE__;
+using uintptr_t = __UINTPTR_TYPE__;
+
+using uint8_t = __UINT8_TYPE__;
+using uint16_t = __UINT16_TYPE__;
+using uint32_t = __UINT32_TYPE__;
+using uint64_t = __UINT64_TYPE__;
+
+using int8_t = __INT8_TYPE__;
+using int16_t = __INT16_TYPE__;
+using int32_t = __INT32_TYPE__;
+using int64_t = __INT64_TYPE__;
+
+#endif  // GVISOR_TEST_SYSCALLS_LINUX_RSEQ_TYPES_H_
diff --git a/test/syscalls/linux/rseq/uapi.h b/test/syscalls/linux/rseq/uapi.h
new file mode 100644
index 000000000..d3e60d0a4
--- /dev/null
+++ b/test/syscalls/linux/rseq/uapi.h
@@ -0,0 +1,51 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_SYSCALLS_LINUX_RSEQ_UAPI_H_
+#define GVISOR_TEST_SYSCALLS_LINUX_RSEQ_UAPI_H_
+
+#include <stdint.h>
+
+// User-kernel ABI for restartable sequences.
+
+// Syscall numbers.
+#if defined(__x86_64__)
+constexpr int kRseqSyscall = 334;
+#elif defined(__aarch64__)
+constexpr int kRseqSyscall = 293;
+#else
+#error "Unknown architecture"
+#endif  // __x86_64__
+
+struct rseq_cs {
+  uint32_t version;
+  uint32_t flags;
+  uint64_t start_ip;
+  uint64_t post_commit_offset;
+  uint64_t abort_ip;
+} __attribute__((aligned(4 * sizeof(uint64_t))));
+
+// N.B. alignment is enforced by the kernel.
+struct rseq {
+  uint32_t cpu_id_start;
+  uint32_t cpu_id;
+  struct rseq_cs* rseq_cs;
+  uint32_t flags;
+} __attribute__((aligned(4 * sizeof(uint64_t))));
+
+constexpr int kRseqFlagUnregister = 1 << 0;
+
+constexpr int kRseqCPUIDUninitialized = -1;
+
+#endif  // GVISOR_TEST_SYSCALLS_LINUX_RSEQ_UAPI_H_
diff --git a/test/syscalls/linux/rtsignal.cc b/test/syscalls/linux/rtsignal.cc
new file mode 100644
index 000000000..ed27e2566
--- /dev/null
+++ b/test/syscalls/linux/rtsignal.cc
@@ -0,0 +1,171 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <cerrno>
+#include <csignal>
+
+#include "gtest/gtest.h"
+#include "test/util/cleanup.h"
+#include "test/util/logging.h"
+#include "test/util/posix_error.h"
+#include "test/util/signal_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// saved_info is set by the handler.
+siginfo_t saved_info;
+
+// has_saved_info is set to true by the handler.
+volatile bool has_saved_info;
+
+void SigHandler(int sig, siginfo_t* info, void* context) {
+  // Copy to the given info.
+  saved_info = *info;
+  has_saved_info = true;
+}
+
+void ClearSavedInfo() {
+  // Clear the cached info.
+  memset(&saved_info, 0, sizeof(saved_info));
+  has_saved_info = false;
+}
+
+PosixErrorOr<Cleanup> SetupSignalHandler(int sig) {
+  struct sigaction sa;
+  sa.sa_sigaction = SigHandler;
+  sigfillset(&sa.sa_mask);
+  sa.sa_flags = SA_SIGINFO;
+  return ScopedSigaction(sig, sa);
+}
+
+class RtSignalTest : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    action_cleanup_ = ASSERT_NO_ERRNO_AND_VALUE(SetupSignalHandler(SIGUSR1));
+    mask_cleanup_ =
+        ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, SIGUSR1));
+  }
+
+  void TearDown() override { ClearSavedInfo(); }
+
+ private:
+  Cleanup action_cleanup_;
+  Cleanup mask_cleanup_;
+};
+
+static int rt_sigqueueinfo(pid_t tgid, int sig, siginfo_t* uinfo) {
+  int ret;
+  do {
+    // NOTE(b/25434735): rt_sigqueueinfo(2) could return EAGAIN for RT signals.
+    ret = syscall(SYS_rt_sigqueueinfo, tgid, sig, uinfo);
+  } while (ret == -1 && errno == EAGAIN);
+  return ret;
+}
+
+TEST_F(RtSignalTest, InvalidTID) {
+  siginfo_t uinfo;
+  // Depending on the kernel version, these calls may fail with
+  // ESRCH (goobunutu machines) or EPERM (production machines). Thus,
+  // the test simply ensures that they do fail.
+  EXPECT_THAT(rt_sigqueueinfo(-1, SIGUSR1, &uinfo), SyscallFails());
+  EXPECT_FALSE(has_saved_info);
+  EXPECT_THAT(rt_sigqueueinfo(0, SIGUSR1, &uinfo), SyscallFails());
+  EXPECT_FALSE(has_saved_info);
+}
+
+TEST_F(RtSignalTest, InvalidCodes) {
+  siginfo_t uinfo;
+
+  // We need a child for the code checks to apply. If the process is delivering
+  // to itself, then it can use whatever codes it wants and they will go
+  // through.
+  pid_t child = fork();
+  if (child == 0) {
+    _exit(1);
+  }
+  ASSERT_THAT(child, SyscallSucceeds());
+
+  // These are not allowed for child processes.
+  uinfo.si_code = 0;  // SI_USER.
+  EXPECT_THAT(rt_sigqueueinfo(child, SIGUSR1, &uinfo),
+              SyscallFailsWithErrno(EPERM));
+  uinfo.si_code = 0x80;  // SI_KERNEL.
+  EXPECT_THAT(rt_sigqueueinfo(child, SIGUSR1, &uinfo),
+              SyscallFailsWithErrno(EPERM));
+  uinfo.si_code = -6;  // SI_TKILL.
+  EXPECT_THAT(rt_sigqueueinfo(child, SIGUSR1, &uinfo),
+              SyscallFailsWithErrno(EPERM));
+  uinfo.si_code = -1;  // SI_QUEUE (allowed).
+  EXPECT_THAT(rt_sigqueueinfo(child, SIGUSR1, &uinfo), SyscallSucceeds());
+
+  // Join the child process.
+  EXPECT_THAT(waitpid(child, nullptr, 0), SyscallSucceeds());
+}
+
+TEST_F(RtSignalTest, ValueDelivered) {
+  siginfo_t uinfo;
+  uinfo.si_code = -1;  // SI_QUEUE (allowed).
+  uinfo.si_errno = 0x1234;
+
+  EXPECT_EQ(saved_info.si_errno, 0x0);
+  EXPECT_THAT(rt_sigqueueinfo(getpid(), SIGUSR1, &uinfo), SyscallSucceeds());
+  EXPECT_TRUE(has_saved_info);
+  EXPECT_EQ(saved_info.si_errno, 0x1234);
+}
+
+TEST_F(RtSignalTest, SignoMatch) {
+  auto action2_cleanup = ASSERT_NO_ERRNO_AND_VALUE(SetupSignalHandler(SIGUSR2));
+  auto mask2_cleanup =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, SIGUSR2));
+
+  siginfo_t uinfo;
+  uinfo.si_code = -1;  // SI_QUEUE (allowed).
+
+  EXPECT_THAT(rt_sigqueueinfo(getpid(), SIGUSR1, &uinfo), SyscallSucceeds());
+  EXPECT_TRUE(has_saved_info);
+  EXPECT_EQ(saved_info.si_signo, SIGUSR1);
+
+  ClearSavedInfo();
+
+  EXPECT_THAT(rt_sigqueueinfo(getpid(), SIGUSR2, &uinfo), SyscallSucceeds());
+  EXPECT_TRUE(has_saved_info);
+  EXPECT_EQ(saved_info.si_signo, SIGUSR2);
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
+
+int main(int argc, char** argv) {
+  // These tests depend on delivering SIGUSR1/2 to the main thread (so they can
+  // synchronously check has_saved_info). Block these so that any other threads
+  // created by TestInit will also have them blocked.
+  sigset_t set;
+  sigemptyset(&set);
+  sigaddset(&set, SIGUSR1);
+  sigaddset(&set, SIGUSR2);
+  TEST_PCHECK(sigprocmask(SIG_BLOCK, &set, nullptr) == 0);
+
+  gvisor::testing::TestInit(&argc, &argv);
+  return gvisor::testing::RunAllTests();
+}
diff --git a/test/syscalls/linux/sched.cc b/test/syscalls/linux/sched.cc
new file mode 100644
index 000000000..735e99411
--- /dev/null
+++ b/test/syscalls/linux/sched.cc
@@ -0,0 +1,71 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <sched.h>
+
+#include "gtest/gtest.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// In linux, pid is limited to 29 bits because how futex is implemented.
+constexpr int kImpossiblePID = (1 << 29) + 1;
+
+TEST(SchedGetparamTest, ReturnsZero) {
+  struct sched_param param;
+  EXPECT_THAT(sched_getparam(getpid(), &param), SyscallSucceeds());
+  EXPECT_EQ(param.sched_priority, 0);
+  EXPECT_THAT(sched_getparam(/*pid=*/0, &param), SyscallSucceeds());
+  EXPECT_EQ(param.sched_priority, 0);
+}
+
+TEST(SchedGetparamTest, InvalidPIDReturnsEINVAL) {
+  struct sched_param param;
+  EXPECT_THAT(sched_getparam(/*pid=*/-1, &param),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(SchedGetparamTest, ImpossiblePIDReturnsESRCH) {
+  struct sched_param param;
+  EXPECT_THAT(sched_getparam(kImpossiblePID, &param),
+              SyscallFailsWithErrno(ESRCH));
+}
+
+TEST(SchedGetparamTest, NullParamReturnsEINVAL) {
+  EXPECT_THAT(sched_getparam(0, nullptr), SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(SchedGetschedulerTest, ReturnsSchedOther) {
+  EXPECT_THAT(sched_getscheduler(getpid()),
+              SyscallSucceedsWithValue(SCHED_OTHER));
+  EXPECT_THAT(sched_getscheduler(/*pid=*/0),
+              SyscallSucceedsWithValue(SCHED_OTHER));
+}
+
+TEST(SchedGetschedulerTest, ReturnsEINVAL) {
+  EXPECT_THAT(sched_getscheduler(/*pid=*/-1), SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(SchedGetschedulerTest, ReturnsESRCH) {
+  EXPECT_THAT(sched_getscheduler(kImpossiblePID), SyscallFailsWithErrno(ESRCH));
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/sched_yield.cc b/test/syscalls/linux/sched_yield.cc
new file mode 100644
index 000000000..5d24f5b58
--- /dev/null
+++ b/test/syscalls/linux/sched_yield.cc
@@ -0,0 +1,33 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sched.h>
+
+#include "gtest/gtest.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(SchedYieldTest, Success) {
+  EXPECT_THAT(sched_yield(), SyscallSucceeds());
+  EXPECT_THAT(sched_yield(), SyscallSucceeds());
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/seccomp.cc b/test/syscalls/linux/seccomp.cc
new file mode 100644
index 000000000..ce88d90dd
--- /dev/null
+++ b/test/syscalls/linux/seccomp.cc
@@ -0,0 +1,425 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <linux/audit.h>
+#include <linux/filter.h>
+#include <linux/seccomp.h>
+#include <pthread.h>
+#include <sched.h>
+#include <signal.h>
+#include <string.h>
+#include <sys/prctl.h>
+#include <sys/syscall.h>
+#include <time.h>
+#include <ucontext.h>
+#include <unistd.h>
+
+#include <atomic>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/base/macros.h"
+#include "test/util/logging.h"
+#include "test/util/memory_util.h"
+#include "test/util/multiprocess_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/proc_util.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+#ifndef SYS_SECCOMP
+#define SYS_SECCOMP 1
+#endif
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// A syscall not implemented by Linux that we don't expect to be called.
+#ifdef __x86_64__
+constexpr uint32_t kFilteredSyscall = SYS_vserver;
+#elif __aarch64__
+// Use the last of arch_specific_syscalls which are not implemented on arm64.
+constexpr uint32_t kFilteredSyscall = __NR_arch_specific_syscall + 15;
+#endif
+
+// Applies a seccomp-bpf filter that returns `filtered_result` for
+// `sysno` and allows all other syscalls. Async-signal-safe.
+void ApplySeccompFilter(uint32_t sysno, uint32_t filtered_result,
+                        uint32_t flags = 0) {
+  // "Prior to [PR_SET_SECCOMP], the task must call prctl(PR_SET_NO_NEW_PRIVS,
+  // 1) or run with CAP_SYS_ADMIN privileges in its namespace." -
+  // Documentation/prctl/seccomp_filter.txt
+  //
+  // prctl(PR_SET_NO_NEW_PRIVS, 1) may be called repeatedly; calls after the
+  // first are no-ops.
+  TEST_PCHECK(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) == 0);
+  MaybeSave();
+
+  struct sock_filter filter[] = {
+    // A = seccomp_data.arch
+    BPF_STMT(BPF_LD | BPF_ABS | BPF_W, 4),
+#if defined(__x86_64__)
+    // if (A != AUDIT_ARCH_X86_64) goto kill
+    BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, AUDIT_ARCH_X86_64, 0, 4),
+#elif defined(__aarch64__)
+    // if (A != AUDIT_ARCH_AARCH64) goto kill
+    BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, AUDIT_ARCH_AARCH64, 0, 4),
+#else
+#error "Unknown architecture"
+#endif
+    // A = seccomp_data.nr
+    BPF_STMT(BPF_LD | BPF_ABS | BPF_W, 0),
+    // if (A != sysno) goto allow
+    BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, sysno, 0, 1),
+    // return filtered_result
+    BPF_STMT(BPF_RET | BPF_K, filtered_result),
+    // allow: return SECCOMP_RET_ALLOW
+    BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
+    // kill: return SECCOMP_RET_KILL
+    BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL),
+  };
+  struct sock_fprog prog;
+  prog.len = ABSL_ARRAYSIZE(filter);
+  prog.filter = filter;
+  if (flags) {
+    TEST_CHECK(syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER, flags, &prog) ==
+               0);
+  } else {
+    TEST_PCHECK(prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0) == 0);
+  }
+  MaybeSave();
+}
+
+// Wrapper for sigaction. Async-signal-safe.
+void RegisterSignalHandler(int signum,
+                           void (*handler)(int, siginfo_t*, void*)) {
+  struct sigaction sa = {};
+  sa.sa_sigaction = handler;
+  sigemptyset(&sa.sa_mask);
+  sa.sa_flags = SA_SIGINFO;
+  TEST_PCHECK(sigaction(signum, &sa, nullptr) == 0);
+  MaybeSave();
+}
+
+// All of the following tests execute in a subprocess to ensure that each test
+// is run in a separate process. This avoids cross-contamination of seccomp
+// state between tests, and is necessary to ensure that test processes killed
+// by SECCOMP_RET_KILL are single-threaded (since SECCOMP_RET_KILL only kills
+// the offending thread, not the whole thread group).
+
+TEST(SeccompTest, RetKillCausesDeathBySIGSYS) {
+  pid_t const pid = fork();
+  if (pid == 0) {
+    // Register a signal handler for SIGSYS that we don't expect to be invoked.
+    RegisterSignalHandler(
+        SIGSYS, +[](int, siginfo_t*, void*) { _exit(1); });
+    ApplySeccompFilter(kFilteredSyscall, SECCOMP_RET_KILL);
+    syscall(kFilteredSyscall);
+    TEST_CHECK_MSG(false, "Survived invocation of test syscall");
+  }
+  ASSERT_THAT(pid, SyscallSucceeds());
+  int status;
+  ASSERT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid));
+  EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGSYS)
+      << "status " << status;
+}
+
+TEST(SeccompTest, RetKillOnlyKillsOneThread) {
+  Mapping stack = ASSERT_NO_ERRNO_AND_VALUE(
+      MmapAnon(2 * kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE));
+
+  pid_t const pid = fork();
+  if (pid == 0) {
+    // Register a signal handler for SIGSYS that we don't expect to be invoked.
+    RegisterSignalHandler(
+        SIGSYS, +[](int, siginfo_t*, void*) { _exit(1); });
+    ApplySeccompFilter(kFilteredSyscall, SECCOMP_RET_KILL);
+    // Pass CLONE_VFORK to block the original thread in the child process until
+    // the clone thread exits with SIGSYS.
+    //
+    // N.B. clone(2) is not officially async-signal-safe, but at minimum glibc's
+    // x86_64 implementation is safe. See glibc
+    // sysdeps/unix/sysv/linux/x86_64/clone.S.
+    clone(
+        +[](void* arg) {
+          syscall(kFilteredSyscall);  // should kill the thread
+          _exit(1);                   // should be unreachable
+          return 2;  // should be very unreachable, shut up the compiler
+        },
+        stack.endptr(),
+        CLONE_FILES | CLONE_FS | CLONE_SIGHAND | CLONE_THREAD | CLONE_VM |
+            CLONE_VFORK,
+        nullptr);
+    _exit(0);
+  }
+  ASSERT_THAT(pid, SyscallSucceeds());
+  int status;
+  ASSERT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid));
+  EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+      << "status " << status;
+}
+
+TEST(SeccompTest, RetTrapCausesSIGSYS) {
+  pid_t const pid = fork();
+  if (pid == 0) {
+    constexpr uint16_t kTrapValue = 0xdead;
+    RegisterSignalHandler(
+        SIGSYS, +[](int signo, siginfo_t* info, void* ucv) {
+          ucontext_t* uc = static_cast<ucontext_t*>(ucv);
+          // This is a signal handler, so we must stay async-signal-safe.
+          TEST_CHECK(info->si_signo == SIGSYS);
+          TEST_CHECK(info->si_code == SYS_SECCOMP);
+          TEST_CHECK(info->si_errno == kTrapValue);
+          TEST_CHECK(info->si_call_addr != nullptr);
+          TEST_CHECK(info->si_syscall == kFilteredSyscall);
+#if defined(__x86_64__)
+          TEST_CHECK(info->si_arch == AUDIT_ARCH_X86_64);
+          TEST_CHECK(uc->uc_mcontext.gregs[REG_RAX] == kFilteredSyscall);
+#elif defined(__aarch64__)
+          TEST_CHECK(info->si_arch == AUDIT_ARCH_AARCH64);
+          TEST_CHECK(uc->uc_mcontext.regs[8] == kFilteredSyscall);
+#endif  // defined(__x86_64__)
+          _exit(0);
+        });
+    ApplySeccompFilter(kFilteredSyscall, SECCOMP_RET_TRAP | kTrapValue);
+    syscall(kFilteredSyscall);
+    TEST_CHECK_MSG(false, "Survived invocation of test syscall");
+  }
+  ASSERT_THAT(pid, SyscallSucceeds());
+  int status;
+  ASSERT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid));
+  EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+      << "status " << status;
+}
+
+#ifdef __x86_64__
+
+constexpr uint64_t kVsyscallTimeEntry = 0xffffffffff600400;
+
+time_t vsyscall_time(time_t* t) {
+  return reinterpret_cast<time_t (*)(time_t*)>(kVsyscallTimeEntry)(t);
+}
+
+TEST(SeccompTest, SeccompAppliesToVsyscall) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsVsyscallEnabled()));
+
+  pid_t const pid = fork();
+  if (pid == 0) {
+    constexpr uint16_t kTrapValue = 0xdead;
+    RegisterSignalHandler(
+        SIGSYS, +[](int signo, siginfo_t* info, void* ucv) {
+          ucontext_t* uc = static_cast<ucontext_t*>(ucv);
+          // This is a signal handler, so we must stay async-signal-safe.
+          TEST_CHECK(info->si_signo == SIGSYS);
+          TEST_CHECK(info->si_code == SYS_SECCOMP);
+          TEST_CHECK(info->si_errno == kTrapValue);
+          TEST_CHECK(info->si_call_addr != nullptr);
+          TEST_CHECK(info->si_syscall == SYS_time);
+          TEST_CHECK(info->si_arch == AUDIT_ARCH_X86_64);
+          TEST_CHECK(uc->uc_mcontext.gregs[REG_RAX] == SYS_time);
+          _exit(0);
+        });
+    ApplySeccompFilter(SYS_time, SECCOMP_RET_TRAP | kTrapValue);
+    vsyscall_time(nullptr);  // Should result in death.
+    TEST_CHECK_MSG(false, "Survived invocation of test syscall");
+  }
+  ASSERT_THAT(pid, SyscallSucceeds());
+  int status;
+  ASSERT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid));
+  EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+      << "status " << status;
+}
+
+TEST(SeccompTest, RetKillVsyscallCausesDeathBySIGSYS) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsVsyscallEnabled()));
+
+  pid_t const pid = fork();
+  if (pid == 0) {
+    // Register a signal handler for SIGSYS that we don't expect to be invoked.
+    RegisterSignalHandler(
+        SIGSYS, +[](int, siginfo_t*, void*) { _exit(1); });
+    ApplySeccompFilter(SYS_time, SECCOMP_RET_KILL);
+    vsyscall_time(nullptr);  // Should result in death.
+    TEST_CHECK_MSG(false, "Survived invocation of test syscall");
+  }
+  ASSERT_THAT(pid, SyscallSucceeds());
+  int status;
+  ASSERT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid));
+  EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGSYS)
+      << "status " << status;
+}
+
+#endif  // defined(__x86_64__)
+
+TEST(SeccompTest, RetTraceWithoutPtracerReturnsENOSYS) {
+  pid_t const pid = fork();
+  if (pid == 0) {
+    ApplySeccompFilter(kFilteredSyscall, SECCOMP_RET_TRACE);
+    TEST_CHECK(syscall(kFilteredSyscall) == -1 && errno == ENOSYS);
+    _exit(0);
+  }
+  ASSERT_THAT(pid, SyscallSucceeds());
+  int status;
+  ASSERT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid));
+  EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+      << "status " << status;
+}
+
+TEST(SeccompTest, RetErrnoReturnsErrno) {
+  pid_t const pid = fork();
+  if (pid == 0) {
+    // ENOTNAM: "Not a XENIX named type file"
+    ApplySeccompFilter(kFilteredSyscall, SECCOMP_RET_ERRNO | ENOTNAM);
+    TEST_CHECK(syscall(kFilteredSyscall) == -1 && errno == ENOTNAM);
+    _exit(0);
+  }
+  ASSERT_THAT(pid, SyscallSucceeds());
+  int status;
+  ASSERT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid));
+  EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+      << "status " << status;
+}
+
+TEST(SeccompTest, RetAllowAllowsSyscall) {
+  pid_t const pid = fork();
+  if (pid == 0) {
+    ApplySeccompFilter(kFilteredSyscall, SECCOMP_RET_ALLOW);
+    TEST_CHECK(syscall(kFilteredSyscall) == -1 && errno == ENOSYS);
+    _exit(0);
+  }
+  ASSERT_THAT(pid, SyscallSucceeds());
+  int status;
+  ASSERT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid));
+  EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+      << "status " << status;
+}
+
+// This test will validate that TSYNC will apply to all threads.
+TEST(SeccompTest, TsyncAppliesToAllThreads) {
+  Mapping stack = ASSERT_NO_ERRNO_AND_VALUE(
+      MmapAnon(2 * kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE));
+
+  // We don't want to apply this policy to other test runner threads, so fork.
+  const pid_t pid = fork();
+
+  if (pid == 0) {
+    // First check that we receive a ENOSYS before the policy is applied.
+    TEST_CHECK(syscall(kFilteredSyscall) == -1 && errno == ENOSYS);
+
+    // N.B. clone(2) is not officially async-signal-safe, but at minimum glibc's
+    // x86_64 implementation is safe. See glibc
+    // sysdeps/unix/sysv/linux/x86_64/clone.S.
+    clone(
+        +[](void* arg) {
+          ApplySeccompFilter(kFilteredSyscall, SECCOMP_RET_ERRNO | ENOTNAM,
+                             SECCOMP_FILTER_FLAG_TSYNC);
+          return 0;
+        },
+        stack.endptr(),
+        CLONE_FILES | CLONE_FS | CLONE_SIGHAND | CLONE_THREAD | CLONE_VM |
+            CLONE_VFORK,
+        nullptr);
+
+    // Because we're using CLONE_VFORK this thread will be blocked until
+    // the second thread has released resources to our virtual memory, since
+    // we're not execing that will happen on _exit.
+
+    // Now verify that the policy applied to this thread too.
+    TEST_CHECK(syscall(kFilteredSyscall) == -1 && errno == ENOTNAM);
+    _exit(0);
+  }
+
+  ASSERT_THAT(pid, SyscallSucceeds());
+  int status = 0;
+  ASSERT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid));
+  EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+      << "status " << status;
+}
+
+// This test will validate that seccomp(2) rejects unsupported flags.
+TEST(SeccompTest, SeccompRejectsUnknownFlags) {
+  constexpr uint32_t kInvalidFlag = 123;
+  ASSERT_THAT(
+      syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER, kInvalidFlag, nullptr),
+      SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(SeccompTest, LeastPermissiveFilterReturnValueApplies) {
+  // This is RetKillCausesDeathBySIGSYS, plus extra filters before and after the
+  // one that causes the kill that should be ignored.
+  pid_t const pid = fork();
+  if (pid == 0) {
+    RegisterSignalHandler(
+        SIGSYS, +[](int, siginfo_t*, void*) { _exit(1); });
+    ApplySeccompFilter(kFilteredSyscall, SECCOMP_RET_TRACE);
+    ApplySeccompFilter(kFilteredSyscall, SECCOMP_RET_KILL);
+    ApplySeccompFilter(kFilteredSyscall, SECCOMP_RET_ERRNO | ENOTNAM);
+    syscall(kFilteredSyscall);
+    TEST_CHECK_MSG(false, "Survived invocation of test syscall");
+  }
+  ASSERT_THAT(pid, SyscallSucceeds());
+  int status;
+  ASSERT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid));
+  EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGSYS)
+      << "status " << status;
+}
+
+// Passed as argv[1] to cause the test binary to invoke kFilteredSyscall and
+// exit. Not a real flag since flag parsing happens during initialization,
+// which may create threads.
+constexpr char kInvokeFilteredSyscallFlag[] = "--seccomp_test_child";
+
+TEST(SeccompTest, FiltersPreservedAcrossForkAndExecve) {
+  ExecveArray const grandchild_argv(
+      {"/proc/self/exe", kInvokeFilteredSyscallFlag});
+
+  pid_t const pid = fork();
+  if (pid == 0) {
+    ApplySeccompFilter(kFilteredSyscall, SECCOMP_RET_KILL);
+    pid_t const grandchild_pid = fork();
+    if (grandchild_pid == 0) {
+      execve(grandchild_argv.get()[0], grandchild_argv.get(),
+             /* envp = */ nullptr);
+      TEST_PCHECK_MSG(false, "execve failed");
+    }
+    int status;
+    TEST_PCHECK(waitpid(grandchild_pid, &status, 0) == grandchild_pid);
+    TEST_CHECK(WIFSIGNALED(status) && WTERMSIG(status) == SIGSYS);
+    _exit(0);
+  }
+  ASSERT_THAT(pid, SyscallSucceeds());
+  int status;
+  ASSERT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid));
+  EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+      << "status " << status;
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
+
+int main(int argc, char** argv) {
+  if (argc >= 2 &&
+      strcmp(argv[1], gvisor::testing::kInvokeFilteredSyscallFlag) == 0) {
+    syscall(gvisor::testing::kFilteredSyscall);
+    exit(0);
+  }
+
+  gvisor::testing::TestInit(&argc, &argv);
+  return gvisor::testing::RunAllTests();
+}
diff --git a/test/syscalls/linux/select.cc b/test/syscalls/linux/select.cc
new file mode 100644
index 000000000..be2364fb8
--- /dev/null
+++ b/test/syscalls/linux/select.cc
@@ -0,0 +1,168 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <sys/resource.h>
+#include <sys/select.h>
+#include <sys/time.h>
+
+#include <climits>
+#include <csignal>
+#include <cstdio>
+
+#include "gtest/gtest.h"
+#include "absl/time/time.h"
+#include "test/syscalls/linux/base_poll_test.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/multiprocess_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/rlimit_util.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+class SelectTest : public BasePollTest {
+ protected:
+  void SetUp() override { BasePollTest::SetUp(); }
+  void TearDown() override { BasePollTest::TearDown(); }
+};
+
+// See that when there are no FD sets, select behaves like sleep.
+TEST_F(SelectTest, NullFds) {
+  struct timeval timeout = absl::ToTimeval(absl::Milliseconds(10));
+  ASSERT_THAT(select(0, nullptr, nullptr, nullptr, &timeout),
+              SyscallSucceeds());
+  EXPECT_EQ(timeout.tv_sec, 0);
+  EXPECT_EQ(timeout.tv_usec, 0);
+
+  timeout = absl::ToTimeval(absl::Milliseconds(10));
+  ASSERT_THAT(select(1, nullptr, nullptr, nullptr, &timeout),
+              SyscallSucceeds());
+  EXPECT_EQ(timeout.tv_sec, 0);
+  EXPECT_EQ(timeout.tv_usec, 0);
+}
+
+TEST_F(SelectTest, NegativeNfds) {
+  EXPECT_THAT(select(-1, nullptr, nullptr, nullptr, nullptr),
+              SyscallFailsWithErrno(EINVAL));
+  EXPECT_THAT(select(-100000, nullptr, nullptr, nullptr, nullptr),
+              SyscallFailsWithErrno(EINVAL));
+  EXPECT_THAT(select(INT_MIN, nullptr, nullptr, nullptr, nullptr),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_F(SelectTest, ClosedFds) {
+  auto temp_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(temp_file.path(), O_RDONLY));
+
+  // We can't rely on a file descriptor being closed in a multi threaded
+  // application so fork to get a clean process.
+  EXPECT_THAT(InForkedProcess([&] {
+                int fd_num = fd.get();
+                fd.reset();
+
+                fd_set read_set;
+                FD_ZERO(&read_set);
+                FD_SET(fd_num, &read_set);
+
+                struct timeval timeout =
+                    absl::ToTimeval(absl::Milliseconds(10));
+                TEST_PCHECK(select(fd_num + 1, &read_set, nullptr, nullptr,
+                                   &timeout) != 0);
+                TEST_PCHECK(errno == EBADF);
+              }),
+              IsPosixErrorOkAndHolds(0));
+}
+
+TEST_F(SelectTest, ZeroTimeout) {
+  struct timeval timeout = {};
+  EXPECT_THAT(select(1, nullptr, nullptr, nullptr, &timeout),
+              SyscallSucceeds());
+  // Ignore timeout as its value is now undefined.
+}
+
+// If random S/R interrupts the select, SIGALRM may be delivered before select
+// restarts, causing the select to hang forever.
+TEST_F(SelectTest, NoTimeout_NoRandomSave) {
+  // When there's no timeout, select may never return so set a timer.
+  SetTimer(absl::Milliseconds(100));
+  // See that we get interrupted by the timer.
+  ASSERT_THAT(select(1, nullptr, nullptr, nullptr, nullptr),
+              SyscallFailsWithErrno(EINTR));
+  EXPECT_TRUE(TimerFired());
+}
+
+TEST_F(SelectTest, InvalidTimeoutNegative) {
+  struct timeval timeout = absl::ToTimeval(absl::Microseconds(-1));
+  EXPECT_THAT(select(1, nullptr, nullptr, nullptr, &timeout),
+              SyscallFailsWithErrno(EINVAL));
+  // Ignore timeout as its value is now undefined.
+}
+
+// Verify that a signal interrupts select.
+//
+// If random S/R interrupts the select, SIGALRM may be delivered before select
+// restarts, causing the select to hang forever.
+TEST_F(SelectTest, InterruptedBySignal_NoRandomSave) {
+  absl::Duration duration(absl::Seconds(5));
+  struct timeval timeout = absl::ToTimeval(duration);
+  SetTimer(absl::Milliseconds(100));
+  ASSERT_FALSE(TimerFired());
+  ASSERT_THAT(select(1, nullptr, nullptr, nullptr, &timeout),
+              SyscallFailsWithErrno(EINTR));
+  EXPECT_TRUE(TimerFired());
+  // Ignore timeout as its value is now undefined.
+}
+
+TEST_F(SelectTest, IgnoreBitsAboveNfds) {
+  // fd_set is a bit array with at least FD_SETSIZE bits. Test that bits
+  // corresponding to file descriptors above nfds are ignored.
+  fd_set read_set;
+  FD_ZERO(&read_set);
+  constexpr int kNfds = 1;
+  for (int fd = kNfds; fd < FD_SETSIZE; fd++) {
+    FD_SET(fd, &read_set);
+  }
+  // Pass a zero timeout so that select returns immediately.
+  struct timeval timeout = {};
+  EXPECT_THAT(select(kNfds, &read_set, nullptr, nullptr, &timeout),
+              SyscallSucceedsWithValue(0));
+}
+
+// This test illustrates Linux's behavior of 'select' calls passing after
+// setrlimit RLIMIT_NOFILE is called. In particular, versions of sshd rely on
+// this behavior. See b/122318458.
+TEST_F(SelectTest, SetrlimitCallNOFILE) {
+  fd_set read_set;
+  FD_ZERO(&read_set);
+  timeval timeout = {};
+  const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(
+      Open(NewTempAbsPath(), O_RDONLY | O_CREAT, S_IRUSR));
+
+  Cleanup reset_rlimit =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedSetSoftRlimit(RLIMIT_NOFILE, 0));
+
+  FD_SET(fd.get(), &read_set);
+  // this call with zero timeout should return immediately
+  EXPECT_THAT(select(fd.get() + 1, &read_set, nullptr, nullptr, &timeout),
+              SyscallSucceeds());
+}
+
+}  // namespace
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/semaphore.cc b/test/syscalls/linux/semaphore.cc
new file mode 100644
index 000000000..e9b131ca9
--- /dev/null
+++ b/test/syscalls/linux/semaphore.cc
@@ -0,0 +1,491 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sys/ipc.h>
+#include <sys/sem.h>
+#include <sys/types.h>
+
+#include <atomic>
+#include <cerrno>
+#include <ctime>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/base/macros.h"
+#include "absl/memory/memory.h"
+#include "absl/synchronization/mutex.h"
+#include "absl/time/clock.h"
+#include "test/util/capability_util.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+class AutoSem {
+ public:
+  explicit AutoSem(int id) : id_(id) {}
+  ~AutoSem() {
+    if (id_ >= 0) {
+      EXPECT_THAT(semctl(id_, 0, IPC_RMID), SyscallSucceeds());
+    }
+  }
+
+  int release() {
+    int old = id_;
+    id_ = -1;
+    return old;
+  }
+
+  int get() { return id_; }
+
+ private:
+  int id_ = -1;
+};
+
+TEST(SemaphoreTest, SemGet) {
+  // Test creation and lookup.
+  AutoSem sem(semget(1, 10, IPC_CREAT));
+  ASSERT_THAT(sem.get(), SyscallSucceeds());
+  EXPECT_THAT(semget(1, 10, IPC_CREAT), SyscallSucceedsWithValue(sem.get()));
+  EXPECT_THAT(semget(1, 9, IPC_CREAT), SyscallSucceedsWithValue(sem.get()));
+
+  // Creation and lookup failure cases.
+  EXPECT_THAT(semget(1, 11, IPC_CREAT), SyscallFailsWithErrno(EINVAL));
+  EXPECT_THAT(semget(1, -1, IPC_CREAT), SyscallFailsWithErrno(EINVAL));
+  EXPECT_THAT(semget(1, 10, IPC_CREAT | IPC_EXCL),
+              SyscallFailsWithErrno(EEXIST));
+  EXPECT_THAT(semget(2, 1, 0), SyscallFailsWithErrno(ENOENT));
+  EXPECT_THAT(semget(2, 0, IPC_CREAT), SyscallFailsWithErrno(EINVAL));
+
+  // Private semaphores never conflict.
+  AutoSem sem2(semget(IPC_PRIVATE, 1, 0));
+  AutoSem sem3(semget(IPC_PRIVATE, 1, 0));
+  ASSERT_THAT(sem2.get(), SyscallSucceeds());
+  EXPECT_NE(sem.get(), sem2.get());
+  ASSERT_THAT(sem3.get(), SyscallSucceeds());
+  EXPECT_NE(sem3.get(), sem2.get());
+}
+
+// Tests simple operations that shouldn't block in a single-thread.
+TEST(SemaphoreTest, SemOpSingleNoBlock) {
+  AutoSem sem(semget(IPC_PRIVATE, 1, 0600 | IPC_CREAT));
+  ASSERT_THAT(sem.get(), SyscallSucceeds());
+
+  struct sembuf buf = {};
+  buf.sem_op = 1;
+  ASSERT_THAT(semop(sem.get(), &buf, 1), SyscallSucceeds());
+
+  buf.sem_op = -1;
+  ASSERT_THAT(semop(sem.get(), &buf, 1), SyscallSucceeds());
+
+  buf.sem_op = 0;
+  ASSERT_THAT(semop(sem.get(), &buf, 1), SyscallSucceeds());
+
+  // Error cases with invalid values.
+  ASSERT_THAT(semop(sem.get() + 1, &buf, 1), SyscallFailsWithErrno(EINVAL));
+
+  buf.sem_num = 1;
+  ASSERT_THAT(semop(sem.get(), &buf, 1), SyscallFailsWithErrno(EFBIG));
+
+  ASSERT_THAT(semop(sem.get(), nullptr, 0), SyscallFailsWithErrno(EINVAL));
+}
+
+// Tests multiple operations that shouldn't block in a single-thread.
+TEST(SemaphoreTest, SemOpMultiNoBlock) {
+  AutoSem sem(semget(IPC_PRIVATE, 4, 0600 | IPC_CREAT));
+  ASSERT_THAT(sem.get(), SyscallSucceeds());
+
+  struct sembuf bufs[5] = {};
+  bufs[0].sem_num = 0;
+  bufs[0].sem_op = 10;
+  bufs[0].sem_flg = 0;
+
+  bufs[1].sem_num = 1;
+  bufs[1].sem_op = 2;
+  bufs[1].sem_flg = 0;
+
+  bufs[2].sem_num = 2;
+  bufs[2].sem_op = 3;
+  bufs[2].sem_flg = 0;
+
+  bufs[3].sem_num = 0;
+  bufs[3].sem_op = -5;
+  bufs[3].sem_flg = 0;
+
+  bufs[4].sem_num = 2;
+  bufs[4].sem_op = 2;
+  bufs[4].sem_flg = 0;
+
+  ASSERT_THAT(semop(sem.get(), bufs, ABSL_ARRAYSIZE(bufs)), SyscallSucceeds());
+
+  ASSERT_THAT(semctl(sem.get(), 0, GETVAL), SyscallSucceedsWithValue(5));
+  ASSERT_THAT(semctl(sem.get(), 1, GETVAL), SyscallSucceedsWithValue(2));
+  ASSERT_THAT(semctl(sem.get(), 2, GETVAL), SyscallSucceedsWithValue(5));
+  ASSERT_THAT(semctl(sem.get(), 3, GETVAL), SyscallSucceedsWithValue(0));
+
+  for (auto& b : bufs) {
+    b.sem_op = -b.sem_op;
+  }
+  // 0 and 3 order must be reversed, otherwise it will block.
+  std::swap(bufs[0].sem_op, bufs[3].sem_op);
+  ASSERT_THAT(RetryEINTR(semop)(sem.get(), bufs, ABSL_ARRAYSIZE(bufs)),
+              SyscallSucceeds());
+
+  // All semaphores should be back to 0 now.
+  for (size_t i = 0; i < 4; ++i) {
+    ASSERT_THAT(semctl(sem.get(), i, GETVAL), SyscallSucceedsWithValue(0));
+  }
+}
+
+// Makes a best effort attempt to ensure that operation would block.
+TEST(SemaphoreTest, SemOpBlock) {
+  AutoSem sem(semget(IPC_PRIVATE, 1, 0600 | IPC_CREAT));
+  ASSERT_THAT(sem.get(), SyscallSucceeds());
+
+  std::atomic<int> blocked = ATOMIC_VAR_INIT(1);
+  ScopedThread th([&sem, &blocked] {
+    absl::SleepFor(absl::Milliseconds(100));
+    ASSERT_EQ(blocked.load(), 1);
+
+    struct sembuf buf = {};
+    buf.sem_op = 1;
+    ASSERT_THAT(RetryEINTR(semop)(sem.get(), &buf, 1), SyscallSucceeds());
+  });
+
+  struct sembuf buf = {};
+  buf.sem_op = -1;
+  ASSERT_THAT(RetryEINTR(semop)(sem.get(), &buf, 1), SyscallSucceeds());
+  blocked.store(0);
+}
+
+// Tests that IPC_NOWAIT returns with no wait.
+TEST(SemaphoreTest, SemOpNoBlock) {
+  AutoSem sem(semget(IPC_PRIVATE, 1, 0600 | IPC_CREAT));
+  ASSERT_THAT(sem.get(), SyscallSucceeds());
+
+  struct sembuf buf = {};
+  buf.sem_flg = IPC_NOWAIT;
+
+  buf.sem_op = -1;
+  ASSERT_THAT(semop(sem.get(), &buf, 1), SyscallFailsWithErrno(EAGAIN));
+
+  buf.sem_op = 1;
+  ASSERT_THAT(semop(sem.get(), &buf, 1), SyscallSucceeds());
+
+  buf.sem_op = 0;
+  ASSERT_THAT(semop(sem.get(), &buf, 1), SyscallFailsWithErrno(EAGAIN));
+}
+
+// Test runs 2 threads, one signals the other waits the same number of times.
+TEST(SemaphoreTest, SemOpSimple) {
+  AutoSem sem(semget(IPC_PRIVATE, 1, 0600 | IPC_CREAT));
+  ASSERT_THAT(sem.get(), SyscallSucceeds());
+
+  constexpr size_t kLoops = 100;
+  ScopedThread th([&sem] {
+    struct sembuf buf = {};
+    buf.sem_op = 1;
+    for (size_t i = 0; i < kLoops; i++) {
+      // Sleep to prevent making all increments in one shot without letting
+      // the waiter wait.
+      absl::SleepFor(absl::Milliseconds(1));
+      ASSERT_THAT(semop(sem.get(), &buf, 1), SyscallSucceeds());
+    }
+  });
+
+  struct sembuf buf = {};
+  buf.sem_op = -1;
+  for (size_t i = 0; i < kLoops; i++) {
+    ASSERT_THAT(RetryEINTR(semop)(sem.get(), &buf, 1), SyscallSucceeds());
+  }
+}
+
+// Tests that semaphore can be removed while there are waiters.
+// NoRandomSave: Test relies on timing that random save throws off.
+TEST(SemaphoreTest, SemOpRemoveWithWaiter_NoRandomSave) {
+  AutoSem sem(semget(IPC_PRIVATE, 2, 0600 | IPC_CREAT));
+  ASSERT_THAT(sem.get(), SyscallSucceeds());
+
+  ScopedThread th([&sem] {
+    absl::SleepFor(absl::Milliseconds(250));
+    ASSERT_THAT(semctl(sem.release(), 0, IPC_RMID), SyscallSucceeds());
+  });
+
+  // This must happen before IPC_RMID runs above. Otherwise it fails with EINVAL
+  // instead because the semaphore has already been removed.
+  struct sembuf buf = {};
+  buf.sem_op = -1;
+  ASSERT_THAT(RetryEINTR(semop)(sem.get(), &buf, 1),
+              SyscallFailsWithErrno(EIDRM));
+}
+
+// Semaphore isn't fair. It will execute any waiter that can satisfy the
+// request even if it gets in front of other waiters.
+TEST(SemaphoreTest, SemOpBestFitExecution) {
+  AutoSem sem(semget(IPC_PRIVATE, 1, 0600 | IPC_CREAT));
+  ASSERT_THAT(sem.get(), SyscallSucceeds());
+
+  ScopedThread th([&sem] {
+    struct sembuf buf = {};
+    buf.sem_op = -2;
+    ASSERT_THAT(RetryEINTR(semop)(sem.get(), &buf, 1), SyscallFails());
+    // Ensure that wait will only unblock when the semaphore is removed. On
+    // EINTR retry it may race with deletion and return EINVAL.
+    ASSERT_TRUE(errno == EIDRM || errno == EINVAL) << "errno=" << errno;
+  });
+
+  // Ensures that '-1' below will unblock even though '-10' above is waiting
+  // for the same semaphore.
+  for (size_t i = 0; i < 10; ++i) {
+    struct sembuf buf = {};
+    buf.sem_op = 1;
+    ASSERT_THAT(RetryEINTR(semop)(sem.get(), &buf, 1), SyscallSucceeds());
+
+    absl::SleepFor(absl::Milliseconds(10));
+
+    buf.sem_op = -1;
+    ASSERT_THAT(RetryEINTR(semop)(sem.get(), &buf, 1), SyscallSucceeds());
+  }
+
+  ASSERT_THAT(semctl(sem.release(), 0, IPC_RMID), SyscallSucceeds());
+}
+
+// Executes random operations in multiple threads and verify correctness.
+TEST(SemaphoreTest, SemOpRandom) {
+  // Don't do cooperative S/R tests because there are too many syscalls in
+  // this test,
+  const DisableSave ds;
+
+  AutoSem sem(semget(IPC_PRIVATE, 1, 0600 | IPC_CREAT));
+  ASSERT_THAT(sem.get(), SyscallSucceeds());
+
+  // Protects the seed below.
+  absl::Mutex mutex;
+  uint32_t seed = time(nullptr);
+
+  int count = 0;      // Tracks semaphore value.
+  bool done = false;  // Tells waiters to stop after signal threads are done.
+
+  // These threads will wait in a loop.
+  std::unique_ptr<ScopedThread> decs[5];
+  for (auto& dec : decs) {
+    dec = absl::make_unique<ScopedThread>([&sem, &mutex, &count, &seed, &done] {
+      for (size_t i = 0; i < 500; ++i) {
+        int16_t val;
+        {
+          absl::MutexLock l(&mutex);
+          if (done) {
+            return;
+          }
+          val = (rand_r(&seed) % 10 + 1);  // Rand between 1 and 10.
+          count -= val;
+        }
+        struct sembuf buf = {};
+        buf.sem_op = -val;
+        ASSERT_THAT(RetryEINTR(semop)(sem.get(), &buf, 1), SyscallSucceeds());
+        absl::SleepFor(absl::Milliseconds(val * 2));
+      }
+    });
+  }
+
+  // These threads will wait for zero in a loop.
+  std::unique_ptr<ScopedThread> zeros[5];
+  for (auto& zero : zeros) {
+    zero = absl::make_unique<ScopedThread>([&sem, &mutex, &done] {
+      for (size_t i = 0; i < 500; ++i) {
+        {
+          absl::MutexLock l(&mutex);
+          if (done) {
+            return;
+          }
+        }
+        struct sembuf buf = {};
+        buf.sem_op = 0;
+        ASSERT_THAT(RetryEINTR(semop)(sem.get(), &buf, 1), SyscallSucceeds());
+        absl::SleepFor(absl::Milliseconds(10));
+      }
+    });
+  }
+
+  // These threads will signal in a loop.
+  std::unique_ptr<ScopedThread> incs[5];
+  for (auto& inc : incs) {
+    inc = absl::make_unique<ScopedThread>([&sem, &mutex, &count, &seed] {
+      for (size_t i = 0; i < 500; ++i) {
+        int16_t val;
+        {
+          absl::MutexLock l(&mutex);
+          val = (rand_r(&seed) % 10 + 1);  // Rand between 1 and 10.
+          count += val;
+        }
+        struct sembuf buf = {};
+        buf.sem_op = val;
+        ASSERT_THAT(semop(sem.get(), &buf, 1), SyscallSucceeds());
+        absl::SleepFor(absl::Milliseconds(val * 2));
+      }
+    });
+  }
+
+  // First wait for signal threads to be done.
+  for (auto& inc : incs) {
+    inc->Join();
+  }
+
+  // Now there could be waiters blocked (remember operations are random).
+  // Notify waiters that we're done and signal semaphore just the right amount.
+  {
+    absl::MutexLock l(&mutex);
+    done = true;
+    struct sembuf buf = {};
+    buf.sem_op = -count;
+    ASSERT_THAT(semop(sem.get(), &buf, 1), SyscallSucceeds());
+  }
+
+  // Now all waiters should unblock and exit.
+  for (auto& dec : decs) {
+    dec->Join();
+  }
+  for (auto& zero : zeros) {
+    zero->Join();
+  }
+}
+
+TEST(SemaphoreTest, SemOpNamespace) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN)));
+
+  AutoSem sem(semget(123, 1, 0600 | IPC_CREAT | IPC_EXCL));
+  ASSERT_THAT(sem.get(), SyscallSucceeds());
+
+  ScopedThread([]() {
+    EXPECT_THAT(unshare(CLONE_NEWIPC), SyscallSucceeds());
+    AutoSem sem(semget(123, 1, 0600 | IPC_CREAT | IPC_EXCL));
+    ASSERT_THAT(sem.get(), SyscallSucceeds());
+  });
+}
+
+TEST(SemaphoreTest, SemCtlVal) {
+  AutoSem sem(semget(IPC_PRIVATE, 1, 0600 | IPC_CREAT));
+  ASSERT_THAT(sem.get(), SyscallSucceeds());
+
+  // Semaphore must start with 0.
+  EXPECT_THAT(semctl(sem.get(), 0, GETVAL), SyscallSucceedsWithValue(0));
+
+  // Increase value and ensure waiters are woken up.
+  ScopedThread th([&sem] {
+    struct sembuf buf = {};
+    buf.sem_op = -10;
+    ASSERT_THAT(RetryEINTR(semop)(sem.get(), &buf, 1), SyscallSucceeds());
+  });
+
+  ASSERT_THAT(semctl(sem.get(), 0, SETVAL, 9), SyscallSucceeds());
+  EXPECT_THAT(semctl(sem.get(), 0, GETVAL), SyscallSucceedsWithValue(9));
+
+  ASSERT_THAT(semctl(sem.get(), 0, SETVAL, 20), SyscallSucceeds());
+  const int value = semctl(sem.get(), 0, GETVAL);
+  // 10 or 20 because it could have raced with waiter above.
+  EXPECT_TRUE(value == 10 || value == 20) << "value=" << value;
+  th.Join();
+
+  // Set it back to 0 and ensure that waiters are woken up.
+  ScopedThread thZero([&sem] {
+    struct sembuf buf = {};
+    buf.sem_op = 0;
+    ASSERT_THAT(RetryEINTR(semop)(sem.get(), &buf, 1), SyscallSucceeds());
+  });
+  ASSERT_THAT(semctl(sem.get(), 0, SETVAL, 0), SyscallSucceeds());
+  EXPECT_THAT(semctl(sem.get(), 0, GETVAL), SyscallSucceedsWithValue(0));
+  thZero.Join();
+}
+
+TEST(SemaphoreTest, SemCtlValAll) {
+  AutoSem sem(semget(IPC_PRIVATE, 3, 0600 | IPC_CREAT));
+  ASSERT_THAT(sem.get(), SyscallSucceeds());
+
+  // Semaphores must start with 0.
+  uint16_t get[3] = {10, 10, 10};
+  EXPECT_THAT(semctl(sem.get(), 1, GETALL, get), SyscallSucceedsWithValue(0));
+  for (auto v : get) {
+    EXPECT_EQ(v, 0);
+  }
+
+  // SetAll and check that they were set.
+  uint16_t vals[3] = {0, 10, 20};
+  EXPECT_THAT(semctl(sem.get(), 1, SETALL, vals), SyscallSucceedsWithValue(0));
+  EXPECT_THAT(semctl(sem.get(), 1, GETALL, get), SyscallSucceedsWithValue(0));
+  for (size_t i = 0; i < ABSL_ARRAYSIZE(vals); ++i) {
+    EXPECT_EQ(get[i], vals[i]);
+  }
+
+  EXPECT_THAT(semctl(sem.get(), 1, SETALL, nullptr),
+              SyscallFailsWithErrno(EFAULT));
+}
+
+TEST(SemaphoreTest, SemCtlGetPid) {
+  AutoSem sem(semget(IPC_PRIVATE, 1, 0600 | IPC_CREAT));
+  ASSERT_THAT(sem.get(), SyscallSucceeds());
+
+  ASSERT_THAT(semctl(sem.get(), 0, SETVAL, 1), SyscallSucceeds());
+  EXPECT_THAT(semctl(sem.get(), 0, GETPID), SyscallSucceedsWithValue(getpid()));
+}
+
+TEST(SemaphoreTest, SemCtlGetPidFork) {
+  AutoSem sem(semget(IPC_PRIVATE, 1, 0600 | IPC_CREAT));
+  ASSERT_THAT(sem.get(), SyscallSucceeds());
+
+  const pid_t child_pid = fork();
+  if (child_pid == 0) {
+    TEST_PCHECK(semctl(sem.get(), 0, SETVAL, 1) == 0);
+    TEST_PCHECK(semctl(sem.get(), 0, GETPID) == getpid());
+
+    _exit(0);
+  }
+  ASSERT_THAT(child_pid, SyscallSucceeds());
+
+  int status;
+  ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0),
+              SyscallSucceedsWithValue(child_pid));
+  EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+      << " status " << status;
+}
+
+TEST(SemaphoreTest, SemIpcSet) {
+  // Drop CAP_IPC_OWNER which allows us to bypass semaphore permissions.
+  ASSERT_NO_ERRNO(SetCapability(CAP_IPC_OWNER, false));
+
+  AutoSem sem(semget(IPC_PRIVATE, 1, 0600 | IPC_CREAT));
+  ASSERT_THAT(sem.get(), SyscallSucceeds());
+
+  struct semid_ds semid = {};
+  semid.sem_perm.uid = getuid();
+  semid.sem_perm.gid = getgid();
+
+  // Make semaphore readonly and check that signal fails.
+  semid.sem_perm.mode = 0400;
+  EXPECT_THAT(semctl(sem.get(), 0, IPC_SET, &semid), SyscallSucceeds());
+  struct sembuf buf = {};
+  buf.sem_op = 1;
+  ASSERT_THAT(semop(sem.get(), &buf, 1), SyscallFailsWithErrno(EACCES));
+
+  // Make semaphore writeonly and check that wait for zero fails.
+  semid.sem_perm.mode = 0200;
+  EXPECT_THAT(semctl(sem.get(), 0, IPC_SET, &semid), SyscallSucceeds());
+  buf.sem_op = 0;
+  ASSERT_THAT(semop(sem.get(), &buf, 1), SyscallFailsWithErrno(EACCES));
+}
+
+}  // namespace
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/sendfile.cc b/test/syscalls/linux/sendfile.cc
new file mode 100644
index 000000000..64123e904
--- /dev/null
+++ b/test/syscalls/linux/sendfile.cc
@@ -0,0 +1,587 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <linux/unistd.h>
+#include <sys/eventfd.h>
+#include <sys/sendfile.h>
+#include <unistd.h>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/strings/string_view.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/util/eventfd_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(SendFileTest, SendZeroBytes) {
+  // Create temp files.
+  const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+
+  // Open the input file as read only.
+  const FileDescriptor inf =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY));
+
+  // Open the output file as write only.
+  const FileDescriptor outf =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_WRONLY));
+
+  // Send data and verify that sendfile returns the correct value.
+  EXPECT_THAT(sendfile(outf.get(), inf.get(), nullptr, 0),
+              SyscallSucceedsWithValue(0));
+}
+
+TEST(SendFileTest, InvalidOffset) {
+  // Create temp files.
+  const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+
+  // Open the input file as read only.
+  const FileDescriptor inf =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY));
+
+  // Open the output file as write only.
+  const FileDescriptor outf =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_WRONLY));
+
+  // Send data and verify that sendfile returns the correct value.
+  off_t offset = -1;
+  EXPECT_THAT(sendfile(outf.get(), inf.get(), &offset, 0),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+int memfd_create(const std::string& name, unsigned int flags) {
+  return syscall(__NR_memfd_create, name.c_str(), flags);
+}
+
+TEST(SendFileTest, Overflow) {
+  // Create input file.
+  const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  const FileDescriptor inf =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY));
+
+  // Open the output file.
+  int fd;
+  EXPECT_THAT(fd = memfd_create("overflow", 0), SyscallSucceeds());
+  const FileDescriptor outf(fd);
+
+  // out_offset + kSize overflows INT64_MAX.
+  loff_t out_offset = 0x7ffffffffffffffeull;
+  constexpr int kSize = 3;
+  EXPECT_THAT(sendfile(outf.get(), inf.get(), &out_offset, kSize),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(SendFileTest, SendTrivially) {
+  // Create temp files.
+  constexpr char kData[] = "To be, or not to be, that is the question:";
+  constexpr int kDataSize = sizeof(kData) - 1;
+  const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), kData, TempPath::kDefaultFileMode));
+  const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+
+  // Open the input file as read only.
+  const FileDescriptor inf =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY));
+
+  // Open the output file as write only.
+  FileDescriptor outf;
+  outf = ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_WRONLY));
+
+  // Send data and verify that sendfile returns the correct value.
+  int bytes_sent;
+  EXPECT_THAT(bytes_sent = sendfile(outf.get(), inf.get(), nullptr, kDataSize),
+              SyscallSucceedsWithValue(kDataSize));
+
+  // Close outf to avoid leak.
+  outf.reset();
+
+  // Open the output file as read only.
+  outf = ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_RDONLY));
+
+  // Verify that the output file has the correct data.
+  char actual[kDataSize];
+  ASSERT_THAT(read(outf.get(), &actual, bytes_sent),
+              SyscallSucceedsWithValue(kDataSize));
+  EXPECT_EQ(kData, absl::string_view(actual, bytes_sent));
+}
+
+TEST(SendFileTest, SendTriviallyWithBothFilesReadWrite) {
+  // Create temp files.
+  constexpr char kData[] = "Whether 'tis nobler in the mind to suffer";
+  constexpr int kDataSize = sizeof(kData) - 1;
+  const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), kData, TempPath::kDefaultFileMode));
+  const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+
+  // Open the input file as readwrite.
+  const FileDescriptor inf =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDWR));
+
+  // Open the output file as readwrite.
+  FileDescriptor outf;
+  outf = ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_RDWR));
+
+  // Send data and verify that sendfile returns the correct value.
+  int bytes_sent;
+  EXPECT_THAT(bytes_sent = sendfile(outf.get(), inf.get(), nullptr, kDataSize),
+              SyscallSucceedsWithValue(kDataSize));
+
+  // Close outf to avoid leak.
+  outf.reset();
+
+  // Open the output file as read only.
+  outf = ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_RDONLY));
+
+  // Verify that the output file has the correct data.
+  char actual[kDataSize];
+  ASSERT_THAT(read(outf.get(), &actual, bytes_sent),
+              SyscallSucceedsWithValue(kDataSize));
+  EXPECT_EQ(kData, absl::string_view(actual, bytes_sent));
+}
+
+TEST(SendFileTest, SendAndUpdateFileOffset) {
+  // Create temp files.
+  // Test input string length must be > 2 AND even.
+  constexpr char kData[] = "The slings and arrows of outrageous fortune,";
+  constexpr int kDataSize = sizeof(kData) - 1;
+  constexpr int kHalfDataSize = kDataSize / 2;
+  const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), kData, TempPath::kDefaultFileMode));
+  const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+
+  // Open the input file as read only.
+  const FileDescriptor inf =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY));
+
+  // Open the output file as write only.
+  FileDescriptor outf;
+  outf = ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_WRONLY));
+
+  // Send data and verify that sendfile returns the correct value.
+  int bytes_sent;
+  EXPECT_THAT(
+      bytes_sent = sendfile(outf.get(), inf.get(), nullptr, kHalfDataSize),
+      SyscallSucceedsWithValue(kHalfDataSize));
+
+  // Close outf to avoid leak.
+  outf.reset();
+
+  // Open the output file as read only.
+  outf = ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_RDONLY));
+
+  // Verify that the output file has the correct data.
+  char actual[kHalfDataSize];
+  ASSERT_THAT(read(outf.get(), &actual, bytes_sent),
+              SyscallSucceedsWithValue(kHalfDataSize));
+  EXPECT_EQ(absl::string_view(kData, kHalfDataSize),
+            absl::string_view(actual, bytes_sent));
+
+  // Verify that the input file offset has been updated
+  ASSERT_THAT(read(inf.get(), &actual, kDataSize - bytes_sent),
+              SyscallSucceedsWithValue(kHalfDataSize));
+  EXPECT_EQ(
+      absl::string_view(kData + kDataSize - bytes_sent, kDataSize - bytes_sent),
+      absl::string_view(actual, kHalfDataSize));
+}
+
+TEST(SendFileTest, SendAndUpdateFileOffsetFromNonzeroStartingPoint) {
+  // Create temp files.
+  // Test input string length must be > 2 AND divisible by 4.
+  constexpr char kData[] = "The slings and arrows of outrageous fortune,";
+  constexpr int kDataSize = sizeof(kData) - 1;
+  constexpr int kHalfDataSize = kDataSize / 2;
+  constexpr int kQuarterDataSize = kHalfDataSize / 2;
+  const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), kData, TempPath::kDefaultFileMode));
+  const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+
+  // Open the input file as read only.
+  const FileDescriptor inf =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY));
+
+  // Open the output file as write only.
+  FileDescriptor outf;
+  outf = ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_WRONLY));
+
+  // Read a quarter of the data from the infile which should update the file
+  // offset, we don't actually care about the data so it goes into the garbage.
+  char garbage[kQuarterDataSize];
+  ASSERT_THAT(read(inf.get(), &garbage, kQuarterDataSize),
+              SyscallSucceedsWithValue(kQuarterDataSize));
+
+  // Send data and verify that sendfile returns the correct value.
+  int bytes_sent;
+  EXPECT_THAT(
+      bytes_sent = sendfile(outf.get(), inf.get(), nullptr, kHalfDataSize),
+      SyscallSucceedsWithValue(kHalfDataSize));
+
+  // Close out_fd to avoid leak.
+  outf.reset();
+
+  // Open the output file as read only.
+  outf = ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_RDONLY));
+
+  // Verify that the output file has the correct data.
+  char actual[kHalfDataSize];
+  ASSERT_THAT(read(outf.get(), &actual, bytes_sent),
+              SyscallSucceedsWithValue(kHalfDataSize));
+  EXPECT_EQ(absl::string_view(kData + kQuarterDataSize, kHalfDataSize),
+            absl::string_view(actual, bytes_sent));
+
+  // Verify that the input file offset has been updated
+  ASSERT_THAT(read(inf.get(), &actual, kQuarterDataSize),
+              SyscallSucceedsWithValue(kQuarterDataSize));
+
+  EXPECT_EQ(
+      absl::string_view(kData + kDataSize - kQuarterDataSize, kQuarterDataSize),
+      absl::string_view(actual, kQuarterDataSize));
+}
+
+TEST(SendFileTest, SendAndUpdateGivenOffset) {
+  // Create temp files.
+  // Test input string length must be >= 4 AND divisible by 4.
+  constexpr char kData[] = "Or to take Arms against a Sea of troubles,";
+  constexpr int kDataSize = sizeof(kData) + 1;
+  constexpr int kHalfDataSize = kDataSize / 2;
+  constexpr int kQuarterDataSize = kHalfDataSize / 2;
+  constexpr int kThreeFourthsDataSize = 3 * kDataSize / 4;
+
+  const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), kData, TempPath::kDefaultFileMode));
+  const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+
+  // Open the input file as read only.
+  const FileDescriptor inf =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY));
+
+  // Open the output file as write only.
+  FileDescriptor outf;
+  outf = ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_WRONLY));
+
+  // Create offset for sending.
+  off_t offset = kQuarterDataSize;
+
+  // Send data and verify that sendfile returns the correct value.
+  int bytes_sent;
+  EXPECT_THAT(
+      bytes_sent = sendfile(outf.get(), inf.get(), &offset, kHalfDataSize),
+      SyscallSucceedsWithValue(kHalfDataSize));
+
+  // Close out_fd to avoid leak.
+  outf.reset();
+
+  // Open the output file as read only.
+  outf = ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_RDONLY));
+
+  // Verify that the output file has the correct data.
+  char actual[kHalfDataSize];
+  ASSERT_THAT(read(outf.get(), &actual, bytes_sent),
+              SyscallSucceedsWithValue(kHalfDataSize));
+  EXPECT_EQ(absl::string_view(kData + kQuarterDataSize, kHalfDataSize),
+            absl::string_view(actual, bytes_sent));
+
+  // Verify that the input file offset has NOT been updated.
+  ASSERT_THAT(read(inf.get(), &actual, kHalfDataSize),
+              SyscallSucceedsWithValue(kHalfDataSize));
+  EXPECT_EQ(absl::string_view(kData, kHalfDataSize),
+            absl::string_view(actual, kHalfDataSize));
+
+  // Verify that the offset pointer has been updated.
+  EXPECT_EQ(offset, kThreeFourthsDataSize);
+}
+
+TEST(SendFileTest, DoNotSendfileIfOutfileIsAppendOnly) {
+  // Create temp files.
+  constexpr char kData[] = "And by opposing end them: to die, to sleep";
+  constexpr int kDataSize = sizeof(kData) - 1;
+
+  const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), kData, TempPath::kDefaultFileMode));
+  const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+
+  // Open the input file as read only.
+  const FileDescriptor inf =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY));
+
+  // Open the output file as append only.
+  const FileDescriptor outf =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_WRONLY | O_APPEND));
+
+  // Send data and verify that sendfile returns the correct errno.
+  EXPECT_THAT(sendfile(outf.get(), inf.get(), nullptr, kDataSize),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(SendFileTest, AppendCheckOrdering) {
+  constexpr char kData[] = "And by opposing end them: to die, to sleep";
+  constexpr int kDataSize = sizeof(kData) - 1;
+  const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), kData, TempPath::kDefaultFileMode));
+
+  const FileDescriptor read =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY));
+  const FileDescriptor write =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_WRONLY));
+  const FileDescriptor append =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_APPEND));
+
+  // Check that read/write file mode is verified before append.
+  EXPECT_THAT(sendfile(append.get(), read.get(), nullptr, kDataSize),
+              SyscallFailsWithErrno(EBADF));
+  EXPECT_THAT(sendfile(write.get(), write.get(), nullptr, kDataSize),
+              SyscallFailsWithErrno(EBADF));
+}
+
+TEST(SendFileTest, DoNotSendfileIfOutfileIsNotWritable) {
+  // Create temp files.
+  constexpr char kData[] = "No more; and by a sleep, to say we end";
+  constexpr int kDataSize = sizeof(kData) - 1;
+
+  const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), kData, TempPath::kDefaultFileMode));
+  const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+
+  // Open the input file as read only.
+  const FileDescriptor inf =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY));
+
+  // Open the output file as read only.
+  const FileDescriptor outf =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_RDONLY));
+
+  // Send data and verify that sendfile returns the correct errno.
+  EXPECT_THAT(sendfile(outf.get(), inf.get(), nullptr, kDataSize),
+              SyscallFailsWithErrno(EBADF));
+}
+
+TEST(SendFileTest, DoNotSendfileIfInfileIsNotReadable) {
+  // Create temp files.
+  constexpr char kData[] = "the heart-ache, and the thousand natural shocks";
+  constexpr int kDataSize = sizeof(kData) - 1;
+
+  const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), kData, TempPath::kDefaultFileMode));
+  const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+
+  // Open the input file as write only.
+  const FileDescriptor inf =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_WRONLY));
+
+  // Open the output file as write only.
+  const FileDescriptor outf =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_WRONLY));
+
+  // Send data and verify that sendfile returns the correct errno.
+  EXPECT_THAT(sendfile(outf.get(), inf.get(), nullptr, kDataSize),
+              SyscallFailsWithErrno(EBADF));
+}
+
+TEST(SendFileTest, DoNotSendANegativeNumberOfBytes) {
+  // Create temp files.
+  constexpr char kData[] = "that Flesh is heir to? 'Tis a consummation";
+
+  const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), kData, TempPath::kDefaultFileMode));
+  const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+
+  // Open the input file as read only.
+  const FileDescriptor inf =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY));
+
+  // Open the output file as write only.
+  const FileDescriptor outf =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_WRONLY));
+
+  // Send data and verify that sendfile returns the correct errno.
+  EXPECT_THAT(sendfile(outf.get(), inf.get(), nullptr, -1),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(SendFileTest, SendTheCorrectNumberOfBytesEvenIfWeTryToSendTooManyBytes) {
+  // Create temp files.
+  constexpr char kData[] = "devoutly to be wished. To die, to sleep,";
+  constexpr int kDataSize = sizeof(kData) - 1;
+
+  const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), kData, TempPath::kDefaultFileMode));
+  const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+
+  // Open the input file as read only.
+  const FileDescriptor inf =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY));
+
+  // Open the output file as write only.
+  FileDescriptor outf;
+  outf = ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_WRONLY));
+
+  // Send data and verify that sendfile returns the correct value.
+  int bytes_sent;
+  EXPECT_THAT(
+      bytes_sent = sendfile(outf.get(), inf.get(), nullptr, kDataSize + 100),
+      SyscallSucceedsWithValue(kDataSize));
+
+  // Close outf to avoid leak.
+  outf.reset();
+
+  // Open the output file as read only.
+  outf = ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_RDONLY));
+
+  // Verify that the output file has the correct data.
+  char actual[kDataSize];
+  ASSERT_THAT(read(outf.get(), &actual, bytes_sent),
+              SyscallSucceedsWithValue(kDataSize));
+  EXPECT_EQ(kData, absl::string_view(actual, bytes_sent));
+}
+
+TEST(SendFileTest, SendToNotARegularFile) {
+  // Make temp input directory and open as read only.
+  const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const FileDescriptor inf =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(dir.path(), O_RDONLY));
+
+  // Make temp output file and open as write only.
+  const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  const FileDescriptor outf =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_WRONLY));
+
+  // Receive an error since a directory is not a regular file.
+  EXPECT_THAT(sendfile(outf.get(), inf.get(), nullptr, 0),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(SendFileTest, SendPipeWouldBlock) {
+  // Create temp file.
+  constexpr char kData[] =
+      "The fool doth think he is wise, but the wise man knows himself to be a "
+      "fool.";
+  constexpr int kDataSize = sizeof(kData) - 1;
+  const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), kData, TempPath::kDefaultFileMode));
+
+  // Open the input file as read only.
+  const FileDescriptor inf =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY));
+
+  // Setup the output named pipe.
+  int fds[2];
+  ASSERT_THAT(pipe2(fds, O_NONBLOCK), SyscallSucceeds());
+  const FileDescriptor rfd(fds[0]);
+  const FileDescriptor wfd(fds[1]);
+
+  // Fill up the pipe's buffer.
+  int pipe_size = -1;
+  ASSERT_THAT(pipe_size = fcntl(wfd.get(), F_GETPIPE_SZ), SyscallSucceeds());
+  std::vector<char> buf(2 * pipe_size);
+  ASSERT_THAT(write(wfd.get(), buf.data(), buf.size()),
+              SyscallSucceedsWithValue(pipe_size));
+
+  EXPECT_THAT(sendfile(wfd.get(), inf.get(), nullptr, kDataSize),
+              SyscallFailsWithErrno(EWOULDBLOCK));
+}
+
+TEST(SendFileTest, SendPipeBlocks) {
+  // Create temp file.
+  constexpr char kData[] =
+      "The fault, dear Brutus, is not in our stars, but in ourselves.";
+  constexpr int kDataSize = sizeof(kData) - 1;
+  const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), kData, TempPath::kDefaultFileMode));
+
+  // Open the input file as read only.
+  const FileDescriptor inf =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY));
+
+  // Setup the output named pipe.
+  int fds[2];
+  ASSERT_THAT(pipe(fds), SyscallSucceeds());
+  const FileDescriptor rfd(fds[0]);
+  const FileDescriptor wfd(fds[1]);
+
+  // Fill up the pipe's buffer.
+  int pipe_size = -1;
+  ASSERT_THAT(pipe_size = fcntl(wfd.get(), F_GETPIPE_SZ), SyscallSucceeds());
+  std::vector<char> buf(pipe_size);
+  ASSERT_THAT(write(wfd.get(), buf.data(), buf.size()),
+              SyscallSucceedsWithValue(pipe_size));
+
+  ScopedThread t([&]() {
+    absl::SleepFor(absl::Milliseconds(100));
+    ASSERT_THAT(read(rfd.get(), buf.data(), buf.size()),
+                SyscallSucceedsWithValue(pipe_size));
+  });
+
+  EXPECT_THAT(sendfile(wfd.get(), inf.get(), nullptr, kDataSize),
+              SyscallSucceedsWithValue(kDataSize));
+}
+
+TEST(SendFileTest, SendToSpecialFile) {
+  // Create temp file.
+  const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), "", TempPath::kDefaultFileMode));
+
+  const FileDescriptor inf =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDWR));
+  constexpr int kSize = 0x7ff;
+  ASSERT_THAT(ftruncate(inf.get(), kSize), SyscallSucceeds());
+
+  auto eventfd = ASSERT_NO_ERRNO_AND_VALUE(NewEventFD());
+
+  // eventfd can accept a number of bytes which is a multiple of 8.
+  EXPECT_THAT(sendfile(eventfd.get(), inf.get(), nullptr, 0xfffff),
+              SyscallSucceedsWithValue(kSize & (~7)));
+}
+
+TEST(SendFileTest, SendFileToPipe) {
+  // Create temp file.
+  constexpr char kData[] = "<insert-quote-here>";
+  constexpr int kDataSize = sizeof(kData) - 1;
+  const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), kData, TempPath::kDefaultFileMode));
+  const FileDescriptor inf =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY));
+
+  // Create a pipe for sending to a pipe.
+  int fds[2];
+  ASSERT_THAT(pipe(fds), SyscallSucceeds());
+  const FileDescriptor rfd(fds[0]);
+  const FileDescriptor wfd(fds[1]);
+
+  // Expect to read up to the given size.
+  std::vector<char> buf(kDataSize);
+  ScopedThread t([&]() {
+    absl::SleepFor(absl::Milliseconds(100));
+    ASSERT_THAT(read(rfd.get(), buf.data(), buf.size()),
+                SyscallSucceedsWithValue(kDataSize));
+  });
+
+  // Send with twice the size of the file, which should hit EOF.
+  EXPECT_THAT(sendfile(wfd.get(), inf.get(), nullptr, kDataSize * 2),
+              SyscallSucceedsWithValue(kDataSize));
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/sendfile_socket.cc b/test/syscalls/linux/sendfile_socket.cc
new file mode 100644
index 000000000..c101fe9d2
--- /dev/null
+++ b/test/syscalls/linux/sendfile_socket.cc
@@ -0,0 +1,231 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <arpa/inet.h>
+#include <netinet/in.h>
+#include <sys/sendfile.h>
+#include <sys/socket.h>
+#include <unistd.h>
+
+#include <iostream>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "absl/strings/string_view.h"
+#include "test/syscalls/linux/ip_socket_test_util.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+class SendFileTest : public ::testing::TestWithParam<int> {
+ protected:
+  PosixErrorOr<std::unique_ptr<SocketPair>> Sockets(int type) {
+    // Bind a server socket.
+    int family = GetParam();
+    switch (family) {
+      case AF_INET: {
+        if (type == SOCK_STREAM) {
+          return SocketPairKind{
+              "TCP", AF_INET, type, 0,
+              TCPAcceptBindSocketPairCreator(AF_INET, type, 0, false)}
+              .Create();
+        } else {
+          return SocketPairKind{
+              "UDP", AF_INET, type, 0,
+              UDPBidirectionalBindSocketPairCreator(AF_INET, type, 0, false)}
+              .Create();
+        }
+      }
+      case AF_UNIX: {
+        if (type == SOCK_STREAM) {
+          return SocketPairKind{
+              "UNIX", AF_UNIX, type, 0,
+              FilesystemAcceptBindSocketPairCreator(AF_UNIX, type, 0)}
+              .Create();
+        } else {
+          return SocketPairKind{
+              "UNIX", AF_UNIX, type, 0,
+              FilesystemBidirectionalBindSocketPairCreator(AF_UNIX, type, 0)}
+              .Create();
+        }
+      }
+      default:
+        return PosixError(EINVAL);
+    }
+  }
+};
+
+// Sends large file to exercise the path that read and writes data multiple
+// times, esp. when more data is read than can be written.
+TEST_P(SendFileTest, SendMultiple) {
+  std::vector<char> data(5 * 1024 * 1024);
+  RandomizeBuffer(data.data(), data.size());
+
+  // Create temp files.
+  const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), absl::string_view(data.data(), data.size()),
+      TempPath::kDefaultFileMode));
+  const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+
+  // Create sockets.
+  auto socks = ASSERT_NO_ERRNO_AND_VALUE(Sockets(SOCK_STREAM));
+
+  // Thread that reads data from socket and dumps to a file.
+  ScopedThread th([&] {
+    FileDescriptor outf =
+        ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_WRONLY));
+
+    // Read until socket is closed.
+    char buf[10240];
+    for (int cnt = 0;; cnt++) {
+      int r = RetryEINTR(read)(socks->first_fd(), buf, sizeof(buf));
+      // We cannot afford to save on every read() call.
+      if (cnt % 1000 == 0) {
+        ASSERT_THAT(r, SyscallSucceeds());
+      } else {
+        const DisableSave ds;
+        ASSERT_THAT(r, SyscallSucceeds());
+      }
+      if (r == 0) {
+        // EOF
+        break;
+      }
+      int w = RetryEINTR(write)(outf.get(), buf, r);
+      // We cannot afford to save on every write() call.
+      if (cnt % 1010 == 0) {
+        ASSERT_THAT(w, SyscallSucceedsWithValue(r));
+      } else {
+        const DisableSave ds;
+        ASSERT_THAT(w, SyscallSucceedsWithValue(r));
+      }
+    }
+  });
+
+  // Open the input file as read only.
+  const FileDescriptor inf =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY));
+
+  int cnt = 0;
+  for (size_t sent = 0; sent < data.size(); cnt++) {
+    const size_t remain = data.size() - sent;
+    std::cout << "sendfile, size=" << data.size() << ", sent=" << sent
+              << ", remain=" << remain << std::endl;
+
+    // Send data and verify that sendfile returns the correct value.
+    int res = sendfile(socks->second_fd(), inf.get(), nullptr, remain);
+    // We cannot afford to save on every sendfile() call.
+    if (cnt % 120 == 0) {
+      MaybeSave();
+    }
+    if (res == 0) {
+      // EOF
+      break;
+    }
+    if (res > 0) {
+      sent += res;
+    } else {
+      ASSERT_TRUE(errno == EINTR || errno == EAGAIN) << "errno=" << errno;
+    }
+  }
+
+  // Close socket to stop thread.
+  close(socks->release_second_fd());
+  th.Join();
+
+  // Verify that the output file has the correct data.
+  const FileDescriptor outf =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_RDONLY));
+  std::vector<char> actual(data.size(), '\0');
+  ASSERT_THAT(RetryEINTR(read)(outf.get(), actual.data(), actual.size()),
+              SyscallSucceedsWithValue(actual.size()));
+  ASSERT_EQ(memcmp(data.data(), actual.data(), data.size()), 0);
+}
+
+TEST_P(SendFileTest, Shutdown) {
+  // Create a socket.
+  auto socks = ASSERT_NO_ERRNO_AND_VALUE(Sockets(SOCK_STREAM));
+
+  // If this is a TCP socket, then turn off linger.
+  if (GetParam() == AF_INET) {
+    struct linger sl;
+    sl.l_onoff = 1;
+    sl.l_linger = 0;
+    ASSERT_THAT(
+        setsockopt(socks->first_fd(), SOL_SOCKET, SO_LINGER, &sl, sizeof(sl)),
+        SyscallSucceeds());
+  }
+
+  // Create a 1m file with random data.
+  std::vector<char> data(1024 * 1024);
+  RandomizeBuffer(data.data(), data.size());
+  const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), absl::string_view(data.data(), data.size()),
+      TempPath::kDefaultFileMode));
+  const FileDescriptor inf =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY));
+
+  // Read some data, then shutdown the socket. We don't actually care about
+  // checking the contents (other tests do that), so we just re-use the same
+  // buffer as above.
+  ScopedThread t([&]() {
+    size_t done = 0;
+    while (done < data.size()) {
+      int n = RetryEINTR(read)(socks->first_fd(), data.data(), data.size());
+      ASSERT_THAT(n, SyscallSucceeds());
+      done += n;
+    }
+    // Close the server side socket.
+    close(socks->release_first_fd());
+  });
+
+  // Continuously stream from the file to the socket. Note we do not assert
+  // that a specific amount of data has been written at any time, just that some
+  // data is written. Eventually, we should get a connection reset error.
+  while (1) {
+    off_t offset = 0;  // Always read from the start.
+    int n = sendfile(socks->second_fd(), inf.get(), &offset, data.size());
+    EXPECT_THAT(n, AnyOf(SyscallFailsWithErrno(ECONNRESET),
+                         SyscallFailsWithErrno(EPIPE), SyscallSucceeds()));
+    if (n <= 0) {
+      break;
+    }
+  }
+}
+
+TEST_P(SendFileTest, SendpageFromEmptyFileToUDP) {
+  auto socks = ASSERT_NO_ERRNO_AND_VALUE(Sockets(SOCK_DGRAM));
+
+  TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR));
+
+  // The value to the count argument has to be so that it is impossible to
+  // allocate a buffer of this size. In Linux, sendfile transfer at most
+  // 0x7ffff000 (MAX_RW_COUNT) bytes.
+  EXPECT_THAT(sendfile(socks->first_fd(), fd.get(), 0x0, 0x8000000000004),
+              SyscallSucceedsWithValue(0));
+}
+
+INSTANTIATE_TEST_SUITE_P(AddressFamily, SendFileTest,
+                         ::testing::Values(AF_UNIX, AF_INET));
+
+}  // namespace
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/shm.cc b/test/syscalls/linux/shm.cc
new file mode 100644
index 000000000..c7fdbb924
--- /dev/null
+++ b/test/syscalls/linux/shm.cc
@@ -0,0 +1,508 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdio.h>
+#include <sys/ipc.h>
+#include <sys/mman.h>
+#include <sys/shm.h>
+#include <sys/types.h>
+
+#include "absl/time/clock.h"
+#include "test/util/multiprocess_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+using ::testing::_;
+
+const uint64_t kAllocSize = kPageSize * 128ULL;
+
+PosixErrorOr<char*> Shmat(int shmid, const void* shmaddr, int shmflg) {
+  const intptr_t addr =
+      reinterpret_cast<intptr_t>(shmat(shmid, shmaddr, shmflg));
+  if (addr == -1) {
+    return PosixError(errno, "shmat() failed");
+  }
+  return reinterpret_cast<char*>(addr);
+}
+
+PosixError Shmdt(const char* shmaddr) {
+  const int ret = shmdt(shmaddr);
+  if (ret == -1) {
+    return PosixError(errno, "shmdt() failed");
+  }
+  return NoError();
+}
+
+template <typename T>
+PosixErrorOr<int> Shmctl(int shmid, int cmd, T* buf) {
+  int ret = shmctl(shmid, cmd, reinterpret_cast<struct shmid_ds*>(buf));
+  if (ret == -1) {
+    return PosixError(errno, "shmctl() failed");
+  }
+  return ret;
+}
+
+// ShmSegment is a RAII object for automatically cleaning up shm segments.
+class ShmSegment {
+ public:
+  explicit ShmSegment(int id) : id_(id) {}
+
+  ~ShmSegment() {
+    if (id_ >= 0) {
+      EXPECT_NO_ERRNO(Rmid());
+      id_ = -1;
+    }
+  }
+
+  ShmSegment(ShmSegment&& other) : id_(other.release()) {}
+
+  ShmSegment& operator=(ShmSegment&& other) {
+    id_ = other.release();
+    return *this;
+  }
+
+  ShmSegment(ShmSegment const& other) = delete;
+  ShmSegment& operator=(ShmSegment const& other) = delete;
+
+  int id() const { return id_; }
+
+  int release() {
+    int id = id_;
+    id_ = -1;
+    return id;
+  }
+
+  PosixErrorOr<int> Rmid() {
+    RETURN_IF_ERRNO(Shmctl<void>(id_, IPC_RMID, nullptr));
+    return release();
+  }
+
+ private:
+  int id_ = -1;
+};
+
+PosixErrorOr<int> ShmgetRaw(key_t key, size_t size, int shmflg) {
+  int id = shmget(key, size, shmflg);
+  if (id == -1) {
+    return PosixError(errno, "shmget() failed");
+  }
+  return id;
+}
+
+PosixErrorOr<ShmSegment> Shmget(key_t key, size_t size, int shmflg) {
+  ASSIGN_OR_RETURN_ERRNO(int id, ShmgetRaw(key, size, shmflg));
+  return ShmSegment(id);
+}
+
+TEST(ShmTest, AttachDetach) {
+  const ShmSegment shm = ASSERT_NO_ERRNO_AND_VALUE(
+      Shmget(IPC_PRIVATE, kAllocSize, IPC_CREAT | 0777));
+  struct shmid_ds attr;
+  ASSERT_NO_ERRNO(Shmctl(shm.id(), IPC_STAT, &attr));
+  EXPECT_EQ(attr.shm_segsz, kAllocSize);
+  EXPECT_EQ(attr.shm_nattch, 0);
+
+  const char* addr = ASSERT_NO_ERRNO_AND_VALUE(Shmat(shm.id(), nullptr, 0));
+  ASSERT_NO_ERRNO(Shmctl(shm.id(), IPC_STAT, &attr));
+  EXPECT_EQ(attr.shm_nattch, 1);
+
+  const char* addr2 = ASSERT_NO_ERRNO_AND_VALUE(Shmat(shm.id(), nullptr, 0));
+  ASSERT_NO_ERRNO(Shmctl(shm.id(), IPC_STAT, &attr));
+  EXPECT_EQ(attr.shm_nattch, 2);
+
+  ASSERT_NO_ERRNO(Shmdt(addr));
+  ASSERT_NO_ERRNO(Shmctl(shm.id(), IPC_STAT, &attr));
+  EXPECT_EQ(attr.shm_nattch, 1);
+
+  ASSERT_NO_ERRNO(Shmdt(addr2));
+  ASSERT_NO_ERRNO(Shmctl(shm.id(), IPC_STAT, &attr));
+  EXPECT_EQ(attr.shm_nattch, 0);
+}
+
+TEST(ShmTest, LookupByKey) {
+  const TempPath keyfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  const key_t key = ftok(keyfile.path().c_str(), 1);
+  const ShmSegment shm =
+      ASSERT_NO_ERRNO_AND_VALUE(Shmget(key, kAllocSize, IPC_CREAT | 0777));
+  const int id2 = ASSERT_NO_ERRNO_AND_VALUE(ShmgetRaw(key, kAllocSize, 0777));
+  EXPECT_EQ(shm.id(), id2);
+}
+
+TEST(ShmTest, DetachedSegmentsPersist) {
+  const ShmSegment shm = ASSERT_NO_ERRNO_AND_VALUE(
+      Shmget(IPC_PRIVATE, kAllocSize, IPC_CREAT | 0777));
+  char* addr = ASSERT_NO_ERRNO_AND_VALUE(Shmat(shm.id(), nullptr, 0));
+  addr[0] = 'x';
+  ASSERT_NO_ERRNO(Shmdt(addr));
+
+  // We should be able to re-attach to the same segment and get our data back.
+  addr = ASSERT_NO_ERRNO_AND_VALUE(Shmat(shm.id(), nullptr, 0));
+  EXPECT_EQ(addr[0], 'x');
+  ASSERT_NO_ERRNO(Shmdt(addr));
+}
+
+TEST(ShmTest, MultipleDetachFails) {
+  const ShmSegment shm = ASSERT_NO_ERRNO_AND_VALUE(
+      Shmget(IPC_PRIVATE, kAllocSize, IPC_CREAT | 0777));
+  const char* addr = ASSERT_NO_ERRNO_AND_VALUE(Shmat(shm.id(), nullptr, 0));
+  ASSERT_NO_ERRNO(Shmdt(addr));
+  EXPECT_THAT(Shmdt(addr), PosixErrorIs(EINVAL, _));
+}
+
+TEST(ShmTest, IpcStat) {
+  const TempPath keyfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  const key_t key = ftok(keyfile.path().c_str(), 1);
+
+  const time_t start = time(nullptr);
+
+  const ShmSegment shm =
+      ASSERT_NO_ERRNO_AND_VALUE(Shmget(key, kAllocSize, IPC_CREAT | 0777));
+
+  const uid_t uid = getuid();
+  const gid_t gid = getgid();
+  const pid_t pid = getpid();
+
+  struct shmid_ds attr;
+  ASSERT_NO_ERRNO(Shmctl(shm.id(), IPC_STAT, &attr));
+
+  EXPECT_EQ(attr.shm_perm.__key, key);
+  EXPECT_EQ(attr.shm_perm.uid, uid);
+  EXPECT_EQ(attr.shm_perm.gid, gid);
+  EXPECT_EQ(attr.shm_perm.cuid, uid);
+  EXPECT_EQ(attr.shm_perm.cgid, gid);
+  EXPECT_EQ(attr.shm_perm.mode, 0777);
+
+  EXPECT_EQ(attr.shm_segsz, kAllocSize);
+
+  EXPECT_EQ(attr.shm_atime, 0);
+  EXPECT_EQ(attr.shm_dtime, 0);
+
+  // Change time is set on creation.
+  EXPECT_GE(attr.shm_ctime, start);
+
+  EXPECT_EQ(attr.shm_cpid, pid);
+  EXPECT_EQ(attr.shm_lpid, 0);
+
+  EXPECT_EQ(attr.shm_nattch, 0);
+
+  // The timestamps only have a resolution of seconds; slow down so we actually
+  // see the timestamps change.
+  absl::SleepFor(absl::Seconds(1));
+  const time_t pre_attach = time(nullptr);
+
+  const char* addr = ASSERT_NO_ERRNO_AND_VALUE(Shmat(shm.id(), nullptr, 0));
+  ASSERT_NO_ERRNO(Shmctl(shm.id(), IPC_STAT, &attr));
+
+  EXPECT_GE(attr.shm_atime, pre_attach);
+  EXPECT_EQ(attr.shm_dtime, 0);
+  EXPECT_LT(attr.shm_ctime, pre_attach);
+  EXPECT_EQ(attr.shm_lpid, pid);
+  EXPECT_EQ(attr.shm_nattch, 1);
+
+  absl::SleepFor(absl::Seconds(1));
+  const time_t pre_detach = time(nullptr);
+
+  ASSERT_NO_ERRNO(Shmdt(addr));
+  ASSERT_NO_ERRNO(Shmctl(shm.id(), IPC_STAT, &attr));
+
+  EXPECT_LT(attr.shm_atime, pre_detach);
+  EXPECT_GE(attr.shm_dtime, pre_detach);
+  EXPECT_LT(attr.shm_ctime, pre_detach);
+  EXPECT_EQ(attr.shm_lpid, pid);
+  EXPECT_EQ(attr.shm_nattch, 0);
+}
+
+TEST(ShmTest, ShmStat) {
+  // This test relies on the segment we create to be the first one on the
+  // system, causing it to occupy slot 1. We can't reasonably expect this on a
+  // general Linux host.
+  SKIP_IF(!IsRunningOnGvisor());
+
+  const ShmSegment shm = ASSERT_NO_ERRNO_AND_VALUE(
+      Shmget(IPC_PRIVATE, kAllocSize, IPC_CREAT | 0777));
+  struct shmid_ds attr;
+  ASSERT_NO_ERRNO(Shmctl(1, SHM_STAT, &attr));
+  // This does the same thing as IPC_STAT, so only test that the syscall
+  // succeeds here.
+}
+
+TEST(ShmTest, IpcInfo) {
+  struct shminfo info;
+  ASSERT_NO_ERRNO(Shmctl(0, IPC_INFO, &info));
+
+  EXPECT_EQ(info.shmmin, 1);  // This is always 1, according to the man page.
+  EXPECT_GT(info.shmmax, info.shmmin);
+  EXPECT_GT(info.shmmni, 0);
+  EXPECT_GT(info.shmseg, 0);
+  EXPECT_GT(info.shmall, 0);
+}
+
+TEST(ShmTest, ShmInfo) {
+  struct shm_info info;
+
+  // We generally can't know what other processes on a linux machine
+  // does with shared memory segments, so we can't test specific
+  // numbers on Linux. When running under gvisor, we're guaranteed to
+  // be the only ones using shm, so we can easily verify machine-wide
+  // numbers.
+  if (IsRunningOnGvisor()) {
+    ASSERT_NO_ERRNO(Shmctl(0, SHM_INFO, &info));
+    EXPECT_EQ(info.used_ids, 0);
+    EXPECT_EQ(info.shm_tot, 0);
+    EXPECT_EQ(info.shm_rss, 0);
+    EXPECT_EQ(info.shm_swp, 0);
+  }
+
+  const ShmSegment shm = ASSERT_NO_ERRNO_AND_VALUE(
+      Shmget(IPC_PRIVATE, kAllocSize, IPC_CREAT | 0777));
+  const char* addr = ASSERT_NO_ERRNO_AND_VALUE(Shmat(shm.id(), nullptr, 0));
+
+  ASSERT_NO_ERRNO(Shmctl(1, SHM_INFO, &info));
+
+  if (IsRunningOnGvisor()) {
+    ASSERT_NO_ERRNO(Shmctl(shm.id(), SHM_INFO, &info));
+    EXPECT_EQ(info.used_ids, 1);
+    EXPECT_EQ(info.shm_tot, kAllocSize / kPageSize);
+    EXPECT_EQ(info.shm_rss, kAllocSize / kPageSize);
+    EXPECT_EQ(info.shm_swp, 0);  // Gvisor currently never swaps.
+  }
+
+  ASSERT_NO_ERRNO(Shmdt(addr));
+}
+
+TEST(ShmTest, ShmCtlSet) {
+  const ShmSegment shm = ASSERT_NO_ERRNO_AND_VALUE(
+      Shmget(IPC_PRIVATE, kAllocSize, IPC_CREAT | 0777));
+  const char* addr = ASSERT_NO_ERRNO_AND_VALUE(Shmat(shm.id(), nullptr, 0));
+
+  struct shmid_ds attr;
+  ASSERT_NO_ERRNO(Shmctl(shm.id(), IPC_STAT, &attr));
+  ASSERT_EQ(attr.shm_perm.mode, 0777);
+
+  attr.shm_perm.mode = 0766;
+  ASSERT_NO_ERRNO(Shmctl(shm.id(), IPC_SET, &attr));
+
+  ASSERT_NO_ERRNO(Shmctl(shm.id(), IPC_STAT, &attr));
+  ASSERT_EQ(attr.shm_perm.mode, 0766);
+
+  ASSERT_NO_ERRNO(Shmdt(addr));
+}
+
+TEST(ShmTest, RemovedSegmentsAreMarkedDeleted) {
+  ShmSegment shm = ASSERT_NO_ERRNO_AND_VALUE(
+      Shmget(IPC_PRIVATE, kAllocSize, IPC_CREAT | 0777));
+  const char* addr = ASSERT_NO_ERRNO_AND_VALUE(Shmat(shm.id(), nullptr, 0));
+  const int id = ASSERT_NO_ERRNO_AND_VALUE(shm.Rmid());
+  struct shmid_ds attr;
+  ASSERT_NO_ERRNO(Shmctl(id, IPC_STAT, &attr));
+  EXPECT_NE(attr.shm_perm.mode & SHM_DEST, 0);
+  ASSERT_NO_ERRNO(Shmdt(addr));
+}
+
+TEST(ShmTest, RemovedSegmentsAreDestroyed) {
+  ShmSegment shm = ASSERT_NO_ERRNO_AND_VALUE(
+      Shmget(IPC_PRIVATE, kAllocSize, IPC_CREAT | 0777));
+  const char* addr = ASSERT_NO_ERRNO_AND_VALUE(Shmat(shm.id(), nullptr, 0));
+
+  const uint64_t alloc_pages = kAllocSize / kPageSize;
+
+  struct shm_info info;
+  ASSERT_NO_ERRNO(Shmctl(0 /*ignored*/, SHM_INFO, &info));
+  const uint64_t before = info.shm_tot;
+
+  ASSERT_NO_ERRNO(shm.Rmid());
+  ASSERT_NO_ERRNO(Shmdt(addr));
+
+  ASSERT_NO_ERRNO(Shmctl(0 /*ignored*/, SHM_INFO, &info));
+  if (IsRunningOnGvisor()) {
+    // No guarantees on system-wide shm memory usage on a generic linux host.
+    const uint64_t after = info.shm_tot;
+    EXPECT_EQ(after, before - alloc_pages);
+  }
+}
+
+TEST(ShmTest, AllowsAttachToRemovedSegmentWithRefs) {
+  ShmSegment shm = ASSERT_NO_ERRNO_AND_VALUE(
+      Shmget(IPC_PRIVATE, kAllocSize, IPC_CREAT | 0777));
+  const char* addr = ASSERT_NO_ERRNO_AND_VALUE(Shmat(shm.id(), nullptr, 0));
+  const int id = ASSERT_NO_ERRNO_AND_VALUE(shm.Rmid());
+  const char* addr2 = ASSERT_NO_ERRNO_AND_VALUE(Shmat(id, nullptr, 0));
+  ASSERT_NO_ERRNO(Shmdt(addr));
+  ASSERT_NO_ERRNO(Shmdt(addr2));
+}
+
+TEST(ShmTest, RemovedSegmentsAreNotDiscoverable) {
+  const TempPath keyfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  const key_t key = ftok(keyfile.path().c_str(), 1);
+  ShmSegment shm =
+      ASSERT_NO_ERRNO_AND_VALUE(Shmget(key, kAllocSize, IPC_CREAT | 0777));
+  ASSERT_NO_ERRNO(shm.Rmid());
+  EXPECT_THAT(Shmget(key, kAllocSize, 0777), PosixErrorIs(ENOENT, _));
+}
+
+TEST(ShmDeathTest, ReadonlySegment) {
+  SetupGvisorDeathTest();
+  const ShmSegment shm = ASSERT_NO_ERRNO_AND_VALUE(
+      Shmget(IPC_PRIVATE, kAllocSize, IPC_CREAT | 0777));
+  char* addr = ASSERT_NO_ERRNO_AND_VALUE(Shmat(shm.id(), nullptr, SHM_RDONLY));
+  // Reading succeeds.
+  static_cast<void>(addr[0]);
+  // Writing fails.
+  EXPECT_EXIT(addr[0] = 'x', ::testing::KilledBySignal(SIGSEGV), "");
+}
+
+TEST(ShmDeathTest, SegmentNotAccessibleAfterDetach) {
+  // This test is susceptible to races with concurrent mmaps running in parallel
+  // gtest threads since the test relies on the address freed during a shm
+  // segment destruction to remain unused. We run the test body in a forked
+  // child to guarantee a single-threaded context to avoid this.
+
+  SetupGvisorDeathTest();
+
+  const auto rest = [&] {
+    ShmSegment shm = ASSERT_NO_ERRNO_AND_VALUE(
+        Shmget(IPC_PRIVATE, kAllocSize, IPC_CREAT | 0777));
+    char* addr = ASSERT_NO_ERRNO_AND_VALUE(Shmat(shm.id(), nullptr, 0));
+
+    // Mark the segment as destroyed so it's automatically cleaned up when we
+    // crash below. We can't rely on the standard cleanup since the destructor
+    // will not run after the SIGSEGV. Note that this doesn't destroy the
+    // segment immediately since we're still attached to it.
+    ASSERT_NO_ERRNO(shm.Rmid());
+
+    addr[0] = 'x';
+    ASSERT_NO_ERRNO(Shmdt(addr));
+
+    // This access should cause a SIGSEGV.
+    addr[0] = 'x';
+  };
+
+  EXPECT_THAT(InForkedProcess(rest),
+              IsPosixErrorOkAndHolds(W_EXITCODE(0, SIGSEGV)));
+}
+
+TEST(ShmTest, RequestingSegmentSmallerThanSHMMINFails) {
+  struct shminfo info;
+  ASSERT_NO_ERRNO(Shmctl(0, IPC_INFO, &info));
+  const uint64_t size = info.shmmin - 1;
+  EXPECT_THAT(Shmget(IPC_PRIVATE, size, IPC_CREAT | 0777),
+              PosixErrorIs(EINVAL, _));
+}
+
+TEST(ShmTest, RequestingSegmentLargerThanSHMMAXFails) {
+  struct shminfo info;
+  ASSERT_NO_ERRNO(Shmctl(0, IPC_INFO, &info));
+  const uint64_t size = info.shmmax + kPageSize;
+  EXPECT_THAT(Shmget(IPC_PRIVATE, size, IPC_CREAT | 0777),
+              PosixErrorIs(EINVAL, _));
+}
+
+TEST(ShmTest, RequestingUnalignedSizeSucceeds) {
+  EXPECT_NO_ERRNO(Shmget(IPC_PRIVATE, 4097, IPC_CREAT | 0777));
+}
+
+TEST(ShmTest, RequestingDuplicateCreationFails) {
+  const TempPath keyfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  const key_t key = ftok(keyfile.path().c_str(), 1);
+  const ShmSegment shm = ASSERT_NO_ERRNO_AND_VALUE(
+      Shmget(key, kAllocSize, IPC_CREAT | IPC_EXCL | 0777));
+  EXPECT_THAT(Shmget(key, kAllocSize, IPC_CREAT | IPC_EXCL | 0777),
+              PosixErrorIs(EEXIST, _));
+}
+
+TEST(ShmTest, NonExistentSegmentsAreNotFound) {
+  const TempPath keyfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  const key_t key = ftok(keyfile.path().c_str(), 1);
+  // Do not request creation.
+  EXPECT_THAT(Shmget(key, kAllocSize, 0777), PosixErrorIs(ENOENT, _));
+}
+
+TEST(ShmTest, SegmentsSizeFixedOnCreation) {
+  const TempPath keyfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  const key_t key = ftok(keyfile.path().c_str(), 1);
+
+  // Base segment.
+  const ShmSegment shm =
+      ASSERT_NO_ERRNO_AND_VALUE(Shmget(key, kAllocSize, IPC_CREAT | 0777));
+
+  // Ask for the same segment at half size. This succeeds.
+  const int id2 =
+      ASSERT_NO_ERRNO_AND_VALUE(ShmgetRaw(key, kAllocSize / 2, 0777));
+
+  // Ask for the same segment at double size.
+  EXPECT_THAT(Shmget(key, kAllocSize * 2, 0777), PosixErrorIs(EINVAL, _));
+
+  char* addr = ASSERT_NO_ERRNO_AND_VALUE(Shmat(shm.id(), nullptr, 0));
+  char* addr2 = ASSERT_NO_ERRNO_AND_VALUE(Shmat(id2, nullptr, 0));
+
+  // We have 2 different maps...
+  EXPECT_NE(addr, addr2);
+
+  // ... And both maps are kAllocSize bytes; despite asking for a half-sized
+  // segment for the second map.
+  addr[kAllocSize - 1] = 'x';
+  addr2[kAllocSize - 1] = 'x';
+
+  ASSERT_NO_ERRNO(Shmdt(addr));
+  ASSERT_NO_ERRNO(Shmdt(addr2));
+}
+
+TEST(ShmTest, PartialUnmap) {
+  const ShmSegment shm = ASSERT_NO_ERRNO_AND_VALUE(
+      Shmget(IPC_PRIVATE, kAllocSize, IPC_CREAT | 0777));
+  char* addr = ASSERT_NO_ERRNO_AND_VALUE(Shmat(shm.id(), nullptr, 0));
+  EXPECT_THAT(munmap(addr + (kAllocSize / 4), kAllocSize / 2),
+              SyscallSucceeds());
+  ASSERT_NO_ERRNO(Shmdt(addr));
+}
+
+// Check that sentry does not panic when asked for a zero-length private shm
+// segment. Regression test for b/110694797.
+TEST(ShmTest, GracefullyFailOnZeroLenSegmentCreation) {
+  EXPECT_THAT(Shmget(IPC_PRIVATE, 0, 0), PosixErrorIs(EINVAL, _));
+}
+
+TEST(ShmTest, NoDestructionOfAttachedSegmentWithMultipleRmid) {
+  ShmSegment shm = ASSERT_NO_ERRNO_AND_VALUE(
+      Shmget(IPC_PRIVATE, kAllocSize, IPC_CREAT | 0777));
+  char* addr = ASSERT_NO_ERRNO_AND_VALUE(Shmat(shm.id(), nullptr, 0));
+  char* addr2 = ASSERT_NO_ERRNO_AND_VALUE(Shmat(shm.id(), nullptr, 0));
+
+  // There should be 2 refs to the segment from the 2 attachments, and a single
+  // self-reference. Mark the segment as destroyed more than 3 times through
+  // shmctl(RMID). If there's a bug with the ref counting, this should cause the
+  // count to drop to zero.
+  int id = shm.release();
+  for (int i = 0; i < 6; ++i) {
+    ASSERT_NO_ERRNO(Shmctl<void>(id, IPC_RMID, nullptr));
+  }
+
+  // Segment should remain accessible.
+  addr[0] = 'x';
+  ASSERT_NO_ERRNO(Shmdt(addr));
+
+  // Segment should remain accessible even after one of the two attachments are
+  // detached.
+  addr2[0] = 'x';
+  ASSERT_NO_ERRNO(Shmdt(addr2));
+}
+
+}  // namespace
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/sigaction.cc b/test/syscalls/linux/sigaction.cc
new file mode 100644
index 000000000..9d9dd57a8
--- /dev/null
+++ b/test/syscalls/linux/sigaction.cc
@@ -0,0 +1,79 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <signal.h>
+#include <sys/syscall.h>
+
+#include "gtest/gtest.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(SigactionTest, GetLessThanOrEqualToZeroFails) {
+  struct sigaction act = {};
+  ASSERT_THAT(sigaction(-1, nullptr, &act), SyscallFailsWithErrno(EINVAL));
+  ASSERT_THAT(sigaction(0, nullptr, &act), SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(SigactionTest, SetLessThanOrEqualToZeroFails) {
+  struct sigaction act = {};
+  ASSERT_THAT(sigaction(0, &act, nullptr), SyscallFailsWithErrno(EINVAL));
+  ASSERT_THAT(sigaction(0, &act, nullptr), SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(SigactionTest, GetGreaterThanMaxFails) {
+  struct sigaction act = {};
+  ASSERT_THAT(sigaction(SIGRTMAX + 1, nullptr, &act),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(SigactionTest, SetGreaterThanMaxFails) {
+  struct sigaction act = {};
+  ASSERT_THAT(sigaction(SIGRTMAX + 1, &act, nullptr),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(SigactionTest, SetSigkillFails) {
+  struct sigaction act = {};
+  ASSERT_THAT(sigaction(SIGKILL, nullptr, &act), SyscallSucceeds());
+  ASSERT_THAT(sigaction(SIGKILL, &act, nullptr), SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(SigactionTest, SetSigstopFails) {
+  struct sigaction act = {};
+  ASSERT_THAT(sigaction(SIGSTOP, nullptr, &act), SyscallSucceeds());
+  ASSERT_THAT(sigaction(SIGSTOP, &act, nullptr), SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(SigactionTest, BadSigsetFails) {
+  constexpr size_t kWrongSigSetSize = 43;
+
+  struct sigaction act = {};
+
+  // The syscall itself (rather than the libc wrapper) takes the sigset_t size.
+  ASSERT_THAT(
+      syscall(SYS_rt_sigaction, SIGTERM, nullptr, &act, kWrongSigSetSize),
+      SyscallFailsWithErrno(EINVAL));
+  ASSERT_THAT(
+      syscall(SYS_rt_sigaction, SIGTERM, &act, nullptr, kWrongSigSetSize),
+      SyscallFailsWithErrno(EINVAL));
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/sigaltstack.cc b/test/syscalls/linux/sigaltstack.cc
new file mode 100644
index 000000000..24e7c4960
--- /dev/null
+++ b/test/syscalls/linux/sigaltstack.cc
@@ -0,0 +1,268 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <functional>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "test/util/cleanup.h"
+#include "test/util/fs_util.h"
+#include "test/util/multiprocess_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/signal_util.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+PosixErrorOr<Cleanup> ScopedSigaltstack(stack_t const& stack) {
+  stack_t old_stack;
+  int rc = sigaltstack(&stack, &old_stack);
+  MaybeSave();
+  if (rc < 0) {
+    return PosixError(errno, "sigaltstack failed");
+  }
+  return Cleanup([old_stack] {
+    EXPECT_THAT(sigaltstack(&old_stack, nullptr), SyscallSucceeds());
+  });
+}
+
+volatile bool got_signal = false;
+volatile int sigaltstack_errno = 0;
+volatile int ss_flags = 0;
+
+void sigaltstack_handler(int sig, siginfo_t* siginfo, void* arg) {
+  got_signal = true;
+
+  stack_t stack;
+  int ret = sigaltstack(nullptr, &stack);
+  MaybeSave();
+  if (ret < 0) {
+    sigaltstack_errno = errno;
+    return;
+  }
+  ss_flags = stack.ss_flags;
+}
+
+TEST(SigaltstackTest, Success) {
+  std::vector<char> stack_mem(SIGSTKSZ);
+  stack_t stack = {};
+  stack.ss_sp = stack_mem.data();
+  stack.ss_size = stack_mem.size();
+  auto const cleanup_sigstack =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaltstack(stack));
+
+  struct sigaction sa = {};
+  sa.sa_sigaction = sigaltstack_handler;
+  sigfillset(&sa.sa_mask);
+  sa.sa_flags = SA_SIGINFO | SA_ONSTACK;
+  auto const cleanup_sa =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGUSR1, sa));
+
+  // Send signal to this thread, as sigaltstack is per-thread.
+  EXPECT_THAT(tgkill(getpid(), gettid(), SIGUSR1), SyscallSucceeds());
+
+  EXPECT_TRUE(got_signal);
+  EXPECT_EQ(sigaltstack_errno, 0);
+  EXPECT_NE(0, ss_flags & SS_ONSTACK);
+}
+
+TEST(SigaltstackTest, ResetByExecve) {
+  std::vector<char> stack_mem(SIGSTKSZ);
+  stack_t stack = {};
+  stack.ss_sp = stack_mem.data();
+  stack.ss_size = stack_mem.size();
+  auto const cleanup_sigstack =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaltstack(stack));
+
+  std::string full_path = RunfilePath("test/syscalls/linux/sigaltstack_check");
+
+  pid_t child_pid = -1;
+  int execve_errno = 0;
+  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+      ForkAndExec(full_path, {"sigaltstack_check"}, {}, nullptr, &child_pid,
+                  &execve_errno));
+
+  ASSERT_GT(child_pid, 0);
+  ASSERT_EQ(execve_errno, 0);
+
+  int status = 0;
+  ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds());
+  ASSERT_TRUE(WIFEXITED(status));
+  ASSERT_EQ(WEXITSTATUS(status), 0);
+}
+
+volatile bool badhandler_on_sigaltstack = true;      // Set by the handler.
+char* volatile badhandler_low_water_mark = nullptr;  // Set by the handler.
+volatile uint8_t badhandler_recursive_faults = 0;    // Consumed by the handler.
+
+void badhandler(int sig, siginfo_t* siginfo, void* arg) {
+  char stack_var = 0;
+  char* current_ss = &stack_var;
+
+  stack_t stack;
+  int ret = sigaltstack(nullptr, &stack);
+  if (ret < 0 || (stack.ss_flags & SS_ONSTACK) != SS_ONSTACK) {
+    // We should always be marked as being on the stack. Don't allow this to hit
+    // the bottom if this is ever not true (the main test will fail as a
+    // result, but we still need to unwind the recursive faults).
+    badhandler_on_sigaltstack = false;
+  }
+  if (current_ss < badhandler_low_water_mark) {
+    // Record the low point for the signal stack. We never expected this to be
+    // before stack bottom, but this is asserted in the actual test.
+    badhandler_low_water_mark = current_ss;
+  }
+  if (badhandler_recursive_faults > 0) {
+    badhandler_recursive_faults--;
+    Fault();
+  }
+  FixupFault(reinterpret_cast<ucontext_t*>(arg));
+}
+
+TEST(SigaltstackTest, WalksOffBottom) {
+  // This test marks the upper half of the stack_mem array as the signal stack.
+  // It asserts that when a fault occurs in the handler (already on the signal
+  // stack), we eventually continue to fault our way off the stack. We should
+  // not revert to the top of the signal stack when we fall off the bottom and
+  // the signal stack should remain "in use". When we fall off the signal stack,
+  // we should have an unconditional signal delivered and not start using the
+  // first part of the stack_mem array.
+  std::vector<char> stack_mem(SIGSTKSZ * 2);
+  stack_t stack = {};
+  stack.ss_sp = stack_mem.data() + SIGSTKSZ;  // See above: upper half.
+  stack.ss_size = SIGSTKSZ;                   // Only one half the array.
+  auto const cleanup_sigstack =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaltstack(stack));
+
+  // Setup the handler: this must be for SIGSEGV, and it must allow proper
+  // nesting (no signal mask, no defer) so that we can trigger multiple times.
+  //
+  // When we walk off the bottom of the signal stack and force signal delivery
+  // of a SIGSEGV, the handler will revert to the default behavior (kill).
+  struct sigaction sa = {};
+  sa.sa_sigaction = badhandler;
+  sa.sa_flags = SA_SIGINFO | SA_ONSTACK | SA_NODEFER;
+  auto const cleanup_sa =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGSEGV, sa));
+
+  // Trigger a single fault.
+  badhandler_low_water_mark =
+      static_cast<char*>(stack.ss_sp) + SIGSTKSZ;  // Expected top.
+  badhandler_recursive_faults = 0;                 // Disable refault.
+  Fault();
+  EXPECT_TRUE(badhandler_on_sigaltstack);
+  EXPECT_THAT(sigaltstack(nullptr, &stack), SyscallSucceeds());
+  EXPECT_EQ(stack.ss_flags & SS_ONSTACK, 0);
+  EXPECT_LT(badhandler_low_water_mark,
+            reinterpret_cast<char*>(stack.ss_sp) + 2 * SIGSTKSZ);
+  EXPECT_GT(badhandler_low_water_mark, reinterpret_cast<char*>(stack.ss_sp));
+
+  // Trigger two faults.
+  char* prev_low_water_mark = badhandler_low_water_mark;  // Previous top.
+  badhandler_recursive_faults = 1;                        // One refault.
+  Fault();
+  ASSERT_TRUE(badhandler_on_sigaltstack);
+  EXPECT_THAT(sigaltstack(nullptr, &stack), SyscallSucceeds());
+  EXPECT_EQ(stack.ss_flags & SS_ONSTACK, 0);
+  EXPECT_LT(badhandler_low_water_mark, prev_low_water_mark);
+  EXPECT_GT(badhandler_low_water_mark, reinterpret_cast<char*>(stack.ss_sp));
+
+  // Calculate the stack growth for a fault, and set the recursive faults to
+  // ensure that the signal handler stack required exceeds our marked stack area
+  // by a minimal amount. It should remain in the valid stack_mem area so that
+  // we can test the signal is forced merely by going out of the signal stack
+  // bounds, not by a genuine fault.
+  uintptr_t frame_size =
+      static_cast<uintptr_t>(prev_low_water_mark - badhandler_low_water_mark);
+  badhandler_recursive_faults = (SIGSTKSZ + frame_size) / frame_size;
+  EXPECT_EXIT(Fault(), ::testing::KilledBySignal(SIGSEGV), "");
+}
+
+volatile int setonstack_retval = 0;  // Set by the handler.
+volatile int setonstack_errno = 0;   // Set by the handler.
+
+void setonstack(int sig, siginfo_t* siginfo, void* arg) {
+  char stack_mem[SIGSTKSZ];
+  stack_t stack = {};
+  stack.ss_sp = &stack_mem[0];
+  stack.ss_size = SIGSTKSZ;
+  setonstack_retval = sigaltstack(&stack, nullptr);
+  setonstack_errno = errno;
+  FixupFault(reinterpret_cast<ucontext_t*>(arg));
+}
+
+TEST(SigaltstackTest, SetWhileOnStack) {
+  // Reserve twice as much stack here, since the handler will allocate a vector
+  // of size SIGTKSZ and attempt to set the sigaltstack to that value.
+  std::vector<char> stack_mem(2 * SIGSTKSZ);
+  stack_t stack = {};
+  stack.ss_sp = stack_mem.data();
+  stack.ss_size = stack_mem.size();
+  auto const cleanup_sigstack =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaltstack(stack));
+
+  // See above.
+  struct sigaction sa = {};
+  sa.sa_sigaction = setonstack;
+  sa.sa_flags = SA_SIGINFO | SA_ONSTACK;
+  auto const cleanup_sa =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGSEGV, sa));
+
+  // Trigger a fault.
+  Fault();
+
+  // The set should have failed.
+  EXPECT_EQ(setonstack_retval, -1);
+  EXPECT_EQ(setonstack_errno, EPERM);
+}
+
+TEST(SigaltstackTest, SetCurrentStack) {
+  // This is executed as an exit test because once the signal stack is set to
+  // the local stack, there's no good way to unwind. We don't want to taint the
+  // test of any other tests that might run within this process.
+  EXPECT_EXIT(
+      {
+        char stack_value = 0;
+        stack_t stack = {};
+        stack.ss_sp = &stack_value - kPageSize;  // Lower than current level.
+        stack.ss_size = 2 * kPageSize;  // => &stack_value +/- kPageSize.
+        TEST_CHECK(sigaltstack(&stack, nullptr) == 0);
+        TEST_CHECK(sigaltstack(nullptr, &stack) == 0);
+        TEST_CHECK((stack.ss_flags & SS_ONSTACK) != 0);
+
+        // Should not be able to change the stack (even no-op).
+        TEST_CHECK(sigaltstack(&stack, nullptr) == -1 && errno == EPERM);
+
+        // Should not be able to disable the stack.
+        stack.ss_flags = SS_DISABLE;
+        TEST_CHECK(sigaltstack(&stack, nullptr) == -1 && errno == EPERM);
+        exit(0);
+      },
+      ::testing::ExitedWithCode(0), "");
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/sigaltstack_check.cc b/test/syscalls/linux/sigaltstack_check.cc
new file mode 100644
index 000000000..5ac1b661d
--- /dev/null
+++ b/test/syscalls/linux/sigaltstack_check.cc
@@ -0,0 +1,33 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Checks that there is no alternate signal stack by default.
+//
+// Used by a test in sigaltstack.cc.
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "test/util/logging.h"
+
+int main(int /* argc */, char** /* argv */) {
+  stack_t stack;
+  TEST_CHECK(sigaltstack(nullptr, &stack) >= 0);
+  TEST_CHECK(stack.ss_flags == SS_DISABLE);
+  TEST_CHECK(stack.ss_sp == 0);
+  TEST_CHECK(stack.ss_size == 0);
+  return 0;
+}
diff --git a/test/syscalls/linux/sigiret.cc b/test/syscalls/linux/sigiret.cc
new file mode 100644
index 000000000..6227774a4
--- /dev/null
+++ b/test/syscalls/linux/sigiret.cc
@@ -0,0 +1,136 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <signal.h>
+#include <sys/types.h>
+#include <sys/ucontext.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "test/util/logging.h"
+#include "test/util/signal_util.h"
+#include "test/util/test_util.h"
+#include "test/util/timer_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+constexpr uint64_t kOrigRcx = 0xdeadbeeffacefeed;
+constexpr uint64_t kOrigR11 = 0xfacefeedbaad1dea;
+
+volatile int gotvtalrm, ready;
+
+void sigvtalrm(int sig, siginfo_t* siginfo, void* _uc) {
+  ucontext_t* uc = reinterpret_cast<ucontext_t*>(_uc);
+
+  // Verify that:
+  // - test is in the busy-wait loop waiting for signal.
+  // - %rcx and %r11 values in mcontext_t match kOrigRcx and kOrigR11.
+  if (ready &&
+      static_cast<uint64_t>(uc->uc_mcontext.gregs[REG_RCX]) == kOrigRcx &&
+      static_cast<uint64_t>(uc->uc_mcontext.gregs[REG_R11]) == kOrigR11) {
+    // Modify the values %rcx and %r11 in the ucontext. These are the
+    // values seen by the application after the signal handler returns.
+    uc->uc_mcontext.gregs[REG_RCX] = ~kOrigRcx;
+    uc->uc_mcontext.gregs[REG_R11] = ~kOrigR11;
+    gotvtalrm = 1;
+  }
+}
+
+TEST(SigIretTest, CheckRcxR11) {
+  // Setup signal handler for SIGVTALRM.
+  struct sigaction sa = {};
+  sigfillset(&sa.sa_mask);
+  sa.sa_sigaction = sigvtalrm;
+  sa.sa_flags = SA_SIGINFO;
+  auto const action_cleanup =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGVTALRM, sa));
+
+  auto const mask_cleanup =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, SIGVTALRM));
+
+  // Setup itimer to fire after 500 msecs.
+  struct itimerval itimer = {};
+  itimer.it_value.tv_usec = 500 * 1000;  // 500 msecs.
+  auto const timer_cleanup =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedItimer(ITIMER_VIRTUAL, itimer));
+
+  // Initialize %rcx and %r11 and spin until the signal handler returns.
+  uint64_t rcx = kOrigRcx;
+  uint64_t r11 = kOrigR11;
+  asm volatile(
+      "movq %[rcx], %%rcx;"                      // %rcx = rcx
+      "movq %[r11], %%r11;"                      // %r11 = r11
+      "movl $1, %[ready];"                       // ready = 1
+      "1: pause; cmpl $0, %[gotvtalrm]; je 1b;"  // while (!gotvtalrm);
+      "movq %%rcx, %[rcx];"                      // rcx = %rcx
+      "movq %%r11, %[r11];"                      // r11 = %r11
+      : [ ready ] "=m"(ready), [ rcx ] "+m"(rcx), [ r11 ] "+m"(r11)
+      : [ gotvtalrm ] "m"(gotvtalrm)
+      : "cc", "memory", "rcx", "r11");
+
+  // If sigreturn(2) returns via 'sysret' then %rcx and %r11 will be
+  // clobbered and set to 'ptregs->rip' and 'ptregs->rflags' respectively.
+  //
+  // The following check verifies that %rcx and %r11 were not clobbered
+  // when returning from the signal handler (via sigreturn(2)).
+  EXPECT_EQ(rcx, ~kOrigRcx);
+  EXPECT_EQ(r11, ~kOrigR11);
+}
+
+constexpr uint64_t kNonCanonicalRip = 0xCCCC000000000000;
+
+// Test that a non-canonical signal handler faults as expected.
+TEST(SigIretTest, BadHandler) {
+  struct sigaction sa = {};
+  sa.sa_sigaction =
+      reinterpret_cast<void (*)(int, siginfo_t*, void*)>(kNonCanonicalRip);
+  auto const cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGUSR1, sa));
+
+  pid_t pid = fork();
+  if (pid == 0) {
+    // Child, wait for signal.
+    while (1) {
+      pause();
+    }
+  }
+  ASSERT_THAT(pid, SyscallSucceeds());
+
+  EXPECT_THAT(kill(pid, SIGUSR1), SyscallSucceeds());
+
+  int status;
+  EXPECT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid));
+  EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGSEGV)
+      << "status = " << status;
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
+
+int main(int argc, char** argv) {
+  // SigIretTest.CheckRcxR11 depends on delivering SIGVTALRM to the main thread.
+  // Block SIGVTALRM so that any other threads created by TestInit will also
+  // have SIGVTALRM blocked.
+  sigset_t set;
+  sigemptyset(&set);
+  sigaddset(&set, SIGVTALRM);
+  TEST_PCHECK(sigprocmask(SIG_BLOCK, &set, nullptr) == 0);
+
+  gvisor::testing::TestInit(&argc, &argv);
+  return gvisor::testing::RunAllTests();
+}
diff --git a/test/syscalls/linux/signalfd.cc b/test/syscalls/linux/signalfd.cc
new file mode 100644
index 000000000..389e5fca2
--- /dev/null
+++ b/test/syscalls/linux/signalfd.cc
@@ -0,0 +1,373 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <poll.h>
+#include <signal.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/signalfd.h>
+#include <unistd.h>
+
+#include <functional>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "absl/synchronization/mutex.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/posix_error.h"
+#include "test/util/signal_util.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+using ::testing::KilledBySignal;
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+constexpr int kSigno = SIGUSR1;
+constexpr int kSignoMax = 64;  // SIGRTMAX
+constexpr int kSignoAlt = SIGUSR2;
+
+// Returns a new signalfd.
+inline PosixErrorOr<FileDescriptor> NewSignalFD(sigset_t* mask, int flags = 0) {
+  int fd = signalfd(-1, mask, flags);
+  MaybeSave();
+  if (fd < 0) {
+    return PosixError(errno, "signalfd");
+  }
+  return FileDescriptor(fd);
+}
+
+class SignalfdTest : public ::testing::TestWithParam<int> {};
+
+TEST_P(SignalfdTest, Basic) {
+  int signo = GetParam();
+  // Create the signalfd.
+  sigset_t mask;
+  sigemptyset(&mask);
+  sigaddset(&mask, signo);
+  FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NewSignalFD(&mask, 0));
+
+  // Deliver the blocked signal.
+  const auto scoped_sigmask =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_BLOCK, signo));
+  ASSERT_THAT(tgkill(getpid(), gettid(), signo), SyscallSucceeds());
+
+  // We should now read the signal.
+  struct signalfd_siginfo rbuf;
+  ASSERT_THAT(read(fd.get(), &rbuf, sizeof(rbuf)),
+              SyscallSucceedsWithValue(sizeof(rbuf)));
+  EXPECT_EQ(rbuf.ssi_signo, signo);
+}
+
+TEST_P(SignalfdTest, MaskWorks) {
+  int signo = GetParam();
+  // Create two signalfds with different masks.
+  sigset_t mask1, mask2;
+  sigemptyset(&mask1);
+  sigemptyset(&mask2);
+  sigaddset(&mask1, signo);
+  sigaddset(&mask2, kSignoAlt);
+  FileDescriptor fd1 = ASSERT_NO_ERRNO_AND_VALUE(NewSignalFD(&mask1, 0));
+  FileDescriptor fd2 = ASSERT_NO_ERRNO_AND_VALUE(NewSignalFD(&mask2, 0));
+
+  // Deliver the two signals.
+  const auto scoped_sigmask1 =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_BLOCK, signo));
+  const auto scoped_sigmask2 =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_BLOCK, kSignoAlt));
+  ASSERT_THAT(tgkill(getpid(), gettid(), signo), SyscallSucceeds());
+  ASSERT_THAT(tgkill(getpid(), gettid(), kSignoAlt), SyscallSucceeds());
+
+  // We should see the signals on the appropriate signalfds.
+  //
+  // We read in the opposite order as the signals deliver above, to ensure that
+  // we don't happen to read the correct signal from the correct signalfd.
+  struct signalfd_siginfo rbuf1, rbuf2;
+  ASSERT_THAT(read(fd2.get(), &rbuf2, sizeof(rbuf2)),
+              SyscallSucceedsWithValue(sizeof(rbuf2)));
+  EXPECT_EQ(rbuf2.ssi_signo, kSignoAlt);
+  ASSERT_THAT(read(fd1.get(), &rbuf1, sizeof(rbuf1)),
+              SyscallSucceedsWithValue(sizeof(rbuf1)));
+  EXPECT_EQ(rbuf1.ssi_signo, signo);
+}
+
+TEST(Signalfd, Cloexec) {
+  // Exec tests confirm that O_CLOEXEC has the intended effect. We just create a
+  // signalfd with the appropriate flag here and assert that the FD has it set.
+  sigset_t mask;
+  sigemptyset(&mask);
+  FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(NewSignalFD(&mask, SFD_CLOEXEC));
+  EXPECT_THAT(fcntl(fd.get(), F_GETFD), SyscallSucceedsWithValue(FD_CLOEXEC));
+}
+
+TEST_P(SignalfdTest, Blocking) {
+  int signo = GetParam();
+  // Create the signalfd in blocking mode.
+  sigset_t mask;
+  sigemptyset(&mask);
+  sigaddset(&mask, signo);
+  FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NewSignalFD(&mask, 0));
+
+  // Shared tid variable.
+  absl::Mutex mu;
+  bool has_tid;
+  pid_t tid;
+
+  // Start a thread reading.
+  ScopedThread t([&] {
+    // Copy the tid and notify the caller.
+    {
+      absl::MutexLock ml(&mu);
+      tid = gettid();
+      has_tid = true;
+    }
+
+    // Read the signal from the signalfd.
+    struct signalfd_siginfo rbuf;
+    ASSERT_THAT(read(fd.get(), &rbuf, sizeof(rbuf)),
+                SyscallSucceedsWithValue(sizeof(rbuf)));
+    EXPECT_EQ(rbuf.ssi_signo, signo);
+  });
+
+  // Wait until blocked.
+  absl::MutexLock ml(&mu);
+  mu.Await(absl::Condition(&has_tid));
+
+  // Deliver the signal to either the waiting thread, or
+  // to this thread. N.B. this is a bug in the core gVisor
+  // behavior for signalfd, and needs to be fixed.
+  //
+  // See gvisor.dev/issue/139.
+  if (IsRunningOnGvisor()) {
+    ASSERT_THAT(tgkill(getpid(), gettid(), signo), SyscallSucceeds());
+  } else {
+    ASSERT_THAT(tgkill(getpid(), tid, signo), SyscallSucceeds());
+  }
+
+  // Ensure that it was received.
+  t.Join();
+}
+
+TEST_P(SignalfdTest, ThreadGroup) {
+  int signo = GetParam();
+  // Create the signalfd in blocking mode.
+  sigset_t mask;
+  sigemptyset(&mask);
+  sigaddset(&mask, signo);
+  FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NewSignalFD(&mask, 0));
+
+  // Shared variable.
+  absl::Mutex mu;
+  bool first = false;
+  bool second = false;
+
+  // Start a thread reading.
+  ScopedThread t([&] {
+    // Read the signal from the signalfd.
+    struct signalfd_siginfo rbuf;
+    ASSERT_THAT(read(fd.get(), &rbuf, sizeof(rbuf)),
+                SyscallSucceedsWithValue(sizeof(rbuf)));
+    EXPECT_EQ(rbuf.ssi_signo, signo);
+
+    // Wait for the other thread.
+    absl::MutexLock ml(&mu);
+    first = true;
+    mu.Await(absl::Condition(&second));
+  });
+
+  // Deliver the signal to the threadgroup.
+  ASSERT_THAT(kill(getpid(), signo), SyscallSucceeds());
+
+  // Wait for the first thread to process.
+  {
+    absl::MutexLock ml(&mu);
+    mu.Await(absl::Condition(&first));
+  }
+
+  // Deliver to the thread group again (other thread still exists).
+  ASSERT_THAT(kill(getpid(), signo), SyscallSucceeds());
+
+  // Ensure that we can also receive it.
+  struct signalfd_siginfo rbuf;
+  ASSERT_THAT(read(fd.get(), &rbuf, sizeof(rbuf)),
+              SyscallSucceedsWithValue(sizeof(rbuf)));
+  EXPECT_EQ(rbuf.ssi_signo, signo);
+
+  // Mark the test as done.
+  {
+    absl::MutexLock ml(&mu);
+    second = true;
+  }
+
+  // The other thread should be joinable.
+  t.Join();
+}
+
+TEST_P(SignalfdTest, Nonblock) {
+  int signo = GetParam();
+  // Create the signalfd in non-blocking mode.
+  sigset_t mask;
+  sigemptyset(&mask);
+  sigaddset(&mask, signo);
+  FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(NewSignalFD(&mask, SFD_NONBLOCK));
+
+  // We should return if we attempt to read.
+  struct signalfd_siginfo rbuf;
+  ASSERT_THAT(read(fd.get(), &rbuf, sizeof(rbuf)),
+              SyscallFailsWithErrno(EWOULDBLOCK));
+
+  // Block and deliver the signal.
+  const auto scoped_sigmask =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_BLOCK, signo));
+  ASSERT_THAT(tgkill(getpid(), gettid(), signo), SyscallSucceeds());
+
+  // Ensure that a read actually works.
+  ASSERT_THAT(read(fd.get(), &rbuf, sizeof(rbuf)),
+              SyscallSucceedsWithValue(sizeof(rbuf)));
+  EXPECT_EQ(rbuf.ssi_signo, signo);
+
+  // Should block again.
+  EXPECT_THAT(read(fd.get(), &rbuf, sizeof(rbuf)),
+              SyscallFailsWithErrno(EWOULDBLOCK));
+}
+
+TEST_P(SignalfdTest, SetMask) {
+  int signo = GetParam();
+  // Create the signalfd matching nothing.
+  sigset_t mask;
+  sigemptyset(&mask);
+  FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(NewSignalFD(&mask, SFD_NONBLOCK));
+
+  // Block and deliver a signal.
+  const auto scoped_sigmask =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_BLOCK, signo));
+  ASSERT_THAT(tgkill(getpid(), gettid(), signo), SyscallSucceeds());
+
+  // We should have nothing.
+  struct signalfd_siginfo rbuf;
+  ASSERT_THAT(read(fd.get(), &rbuf, sizeof(rbuf)),
+              SyscallFailsWithErrno(EWOULDBLOCK));
+
+  // Change the signal mask.
+  sigaddset(&mask, signo);
+  ASSERT_THAT(signalfd(fd.get(), &mask, 0), SyscallSucceeds());
+
+  // We should now have the signal.
+  ASSERT_THAT(read(fd.get(), &rbuf, sizeof(rbuf)),
+              SyscallSucceedsWithValue(sizeof(rbuf)));
+  EXPECT_EQ(rbuf.ssi_signo, signo);
+}
+
+TEST_P(SignalfdTest, Poll) {
+  int signo = GetParam();
+  // Create the signalfd.
+  sigset_t mask;
+  sigemptyset(&mask);
+  sigaddset(&mask, signo);
+  FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NewSignalFD(&mask, 0));
+
+  // Block the signal, and start a thread to deliver it.
+  const auto scoped_sigmask =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_BLOCK, signo));
+  pid_t orig_tid = gettid();
+  ScopedThread t([&] {
+    absl::SleepFor(absl::Seconds(5));
+    ASSERT_THAT(tgkill(getpid(), orig_tid, signo), SyscallSucceeds());
+  });
+
+  // Start polling for the signal. We expect that it is not available at the
+  // outset, but then becomes available when the signal is sent. We give a
+  // timeout of 10000ms (or the delay above + 5 seconds of additional grace
+  // time).
+  struct pollfd poll_fd = {fd.get(), POLLIN, 0};
+  EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, 10000),
+              SyscallSucceedsWithValue(1));
+
+  // Actually read the signal to prevent delivery.
+  struct signalfd_siginfo rbuf;
+  EXPECT_THAT(read(fd.get(), &rbuf, sizeof(rbuf)),
+              SyscallSucceedsWithValue(sizeof(rbuf)));
+}
+
+std::string PrintSigno(::testing::TestParamInfo<int> info) {
+  switch (info.param) {
+    case kSigno:
+      return "kSigno";
+    case kSignoMax:
+      return "kSignoMax";
+    default:
+      return absl::StrCat(info.param);
+  }
+}
+INSTANTIATE_TEST_SUITE_P(Signalfd, SignalfdTest,
+                         ::testing::Values(kSigno, kSignoMax), PrintSigno);
+
+TEST(Signalfd, Ppoll) {
+  sigset_t mask;
+  sigemptyset(&mask);
+  sigaddset(&mask, SIGKILL);
+  FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(NewSignalFD(&mask, SFD_CLOEXEC));
+
+  // Ensure that the given ppoll blocks.
+  struct pollfd pfd = {};
+  pfd.fd = fd.get();
+  pfd.events = POLLIN;
+  struct timespec timeout = {};
+  timeout.tv_sec = 1;
+  EXPECT_THAT(RetryEINTR(ppoll)(&pfd, 1, &timeout, &mask),
+              SyscallSucceedsWithValue(0));
+}
+
+TEST(Signalfd, KillStillKills) {
+  sigset_t mask;
+  sigemptyset(&mask);
+  sigaddset(&mask, SIGKILL);
+  FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(NewSignalFD(&mask, SFD_CLOEXEC));
+
+  // Just because there is a signalfd, we shouldn't see any change in behavior
+  // for unblockable signals. It's easier to test this with SIGKILL.
+  const auto scoped_sigmask =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_BLOCK, SIGKILL));
+  EXPECT_EXIT(tgkill(getpid(), gettid(), SIGKILL), KilledBySignal(SIGKILL), "");
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
+
+int main(int argc, char** argv) {
+  // These tests depend on delivering signals. Block them up front so that all
+  // other threads created by TestInit will also have them blocked, and they
+  // will not interface with the rest of the test.
+  sigset_t set;
+  sigemptyset(&set);
+  sigaddset(&set, gvisor::testing::kSigno);
+  sigaddset(&set, gvisor::testing::kSignoMax);
+  sigaddset(&set, gvisor::testing::kSignoAlt);
+  TEST_PCHECK(sigprocmask(SIG_BLOCK, &set, nullptr) == 0);
+
+  gvisor::testing::TestInit(&argc, &argv);
+
+  return gvisor::testing::RunAllTests();
+}
diff --git a/test/syscalls/linux/sigprocmask.cc b/test/syscalls/linux/sigprocmask.cc
new file mode 100644
index 000000000..a603fc1d1
--- /dev/null
+++ b/test/syscalls/linux/sigprocmask.cc
@@ -0,0 +1,269 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <signal.h>
+#include <stddef.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "test/util/signal_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// Signals numbers used for testing.
+static constexpr int kTestSignal1 = SIGUSR1;
+static constexpr int kTestSignal2 = SIGUSR2;
+
+static int raw_sigprocmask(int how, const sigset_t* set, sigset_t* oldset) {
+  return syscall(SYS_rt_sigprocmask, how, set, oldset, _NSIG / 8);
+}
+
+// count of the number of signals received
+int signal_count[kMaxSignal + 1];
+
+// signal handler increments the signal counter
+void SigHandler(int sig, siginfo_t* info, void* context) {
+  TEST_CHECK(sig > 0 && sig <= kMaxSignal);
+  signal_count[sig] += 1;
+}
+
+// The test fixture saves and restores the signal mask and
+// sets up handlers for kTestSignal1 and kTestSignal2.
+class SigProcMaskTest : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    // Save the current signal mask.
+    EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, nullptr, &mask_),
+                SyscallSucceeds());
+
+    // Setup signal handlers for kTestSignal1 and kTestSignal2.
+    struct sigaction sa;
+    sa.sa_sigaction = SigHandler;
+    sigfillset(&sa.sa_mask);
+    sa.sa_flags = SA_SIGINFO;
+    EXPECT_THAT(sigaction(kTestSignal1, &sa, &sa_test_sig_1_),
+                SyscallSucceeds());
+    EXPECT_THAT(sigaction(kTestSignal2, &sa, &sa_test_sig_2_),
+                SyscallSucceeds());
+
+    // Clear the signal counters.
+    memset(signal_count, 0, sizeof(signal_count));
+  }
+
+  void TearDown() override {
+    // Restore the signal mask.
+    EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, &mask_, nullptr),
+                SyscallSucceeds());
+
+    // Restore the signal handlers for kTestSignal1 and kTestSignal2.
+    EXPECT_THAT(sigaction(kTestSignal1, &sa_test_sig_1_, nullptr),
+                SyscallSucceeds());
+    EXPECT_THAT(sigaction(kTestSignal2, &sa_test_sig_2_, nullptr),
+                SyscallSucceeds());
+  }
+
+ private:
+  sigset_t mask_;
+  struct sigaction sa_test_sig_1_;
+  struct sigaction sa_test_sig_2_;
+};
+
+// Both sigsets nullptr should succeed and do nothing.
+TEST_F(SigProcMaskTest, NullAddress) {
+  EXPECT_THAT(raw_sigprocmask(SIG_BLOCK, nullptr, NULL), SyscallSucceeds());
+  EXPECT_THAT(raw_sigprocmask(SIG_UNBLOCK, nullptr, NULL), SyscallSucceeds());
+  EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, nullptr, NULL), SyscallSucceeds());
+}
+
+// Bad address for either sigset should fail with EFAULT.
+TEST_F(SigProcMaskTest, BadAddress) {
+  sigset_t* bad_addr = reinterpret_cast<sigset_t*>(-1);
+
+  EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, bad_addr, nullptr),
+              SyscallFailsWithErrno(EFAULT));
+
+  EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, nullptr, bad_addr),
+              SyscallFailsWithErrno(EFAULT));
+}
+
+// Bad value of the "how" parameter should fail with EINVAL.
+TEST_F(SigProcMaskTest, BadParameter) {
+  int bad_param_1 = -1;
+  int bad_param_2 = 42;
+
+  sigset_t set1;
+  sigemptyset(&set1);
+
+  EXPECT_THAT(raw_sigprocmask(bad_param_1, &set1, nullptr),
+              SyscallFailsWithErrno(EINVAL));
+
+  EXPECT_THAT(raw_sigprocmask(bad_param_2, &set1, nullptr),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+// Check that we can get the current signal mask.
+TEST_F(SigProcMaskTest, GetMask) {
+  sigset_t set1;
+  sigset_t set2;
+
+  sigemptyset(&set1);
+  sigfillset(&set2);
+  EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, nullptr, &set1), SyscallSucceeds());
+  EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, nullptr, &set2), SyscallSucceeds());
+  EXPECT_THAT(set1, EqualsSigset(set2));
+}
+
+// Check that we can set the signal mask.
+TEST_F(SigProcMaskTest, SetMask) {
+  sigset_t actual;
+  sigset_t expected;
+
+  // Try to mask all signals
+  sigfillset(&expected);
+  EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, &expected, nullptr),
+              SyscallSucceeds());
+  EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, nullptr, &actual),
+              SyscallSucceeds());
+  // sigprocmask() should have silently ignored SIGKILL and SIGSTOP.
+  sigdelset(&expected, SIGSTOP);
+  sigdelset(&expected, SIGKILL);
+  EXPECT_THAT(actual, EqualsSigset(expected));
+
+  // Try to clear the signal mask
+  sigemptyset(&expected);
+  EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, &expected, nullptr),
+              SyscallSucceeds());
+  EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, nullptr, &actual),
+              SyscallSucceeds());
+  EXPECT_THAT(actual, EqualsSigset(expected));
+
+  // Try to set a mask with one signal.
+  sigemptyset(&expected);
+  sigaddset(&expected, kTestSignal1);
+  EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, &expected, nullptr),
+              SyscallSucceeds());
+  EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, nullptr, &actual),
+              SyscallSucceeds());
+  EXPECT_THAT(actual, EqualsSigset(expected));
+}
+
+// Check that we can add and remove signals.
+TEST_F(SigProcMaskTest, BlockUnblock) {
+  sigset_t actual;
+  sigset_t expected;
+
+  // Try to set a mask with one signal.
+  sigemptyset(&expected);
+  sigaddset(&expected, kTestSignal1);
+  EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, &expected, nullptr),
+              SyscallSucceeds());
+  EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, nullptr, &actual),
+              SyscallSucceeds());
+  EXPECT_THAT(actual, EqualsSigset(expected));
+
+  // Try to add another signal.
+  sigset_t block;
+  sigemptyset(&block);
+  sigaddset(&block, kTestSignal2);
+  EXPECT_THAT(raw_sigprocmask(SIG_BLOCK, &block, nullptr), SyscallSucceeds());
+  EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, nullptr, &actual),
+              SyscallSucceeds());
+  sigaddset(&expected, kTestSignal2);
+  EXPECT_THAT(actual, EqualsSigset(expected));
+
+  // Try to remove a signal.
+  sigset_t unblock;
+  sigemptyset(&unblock);
+  sigaddset(&unblock, kTestSignal1);
+  EXPECT_THAT(raw_sigprocmask(SIG_UNBLOCK, &unblock, nullptr),
+              SyscallSucceeds());
+  EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, nullptr, &actual),
+              SyscallSucceeds());
+  sigdelset(&expected, kTestSignal1);
+  EXPECT_THAT(actual, EqualsSigset(expected));
+}
+
+// Test that the signal mask actually blocks signals.
+TEST_F(SigProcMaskTest, SignalHandler) {
+  sigset_t mask;
+
+  // clear the signal mask
+  sigemptyset(&mask);
+  EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, &mask, nullptr), SyscallSucceeds());
+
+  // Check the initial signal counts.
+  EXPECT_EQ(0, signal_count[kTestSignal1]);
+  EXPECT_EQ(0, signal_count[kTestSignal2]);
+
+  // Check that both kTestSignal1 and kTestSignal2 are not blocked.
+  raise(kTestSignal1);
+  raise(kTestSignal2);
+  EXPECT_EQ(1, signal_count[kTestSignal1]);
+  EXPECT_EQ(1, signal_count[kTestSignal2]);
+
+  // Block kTestSignal1.
+  sigaddset(&mask, kTestSignal1);
+  EXPECT_THAT(raw_sigprocmask(SIG_BLOCK, &mask, nullptr), SyscallSucceeds());
+
+  // Check that kTestSignal1 is blocked.
+  raise(kTestSignal1);
+  raise(kTestSignal2);
+  EXPECT_EQ(1, signal_count[kTestSignal1]);
+  EXPECT_EQ(2, signal_count[kTestSignal2]);
+
+  // Unblock kTestSignal1.
+  sigaddset(&mask, kTestSignal1);
+  EXPECT_THAT(raw_sigprocmask(SIG_UNBLOCK, &mask, nullptr), SyscallSucceeds());
+
+  // Check that the unblocked kTestSignal1 has been delivered.
+  EXPECT_EQ(2, signal_count[kTestSignal1]);
+  EXPECT_EQ(2, signal_count[kTestSignal2]);
+}
+
+// Check that sigprocmask correctly handles aliasing of the set and oldset
+// pointers. Regression test for b/30502311.
+TEST_F(SigProcMaskTest, AliasedSets) {
+  sigset_t mask;
+
+  // Set a mask in which only kTestSignal1 is blocked.
+  sigset_t mask1;
+  sigemptyset(&mask1);
+  sigaddset(&mask1, kTestSignal1);
+  mask = mask1;
+  ASSERT_THAT(raw_sigprocmask(SIG_SETMASK, &mask, nullptr), SyscallSucceeds());
+
+  // Exchange it with a mask in which only kTestSignal2 is blocked.
+  sigset_t mask2;
+  sigemptyset(&mask2);
+  sigaddset(&mask2, kTestSignal2);
+  mask = mask2;
+  ASSERT_THAT(raw_sigprocmask(SIG_SETMASK, &mask, &mask), SyscallSucceeds());
+
+  // Check that the exchange succeeeded:
+  // mask should now contain the previously-set mask blocking only kTestSignal1.
+  EXPECT_THAT(mask, EqualsSigset(mask1));
+  // The current mask should block only kTestSignal2.
+  ASSERT_THAT(raw_sigprocmask(0, nullptr, &mask), SyscallSucceeds());
+  EXPECT_THAT(mask, EqualsSigset(mask2));
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/sigstop.cc b/test/syscalls/linux/sigstop.cc
new file mode 100644
index 000000000..b2fcedd62
--- /dev/null
+++ b/test/syscalls/linux/sigstop.cc
@@ -0,0 +1,151 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <signal.h>
+#include <stdlib.h>
+#include <sys/select.h>
+
+#include "gtest/gtest.h"
+#include "absl/flags/flag.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/util/multiprocess_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+ABSL_FLAG(bool, sigstop_test_child, false,
+          "If true, run the SigstopTest child workload.");
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+constexpr absl::Duration kChildStartupDelay = absl::Seconds(5);
+constexpr absl::Duration kChildMainThreadDelay = absl::Seconds(10);
+constexpr absl::Duration kChildExtraThreadDelay = absl::Seconds(15);
+constexpr absl::Duration kPostSIGSTOPDelay = absl::Seconds(20);
+
+// Comparisons on absl::Duration aren't yet constexpr (2017-07-14), so we
+// can't just use static_assert.
+TEST(SigstopTest, TimesAreRelativelyConsistent) {
+  EXPECT_LT(kChildStartupDelay, kChildMainThreadDelay)
+      << "Child process will exit before the parent process attempts to stop "
+         "it";
+  EXPECT_LT(kChildMainThreadDelay, kChildExtraThreadDelay)
+      << "Secondary thread in child process will exit before main thread, "
+         "causing it to exit with the wrong code";
+  EXPECT_LT(kChildExtraThreadDelay, kPostSIGSTOPDelay)
+      << "Parent process stops waiting before child process may exit if "
+         "improperly stopped, rendering the test ineffective";
+}
+
+// Exit codes communicated from the child workload to the parent test process.
+constexpr int kChildMainThreadExitCode = 10;
+constexpr int kChildExtraThreadExitCode = 11;
+
+TEST(SigstopTest, Correctness) {
+  pid_t child_pid = -1;
+  int execve_errno = 0;
+  auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+      ForkAndExec("/proc/self/exe", {"/proc/self/exe", "--sigstop_test_child"},
+                  {}, nullptr, &child_pid, &execve_errno));
+
+  ASSERT_GT(child_pid, 0);
+  ASSERT_EQ(execve_errno, 0);
+
+  // Wait for the child subprocess to start the second thread before stopping
+  // it.
+  absl::SleepFor(kChildStartupDelay);
+  ASSERT_THAT(kill(child_pid, SIGSTOP), SyscallSucceeds());
+  int status;
+  EXPECT_THAT(RetryEINTR(waitpid)(child_pid, &status, WUNTRACED),
+              SyscallSucceedsWithValue(child_pid));
+  EXPECT_TRUE(WIFSTOPPED(status));
+  EXPECT_EQ(SIGSTOP, WSTOPSIG(status));
+
+  // Sleep for longer than either of the sleeps in the child subprocess,
+  // expecting the child to stay alive because it's stopped.
+  absl::SleepFor(kPostSIGSTOPDelay);
+  ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, WNOHANG),
+              SyscallSucceedsWithValue(0));
+
+  // Resume the child.
+  ASSERT_THAT(kill(child_pid, SIGCONT), SyscallSucceeds());
+
+  EXPECT_THAT(RetryEINTR(waitpid)(child_pid, &status, WCONTINUED),
+              SyscallSucceedsWithValue(child_pid));
+  EXPECT_TRUE(WIFCONTINUED(status));
+
+  // Expect it to die.
+  ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds());
+  ASSERT_TRUE(WIFEXITED(status));
+  ASSERT_EQ(WEXITSTATUS(status), kChildMainThreadExitCode);
+}
+
+// Like base:SleepFor, but tries to avoid counting time spent stopped due to a
+// stop signal toward the sleep.
+//
+// This is required due to an inconsistency in how nanosleep(2) and stop signals
+// interact on Linux. When nanosleep is interrupted, it writes the remaining
+// time back to its second timespec argument, so that if nanosleep is
+// interrupted by a signal handler then userspace can immediately call nanosleep
+// again with that timespec. However, if nanosleep is automatically restarted
+// (because it's interrupted by a signal that is not delivered to a handler,
+// such as a stop signal), it's restarted based on the timer's former *absolute*
+// expiration time (via ERESTART_RESTARTBLOCK => SYS_restart_syscall =>
+// hrtimer_nanosleep_restart). This means that time spent stopped is effectively
+// counted as time spent sleeping, resulting in less time spent sleeping than
+// expected.
+//
+// Dividing the sleep into multiple smaller sleeps limits the impact of this
+// effect to the length of each sleep during which a stop occurs; for example,
+// if a sleeping process is only stopped once, SleepIgnoreStopped can
+// under-sleep by at most 100ms.
+void SleepIgnoreStopped(absl::Duration d) {
+  absl::Duration const max_sleep = absl::Milliseconds(100);
+  while (d > absl::ZeroDuration()) {
+    absl::Duration to_sleep = std::min(d, max_sleep);
+    absl::SleepFor(to_sleep);
+    d -= to_sleep;
+  }
+}
+
+void RunChild() {
+  // Start another thread that attempts to call exit_group with a different
+  // error code, in order to verify that SIGSTOP stops this thread as well.
+  ScopedThread t([] {
+    SleepIgnoreStopped(kChildExtraThreadDelay);
+    exit(kChildExtraThreadExitCode);
+  });
+  SleepIgnoreStopped(kChildMainThreadDelay);
+  exit(kChildMainThreadExitCode);
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
+
+int main(int argc, char** argv) {
+  gvisor::testing::TestInit(&argc, &argv);
+
+  if (absl::GetFlag(FLAGS_sigstop_test_child)) {
+    gvisor::testing::RunChild();
+    return 1;
+  }
+
+  return gvisor::testing::RunAllTests();
+}
diff --git a/test/syscalls/linux/sigtimedwait.cc b/test/syscalls/linux/sigtimedwait.cc
new file mode 100644
index 000000000..4f8afff15
--- /dev/null
+++ b/test/syscalls/linux/sigtimedwait.cc
@@ -0,0 +1,323 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/logging.h"
+#include "test/util/signal_util.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+#include "test/util/timer_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// N.B. main() blocks SIGALRM and SIGCHLD on all threads.
+
+constexpr int kAlarmSecs = 12;
+
+void NoopHandler(int sig, siginfo_t* info, void* context) {}
+
+TEST(SigtimedwaitTest, InvalidTimeout) {
+  sigset_t mask;
+  sigemptyset(&mask);
+  struct timespec timeout = {0, 1000000001};
+  EXPECT_THAT(sigtimedwait(&mask, nullptr, &timeout),
+              SyscallFailsWithErrno(EINVAL));
+  timeout = {-1, 0};
+  EXPECT_THAT(sigtimedwait(&mask, nullptr, &timeout),
+              SyscallFailsWithErrno(EINVAL));
+  timeout = {0, -1};
+  EXPECT_THAT(sigtimedwait(&mask, nullptr, &timeout),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+// No random save as the test relies on alarm timing. Cooperative save tests
+// already cover the save between alarm and wait.
+TEST(SigtimedwaitTest, AlarmReturnsAlarm_NoRandomSave) {
+  struct itimerval itv = {};
+  itv.it_value.tv_sec = kAlarmSecs;
+  const auto itimer_cleanup =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedItimer(ITIMER_REAL, itv));
+
+  sigset_t mask;
+  sigemptyset(&mask);
+  sigaddset(&mask, SIGALRM);
+  siginfo_t info = {};
+  EXPECT_THAT(RetryEINTR(sigtimedwait)(&mask, &info, nullptr),
+              SyscallSucceedsWithValue(SIGALRM));
+  EXPECT_EQ(SIGALRM, info.si_signo);
+}
+
+// No random save as the test relies on alarm timing. Cooperative save tests
+// already cover the save between alarm and wait.
+TEST(SigtimedwaitTest, NullTimeoutReturnsEINTR_NoRandomSave) {
+  struct sigaction sa;
+  sa.sa_sigaction = NoopHandler;
+  sigfillset(&sa.sa_mask);
+  sa.sa_flags = SA_SIGINFO;
+  const auto action_cleanup =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGALRM, sa));
+
+  const auto mask_cleanup =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, SIGALRM));
+
+  struct itimerval itv = {};
+  itv.it_value.tv_sec = kAlarmSecs;
+  const auto itimer_cleanup =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedItimer(ITIMER_REAL, itv));
+
+  sigset_t mask;
+  sigemptyset(&mask);
+  EXPECT_THAT(sigtimedwait(&mask, nullptr, nullptr),
+              SyscallFailsWithErrno(EINTR));
+}
+
+TEST(SigtimedwaitTest, LegitTimeoutReturnsEAGAIN) {
+  sigset_t mask;
+  sigemptyset(&mask);
+  struct timespec timeout = {1, 0};  // 1 second
+  EXPECT_THAT(RetryEINTR(sigtimedwait)(&mask, nullptr, &timeout),
+              SyscallFailsWithErrno(EAGAIN));
+}
+
+TEST(SigtimedwaitTest, ZeroTimeoutReturnsEAGAIN) {
+  sigset_t mask;
+  sigemptyset(&mask);
+  struct timespec timeout = {0, 0};  // 0 second
+  EXPECT_THAT(sigtimedwait(&mask, nullptr, &timeout),
+              SyscallFailsWithErrno(EAGAIN));
+}
+
+TEST(SigtimedwaitTest, KillGeneratedSIGCHLD) {
+  EXPECT_THAT(kill(getpid(), SIGCHLD), SyscallSucceeds());
+
+  sigset_t mask;
+  sigemptyset(&mask);
+  sigaddset(&mask, SIGCHLD);
+  struct timespec ts = {5, 0};
+  EXPECT_THAT(RetryEINTR(sigtimedwait)(&mask, nullptr, &ts),
+              SyscallSucceedsWithValue(SIGCHLD));
+}
+
+TEST(SigtimedwaitTest, ChildExitGeneratedSIGCHLD) {
+  pid_t pid = fork();
+  if (pid == 0) {
+    _exit(0);
+  }
+  ASSERT_THAT(pid, SyscallSucceeds());
+
+  int status;
+  EXPECT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid));
+  EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) << status;
+
+  sigset_t mask;
+  sigemptyset(&mask);
+  sigaddset(&mask, SIGCHLD);
+  struct timespec ts = {5, 0};
+  EXPECT_THAT(RetryEINTR(sigtimedwait)(&mask, nullptr, &ts),
+              SyscallSucceedsWithValue(SIGCHLD));
+}
+
+TEST(SigtimedwaitTest, ChildExitGeneratedSIGCHLDWithHandler) {
+  // Setup handler for SIGCHLD, but don't unblock it.
+  struct sigaction sa;
+  sa.sa_sigaction = NoopHandler;
+  sigfillset(&sa.sa_mask);
+  sa.sa_flags = SA_SIGINFO;
+  const auto action_cleanup =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGCHLD, sa));
+
+  pid_t pid = fork();
+  if (pid == 0) {
+    _exit(0);
+  }
+  ASSERT_THAT(pid, SyscallSucceeds());
+
+  sigset_t mask;
+  sigemptyset(&mask);
+  sigaddset(&mask, SIGCHLD);
+  struct timespec ts = {5, 0};
+  EXPECT_THAT(RetryEINTR(sigtimedwait)(&mask, nullptr, &ts),
+              SyscallSucceedsWithValue(SIGCHLD));
+
+  int status;
+  EXPECT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid));
+  EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) << status;
+}
+
+// sigtimedwait cannot catch SIGKILL.
+TEST(SigtimedwaitTest, SIGKILLUncaught) {
+  // This is a regression test for sigtimedwait dequeuing SIGKILLs, thus
+  // preventing the task from exiting.
+  //
+  // The explanation below is specific to behavior in gVisor. The Linux behavior
+  // here is irrelevant because without a bug that prevents delivery of SIGKILL,
+  // none of this behavior is visible (in Linux or gVisor).
+  //
+  // SIGKILL is rather intrusive. Simply sending the SIGKILL marks
+  // ThreadGroup.exitStatus as exiting with SIGKILL, before the SIGKILL is even
+  // delivered.
+  //
+  // As a result, we cannot simply exit the child with a different exit code if
+  // it survives and expect to see that code in waitpid because:
+  //   1. PrepareGroupExit will override Task.exitStatus with
+  //      ThreadGroup.exitStatus.
+  //   2. waitpid(2) will always return ThreadGroup.exitStatus rather than
+  //      Task.exitStatus.
+  //
+  // We could use exit(2) to set Task.exitStatus without override, and a SIGCHLD
+  // handler to receive Task.exitStatus in the parent, but with that much
+  // test complexity, it is cleaner to simply use a pipe to notify the parent
+  // that we survived.
+  constexpr auto kSigtimedwaitSetupTime = absl::Seconds(2);
+
+  int pipe_fds[2];
+  ASSERT_THAT(pipe(pipe_fds), SyscallSucceeds());
+  FileDescriptor rfd(pipe_fds[0]);
+  FileDescriptor wfd(pipe_fds[1]);
+
+  pid_t pid = fork();
+  if (pid == 0) {
+    rfd.reset();
+
+    sigset_t mask;
+    sigemptyset(&mask);
+    sigaddset(&mask, SIGKILL);
+    RetryEINTR(sigtimedwait)(&mask, nullptr, nullptr);
+
+    // Survived.
+    char c = 'a';
+    TEST_PCHECK(WriteFd(wfd.get(), &c, 1) == 1);
+    _exit(1);
+  }
+  ASSERT_THAT(pid, SyscallSucceeds());
+
+  wfd.reset();
+
+  // Wait for child to block in sigtimedwait, then kill it.
+  absl::SleepFor(kSigtimedwaitSetupTime);
+
+  // Sending SIGKILL will attempt to enqueue the signal twice: once in the
+  // normal signal sending path, and once to all Tasks in the ThreadGroup when
+  // applying SIGKILL side-effects.
+  //
+  // If we use kill(2), the former will be on the ThreadGroup signal queue and
+  // the latter will be on the Task signal queue. sigtimedwait can only dequeue
+  // one signal, so the other would kill the Task, masking bugs.
+  //
+  // If we use tkill(2), the former will be on the Task signal queue and the
+  // latter will be dropped as a duplicate. Then sigtimedwait can theoretically
+  // dequeue the single SIGKILL.
+  EXPECT_THAT(syscall(SYS_tkill, pid, SIGKILL), SyscallSucceeds());
+
+  int status;
+  EXPECT_THAT(RetryEINTR(waitpid)(pid, &status, 0),
+              SyscallSucceedsWithValue(pid));
+  EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL) << status;
+
+  // Child shouldn't have survived.
+  char c;
+  EXPECT_THAT(ReadFd(rfd.get(), &c, 1), SyscallSucceedsWithValue(0));
+}
+
+TEST(SigtimedwaitTest, IgnoredUnmaskedSignal) {
+  constexpr int kSigno = SIGUSR1;
+  constexpr auto kSigtimedwaitSetupTime = absl::Seconds(2);
+  constexpr auto kSigtimedwaitTimeout = absl::Seconds(5);
+  ASSERT_GT(kSigtimedwaitTimeout, kSigtimedwaitSetupTime);
+
+  // Ensure that kSigno is ignored, and unmasked on this thread.
+  struct sigaction sa = {};
+  sa.sa_handler = SIG_IGN;
+  const auto scoped_sigaction =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(kSigno, sa));
+  sigset_t mask;
+  sigemptyset(&mask);
+  sigaddset(&mask, kSigno);
+  auto scoped_sigmask =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, mask));
+
+  // Create a thread which will send us kSigno while we are blocked in
+  // sigtimedwait.
+  pid_t tid = gettid();
+  ScopedThread sigthread([&] {
+    absl::SleepFor(kSigtimedwaitSetupTime);
+    EXPECT_THAT(tgkill(getpid(), tid, kSigno), SyscallSucceeds());
+  });
+
+  // sigtimedwait should not observe kSigno since it is ignored and already
+  // unmasked, causing it to be dropped before it is enqueued.
+  struct timespec timeout_ts = absl::ToTimespec(kSigtimedwaitTimeout);
+  EXPECT_THAT(RetryEINTR(sigtimedwait)(&mask, nullptr, &timeout_ts),
+              SyscallFailsWithErrno(EAGAIN));
+}
+
+TEST(SigtimedwaitTest, IgnoredMaskedSignal) {
+  constexpr int kSigno = SIGUSR1;
+  constexpr auto kSigtimedwaitSetupTime = absl::Seconds(2);
+  constexpr auto kSigtimedwaitTimeout = absl::Seconds(5);
+  ASSERT_GT(kSigtimedwaitTimeout, kSigtimedwaitSetupTime);
+
+  // Ensure that kSigno is ignored, and masked on this thread.
+  struct sigaction sa = {};
+  sa.sa_handler = SIG_IGN;
+  const auto scoped_sigaction =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(kSigno, sa));
+  sigset_t mask;
+  sigemptyset(&mask);
+  sigaddset(&mask, kSigno);
+  auto scoped_sigmask =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_BLOCK, mask));
+
+  // Create a thread which will send us kSigno while we are blocked in
+  // sigtimedwait.
+  pid_t tid = gettid();
+  ScopedThread sigthread([&] {
+    absl::SleepFor(kSigtimedwaitSetupTime);
+    EXPECT_THAT(tgkill(getpid(), tid, kSigno), SyscallSucceeds());
+  });
+
+  // sigtimedwait should observe kSigno since it is normally masked, causing it
+  // to be enqueued despite being ignored.
+  struct timespec timeout_ts = absl::ToTimespec(kSigtimedwaitTimeout);
+  EXPECT_THAT(RetryEINTR(sigtimedwait)(&mask, nullptr, &timeout_ts),
+              SyscallSucceedsWithValue(kSigno));
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
+
+int main(int argc, char** argv) {
+  // These tests depend on delivering SIGALRM/SIGCHLD to the main thread or in
+  // sigtimedwait. Block them so that any other threads created by TestInit will
+  // also have them blocked.
+  sigset_t set;
+  sigemptyset(&set);
+  sigaddset(&set, SIGALRM);
+  sigaddset(&set, SIGCHLD);
+  TEST_PCHECK(sigprocmask(SIG_BLOCK, &set, nullptr) == 0);
+
+  gvisor::testing::TestInit(&argc, &argv);
+  return gvisor::testing::RunAllTests();
+}
diff --git a/test/syscalls/linux/socket.cc b/test/syscalls/linux/socket.cc
new file mode 100644
index 000000000..c20cd3fcc
--- /dev/null
+++ b/test/syscalls/linux/socket.cc
@@ -0,0 +1,121 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/temp_umask.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+TEST(SocketTest, UnixSocketPairProtocol) {
+  int socks[2];
+  ASSERT_THAT(socketpair(AF_UNIX, SOCK_STREAM, PF_UNIX, socks),
+              SyscallSucceeds());
+  close(socks[0]);
+  close(socks[1]);
+}
+
+TEST(SocketTest, ProtocolUnix) {
+  struct {
+    int domain, type, protocol;
+  } tests[] = {
+      {AF_UNIX, SOCK_STREAM, PF_UNIX},
+      {AF_UNIX, SOCK_SEQPACKET, PF_UNIX},
+      {AF_UNIX, SOCK_DGRAM, PF_UNIX},
+  };
+  for (int i = 0; i < ABSL_ARRAYSIZE(tests); i++) {
+    ASSERT_NO_ERRNO_AND_VALUE(
+        Socket(tests[i].domain, tests[i].type, tests[i].protocol));
+  }
+}
+
+TEST(SocketTest, ProtocolInet) {
+  struct {
+    int domain, type, protocol;
+  } tests[] = {
+      {AF_INET, SOCK_DGRAM, IPPROTO_UDP},
+      {AF_INET, SOCK_STREAM, IPPROTO_TCP},
+  };
+  for (int i = 0; i < ABSL_ARRAYSIZE(tests); i++) {
+    ASSERT_NO_ERRNO_AND_VALUE(
+        Socket(tests[i].domain, tests[i].type, tests[i].protocol));
+  }
+}
+
+TEST(SocketTest, UnixSocketStat) {
+  SKIP_IF(IsRunningWithVFS1());
+
+  FileDescriptor bound =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_UNIX, SOCK_STREAM, PF_UNIX));
+
+  // The permissions of the file created with bind(2) should be defined by the
+  // permissions of the bound socket and the umask.
+  mode_t sock_perm = 0765, mask = 0123;
+  ASSERT_THAT(fchmod(bound.get(), sock_perm), SyscallSucceeds());
+  TempUmask m(mask);
+
+  struct sockaddr_un addr =
+      ASSERT_NO_ERRNO_AND_VALUE(UniqueUnixAddr(/*abstract=*/false, AF_UNIX));
+  ASSERT_THAT(bind(bound.get(), reinterpret_cast<struct sockaddr*>(&addr),
+                   sizeof(addr)),
+              SyscallSucceeds());
+
+  struct stat statbuf = {};
+  ASSERT_THAT(stat(addr.sun_path, &statbuf), SyscallSucceeds());
+
+  // Mode should be S_IFSOCK.
+  EXPECT_EQ(statbuf.st_mode, S_IFSOCK | sock_perm & ~mask);
+
+  // Timestamps should be equal and non-zero.
+  // TODO(b/158882152): Sockets currently don't implement timestamps.
+  if (!IsRunningOnGvisor()) {
+    EXPECT_NE(statbuf.st_atime, 0);
+    EXPECT_EQ(statbuf.st_atime, statbuf.st_mtime);
+    EXPECT_EQ(statbuf.st_atime, statbuf.st_ctime);
+  }
+}
+
+using SocketOpenTest = ::testing::TestWithParam<int>;
+
+// UDS cannot be opened.
+TEST_P(SocketOpenTest, Unix) {
+  // FIXME(b/142001530): Open incorrectly succeeds on gVisor.
+  SKIP_IF(IsRunningWithVFS1());
+
+  FileDescriptor bound =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_UNIX, SOCK_STREAM, PF_UNIX));
+
+  struct sockaddr_un addr =
+      ASSERT_NO_ERRNO_AND_VALUE(UniqueUnixAddr(/*abstract=*/false, AF_UNIX));
+
+  ASSERT_THAT(bind(bound.get(), reinterpret_cast<struct sockaddr*>(&addr),
+                   sizeof(addr)),
+              SyscallSucceeds());
+
+  EXPECT_THAT(open(addr.sun_path, GetParam()), SyscallFailsWithErrno(ENXIO));
+}
+
+INSTANTIATE_TEST_SUITE_P(OpenModes, SocketOpenTest,
+                         ::testing::Values(O_RDONLY, O_RDWR));
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_abstract.cc b/test/syscalls/linux/socket_abstract.cc
new file mode 100644
index 000000000..00999f192
--- /dev/null
+++ b/test/syscalls/linux/socket_abstract.cc
@@ -0,0 +1,49 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <vector>
+
+#include "test/syscalls/linux/socket_generic.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/socket_unix.h"
+#include "test/syscalls/linux/socket_unix_cmsg.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+std::vector<SocketPairKind> GetSocketPairs() {
+  return ApplyVec<SocketPairKind>(
+      AbstractBoundUnixDomainSocketPair,
+      AllBitwiseCombinations(List<int>{SOCK_STREAM, SOCK_DGRAM, SOCK_SEQPACKET},
+                             List<int>{0, SOCK_NONBLOCK}));
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    AbstractUnixSockets, AllSocketPairTest,
+    ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+INSTANTIATE_TEST_SUITE_P(
+    AbstractUnixSockets, UnixSocketPairTest,
+    ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+INSTANTIATE_TEST_SUITE_P(
+    AbstractUnixSockets, UnixSocketPairCmsgTest,
+    ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+}  // namespace
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_bind_to_device.cc b/test/syscalls/linux/socket_bind_to_device.cc
new file mode 100644
index 000000000..6b27f6eab
--- /dev/null
+++ b/test/syscalls/linux/socket_bind_to_device.cc
@@ -0,0 +1,313 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <arpa/inet.h>
+#include <linux/if_tun.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+
+#include <cstdio>
+#include <cstring>
+#include <map>
+#include <memory>
+#include <unordered_map>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/ip_socket_test_util.h"
+#include "test/syscalls/linux/socket_bind_to_device_util.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/capability_util.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+
+using std::string;
+
+// Test fixture for SO_BINDTODEVICE tests.
+class BindToDeviceTest : public ::testing::TestWithParam<SocketKind> {
+ protected:
+  void SetUp() override {
+    printf("Testing case: %s\n", GetParam().description.c_str());
+    ASSERT_TRUE(ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)))
+        << "CAP_NET_RAW is required to use SO_BINDTODEVICE";
+
+    interface_name_ = "eth1";
+    auto interface_names = GetInterfaceNames();
+    if (interface_names.find(interface_name_) == interface_names.end()) {
+      // Need a tunnel.
+      tunnel_ = ASSERT_NO_ERRNO_AND_VALUE(Tunnel::New());
+      interface_name_ = tunnel_->GetName();
+      ASSERT_FALSE(interface_name_.empty());
+    }
+    socket_ = ASSERT_NO_ERRNO_AND_VALUE(GetParam().Create());
+  }
+
+  string interface_name() const { return interface_name_; }
+
+  int socket_fd() const { return socket_->get(); }
+
+ private:
+  std::unique_ptr<Tunnel> tunnel_;
+  string interface_name_;
+  std::unique_ptr<FileDescriptor> socket_;
+};
+
+constexpr char kIllegalIfnameChar = '/';
+
+// Tests getsockopt of the default value.
+TEST_P(BindToDeviceTest, GetsockoptDefault) {
+  char name_buffer[IFNAMSIZ * 2];
+  char original_name_buffer[IFNAMSIZ * 2];
+  socklen_t name_buffer_size;
+
+  // Read the default SO_BINDTODEVICE.
+  memset(original_name_buffer, kIllegalIfnameChar, sizeof(name_buffer));
+  for (size_t i = 0; i <= sizeof(name_buffer); i++) {
+    memset(name_buffer, kIllegalIfnameChar, sizeof(name_buffer));
+    name_buffer_size = i;
+    EXPECT_THAT(getsockopt(socket_fd(), SOL_SOCKET, SO_BINDTODEVICE,
+                           name_buffer, &name_buffer_size),
+                SyscallSucceedsWithValue(0));
+    EXPECT_EQ(name_buffer_size, 0);
+    EXPECT_EQ(memcmp(name_buffer, original_name_buffer, sizeof(name_buffer)),
+              0);
+  }
+}
+
+// Tests setsockopt of invalid device name.
+TEST_P(BindToDeviceTest, SetsockoptInvalidDeviceName) {
+  char name_buffer[IFNAMSIZ * 2];
+  socklen_t name_buffer_size;
+
+  // Set an invalid device name.
+  memset(name_buffer, kIllegalIfnameChar, 5);
+  name_buffer_size = 5;
+  EXPECT_THAT(setsockopt(socket_fd(), SOL_SOCKET, SO_BINDTODEVICE, name_buffer,
+                         name_buffer_size),
+              SyscallFailsWithErrno(ENODEV));
+}
+
+// Tests setsockopt of a buffer with a valid device name but not
+// null-terminated, with different sizes of buffer.
+TEST_P(BindToDeviceTest, SetsockoptValidDeviceNameWithoutNullTermination) {
+  char name_buffer[IFNAMSIZ * 2];
+  socklen_t name_buffer_size;
+
+  strncpy(name_buffer, interface_name().c_str(), interface_name().size() + 1);
+  // Intentionally overwrite the null at the end.
+  memset(name_buffer + interface_name().size(), kIllegalIfnameChar,
+         sizeof(name_buffer) - interface_name().size());
+  for (size_t i = 1; i <= sizeof(name_buffer); i++) {
+    name_buffer_size = i;
+    SCOPED_TRACE(absl::StrCat("Buffer size: ", i));
+    // It should only work if the size provided is exactly right.
+    if (name_buffer_size == interface_name().size()) {
+      EXPECT_THAT(setsockopt(socket_fd(), SOL_SOCKET, SO_BINDTODEVICE,
+                             name_buffer, name_buffer_size),
+                  SyscallSucceeds());
+    } else {
+      EXPECT_THAT(setsockopt(socket_fd(), SOL_SOCKET, SO_BINDTODEVICE,
+                             name_buffer, name_buffer_size),
+                  SyscallFailsWithErrno(ENODEV));
+    }
+  }
+}
+
+// Tests setsockopt of a buffer with a valid device name and null-terminated,
+// with different sizes of buffer.
+TEST_P(BindToDeviceTest, SetsockoptValidDeviceNameWithNullTermination) {
+  char name_buffer[IFNAMSIZ * 2];
+  socklen_t name_buffer_size;
+
+  strncpy(name_buffer, interface_name().c_str(), interface_name().size() + 1);
+  // Don't overwrite the null at the end.
+  memset(name_buffer + interface_name().size() + 1, kIllegalIfnameChar,
+         sizeof(name_buffer) - interface_name().size() - 1);
+  for (size_t i = 1; i <= sizeof(name_buffer); i++) {
+    name_buffer_size = i;
+    SCOPED_TRACE(absl::StrCat("Buffer size: ", i));
+    // It should only work if the size provided is at least the right size.
+    if (name_buffer_size >= interface_name().size()) {
+      EXPECT_THAT(setsockopt(socket_fd(), SOL_SOCKET, SO_BINDTODEVICE,
+                             name_buffer, name_buffer_size),
+                  SyscallSucceeds());
+    } else {
+      EXPECT_THAT(setsockopt(socket_fd(), SOL_SOCKET, SO_BINDTODEVICE,
+                             name_buffer, name_buffer_size),
+                  SyscallFailsWithErrno(ENODEV));
+    }
+  }
+}
+
+// Tests that setsockopt of an invalid device name doesn't unset the previous
+// valid setsockopt.
+TEST_P(BindToDeviceTest, SetsockoptValidThenInvalid) {
+  char name_buffer[IFNAMSIZ * 2];
+  socklen_t name_buffer_size;
+
+  // Write successfully.
+  strncpy(name_buffer, interface_name().c_str(), sizeof(name_buffer));
+  ASSERT_THAT(setsockopt(socket_fd(), SOL_SOCKET, SO_BINDTODEVICE, name_buffer,
+                         sizeof(name_buffer)),
+              SyscallSucceeds());
+
+  // Read it back successfully.
+  memset(name_buffer, kIllegalIfnameChar, sizeof(name_buffer));
+  name_buffer_size = sizeof(name_buffer);
+  EXPECT_THAT(getsockopt(socket_fd(), SOL_SOCKET, SO_BINDTODEVICE, name_buffer,
+                         &name_buffer_size),
+              SyscallSucceeds());
+  EXPECT_EQ(name_buffer_size, interface_name().size() + 1);
+  EXPECT_STREQ(name_buffer, interface_name().c_str());
+
+  // Write unsuccessfully.
+  memset(name_buffer, kIllegalIfnameChar, sizeof(name_buffer));
+  name_buffer_size = 5;
+  EXPECT_THAT(setsockopt(socket_fd(), SOL_SOCKET, SO_BINDTODEVICE, name_buffer,
+                         sizeof(name_buffer)),
+              SyscallFailsWithErrno(ENODEV));
+
+  // Read it back successfully, it's unchanged.
+  memset(name_buffer, kIllegalIfnameChar, sizeof(name_buffer));
+  name_buffer_size = sizeof(name_buffer);
+  EXPECT_THAT(getsockopt(socket_fd(), SOL_SOCKET, SO_BINDTODEVICE, name_buffer,
+                         &name_buffer_size),
+              SyscallSucceeds());
+  EXPECT_EQ(name_buffer_size, interface_name().size() + 1);
+  EXPECT_STREQ(name_buffer, interface_name().c_str());
+}
+
+// Tests that setsockopt of zero-length string correctly unsets the previous
+// value.
+TEST_P(BindToDeviceTest, SetsockoptValidThenClear) {
+  char name_buffer[IFNAMSIZ * 2];
+  socklen_t name_buffer_size;
+
+  // Write successfully.
+  strncpy(name_buffer, interface_name().c_str(), sizeof(name_buffer));
+  EXPECT_THAT(setsockopt(socket_fd(), SOL_SOCKET, SO_BINDTODEVICE, name_buffer,
+                         sizeof(name_buffer)),
+              SyscallSucceeds());
+
+  // Read it back successfully.
+  memset(name_buffer, kIllegalIfnameChar, sizeof(name_buffer));
+  name_buffer_size = sizeof(name_buffer);
+  EXPECT_THAT(getsockopt(socket_fd(), SOL_SOCKET, SO_BINDTODEVICE, name_buffer,
+                         &name_buffer_size),
+              SyscallSucceeds());
+  EXPECT_EQ(name_buffer_size, interface_name().size() + 1);
+  EXPECT_STREQ(name_buffer, interface_name().c_str());
+
+  // Clear it successfully.
+  name_buffer_size = 0;
+  EXPECT_THAT(setsockopt(socket_fd(), SOL_SOCKET, SO_BINDTODEVICE, name_buffer,
+                         name_buffer_size),
+              SyscallSucceeds());
+
+  // Read it back successfully, it's cleared.
+  memset(name_buffer, kIllegalIfnameChar, sizeof(name_buffer));
+  name_buffer_size = sizeof(name_buffer);
+  EXPECT_THAT(getsockopt(socket_fd(), SOL_SOCKET, SO_BINDTODEVICE, name_buffer,
+                         &name_buffer_size),
+              SyscallSucceeds());
+  EXPECT_EQ(name_buffer_size, 0);
+}
+
+// Tests that setsockopt of empty string correctly unsets the previous
+// value.
+TEST_P(BindToDeviceTest, SetsockoptValidThenClearWithNull) {
+  char name_buffer[IFNAMSIZ * 2];
+  socklen_t name_buffer_size;
+
+  // Write successfully.
+  strncpy(name_buffer, interface_name().c_str(), sizeof(name_buffer));
+  EXPECT_THAT(setsockopt(socket_fd(), SOL_SOCKET, SO_BINDTODEVICE, name_buffer,
+                         sizeof(name_buffer)),
+              SyscallSucceeds());
+
+  // Read it back successfully.
+  memset(name_buffer, kIllegalIfnameChar, sizeof(name_buffer));
+  name_buffer_size = sizeof(name_buffer);
+  EXPECT_THAT(getsockopt(socket_fd(), SOL_SOCKET, SO_BINDTODEVICE, name_buffer,
+                         &name_buffer_size),
+              SyscallSucceeds());
+  EXPECT_EQ(name_buffer_size, interface_name().size() + 1);
+  EXPECT_STREQ(name_buffer, interface_name().c_str());
+
+  // Clear it successfully.
+  memset(name_buffer, kIllegalIfnameChar, sizeof(name_buffer));
+  name_buffer[0] = 0;
+  name_buffer_size = sizeof(name_buffer);
+  EXPECT_THAT(setsockopt(socket_fd(), SOL_SOCKET, SO_BINDTODEVICE, name_buffer,
+                         name_buffer_size),
+              SyscallSucceeds());
+
+  // Read it back successfully, it's cleared.
+  memset(name_buffer, kIllegalIfnameChar, sizeof(name_buffer));
+  name_buffer_size = sizeof(name_buffer);
+  EXPECT_THAT(getsockopt(socket_fd(), SOL_SOCKET, SO_BINDTODEVICE, name_buffer,
+                         &name_buffer_size),
+              SyscallSucceeds());
+  EXPECT_EQ(name_buffer_size, 0);
+}
+
+// Tests getsockopt with different buffer sizes.
+TEST_P(BindToDeviceTest, GetsockoptDevice) {
+  char name_buffer[IFNAMSIZ * 2];
+  socklen_t name_buffer_size;
+
+  // Write successfully.
+  strncpy(name_buffer, interface_name().c_str(), sizeof(name_buffer));
+  ASSERT_THAT(setsockopt(socket_fd(), SOL_SOCKET, SO_BINDTODEVICE, name_buffer,
+                         sizeof(name_buffer)),
+              SyscallSucceeds());
+
+  // Read it back at various buffer sizes.
+  for (size_t i = 0; i <= sizeof(name_buffer); i++) {
+    memset(name_buffer, kIllegalIfnameChar, sizeof(name_buffer));
+    name_buffer_size = i;
+    SCOPED_TRACE(absl::StrCat("Buffer size: ", i));
+    // Linux only allows a buffer at least IFNAMSIZ, even if less would suffice
+    // for this interface name.
+    if (name_buffer_size >= IFNAMSIZ) {
+      EXPECT_THAT(getsockopt(socket_fd(), SOL_SOCKET, SO_BINDTODEVICE,
+                             name_buffer, &name_buffer_size),
+                  SyscallSucceeds());
+      EXPECT_EQ(name_buffer_size, interface_name().size() + 1);
+      EXPECT_STREQ(name_buffer, interface_name().c_str());
+    } else {
+      EXPECT_THAT(getsockopt(socket_fd(), SOL_SOCKET, SO_BINDTODEVICE,
+                             name_buffer, &name_buffer_size),
+                  SyscallFailsWithErrno(EINVAL));
+      EXPECT_EQ(name_buffer_size, i);
+    }
+  }
+}
+
+INSTANTIATE_TEST_SUITE_P(BindToDeviceTest, BindToDeviceTest,
+                         ::testing::Values(IPv4UDPUnboundSocket(0),
+                                           IPv4TCPUnboundSocket(0)));
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_bind_to_device_distribution.cc b/test/syscalls/linux/socket_bind_to_device_distribution.cc
new file mode 100644
index 000000000..5ed57625c
--- /dev/null
+++ b/test/syscalls/linux/socket_bind_to_device_distribution.cc
@@ -0,0 +1,401 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <arpa/inet.h>
+#include <linux/if_tun.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+
+#include <atomic>
+#include <cstdio>
+#include <cstring>
+#include <map>
+#include <memory>
+#include <unordered_map>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/ip_socket_test_util.h"
+#include "test/syscalls/linux/socket_bind_to_device_util.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/capability_util.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+
+using std::string;
+using std::vector;
+
+struct EndpointConfig {
+  std::string bind_to_device;
+  double expected_ratio;
+};
+
+struct DistributionTestCase {
+  std::string name;
+  std::vector<EndpointConfig> endpoints;
+};
+
+struct ListenerConnector {
+  TestAddress listener;
+  TestAddress connector;
+};
+
+// Test fixture for SO_BINDTODEVICE tests the distribution of packets received
+// with varying SO_BINDTODEVICE settings.
+class BindToDeviceDistributionTest
+    : public ::testing::TestWithParam<
+          ::testing::tuple<ListenerConnector, DistributionTestCase>> {
+ protected:
+  void SetUp() override {
+    printf("Testing case: %s, listener=%s, connector=%s\n",
+           ::testing::get<1>(GetParam()).name.c_str(),
+           ::testing::get<0>(GetParam()).listener.description.c_str(),
+           ::testing::get<0>(GetParam()).connector.description.c_str());
+    ASSERT_TRUE(ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)))
+        << "CAP_NET_RAW is required to use SO_BINDTODEVICE";
+  }
+};
+
+PosixErrorOr<uint16_t> AddrPort(int family, sockaddr_storage const& addr) {
+  switch (family) {
+    case AF_INET:
+      return static_cast<uint16_t>(
+          reinterpret_cast<sockaddr_in const*>(&addr)->sin_port);
+    case AF_INET6:
+      return static_cast<uint16_t>(
+          reinterpret_cast<sockaddr_in6 const*>(&addr)->sin6_port);
+    default:
+      return PosixError(EINVAL,
+                        absl::StrCat("unknown socket family: ", family));
+  }
+}
+
+PosixError SetAddrPort(int family, sockaddr_storage* addr, uint16_t port) {
+  switch (family) {
+    case AF_INET:
+      reinterpret_cast<sockaddr_in*>(addr)->sin_port = port;
+      return NoError();
+    case AF_INET6:
+      reinterpret_cast<sockaddr_in6*>(addr)->sin6_port = port;
+      return NoError();
+    default:
+      return PosixError(EINVAL,
+                        absl::StrCat("unknown socket family: ", family));
+  }
+}
+
+// Binds sockets to different devices and then creates many TCP connections.
+// Checks that the distribution of connections received on the sockets matches
+// the expectation.
+TEST_P(BindToDeviceDistributionTest, Tcp) {
+  auto const& [listener_connector, test] = GetParam();
+
+  TestAddress const& listener = listener_connector.listener;
+  TestAddress const& connector = listener_connector.connector;
+  sockaddr_storage listen_addr = listener.addr;
+  sockaddr_storage conn_addr = connector.addr;
+
+  auto interface_names = GetInterfaceNames();
+
+  // Create the listening sockets.
+  std::vector<FileDescriptor> listener_fds;
+  std::vector<std::unique_ptr<Tunnel>> all_tunnels;
+  for (auto const& endpoint : test.endpoints) {
+    if (!endpoint.bind_to_device.empty() &&
+        interface_names.find(endpoint.bind_to_device) ==
+            interface_names.end()) {
+      all_tunnels.push_back(
+          ASSERT_NO_ERRNO_AND_VALUE(Tunnel::New(endpoint.bind_to_device)));
+      interface_names.insert(endpoint.bind_to_device);
+    }
+
+    listener_fds.push_back(ASSERT_NO_ERRNO_AND_VALUE(
+        Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP)));
+    int fd = listener_fds.back().get();
+
+    ASSERT_THAT(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &kSockOptOn,
+                           sizeof(kSockOptOn)),
+                SyscallSucceeds());
+    ASSERT_THAT(setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
+                           endpoint.bind_to_device.c_str(),
+                           endpoint.bind_to_device.size() + 1),
+                SyscallSucceeds());
+    ASSERT_THAT(
+        bind(fd, reinterpret_cast<sockaddr*>(&listen_addr), listener.addr_len),
+        SyscallSucceeds());
+    ASSERT_THAT(listen(fd, 40), SyscallSucceeds());
+
+    // On the first bind we need to determine which port was bound.
+    if (listener_fds.size() > 1) {
+      continue;
+    }
+
+    // Get the port bound by the listening socket.
+    socklen_t addrlen = listener.addr_len;
+    ASSERT_THAT(
+        getsockname(listener_fds[0].get(),
+                    reinterpret_cast<sockaddr*>(&listen_addr), &addrlen),
+        SyscallSucceeds());
+    uint16_t const port =
+        ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr));
+    ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port));
+  }
+
+  constexpr int kConnectAttempts = 10000;
+  std::atomic<int> connects_received = ATOMIC_VAR_INIT(0);
+  std::vector<int> accept_counts(listener_fds.size(), 0);
+  std::vector<std::unique_ptr<ScopedThread>> listen_threads(
+      listener_fds.size());
+
+  for (int i = 0; i < listener_fds.size(); i++) {
+    listen_threads[i] = absl::make_unique<ScopedThread>(
+        [&listener_fds, &accept_counts, &connects_received, i,
+         kConnectAttempts]() {
+          do {
+            auto fd = Accept(listener_fds[i].get(), nullptr, nullptr);
+            if (!fd.ok()) {
+              // Another thread has shutdown our read side causing the accept to
+              // fail.
+              ASSERT_GE(connects_received, kConnectAttempts)
+                  << "errno = " << fd.error();
+              return;
+            }
+            // Receive some data from a socket to be sure that the connect()
+            // system call has been completed on another side.
+            // Do a short read and then close the socket to trigger a RST. This
+            // ensures that both ends of the connection are cleaned up and no
+            // goroutines hang around in TIME-WAIT. We do this so that this test
+            // does not timeout under gotsan runs where lots of goroutines can
+            // cause the test to use absurd amounts of memory.
+            //
+            // See: https://tools.ietf.org/html/rfc2525#page-50 section 2.17
+            uint16_t data;
+            EXPECT_THAT(
+                RetryEINTR(recv)(fd.ValueOrDie().get(), &data, sizeof(data), 0),
+                SyscallSucceedsWithValue(sizeof(data)));
+            accept_counts[i]++;
+          } while (++connects_received < kConnectAttempts);
+
+          // Shutdown all sockets to wake up other threads.
+          for (auto const& listener_fd : listener_fds) {
+            shutdown(listener_fd.get(), SHUT_RDWR);
+          }
+        });
+  }
+
+  for (int i = 0; i < kConnectAttempts; i++) {
+    const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(
+        Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP));
+    ASSERT_THAT(
+        RetryEINTR(connect)(fd.get(), reinterpret_cast<sockaddr*>(&conn_addr),
+                            connector.addr_len),
+        SyscallSucceeds());
+
+    // Do two separate sends to ensure two segments are received. This is
+    // required for netstack where read is incorrectly assuming a whole
+    // segment is read when endpoint.Read() is called which is technically
+    // incorrect as the syscall that invoked endpoint.Read() may only
+    // consume it partially. This results in a case where a close() of
+    // such a socket does not trigger a RST in netstack due to the
+    // endpoint assuming that the endpoint has no unread data.
+    EXPECT_THAT(RetryEINTR(send)(fd.get(), &i, sizeof(i), 0),
+                SyscallSucceedsWithValue(sizeof(i)));
+
+    // TODO(gvisor.dev/issue/1449): Remove this block once netstack correctly
+    //   generates a RST.
+    if (IsRunningOnGvisor()) {
+      EXPECT_THAT(RetryEINTR(send)(fd.get(), &i, sizeof(i), 0),
+                  SyscallSucceedsWithValue(sizeof(i)));
+    }
+  }
+
+  // Join threads to be sure that all connections have been counted.
+  for (auto const& listen_thread : listen_threads) {
+    listen_thread->Join();
+  }
+  // Check that connections are distributed correctly among listening sockets.
+  for (int i = 0; i < accept_counts.size(); i++) {
+    EXPECT_THAT(
+        accept_counts[i],
+        EquivalentWithin(static_cast<int>(kConnectAttempts *
+                                          test.endpoints[i].expected_ratio),
+                         0.10))
+        << "endpoint " << i << " got the wrong number of packets";
+  }
+}
+
+// Binds sockets to different devices and then sends many UDP packets.  Checks
+// that the distribution of packets received on the sockets matches the
+// expectation.
+TEST_P(BindToDeviceDistributionTest, Udp) {
+  auto const& [listener_connector, test] = GetParam();
+
+  TestAddress const& listener = listener_connector.listener;
+  TestAddress const& connector = listener_connector.connector;
+  sockaddr_storage listen_addr = listener.addr;
+  sockaddr_storage conn_addr = connector.addr;
+
+  auto interface_names = GetInterfaceNames();
+
+  // Create the listening socket.
+  std::vector<FileDescriptor> listener_fds;
+  std::vector<std::unique_ptr<Tunnel>> all_tunnels;
+  for (auto const& endpoint : test.endpoints) {
+    if (!endpoint.bind_to_device.empty() &&
+        interface_names.find(endpoint.bind_to_device) ==
+            interface_names.end()) {
+      all_tunnels.push_back(
+          ASSERT_NO_ERRNO_AND_VALUE(Tunnel::New(endpoint.bind_to_device)));
+      interface_names.insert(endpoint.bind_to_device);
+    }
+
+    listener_fds.push_back(
+        ASSERT_NO_ERRNO_AND_VALUE(Socket(listener.family(), SOCK_DGRAM, 0)));
+    int fd = listener_fds.back().get();
+
+    ASSERT_THAT(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &kSockOptOn,
+                           sizeof(kSockOptOn)),
+                SyscallSucceeds());
+    ASSERT_THAT(setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
+                           endpoint.bind_to_device.c_str(),
+                           endpoint.bind_to_device.size() + 1),
+                SyscallSucceeds());
+    ASSERT_THAT(
+        bind(fd, reinterpret_cast<sockaddr*>(&listen_addr), listener.addr_len),
+        SyscallSucceeds());
+
+    // On the first bind we need to determine which port was bound.
+    if (listener_fds.size() > 1) {
+      continue;
+    }
+
+    // Get the port bound by the listening socket.
+    socklen_t addrlen = listener.addr_len;
+    ASSERT_THAT(
+        getsockname(listener_fds[0].get(),
+                    reinterpret_cast<sockaddr*>(&listen_addr), &addrlen),
+        SyscallSucceeds());
+    uint16_t const port =
+        ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr));
+    ASSERT_NO_ERRNO(SetAddrPort(listener.family(), &listen_addr, port));
+    ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port));
+  }
+
+  constexpr int kConnectAttempts = 10000;
+  std::atomic<int> packets_received = ATOMIC_VAR_INIT(0);
+  std::vector<int> packets_per_socket(listener_fds.size(), 0);
+  std::vector<std::unique_ptr<ScopedThread>> receiver_threads(
+      listener_fds.size());
+
+  for (int i = 0; i < listener_fds.size(); i++) {
+    receiver_threads[i] = absl::make_unique<ScopedThread>(
+        [&listener_fds, &packets_per_socket, &packets_received, i]() {
+          do {
+            struct sockaddr_storage addr = {};
+            socklen_t addrlen = sizeof(addr);
+            int data;
+
+            auto ret = RetryEINTR(recvfrom)(
+                listener_fds[i].get(), &data, sizeof(data), 0,
+                reinterpret_cast<struct sockaddr*>(&addr), &addrlen);
+
+            if (packets_received < kConnectAttempts) {
+              ASSERT_THAT(ret, SyscallSucceedsWithValue(sizeof(data)));
+            }
+
+            if (ret != sizeof(data)) {
+              // Another thread may have shutdown our read side causing the
+              // recvfrom to fail.
+              break;
+            }
+
+            packets_received++;
+            packets_per_socket[i]++;
+
+            // A response is required to synchronize with the main thread,
+            // otherwise the main thread can send more than can fit into receive
+            // queues.
+            EXPECT_THAT(RetryEINTR(sendto)(
+                            listener_fds[i].get(), &data, sizeof(data), 0,
+                            reinterpret_cast<sockaddr*>(&addr), addrlen),
+                        SyscallSucceedsWithValue(sizeof(data)));
+          } while (packets_received < kConnectAttempts);
+
+          // Shutdown all sockets to wake up other threads.
+          for (auto const& listener_fd : listener_fds) {
+            shutdown(listener_fd.get(), SHUT_RDWR);
+          }
+        });
+  }
+
+  for (int i = 0; i < kConnectAttempts; i++) {
+    FileDescriptor const fd =
+        ASSERT_NO_ERRNO_AND_VALUE(Socket(connector.family(), SOCK_DGRAM, 0));
+    EXPECT_THAT(RetryEINTR(sendto)(fd.get(), &i, sizeof(i), 0,
+                                   reinterpret_cast<sockaddr*>(&conn_addr),
+                                   connector.addr_len),
+                SyscallSucceedsWithValue(sizeof(i)));
+    int data;
+    EXPECT_THAT(RetryEINTR(recv)(fd.get(), &data, sizeof(data), 0),
+                SyscallSucceedsWithValue(sizeof(data)));
+  }
+
+  // Join threads to be sure that all connections have been counted.
+  for (auto const& receiver_thread : receiver_threads) {
+    receiver_thread->Join();
+  }
+  // Check that packets are distributed correctly among listening sockets.
+  for (int i = 0; i < packets_per_socket.size(); i++) {
+    EXPECT_THAT(
+        packets_per_socket[i],
+        EquivalentWithin(static_cast<int>(kConnectAttempts *
+                                          test.endpoints[i].expected_ratio),
+                         0.10))
+        << "endpoint " << i << " got the wrong number of packets";
+  }
+}
+
+std::vector<DistributionTestCase> GetDistributionTestCases() {
+  return std::vector<DistributionTestCase>{
+      {"Even distribution among sockets not bound to device",
+       {{"", 1. / 3}, {"", 1. / 3}, {"", 1. / 3}}},
+      {"Sockets bound to other interfaces get no packets",
+       {{"eth1", 0}, {"", 1. / 2}, {"", 1. / 2}}},
+      {"Bound has priority over unbound", {{"eth1", 0}, {"", 0}, {"lo", 1}}},
+      {"Even distribution among sockets bound to device",
+       {{"eth1", 0}, {"lo", 1. / 2}, {"lo", 1. / 2}}},
+  };
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    BindToDeviceTest, BindToDeviceDistributionTest,
+    ::testing::Combine(::testing::Values(
+                           // Listeners bound to IPv4 addresses refuse
+                           // connections using IPv6 addresses.
+                           ListenerConnector{V4Any(), V4Loopback()},
+                           ListenerConnector{V4Loopback(), V4MappedLoopback()}),
+                       ::testing::ValuesIn(GetDistributionTestCases())));
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_bind_to_device_sequence.cc b/test/syscalls/linux/socket_bind_to_device_sequence.cc
new file mode 100644
index 000000000..d3cc71dbf
--- /dev/null
+++ b/test/syscalls/linux/socket_bind_to_device_sequence.cc
@@ -0,0 +1,513 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <arpa/inet.h>
+#include <linux/capability.h>
+#include <linux/if_tun.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+
+#include <cstdio>
+#include <cstring>
+#include <map>
+#include <memory>
+#include <unordered_map>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/container/node_hash_map.h"
+#include "test/syscalls/linux/ip_socket_test_util.h"
+#include "test/syscalls/linux/socket_bind_to_device_util.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/capability_util.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+
+using std::string;
+using std::vector;
+
+// Test fixture for SO_BINDTODEVICE tests the results of sequences of socket
+// binding.
+class BindToDeviceSequenceTest : public ::testing::TestWithParam<SocketKind> {
+ protected:
+  void SetUp() override {
+    printf("Testing case: %s\n", GetParam().description.c_str());
+    ASSERT_TRUE(ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)))
+        << "CAP_NET_RAW is required to use SO_BINDTODEVICE";
+    socket_factory_ = GetParam();
+
+    interface_names_ = GetInterfaceNames();
+  }
+
+  PosixErrorOr<std::unique_ptr<FileDescriptor>> NewSocket() const {
+    return socket_factory_.Create();
+  }
+
+  // Gets a device by device_id.  If the device_id has been seen before, returns
+  // the previously returned device.  If not, finds or creates a new device.
+  // Returns an empty string on failure.
+  void GetDevice(int device_id, string* device_name) {
+    auto device = devices_.find(device_id);
+    if (device != devices_.end()) {
+      *device_name = device->second;
+      return;
+    }
+
+    // Need to pick a new device.  Try ethernet first.
+    *device_name = absl::StrCat("eth", next_unused_eth_);
+    if (interface_names_.find(*device_name) != interface_names_.end()) {
+      devices_[device_id] = *device_name;
+      next_unused_eth_++;
+      return;
+    }
+
+    // Need to make a new tunnel device.  gVisor tests should have enough
+    // ethernet devices to never reach here.
+    ASSERT_FALSE(IsRunningOnGvisor());
+    // Need a tunnel.
+    tunnels_.push_back(ASSERT_NO_ERRNO_AND_VALUE(Tunnel::New()));
+    devices_[device_id] = tunnels_.back()->GetName();
+    *device_name = devices_[device_id];
+  }
+
+  // Release the socket
+  void ReleaseSocket(int socket_id) {
+    // Close the socket that was made in a previous action.  The socket_id
+    // indicates which socket to close based on index into the list of actions.
+    sockets_to_close_.erase(socket_id);
+  }
+
+  // SetDevice changes the bind_to_device option. It does not bind or re-bind.
+  void SetDevice(int socket_id, int device_id) {
+    auto socket_fd = sockets_to_close_[socket_id]->get();
+    string device_name;
+    ASSERT_NO_FATAL_FAILURE(GetDevice(device_id, &device_name));
+    EXPECT_THAT(setsockopt(socket_fd, SOL_SOCKET, SO_BINDTODEVICE,
+                           device_name.c_str(), device_name.size() + 1),
+                SyscallSucceedsWithValue(0));
+  }
+
+  // Bind a socket with the reuse options and bind_to_device options. Checks
+  // that all steps succeed and that the bind command's error matches want.
+  // Sets the socket_id to uniquely identify the socket bound if it is not
+  // nullptr.
+  void BindSocket(bool reuse_port, bool reuse_addr, int device_id = 0,
+                  int want = 0, int* socket_id = nullptr) {
+    next_socket_id_++;
+    sockets_to_close_[next_socket_id_] = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+    auto socket_fd = sockets_to_close_[next_socket_id_]->get();
+    if (socket_id != nullptr) {
+      *socket_id = next_socket_id_;
+    }
+
+    // If reuse_port is indicated, do that.
+    if (reuse_port) {
+      EXPECT_THAT(setsockopt(socket_fd, SOL_SOCKET, SO_REUSEPORT, &kSockOptOn,
+                             sizeof(kSockOptOn)),
+                  SyscallSucceedsWithValue(0));
+    }
+
+    // If reuse_addr is indicated, do that.
+    if (reuse_addr) {
+      EXPECT_THAT(setsockopt(socket_fd, SOL_SOCKET, SO_REUSEADDR, &kSockOptOn,
+                             sizeof(kSockOptOn)),
+                  SyscallSucceedsWithValue(0));
+    }
+
+    // If the device is non-zero, bind to that device.
+    if (device_id != 0) {
+      string device_name;
+      ASSERT_NO_FATAL_FAILURE(GetDevice(device_id, &device_name));
+      EXPECT_THAT(setsockopt(socket_fd, SOL_SOCKET, SO_BINDTODEVICE,
+                             device_name.c_str(), device_name.size() + 1),
+                  SyscallSucceedsWithValue(0));
+      char get_device[100];
+      socklen_t get_device_size = 100;
+      EXPECT_THAT(getsockopt(socket_fd, SOL_SOCKET, SO_BINDTODEVICE, get_device,
+                             &get_device_size),
+                  SyscallSucceedsWithValue(0));
+    }
+
+    struct sockaddr_in addr = {};
+    addr.sin_family = AF_INET;
+    addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+    addr.sin_port = port_;
+    if (want == 0) {
+      ASSERT_THAT(
+          bind(socket_fd, reinterpret_cast<const struct sockaddr*>(&addr),
+               sizeof(addr)),
+          SyscallSucceeds());
+    } else {
+      ASSERT_THAT(
+          bind(socket_fd, reinterpret_cast<const struct sockaddr*>(&addr),
+               sizeof(addr)),
+          SyscallFailsWithErrno(want));
+    }
+
+    if (port_ == 0) {
+      // We don't yet know what port we'll be using so we need to fetch it and
+      // remember it for future commands.
+      socklen_t addr_size = sizeof(addr);
+      ASSERT_THAT(
+          getsockname(socket_fd, reinterpret_cast<struct sockaddr*>(&addr),
+                      &addr_size),
+          SyscallSucceeds());
+      port_ = addr.sin_port;
+    }
+  }
+
+ private:
+  SocketKind socket_factory_;
+  // devices maps from the device id in the test case to the name of the device.
+  absl::node_hash_map<int, string> devices_;
+  // These are the tunnels that were created for the test and will be destroyed
+  // by the destructor.
+  vector<std::unique_ptr<Tunnel>> tunnels_;
+  // A list of all interface names before the test started.
+  std::unordered_set<string> interface_names_;
+  // The next ethernet device to use when requested a device.
+  int next_unused_eth_ = 1;
+  // The port for all tests.  Originally 0 (any) and later set to the port that
+  // all further commands will use.
+  in_port_t port_ = 0;
+  // sockets_to_close_ is a map from action index to the socket that was
+  // created.
+  absl::node_hash_map<int,
+                      std::unique_ptr<gvisor::testing::FileDescriptor>>
+      sockets_to_close_;
+  int next_socket_id_ = 0;
+};
+
+TEST_P(BindToDeviceSequenceTest, BindTwiceWithDeviceFails) {
+  ASSERT_NO_FATAL_FAILURE(BindSocket(
+      /* reuse_port */ false, /* reuse_addr */ false, /* bind_to_device */ 3));
+  ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false,
+                                     /* reuse_addr */ false,
+                                     /* bind_to_device */ 3, EADDRINUSE));
+}
+
+TEST_P(BindToDeviceSequenceTest, BindToDevice) {
+  ASSERT_NO_FATAL_FAILURE(BindSocket(
+      /* reuse_port */ false, /* reuse_addr */ false, /* bind_to_device */ 1));
+  ASSERT_NO_FATAL_FAILURE(BindSocket(
+      /* reuse_port */ false, /* reuse_addr */ false, /* bind_to_device */ 2));
+}
+
+TEST_P(BindToDeviceSequenceTest, BindToDeviceAndThenWithoutDevice) {
+  ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false,
+                                     /* reuse_addr */ false,
+                                     /* bind_to_device */ 123));
+  ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false,
+                                     /* reuse_addr */ false,
+                                     /* bind_to_device */ 0, EADDRINUSE));
+}
+
+TEST_P(BindToDeviceSequenceTest, BindWithoutDevice) {
+  ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false,
+                                     /* reuse_addr */ false));
+  ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false,
+                                     /* reuse_addr */ false,
+                                     /* bind_to_device */ 123, EADDRINUSE));
+  ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true,
+                                     /* reuse_addr */ false,
+                                     /* bind_to_device */ 123, EADDRINUSE));
+  ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false,
+                                     /* reuse_addr */ false,
+                                     /* bind_to_device */ 0, EADDRINUSE));
+  ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true,
+                                     /* reuse_addr */ false,
+                                     /* bind_to_device */ 0, EADDRINUSE));
+}
+
+TEST_P(BindToDeviceSequenceTest, BindWithDevice) {
+  ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false,
+                                     /* reuse_addr */ false,
+                                     /* bind_to_device */ 123, 0));
+  ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false,
+                                     /* reuse_addr */ false,
+                                     /* bind_to_device */ 123, EADDRINUSE));
+  ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true,
+                                     /* reuse_addr */ false,
+                                     /* bind_to_device */ 123, EADDRINUSE));
+  ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false,
+                                     /* reuse_addr */ false,
+                                     /* bind_to_device */ 0, EADDRINUSE));
+  ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true,
+                                     /* reuse_addr */ false,
+                                     /* bind_to_device */ 0, EADDRINUSE));
+  ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true,
+                                     /* reuse_addr */ false,
+                                     /* bind_to_device */ 456, 0));
+  ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false,
+                                     /* reuse_addr */ false,
+                                     /* bind_to_device */ 789, 0));
+  ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false,
+                                     /* reuse_addr */ false,
+                                     /* bind_to_device */ 0, EADDRINUSE));
+  ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true,
+                                     /* reuse_addr */ false,
+                                     /* bind_to_device */ 0, EADDRINUSE));
+}
+
+TEST_P(BindToDeviceSequenceTest, BindWithReuse) {
+  ASSERT_NO_FATAL_FAILURE(
+      BindSocket(/* reusePort */ true, /* reuse_addr */ false));
+  ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false,
+                                     /* reuse_addr */ false,
+                                     /* bind_to_device */ 123, EADDRINUSE));
+  ASSERT_NO_FATAL_FAILURE(BindSocket(
+      /* reuse_port */ true, /* reuse_addr */ false,
+      /* bind_to_device */ 123));
+  ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false,
+                                     /* reuse_addr */ false,
+                                     /* bind_to_device */ 0, EADDRINUSE));
+  ASSERT_NO_FATAL_FAILURE(BindSocket(
+      /* reuse_port */ true, /* reuse_addr */ false, /* bind_to_device */ 0));
+}
+
+TEST_P(BindToDeviceSequenceTest, BindingWithReuseAndDevice) {
+  ASSERT_NO_FATAL_FAILURE(BindSocket(
+      /* reuse_port */ true, /* reuse_addr */ false, /* bind_to_device */ 123));
+  ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false,
+                                     /* reuse_addr */ false,
+                                     /* bind_to_device */ 123, EADDRINUSE));
+  ASSERT_NO_FATAL_FAILURE(BindSocket(
+      /* reuse_port */ true, /* reuse_addr */ false, /* bind_to_device */ 123));
+  ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false,
+                                     /* reuse_addr */ false,
+                                     /* bind_to_device */ 0, EADDRINUSE));
+  ASSERT_NO_FATAL_FAILURE(BindSocket(
+      /* reuse_port */ true, /* reuse_addr */ false, /* bind_to_device */ 456));
+  ASSERT_NO_FATAL_FAILURE(
+      BindSocket(/* reuse_port */ true, /* reuse_addr */ false));
+  ASSERT_NO_FATAL_FAILURE(BindSocket(
+      /* reuse_port */ true, /* reuse_addr */ false, /* bind_to_device */ 789));
+  ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false,
+                                     /* reuse_addr */ false,
+                                     /* bind_to_device */ 999, EADDRINUSE));
+}
+
+TEST_P(BindToDeviceSequenceTest, MixingReuseAndNotReuseByBindingToDevice) {
+  ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true,
+                                     /* reuse_addr */ false,
+                                     /* bind_to_device */ 123, 0));
+  ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false,
+                                     /* reuse_addr */ false,
+                                     /* bind_to_device */ 456, 0));
+  ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true,
+                                     /* reuse_addr */ false,
+                                     /* bind_to_device */ 789, 0));
+  ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false,
+                                     /* reuse_addr */ false,
+                                     /* bind_to_device */ 999, 0));
+}
+
+TEST_P(BindToDeviceSequenceTest, CannotBindTo0AfterMixingReuseAndNotReuse) {
+  ASSERT_NO_FATAL_FAILURE(BindSocket(
+      /* reuse_port */ true, /* reuse_addr */ false, /* bind_to_device */ 123));
+  ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false,
+                                     /* reuse_addr */ false,
+                                     /* bind_to_device */ 456));
+  ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true,
+                                     /* reuse_addr */ false,
+                                     /* bind_to_device */ 0, EADDRINUSE));
+}
+
+TEST_P(BindToDeviceSequenceTest, BindAndRelease) {
+  ASSERT_NO_FATAL_FAILURE(BindSocket(
+      /* reuse_port */ true, /* reuse_addr */ false, /* bind_to_device */ 123));
+  int to_release;
+  ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true,
+                                     /* reuse_addr */ false,
+                                     /* bind_to_device */ 0, 0, &to_release));
+  ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false,
+                                     /* reuse_addr */ false,
+                                     /* bind_to_device */ 345, EADDRINUSE));
+  ASSERT_NO_FATAL_FAILURE(BindSocket(
+      /* reuse_port */ true, /* reuse_addr */ false, /* bind_to_device */ 789));
+  // Release the bind to device 0 and try again.
+  ASSERT_NO_FATAL_FAILURE(ReleaseSocket(to_release));
+  ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false,
+                                     /* reuse_addr */ false,
+                                     /* bind_to_device */ 345));
+}
+
+TEST_P(BindToDeviceSequenceTest, BindTwiceWithReuseOnce) {
+  ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false,
+                                     /* reuse_addr */ false,
+                                     /* bind_to_device */ 123));
+  ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true,
+                                     /* reuse_addr */ false,
+                                     /* bind_to_device */ 0, EADDRINUSE));
+}
+
+TEST_P(BindToDeviceSequenceTest, BindWithReuseAddr) {
+  ASSERT_NO_FATAL_FAILURE(
+      BindSocket(/* reusePort */ false, /* reuse_addr */ true));
+  ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false,
+                                     /* reuse_addr */ false,
+                                     /* bind_to_device */ 123, EADDRINUSE));
+  ASSERT_NO_FATAL_FAILURE(BindSocket(
+      /* reuse_port */ false, /* reuse_addr */ true, /* bind_to_device */ 123));
+  ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false,
+                                     /* reuse_addr */ false,
+                                     /* bind_to_device */ 0, EADDRINUSE));
+  ASSERT_NO_FATAL_FAILURE(BindSocket(
+      /* reuse_port */ false, /* reuse_addr */ true, /* bind_to_device */ 0));
+}
+
+TEST_P(BindToDeviceSequenceTest,
+       CannotBindTo0AfterMixingReuseAddrAndNotReuseAddr) {
+  ASSERT_NO_FATAL_FAILURE(BindSocket(
+      /* reuse_port */ true, /* reuse_addr */ false, /* bind_to_device */ 123));
+  ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false,
+                                     /* reuse_addr */ false,
+                                     /* bind_to_device */ 456));
+  ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false,
+                                     /* reuse_addr */ true,
+                                     /* bind_to_device */ 0, EADDRINUSE));
+}
+
+TEST_P(BindToDeviceSequenceTest, BindReuseAddrReusePortThenReusePort) {
+  ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true,
+                                     /* reuse_addr */ true,
+                                     /* bind_to_device */ 0));
+  ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true,
+                                     /* reuse_addr */ false,
+                                     /* bind_to_device */ 0));
+  ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false,
+                                     /* reuse_addr */ true,
+                                     /* bind_to_device */ 0, EADDRINUSE));
+}
+
+TEST_P(BindToDeviceSequenceTest, BindReuseAddrReusePortThenReuseAddr) {
+  ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true,
+                                     /* reuse_addr */ true,
+                                     /* bind_to_device */ 0));
+  ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false,
+                                     /* reuse_addr */ true,
+                                     /* bind_to_device */ 0));
+  ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true,
+                                     /* reuse_addr */ false,
+                                     /* bind_to_device */ 0, EADDRINUSE));
+}
+
+TEST_P(BindToDeviceSequenceTest, BindDoubleReuseAddrReusePortThenReusePort) {
+  ASSERT_NO_FATAL_FAILURE(BindSocket(
+      /* reuse_port */ true, /* reuse_addr */ true, /* bind_to_device */ 0));
+  ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true,
+                                     /* reuse_addr */ true,
+                                     /* bind_to_device */ 0));
+  ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true,
+                                     /* reuse_addr */ false,
+                                     /* bind_to_device */ 0));
+  ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false,
+                                     /* reuse_addr */ true,
+                                     /* bind_to_device */ 0, EADDRINUSE));
+}
+
+TEST_P(BindToDeviceSequenceTest, BindDoubleReuseAddrReusePortThenReuseAddr) {
+  ASSERT_NO_FATAL_FAILURE(BindSocket(
+      /* reuse_port */ true, /* reuse_addr */ true, /* bind_to_device */ 0));
+  ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true,
+                                     /* reuse_addr */ true,
+                                     /* bind_to_device */ 0));
+  ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false,
+                                     /* reuse_addr */ true,
+                                     /* bind_to_device */ 0));
+  ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true,
+                                     /* reuse_addr */ false,
+                                     /* bind_to_device */ 0, EADDRINUSE));
+}
+
+TEST_P(BindToDeviceSequenceTest, BindReusePortThenReuseAddrReusePort) {
+  ASSERT_NO_FATAL_FAILURE(BindSocket(
+      /* reuse_port */ true, /* reuse_addr */ false, /* bind_to_device */ 0));
+  ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true,
+                                     /* reuse_addr */ true,
+                                     /* bind_to_device */ 0));
+  ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false,
+                                     /* reuse_addr */ true,
+                                     /* bind_to_device */ 0, EADDRINUSE));
+}
+
+TEST_P(BindToDeviceSequenceTest, BindReuseAddrThenReuseAddr) {
+  ASSERT_NO_FATAL_FAILURE(BindSocket(
+      /* reuse_port */ false, /* reuse_addr */ true, /* bind_to_device */ 0));
+  ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true,
+                                     /* reuse_addr */ false,
+                                     /* bind_to_device */ 0, EADDRINUSE));
+}
+
+TEST_P(BindToDeviceSequenceTest,
+       BindReuseAddrThenReuseAddrReusePortThenReuseAddr) {
+  // The behavior described in this test seems like a Linux bug. It doesn't
+  // make any sense and it is unlikely that any applications rely on it.
+  //
+  // Both SO_REUSEADDR and SO_REUSEPORT allow binding multiple UDP sockets to
+  // the same address and deliver each packet to exactly one of the bound
+  // sockets. If both are enabled, one of the strategies is selected to route
+  // packets. The strategy is selected dynamically based on the settings of the
+  // currently bound sockets. Usually, the strategy is selected based on the
+  // common setting (SO_REUSEADDR or SO_REUSEPORT) amongst the sockets, but for
+  // some reason, Linux allows binding sets of sockets with no overlapping
+  // settings in some situations. In this case, it is not obvious which strategy
+  // would be selected as the configured setting is a contradiction.
+  SKIP_IF(IsRunningOnGvisor());
+
+  ASSERT_NO_FATAL_FAILURE(BindSocket(
+      /* reuse_port */ false, /* reuse_addr */ true, /* bind_to_device */ 0));
+  ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true,
+                                     /* reuse_addr */ true,
+                                     /* bind_to_device */ 0));
+  ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true,
+                                     /* reuse_addr */ false,
+                                     /* bind_to_device */ 0));
+}
+
+// Repro test for gvisor.dev/issue/1217. Not replicated in ports_test.go as this
+// test is different from the others and wouldn't fit well there.
+TEST_P(BindToDeviceSequenceTest, BindAndReleaseDifferentDevice) {
+  int to_release;
+  ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false,
+                                     /* reuse_addr */ false,
+                                     /* bind_to_device */ 3, 0, &to_release));
+  ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false,
+                                     /* reuse_addr */ false,
+                                     /* bind_to_device */ 3, EADDRINUSE));
+  // Change the device. Since the socket was already bound, this should have no
+  // effect.
+  SetDevice(to_release, 2);
+  // Release the bind to device 3 and try again.
+  ASSERT_NO_FATAL_FAILURE(ReleaseSocket(to_release));
+  ASSERT_NO_FATAL_FAILURE(BindSocket(
+      /* reuse_port */ false, /* reuse_addr */ false, /* bind_to_device */ 3));
+}
+
+INSTANTIATE_TEST_SUITE_P(BindToDeviceTest, BindToDeviceSequenceTest,
+                         ::testing::Values(IPv4UDPUnboundSocket(0),
+                                           IPv4TCPUnboundSocket(0)));
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_bind_to_device_util.cc b/test/syscalls/linux/socket_bind_to_device_util.cc
new file mode 100644
index 000000000..f4ee775bd
--- /dev/null
+++ b/test/syscalls/linux/socket_bind_to_device_util.cc
@@ -0,0 +1,75 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test/syscalls/linux/socket_bind_to_device_util.h"
+
+#include <arpa/inet.h>
+#include <fcntl.h>
+#include <linux/if_tun.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#include <unistd.h>
+
+#include <cstdio>
+#include <cstring>
+#include <map>
+#include <memory>
+#include <unordered_map>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+using std::string;
+
+PosixErrorOr<std::unique_ptr<Tunnel>> Tunnel::New(string tunnel_name) {
+  int fd;
+  RETURN_ERROR_IF_SYSCALL_FAIL(fd = open("/dev/net/tun", O_RDWR));
+
+  // Using `new` to access a non-public constructor.
+  auto new_tunnel = absl::WrapUnique(new Tunnel(fd));
+
+  ifreq ifr = {};
+  ifr.ifr_flags = IFF_TUN;
+  strncpy(ifr.ifr_name, tunnel_name.c_str(), sizeof(ifr.ifr_name));
+
+  RETURN_ERROR_IF_SYSCALL_FAIL(ioctl(fd, TUNSETIFF, &ifr));
+  new_tunnel->name_ = ifr.ifr_name;
+  return new_tunnel;
+}
+
+std::unordered_set<string> GetInterfaceNames() {
+  struct if_nameindex* interfaces = if_nameindex();
+  std::unordered_set<string> names;
+  if (interfaces == nullptr) {
+    return names;
+  }
+  for (auto interface = interfaces;
+       interface->if_index != 0 || interface->if_name != nullptr; interface++) {
+    names.insert(interface->if_name);
+  }
+  if_freenameindex(interfaces);
+  return names;
+}
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_bind_to_device_util.h b/test/syscalls/linux/socket_bind_to_device_util.h
new file mode 100644
index 000000000..f941ccc86
--- /dev/null
+++ b/test/syscalls/linux/socket_bind_to_device_util.h
@@ -0,0 +1,67 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_SYSCALLS_SOCKET_BIND_TO_DEVICE_UTILS_H_
+#define GVISOR_TEST_SYSCALLS_SOCKET_BIND_TO_DEVICE_UTILS_H_
+
+#include <arpa/inet.h>
+#include <linux/if_tun.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#include <unistd.h>
+
+#include <cstdio>
+#include <cstring>
+#include <map>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+#include "absl/memory/memory.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+class Tunnel {
+ public:
+  static PosixErrorOr<std::unique_ptr<Tunnel>> New(
+      std::string tunnel_name = "");
+  const std::string& GetName() const { return name_; }
+
+  ~Tunnel() {
+    if (fd_ != -1) {
+      close(fd_);
+    }
+  }
+
+ private:
+  Tunnel(int fd) : fd_(fd) {}
+  int fd_ = -1;
+  std::string name_;
+};
+
+std::unordered_set<std::string> GetInterfaceNames();
+
+}  // namespace testing
+}  // namespace gvisor
+
+#endif  // GVISOR_TEST_SYSCALLS_SOCKET_BIND_TO_DEVICE_UTILS_H_
diff --git a/test/syscalls/linux/socket_blocking.cc b/test/syscalls/linux/socket_blocking.cc
new file mode 100644
index 000000000..7e88aa2d9
--- /dev/null
+++ b/test/syscalls/linux/socket_blocking.cc
@@ -0,0 +1,60 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test/syscalls/linux/socket_blocking.h"
+
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+
+#include <cstdio>
+
+#include "gtest/gtest.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+#include "test/util/timer_util.h"
+
+namespace gvisor {
+namespace testing {
+
+TEST_P(BlockingSocketPairTest, RecvBlocks) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[100];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  constexpr auto kDuration = absl::Milliseconds(200);
+  auto before = Now(CLOCK_MONOTONIC);
+
+  const ScopedThread t([&]() {
+    absl::SleepFor(kDuration);
+    ASSERT_THAT(write(sockets->first_fd(), sent_data, sizeof(sent_data)),
+                SyscallSucceedsWithValue(sizeof(sent_data)));
+  });
+
+  char received_data[sizeof(sent_data)] = {};
+  ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data,
+                               sizeof(received_data), 0),
+              SyscallSucceedsWithValue(sizeof(received_data)));
+
+  auto after = Now(CLOCK_MONOTONIC);
+  EXPECT_GE(after - before, kDuration);
+}
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_blocking.h b/test/syscalls/linux/socket_blocking.h
new file mode 100644
index 000000000..db26e5ef5
--- /dev/null
+++ b/test/syscalls/linux/socket_blocking.h
@@ -0,0 +1,29 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_BLOCKING_H_
+#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_BLOCKING_H_
+
+#include "test/syscalls/linux/socket_test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// Test fixture for tests that apply to pairs of blocking connected sockets.
+using BlockingSocketPairTest = SocketPairTest;
+
+}  // namespace testing
+}  // namespace gvisor
+
+#endif  // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_BLOCKING_H_
diff --git a/test/syscalls/linux/socket_capability.cc b/test/syscalls/linux/socket_capability.cc
new file mode 100644
index 000000000..84b5b2b21
--- /dev/null
+++ b/test/syscalls/linux/socket_capability.cc
@@ -0,0 +1,61 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Subset of socket tests that need Linux-specific headers (compared to POSIX
+// headers).
+
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/capability_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+TEST(SocketTest, UnixConnectNeedsWritePerm) {
+  SKIP_IF(IsRunningWithVFS1());
+
+  FileDescriptor bound =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_UNIX, SOCK_STREAM, PF_UNIX));
+
+  struct sockaddr_un addr =
+      ASSERT_NO_ERRNO_AND_VALUE(UniqueUnixAddr(/*abstract=*/false, AF_UNIX));
+  ASSERT_THAT(bind(bound.get(), reinterpret_cast<struct sockaddr*>(&addr),
+                   sizeof(addr)),
+              SyscallSucceeds());
+  ASSERT_THAT(listen(bound.get(), 1), SyscallSucceeds());
+
+  // Drop capabilites that allow us to override permision checks. Otherwise if
+  // the test is run as root, the connect below will bypass permission checks
+  // and succeed unexpectedly.
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+
+  // Connect should fail without write perms.
+  ASSERT_THAT(chmod(addr.sun_path, 0500), SyscallSucceeds());
+  FileDescriptor client =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_UNIX, SOCK_STREAM, PF_UNIX));
+  ASSERT_THAT(connect(client.get(), reinterpret_cast<struct sockaddr*>(&addr),
+                      sizeof(addr)),
+              SyscallFailsWithErrno(EACCES));
+
+  // Connect should succeed with write perms.
+  ASSERT_THAT(chmod(addr.sun_path, 0200), SyscallSucceeds());
+  EXPECT_THAT(connect(client.get(), reinterpret_cast<struct sockaddr*>(&addr),
+                      sizeof(addr)),
+              SyscallSucceeds());
+}
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_filesystem.cc b/test/syscalls/linux/socket_filesystem.cc
new file mode 100644
index 000000000..287359363
--- /dev/null
+++ b/test/syscalls/linux/socket_filesystem.cc
@@ -0,0 +1,49 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <vector>
+
+#include "test/syscalls/linux/socket_generic.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/socket_unix.h"
+#include "test/syscalls/linux/socket_unix_cmsg.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+std::vector<SocketPairKind> GetSocketPairs() {
+  return ApplyVec<SocketPairKind>(
+      FilesystemBoundUnixDomainSocketPair,
+      AllBitwiseCombinations(List<int>{SOCK_STREAM, SOCK_DGRAM, SOCK_SEQPACKET},
+                             List<int>{0, SOCK_NONBLOCK}));
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    FilesystemUnixSockets, AllSocketPairTest,
+    ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+INSTANTIATE_TEST_SUITE_P(
+    FilesystemUnixSockets, UnixSocketPairTest,
+    ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+INSTANTIATE_TEST_SUITE_P(
+    FilesystemUnixSockets, UnixSocketPairCmsgTest,
+    ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+}  // namespace
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_generic.cc b/test/syscalls/linux/socket_generic.cc
new file mode 100644
index 000000000..f7d6139f1
--- /dev/null
+++ b/test/syscalls/linux/socket_generic.cc
@@ -0,0 +1,820 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test/syscalls/linux/socket_generic.h"
+
+#include <stdio.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#include "gtest/gtest.h"
+#include "absl/strings/str_format.h"
+#include "absl/strings/string_view.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+// This file is a generic socket test file. It must be built with another file
+// that provides the test types.
+
+namespace gvisor {
+namespace testing {
+
+TEST_P(AllSocketPairTest, BasicReadWrite) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  char buf[20];
+  const std::string data = "abc";
+  ASSERT_THAT(WriteFd(sockets->first_fd(), data.c_str(), 3),
+              SyscallSucceedsWithValue(3));
+  ASSERT_THAT(ReadFd(sockets->second_fd(), buf, 3),
+              SyscallSucceedsWithValue(3));
+  EXPECT_EQ(data, absl::string_view(buf, 3));
+}
+
+TEST_P(AllSocketPairTest, BasicSendRecv) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  char sent_data[512];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+  ASSERT_THAT(
+      RetryEINTR(send)(sockets->first_fd(), sent_data, sizeof(sent_data), 0),
+      SyscallSucceedsWithValue(sizeof(sent_data)));
+  char received_data[sizeof(sent_data)];
+  ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data,
+                               sizeof(received_data), 0),
+              SyscallSucceedsWithValue(sizeof(received_data)));
+  EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+}
+
+TEST_P(AllSocketPairTest, BasicSendmmsg) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  char sent_data[200];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  std::vector<struct mmsghdr> msgs(10);
+  std::vector<struct iovec> iovs(msgs.size());
+  const int chunk_size = sizeof(sent_data) / msgs.size();
+  for (size_t i = 0; i < msgs.size(); i++) {
+    iovs[i].iov_len = chunk_size;
+    iovs[i].iov_base = &sent_data[i * chunk_size];
+    msgs[i].msg_hdr.msg_iov = &iovs[i];
+    msgs[i].msg_hdr.msg_iovlen = 1;
+  }
+
+  ASSERT_THAT(
+      RetryEINTR(sendmmsg)(sockets->first_fd(), &msgs[0], msgs.size(), 0),
+      SyscallSucceedsWithValue(msgs.size()));
+
+  for (const struct mmsghdr& msg : msgs) {
+    EXPECT_EQ(chunk_size, msg.msg_len);
+  }
+
+  char received_data[sizeof(sent_data)];
+  for (size_t i = 0; i < msgs.size(); i++) {
+    ASSERT_THAT(ReadFd(sockets->second_fd(), &received_data[i * chunk_size],
+                       chunk_size),
+                SyscallSucceedsWithValue(chunk_size));
+  }
+  EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+}
+
+TEST_P(AllSocketPairTest, BasicRecvmmsg) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  char sent_data[200];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  char received_data[sizeof(sent_data)];
+  std::vector<struct mmsghdr> msgs(10);
+  std::vector<struct iovec> iovs(msgs.size());
+  const int chunk_size = sizeof(sent_data) / msgs.size();
+  for (size_t i = 0; i < msgs.size(); i++) {
+    iovs[i].iov_len = chunk_size;
+    iovs[i].iov_base = &received_data[i * chunk_size];
+    msgs[i].msg_hdr.msg_iov = &iovs[i];
+    msgs[i].msg_hdr.msg_iovlen = 1;
+  }
+
+  for (size_t i = 0; i < msgs.size(); i++) {
+    ASSERT_THAT(
+        WriteFd(sockets->first_fd(), &sent_data[i * chunk_size], chunk_size),
+        SyscallSucceedsWithValue(chunk_size));
+  }
+
+  ASSERT_THAT(RetryEINTR(recvmmsg)(sockets->second_fd(), &msgs[0], msgs.size(),
+                                   0, nullptr),
+              SyscallSucceedsWithValue(msgs.size()));
+
+  EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+
+  for (const struct mmsghdr& msg : msgs) {
+    EXPECT_EQ(chunk_size, msg.msg_len);
+  }
+}
+
+TEST_P(AllSocketPairTest, SendmsgRecvmsg10KB) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  std::vector<char> sent_data(10 * 1024);
+  RandomizeBuffer(sent_data.data(), sent_data.size());
+  ASSERT_NO_FATAL_FAILURE(
+      SendNullCmsg(sockets->first_fd(), sent_data.data(), sent_data.size()));
+
+  std::vector<char> received_data(sent_data.size());
+  ASSERT_NO_FATAL_FAILURE(RecvNoCmsg(sockets->second_fd(), received_data.data(),
+                                     received_data.size()));
+
+  EXPECT_EQ(0,
+            memcmp(sent_data.data(), received_data.data(), sent_data.size()));
+}
+
+// This test validates that a sendmsg/recvmsg w/ MSG_CTRUNC is a no-op on
+// input flags.
+TEST_P(AllSocketPairTest, SendmsgRecvmsgMsgCtruncNoop) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  std::vector<char> sent_data(10 * 1024);
+  RandomizeBuffer(sent_data.data(), sent_data.size());
+  ASSERT_NO_FATAL_FAILURE(
+      SendNullCmsg(sockets->first_fd(), sent_data.data(), sent_data.size()));
+
+  std::vector<char> received_data(sent_data.size());
+  struct msghdr msg = {};
+  char control[CMSG_SPACE(sizeof(int)) + CMSG_SPACE(sizeof(struct ucred))];
+  msg.msg_control = control;
+  msg.msg_controllen = sizeof(control);
+
+  struct iovec iov;
+  iov.iov_base = &received_data[0];
+  iov.iov_len = received_data.size();
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+
+  // MSG_CTRUNC should be a no-op.
+  ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, MSG_CTRUNC),
+              SyscallSucceedsWithValue(received_data.size()));
+  struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
+  EXPECT_EQ(cmsg, nullptr);
+  EXPECT_EQ(msg.msg_controllen, 0);
+  EXPECT_EQ(0,
+            memcmp(sent_data.data(), received_data.data(), sent_data.size()));
+}
+
+TEST_P(AllSocketPairTest, SendmsgRecvmsg16KB) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  std::vector<char> sent_data(16 * 1024);
+  RandomizeBuffer(sent_data.data(), sent_data.size());
+  ASSERT_NO_FATAL_FAILURE(
+      SendNullCmsg(sockets->first_fd(), sent_data.data(), sent_data.size()));
+
+  std::vector<char> received_data(sent_data.size());
+  ASSERT_NO_FATAL_FAILURE(RecvNoCmsg(sockets->second_fd(), received_data.data(),
+                                     received_data.size()));
+
+  EXPECT_EQ(0,
+            memcmp(sent_data.data(), received_data.data(), sent_data.size()));
+}
+
+TEST_P(AllSocketPairTest, RecvmsgMsghdrFlagsNotClearedOnFailure) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char received_data[10] = {};
+
+  struct iovec iov;
+  iov.iov_base = received_data;
+  iov.iov_len = sizeof(received_data);
+  struct msghdr msg = {};
+  msg.msg_flags = -1;
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+
+  ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, MSG_DONTWAIT),
+              SyscallFailsWithErrno(EAGAIN));
+
+  // Check that msghdr flags were not changed.
+  EXPECT_EQ(msg.msg_flags, -1);
+}
+
+TEST_P(AllSocketPairTest, RecvmsgMsghdrFlagsCleared) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[10];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+  ASSERT_THAT(
+      RetryEINTR(send)(sockets->first_fd(), sent_data, sizeof(sent_data), 0),
+      SyscallSucceedsWithValue(sizeof(sent_data)));
+
+  char received_data[sizeof(sent_data)] = {};
+
+  struct iovec iov;
+  iov.iov_base = received_data;
+  iov.iov_len = sizeof(received_data);
+  struct msghdr msg = {};
+  msg.msg_flags = -1;
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+
+  ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, 0),
+              SyscallSucceedsWithValue(sizeof(sent_data)));
+  EXPECT_EQ(0, memcmp(received_data, sent_data, sizeof(sent_data)));
+
+  // Check that msghdr flags were cleared.
+  EXPECT_EQ(msg.msg_flags, 0);
+}
+
+TEST_P(AllSocketPairTest, RecvmsgPeekMsghdrFlagsCleared) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[10];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+  ASSERT_THAT(
+      RetryEINTR(send)(sockets->first_fd(), sent_data, sizeof(sent_data), 0),
+      SyscallSucceedsWithValue(sizeof(sent_data)));
+
+  char received_data[sizeof(sent_data)] = {};
+
+  struct iovec iov;
+  iov.iov_base = received_data;
+  iov.iov_len = sizeof(received_data);
+  struct msghdr msg = {};
+  msg.msg_flags = -1;
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+
+  ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, MSG_PEEK),
+              SyscallSucceedsWithValue(sizeof(sent_data)));
+  EXPECT_EQ(0, memcmp(received_data, sent_data, sizeof(sent_data)));
+
+  // Check that msghdr flags were cleared.
+  EXPECT_EQ(msg.msg_flags, 0);
+}
+
+TEST_P(AllSocketPairTest, RecvmsgIovNotUpdated) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[10];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+  ASSERT_THAT(
+      RetryEINTR(send)(sockets->first_fd(), sent_data, sizeof(sent_data), 0),
+      SyscallSucceedsWithValue(sizeof(sent_data)));
+
+  char received_data[sizeof(sent_data) * 2] = {};
+
+  struct iovec iov;
+  iov.iov_base = received_data;
+  iov.iov_len = sizeof(received_data);
+  struct msghdr msg = {};
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+
+  ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, 0),
+              SyscallSucceedsWithValue(sizeof(sent_data)));
+  EXPECT_EQ(0, memcmp(received_data, sent_data, sizeof(sent_data)));
+
+  // Check that the iovec length was not updated.
+  EXPECT_EQ(msg.msg_iov->iov_len, sizeof(received_data));
+}
+
+TEST_P(AllSocketPairTest, RecvmmsgInvalidTimeout) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  char buf[10];
+  struct mmsghdr msg = {};
+  struct iovec iov = {};
+  iov.iov_len = sizeof(buf);
+  iov.iov_base = buf;
+  msg.msg_hdr.msg_iov = &iov;
+  msg.msg_hdr.msg_iovlen = 1;
+  struct timespec timeout = {-1, -1};
+  ASSERT_THAT(RetryEINTR(recvmmsg)(sockets->first_fd(), &msg, 1, 0, &timeout),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_P(AllSocketPairTest, RecvmmsgTimeoutBeforeRecv) {
+  // There is a known bug in the Linux recvmmsg(2) causing it to block forever
+  // if the timeout expires while blocking for the first message.
+  SKIP_IF(!IsRunningOnGvisor());
+
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  char buf[10];
+  struct mmsghdr msg = {};
+  struct iovec iov = {};
+  iov.iov_len = sizeof(buf);
+  iov.iov_base = buf;
+  msg.msg_hdr.msg_iov = &iov;
+  msg.msg_hdr.msg_iovlen = 1;
+  struct timespec timeout = {};
+  ASSERT_THAT(RetryEINTR(recvmmsg)(sockets->first_fd(), &msg, 1, 0, &timeout),
+              SyscallFailsWithErrno(EAGAIN));
+}
+
+TEST_P(AllSocketPairTest, MsgPeek) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  char sent_data[50];
+  memset(&sent_data, 0, sizeof(sent_data));
+  ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data, sizeof(sent_data)),
+              SyscallSucceedsWithValue(sizeof(sent_data)));
+
+  char received_data[sizeof(sent_data)];
+  for (int i = 0; i < 3; i++) {
+    memset(received_data, 0, sizeof(received_data));
+    EXPECT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data,
+                                 sizeof(received_data), MSG_PEEK),
+                SyscallSucceedsWithValue(sizeof(received_data)));
+    EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(received_data)));
+  }
+
+  ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data,
+                               sizeof(received_data), 0),
+              SyscallSucceedsWithValue(sizeof(received_data)));
+  EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(received_data)));
+}
+
+TEST_P(AllSocketPairTest, LingerSocketOption) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  struct linger got_linger = {-1, -1};
+  socklen_t length = sizeof(struct linger);
+  EXPECT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, SO_LINGER,
+                         &got_linger, &length),
+              SyscallSucceedsWithValue(0));
+  struct linger want_linger = {};
+  EXPECT_EQ(0, memcmp(&want_linger, &got_linger, sizeof(struct linger)));
+  EXPECT_EQ(sizeof(struct linger), length);
+}
+
+TEST_P(AllSocketPairTest, KeepAliveSocketOption) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  int keepalive = -1;
+  socklen_t length = sizeof(int);
+  EXPECT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, SO_KEEPALIVE,
+                         &keepalive, &length),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(0, keepalive);
+  EXPECT_EQ(sizeof(int), length);
+}
+
+TEST_P(AllSocketPairTest, RcvBufSucceeds) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  int size = 0;
+  socklen_t size_size = sizeof(size);
+  EXPECT_THAT(
+      getsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVBUF, &size, &size_size),
+      SyscallSucceeds());
+  EXPECT_GT(size, 0);
+}
+
+TEST_P(AllSocketPairTest, SndBufSucceeds) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  int size = 0;
+  socklen_t size_size = sizeof(size);
+  EXPECT_THAT(
+      getsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDBUF, &size, &size_size),
+      SyscallSucceeds());
+  EXPECT_GT(size, 0);
+}
+
+TEST_P(AllSocketPairTest, RecvTimeoutReadSucceeds) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  struct timeval tv {
+    .tv_sec = 0, .tv_usec = 10
+  };
+  EXPECT_THAT(
+      setsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)),
+      SyscallSucceeds());
+
+  char buf[20] = {};
+  EXPECT_THAT(RetryEINTR(read)(sockets->first_fd(), buf, sizeof(buf)),
+              SyscallFailsWithErrno(EAGAIN));
+}
+
+TEST_P(AllSocketPairTest, RecvTimeoutRecvSucceeds) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  struct timeval tv {
+    .tv_sec = 0, .tv_usec = 10
+  };
+  EXPECT_THAT(
+      setsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)),
+      SyscallSucceeds());
+
+  char buf[20] = {};
+  EXPECT_THAT(RetryEINTR(recv)(sockets->first_fd(), buf, sizeof(buf), 0),
+              SyscallFailsWithErrno(EAGAIN));
+}
+
+TEST_P(AllSocketPairTest, RecvTimeoutRecvOneSecondSucceeds) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  struct timeval tv {
+    .tv_sec = 1, .tv_usec = 0
+  };
+  EXPECT_THAT(
+      setsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)),
+      SyscallSucceeds());
+
+  char buf[20] = {};
+  EXPECT_THAT(RetryEINTR(recv)(sockets->first_fd(), buf, sizeof(buf), 0),
+              SyscallFailsWithErrno(EAGAIN));
+}
+
+TEST_P(AllSocketPairTest, RecvTimeoutRecvmsgSucceeds) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  struct timeval tv {
+    .tv_sec = 0, .tv_usec = 10
+  };
+  EXPECT_THAT(
+      setsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)),
+      SyscallSucceeds());
+
+  struct msghdr msg = {};
+  char buf[20] = {};
+  struct iovec iov;
+  iov.iov_base = buf;
+  iov.iov_len = sizeof(buf);
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+
+  EXPECT_THAT(RetryEINTR(recvmsg)(sockets->first_fd(), &msg, 0),
+              SyscallFailsWithErrno(EAGAIN));
+}
+
+TEST_P(AllSocketPairTest, SendTimeoutDefault) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  timeval actual_tv = {.tv_sec = -1, .tv_usec = -1};
+  socklen_t len = sizeof(actual_tv);
+  EXPECT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDTIMEO,
+                         &actual_tv, &len),
+              SyscallSucceeds());
+  EXPECT_EQ(actual_tv.tv_sec, 0);
+  EXPECT_EQ(actual_tv.tv_usec, 0);
+}
+
+TEST_P(AllSocketPairTest, SetGetSendTimeout) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  timeval tv = {.tv_sec = 89, .tv_usec = 42000};
+  EXPECT_THAT(
+      setsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)),
+      SyscallSucceeds());
+
+  timeval actual_tv = {};
+  socklen_t len = sizeof(actual_tv);
+  EXPECT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDTIMEO,
+                         &actual_tv, &len),
+              SyscallSucceeds());
+  EXPECT_EQ(actual_tv.tv_sec, 89);
+  EXPECT_EQ(actual_tv.tv_usec, 42000);
+}
+
+TEST_P(AllSocketPairTest, SetGetSendTimeoutLargerArg) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  struct timeval_with_extra {
+    struct timeval tv;
+    int64_t extra_data;
+  } ABSL_ATTRIBUTE_PACKED;
+
+  timeval_with_extra tv_extra = {
+      .tv = {.tv_sec = 0, .tv_usec = 123000},
+  };
+
+  EXPECT_THAT(setsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDTIMEO,
+                         &tv_extra, sizeof(tv_extra)),
+              SyscallSucceeds());
+
+  timeval_with_extra actual_tv = {};
+  socklen_t len = sizeof(actual_tv);
+  EXPECT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDTIMEO,
+                         &actual_tv, &len),
+              SyscallSucceeds());
+  EXPECT_EQ(actual_tv.tv.tv_sec, 0);
+  EXPECT_EQ(actual_tv.tv.tv_usec, 123000);
+}
+
+TEST_P(AllSocketPairTest, SendTimeoutAllowsWrite) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  struct timeval tv {
+    .tv_sec = 0, .tv_usec = 10
+  };
+  EXPECT_THAT(
+      setsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)),
+      SyscallSucceeds());
+
+  char buf[20] = {};
+  ASSERT_THAT(RetryEINTR(write)(sockets->first_fd(), buf, sizeof(buf)),
+              SyscallSucceedsWithValue(sizeof(buf)));
+}
+
+TEST_P(AllSocketPairTest, SendTimeoutAllowsSend) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  struct timeval tv {
+    .tv_sec = 0, .tv_usec = 10
+  };
+  EXPECT_THAT(
+      setsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)),
+      SyscallSucceeds());
+
+  char buf[20] = {};
+  ASSERT_THAT(RetryEINTR(send)(sockets->first_fd(), buf, sizeof(buf), 0),
+              SyscallSucceedsWithValue(sizeof(buf)));
+}
+
+TEST_P(AllSocketPairTest, SendTimeoutAllowsSendmsg) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  struct timeval tv {
+    .tv_sec = 0, .tv_usec = 10
+  };
+  EXPECT_THAT(
+      setsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)),
+      SyscallSucceeds());
+
+  char buf[20] = {};
+  ASSERT_NO_FATAL_FAILURE(SendNullCmsg(sockets->first_fd(), buf, sizeof(buf)));
+}
+
+TEST_P(AllSocketPairTest, RecvTimeoutDefault) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  timeval actual_tv = {.tv_sec = -1, .tv_usec = -1};
+  socklen_t len = sizeof(actual_tv);
+  EXPECT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVTIMEO,
+                         &actual_tv, &len),
+              SyscallSucceeds());
+  EXPECT_EQ(actual_tv.tv_sec, 0);
+  EXPECT_EQ(actual_tv.tv_usec, 0);
+}
+
+TEST_P(AllSocketPairTest, SetGetRecvTimeout) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  timeval tv = {.tv_sec = 123, .tv_usec = 456000};
+  EXPECT_THAT(
+      setsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)),
+      SyscallSucceeds());
+
+  timeval actual_tv = {};
+  socklen_t len = sizeof(actual_tv);
+  EXPECT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVTIMEO,
+                         &actual_tv, &len),
+              SyscallSucceeds());
+  EXPECT_EQ(actual_tv.tv_sec, 123);
+  EXPECT_EQ(actual_tv.tv_usec, 456000);
+}
+
+TEST_P(AllSocketPairTest, SetGetRecvTimeoutLargerArg) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  struct timeval_with_extra {
+    struct timeval tv;
+    int64_t extra_data;
+  } ABSL_ATTRIBUTE_PACKED;
+
+  timeval_with_extra tv_extra = {
+      .tv = {.tv_sec = 0, .tv_usec = 432000},
+  };
+
+  EXPECT_THAT(setsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVTIMEO,
+                         &tv_extra, sizeof(tv_extra)),
+              SyscallSucceeds());
+
+  timeval_with_extra actual_tv = {};
+  socklen_t len = sizeof(actual_tv);
+  EXPECT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVTIMEO,
+                         &actual_tv, &len),
+              SyscallSucceeds());
+  EXPECT_EQ(actual_tv.tv.tv_sec, 0);
+  EXPECT_EQ(actual_tv.tv.tv_usec, 432000);
+}
+
+TEST_P(AllSocketPairTest, RecvTimeoutRecvmsgOneSecondSucceeds) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  struct timeval tv {
+    .tv_sec = 1, .tv_usec = 0
+  };
+  EXPECT_THAT(
+      setsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)),
+      SyscallSucceeds());
+
+  struct msghdr msg = {};
+  char buf[20] = {};
+  struct iovec iov;
+  iov.iov_base = buf;
+  iov.iov_len = sizeof(buf);
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+
+  EXPECT_THAT(RetryEINTR(recvmsg)(sockets->first_fd(), &msg, 0),
+              SyscallFailsWithErrno(EAGAIN));
+}
+
+TEST_P(AllSocketPairTest, RecvTimeoutUsecTooLarge) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  struct timeval tv {
+    .tv_sec = 0, .tv_usec = 2000000  // 2 seconds.
+  };
+  EXPECT_THAT(
+      setsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)),
+      SyscallFailsWithErrno(EDOM));
+}
+
+TEST_P(AllSocketPairTest, SendTimeoutUsecTooLarge) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  struct timeval tv {
+    .tv_sec = 0, .tv_usec = 2000000  // 2 seconds.
+  };
+  EXPECT_THAT(
+      setsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)),
+      SyscallFailsWithErrno(EDOM));
+}
+
+TEST_P(AllSocketPairTest, RecvTimeoutUsecNeg) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  struct timeval tv {
+    .tv_sec = 0, .tv_usec = -1
+  };
+  EXPECT_THAT(
+      setsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)),
+      SyscallFailsWithErrno(EDOM));
+}
+
+TEST_P(AllSocketPairTest, SendTimeoutUsecNeg) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  struct timeval tv {
+    .tv_sec = 0, .tv_usec = -1
+  };
+  EXPECT_THAT(
+      setsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)),
+      SyscallFailsWithErrno(EDOM));
+}
+
+TEST_P(AllSocketPairTest, RecvTimeoutNegSecRead) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  struct timeval tv {
+    .tv_sec = -1, .tv_usec = 0
+  };
+  EXPECT_THAT(
+      setsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)),
+      SyscallSucceeds());
+
+  char buf[20] = {};
+  EXPECT_THAT(RetryEINTR(read)(sockets->first_fd(), buf, sizeof(buf)),
+              SyscallFailsWithErrno(EAGAIN));
+}
+
+TEST_P(AllSocketPairTest, RecvTimeoutNegSecRecv) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  struct timeval tv {
+    .tv_sec = -1, .tv_usec = 0
+  };
+  EXPECT_THAT(
+      setsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)),
+      SyscallSucceeds());
+
+  char buf[20] = {};
+  EXPECT_THAT(RetryEINTR(recv)(sockets->first_fd(), buf, sizeof(buf), 0),
+              SyscallFailsWithErrno(EAGAIN));
+}
+
+TEST_P(AllSocketPairTest, RecvTimeoutNegSecRecvmsg) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  struct timeval tv {
+    .tv_sec = -1, .tv_usec = 0
+  };
+  EXPECT_THAT(
+      setsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)),
+      SyscallSucceeds());
+
+  struct msghdr msg = {};
+  char buf[20] = {};
+  struct iovec iov;
+  iov.iov_base = buf;
+  iov.iov_len = sizeof(buf);
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+
+  EXPECT_THAT(RetryEINTR(recvmsg)(sockets->first_fd(), &msg, 0),
+              SyscallFailsWithErrno(EAGAIN));
+}
+
+TEST_P(AllSocketPairTest, RecvWaitAll) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[100];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  ASSERT_THAT(write(sockets->first_fd(), sent_data, sizeof(sent_data)),
+              SyscallSucceedsWithValue(sizeof(sent_data)));
+
+  char received_data[sizeof(sent_data)] = {};
+  ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data,
+                               sizeof(received_data), MSG_WAITALL),
+              SyscallSucceedsWithValue(sizeof(sent_data)));
+
+  EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+}
+
+TEST_P(AllSocketPairTest, RecvWaitAllDontWait) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char data[100] = {};
+  ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), data, sizeof(data),
+                               MSG_WAITALL | MSG_DONTWAIT),
+              SyscallFailsWithErrno(EAGAIN));
+}
+
+TEST_P(AllSocketPairTest, RecvTimeoutWaitAll) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  struct timeval tv {
+    .tv_sec = 0, .tv_usec = 200000  // 200ms
+  };
+  EXPECT_THAT(setsockopt(sockets->second_fd(), SOL_SOCKET, SO_RCVTIMEO, &tv,
+                         sizeof(tv)),
+              SyscallSucceeds());
+
+  char sent_data[100];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  ASSERT_THAT(write(sockets->first_fd(), sent_data, sizeof(sent_data)),
+              SyscallSucceedsWithValue(sizeof(sent_data)));
+
+  char received_data[sizeof(sent_data) * 2] = {};
+  ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data,
+                               sizeof(received_data), MSG_WAITALL),
+              SyscallSucceedsWithValue(sizeof(sent_data)));
+
+  EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+}
+
+TEST_P(AllSocketPairTest, GetSockoptType) {
+  int type = GetParam().type;
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  for (const int fd : {sockets->first_fd(), sockets->second_fd()}) {
+    int opt;
+    socklen_t optlen = sizeof(opt);
+    EXPECT_THAT(getsockopt(fd, SOL_SOCKET, SO_TYPE, &opt, &optlen),
+                SyscallSucceeds());
+
+    // Type may have SOCK_NONBLOCK and SOCK_CLOEXEC ORed into it. Remove these
+    // before comparison.
+    type &= ~(SOCK_NONBLOCK | SOCK_CLOEXEC);
+    EXPECT_EQ(opt, type) << absl::StrFormat(
+        "getsockopt(%d, SOL_SOCKET, SO_TYPE, &opt, &optlen) => opt=%d was "
+        "unexpected",
+        fd, opt);
+  }
+}
+
+TEST_P(AllSocketPairTest, GetSockoptDomain) {
+  const int domain = GetParam().domain;
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  for (const int fd : {sockets->first_fd(), sockets->second_fd()}) {
+    int opt;
+    socklen_t optlen = sizeof(opt);
+    EXPECT_THAT(getsockopt(fd, SOL_SOCKET, SO_DOMAIN, &opt, &optlen),
+                SyscallSucceeds());
+    EXPECT_EQ(opt, domain) << absl::StrFormat(
+        "getsockopt(%d, SOL_SOCKET, SO_DOMAIN, &opt, &optlen) => opt=%d was "
+        "unexpected",
+        fd, opt);
+  }
+}
+
+TEST_P(AllSocketPairTest, GetSockoptProtocol) {
+  const int protocol = GetParam().protocol;
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  for (const int fd : {sockets->first_fd(), sockets->second_fd()}) {
+    int opt;
+    socklen_t optlen = sizeof(opt);
+    EXPECT_THAT(getsockopt(fd, SOL_SOCKET, SO_PROTOCOL, &opt, &optlen),
+                SyscallSucceeds());
+    EXPECT_EQ(opt, protocol) << absl::StrFormat(
+        "getsockopt(%d, SOL_SOCKET, SO_PROTOCOL, &opt, &optlen) => opt=%d was "
+        "unexpected",
+        fd, opt);
+  }
+}
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_generic.h b/test/syscalls/linux/socket_generic.h
new file mode 100644
index 000000000..00ae7bfc3
--- /dev/null
+++ b/test/syscalls/linux/socket_generic.h
@@ -0,0 +1,30 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_GENERIC_H_
+#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_GENERIC_H_
+
+#include "test/syscalls/linux/socket_test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// Test fixture for tests that apply to pairs of blocking and non-blocking
+// connected stream sockets.
+using AllSocketPairTest = SocketPairTest;
+
+}  // namespace testing
+}  // namespace gvisor
+
+#endif  // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_GENERIC_H_
diff --git a/test/syscalls/linux/socket_generic_stress.cc b/test/syscalls/linux/socket_generic_stress.cc
new file mode 100644
index 000000000..6a232238d
--- /dev/null
+++ b/test/syscalls/linux/socket_generic_stress.cc
@@ -0,0 +1,83 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdio.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/ip_socket_test_util.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// Test fixture for tests that apply to pairs of connected sockets.
+using ConnectStressTest = SocketPairTest;
+
+TEST_P(ConnectStressTest, Reset65kTimes) {
+  for (int i = 0; i < 1 << 16; ++i) {
+    auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+    // Send some data to ensure that the connection gets reset and the port gets
+    // released immediately. This avoids either end entering TIME-WAIT.
+    char sent_data[100] = {};
+    ASSERT_THAT(write(sockets->first_fd(), sent_data, sizeof(sent_data)),
+                SyscallSucceedsWithValue(sizeof(sent_data)));
+  }
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    AllConnectedSockets, ConnectStressTest,
+    ::testing::Values(IPv6UDPBidirectionalBindSocketPair(0),
+                      IPv4UDPBidirectionalBindSocketPair(0),
+                      DualStackUDPBidirectionalBindSocketPair(0),
+
+                      // Without REUSEADDR, we get port exhaustion on Linux.
+                      SetSockOpt(SOL_SOCKET, SO_REUSEADDR,
+                                 &kSockOptOn)(IPv6TCPAcceptBindSocketPair(0)),
+                      SetSockOpt(SOL_SOCKET, SO_REUSEADDR,
+                                 &kSockOptOn)(IPv4TCPAcceptBindSocketPair(0)),
+                      SetSockOpt(SOL_SOCKET, SO_REUSEADDR, &kSockOptOn)(
+                          DualStackTCPAcceptBindSocketPair(0))));
+
+// Test fixture for tests that apply to pairs of connected sockets created with
+// a persistent listener (if applicable).
+using PersistentListenerConnectStressTest = SocketPairTest;
+
+TEST_P(PersistentListenerConnectStressTest, 65kTimes) {
+  for (int i = 0; i < 1 << 16; ++i) {
+    auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  }
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    AllConnectedSockets, PersistentListenerConnectStressTest,
+    ::testing::Values(
+        IPv6UDPBidirectionalBindSocketPair(0),
+        IPv4UDPBidirectionalBindSocketPair(0),
+        DualStackUDPBidirectionalBindSocketPair(0),
+
+        // Without REUSEADDR, we get port exhaustion on Linux.
+        SetSockOpt(SOL_SOCKET, SO_REUSEADDR, &kSockOptOn)(
+            IPv6TCPAcceptBindPersistentListenerSocketPair(0)),
+        SetSockOpt(SOL_SOCKET, SO_REUSEADDR, &kSockOptOn)(
+            IPv4TCPAcceptBindPersistentListenerSocketPair(0)),
+        SetSockOpt(SOL_SOCKET, SO_REUSEADDR, &kSockOptOn)(
+            DualStackTCPAcceptBindPersistentListenerSocketPair(0))));
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_inet_loopback.cc b/test/syscalls/linux/socket_inet_loopback.cc
new file mode 100644
index 000000000..18b9e4b70
--- /dev/null
+++ b/test/syscalls/linux/socket_inet_loopback.cc
@@ -0,0 +1,2566 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <arpa/inet.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <poll.h>
+#include <string.h>
+#include <sys/socket.h>
+
+#include <atomic>
+#include <iostream>
+#include <memory>
+#include <string>
+#include <tuple>
+#include <utility>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/memory/memory.h"
+#include "absl/strings/str_cat.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/syscalls/linux/ip_socket_test_util.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/posix_error.h"
+#include "test/util/save_util.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+using ::testing::Gt;
+
+PosixErrorOr<uint16_t> AddrPort(int family, sockaddr_storage const& addr) {
+  switch (family) {
+    case AF_INET:
+      return static_cast<uint16_t>(
+          reinterpret_cast<sockaddr_in const*>(&addr)->sin_port);
+    case AF_INET6:
+      return static_cast<uint16_t>(
+          reinterpret_cast<sockaddr_in6 const*>(&addr)->sin6_port);
+    default:
+      return PosixError(EINVAL,
+                        absl::StrCat("unknown socket family: ", family));
+  }
+}
+
+PosixError SetAddrPort(int family, sockaddr_storage* addr, uint16_t port) {
+  switch (family) {
+    case AF_INET:
+      reinterpret_cast<sockaddr_in*>(addr)->sin_port = port;
+      return NoError();
+    case AF_INET6:
+      reinterpret_cast<sockaddr_in6*>(addr)->sin6_port = port;
+      return NoError();
+    default:
+      return PosixError(EINVAL,
+                        absl::StrCat("unknown socket family: ", family));
+  }
+}
+
+struct TestParam {
+  TestAddress listener;
+  TestAddress connector;
+};
+
+std::string DescribeTestParam(::testing::TestParamInfo<TestParam> const& info) {
+  return absl::StrCat("Listen", info.param.listener.description, "_Connect",
+                      info.param.connector.description);
+}
+
+using SocketInetLoopbackTest = ::testing::TestWithParam<TestParam>;
+
+TEST(BadSocketPairArgs, ValidateErrForBadCallsToSocketPair) {
+  int fd[2] = {};
+
+  // Valid AF but invalid for socketpair(2) return ESOCKTNOSUPPORT.
+  ASSERT_THAT(socketpair(AF_INET, 0, 0, fd),
+              SyscallFailsWithErrno(ESOCKTNOSUPPORT));
+  ASSERT_THAT(socketpair(AF_INET6, 0, 0, fd),
+              SyscallFailsWithErrno(ESOCKTNOSUPPORT));
+
+  // Invalid AF will return ENOAFSUPPORT.
+  ASSERT_THAT(socketpair(AF_MAX, 0, 0, fd),
+              SyscallFailsWithErrno(EAFNOSUPPORT));
+  ASSERT_THAT(socketpair(8675309, 0, 0, fd),
+              SyscallFailsWithErrno(EAFNOSUPPORT));
+}
+
+enum class Operation {
+  Bind,
+  Connect,
+  SendTo,
+};
+
+std::string OperationToString(Operation operation) {
+  switch (operation) {
+    case Operation::Bind:
+      return "Bind";
+    case Operation::Connect:
+      return "Connect";
+    case Operation::SendTo:
+      return "SendTo";
+  }
+}
+
+using OperationSequence = std::vector<Operation>;
+
+using DualStackSocketTest =
+    ::testing::TestWithParam<std::tuple<TestAddress, OperationSequence>>;
+
+TEST_P(DualStackSocketTest, AddressOperations) {
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET6, SOCK_DGRAM, 0));
+
+  const TestAddress& addr = std::get<0>(GetParam());
+  const OperationSequence& operations = std::get<1>(GetParam());
+
+  auto addr_in = reinterpret_cast<const sockaddr*>(&addr.addr);
+
+  // sockets may only be bound once. Both `connect` and `sendto` cause a socket
+  // to be bound.
+  bool bound = false;
+  for (const Operation& operation : operations) {
+    bool sockname = false;
+    bool peername = false;
+    switch (operation) {
+      case Operation::Bind: {
+        ASSERT_NO_ERRNO(SetAddrPort(
+            addr.family(), const_cast<sockaddr_storage*>(&addr.addr), 0));
+
+        int bind_ret = bind(fd.get(), addr_in, addr.addr_len);
+
+        // Dual stack sockets may only be bound to AF_INET6.
+        if (!bound && addr.family() == AF_INET6) {
+          EXPECT_THAT(bind_ret, SyscallSucceeds());
+          bound = true;
+
+          sockname = true;
+        } else {
+          EXPECT_THAT(bind_ret, SyscallFailsWithErrno(EINVAL));
+        }
+        break;
+      }
+      case Operation::Connect: {
+        ASSERT_NO_ERRNO(SetAddrPort(
+            addr.family(), const_cast<sockaddr_storage*>(&addr.addr), 1337));
+
+        EXPECT_THAT(RetryEINTR(connect)(fd.get(), addr_in, addr.addr_len),
+                    SyscallSucceeds())
+            << GetAddrStr(addr_in);
+        bound = true;
+
+        sockname = true;
+        peername = true;
+
+        break;
+      }
+      case Operation::SendTo: {
+        const char payload[] = "hello";
+        ASSERT_NO_ERRNO(SetAddrPort(
+            addr.family(), const_cast<sockaddr_storage*>(&addr.addr), 1337));
+
+        ssize_t sendto_ret = sendto(fd.get(), &payload, sizeof(payload), 0,
+                                    addr_in, addr.addr_len);
+
+        EXPECT_THAT(sendto_ret, SyscallSucceedsWithValue(sizeof(payload)));
+        sockname = !bound;
+        bound = true;
+        break;
+      }
+    }
+
+    if (sockname) {
+      sockaddr_storage sock_addr;
+      socklen_t addrlen = sizeof(sock_addr);
+      ASSERT_THAT(getsockname(fd.get(), reinterpret_cast<sockaddr*>(&sock_addr),
+                              &addrlen),
+                  SyscallSucceeds());
+      ASSERT_EQ(addrlen, sizeof(struct sockaddr_in6));
+
+      auto sock_addr_in6 = reinterpret_cast<const sockaddr_in6*>(&sock_addr);
+
+      if (operation == Operation::SendTo) {
+        EXPECT_EQ(sock_addr_in6->sin6_family, AF_INET6);
+        EXPECT_TRUE(IN6_IS_ADDR_UNSPECIFIED(sock_addr_in6->sin6_addr.s6_addr32))
+            << OperationToString(operation) << " getsocknam="
+            << GetAddrStr(reinterpret_cast<sockaddr*>(&sock_addr));
+
+        EXPECT_NE(sock_addr_in6->sin6_port, 0);
+      } else if (IN6_IS_ADDR_V4MAPPED(
+                     reinterpret_cast<const sockaddr_in6*>(addr_in)
+                         ->sin6_addr.s6_addr32)) {
+        EXPECT_TRUE(IN6_IS_ADDR_V4MAPPED(sock_addr_in6->sin6_addr.s6_addr32))
+            << OperationToString(operation) << " getsocknam="
+            << GetAddrStr(reinterpret_cast<sockaddr*>(&sock_addr));
+      }
+    }
+
+    if (peername) {
+      sockaddr_storage peer_addr;
+      socklen_t addrlen = sizeof(peer_addr);
+      ASSERT_THAT(getpeername(fd.get(), reinterpret_cast<sockaddr*>(&peer_addr),
+                              &addrlen),
+                  SyscallSucceeds());
+      ASSERT_EQ(addrlen, sizeof(struct sockaddr_in6));
+
+      if (addr.family() == AF_INET ||
+          IN6_IS_ADDR_V4MAPPED(reinterpret_cast<const sockaddr_in6*>(addr_in)
+                                   ->sin6_addr.s6_addr32)) {
+        EXPECT_TRUE(IN6_IS_ADDR_V4MAPPED(
+            reinterpret_cast<const sockaddr_in6*>(&peer_addr)
+                ->sin6_addr.s6_addr32))
+            << OperationToString(operation) << " getpeername="
+            << GetAddrStr(reinterpret_cast<sockaddr*>(&peer_addr));
+      }
+    }
+  }
+}
+
+// TODO(gvisor.dev/issue/1556): uncomment V4MappedAny.
+INSTANTIATE_TEST_SUITE_P(
+    All, DualStackSocketTest,
+    ::testing::Combine(
+        ::testing::Values(V4Any(), V4Loopback(), /*V4MappedAny(),*/
+                          V4MappedLoopback(), V6Any(), V6Loopback()),
+        ::testing::ValuesIn<OperationSequence>(
+            {{Operation::Bind, Operation::Connect, Operation::SendTo},
+             {Operation::Bind, Operation::SendTo, Operation::Connect},
+             {Operation::Connect, Operation::Bind, Operation::SendTo},
+             {Operation::Connect, Operation::SendTo, Operation::Bind},
+             {Operation::SendTo, Operation::Bind, Operation::Connect},
+             {Operation::SendTo, Operation::Connect, Operation::Bind}})),
+    [](::testing::TestParamInfo<
+        std::tuple<TestAddress, OperationSequence>> const& info) {
+      const TestAddress& addr = std::get<0>(info.param);
+      const OperationSequence& operations = std::get<1>(info.param);
+      std::string s = addr.description;
+      for (const Operation& operation : operations) {
+        absl::StrAppend(&s, OperationToString(operation));
+      }
+      return s;
+    });
+
+void tcpSimpleConnectTest(TestAddress const& listener,
+                          TestAddress const& connector, bool unbound) {
+  // Create the listening socket.
+  const FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP));
+  sockaddr_storage listen_addr = listener.addr;
+  if (!unbound) {
+    ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast<sockaddr*>(&listen_addr),
+                     listener.addr_len),
+                SyscallSucceeds());
+  }
+  ASSERT_THAT(listen(listen_fd.get(), SOMAXCONN), SyscallSucceeds());
+
+  // Get the port bound by the listening socket.
+  socklen_t addrlen = listener.addr_len;
+  ASSERT_THAT(getsockname(listen_fd.get(),
+                          reinterpret_cast<sockaddr*>(&listen_addr), &addrlen),
+              SyscallSucceeds());
+  uint16_t const port =
+      ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr));
+
+  // Connect to the listening socket.
+  const FileDescriptor conn_fd = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP));
+  sockaddr_storage conn_addr = connector.addr;
+  ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port));
+  ASSERT_THAT(RetryEINTR(connect)(conn_fd.get(),
+                                  reinterpret_cast<sockaddr*>(&conn_addr),
+                                  connector.addr_len),
+              SyscallSucceeds());
+
+  // Accept the connection.
+  //
+  // We have to assign a name to the accepted socket, as unamed temporary
+  // objects are destructed upon full evaluation of the expression it is in,
+  // potentially causing the connecting socket to fail to shutdown properly.
+  auto accepted =
+      ASSERT_NO_ERRNO_AND_VALUE(Accept(listen_fd.get(), nullptr, nullptr));
+
+  ASSERT_THAT(shutdown(listen_fd.get(), SHUT_RDWR), SyscallSucceeds());
+
+  ASSERT_THAT(shutdown(conn_fd.get(), SHUT_RDWR), SyscallSucceeds());
+}
+
+TEST_P(SocketInetLoopbackTest, TCP) {
+  auto const& param = GetParam();
+  TestAddress const& listener = param.listener;
+  TestAddress const& connector = param.connector;
+
+  tcpSimpleConnectTest(listener, connector, true);
+}
+
+TEST_P(SocketInetLoopbackTest, TCPListenUnbound) {
+  auto const& param = GetParam();
+
+  TestAddress const& listener = param.listener;
+  TestAddress const& connector = param.connector;
+
+  tcpSimpleConnectTest(listener, connector, false);
+}
+
+TEST_P(SocketInetLoopbackTest, TCPListenShutdownListen) {
+  const auto& param = GetParam();
+
+  const TestAddress& listener = param.listener;
+  const TestAddress& connector = param.connector;
+
+  constexpr int kBacklog = 5;
+
+  // Create the listening socket.
+  FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP));
+  sockaddr_storage listen_addr = listener.addr;
+  ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast<sockaddr*>(&listen_addr),
+                   listener.addr_len),
+              SyscallSucceeds());
+
+  ASSERT_THAT(listen(listen_fd.get(), kBacklog), SyscallSucceeds());
+  ASSERT_THAT(shutdown(listen_fd.get(), SHUT_RD), SyscallSucceeds());
+  ASSERT_THAT(listen(listen_fd.get(), kBacklog), SyscallSucceeds());
+
+  // Get the port bound by the listening socket.
+  socklen_t addrlen = listener.addr_len;
+  ASSERT_THAT(getsockname(listen_fd.get(),
+                          reinterpret_cast<sockaddr*>(&listen_addr), &addrlen),
+              SyscallSucceeds());
+  const uint16_t port =
+      ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr));
+
+  sockaddr_storage conn_addr = connector.addr;
+  ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port));
+
+  for (int i = 0; i < kBacklog; i++) {
+    auto client = ASSERT_NO_ERRNO_AND_VALUE(
+        Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP));
+    ASSERT_THAT(RetryEINTR(connect)(client.get(),
+                                    reinterpret_cast<sockaddr*>(&conn_addr),
+                                    connector.addr_len),
+                SyscallSucceeds());
+  }
+  for (int i = 0; i < kBacklog; i++) {
+    ASSERT_THAT(accept(listen_fd.get(), nullptr, nullptr), SyscallSucceeds());
+  }
+}
+
+TEST_P(SocketInetLoopbackTest, TCPListenShutdown) {
+  auto const& param = GetParam();
+
+  TestAddress const& listener = param.listener;
+  TestAddress const& connector = param.connector;
+
+  constexpr int kBacklog = 2;
+  constexpr int kFDs = kBacklog + 1;
+
+  // Create the listening socket.
+  FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP));
+  sockaddr_storage listen_addr = listener.addr;
+  ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast<sockaddr*>(&listen_addr),
+                   listener.addr_len),
+              SyscallSucceeds());
+  ASSERT_THAT(listen(listen_fd.get(), kBacklog), SyscallSucceeds());
+
+  // Get the port bound by the listening socket.
+  socklen_t addrlen = listener.addr_len;
+  ASSERT_THAT(getsockname(listen_fd.get(),
+                          reinterpret_cast<sockaddr*>(&listen_addr), &addrlen),
+              SyscallSucceeds());
+  uint16_t const port =
+      ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr));
+
+  sockaddr_storage conn_addr = connector.addr;
+  ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port));
+
+  // Shutdown the write of the listener, expect to not have any effect.
+  ASSERT_THAT(shutdown(listen_fd.get(), SHUT_WR), SyscallSucceeds());
+
+  for (int i = 0; i < kFDs; i++) {
+    auto client = ASSERT_NO_ERRNO_AND_VALUE(
+        Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP));
+    ASSERT_THAT(RetryEINTR(connect)(client.get(),
+                                    reinterpret_cast<sockaddr*>(&conn_addr),
+                                    connector.addr_len),
+                SyscallSucceeds());
+    ASSERT_THAT(accept(listen_fd.get(), nullptr, nullptr), SyscallSucceeds());
+  }
+
+  // Shutdown the read of the listener, expect to fail subsequent
+  // server accepts, binds and client connects.
+  ASSERT_THAT(shutdown(listen_fd.get(), SHUT_RD), SyscallSucceeds());
+
+  ASSERT_THAT(accept(listen_fd.get(), nullptr, nullptr),
+              SyscallFailsWithErrno(EINVAL));
+
+  // Check that shutdown did not release the port.
+  FileDescriptor new_listen_fd = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP));
+  ASSERT_THAT(
+      bind(new_listen_fd.get(), reinterpret_cast<sockaddr*>(&listen_addr),
+           listener.addr_len),
+      SyscallFailsWithErrno(EADDRINUSE));
+
+  // Check that subsequent connection attempts receive a RST.
+  auto client = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP));
+
+  for (int i = 0; i < kFDs; i++) {
+    auto client = ASSERT_NO_ERRNO_AND_VALUE(
+        Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP));
+    ASSERT_THAT(RetryEINTR(connect)(client.get(),
+                                    reinterpret_cast<sockaddr*>(&conn_addr),
+                                    connector.addr_len),
+                SyscallFailsWithErrno(ECONNREFUSED));
+  }
+}
+
+TEST_P(SocketInetLoopbackTest, TCPListenClose) {
+  auto const& param = GetParam();
+
+  TestAddress const& listener = param.listener;
+  TestAddress const& connector = param.connector;
+
+  constexpr int kAcceptCount = 2;
+  constexpr int kBacklog = kAcceptCount + 2;
+  constexpr int kFDs = kBacklog * 3;
+
+  // Create the listening socket.
+  FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP));
+  sockaddr_storage listen_addr = listener.addr;
+  ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast<sockaddr*>(&listen_addr),
+                   listener.addr_len),
+              SyscallSucceeds());
+  ASSERT_THAT(listen(listen_fd.get(), kBacklog), SyscallSucceeds());
+
+  // Get the port bound by the listening socket.
+  socklen_t addrlen = listener.addr_len;
+  ASSERT_THAT(getsockname(listen_fd.get(),
+                          reinterpret_cast<sockaddr*>(&listen_addr), &addrlen),
+              SyscallSucceeds());
+  uint16_t const port =
+      ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr));
+
+  sockaddr_storage conn_addr = connector.addr;
+  ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port));
+  std::vector<FileDescriptor> clients;
+  for (int i = 0; i < kFDs; i++) {
+    auto client = ASSERT_NO_ERRNO_AND_VALUE(
+        Socket(connector.family(), SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP));
+    int ret = connect(client.get(), reinterpret_cast<sockaddr*>(&conn_addr),
+                      connector.addr_len);
+    if (ret != 0) {
+      EXPECT_THAT(ret, SyscallFailsWithErrno(EINPROGRESS));
+    }
+    clients.push_back(std::move(client));
+  }
+  for (int i = 0; i < kAcceptCount; i++) {
+    auto accepted =
+        ASSERT_NO_ERRNO_AND_VALUE(Accept(listen_fd.get(), nullptr, nullptr));
+  }
+}
+
+void TestListenWhileConnect(const TestParam& param,
+                            void (*stopListen)(FileDescriptor&)) {
+  TestAddress const& listener = param.listener;
+  TestAddress const& connector = param.connector;
+
+  constexpr int kBacklog = 2;
+  constexpr int kClients = kBacklog + 1;
+
+  // Create the listening socket.
+  FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP));
+  sockaddr_storage listen_addr = listener.addr;
+  ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast<sockaddr*>(&listen_addr),
+                   listener.addr_len),
+              SyscallSucceeds());
+  ASSERT_THAT(listen(listen_fd.get(), kBacklog), SyscallSucceeds());
+
+  // Get the port bound by the listening socket.
+  socklen_t addrlen = listener.addr_len;
+  ASSERT_THAT(getsockname(listen_fd.get(),
+                          reinterpret_cast<sockaddr*>(&listen_addr), &addrlen),
+              SyscallSucceeds());
+  uint16_t const port =
+      ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr));
+
+  sockaddr_storage conn_addr = connector.addr;
+  ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port));
+  std::vector<FileDescriptor> clients;
+  for (int i = 0; i < kClients; i++) {
+    FileDescriptor client = ASSERT_NO_ERRNO_AND_VALUE(
+        Socket(connector.family(), SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP));
+    int ret = connect(client.get(), reinterpret_cast<sockaddr*>(&conn_addr),
+                      connector.addr_len);
+    if (ret != 0) {
+      EXPECT_THAT(ret, SyscallFailsWithErrno(EINPROGRESS));
+      clients.push_back(std::move(client));
+    }
+  }
+
+  stopListen(listen_fd);
+
+  for (auto& client : clients) {
+    const int kTimeout = 10000;
+    struct pollfd pfd = {
+        .fd = client.get(),
+        .events = POLLIN,
+    };
+    // When the listening socket is closed, then we expect the remote to reset
+    // the connection.
+    ASSERT_THAT(poll(&pfd, 1, kTimeout), SyscallSucceedsWithValue(1));
+    ASSERT_EQ(pfd.revents, POLLIN | POLLHUP | POLLERR);
+    char c;
+    // Subsequent read can fail with:
+    // ECONNRESET: If the client connection was established and was reset by the
+    // remote.
+    // ECONNREFUSED: If the client connection failed to be established.
+    ASSERT_THAT(read(client.get(), &c, sizeof(c)),
+                AnyOf(SyscallFailsWithErrno(ECONNRESET),
+                      SyscallFailsWithErrno(ECONNREFUSED)));
+  }
+}
+
+TEST_P(SocketInetLoopbackTest, TCPListenCloseWhileConnect) {
+  TestListenWhileConnect(GetParam(), [](FileDescriptor& f) {
+    ASSERT_THAT(close(f.release()), SyscallSucceeds());
+  });
+}
+
+TEST_P(SocketInetLoopbackTest, TCPListenShutdownWhileConnect) {
+  TestListenWhileConnect(GetParam(), [](FileDescriptor& f) {
+    ASSERT_THAT(shutdown(f.get(), SHUT_RD), SyscallSucceeds());
+  });
+}
+
+TEST_P(SocketInetLoopbackTest, TCPbacklog) {
+  auto const& param = GetParam();
+
+  TestAddress const& listener = param.listener;
+  TestAddress const& connector = param.connector;
+
+  // Create the listening socket.
+  const FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP));
+  sockaddr_storage listen_addr = listener.addr;
+  ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast<sockaddr*>(&listen_addr),
+                   listener.addr_len),
+              SyscallSucceeds());
+  ASSERT_THAT(listen(listen_fd.get(), 2), SyscallSucceeds());
+
+  // Get the port bound by the listening socket.
+  socklen_t addrlen = listener.addr_len;
+  ASSERT_THAT(getsockname(listen_fd.get(),
+                          reinterpret_cast<sockaddr*>(&listen_addr), &addrlen),
+              SyscallSucceeds());
+  uint16_t const port =
+      ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr));
+  int i = 0;
+  while (1) {
+    int ret;
+
+    // Connect to the listening socket.
+    const FileDescriptor conn_fd = ASSERT_NO_ERRNO_AND_VALUE(
+        Socket(connector.family(), SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP));
+    sockaddr_storage conn_addr = connector.addr;
+    ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port));
+    ret = connect(conn_fd.get(), reinterpret_cast<sockaddr*>(&conn_addr),
+                  connector.addr_len);
+    if (ret != 0) {
+      EXPECT_THAT(ret, SyscallFailsWithErrno(EINPROGRESS));
+      struct pollfd pfd = {
+          .fd = conn_fd.get(),
+          .events = POLLOUT,
+      };
+      ret = poll(&pfd, 1, 3000);
+      if (ret == 0) break;
+      EXPECT_THAT(ret, SyscallSucceedsWithValue(1));
+    }
+    EXPECT_THAT(RetryEINTR(send)(conn_fd.get(), &i, sizeof(i), 0),
+                SyscallSucceedsWithValue(sizeof(i)));
+    ASSERT_THAT(shutdown(conn_fd.get(), SHUT_RDWR), SyscallSucceeds());
+    i++;
+  }
+
+  for (; i != 0; i--) {
+    // Accept the connection.
+    //
+    // We have to assign a name to the accepted socket, as unamed temporary
+    // objects are destructed upon full evaluation of the expression it is in,
+    // potentially causing the connecting socket to fail to shutdown properly.
+    auto accepted =
+        ASSERT_NO_ERRNO_AND_VALUE(Accept(listen_fd.get(), nullptr, nullptr));
+  }
+}
+
+// TCPFinWait2Test creates a pair of connected sockets then closes one end to
+// trigger FIN_WAIT2 state for the closed endpoint. Then it binds the same local
+// IP/port on a new socket and tries to connect. The connect should fail w/
+// an EADDRINUSE. Then we wait till the FIN_WAIT2 timeout is over and try the
+// connect again with a new socket and this time it should succeed.
+//
+// TCP timers are not S/R today, this can cause this test to be flaky when run
+// under random S/R due to timer being reset on a restore.
+TEST_P(SocketInetLoopbackTest, TCPFinWait2Test_NoRandomSave) {
+  auto const& param = GetParam();
+  TestAddress const& listener = param.listener;
+  TestAddress const& connector = param.connector;
+
+  // Create the listening socket.
+  const FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP));
+  sockaddr_storage listen_addr = listener.addr;
+  ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast<sockaddr*>(&listen_addr),
+                   listener.addr_len),
+              SyscallSucceeds());
+  ASSERT_THAT(listen(listen_fd.get(), SOMAXCONN), SyscallSucceeds());
+
+  // Get the port bound by the listening socket.
+  socklen_t addrlen = listener.addr_len;
+  ASSERT_THAT(getsockname(listen_fd.get(),
+                          reinterpret_cast<sockaddr*>(&listen_addr), &addrlen),
+              SyscallSucceeds());
+
+  uint16_t const port =
+      ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr));
+
+  // Connect to the listening socket.
+  FileDescriptor conn_fd = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP));
+
+  // Lower FIN_WAIT2 state to 5 seconds for test.
+  constexpr int kTCPLingerTimeout = 5;
+  EXPECT_THAT(setsockopt(conn_fd.get(), IPPROTO_TCP, TCP_LINGER2,
+                         &kTCPLingerTimeout, sizeof(kTCPLingerTimeout)),
+              SyscallSucceedsWithValue(0));
+
+  sockaddr_storage conn_addr = connector.addr;
+  ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port));
+  ASSERT_THAT(RetryEINTR(connect)(conn_fd.get(),
+                                  reinterpret_cast<sockaddr*>(&conn_addr),
+                                  connector.addr_len),
+              SyscallSucceeds());
+
+  // Accept the connection.
+  auto accepted =
+      ASSERT_NO_ERRNO_AND_VALUE(Accept(listen_fd.get(), nullptr, nullptr));
+
+  // Get the address/port bound by the connecting socket.
+  sockaddr_storage conn_bound_addr;
+  socklen_t conn_addrlen = connector.addr_len;
+  ASSERT_THAT(
+      getsockname(conn_fd.get(), reinterpret_cast<sockaddr*>(&conn_bound_addr),
+                  &conn_addrlen),
+      SyscallSucceeds());
+
+  // close the connecting FD to trigger FIN_WAIT2  on the connected fd.
+  conn_fd.reset();
+
+  // Now bind and connect a new socket.
+  const FileDescriptor conn_fd2 = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP));
+
+  // Disable cooperative saves after this point. As a save between the first
+  // bind/connect and the second one can cause the linger timeout timer to
+  // be restarted causing the final bind/connect to fail.
+  DisableSave ds;
+
+  ASSERT_THAT(bind(conn_fd2.get(),
+                   reinterpret_cast<sockaddr*>(&conn_bound_addr), conn_addrlen),
+              SyscallFailsWithErrno(EADDRINUSE));
+
+  // Sleep for a little over the linger timeout to reduce flakiness in
+  // save/restore tests.
+  absl::SleepFor(absl::Seconds(kTCPLingerTimeout + 2));
+
+  ds.reset();
+
+  if (!IsRunningOnGvisor()) {
+    ASSERT_THAT(
+        bind(conn_fd2.get(), reinterpret_cast<sockaddr*>(&conn_bound_addr),
+             conn_addrlen),
+        SyscallSucceeds());
+  }
+  ASSERT_THAT(RetryEINTR(connect)(conn_fd2.get(),
+                                  reinterpret_cast<sockaddr*>(&conn_addr),
+                                  conn_addrlen),
+              SyscallSucceeds());
+}
+
+// TCPLinger2TimeoutAfterClose creates a pair of connected sockets
+// then closes one end to trigger FIN_WAIT2 state for the closed endpont.
+// It then sleeps for the TCP_LINGER2 timeout and verifies that bind/
+// connecting the same address succeeds.
+//
+// TCP timers are not S/R today, this can cause this test to be flaky when run
+// under random S/R due to timer being reset on a restore.
+TEST_P(SocketInetLoopbackTest, TCPLinger2TimeoutAfterClose_NoRandomSave) {
+  auto const& param = GetParam();
+  TestAddress const& listener = param.listener;
+  TestAddress const& connector = param.connector;
+
+  // Create the listening socket.
+  const FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP));
+  sockaddr_storage listen_addr = listener.addr;
+  ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast<sockaddr*>(&listen_addr),
+                   listener.addr_len),
+              SyscallSucceeds());
+  ASSERT_THAT(listen(listen_fd.get(), SOMAXCONN), SyscallSucceeds());
+
+  // Get the port bound by the listening socket.
+  socklen_t addrlen = listener.addr_len;
+  ASSERT_THAT(getsockname(listen_fd.get(),
+                          reinterpret_cast<sockaddr*>(&listen_addr), &addrlen),
+              SyscallSucceeds());
+
+  uint16_t const port =
+      ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr));
+
+  // Connect to the listening socket.
+  FileDescriptor conn_fd = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP));
+
+  sockaddr_storage conn_addr = connector.addr;
+  ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port));
+  ASSERT_THAT(RetryEINTR(connect)(conn_fd.get(),
+                                  reinterpret_cast<sockaddr*>(&conn_addr),
+                                  connector.addr_len),
+              SyscallSucceeds());
+
+  // Accept the connection.
+  auto accepted =
+      ASSERT_NO_ERRNO_AND_VALUE(Accept(listen_fd.get(), nullptr, nullptr));
+
+  // Get the address/port bound by the connecting socket.
+  sockaddr_storage conn_bound_addr;
+  socklen_t conn_addrlen = connector.addr_len;
+  ASSERT_THAT(
+      getsockname(conn_fd.get(), reinterpret_cast<sockaddr*>(&conn_bound_addr),
+                  &conn_addrlen),
+      SyscallSucceeds());
+
+  // Disable cooperative saves after this point as TCP timers are not restored
+  // across a S/R.
+  {
+    DisableSave ds;
+    constexpr int kTCPLingerTimeout = 5;
+    EXPECT_THAT(setsockopt(conn_fd.get(), IPPROTO_TCP, TCP_LINGER2,
+                           &kTCPLingerTimeout, sizeof(kTCPLingerTimeout)),
+                SyscallSucceedsWithValue(0));
+
+    // close the connecting FD to trigger FIN_WAIT2  on the connected fd.
+    conn_fd.reset();
+
+    absl::SleepFor(absl::Seconds(kTCPLingerTimeout + 1));
+
+    // ds going out of scope will Re-enable S/R's since at this point the timer
+    // must have fired and cleaned up the endpoint.
+  }
+
+  // Now bind and connect a new socket and verify that we can immediately
+  // rebind the address bound by the conn_fd as it never entered TIME_WAIT.
+  const FileDescriptor conn_fd2 = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP));
+
+  ASSERT_THAT(bind(conn_fd2.get(),
+                   reinterpret_cast<sockaddr*>(&conn_bound_addr), conn_addrlen),
+              SyscallSucceeds());
+  ASSERT_THAT(RetryEINTR(connect)(conn_fd2.get(),
+                                  reinterpret_cast<sockaddr*>(&conn_addr),
+                                  conn_addrlen),
+              SyscallSucceeds());
+}
+
+// TCPResetAfterClose creates a pair of connected sockets then closes
+// one end to trigger FIN_WAIT2 state for the closed endpoint verifies
+// that we generate RSTs for any new data after the socket is fully
+// closed.
+TEST_P(SocketInetLoopbackTest, TCPResetAfterClose) {
+  auto const& param = GetParam();
+  TestAddress const& listener = param.listener;
+  TestAddress const& connector = param.connector;
+
+  // Create the listening socket.
+  const FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP));
+  sockaddr_storage listen_addr = listener.addr;
+  ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast<sockaddr*>(&listen_addr),
+                   listener.addr_len),
+              SyscallSucceeds());
+  ASSERT_THAT(listen(listen_fd.get(), SOMAXCONN), SyscallSucceeds());
+
+  // Get the port bound by the listening socket.
+  socklen_t addrlen = listener.addr_len;
+  ASSERT_THAT(getsockname(listen_fd.get(),
+                          reinterpret_cast<sockaddr*>(&listen_addr), &addrlen),
+              SyscallSucceeds());
+
+  uint16_t const port =
+      ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr));
+
+  // Connect to the listening socket.
+  FileDescriptor conn_fd = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP));
+
+  sockaddr_storage conn_addr = connector.addr;
+  ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port));
+  ASSERT_THAT(RetryEINTR(connect)(conn_fd.get(),
+                                  reinterpret_cast<sockaddr*>(&conn_addr),
+                                  connector.addr_len),
+              SyscallSucceeds());
+
+  // Accept the connection.
+  auto accepted =
+      ASSERT_NO_ERRNO_AND_VALUE(Accept(listen_fd.get(), nullptr, nullptr));
+
+  // close the connecting FD to trigger FIN_WAIT2  on the connected fd.
+  conn_fd.reset();
+
+  int data = 1234;
+
+  // Now send data which should trigger a RST as the other end should
+  // have timed out and closed the socket.
+  EXPECT_THAT(RetryEINTR(send)(accepted.get(), &data, sizeof(data), 0),
+              SyscallSucceeds());
+  // Sleep for a shortwhile to get a RST back.
+  absl::SleepFor(absl::Seconds(1));
+
+  // Try writing again and we should get an EPIPE back.
+  EXPECT_THAT(RetryEINTR(send)(accepted.get(), &data, sizeof(data), 0),
+              SyscallFailsWithErrno(EPIPE));
+
+  // Trying to read should return zero as the other end did send
+  // us a FIN. We do it twice to verify that the RST does not cause an
+  // ECONNRESET on the read after EOF has been read by applicaiton.
+  EXPECT_THAT(RetryEINTR(recv)(accepted.get(), &data, sizeof(data), 0),
+              SyscallSucceedsWithValue(0));
+  EXPECT_THAT(RetryEINTR(recv)(accepted.get(), &data, sizeof(data), 0),
+              SyscallSucceedsWithValue(0));
+}
+
+// This test is disabled under random save as the the restore run
+// results in the stack.Seed() being different which can cause
+// sequence number of final connect to be one that is considered
+// old and can cause the test to be flaky.
+TEST_P(SocketInetLoopbackTest, TCPTimeWaitTest_NoRandomSave) {
+  auto const& param = GetParam();
+  TestAddress const& listener = param.listener;
+  TestAddress const& connector = param.connector;
+
+  // Create the listening socket.
+  const FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP));
+  sockaddr_storage listen_addr = listener.addr;
+  ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast<sockaddr*>(&listen_addr),
+                   listener.addr_len),
+              SyscallSucceeds());
+  ASSERT_THAT(listen(listen_fd.get(), SOMAXCONN), SyscallSucceeds());
+
+  // Get the port bound by the listening socket.
+  socklen_t addrlen = listener.addr_len;
+  ASSERT_THAT(getsockname(listen_fd.get(),
+                          reinterpret_cast<sockaddr*>(&listen_addr), &addrlen),
+              SyscallSucceeds());
+
+  uint16_t const port =
+      ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr));
+
+  // Connect to the listening socket.
+  FileDescriptor conn_fd = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP));
+
+  // We disable saves after this point as a S/R causes the netstack seed
+  // to be regenerated which changes what ports/ISN is picked for a given
+  // tuple (src ip,src port, dst ip, dst port). This can cause the final
+  // SYN to use a sequence number that looks like one from the current
+  // connection in TIME_WAIT and will not be accepted causing the test
+  // to timeout.
+  //
+  // TODO(gvisor.dev/issue/940): S/R portSeed/portHint
+  DisableSave ds;
+  sockaddr_storage conn_addr = connector.addr;
+  ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port));
+  ASSERT_THAT(RetryEINTR(connect)(conn_fd.get(),
+                                  reinterpret_cast<sockaddr*>(&conn_addr),
+                                  connector.addr_len),
+              SyscallSucceeds());
+
+  // Accept the connection.
+  auto accepted =
+      ASSERT_NO_ERRNO_AND_VALUE(Accept(listen_fd.get(), nullptr, nullptr));
+
+  // Get the address/port bound by the connecting socket.
+  sockaddr_storage conn_bound_addr;
+  socklen_t conn_addrlen = connector.addr_len;
+  ASSERT_THAT(
+      getsockname(conn_fd.get(), reinterpret_cast<sockaddr*>(&conn_bound_addr),
+                  &conn_addrlen),
+      SyscallSucceeds());
+
+  // close the accept FD to trigger TIME_WAIT on the accepted socket which
+  // should cause the conn_fd to follow CLOSE_WAIT->LAST_ACK->CLOSED instead of
+  // TIME_WAIT.
+  accepted.reset();
+  absl::SleepFor(absl::Seconds(1));
+  conn_fd.reset();
+  absl::SleepFor(absl::Seconds(1));
+
+  // Now bind and connect a new socket and verify that we can immediately
+  // rebind the address bound by the conn_fd as it never entered TIME_WAIT.
+  const FileDescriptor conn_fd2 = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP));
+
+  ASSERT_THAT(bind(conn_fd2.get(),
+                   reinterpret_cast<sockaddr*>(&conn_bound_addr), conn_addrlen),
+              SyscallSucceeds());
+  ASSERT_THAT(RetryEINTR(connect)(conn_fd2.get(),
+                                  reinterpret_cast<sockaddr*>(&conn_addr),
+                                  conn_addrlen),
+              SyscallSucceeds());
+}
+
+TEST_P(SocketInetLoopbackTest, AcceptedInheritsTCPUserTimeout) {
+  auto const& param = GetParam();
+  TestAddress const& listener = param.listener;
+  TestAddress const& connector = param.connector;
+
+  // Create the listening socket.
+  const FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP));
+  sockaddr_storage listen_addr = listener.addr;
+  ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast<sockaddr*>(&listen_addr),
+                   listener.addr_len),
+              SyscallSucceeds());
+  ASSERT_THAT(listen(listen_fd.get(), SOMAXCONN), SyscallSucceeds());
+
+  // Get the port bound by the listening socket.
+  socklen_t addrlen = listener.addr_len;
+  ASSERT_THAT(getsockname(listen_fd.get(),
+                          reinterpret_cast<sockaddr*>(&listen_addr), &addrlen),
+              SyscallSucceeds());
+
+  const uint16_t port =
+      ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr));
+
+  // Set the userTimeout on the listening socket.
+  constexpr int kUserTimeout = 10;
+  ASSERT_THAT(setsockopt(listen_fd.get(), IPPROTO_TCP, TCP_USER_TIMEOUT,
+                         &kUserTimeout, sizeof(kUserTimeout)),
+              SyscallSucceeds());
+
+  // Connect to the listening socket.
+  FileDescriptor conn_fd = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP));
+
+  sockaddr_storage conn_addr = connector.addr;
+  ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port));
+  ASSERT_THAT(RetryEINTR(connect)(conn_fd.get(),
+                                  reinterpret_cast<sockaddr*>(&conn_addr),
+                                  connector.addr_len),
+              SyscallSucceeds());
+
+  // Accept the connection.
+  auto accepted =
+      ASSERT_NO_ERRNO_AND_VALUE(Accept(listen_fd.get(), nullptr, nullptr));
+  // Verify that the accepted socket inherited the user timeout set on
+  // listening socket.
+  int get = -1;
+  socklen_t get_len = sizeof(get);
+  ASSERT_THAT(
+      getsockopt(accepted.get(), IPPROTO_TCP, TCP_USER_TIMEOUT, &get, &get_len),
+      SyscallSucceeds());
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, kUserTimeout);
+}
+
+// TODO(gvisor.dev/issue/1688): Partially completed passive endpoints are not
+// saved. Enable S/R once issue is fixed.
+TEST_P(SocketInetLoopbackTest, TCPDeferAccept_NoRandomSave) {
+  // TODO(gvisor.dev/issue/1688): Partially completed passive endpoints are not
+  // saved. Enable S/R issue is fixed.
+  DisableSave ds;
+
+  auto const& param = GetParam();
+  TestAddress const& listener = param.listener;
+  TestAddress const& connector = param.connector;
+
+  // Create the listening socket.
+  const FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP));
+  sockaddr_storage listen_addr = listener.addr;
+  ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast<sockaddr*>(&listen_addr),
+                   listener.addr_len),
+              SyscallSucceeds());
+  ASSERT_THAT(listen(listen_fd.get(), SOMAXCONN), SyscallSucceeds());
+
+  // Get the port bound by the listening socket.
+  socklen_t addrlen = listener.addr_len;
+  ASSERT_THAT(getsockname(listen_fd.get(),
+                          reinterpret_cast<sockaddr*>(&listen_addr), &addrlen),
+              SyscallSucceeds());
+
+  const uint16_t port =
+      ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr));
+
+  // Set the TCP_DEFER_ACCEPT on the listening socket.
+  constexpr int kTCPDeferAccept = 3;
+  ASSERT_THAT(setsockopt(listen_fd.get(), IPPROTO_TCP, TCP_DEFER_ACCEPT,
+                         &kTCPDeferAccept, sizeof(kTCPDeferAccept)),
+              SyscallSucceeds());
+
+  // Connect to the listening socket.
+  FileDescriptor conn_fd = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP));
+
+  sockaddr_storage conn_addr = connector.addr;
+  ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port));
+  ASSERT_THAT(RetryEINTR(connect)(conn_fd.get(),
+                                  reinterpret_cast<sockaddr*>(&conn_addr),
+                                  connector.addr_len),
+              SyscallSucceeds());
+
+  // Set the listening socket to nonblock so that we can verify that there is no
+  // connection in queue despite the connect above succeeding since the peer has
+  // sent no data and TCP_DEFER_ACCEPT is set on the listening socket. Set the
+  // FD to O_NONBLOCK.
+  int opts;
+  ASSERT_THAT(opts = fcntl(listen_fd.get(), F_GETFL), SyscallSucceeds());
+  opts |= O_NONBLOCK;
+  ASSERT_THAT(fcntl(listen_fd.get(), F_SETFL, opts), SyscallSucceeds());
+
+  ASSERT_THAT(accept(listen_fd.get(), nullptr, nullptr),
+              SyscallFailsWithErrno(EWOULDBLOCK));
+
+  // Set FD back to blocking.
+  opts &= ~O_NONBLOCK;
+  ASSERT_THAT(fcntl(listen_fd.get(), F_SETFL, opts), SyscallSucceeds());
+
+  // Now write some data to the socket.
+  int data = 0;
+  ASSERT_THAT(RetryEINTR(write)(conn_fd.get(), &data, sizeof(data)),
+              SyscallSucceedsWithValue(sizeof(data)));
+
+  // This should now cause the connection to complete and be delivered to the
+  // accept socket.
+
+  // Accept the connection.
+  auto accepted =
+      ASSERT_NO_ERRNO_AND_VALUE(Accept(listen_fd.get(), nullptr, nullptr));
+
+  // Verify that the accepted socket returns the data written.
+  int get = -1;
+  ASSERT_THAT(RetryEINTR(recv)(accepted.get(), &get, sizeof(get), 0),
+              SyscallSucceedsWithValue(sizeof(get)));
+
+  EXPECT_EQ(get, data);
+}
+
+// TODO(gvisor.dev/issue/1688): Partially completed passive endpoints are not
+// saved. Enable S/R once issue is fixed.
+TEST_P(SocketInetLoopbackTest, TCPDeferAcceptTimeout_NoRandomSave) {
+  // TODO(gvisor.dev/issue/1688): Partially completed passive endpoints are not
+  // saved. Enable S/R once issue is fixed.
+  DisableSave ds;
+
+  auto const& param = GetParam();
+  TestAddress const& listener = param.listener;
+  TestAddress const& connector = param.connector;
+
+  // Create the listening socket.
+  const FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP));
+  sockaddr_storage listen_addr = listener.addr;
+  ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast<sockaddr*>(&listen_addr),
+                   listener.addr_len),
+              SyscallSucceeds());
+  ASSERT_THAT(listen(listen_fd.get(), SOMAXCONN), SyscallSucceeds());
+
+  // Get the port bound by the listening socket.
+  socklen_t addrlen = listener.addr_len;
+  ASSERT_THAT(getsockname(listen_fd.get(),
+                          reinterpret_cast<sockaddr*>(&listen_addr), &addrlen),
+              SyscallSucceeds());
+
+  const uint16_t port =
+      ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr));
+
+  // Set the TCP_DEFER_ACCEPT on the listening socket.
+  constexpr int kTCPDeferAccept = 3;
+  ASSERT_THAT(setsockopt(listen_fd.get(), IPPROTO_TCP, TCP_DEFER_ACCEPT,
+                         &kTCPDeferAccept, sizeof(kTCPDeferAccept)),
+              SyscallSucceeds());
+
+  // Connect to the listening socket.
+  FileDescriptor conn_fd = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP));
+
+  sockaddr_storage conn_addr = connector.addr;
+  ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port));
+  ASSERT_THAT(RetryEINTR(connect)(conn_fd.get(),
+                                  reinterpret_cast<sockaddr*>(&conn_addr),
+                                  connector.addr_len),
+              SyscallSucceeds());
+
+  // Set the listening socket to nonblock so that we can verify that there is no
+  // connection in queue despite the connect above succeeding since the peer has
+  // sent no data and TCP_DEFER_ACCEPT is set on the listening socket. Set the
+  // FD to O_NONBLOCK.
+  int opts;
+  ASSERT_THAT(opts = fcntl(listen_fd.get(), F_GETFL), SyscallSucceeds());
+  opts |= O_NONBLOCK;
+  ASSERT_THAT(fcntl(listen_fd.get(), F_SETFL, opts), SyscallSucceeds());
+
+  // Verify that there is no acceptable connection before TCP_DEFER_ACCEPT
+  // timeout is hit.
+  absl::SleepFor(absl::Seconds(kTCPDeferAccept - 1));
+  ASSERT_THAT(accept(listen_fd.get(), nullptr, nullptr),
+              SyscallFailsWithErrno(EWOULDBLOCK));
+
+  // Set FD back to blocking.
+  opts &= ~O_NONBLOCK;
+  ASSERT_THAT(fcntl(listen_fd.get(), F_SETFL, opts), SyscallSucceeds());
+
+  // Now sleep for a little over the TCP_DEFER_ACCEPT duration. When the timeout
+  // is hit a SYN-ACK should be retransmitted by the listener as a last ditch
+  // attempt to complete the connection with or without data.
+  absl::SleepFor(absl::Seconds(2));
+
+  // Verify that we have a connection that can be accepted even though no
+  // data was written.
+  auto accepted =
+      ASSERT_NO_ERRNO_AND_VALUE(Accept(listen_fd.get(), nullptr, nullptr));
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    All, SocketInetLoopbackTest,
+    ::testing::Values(
+        // Listeners bound to IPv4 addresses refuse connections using IPv6
+        // addresses.
+        TestParam{V4Any(), V4Any()}, TestParam{V4Any(), V4Loopback()},
+        TestParam{V4Any(), V4MappedAny()},
+        TestParam{V4Any(), V4MappedLoopback()},
+        TestParam{V4Loopback(), V4Any()}, TestParam{V4Loopback(), V4Loopback()},
+        TestParam{V4Loopback(), V4MappedLoopback()},
+        TestParam{V4MappedAny(), V4Any()},
+        TestParam{V4MappedAny(), V4Loopback()},
+        TestParam{V4MappedAny(), V4MappedAny()},
+        TestParam{V4MappedAny(), V4MappedLoopback()},
+        TestParam{V4MappedLoopback(), V4Any()},
+        TestParam{V4MappedLoopback(), V4Loopback()},
+        TestParam{V4MappedLoopback(), V4MappedLoopback()},
+
+        // Listeners bound to IN6ADDR_ANY accept all connections.
+        TestParam{V6Any(), V4Any()}, TestParam{V6Any(), V4Loopback()},
+        TestParam{V6Any(), V4MappedAny()},
+        TestParam{V6Any(), V4MappedLoopback()}, TestParam{V6Any(), V6Any()},
+        TestParam{V6Any(), V6Loopback()},
+
+        // Listeners bound to IN6ADDR_LOOPBACK refuse connections using IPv4
+        // addresses.
+        TestParam{V6Loopback(), V6Any()},
+        TestParam{V6Loopback(), V6Loopback()}),
+    DescribeTestParam);
+
+using SocketInetReusePortTest = ::testing::TestWithParam<TestParam>;
+
+// TODO(gvisor.dev/issue/940): Remove _NoRandomSave when portHint/stack.Seed is
+// saved/restored.
+TEST_P(SocketInetReusePortTest, TcpPortReuseMultiThread_NoRandomSave) {
+  auto const& param = GetParam();
+
+  TestAddress const& listener = param.listener;
+  TestAddress const& connector = param.connector;
+  sockaddr_storage listen_addr = listener.addr;
+  sockaddr_storage conn_addr = connector.addr;
+  constexpr int kThreadCount = 3;
+  constexpr int kConnectAttempts = 10000;
+
+  // Create the listening socket.
+  FileDescriptor listener_fds[kThreadCount];
+  for (int i = 0; i < kThreadCount; i++) {
+    listener_fds[i] = ASSERT_NO_ERRNO_AND_VALUE(
+        Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP));
+    int fd = listener_fds[i].get();
+
+    ASSERT_THAT(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &kSockOptOn,
+                           sizeof(kSockOptOn)),
+                SyscallSucceeds());
+    ASSERT_THAT(
+        bind(fd, reinterpret_cast<sockaddr*>(&listen_addr), listener.addr_len),
+        SyscallSucceeds());
+    ASSERT_THAT(listen(fd, 40), SyscallSucceeds());
+
+    // On the first bind we need to determine which port was bound.
+    if (i != 0) {
+      continue;
+    }
+
+    // Get the port bound by the listening socket.
+    socklen_t addrlen = listener.addr_len;
+    ASSERT_THAT(
+        getsockname(listener_fds[0].get(),
+                    reinterpret_cast<sockaddr*>(&listen_addr), &addrlen),
+        SyscallSucceeds());
+    uint16_t const port =
+        ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr));
+    ASSERT_NO_ERRNO(SetAddrPort(listener.family(), &listen_addr, port));
+    ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port));
+  }
+
+  std::atomic<int> connects_received = ATOMIC_VAR_INIT(0);
+  std::unique_ptr<ScopedThread> listen_thread[kThreadCount];
+  int accept_counts[kThreadCount] = {};
+  // TODO(avagin): figure how to not disable S/R for the whole test.
+  // We need to take into account that this test executes a lot of system
+  // calls from many threads.
+  DisableSave ds;
+
+  for (int i = 0; i < kThreadCount; i++) {
+    listen_thread[i] = absl::make_unique<ScopedThread>(
+        [&listener_fds, &accept_counts, i, &connects_received]() {
+          do {
+            auto fd = Accept(listener_fds[i].get(), nullptr, nullptr);
+            if (!fd.ok()) {
+              if (connects_received >= kConnectAttempts) {
+                // Another thread have shutdown our read side causing the
+                // accept to fail.
+                ASSERT_EQ(errno, EINVAL);
+                break;
+              }
+              ASSERT_NO_ERRNO(fd);
+              break;
+            }
+            // Receive some data from a socket to be sure that the connect()
+            // system call has been completed on another side.
+            // Do a short read and then close the socket to trigger a RST. This
+            // ensures that both ends of the connection are cleaned up and no
+            // goroutines hang around in TIME-WAIT. We do this so that this test
+            // does not timeout under gotsan runs where lots of goroutines can
+            // cause the test to use absurd amounts of memory.
+            //
+            // See: https://tools.ietf.org/html/rfc2525#page-50 section 2.17
+            uint16_t data;
+            EXPECT_THAT(
+                RetryEINTR(recv)(fd.ValueOrDie().get(), &data, sizeof(data), 0),
+                SyscallSucceedsWithValue(sizeof(data)));
+            accept_counts[i]++;
+          } while (++connects_received < kConnectAttempts);
+
+          // Shutdown all sockets to wake up other threads.
+          for (int j = 0; j < kThreadCount; j++) {
+            shutdown(listener_fds[j].get(), SHUT_RDWR);
+          }
+        });
+  }
+
+  ScopedThread connecting_thread([&connector, &conn_addr]() {
+    for (int i = 0; i < kConnectAttempts; i++) {
+      const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(
+          Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP));
+      ASSERT_THAT(
+          RetryEINTR(connect)(fd.get(), reinterpret_cast<sockaddr*>(&conn_addr),
+                              connector.addr_len),
+          SyscallSucceeds());
+
+      // Do two separate sends to ensure two segments are received. This is
+      // required for netstack where read is incorrectly assuming a whole
+      // segment is read when endpoint.Read() is called which is technically
+      // incorrect as the syscall that invoked endpoint.Read() may only
+      // consume it partially. This results in a case where a close() of
+      // such a socket does not trigger a RST in netstack due to the
+      // endpoint assuming that the endpoint has no unread data.
+      EXPECT_THAT(RetryEINTR(send)(fd.get(), &i, sizeof(i), 0),
+                  SyscallSucceedsWithValue(sizeof(i)));
+
+      // TODO(gvisor.dev/issue/1449): Remove this block once netstack correctly
+      //   generates a RST.
+      if (IsRunningOnGvisor()) {
+        EXPECT_THAT(RetryEINTR(send)(fd.get(), &i, sizeof(i), 0),
+                    SyscallSucceedsWithValue(sizeof(i)));
+      }
+    }
+  });
+
+  // Join threads to be sure that all connections have been counted
+  connecting_thread.Join();
+  for (int i = 0; i < kThreadCount; i++) {
+    listen_thread[i]->Join();
+  }
+  // Check that connections are distributed fairly between listening sockets
+  for (int i = 0; i < kThreadCount; i++)
+    EXPECT_THAT(accept_counts[i],
+                EquivalentWithin((kConnectAttempts / kThreadCount), 0.10));
+}
+
+TEST_P(SocketInetReusePortTest, UdpPortReuseMultiThread_NoRandomSave) {
+  auto const& param = GetParam();
+
+  TestAddress const& listener = param.listener;
+  TestAddress const& connector = param.connector;
+  sockaddr_storage listen_addr = listener.addr;
+  sockaddr_storage conn_addr = connector.addr;
+  constexpr int kThreadCount = 3;
+
+  // Create the listening socket.
+  FileDescriptor listener_fds[kThreadCount];
+  for (int i = 0; i < kThreadCount; i++) {
+    listener_fds[i] =
+        ASSERT_NO_ERRNO_AND_VALUE(Socket(listener.family(), SOCK_DGRAM, 0));
+    int fd = listener_fds[i].get();
+
+    ASSERT_THAT(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &kSockOptOn,
+                           sizeof(kSockOptOn)),
+                SyscallSucceeds());
+    ASSERT_THAT(
+        bind(fd, reinterpret_cast<sockaddr*>(&listen_addr), listener.addr_len),
+        SyscallSucceeds());
+
+    // On the first bind we need to determine which port was bound.
+    if (i != 0) {
+      continue;
+    }
+
+    // Get the port bound by the listening socket.
+    socklen_t addrlen = listener.addr_len;
+    ASSERT_THAT(
+        getsockname(listener_fds[0].get(),
+                    reinterpret_cast<sockaddr*>(&listen_addr), &addrlen),
+        SyscallSucceeds());
+    uint16_t const port =
+        ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr));
+    ASSERT_NO_ERRNO(SetAddrPort(listener.family(), &listen_addr, port));
+    ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port));
+  }
+
+  constexpr int kConnectAttempts = 10000;
+  std::atomic<int> packets_received = ATOMIC_VAR_INIT(0);
+  std::unique_ptr<ScopedThread> receiver_thread[kThreadCount];
+  int packets_per_socket[kThreadCount] = {};
+  // TODO(avagin): figure how to not disable S/R for the whole test.
+  DisableSave ds;  // Too expensive.
+
+  for (int i = 0; i < kThreadCount; i++) {
+    receiver_thread[i] = absl::make_unique<ScopedThread>(
+        [&listener_fds, &packets_per_socket, i, &packets_received]() {
+          do {
+            struct sockaddr_storage addr = {};
+            socklen_t addrlen = sizeof(addr);
+            int data;
+
+            auto ret = RetryEINTR(recvfrom)(
+                listener_fds[i].get(), &data, sizeof(data), 0,
+                reinterpret_cast<struct sockaddr*>(&addr), &addrlen);
+
+            if (packets_received < kConnectAttempts) {
+              ASSERT_THAT(ret, SyscallSucceedsWithValue(sizeof(data)));
+            }
+
+            if (ret != sizeof(data)) {
+              // Another thread may have shutdown our read side causing the
+              // recvfrom to fail.
+              break;
+            }
+
+            packets_received++;
+            packets_per_socket[i]++;
+
+            // A response is required to synchronize with the main thread,
+            // otherwise the main thread can send more than can fit into receive
+            // queues.
+            EXPECT_THAT(RetryEINTR(sendto)(
+                            listener_fds[i].get(), &data, sizeof(data), 0,
+                            reinterpret_cast<sockaddr*>(&addr), addrlen),
+                        SyscallSucceedsWithValue(sizeof(data)));
+          } while (packets_received < kConnectAttempts);
+
+          // Shutdown all sockets to wake up other threads.
+          for (int j = 0; j < kThreadCount; j++)
+            shutdown(listener_fds[j].get(), SHUT_RDWR);
+        });
+  }
+
+  ScopedThread main_thread([&connector, &conn_addr]() {
+    for (int i = 0; i < kConnectAttempts; i++) {
+      const FileDescriptor fd =
+          ASSERT_NO_ERRNO_AND_VALUE(Socket(connector.family(), SOCK_DGRAM, 0));
+      EXPECT_THAT(RetryEINTR(sendto)(fd.get(), &i, sizeof(i), 0,
+                                     reinterpret_cast<sockaddr*>(&conn_addr),
+                                     connector.addr_len),
+                  SyscallSucceedsWithValue(sizeof(i)));
+      int data;
+      EXPECT_THAT(RetryEINTR(recv)(fd.get(), &data, sizeof(data), 0),
+                  SyscallSucceedsWithValue(sizeof(data)));
+    }
+  });
+
+  main_thread.Join();
+
+  // Join threads to be sure that all connections have been counted
+  for (int i = 0; i < kThreadCount; i++) {
+    receiver_thread[i]->Join();
+  }
+  // Check that packets are distributed fairly between listening sockets.
+  for (int i = 0; i < kThreadCount; i++)
+    EXPECT_THAT(packets_per_socket[i],
+                EquivalentWithin((kConnectAttempts / kThreadCount), 0.10));
+}
+
+TEST_P(SocketInetReusePortTest, UdpPortReuseMultiThreadShort_NoRandomSave) {
+  auto const& param = GetParam();
+
+  TestAddress const& listener = param.listener;
+  TestAddress const& connector = param.connector;
+  sockaddr_storage listen_addr = listener.addr;
+  sockaddr_storage conn_addr = connector.addr;
+  constexpr int kThreadCount = 3;
+
+  // TODO(b/141211329): endpointsByNic.seed has to be saved/restored.
+  const DisableSave ds141211329;
+
+  // Create listening sockets.
+  FileDescriptor listener_fds[kThreadCount];
+  for (int i = 0; i < kThreadCount; i++) {
+    listener_fds[i] =
+        ASSERT_NO_ERRNO_AND_VALUE(Socket(listener.family(), SOCK_DGRAM, 0));
+    int fd = listener_fds[i].get();
+
+    ASSERT_THAT(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &kSockOptOn,
+                           sizeof(kSockOptOn)),
+                SyscallSucceeds());
+    ASSERT_THAT(
+        bind(fd, reinterpret_cast<sockaddr*>(&listen_addr), listener.addr_len),
+        SyscallSucceeds());
+
+    // On the first bind we need to determine which port was bound.
+    if (i != 0) {
+      continue;
+    }
+
+    // Get the port bound by the listening socket.
+    socklen_t addrlen = listener.addr_len;
+    ASSERT_THAT(
+        getsockname(listener_fds[0].get(),
+                    reinterpret_cast<sockaddr*>(&listen_addr), &addrlen),
+        SyscallSucceeds());
+    uint16_t const port =
+        ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr));
+    ASSERT_NO_ERRNO(SetAddrPort(listener.family(), &listen_addr, port));
+    ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port));
+  }
+
+  constexpr int kConnectAttempts = 10;
+  FileDescriptor client_fds[kConnectAttempts];
+
+  // Do the first run without save/restore.
+  DisableSave ds;
+  for (int i = 0; i < kConnectAttempts; i++) {
+    client_fds[i] =
+        ASSERT_NO_ERRNO_AND_VALUE(Socket(connector.family(), SOCK_DGRAM, 0));
+    EXPECT_THAT(RetryEINTR(sendto)(client_fds[i].get(), &i, sizeof(i), 0,
+                                   reinterpret_cast<sockaddr*>(&conn_addr),
+                                   connector.addr_len),
+                SyscallSucceedsWithValue(sizeof(i)));
+  }
+  ds.reset();
+
+  // Check that a mapping of client and server sockets has
+  // not been change after save/restore.
+  for (int i = 0; i < kConnectAttempts; i++) {
+    EXPECT_THAT(RetryEINTR(sendto)(client_fds[i].get(), &i, sizeof(i), 0,
+                                   reinterpret_cast<sockaddr*>(&conn_addr),
+                                   connector.addr_len),
+                SyscallSucceedsWithValue(sizeof(i)));
+  }
+
+  struct pollfd pollfds[kThreadCount];
+  for (int i = 0; i < kThreadCount; i++) {
+    pollfds[i].fd = listener_fds[i].get();
+    pollfds[i].events = POLLIN;
+  }
+
+  std::map<uint16_t, int> portToFD;
+
+  int received = 0;
+  while (received < kConnectAttempts * 2) {
+    ASSERT_THAT(poll(pollfds, kThreadCount, -1),
+                SyscallSucceedsWithValue(Gt(0)));
+
+    for (int i = 0; i < kThreadCount; i++) {
+      if ((pollfds[i].revents & POLLIN) == 0) {
+        continue;
+      }
+
+      received++;
+
+      const int fd = pollfds[i].fd;
+      struct sockaddr_storage addr = {};
+      socklen_t addrlen = sizeof(addr);
+      int data;
+      EXPECT_THAT(RetryEINTR(recvfrom)(
+                      fd, &data, sizeof(data), 0,
+                      reinterpret_cast<struct sockaddr*>(&addr), &addrlen),
+                  SyscallSucceedsWithValue(sizeof(data)));
+      uint16_t const port =
+          ASSERT_NO_ERRNO_AND_VALUE(AddrPort(connector.family(), addr));
+      auto prev_port = portToFD.find(port);
+      // Check that all packets from one client have been delivered to the
+      // same server socket.
+      if (prev_port == portToFD.end()) {
+        portToFD[port] = fd;
+      } else {
+        EXPECT_EQ(portToFD[port], fd);
+      }
+    }
+  }
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    All, SocketInetReusePortTest,
+    ::testing::Values(
+        // Listeners bound to IPv4 addresses refuse connections using IPv6
+        // addresses.
+        TestParam{V4Any(), V4Loopback()},
+        TestParam{V4Loopback(), V4MappedLoopback()},
+
+        // Listeners bound to IN6ADDR_ANY accept all connections.
+        TestParam{V6Any(), V4Loopback()}, TestParam{V6Any(), V6Loopback()},
+
+        // Listeners bound to IN6ADDR_LOOPBACK refuse connections using IPv4
+        // addresses.
+        TestParam{V6Loopback(), V6Loopback()}),
+    DescribeTestParam);
+
+struct ProtocolTestParam {
+  std::string description;
+  int type;
+};
+
+std::string DescribeProtocolTestParam(
+    ::testing::TestParamInfo<ProtocolTestParam> const& info) {
+  return info.param.description;
+}
+
+using SocketMultiProtocolInetLoopbackTest =
+    ::testing::TestWithParam<ProtocolTestParam>;
+
+TEST_P(SocketMultiProtocolInetLoopbackTest, V4MappedLoopbackOnlyReservesV4) {
+  auto const& param = GetParam();
+
+  for (int i = 0; true; i++) {
+    // Bind the v4 loopback on a dual stack socket.
+    TestAddress const& test_addr_dual = V4MappedLoopback();
+    sockaddr_storage addr_dual = test_addr_dual.addr;
+    const FileDescriptor fd_dual = ASSERT_NO_ERRNO_AND_VALUE(
+        Socket(test_addr_dual.family(), param.type, 0));
+    ASSERT_THAT(bind(fd_dual.get(), reinterpret_cast<sockaddr*>(&addr_dual),
+                     test_addr_dual.addr_len),
+                SyscallSucceeds());
+
+    // Get the port that we bound.
+    socklen_t addrlen = test_addr_dual.addr_len;
+    ASSERT_THAT(getsockname(fd_dual.get(),
+                            reinterpret_cast<sockaddr*>(&addr_dual), &addrlen),
+                SyscallSucceeds());
+    uint16_t const port =
+        ASSERT_NO_ERRNO_AND_VALUE(AddrPort(test_addr_dual.family(), addr_dual));
+
+    // Verify that we can still bind the v6 loopback on the same port.
+    TestAddress const& test_addr_v6 = V6Loopback();
+    sockaddr_storage addr_v6 = test_addr_v6.addr;
+    ASSERT_NO_ERRNO(SetAddrPort(test_addr_v6.family(), &addr_v6, port));
+    const FileDescriptor fd_v6 =
+        ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr_v6.family(), param.type, 0));
+    int ret = bind(fd_v6.get(), reinterpret_cast<sockaddr*>(&addr_v6),
+                   test_addr_v6.addr_len);
+    if (ret == -1 && errno == EADDRINUSE) {
+      // Port may have been in use.
+      ASSERT_LT(i, 100);  // Give up after 100 tries.
+      continue;
+    }
+    ASSERT_THAT(ret, SyscallSucceeds());
+
+    // Verify that binding the v4 loopback with the same port on a v4 socket
+    // fails.
+    TestAddress const& test_addr_v4 = V4Loopback();
+    sockaddr_storage addr_v4 = test_addr_v4.addr;
+    ASSERT_NO_ERRNO(SetAddrPort(test_addr_v4.family(), &addr_v4, port));
+    const FileDescriptor fd_v4 =
+        ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr_v4.family(), param.type, 0));
+    ASSERT_THAT(bind(fd_v4.get(), reinterpret_cast<sockaddr*>(&addr_v4),
+                     test_addr_v4.addr_len),
+                SyscallFailsWithErrno(EADDRINUSE));
+
+    // No need to try again.
+    break;
+  }
+}
+
+TEST_P(SocketMultiProtocolInetLoopbackTest, V4MappedAnyOnlyReservesV4) {
+  auto const& param = GetParam();
+
+  for (int i = 0; true; i++) {
+    // Bind the v4 any on a dual stack socket.
+    TestAddress const& test_addr_dual = V4MappedAny();
+    sockaddr_storage addr_dual = test_addr_dual.addr;
+    const FileDescriptor fd_dual = ASSERT_NO_ERRNO_AND_VALUE(
+        Socket(test_addr_dual.family(), param.type, 0));
+    ASSERT_THAT(bind(fd_dual.get(), reinterpret_cast<sockaddr*>(&addr_dual),
+                     test_addr_dual.addr_len),
+                SyscallSucceeds());
+
+    // Get the port that we bound.
+    socklen_t addrlen = test_addr_dual.addr_len;
+    ASSERT_THAT(getsockname(fd_dual.get(),
+                            reinterpret_cast<sockaddr*>(&addr_dual), &addrlen),
+                SyscallSucceeds());
+    uint16_t const port =
+        ASSERT_NO_ERRNO_AND_VALUE(AddrPort(test_addr_dual.family(), addr_dual));
+
+    // Verify that we can still bind the v6 loopback on the same port.
+    TestAddress const& test_addr_v6 = V6Loopback();
+    sockaddr_storage addr_v6 = test_addr_v6.addr;
+    ASSERT_NO_ERRNO(SetAddrPort(test_addr_v6.family(), &addr_v6, port));
+    const FileDescriptor fd_v6 =
+        ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr_v6.family(), param.type, 0));
+    int ret = bind(fd_v6.get(), reinterpret_cast<sockaddr*>(&addr_v6),
+                   test_addr_v6.addr_len);
+    if (ret == -1 && errno == EADDRINUSE) {
+      // Port may have been in use.
+      ASSERT_LT(i, 100);  // Give up after 100 tries.
+      continue;
+    }
+    ASSERT_THAT(ret, SyscallSucceeds());
+
+    // Verify that binding the v4 loopback with the same port on a v4 socket
+    // fails.
+    TestAddress const& test_addr_v4 = V4Loopback();
+    sockaddr_storage addr_v4 = test_addr_v4.addr;
+    ASSERT_NO_ERRNO(SetAddrPort(test_addr_v4.family(), &addr_v4, port));
+    const FileDescriptor fd_v4 =
+        ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr_v4.family(), param.type, 0));
+    ASSERT_THAT(bind(fd_v4.get(), reinterpret_cast<sockaddr*>(&addr_v4),
+                     test_addr_v4.addr_len),
+                SyscallFailsWithErrno(EADDRINUSE));
+
+    // No need to try again.
+    break;
+  }
+}
+
+TEST_P(SocketMultiProtocolInetLoopbackTest, DualStackV6AnyReservesEverything) {
+  auto const& param = GetParam();
+
+  // Bind the v6 any on a dual stack socket.
+  TestAddress const& test_addr_dual = V6Any();
+  sockaddr_storage addr_dual = test_addr_dual.addr;
+  const FileDescriptor fd_dual =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr_dual.family(), param.type, 0));
+  ASSERT_THAT(bind(fd_dual.get(), reinterpret_cast<sockaddr*>(&addr_dual),
+                   test_addr_dual.addr_len),
+              SyscallSucceeds());
+
+  // Get the port that we bound.
+  socklen_t addrlen = test_addr_dual.addr_len;
+  ASSERT_THAT(getsockname(fd_dual.get(),
+                          reinterpret_cast<sockaddr*>(&addr_dual), &addrlen),
+              SyscallSucceeds());
+  uint16_t const port =
+      ASSERT_NO_ERRNO_AND_VALUE(AddrPort(test_addr_dual.family(), addr_dual));
+
+  // Verify that binding the v6 loopback with the same port fails.
+  TestAddress const& test_addr_v6 = V6Loopback();
+  sockaddr_storage addr_v6 = test_addr_v6.addr;
+  ASSERT_NO_ERRNO(SetAddrPort(test_addr_v6.family(), &addr_v6, port));
+  const FileDescriptor fd_v6 =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr_v6.family(), param.type, 0));
+  ASSERT_THAT(bind(fd_v6.get(), reinterpret_cast<sockaddr*>(&addr_v6),
+                   test_addr_v6.addr_len),
+              SyscallFailsWithErrno(EADDRINUSE));
+
+  // Verify that binding the v4 loopback on the same port with a v6 socket
+  // fails.
+  TestAddress const& test_addr_v4_mapped = V4MappedLoopback();
+  sockaddr_storage addr_v4_mapped = test_addr_v4_mapped.addr;
+  ASSERT_NO_ERRNO(
+      SetAddrPort(test_addr_v4_mapped.family(), &addr_v4_mapped, port));
+  const FileDescriptor fd_v4_mapped = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(test_addr_v4_mapped.family(), param.type, 0));
+  ASSERT_THAT(
+      bind(fd_v4_mapped.get(), reinterpret_cast<sockaddr*>(&addr_v4_mapped),
+           test_addr_v4_mapped.addr_len),
+      SyscallFailsWithErrno(EADDRINUSE));
+
+  // Verify that binding the v4 loopback on the same port with a v4 socket
+  // fails.
+  TestAddress const& test_addr_v4 = V4Loopback();
+  sockaddr_storage addr_v4 = test_addr_v4.addr;
+  ASSERT_NO_ERRNO(SetAddrPort(test_addr_v4.family(), &addr_v4, port));
+  const FileDescriptor fd_v4 =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr_v4.family(), param.type, 0));
+  ASSERT_THAT(bind(fd_v4.get(), reinterpret_cast<sockaddr*>(&addr_v4),
+                   test_addr_v4.addr_len),
+              SyscallFailsWithErrno(EADDRINUSE));
+
+  // Verify that binding the v4 any on the same port with a v4 socket
+  // fails.
+  TestAddress const& test_addr_v4_any = V4Any();
+  sockaddr_storage addr_v4_any = test_addr_v4_any.addr;
+  ASSERT_NO_ERRNO(SetAddrPort(test_addr_v4_any.family(), &addr_v4_any, port));
+  const FileDescriptor fd_v4_any = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(test_addr_v4_any.family(), param.type, 0));
+  ASSERT_THAT(bind(fd_v4_any.get(), reinterpret_cast<sockaddr*>(&addr_v4_any),
+                   test_addr_v4_any.addr_len),
+              SyscallFailsWithErrno(EADDRINUSE));
+}
+
+TEST_P(SocketMultiProtocolInetLoopbackTest,
+       DualStackV6AnyReuseAddrDoesNotReserveV4Any) {
+  auto const& param = GetParam();
+
+  // Bind the v6 any on a dual stack socket.
+  TestAddress const& test_addr_dual = V6Any();
+  sockaddr_storage addr_dual = test_addr_dual.addr;
+  const FileDescriptor fd_dual =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr_dual.family(), param.type, 0));
+  ASSERT_THAT(setsockopt(fd_dual.get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceeds());
+  ASSERT_THAT(bind(fd_dual.get(), reinterpret_cast<sockaddr*>(&addr_dual),
+                   test_addr_dual.addr_len),
+              SyscallSucceeds());
+
+  // Get the port that we bound.
+  socklen_t addrlen = test_addr_dual.addr_len;
+  ASSERT_THAT(getsockname(fd_dual.get(),
+                          reinterpret_cast<sockaddr*>(&addr_dual), &addrlen),
+              SyscallSucceeds());
+  uint16_t const port =
+      ASSERT_NO_ERRNO_AND_VALUE(AddrPort(test_addr_dual.family(), addr_dual));
+
+  // Verify that binding the v4 any on the same port with a v4 socket succeeds.
+  TestAddress const& test_addr_v4_any = V4Any();
+  sockaddr_storage addr_v4_any = test_addr_v4_any.addr;
+  ASSERT_NO_ERRNO(SetAddrPort(test_addr_v4_any.family(), &addr_v4_any, port));
+  const FileDescriptor fd_v4_any = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(test_addr_v4_any.family(), param.type, 0));
+  ASSERT_THAT(setsockopt(fd_v4_any.get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceeds());
+  ASSERT_THAT(bind(fd_v4_any.get(), reinterpret_cast<sockaddr*>(&addr_v4_any),
+                   test_addr_v4_any.addr_len),
+              SyscallSucceeds());
+}
+
+TEST_P(SocketMultiProtocolInetLoopbackTest,
+       DualStackV6AnyReuseAddrListenReservesV4Any) {
+  auto const& param = GetParam();
+
+  // Only TCP sockets are supported.
+  SKIP_IF((param.type & SOCK_STREAM) == 0);
+
+  // Bind the v6 any on a dual stack socket.
+  TestAddress const& test_addr_dual = V6Any();
+  sockaddr_storage addr_dual = test_addr_dual.addr;
+  const FileDescriptor fd_dual =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr_dual.family(), param.type, 0));
+  ASSERT_THAT(setsockopt(fd_dual.get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceeds());
+  ASSERT_THAT(bind(fd_dual.get(), reinterpret_cast<sockaddr*>(&addr_dual),
+                   test_addr_dual.addr_len),
+              SyscallSucceeds());
+
+  ASSERT_THAT(listen(fd_dual.get(), 5), SyscallSucceeds());
+
+  // Get the port that we bound.
+  socklen_t addrlen = test_addr_dual.addr_len;
+  ASSERT_THAT(getsockname(fd_dual.get(),
+                          reinterpret_cast<sockaddr*>(&addr_dual), &addrlen),
+              SyscallSucceeds());
+  uint16_t const port =
+      ASSERT_NO_ERRNO_AND_VALUE(AddrPort(test_addr_dual.family(), addr_dual));
+
+  // Verify that binding the v4 any on the same port with a v4 socket succeeds.
+  TestAddress const& test_addr_v4_any = V4Any();
+  sockaddr_storage addr_v4_any = test_addr_v4_any.addr;
+  ASSERT_NO_ERRNO(SetAddrPort(test_addr_v4_any.family(), &addr_v4_any, port));
+  const FileDescriptor fd_v4_any = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(test_addr_v4_any.family(), param.type, 0));
+  ASSERT_THAT(setsockopt(fd_v4_any.get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceeds());
+
+  ASSERT_THAT(bind(fd_v4_any.get(), reinterpret_cast<sockaddr*>(&addr_v4_any),
+                   test_addr_v4_any.addr_len),
+              SyscallFailsWithErrno(EADDRINUSE));
+}
+
+TEST_P(SocketMultiProtocolInetLoopbackTest,
+       DualStackV6AnyWithListenReservesEverything) {
+  auto const& param = GetParam();
+
+  // Only TCP sockets are supported.
+  SKIP_IF((param.type & SOCK_STREAM) == 0);
+
+  // Bind the v6 any on a dual stack socket.
+  TestAddress const& test_addr_dual = V6Any();
+  sockaddr_storage addr_dual = test_addr_dual.addr;
+  const FileDescriptor fd_dual =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr_dual.family(), param.type, 0));
+  ASSERT_THAT(bind(fd_dual.get(), reinterpret_cast<sockaddr*>(&addr_dual),
+                   test_addr_dual.addr_len),
+              SyscallSucceeds());
+
+  ASSERT_THAT(listen(fd_dual.get(), 5), SyscallSucceeds());
+
+  // Get the port that we bound.
+  socklen_t addrlen = test_addr_dual.addr_len;
+  ASSERT_THAT(getsockname(fd_dual.get(),
+                          reinterpret_cast<sockaddr*>(&addr_dual), &addrlen),
+              SyscallSucceeds());
+  uint16_t const port =
+      ASSERT_NO_ERRNO_AND_VALUE(AddrPort(test_addr_dual.family(), addr_dual));
+
+  // Verify that binding the v6 loopback with the same port fails.
+  TestAddress const& test_addr_v6 = V6Loopback();
+  sockaddr_storage addr_v6 = test_addr_v6.addr;
+  ASSERT_NO_ERRNO(SetAddrPort(test_addr_v6.family(), &addr_v6, port));
+  const FileDescriptor fd_v6 =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr_v6.family(), param.type, 0));
+  ASSERT_THAT(bind(fd_v6.get(), reinterpret_cast<sockaddr*>(&addr_v6),
+                   test_addr_v6.addr_len),
+              SyscallFailsWithErrno(EADDRINUSE));
+
+  // Verify that binding the v4 loopback on the same port with a v6 socket
+  // fails.
+  TestAddress const& test_addr_v4_mapped = V4MappedLoopback();
+  sockaddr_storage addr_v4_mapped = test_addr_v4_mapped.addr;
+  ASSERT_NO_ERRNO(
+      SetAddrPort(test_addr_v4_mapped.family(), &addr_v4_mapped, port));
+  const FileDescriptor fd_v4_mapped = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(test_addr_v4_mapped.family(), param.type, 0));
+  ASSERT_THAT(
+      bind(fd_v4_mapped.get(), reinterpret_cast<sockaddr*>(&addr_v4_mapped),
+           test_addr_v4_mapped.addr_len),
+      SyscallFailsWithErrno(EADDRINUSE));
+
+  // Verify that binding the v4 loopback on the same port with a v4 socket
+  // fails.
+  TestAddress const& test_addr_v4 = V4Loopback();
+  sockaddr_storage addr_v4 = test_addr_v4.addr;
+  ASSERT_NO_ERRNO(SetAddrPort(test_addr_v4.family(), &addr_v4, port));
+  const FileDescriptor fd_v4 =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr_v4.family(), param.type, 0));
+  ASSERT_THAT(bind(fd_v4.get(), reinterpret_cast<sockaddr*>(&addr_v4),
+                   test_addr_v4.addr_len),
+              SyscallFailsWithErrno(EADDRINUSE));
+
+  // Verify that binding the v4 any on the same port with a v4 socket
+  // fails.
+  TestAddress const& test_addr_v4_any = V4Any();
+  sockaddr_storage addr_v4_any = test_addr_v4_any.addr;
+  ASSERT_NO_ERRNO(SetAddrPort(test_addr_v4_any.family(), &addr_v4_any, port));
+  const FileDescriptor fd_v4_any = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(test_addr_v4_any.family(), param.type, 0));
+  ASSERT_THAT(bind(fd_v4_any.get(), reinterpret_cast<sockaddr*>(&addr_v4_any),
+                   test_addr_v4_any.addr_len),
+              SyscallFailsWithErrno(EADDRINUSE));
+}
+
+TEST_P(SocketMultiProtocolInetLoopbackTest, V6OnlyV6AnyReservesV6) {
+  auto const& param = GetParam();
+
+  for (int i = 0; true; i++) {
+    // Bind the v6 any on a v6-only socket.
+    TestAddress const& test_addr_dual = V6Any();
+    sockaddr_storage addr_dual = test_addr_dual.addr;
+    const FileDescriptor fd_dual = ASSERT_NO_ERRNO_AND_VALUE(
+        Socket(test_addr_dual.family(), param.type, 0));
+    EXPECT_THAT(setsockopt(fd_dual.get(), IPPROTO_IPV6, IPV6_V6ONLY,
+                           &kSockOptOn, sizeof(kSockOptOn)),
+                SyscallSucceeds());
+    ASSERT_THAT(bind(fd_dual.get(), reinterpret_cast<sockaddr*>(&addr_dual),
+                     test_addr_dual.addr_len),
+                SyscallSucceeds());
+
+    // Get the port that we bound.
+    socklen_t addrlen = test_addr_dual.addr_len;
+    ASSERT_THAT(getsockname(fd_dual.get(),
+                            reinterpret_cast<sockaddr*>(&addr_dual), &addrlen),
+                SyscallSucceeds());
+    uint16_t const port =
+        ASSERT_NO_ERRNO_AND_VALUE(AddrPort(test_addr_dual.family(), addr_dual));
+
+    // Verify that binding the v6 loopback with the same port fails.
+    TestAddress const& test_addr_v6 = V6Loopback();
+    sockaddr_storage addr_v6 = test_addr_v6.addr;
+    ASSERT_NO_ERRNO(SetAddrPort(test_addr_v6.family(), &addr_v6, port));
+    const FileDescriptor fd_v6 =
+        ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr_v6.family(), param.type, 0));
+    ASSERT_THAT(bind(fd_v6.get(), reinterpret_cast<sockaddr*>(&addr_v6),
+                     test_addr_v6.addr_len),
+                SyscallFailsWithErrno(EADDRINUSE));
+
+    // Verify that we can still bind the v4 loopback on the same port.
+    TestAddress const& test_addr_v4_mapped = V4MappedLoopback();
+    sockaddr_storage addr_v4_mapped = test_addr_v4_mapped.addr;
+    ASSERT_NO_ERRNO(
+        SetAddrPort(test_addr_v4_mapped.family(), &addr_v4_mapped, port));
+    const FileDescriptor fd_v4_mapped = ASSERT_NO_ERRNO_AND_VALUE(
+        Socket(test_addr_v4_mapped.family(), param.type, 0));
+    int ret =
+        bind(fd_v4_mapped.get(), reinterpret_cast<sockaddr*>(&addr_v4_mapped),
+             test_addr_v4_mapped.addr_len);
+    if (ret == -1 && errno == EADDRINUSE) {
+      // Port may have been in use.
+      ASSERT_LT(i, 100);  // Give up after 100 tries.
+      continue;
+    }
+    ASSERT_THAT(ret, SyscallSucceeds());
+
+    // No need to try again.
+    break;
+  }
+}
+
+TEST_P(SocketMultiProtocolInetLoopbackTest, V6EphemeralPortReserved) {
+  auto const& param = GetParam();
+
+  for (int i = 0; true; i++) {
+    // Bind the v6 loopback on a dual stack socket.
+    TestAddress const& test_addr = V6Loopback();
+    sockaddr_storage bound_addr = test_addr.addr;
+    const FileDescriptor bound_fd =
+        ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0));
+    ASSERT_THAT(bind(bound_fd.get(), reinterpret_cast<sockaddr*>(&bound_addr),
+                     test_addr.addr_len),
+                SyscallSucceeds());
+
+    // Listen iff TCP.
+    if (param.type == SOCK_STREAM) {
+      ASSERT_THAT(listen(bound_fd.get(), SOMAXCONN), SyscallSucceeds());
+    }
+
+    // Get the port that we bound.
+    socklen_t bound_addr_len = test_addr.addr_len;
+    ASSERT_THAT(
+        getsockname(bound_fd.get(), reinterpret_cast<sockaddr*>(&bound_addr),
+                    &bound_addr_len),
+        SyscallSucceeds());
+
+    // Connect to bind an ephemeral port.
+    const FileDescriptor connected_fd =
+        ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0));
+    ASSERT_THAT(RetryEINTR(connect)(connected_fd.get(),
+                                    reinterpret_cast<sockaddr*>(&bound_addr),
+                                    bound_addr_len),
+                SyscallSucceeds());
+
+    // Get the ephemeral port.
+    sockaddr_storage connected_addr = {};
+    socklen_t connected_addr_len = sizeof(connected_addr);
+    ASSERT_THAT(getsockname(connected_fd.get(),
+                            reinterpret_cast<sockaddr*>(&connected_addr),
+                            &connected_addr_len),
+                SyscallSucceeds());
+    uint16_t const ephemeral_port =
+        ASSERT_NO_ERRNO_AND_VALUE(AddrPort(test_addr.family(), connected_addr));
+
+    // Verify that we actually got an ephemeral port.
+    ASSERT_NE(ephemeral_port, 0);
+
+    // Verify that the ephemeral port is reserved.
+    const FileDescriptor checking_fd =
+        ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0));
+    EXPECT_THAT(
+        bind(checking_fd.get(), reinterpret_cast<sockaddr*>(&connected_addr),
+             connected_addr_len),
+        SyscallFailsWithErrno(EADDRINUSE));
+
+    // Verify that binding the v6 loopback with the same port fails.
+    TestAddress const& test_addr_v6 = V6Loopback();
+    sockaddr_storage addr_v6 = test_addr_v6.addr;
+    ASSERT_NO_ERRNO(
+        SetAddrPort(test_addr_v6.family(), &addr_v6, ephemeral_port));
+    const FileDescriptor fd_v6 =
+        ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr_v6.family(), param.type, 0));
+    ASSERT_THAT(bind(fd_v6.get(), reinterpret_cast<sockaddr*>(&addr_v6),
+                     test_addr_v6.addr_len),
+                SyscallFailsWithErrno(EADDRINUSE));
+
+    // Verify that we can still bind the v4 loopback on the same port.
+    TestAddress const& test_addr_v4_mapped = V4MappedLoopback();
+    sockaddr_storage addr_v4_mapped = test_addr_v4_mapped.addr;
+    ASSERT_NO_ERRNO(SetAddrPort(test_addr_v4_mapped.family(), &addr_v4_mapped,
+                                ephemeral_port));
+    const FileDescriptor fd_v4_mapped = ASSERT_NO_ERRNO_AND_VALUE(
+        Socket(test_addr_v4_mapped.family(), param.type, 0));
+    int ret =
+        bind(fd_v4_mapped.get(), reinterpret_cast<sockaddr*>(&addr_v4_mapped),
+             test_addr_v4_mapped.addr_len);
+    if (ret == -1 && errno == EADDRINUSE) {
+      // Port may have been in use.
+      ASSERT_LT(i, 100);  // Give up after 100 tries.
+      continue;
+    }
+    EXPECT_THAT(ret, SyscallSucceeds());
+
+    // No need to try again.
+    break;
+  }
+}
+
+TEST_P(SocketMultiProtocolInetLoopbackTest, V6EphemeralPortReservedReuseAddr) {
+  auto const& param = GetParam();
+
+  // Bind the v6 loopback on a dual stack socket.
+  TestAddress const& test_addr = V6Loopback();
+  sockaddr_storage bound_addr = test_addr.addr;
+  const FileDescriptor bound_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0));
+  ASSERT_THAT(bind(bound_fd.get(), reinterpret_cast<sockaddr*>(&bound_addr),
+                   test_addr.addr_len),
+              SyscallSucceeds());
+  ASSERT_THAT(setsockopt(bound_fd.get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceeds());
+
+  // Listen iff TCP.
+  if (param.type == SOCK_STREAM) {
+    ASSERT_THAT(listen(bound_fd.get(), SOMAXCONN), SyscallSucceeds());
+  }
+
+  // Get the port that we bound.
+  socklen_t bound_addr_len = test_addr.addr_len;
+  ASSERT_THAT(
+      getsockname(bound_fd.get(), reinterpret_cast<sockaddr*>(&bound_addr),
+                  &bound_addr_len),
+      SyscallSucceeds());
+
+  // Connect to bind an ephemeral port.
+  const FileDescriptor connected_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0));
+  ASSERT_THAT(setsockopt(connected_fd.get(), SOL_SOCKET, SO_REUSEADDR,
+                         &kSockOptOn, sizeof(kSockOptOn)),
+              SyscallSucceeds());
+  ASSERT_THAT(RetryEINTR(connect)(connected_fd.get(),
+                                  reinterpret_cast<sockaddr*>(&bound_addr),
+                                  bound_addr_len),
+              SyscallSucceeds());
+
+  // Get the ephemeral port.
+  sockaddr_storage connected_addr = {};
+  socklen_t connected_addr_len = sizeof(connected_addr);
+  ASSERT_THAT(getsockname(connected_fd.get(),
+                          reinterpret_cast<sockaddr*>(&connected_addr),
+                          &connected_addr_len),
+              SyscallSucceeds());
+  uint16_t const ephemeral_port =
+      ASSERT_NO_ERRNO_AND_VALUE(AddrPort(test_addr.family(), connected_addr));
+
+  // Verify that we actually got an ephemeral port.
+  ASSERT_NE(ephemeral_port, 0);
+
+  // Verify that the ephemeral port is not reserved.
+  const FileDescriptor checking_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0));
+  ASSERT_THAT(setsockopt(checking_fd.get(), SOL_SOCKET, SO_REUSEADDR,
+                         &kSockOptOn, sizeof(kSockOptOn)),
+              SyscallSucceeds());
+  EXPECT_THAT(
+      bind(checking_fd.get(), reinterpret_cast<sockaddr*>(&connected_addr),
+           connected_addr_len),
+      SyscallSucceeds());
+}
+
+TEST_P(SocketMultiProtocolInetLoopbackTest, V4MappedEphemeralPortReserved) {
+  auto const& param = GetParam();
+
+  for (int i = 0; true; i++) {
+    // Bind the v4 loopback on a dual stack socket.
+    TestAddress const& test_addr = V4MappedLoopback();
+    sockaddr_storage bound_addr = test_addr.addr;
+    const FileDescriptor bound_fd =
+        ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0));
+    ASSERT_THAT(bind(bound_fd.get(), reinterpret_cast<sockaddr*>(&bound_addr),
+                     test_addr.addr_len),
+                SyscallSucceeds());
+
+    // Listen iff TCP.
+    if (param.type == SOCK_STREAM) {
+      ASSERT_THAT(listen(bound_fd.get(), SOMAXCONN), SyscallSucceeds());
+    }
+
+    // Get the port that we bound.
+    socklen_t bound_addr_len = test_addr.addr_len;
+    ASSERT_THAT(
+        getsockname(bound_fd.get(), reinterpret_cast<sockaddr*>(&bound_addr),
+                    &bound_addr_len),
+        SyscallSucceeds());
+
+    // Connect to bind an ephemeral port.
+    const FileDescriptor connected_fd =
+        ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0));
+    ASSERT_THAT(RetryEINTR(connect)(connected_fd.get(),
+                                    reinterpret_cast<sockaddr*>(&bound_addr),
+                                    bound_addr_len),
+                SyscallSucceeds());
+
+    // Get the ephemeral port.
+    sockaddr_storage connected_addr = {};
+    socklen_t connected_addr_len = sizeof(connected_addr);
+    ASSERT_THAT(getsockname(connected_fd.get(),
+                            reinterpret_cast<sockaddr*>(&connected_addr),
+                            &connected_addr_len),
+                SyscallSucceeds());
+    uint16_t const ephemeral_port =
+        ASSERT_NO_ERRNO_AND_VALUE(AddrPort(test_addr.family(), connected_addr));
+
+    // Verify that we actually got an ephemeral port.
+    ASSERT_NE(ephemeral_port, 0);
+
+    // Verify that the ephemeral port is reserved.
+    const FileDescriptor checking_fd =
+        ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0));
+    EXPECT_THAT(
+        bind(checking_fd.get(), reinterpret_cast<sockaddr*>(&connected_addr),
+             connected_addr_len),
+        SyscallFailsWithErrno(EADDRINUSE));
+
+    // Verify that binding the v4 loopback on the same port with a v4 socket
+    // fails.
+    TestAddress const& test_addr_v4 = V4Loopback();
+    sockaddr_storage addr_v4 = test_addr_v4.addr;
+    ASSERT_NO_ERRNO(
+        SetAddrPort(test_addr_v4.family(), &addr_v4, ephemeral_port));
+    const FileDescriptor fd_v4 =
+        ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr_v4.family(), param.type, 0));
+    EXPECT_THAT(bind(fd_v4.get(), reinterpret_cast<sockaddr*>(&addr_v4),
+                     test_addr_v4.addr_len),
+                SyscallFailsWithErrno(EADDRINUSE));
+
+    // Verify that binding the v6 any on the same port with a dual-stack socket
+    // fails.
+    TestAddress const& test_addr_v6_any = V6Any();
+    sockaddr_storage addr_v6_any = test_addr_v6_any.addr;
+    ASSERT_NO_ERRNO(
+        SetAddrPort(test_addr_v6_any.family(), &addr_v6_any, ephemeral_port));
+    const FileDescriptor fd_v6_any = ASSERT_NO_ERRNO_AND_VALUE(
+        Socket(test_addr_v6_any.family(), param.type, 0));
+    ASSERT_THAT(bind(fd_v6_any.get(), reinterpret_cast<sockaddr*>(&addr_v6_any),
+                     test_addr_v6_any.addr_len),
+                SyscallFailsWithErrno(EADDRINUSE));
+
+    // For some reason, binding the TCP v6-only any is flaky on Linux. Maybe we
+    // tend to run out of ephemeral ports? Regardless, binding the v6 loopback
+    // seems pretty reliable. Only try to bind the v6-only any on UDP and
+    // gVisor.
+
+    int ret = -1;
+
+    if (!IsRunningOnGvisor() && param.type == SOCK_STREAM) {
+      // Verify that we can still bind the v6 loopback on the same port.
+      TestAddress const& test_addr_v6 = V6Loopback();
+      sockaddr_storage addr_v6 = test_addr_v6.addr;
+      ASSERT_NO_ERRNO(
+          SetAddrPort(test_addr_v6.family(), &addr_v6, ephemeral_port));
+      const FileDescriptor fd_v6 = ASSERT_NO_ERRNO_AND_VALUE(
+          Socket(test_addr_v6.family(), param.type, 0));
+      ret = bind(fd_v6.get(), reinterpret_cast<sockaddr*>(&addr_v6),
+                 test_addr_v6.addr_len);
+    } else {
+      // Verify that we can still bind the v6 any on the same port with a
+      // v6-only socket.
+      const FileDescriptor fd_v6_only_any = ASSERT_NO_ERRNO_AND_VALUE(
+          Socket(test_addr_v6_any.family(), param.type, 0));
+      EXPECT_THAT(setsockopt(fd_v6_only_any.get(), IPPROTO_IPV6, IPV6_V6ONLY,
+                             &kSockOptOn, sizeof(kSockOptOn)),
+                  SyscallSucceeds());
+      ret =
+          bind(fd_v6_only_any.get(), reinterpret_cast<sockaddr*>(&addr_v6_any),
+               test_addr_v6_any.addr_len);
+    }
+
+    if (ret == -1 && errno == EADDRINUSE) {
+      // Port may have been in use.
+      ASSERT_LT(i, 100);  // Give up after 100 tries.
+      continue;
+    }
+    EXPECT_THAT(ret, SyscallSucceeds());
+
+    // No need to try again.
+    break;
+  }
+}
+
+TEST_P(SocketMultiProtocolInetLoopbackTest,
+       V4MappedEphemeralPortReservedResueAddr) {
+  auto const& param = GetParam();
+
+  // Bind the v4 loopback on a dual stack socket.
+  TestAddress const& test_addr = V4MappedLoopback();
+  sockaddr_storage bound_addr = test_addr.addr;
+  const FileDescriptor bound_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0));
+  ASSERT_THAT(bind(bound_fd.get(), reinterpret_cast<sockaddr*>(&bound_addr),
+                   test_addr.addr_len),
+              SyscallSucceeds());
+
+  ASSERT_THAT(setsockopt(bound_fd.get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceeds());
+
+  // Listen iff TCP.
+  if (param.type == SOCK_STREAM) {
+    ASSERT_THAT(listen(bound_fd.get(), SOMAXCONN), SyscallSucceeds());
+  }
+
+  // Get the port that we bound.
+  socklen_t bound_addr_len = test_addr.addr_len;
+  ASSERT_THAT(
+      getsockname(bound_fd.get(), reinterpret_cast<sockaddr*>(&bound_addr),
+                  &bound_addr_len),
+      SyscallSucceeds());
+
+  // Connect to bind an ephemeral port.
+  const FileDescriptor connected_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0));
+  ASSERT_THAT(setsockopt(connected_fd.get(), SOL_SOCKET, SO_REUSEADDR,
+                         &kSockOptOn, sizeof(kSockOptOn)),
+              SyscallSucceeds());
+  ASSERT_THAT(RetryEINTR(connect)(connected_fd.get(),
+                                  reinterpret_cast<sockaddr*>(&bound_addr),
+                                  bound_addr_len),
+              SyscallSucceeds());
+
+  // Get the ephemeral port.
+  sockaddr_storage connected_addr = {};
+  socklen_t connected_addr_len = sizeof(connected_addr);
+  ASSERT_THAT(getsockname(connected_fd.get(),
+                          reinterpret_cast<sockaddr*>(&connected_addr),
+                          &connected_addr_len),
+              SyscallSucceeds());
+  uint16_t const ephemeral_port =
+      ASSERT_NO_ERRNO_AND_VALUE(AddrPort(test_addr.family(), connected_addr));
+
+  // Verify that we actually got an ephemeral port.
+  ASSERT_NE(ephemeral_port, 0);
+
+  // Verify that the ephemeral port is not reserved.
+  const FileDescriptor checking_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0));
+  ASSERT_THAT(setsockopt(checking_fd.get(), SOL_SOCKET, SO_REUSEADDR,
+                         &kSockOptOn, sizeof(kSockOptOn)),
+              SyscallSucceeds());
+  EXPECT_THAT(
+      bind(checking_fd.get(), reinterpret_cast<sockaddr*>(&connected_addr),
+           connected_addr_len),
+      SyscallSucceeds());
+}
+
+TEST_P(SocketMultiProtocolInetLoopbackTest, V4EphemeralPortReserved) {
+  auto const& param = GetParam();
+
+  for (int i = 0; true; i++) {
+    // Bind the v4 loopback on a v4 socket.
+    TestAddress const& test_addr = V4Loopback();
+    sockaddr_storage bound_addr = test_addr.addr;
+    const FileDescriptor bound_fd =
+        ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0));
+    ASSERT_THAT(bind(bound_fd.get(), reinterpret_cast<sockaddr*>(&bound_addr),
+                     test_addr.addr_len),
+                SyscallSucceeds());
+
+    // Listen iff TCP.
+    if (param.type == SOCK_STREAM) {
+      ASSERT_THAT(listen(bound_fd.get(), SOMAXCONN), SyscallSucceeds());
+    }
+
+    // Get the port that we bound.
+    socklen_t bound_addr_len = test_addr.addr_len;
+    ASSERT_THAT(
+        getsockname(bound_fd.get(), reinterpret_cast<sockaddr*>(&bound_addr),
+                    &bound_addr_len),
+        SyscallSucceeds());
+
+    // Connect to bind an ephemeral port.
+    const FileDescriptor connected_fd =
+        ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0));
+    ASSERT_THAT(RetryEINTR(connect)(connected_fd.get(),
+                                    reinterpret_cast<sockaddr*>(&bound_addr),
+                                    bound_addr_len),
+                SyscallSucceeds());
+
+    // Get the ephemeral port.
+    sockaddr_storage connected_addr = {};
+    socklen_t connected_addr_len = sizeof(connected_addr);
+    ASSERT_THAT(getsockname(connected_fd.get(),
+                            reinterpret_cast<sockaddr*>(&connected_addr),
+                            &connected_addr_len),
+                SyscallSucceeds());
+    uint16_t const ephemeral_port =
+        ASSERT_NO_ERRNO_AND_VALUE(AddrPort(test_addr.family(), connected_addr));
+
+    // Verify that we actually got an ephemeral port.
+    ASSERT_NE(ephemeral_port, 0);
+
+    // Verify that the ephemeral port is reserved.
+    const FileDescriptor checking_fd =
+        ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0));
+    EXPECT_THAT(
+        bind(checking_fd.get(), reinterpret_cast<sockaddr*>(&connected_addr),
+             connected_addr_len),
+        SyscallFailsWithErrno(EADDRINUSE));
+
+    // Verify that binding the v4 loopback on the same port with a v6 socket
+    // fails.
+    TestAddress const& test_addr_v4_mapped = V4MappedLoopback();
+    sockaddr_storage addr_v4_mapped = test_addr_v4_mapped.addr;
+    ASSERT_NO_ERRNO(SetAddrPort(test_addr_v4_mapped.family(), &addr_v4_mapped,
+                                ephemeral_port));
+    const FileDescriptor fd_v4_mapped = ASSERT_NO_ERRNO_AND_VALUE(
+        Socket(test_addr_v4_mapped.family(), param.type, 0));
+    EXPECT_THAT(
+        bind(fd_v4_mapped.get(), reinterpret_cast<sockaddr*>(&addr_v4_mapped),
+             test_addr_v4_mapped.addr_len),
+        SyscallFailsWithErrno(EADDRINUSE));
+
+    // Verify that binding the v6 any on the same port with a dual-stack socket
+    // fails.
+    TestAddress const& test_addr_v6_any = V6Any();
+    sockaddr_storage addr_v6_any = test_addr_v6_any.addr;
+    ASSERT_NO_ERRNO(
+        SetAddrPort(test_addr_v6_any.family(), &addr_v6_any, ephemeral_port));
+    const FileDescriptor fd_v6_any = ASSERT_NO_ERRNO_AND_VALUE(
+        Socket(test_addr_v6_any.family(), param.type, 0));
+    ASSERT_THAT(bind(fd_v6_any.get(), reinterpret_cast<sockaddr*>(&addr_v6_any),
+                     test_addr_v6_any.addr_len),
+                SyscallFailsWithErrno(EADDRINUSE));
+
+    // For some reason, binding the TCP v6-only any is flaky on Linux. Maybe we
+    // tend to run out of ephemeral ports? Regardless, binding the v6 loopback
+    // seems pretty reliable. Only try to bind the v6-only any on UDP and
+    // gVisor.
+
+    int ret = -1;
+
+    if (!IsRunningOnGvisor() && param.type == SOCK_STREAM) {
+      // Verify that we can still bind the v6 loopback on the same port.
+      TestAddress const& test_addr_v6 = V6Loopback();
+      sockaddr_storage addr_v6 = test_addr_v6.addr;
+      ASSERT_NO_ERRNO(
+          SetAddrPort(test_addr_v6.family(), &addr_v6, ephemeral_port));
+      const FileDescriptor fd_v6 = ASSERT_NO_ERRNO_AND_VALUE(
+          Socket(test_addr_v6.family(), param.type, 0));
+      ret = bind(fd_v6.get(), reinterpret_cast<sockaddr*>(&addr_v6),
+                 test_addr_v6.addr_len);
+    } else {
+      // Verify that we can still bind the v6 any on the same port with a
+      // v6-only socket.
+      const FileDescriptor fd_v6_only_any = ASSERT_NO_ERRNO_AND_VALUE(
+          Socket(test_addr_v6_any.family(), param.type, 0));
+      EXPECT_THAT(setsockopt(fd_v6_only_any.get(), IPPROTO_IPV6, IPV6_V6ONLY,
+                             &kSockOptOn, sizeof(kSockOptOn)),
+                  SyscallSucceeds());
+      ret =
+          bind(fd_v6_only_any.get(), reinterpret_cast<sockaddr*>(&addr_v6_any),
+               test_addr_v6_any.addr_len);
+    }
+
+    if (ret == -1 && errno == EADDRINUSE) {
+      // Port may have been in use.
+      ASSERT_LT(i, 100);  // Give up after 100 tries.
+      continue;
+    }
+    EXPECT_THAT(ret, SyscallSucceeds());
+
+    // No need to try again.
+    break;
+  }
+}
+
+TEST_P(SocketMultiProtocolInetLoopbackTest, V4EphemeralPortReservedReuseAddr) {
+  auto const& param = GetParam();
+
+  // Bind the v4 loopback on a v4 socket.
+  TestAddress const& test_addr = V4Loopback();
+  sockaddr_storage bound_addr = test_addr.addr;
+  const FileDescriptor bound_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0));
+
+  ASSERT_THAT(setsockopt(bound_fd.get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceeds());
+
+  ASSERT_THAT(bind(bound_fd.get(), reinterpret_cast<sockaddr*>(&bound_addr),
+                   test_addr.addr_len),
+              SyscallSucceeds());
+
+  // Listen iff TCP.
+  if (param.type == SOCK_STREAM) {
+    ASSERT_THAT(listen(bound_fd.get(), SOMAXCONN), SyscallSucceeds());
+  }
+
+  // Get the port that we bound.
+  socklen_t bound_addr_len = test_addr.addr_len;
+  ASSERT_THAT(
+      getsockname(bound_fd.get(), reinterpret_cast<sockaddr*>(&bound_addr),
+                  &bound_addr_len),
+      SyscallSucceeds());
+
+  // Connect to bind an ephemeral port.
+  const FileDescriptor connected_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0));
+
+  ASSERT_THAT(setsockopt(connected_fd.get(), SOL_SOCKET, SO_REUSEADDR,
+                         &kSockOptOn, sizeof(kSockOptOn)),
+              SyscallSucceeds());
+
+  ASSERT_THAT(RetryEINTR(connect)(connected_fd.get(),
+                                  reinterpret_cast<sockaddr*>(&bound_addr),
+                                  bound_addr_len),
+              SyscallSucceeds());
+
+  // Get the ephemeral port.
+  sockaddr_storage connected_addr = {};
+  socklen_t connected_addr_len = sizeof(connected_addr);
+  ASSERT_THAT(getsockname(connected_fd.get(),
+                          reinterpret_cast<sockaddr*>(&connected_addr),
+                          &connected_addr_len),
+              SyscallSucceeds());
+  uint16_t const ephemeral_port =
+      ASSERT_NO_ERRNO_AND_VALUE(AddrPort(test_addr.family(), connected_addr));
+
+  // Verify that we actually got an ephemeral port.
+  ASSERT_NE(ephemeral_port, 0);
+
+  // Verify that the ephemeral port is not reserved.
+  const FileDescriptor checking_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0));
+  ASSERT_THAT(setsockopt(checking_fd.get(), SOL_SOCKET, SO_REUSEADDR,
+                         &kSockOptOn, sizeof(kSockOptOn)),
+              SyscallSucceeds());
+  EXPECT_THAT(
+      bind(checking_fd.get(), reinterpret_cast<sockaddr*>(&connected_addr),
+           connected_addr_len),
+      SyscallSucceeds());
+}
+
+TEST_P(SocketMultiProtocolInetLoopbackTest, PortReuseTwoSockets) {
+  auto const& param = GetParam();
+  TestAddress const& test_addr = V4Loopback();
+  sockaddr_storage addr = test_addr.addr;
+
+  for (int i = 0; i < 2; i++) {
+    const int portreuse1 = i % 2;
+    auto s1 =
+        ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0));
+    int fd1 = s1.get();
+    socklen_t addrlen = test_addr.addr_len;
+
+    EXPECT_THAT(
+        setsockopt(fd1, SOL_SOCKET, SO_REUSEPORT, &portreuse1, sizeof(int)),
+        SyscallSucceeds());
+
+    ASSERT_THAT(bind(fd1, reinterpret_cast<sockaddr*>(&addr), addrlen),
+                SyscallSucceeds());
+
+    ASSERT_THAT(getsockname(fd1, reinterpret_cast<sockaddr*>(&addr), &addrlen),
+                SyscallSucceeds());
+    if (param.type == SOCK_STREAM) {
+      ASSERT_THAT(listen(fd1, 1), SyscallSucceeds());
+    }
+
+    // j is less than 4 to check that the port reuse logic works correctly after
+    // closing bound sockets.
+    for (int j = 0; j < 4; j++) {
+      const int portreuse2 = j % 2;
+      auto s2 =
+          ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0));
+      int fd2 = s2.get();
+
+      EXPECT_THAT(
+          setsockopt(fd2, SOL_SOCKET, SO_REUSEPORT, &portreuse2, sizeof(int)),
+          SyscallSucceeds());
+
+      std::cout << portreuse1 << " " << portreuse2 << std::endl;
+      int ret = bind(fd2, reinterpret_cast<sockaddr*>(&addr), addrlen);
+
+      // Verify that two sockets can be bound to the same port only if
+      // SO_REUSEPORT is set for both of them.
+      if (!portreuse1 || !portreuse2) {
+        ASSERT_THAT(ret, SyscallFailsWithErrno(EADDRINUSE));
+      } else {
+        ASSERT_THAT(ret, SyscallSucceeds());
+      }
+    }
+  }
+}
+
+// Check that when a socket was bound to an address with REUSEPORT and then
+// closed, we can bind a different socket to the same address without needing
+// REUSEPORT.
+TEST_P(SocketMultiProtocolInetLoopbackTest, NoReusePortFollowingReusePort) {
+  auto const& param = GetParam();
+  TestAddress const& test_addr = V4Loopback();
+  sockaddr_storage addr = test_addr.addr;
+
+  auto s = ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0));
+  int fd = s.get();
+  socklen_t addrlen = test_addr.addr_len;
+  int portreuse = 1;
+  ASSERT_THAT(
+      setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &portreuse, sizeof(portreuse)),
+      SyscallSucceeds());
+  ASSERT_THAT(bind(fd, reinterpret_cast<sockaddr*>(&addr), addrlen),
+              SyscallSucceeds());
+  ASSERT_THAT(getsockname(fd, reinterpret_cast<sockaddr*>(&addr), &addrlen),
+              SyscallSucceeds());
+  ASSERT_EQ(addrlen, test_addr.addr_len);
+
+  s.reset();
+
+  // Open a new socket and bind to the same address, but w/o REUSEPORT.
+  s = ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0));
+  fd = s.get();
+  portreuse = 0;
+  ASSERT_THAT(
+      setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &portreuse, sizeof(portreuse)),
+      SyscallSucceeds());
+  ASSERT_THAT(bind(fd, reinterpret_cast<sockaddr*>(&addr), addrlen),
+              SyscallSucceeds());
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    AllFamilies, SocketMultiProtocolInetLoopbackTest,
+    ::testing::Values(ProtocolTestParam{"TCP", SOCK_STREAM},
+                      ProtocolTestParam{"UDP", SOCK_DGRAM}),
+    DescribeProtocolTestParam);
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_inet_loopback_nogotsan.cc b/test/syscalls/linux/socket_inet_loopback_nogotsan.cc
new file mode 100644
index 000000000..2324c7f6a
--- /dev/null
+++ b/test/syscalls/linux/socket_inet_loopback_nogotsan.cc
@@ -0,0 +1,171 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <arpa/inet.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <string.h>
+
+#include <iostream>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/strings/str_cat.h"
+#include "test/syscalls/linux/ip_socket_test_util.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/posix_error.h"
+#include "test/util/save_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+using ::testing::Gt;
+
+PosixErrorOr<uint16_t> AddrPort(int family, sockaddr_storage const& addr) {
+  switch (family) {
+    case AF_INET:
+      return static_cast<uint16_t>(
+          reinterpret_cast<sockaddr_in const*>(&addr)->sin_port);
+    case AF_INET6:
+      return static_cast<uint16_t>(
+          reinterpret_cast<sockaddr_in6 const*>(&addr)->sin6_port);
+    default:
+      return PosixError(EINVAL,
+                        absl::StrCat("unknown socket family: ", family));
+  }
+}
+
+PosixError SetAddrPort(int family, sockaddr_storage* addr, uint16_t port) {
+  switch (family) {
+    case AF_INET:
+      reinterpret_cast<sockaddr_in*>(addr)->sin_port = port;
+      return NoError();
+    case AF_INET6:
+      reinterpret_cast<sockaddr_in6*>(addr)->sin6_port = port;
+      return NoError();
+    default:
+      return PosixError(EINVAL,
+                        absl::StrCat("unknown socket family: ", family));
+  }
+}
+
+struct TestParam {
+  TestAddress listener;
+  TestAddress connector;
+};
+
+std::string DescribeTestParam(::testing::TestParamInfo<TestParam> const& info) {
+  return absl::StrCat("Listen", info.param.listener.description, "_Connect",
+                      info.param.connector.description);
+}
+
+using SocketInetLoopbackTest = ::testing::TestWithParam<TestParam>;
+
+// This test verifies that connect returns EADDRNOTAVAIL if all local ephemeral
+// ports are already in use for a given destination ip/port.
+// We disable S/R because this test creates a large number of sockets.
+TEST_P(SocketInetLoopbackTest, TestTCPPortExhaustion_NoRandomSave) {
+  auto const& param = GetParam();
+  TestAddress const& listener = param.listener;
+  TestAddress const& connector = param.connector;
+
+  constexpr int kBacklog = 10;
+  constexpr int kClients = 65536;
+
+  // Create the listening socket.
+  auto listen_fd = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP));
+  sockaddr_storage listen_addr = listener.addr;
+  ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast<sockaddr*>(&listen_addr),
+                   listener.addr_len),
+              SyscallSucceeds());
+  ASSERT_THAT(listen(listen_fd.get(), kBacklog), SyscallSucceeds());
+
+  // Get the port bound by the listening socket.
+  socklen_t addrlen = listener.addr_len;
+  ASSERT_THAT(getsockname(listen_fd.get(),
+                          reinterpret_cast<sockaddr*>(&listen_addr), &addrlen),
+              SyscallSucceeds());
+  uint16_t const port =
+      ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr));
+
+  // Disable cooperative S/R as we are making too many syscalls.
+  DisableSave ds;
+
+  // Now we keep opening connections till we run out of local ephemeral ports.
+  // and assert the error we get back.
+  sockaddr_storage conn_addr = connector.addr;
+  ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port));
+  std::vector<FileDescriptor> clients;
+  std::vector<FileDescriptor> servers;
+
+  for (int i = 0; i < kClients; i++) {
+    FileDescriptor client = ASSERT_NO_ERRNO_AND_VALUE(
+        Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP));
+    int ret = connect(client.get(), reinterpret_cast<sockaddr*>(&conn_addr),
+                      connector.addr_len);
+    if (ret == 0) {
+      clients.push_back(std::move(client));
+      FileDescriptor server =
+          ASSERT_NO_ERRNO_AND_VALUE(Accept(listen_fd.get(), nullptr, nullptr));
+      servers.push_back(std::move(server));
+      continue;
+    }
+    ASSERT_THAT(ret, SyscallFailsWithErrno(EADDRNOTAVAIL));
+    break;
+  }
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    All, SocketInetLoopbackTest,
+    ::testing::Values(
+        // Listeners bound to IPv4 addresses refuse connections using IPv6
+        // addresses.
+        TestParam{V4Any(), V4Any()}, TestParam{V4Any(), V4Loopback()},
+        TestParam{V4Any(), V4MappedAny()},
+        TestParam{V4Any(), V4MappedLoopback()},
+        TestParam{V4Loopback(), V4Any()}, TestParam{V4Loopback(), V4Loopback()},
+        TestParam{V4Loopback(), V4MappedLoopback()},
+        TestParam{V4MappedAny(), V4Any()},
+        TestParam{V4MappedAny(), V4Loopback()},
+        TestParam{V4MappedAny(), V4MappedAny()},
+        TestParam{V4MappedAny(), V4MappedLoopback()},
+        TestParam{V4MappedLoopback(), V4Any()},
+        TestParam{V4MappedLoopback(), V4Loopback()},
+        TestParam{V4MappedLoopback(), V4MappedLoopback()},
+
+        // Listeners bound to IN6ADDR_ANY accept all connections.
+        TestParam{V6Any(), V4Any()}, TestParam{V6Any(), V4Loopback()},
+        TestParam{V6Any(), V4MappedAny()},
+        TestParam{V6Any(), V4MappedLoopback()}, TestParam{V6Any(), V6Any()},
+        TestParam{V6Any(), V6Loopback()},
+
+        // Listeners bound to IN6ADDR_LOOPBACK refuse connections using IPv4
+        // addresses.
+        TestParam{V6Loopback(), V6Any()},
+        TestParam{V6Loopback(), V6Loopback()}),
+    DescribeTestParam);
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_ip_loopback_blocking.cc b/test/syscalls/linux/socket_ip_loopback_blocking.cc
new file mode 100644
index 000000000..fda252dd7
--- /dev/null
+++ b/test/syscalls/linux/socket_ip_loopback_blocking.cc
@@ -0,0 +1,49 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <netinet/tcp.h>
+
+#include <vector>
+
+#include "test/syscalls/linux/ip_socket_test_util.h"
+#include "test/syscalls/linux/socket_blocking.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+std::vector<SocketPairKind> GetSocketPairs() {
+  return VecCat<SocketPairKind>(
+      std::vector<SocketPairKind>{
+          IPv6UDPBidirectionalBindSocketPair(0),
+          IPv4UDPBidirectionalBindSocketPair(0),
+      },
+      ApplyVecToVec<SocketPairKind>(
+          std::vector<Middleware>{
+              NoOp, SetSockOpt(IPPROTO_TCP, TCP_NODELAY, &kSockOptOn)},
+          std::vector<SocketPairKind>{
+              IPv6TCPAcceptBindSocketPair(0),
+              IPv4TCPAcceptBindSocketPair(0),
+          }));
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    BlockingIPSockets, BlockingSocketPairTest,
+    ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+}  // namespace
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_ip_tcp_generic.cc b/test/syscalls/linux/socket_ip_tcp_generic.cc
new file mode 100644
index 000000000..c2ecb639f
--- /dev/null
+++ b/test/syscalls/linux/socket_ip_tcp_generic.cc
@@ -0,0 +1,1054 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test/syscalls/linux/socket_ip_tcp_generic.h"
+
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <poll.h>
+#include <stdio.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+
+#include "gtest/gtest.h"
+#include "absl/memory/memory.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+
+TEST_P(TCPSocketPairTest, TcpInfoSucceeds) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  struct tcp_info opt = {};
+  socklen_t optLen = sizeof(opt);
+  EXPECT_THAT(getsockopt(sockets->first_fd(), SOL_TCP, TCP_INFO, &opt, &optLen),
+              SyscallSucceeds());
+}
+
+TEST_P(TCPSocketPairTest, ShortTcpInfoSucceeds) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  struct tcp_info opt = {};
+  socklen_t optLen = 1;
+  EXPECT_THAT(getsockopt(sockets->first_fd(), SOL_TCP, TCP_INFO, &opt, &optLen),
+              SyscallSucceeds());
+}
+
+TEST_P(TCPSocketPairTest, ZeroTcpInfoSucceeds) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  struct tcp_info opt = {};
+  socklen_t optLen = 0;
+  EXPECT_THAT(getsockopt(sockets->first_fd(), SOL_TCP, TCP_INFO, &opt, &optLen),
+              SyscallSucceeds());
+}
+
+// This test validates that an RST is sent instead of a FIN when data is
+// unread on calls to close(2).
+TEST_P(TCPSocketPairTest, RSTSentOnCloseWithUnreadData) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char buf[10] = {};
+  ASSERT_THAT(RetryEINTR(write)(sockets->first_fd(), buf, sizeof(buf)),
+              SyscallSucceedsWithValue(sizeof(buf)));
+
+  // Wait until t_ sees the data on its side but don't read it.
+  struct pollfd poll_fd = {sockets->second_fd(), POLLIN | POLLHUP, 0};
+  constexpr int kPollTimeoutMs = 20000;  // Wait up to 20 seconds for the data.
+  ASSERT_THAT(RetryEINTR(poll)(&poll_fd, 1, kPollTimeoutMs),
+              SyscallSucceedsWithValue(1));
+
+  // Now close the connected without reading the data.
+  ASSERT_THAT(close(sockets->release_second_fd()), SyscallSucceeds());
+
+  // Wait for the other end to receive the RST (up to 20 seconds).
+  struct pollfd poll_fd2 = {sockets->first_fd(), POLLIN | POLLHUP, 0};
+  ASSERT_THAT(RetryEINTR(poll)(&poll_fd2, 1, kPollTimeoutMs),
+              SyscallSucceedsWithValue(1));
+
+  // A shutdown with unread data will cause a RST to be sent instead
+  // of a FIN, per RFC 2525 section 2.17; this is also what Linux does.
+  ASSERT_THAT(RetryEINTR(read)(sockets->first_fd(), buf, sizeof(buf)),
+              SyscallFailsWithErrno(ECONNRESET));
+}
+
+// This test will validate that a RST will cause POLLHUP to trigger.
+TEST_P(TCPSocketPairTest, RSTCausesPollHUP) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char buf[10] = {};
+  ASSERT_THAT(RetryEINTR(write)(sockets->first_fd(), buf, sizeof(buf)),
+              SyscallSucceedsWithValue(sizeof(buf)));
+
+  // Wait until second sees the data on its side but don't read it.
+  struct pollfd poll_fd = {sockets->second_fd(), POLLIN, 0};
+  constexpr int kPollTimeoutMs = 20000;  // Wait up to 20 seconds for the data.
+  ASSERT_THAT(RetryEINTR(poll)(&poll_fd, 1, kPollTimeoutMs),
+              SyscallSucceedsWithValue(1));
+  EXPECT_EQ(poll_fd.revents & POLLIN, POLLIN);
+
+  // Confirm we at least have one unread byte.
+  int bytes_available = 0;
+  ASSERT_THAT(
+      RetryEINTR(ioctl)(sockets->second_fd(), FIONREAD, &bytes_available),
+      SyscallSucceeds());
+  EXPECT_GT(bytes_available, 0);
+
+  // Now close the connected socket without reading the data from the second,
+  // this will cause a RST and we should see that with POLLHUP.
+  ASSERT_THAT(close(sockets->release_second_fd()), SyscallSucceeds());
+
+  // Wait for the other end to receive the RST (up to 20 seconds).
+  struct pollfd poll_fd3 = {sockets->first_fd(), POLLHUP, 0};
+  ASSERT_THAT(RetryEINTR(poll)(&poll_fd3, 1, kPollTimeoutMs),
+              SyscallSucceedsWithValue(1));
+  ASSERT_NE(poll_fd3.revents & POLLHUP, 0);
+}
+
+// This test validates that even if a RST is sent the other end will not
+// get an ECONNRESET until it's read all data.
+TEST_P(TCPSocketPairTest, RSTSentOnCloseWithUnreadDataAllowsReadBuffered) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char buf[10] = {};
+  ASSERT_THAT(RetryEINTR(write)(sockets->first_fd(), buf, sizeof(buf)),
+              SyscallSucceedsWithValue(sizeof(buf)));
+  ASSERT_THAT(RetryEINTR(write)(sockets->second_fd(), buf, sizeof(buf)),
+              SyscallSucceedsWithValue(sizeof(buf)));
+
+  // Wait until second sees the data on its side but don't read it.
+  struct pollfd poll_fd = {sockets->second_fd(), POLLIN, 0};
+  constexpr int kPollTimeoutMs = 30000;  // Wait up to 30 seconds for the data.
+  ASSERT_THAT(RetryEINTR(poll)(&poll_fd, 1, kPollTimeoutMs),
+              SyscallSucceedsWithValue(1));
+
+  // Wait until first sees the data on its side but don't read it.
+  struct pollfd poll_fd2 = {sockets->first_fd(), POLLIN, 0};
+  ASSERT_THAT(RetryEINTR(poll)(&poll_fd2, 1, kPollTimeoutMs),
+              SyscallSucceedsWithValue(1));
+
+  // Now close the connected socket without reading the data from the second.
+  ASSERT_THAT(close(sockets->release_second_fd()), SyscallSucceeds());
+
+  // Wait for the other end to receive the RST (up to 30 seconds).
+  struct pollfd poll_fd3 = {sockets->first_fd(), POLLHUP, 0};
+  ASSERT_THAT(RetryEINTR(poll)(&poll_fd3, 1, kPollTimeoutMs),
+              SyscallSucceedsWithValue(1));
+
+  // Since we also have data buffered we should be able to read it before
+  // the syscall will fail with ECONNRESET.
+  ASSERT_THAT(RetryEINTR(read)(sockets->first_fd(), buf, sizeof(buf)),
+              SyscallSucceedsWithValue(sizeof(buf)));
+
+  // A shutdown with unread data will cause a RST to be sent instead
+  // of a FIN, per RFC 2525 section 2.17; this is also what Linux does.
+  ASSERT_THAT(RetryEINTR(read)(sockets->first_fd(), buf, sizeof(buf)),
+              SyscallFailsWithErrno(ECONNRESET));
+}
+
+// This test will verify that a clean shutdown (FIN) is preformed when there
+// is unread data but only the write side is closed.
+TEST_P(TCPSocketPairTest, FINSentOnShutdownWrWithUnreadData) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char buf[10] = {};
+  ASSERT_THAT(RetryEINTR(write)(sockets->first_fd(), buf, sizeof(buf)),
+              SyscallSucceedsWithValue(sizeof(buf)));
+
+  // Wait until t_ sees the data on its side but don't read it.
+  struct pollfd poll_fd = {sockets->second_fd(), POLLIN | POLLHUP, 0};
+  constexpr int kPollTimeoutMs = 20000;  // Wait up to 20 seconds for the data.
+  ASSERT_THAT(RetryEINTR(poll)(&poll_fd, 1, kPollTimeoutMs),
+              SyscallSucceedsWithValue(1));
+
+  // Now shutdown the write end leaving the read end open.
+  ASSERT_THAT(shutdown(sockets->second_fd(), SHUT_WR), SyscallSucceeds());
+
+  // Wait for the other end to receive the FIN (up to 20 seconds).
+  struct pollfd poll_fd2 = {sockets->first_fd(), POLLIN | POLLHUP, 0};
+  ASSERT_THAT(RetryEINTR(poll)(&poll_fd2, 1, kPollTimeoutMs),
+              SyscallSucceedsWithValue(1));
+
+  // Since we didn't shutdown the read end this will be a clean close.
+  ASSERT_THAT(RetryEINTR(read)(sockets->first_fd(), buf, sizeof(buf)),
+              SyscallSucceedsWithValue(0));
+}
+
+// This test will verify that when data is received by a socket, even if it's
+// not read SHUT_RD will not cause any packets to be generated.
+TEST_P(TCPSocketPairTest, ShutdownRdShouldCauseNoPacketsWithUnreadData) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char buf[10] = {};
+  ASSERT_THAT(RetryEINTR(write)(sockets->first_fd(), buf, sizeof(buf)),
+              SyscallSucceedsWithValue(sizeof(buf)));
+
+  // Wait until t_ sees the data on its side but don't read it.
+  struct pollfd poll_fd = {sockets->second_fd(), POLLIN | POLLHUP, 0};
+  constexpr int kPollTimeoutMs = 20000;  // Wait up to 20 seconds for the data.
+  ASSERT_THAT(RetryEINTR(poll)(&poll_fd, 1, kPollTimeoutMs),
+              SyscallSucceedsWithValue(1));
+
+  // Now shutdown the read end, this will generate no packets to the other end.
+  ASSERT_THAT(shutdown(sockets->second_fd(), SHUT_RD), SyscallSucceeds());
+
+  // We should not receive any events on the other side of the socket.
+  struct pollfd poll_fd2 = {sockets->first_fd(), POLLIN | POLLHUP, 0};
+  constexpr int kPollNoResponseTimeoutMs = 3000;
+  ASSERT_THAT(RetryEINTR(poll)(&poll_fd2, 1, kPollNoResponseTimeoutMs),
+              SyscallSucceedsWithValue(0));  // Timeout.
+}
+
+// This test will verify that a socket which has unread data will still allow
+// the data to be read after shutting down the read side, and once there is no
+// unread data left, then read will return an EOF.
+TEST_P(TCPSocketPairTest, ShutdownRdAllowsReadOfReceivedDataBeforeEOF) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char buf[10] = {};
+  ASSERT_THAT(RetryEINTR(write)(sockets->first_fd(), buf, sizeof(buf)),
+              SyscallSucceedsWithValue(sizeof(buf)));
+
+  // Wait until t_ sees the data on its side but don't read it.
+  struct pollfd poll_fd = {sockets->second_fd(), POLLIN | POLLHUP, 0};
+  constexpr int kPollTimeoutMs = 20000;  // Wait up to 20 seconds for the data.
+  ASSERT_THAT(RetryEINTR(poll)(&poll_fd, 1, kPollTimeoutMs),
+              SyscallSucceedsWithValue(1));
+
+  // Now shutdown the read end.
+  ASSERT_THAT(shutdown(sockets->second_fd(), SHUT_RD), SyscallSucceeds());
+
+  // Even though we did a SHUT_RD on the read end we can still read the data.
+  ASSERT_THAT(RetryEINTR(read)(sockets->second_fd(), buf, sizeof(buf)),
+              SyscallSucceedsWithValue(sizeof(buf)));
+
+  // After reading all of the data, reading the closed read end returns EOF.
+  ASSERT_THAT(RetryEINTR(poll)(&poll_fd, 1, kPollTimeoutMs),
+              SyscallSucceedsWithValue(1));
+  ASSERT_THAT(RetryEINTR(read)(sockets->second_fd(), buf, sizeof(buf)),
+              SyscallSucceedsWithValue(0));
+}
+
+// This test verifies that a shutdown(wr) by the server after sending
+// data allows the client to still read() the queued data and a client
+// close after sending response allows server to read the incoming
+// response.
+TEST_P(TCPSocketPairTest, ShutdownWrServerClientClose) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  char buf[10] = {};
+  ScopedThread t([&]() {
+    ASSERT_THAT(RetryEINTR(read)(sockets->first_fd(), buf, sizeof(buf)),
+                SyscallSucceedsWithValue(sizeof(buf)));
+    ASSERT_THAT(RetryEINTR(write)(sockets->first_fd(), buf, sizeof(buf)),
+                SyscallSucceedsWithValue(sizeof(buf)));
+    ASSERT_THAT(close(sockets->release_first_fd()),
+                SyscallSucceedsWithValue(0));
+  });
+  ASSERT_THAT(RetryEINTR(write)(sockets->second_fd(), buf, sizeof(buf)),
+              SyscallSucceedsWithValue(sizeof(buf)));
+  ASSERT_THAT(RetryEINTR(shutdown)(sockets->second_fd(), SHUT_WR),
+              SyscallSucceedsWithValue(0));
+  t.Join();
+
+  ASSERT_THAT(RetryEINTR(read)(sockets->second_fd(), buf, sizeof(buf)),
+              SyscallSucceedsWithValue(sizeof(buf)));
+}
+
+TEST_P(TCPSocketPairTest, ClosedReadNonBlockingSocket) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  // Set the read end to O_NONBLOCK.
+  int opts = 0;
+  ASSERT_THAT(opts = fcntl(sockets->second_fd(), F_GETFL), SyscallSucceeds());
+  ASSERT_THAT(fcntl(sockets->second_fd(), F_SETFL, opts | O_NONBLOCK),
+              SyscallSucceeds());
+
+  char buf[10] = {};
+  ASSERT_THAT(RetryEINTR(send)(sockets->first_fd(), buf, sizeof(buf), 0),
+              SyscallSucceedsWithValue(sizeof(buf)));
+
+  // Wait until second_fd sees the data and then recv it.
+  struct pollfd poll_fd = {sockets->second_fd(), POLLIN, 0};
+  constexpr int kPollTimeoutMs = 2000;  // Wait up to 2 seconds for the data.
+  ASSERT_THAT(RetryEINTR(poll)(&poll_fd, 1, kPollTimeoutMs),
+              SyscallSucceedsWithValue(1));
+
+  ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), buf, sizeof(buf), 0),
+              SyscallSucceedsWithValue(sizeof(buf)));
+
+  // Now shutdown the write end leaving the read end open.
+  ASSERT_THAT(close(sockets->release_first_fd()), SyscallSucceeds());
+
+  // Wait for close notification and recv again.
+  struct pollfd poll_fd2 = {sockets->second_fd(), POLLIN, 0};
+  ASSERT_THAT(RetryEINTR(poll)(&poll_fd2, 1, kPollTimeoutMs),
+              SyscallSucceedsWithValue(1));
+
+  ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), buf, sizeof(buf), 0),
+              SyscallSucceedsWithValue(0));
+}
+
+TEST_P(TCPSocketPairTest,
+       ShutdownRdUnreadDataShouldCauseNoPacketsUnlessClosed) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char buf[10] = {};
+  ASSERT_THAT(RetryEINTR(write)(sockets->first_fd(), buf, sizeof(buf)),
+              SyscallSucceedsWithValue(sizeof(buf)));
+
+  // Wait until t_ sees the data on its side but don't read it.
+  struct pollfd poll_fd = {sockets->second_fd(), POLLIN | POLLHUP, 0};
+  constexpr int kPollTimeoutMs = 20000;  // Wait up to 20 seconds for the data.
+  ASSERT_THAT(RetryEINTR(poll)(&poll_fd, 1, kPollTimeoutMs),
+              SyscallSucceedsWithValue(1));
+
+  // Now shutdown the read end, this will generate no packets to the other end.
+  ASSERT_THAT(shutdown(sockets->second_fd(), SHUT_RD), SyscallSucceeds());
+
+  // We should not receive any events on the other side of the socket.
+  struct pollfd poll_fd2 = {sockets->first_fd(), POLLIN | POLLHUP, 0};
+  constexpr int kPollNoResponseTimeoutMs = 3000;
+  ASSERT_THAT(RetryEINTR(poll)(&poll_fd2, 1, kPollNoResponseTimeoutMs),
+              SyscallSucceedsWithValue(0));  // Timeout.
+
+  // Now since we've fully closed the connection it will generate a RST.
+  ASSERT_THAT(close(sockets->release_second_fd()), SyscallSucceeds());
+  ASSERT_THAT(RetryEINTR(poll)(&poll_fd2, 1, kPollTimeoutMs),
+              SyscallSucceedsWithValue(1));  // The other end has closed.
+
+  // A shutdown with unread data will cause a RST to be sent instead
+  // of a FIN, per RFC 2525 section 2.17; this is also what Linux does.
+  ASSERT_THAT(RetryEINTR(read)(sockets->first_fd(), buf, sizeof(buf)),
+              SyscallFailsWithErrno(ECONNRESET));
+}
+
+TEST_P(TCPSocketPairTest, TCPCorkDefault) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  int get = -1;
+  socklen_t get_len = sizeof(get);
+  EXPECT_THAT(
+      getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_CORK, &get, &get_len),
+      SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, kSockOptOff);
+}
+
+TEST_P(TCPSocketPairTest, SetTCPCork) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_CORK,
+                         &kSockOptOn, sizeof(kSockOptOn)),
+              SyscallSucceeds());
+
+  int get = -1;
+  socklen_t get_len = sizeof(get);
+  EXPECT_THAT(
+      getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_CORK, &get, &get_len),
+      SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, kSockOptOn);
+
+  ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_CORK,
+                         &kSockOptOff, sizeof(kSockOptOff)),
+              SyscallSucceeds());
+
+  EXPECT_THAT(
+      getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_CORK, &get, &get_len),
+      SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, kSockOptOff);
+}
+
+TEST_P(TCPSocketPairTest, TCPCork) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_CORK,
+                         &kSockOptOn, sizeof(kSockOptOn)),
+              SyscallSucceeds());
+
+  constexpr char kData[] = "abc";
+  ASSERT_THAT(WriteFd(sockets->first_fd(), kData, sizeof(kData)),
+              SyscallSucceedsWithValue(sizeof(kData)));
+
+  ASSERT_NO_FATAL_FAILURE(RecvNoData(sockets->second_fd()));
+
+  EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_CORK,
+                         &kSockOptOff, sizeof(kSockOptOff)),
+              SyscallSucceeds());
+
+  // Create a receive buffer larger than kData.
+  char buf[(sizeof(kData) + 1) * 2] = {};
+  ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), buf, sizeof(buf), 0),
+              SyscallSucceedsWithValue(sizeof(kData)));
+  EXPECT_EQ(absl::string_view(kData, sizeof(kData)),
+            absl::string_view(buf, sizeof(kData)));
+}
+
+TEST_P(TCPSocketPairTest, TCPQuickAckDefault) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  int get = -1;
+  socklen_t get_len = sizeof(get);
+  EXPECT_THAT(getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_QUICKACK, &get,
+                         &get_len),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, kSockOptOn);
+}
+
+TEST_P(TCPSocketPairTest, SetTCPQuickAck) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_QUICKACK,
+                         &kSockOptOff, sizeof(kSockOptOff)),
+              SyscallSucceeds());
+
+  int get = -1;
+  socklen_t get_len = sizeof(get);
+  EXPECT_THAT(getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_QUICKACK, &get,
+                         &get_len),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, kSockOptOff);
+
+  ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_QUICKACK,
+                         &kSockOptOn, sizeof(kSockOptOn)),
+              SyscallSucceeds());
+
+  EXPECT_THAT(getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_QUICKACK, &get,
+                         &get_len),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, kSockOptOn);
+}
+
+TEST_P(TCPSocketPairTest, SoKeepaliveDefault) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  int get = -1;
+  socklen_t get_len = sizeof(get);
+  EXPECT_THAT(
+      getsockopt(sockets->first_fd(), SOL_SOCKET, SO_KEEPALIVE, &get, &get_len),
+      SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, kSockOptOff);
+}
+
+TEST_P(TCPSocketPairTest, SetSoKeepalive) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  ASSERT_THAT(setsockopt(sockets->first_fd(), SOL_SOCKET, SO_KEEPALIVE,
+                         &kSockOptOn, sizeof(kSockOptOn)),
+              SyscallSucceeds());
+
+  int get = -1;
+  socklen_t get_len = sizeof(get);
+  EXPECT_THAT(
+      getsockopt(sockets->first_fd(), SOL_SOCKET, SO_KEEPALIVE, &get, &get_len),
+      SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, kSockOptOn);
+
+  ASSERT_THAT(setsockopt(sockets->first_fd(), SOL_SOCKET, SO_KEEPALIVE,
+                         &kSockOptOff, sizeof(kSockOptOff)),
+              SyscallSucceeds());
+
+  EXPECT_THAT(
+      getsockopt(sockets->first_fd(), SOL_SOCKET, SO_KEEPALIVE, &get, &get_len),
+      SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, kSockOptOff);
+}
+
+TEST_P(TCPSocketPairTest, TCPKeepidleDefault) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  int get = -1;
+  socklen_t get_len = sizeof(get);
+  EXPECT_THAT(getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_KEEPIDLE, &get,
+                         &get_len),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, 2 * 60 * 60);  // 2 hours.
+}
+
+TEST_P(TCPSocketPairTest, TCPKeepintvlDefault) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  int get = -1;
+  socklen_t get_len = sizeof(get);
+  EXPECT_THAT(getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_KEEPINTVL, &get,
+                         &get_len),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, 75);  // 75 seconds.
+}
+
+TEST_P(TCPSocketPairTest, SetTCPKeepidleZero) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  constexpr int kZero = 0;
+  EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_KEEPIDLE, &kZero,
+                         sizeof(kZero)),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_P(TCPSocketPairTest, SetTCPKeepintvlZero) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  constexpr int kZero = 0;
+  EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_KEEPINTVL,
+                         &kZero, sizeof(kZero)),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+// Copied from include/net/tcp.h.
+constexpr int MAX_TCP_KEEPIDLE = 32767;
+constexpr int MAX_TCP_KEEPINTVL = 32767;
+constexpr int MAX_TCP_KEEPCNT = 127;
+
+TEST_P(TCPSocketPairTest, SetTCPKeepidleAboveMax) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  constexpr int kAboveMax = MAX_TCP_KEEPIDLE + 1;
+  EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_KEEPIDLE,
+                         &kAboveMax, sizeof(kAboveMax)),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_P(TCPSocketPairTest, SetTCPKeepintvlAboveMax) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  constexpr int kAboveMax = MAX_TCP_KEEPINTVL + 1;
+  EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_KEEPINTVL,
+                         &kAboveMax, sizeof(kAboveMax)),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_P(TCPSocketPairTest, SetTCPKeepidleToMax) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_KEEPIDLE,
+                         &MAX_TCP_KEEPIDLE, sizeof(MAX_TCP_KEEPIDLE)),
+              SyscallSucceedsWithValue(0));
+
+  int get = -1;
+  socklen_t get_len = sizeof(get);
+  EXPECT_THAT(getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_KEEPIDLE, &get,
+                         &get_len),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, MAX_TCP_KEEPIDLE);
+}
+
+TEST_P(TCPSocketPairTest, SetTCPKeepintvlToMax) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_KEEPINTVL,
+                         &MAX_TCP_KEEPINTVL, sizeof(MAX_TCP_KEEPINTVL)),
+              SyscallSucceedsWithValue(0));
+
+  int get = -1;
+  socklen_t get_len = sizeof(get);
+  EXPECT_THAT(getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_KEEPINTVL, &get,
+                         &get_len),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, MAX_TCP_KEEPINTVL);
+}
+
+TEST_P(TCPSocketPairTest, TCPKeepcountDefault) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  int get = -1;
+  socklen_t get_len = sizeof(get);
+  EXPECT_THAT(
+      getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_KEEPCNT, &get, &get_len),
+      SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, 9);  // 9 keepalive probes.
+}
+
+TEST_P(TCPSocketPairTest, SetTCPKeepcountZero) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  constexpr int kZero = 0;
+  EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_KEEPCNT, &kZero,
+                         sizeof(kZero)),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_P(TCPSocketPairTest, SetTCPKeepcountAboveMax) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  constexpr int kAboveMax = MAX_TCP_KEEPCNT + 1;
+  EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_KEEPCNT,
+                         &kAboveMax, sizeof(kAboveMax)),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_P(TCPSocketPairTest, SetTCPKeepcountToMax) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_KEEPCNT,
+                         &MAX_TCP_KEEPCNT, sizeof(MAX_TCP_KEEPCNT)),
+              SyscallSucceedsWithValue(0));
+
+  int get = -1;
+  socklen_t get_len = sizeof(get);
+  EXPECT_THAT(
+      getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_KEEPCNT, &get, &get_len),
+      SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, MAX_TCP_KEEPCNT);
+}
+
+TEST_P(TCPSocketPairTest, SetTCPKeepcountToOne) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  int keepaliveCount = 1;
+  EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_KEEPCNT,
+                         &keepaliveCount, sizeof(keepaliveCount)),
+              SyscallSucceedsWithValue(0));
+
+  int get = -1;
+  socklen_t get_len = sizeof(get);
+  EXPECT_THAT(
+      getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_KEEPCNT, &get, &get_len),
+      SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, keepaliveCount);
+}
+
+TEST_P(TCPSocketPairTest, SetTCPKeepcountToNegative) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  int keepaliveCount = -5;
+  EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_KEEPCNT,
+                         &keepaliveCount, sizeof(keepaliveCount)),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_P(TCPSocketPairTest, SetOOBInline) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  EXPECT_THAT(setsockopt(sockets->first_fd(), SOL_SOCKET, SO_OOBINLINE,
+                         &kSockOptOn, sizeof(kSockOptOn)),
+              SyscallSucceeds());
+
+  int get = -1;
+  socklen_t get_len = sizeof(get);
+  EXPECT_THAT(
+      getsockopt(sockets->first_fd(), SOL_SOCKET, SO_OOBINLINE, &get, &get_len),
+      SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, kSockOptOn);
+}
+
+TEST_P(TCPSocketPairTest, MsgTruncMsgPeek) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[512];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+  ASSERT_THAT(
+      RetryEINTR(send)(sockets->first_fd(), sent_data, sizeof(sent_data), 0),
+      SyscallSucceedsWithValue(sizeof(sent_data)));
+
+  // Read half of the data with MSG_TRUNC | MSG_PEEK. This way there will still
+  // be some data left to read in the next step even if the data gets consumed.
+  char received_data1[sizeof(sent_data) / 2] = {};
+  ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data1,
+                               sizeof(received_data1), MSG_TRUNC | MSG_PEEK),
+              SyscallSucceedsWithValue(sizeof(received_data1)));
+
+  // Check that we didn't get anything.
+  char zeros[sizeof(received_data1)] = {};
+  EXPECT_EQ(0, memcmp(zeros, received_data1, sizeof(received_data1)));
+
+  // Check that all of the data is still there.
+  char received_data2[sizeof(sent_data)] = {};
+  ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data2,
+                               sizeof(received_data2), 0),
+              SyscallSucceedsWithValue(sizeof(sent_data)));
+
+  EXPECT_EQ(0, memcmp(received_data2, sent_data, sizeof(sent_data)));
+}
+
+TEST_P(TCPSocketPairTest, SetCongestionControlSucceedsForSupported) {
+  // This is Linux's net/tcp.h TCP_CA_NAME_MAX.
+  const int kTcpCaNameMax = 16;
+
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  // Netstack only supports reno & cubic so we only test these two values here.
+  {
+    const char kSetCC[kTcpCaNameMax] = "reno";
+    ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_CONGESTION,
+                           &kSetCC, strlen(kSetCC)),
+                SyscallSucceedsWithValue(0));
+
+    char got_cc[kTcpCaNameMax];
+    memset(got_cc, '1', sizeof(got_cc));
+    socklen_t optlen = sizeof(got_cc);
+    ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_CONGESTION,
+                           &got_cc, &optlen),
+                SyscallSucceedsWithValue(0));
+    EXPECT_EQ(0, memcmp(got_cc, kSetCC, sizeof(kSetCC)));
+  }
+  {
+    const char kSetCC[kTcpCaNameMax] = "cubic";
+    ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_CONGESTION,
+                           &kSetCC, strlen(kSetCC)),
+                SyscallSucceedsWithValue(0));
+
+    char got_cc[kTcpCaNameMax];
+    memset(got_cc, '1', sizeof(got_cc));
+    socklen_t optlen = sizeof(got_cc);
+    ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_CONGESTION,
+                           &got_cc, &optlen),
+                SyscallSucceedsWithValue(0));
+    EXPECT_EQ(0, memcmp(got_cc, kSetCC, sizeof(kSetCC)));
+  }
+}
+
+TEST_P(TCPSocketPairTest, SetGetTCPCongestionShortReadBuffer) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  {
+    // Verify that getsockopt/setsockopt work with buffers smaller than
+    // kTcpCaNameMax.
+    const char kSetCC[] = "cubic";
+    ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_CONGESTION,
+                           &kSetCC, strlen(kSetCC)),
+                SyscallSucceedsWithValue(0));
+
+    char got_cc[sizeof(kSetCC)];
+    socklen_t optlen = sizeof(got_cc);
+    ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_CONGESTION,
+                           &got_cc, &optlen),
+                SyscallSucceedsWithValue(0));
+    EXPECT_EQ(0, memcmp(got_cc, kSetCC, sizeof(got_cc)));
+  }
+}
+
+TEST_P(TCPSocketPairTest, SetGetTCPCongestionLargeReadBuffer) {
+  // This is Linux's net/tcp.h TCP_CA_NAME_MAX.
+  const int kTcpCaNameMax = 16;
+
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  {
+    // Verify that getsockopt works with buffers larger than
+    // kTcpCaNameMax.
+    const char kSetCC[] = "cubic";
+    ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_CONGESTION,
+                           &kSetCC, strlen(kSetCC)),
+                SyscallSucceedsWithValue(0));
+
+    char got_cc[kTcpCaNameMax + 5];
+    socklen_t optlen = sizeof(got_cc);
+    ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_CONGESTION,
+                           &got_cc, &optlen),
+                SyscallSucceedsWithValue(0));
+    // Linux copies the minimum of kTcpCaNameMax or the length of the passed in
+    // buffer and sets optlen to the number of bytes actually copied
+    // irrespective of the actual length of the congestion control name.
+    EXPECT_EQ(kTcpCaNameMax, optlen);
+    EXPECT_EQ(0, memcmp(got_cc, kSetCC, sizeof(kSetCC)));
+  }
+}
+
+TEST_P(TCPSocketPairTest, SetCongestionControlFailsForUnsupported) {
+  // This is Linux's net/tcp.h TCP_CA_NAME_MAX.
+  const int kTcpCaNameMax = 16;
+
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  char old_cc[kTcpCaNameMax];
+  socklen_t optlen = sizeof(old_cc);
+  ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_CONGESTION,
+                         &old_cc, &optlen),
+              SyscallSucceedsWithValue(0));
+
+  const char kSetCC[] = "invalid_ca_cc";
+  ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_CONGESTION,
+                         &kSetCC, strlen(kSetCC)),
+              SyscallFailsWithErrno(ENOENT));
+
+  char got_cc[kTcpCaNameMax];
+  optlen = sizeof(got_cc);
+  ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_CONGESTION,
+                         &got_cc, &optlen),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(0, memcmp(got_cc, old_cc, sizeof(old_cc)));
+}
+
+// Linux and Netstack both default to a 60s TCP_LINGER2 timeout.
+constexpr int kDefaultTCPLingerTimeout = 60;
+
+TEST_P(TCPSocketPairTest, TCPLingerTimeoutDefault) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  int get = -1;
+  socklen_t get_len = sizeof(get);
+  EXPECT_THAT(
+      getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_LINGER2, &get, &get_len),
+      SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, kDefaultTCPLingerTimeout);
+}
+
+TEST_P(TCPSocketPairTest, SetTCPLingerTimeoutZeroOrLess) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  constexpr int kZero = 0;
+  EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_LINGER2, &kZero,
+                         sizeof(kZero)),
+              SyscallSucceedsWithValue(0));
+
+  constexpr int kNegative = -1234;
+  EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_LINGER2,
+                         &kNegative, sizeof(kNegative)),
+              SyscallSucceedsWithValue(0));
+}
+
+TEST_P(TCPSocketPairTest, SetTCPLingerTimeoutAboveDefault) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  // Values above the net.ipv4.tcp_fin_timeout are capped to tcp_fin_timeout
+  // on linux (defaults to 60 seconds on linux).
+  constexpr int kAboveDefault = kDefaultTCPLingerTimeout + 1;
+  EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_LINGER2,
+                         &kAboveDefault, sizeof(kAboveDefault)),
+              SyscallSucceedsWithValue(0));
+
+  int get = -1;
+  socklen_t get_len = sizeof(get);
+  EXPECT_THAT(
+      getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_LINGER2, &get, &get_len),
+      SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, kDefaultTCPLingerTimeout);
+}
+
+TEST_P(TCPSocketPairTest, SetTCPLingerTimeout) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  // Values above the net.ipv4.tcp_fin_timeout are capped to tcp_fin_timeout
+  // on linux (defaults to 60 seconds on linux).
+  constexpr int kTCPLingerTimeout = kDefaultTCPLingerTimeout - 1;
+  EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_LINGER2,
+                         &kTCPLingerTimeout, sizeof(kTCPLingerTimeout)),
+              SyscallSucceedsWithValue(0));
+
+  int get = -1;
+  socklen_t get_len = sizeof(get);
+  EXPECT_THAT(
+      getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_LINGER2, &get, &get_len),
+      SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, kTCPLingerTimeout);
+}
+
+TEST_P(TCPSocketPairTest, TestTCPCloseWithData) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  ScopedThread t([&]() {
+    // Close one end to trigger sending of a FIN.
+    ASSERT_THAT(shutdown(sockets->second_fd(), SHUT_WR), SyscallSucceeds());
+    char buf[3];
+    ASSERT_THAT(read(sockets->second_fd(), buf, 3),
+                SyscallSucceedsWithValue(3));
+    absl::SleepFor(absl::Milliseconds(50));
+    ASSERT_THAT(close(sockets->release_second_fd()), SyscallSucceeds());
+  });
+
+  absl::SleepFor(absl::Milliseconds(50));
+  // Send some data then close.
+  constexpr char kStr[] = "abc";
+  ASSERT_THAT(write(sockets->first_fd(), kStr, 3), SyscallSucceedsWithValue(3));
+  t.Join();
+  ASSERT_THAT(close(sockets->release_first_fd()), SyscallSucceeds());
+}
+
+TEST_P(TCPSocketPairTest, TCPUserTimeoutDefault) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  int get = -1;
+  socklen_t get_len = sizeof(get);
+  ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_USER_TIMEOUT,
+                         &get, &get_len),
+              SyscallSucceeds());
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, 0);  // 0 ms (disabled).
+}
+
+TEST_P(TCPSocketPairTest, SetTCPUserTimeoutZero) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  constexpr int kZero = 0;
+  ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_USER_TIMEOUT,
+                         &kZero, sizeof(kZero)),
+              SyscallSucceeds());
+
+  int get = -1;
+  socklen_t get_len = sizeof(get);
+  ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_USER_TIMEOUT,
+                         &get, &get_len),
+              SyscallSucceeds());
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, 0);  // 0 ms (disabled).
+}
+
+TEST_P(TCPSocketPairTest, SetTCPUserTimeoutBelowZero) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  constexpr int kNeg = -10;
+  EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_USER_TIMEOUT,
+                         &kNeg, sizeof(kNeg)),
+              SyscallFailsWithErrno(EINVAL));
+
+  int get = -1;
+  socklen_t get_len = sizeof(get);
+  ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_USER_TIMEOUT,
+                         &get, &get_len),
+              SyscallSucceeds());
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, 0);  // 0 ms (disabled).
+}
+
+TEST_P(TCPSocketPairTest, SetTCPUserTimeoutAboveZero) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  constexpr int kAbove = 10;
+  ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_USER_TIMEOUT,
+                         &kAbove, sizeof(kAbove)),
+              SyscallSucceeds());
+
+  int get = -1;
+  socklen_t get_len = sizeof(get);
+  ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_USER_TIMEOUT,
+                         &get, &get_len),
+              SyscallSucceeds());
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, kAbove);
+}
+
+TEST_P(TCPSocketPairTest, SetTCPWindowClampBelowMinRcvBufConnectedSocket) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  // Discover minimum receive buf by setting a really low value
+  // for the receive buffer.
+  constexpr int kZero = 0;
+  EXPECT_THAT(setsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVBUF, &kZero,
+                         sizeof(kZero)),
+              SyscallSucceeds());
+
+  // Now retrieve the minimum value for SO_RCVBUF as the set above should
+  // have caused SO_RCVBUF for the socket to be set to the minimum.
+  int get = -1;
+  socklen_t get_len = sizeof(get);
+  ASSERT_THAT(
+      getsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVBUF, &get, &get_len),
+      SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_len, sizeof(get));
+  int min_so_rcvbuf = get;
+
+  {
+    // Setting TCP_WINDOW_CLAMP to zero for a connected socket is not permitted.
+    constexpr int kZero = 0;
+    EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_WINDOW_CLAMP,
+                           &kZero, sizeof(kZero)),
+                SyscallFailsWithErrno(EINVAL));
+
+    // Non-zero clamp values below MIN_SO_RCVBUF/2 should result in the clamp
+    // being set to MIN_SO_RCVBUF/2.
+    int below_half_min_so_rcvbuf = min_so_rcvbuf / 2 - 1;
+    EXPECT_THAT(
+        setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_WINDOW_CLAMP,
+                   &below_half_min_so_rcvbuf, sizeof(below_half_min_so_rcvbuf)),
+        SyscallSucceeds());
+
+    int get = -1;
+    socklen_t get_len = sizeof(get);
+
+    ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_WINDOW_CLAMP,
+                           &get, &get_len),
+                SyscallSucceedsWithValue(0));
+    EXPECT_EQ(get_len, sizeof(get));
+    EXPECT_EQ(min_so_rcvbuf / 2, get);
+  }
+}
+
+TEST_P(TCPSocketPairTest, IpMulticastTtlDefault) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  int get = -1;
+  socklen_t get_len = sizeof(get);
+  EXPECT_THAT(getsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_TTL,
+                         &get, &get_len),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_GT(get, 0);
+}
+
+TEST_P(TCPSocketPairTest, IpMulticastLoopDefault) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  int get = -1;
+  socklen_t get_len = sizeof(get);
+  EXPECT_THAT(getsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_LOOP,
+                         &get, &get_len),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, 1);
+}
+
+TEST_P(TCPSocketPairTest, TCPResetDuringClose_NoRandomSave) {
+  DisableSave ds;  // Too many syscalls.
+  constexpr int kThreadCount = 1000;
+  std::unique_ptr<ScopedThread> instances[kThreadCount];
+  for (int i = 0; i < kThreadCount; i++) {
+    instances[i] = absl::make_unique<ScopedThread>([&]() {
+      auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+      ScopedThread t([&]() {
+        // Close one end to trigger sending of a FIN.
+        struct pollfd poll_fd = {sockets->second_fd(), POLLIN | POLLHUP, 0};
+        // Wait up to 20 seconds for the data.
+        constexpr int kPollTimeoutMs = 20000;
+        ASSERT_THAT(RetryEINTR(poll)(&poll_fd, 1, kPollTimeoutMs),
+                    SyscallSucceedsWithValue(1));
+        ASSERT_THAT(close(sockets->release_second_fd()), SyscallSucceeds());
+      });
+
+      // Send some data then close.
+      constexpr char kStr[] = "abc";
+      ASSERT_THAT(write(sockets->first_fd(), kStr, 3),
+                  SyscallSucceedsWithValue(3));
+      absl::SleepFor(absl::Milliseconds(10));
+      ASSERT_THAT(close(sockets->release_first_fd()), SyscallSucceeds());
+      t.Join();
+    });
+  }
+  for (int i = 0; i < kThreadCount; i++) {
+    instances[i]->Join();
+  }
+}
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_ip_tcp_generic.h b/test/syscalls/linux/socket_ip_tcp_generic.h
new file mode 100644
index 000000000..a3eff3c73
--- /dev/null
+++ b/test/syscalls/linux/socket_ip_tcp_generic.h
@@ -0,0 +1,29 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IP_TCP_GENERIC_H_
+#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IP_TCP_GENERIC_H_
+
+#include "test/syscalls/linux/socket_test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// Test fixture for tests that apply to pairs of connected TCP sockets.
+using TCPSocketPairTest = SocketPairTest;
+
+}  // namespace testing
+}  // namespace gvisor
+
+#endif  // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IP_TCP_GENERIC_H_
diff --git a/test/syscalls/linux/socket_ip_tcp_generic_loopback.cc b/test/syscalls/linux/socket_ip_tcp_generic_loopback.cc
new file mode 100644
index 000000000..4e79d21f4
--- /dev/null
+++ b/test/syscalls/linux/socket_ip_tcp_generic_loopback.cc
@@ -0,0 +1,45 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <netinet/tcp.h>
+
+#include <vector>
+
+#include "test/syscalls/linux/ip_socket_test_util.h"
+#include "test/syscalls/linux/socket_ip_tcp_generic.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+std::vector<SocketPairKind> GetSocketPairs() {
+  return ApplyVecToVec<SocketPairKind>(
+      std::vector<Middleware>{
+          NoOp, SetSockOpt(IPPROTO_TCP, TCP_NODELAY, &kSockOptOn)},
+      std::vector<SocketPairKind>{
+          IPv6TCPAcceptBindSocketPair(0),
+          IPv4TCPAcceptBindSocketPair(0),
+          DualStackTCPAcceptBindSocketPair(0),
+      });
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    AllTCPSockets, TCPSocketPairTest,
+    ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+}  // namespace
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_ip_tcp_loopback.cc b/test/syscalls/linux/socket_ip_tcp_loopback.cc
new file mode 100644
index 000000000..9db3037bc
--- /dev/null
+++ b/test/syscalls/linux/socket_ip_tcp_loopback.cc
@@ -0,0 +1,40 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <vector>
+
+#include "test/syscalls/linux/ip_socket_test_util.h"
+#include "test/syscalls/linux/socket_generic.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+std::vector<SocketPairKind> GetSocketPairs() {
+  return {
+      IPv6TCPAcceptBindSocketPair(0),
+      IPv4TCPAcceptBindSocketPair(0),
+      DualStackTCPAcceptBindSocketPair(0),
+  };
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    AllUnixDomainSockets, AllSocketPairTest,
+    ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+}  // namespace
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_ip_tcp_loopback_blocking.cc b/test/syscalls/linux/socket_ip_tcp_loopback_blocking.cc
new file mode 100644
index 000000000..f996b93d2
--- /dev/null
+++ b/test/syscalls/linux/socket_ip_tcp_loopback_blocking.cc
@@ -0,0 +1,45 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <netinet/tcp.h>
+
+#include <vector>
+
+#include "test/syscalls/linux/ip_socket_test_util.h"
+#include "test/syscalls/linux/socket_stream_blocking.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+std::vector<SocketPairKind> GetSocketPairs() {
+  return ApplyVecToVec<SocketPairKind>(
+      std::vector<Middleware>{
+          NoOp, SetSockOpt(IPPROTO_TCP, TCP_NODELAY, &kSockOptOn)},
+      std::vector<SocketPairKind>{
+          IPv6TCPAcceptBindSocketPair(0),
+          IPv4TCPAcceptBindSocketPair(0),
+          DualStackTCPAcceptBindSocketPair(0),
+      });
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    BlockingTCPSockets, BlockingStreamSocketPairTest,
+    ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+}  // namespace
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_ip_tcp_loopback_nonblock.cc b/test/syscalls/linux/socket_ip_tcp_loopback_nonblock.cc
new file mode 100644
index 000000000..ffa377210
--- /dev/null
+++ b/test/syscalls/linux/socket_ip_tcp_loopback_nonblock.cc
@@ -0,0 +1,44 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <netinet/tcp.h>
+
+#include <vector>
+
+#include "test/syscalls/linux/ip_socket_test_util.h"
+#include "test/syscalls/linux/socket_non_blocking.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+std::vector<SocketPairKind> GetSocketPairs() {
+  return ApplyVecToVec<SocketPairKind>(
+      std::vector<Middleware>{
+          NoOp, SetSockOpt(IPPROTO_TCP, TCP_NODELAY, &kSockOptOn)},
+      std::vector<SocketPairKind>{
+          IPv6TCPAcceptBindSocketPair(SOCK_NONBLOCK),
+          IPv4TCPAcceptBindSocketPair(SOCK_NONBLOCK),
+      });
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    NonBlockingTCPSockets, NonBlockingSocketPairTest,
+    ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+}  // namespace
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_ip_tcp_udp_generic.cc b/test/syscalls/linux/socket_ip_tcp_udp_generic.cc
new file mode 100644
index 000000000..f178f1af9
--- /dev/null
+++ b/test/syscalls/linux/socket_ip_tcp_udp_generic.cc
@@ -0,0 +1,77 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <poll.h>
+#include <stdio.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/ip_socket_test_util.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// Test fixture for tests that apply to pairs of TCP and UDP sockets.
+using TcpUdpSocketPairTest = SocketPairTest;
+
+TEST_P(TcpUdpSocketPairTest, ShutdownWrFollowedBySendIsError) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  // Now shutdown the write end of the first.
+  ASSERT_THAT(shutdown(sockets->first_fd(), SHUT_WR), SyscallSucceeds());
+
+  char buf[10] = {};
+  ASSERT_THAT(RetryEINTR(send)(sockets->first_fd(), buf, sizeof(buf), 0),
+              SyscallFailsWithErrno(EPIPE));
+}
+
+std::vector<SocketPairKind> GetSocketPairs() {
+  return VecCat<SocketPairKind>(
+      ApplyVec<SocketPairKind>(
+          IPv6UDPBidirectionalBindSocketPair,
+          AllBitwiseCombinations(List<int>{0, SOCK_NONBLOCK})),
+      ApplyVec<SocketPairKind>(
+          IPv4UDPBidirectionalBindSocketPair,
+          AllBitwiseCombinations(List<int>{0, SOCK_NONBLOCK})),
+      ApplyVec<SocketPairKind>(
+          DualStackUDPBidirectionalBindSocketPair,
+          AllBitwiseCombinations(List<int>{0, SOCK_NONBLOCK})),
+      ApplyVec<SocketPairKind>(
+          IPv6TCPAcceptBindSocketPair,
+          AllBitwiseCombinations(List<int>{0, SOCK_NONBLOCK})),
+      ApplyVec<SocketPairKind>(
+          IPv4TCPAcceptBindSocketPair,
+          AllBitwiseCombinations(List<int>{0, SOCK_NONBLOCK})),
+      ApplyVec<SocketPairKind>(
+          DualStackTCPAcceptBindSocketPair,
+          AllBitwiseCombinations(List<int>{0, SOCK_NONBLOCK})));
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    AllIPSockets, TcpUdpSocketPairTest,
+    ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_ip_udp_generic.cc b/test/syscalls/linux/socket_ip_udp_generic.cc
new file mode 100644
index 000000000..edb86aded
--- /dev/null
+++ b/test/syscalls/linux/socket_ip_udp_generic.cc
@@ -0,0 +1,452 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test/syscalls/linux/socket_ip_udp_generic.h"
+
+#include <errno.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <poll.h>
+#include <stdio.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+TEST_P(UDPSocketPairTest, MulticastTTLDefault) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  int get = -1;
+  socklen_t get_len = sizeof(get);
+  ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_TTL,
+                         &get, &get_len),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, 1);
+}
+
+TEST_P(UDPSocketPairTest, SetUDPMulticastTTLMin) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  constexpr int kMin = 0;
+  EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_TTL,
+                         &kMin, sizeof(kMin)),
+              SyscallSucceeds());
+
+  int get = -1;
+  socklen_t get_len = sizeof(get);
+  ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_TTL,
+                         &get, &get_len),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, kMin);
+}
+
+TEST_P(UDPSocketPairTest, SetUDPMulticastTTLMax) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  constexpr int kMax = 255;
+  EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_TTL,
+                         &kMax, sizeof(kMax)),
+              SyscallSucceeds());
+
+  int get = -1;
+  socklen_t get_len = sizeof(get);
+  ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_TTL,
+                         &get, &get_len),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, kMax);
+}
+
+TEST_P(UDPSocketPairTest, SetUDPMulticastTTLNegativeOne) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  constexpr int kArbitrary = 6;
+  EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_TTL,
+                         &kArbitrary, sizeof(kArbitrary)),
+              SyscallSucceeds());
+
+  constexpr int kNegOne = -1;
+  EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_TTL,
+                         &kNegOne, sizeof(kNegOne)),
+              SyscallSucceeds());
+
+  int get = -1;
+  socklen_t get_len = sizeof(get);
+  ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_TTL,
+                         &get, &get_len),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, 1);
+}
+
+TEST_P(UDPSocketPairTest, SetUDPMulticastTTLBelowMin) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  constexpr int kBelowMin = -2;
+  EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_TTL,
+                         &kBelowMin, sizeof(kBelowMin)),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_P(UDPSocketPairTest, SetUDPMulticastTTLAboveMax) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  constexpr int kAboveMax = 256;
+  EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_TTL,
+                         &kAboveMax, sizeof(kAboveMax)),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_P(UDPSocketPairTest, SetUDPMulticastTTLChar) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  constexpr char kArbitrary = 6;
+  EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_TTL,
+                         &kArbitrary, sizeof(kArbitrary)),
+              SyscallSucceeds());
+
+  int get = -1;
+  socklen_t get_len = sizeof(get);
+  ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_TTL,
+                         &get, &get_len),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, kArbitrary);
+}
+
+TEST_P(UDPSocketPairTest, SetEmptyIPAddMembership) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  struct ip_mreqn req = {};
+  EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_ADD_MEMBERSHIP,
+                         &req, sizeof(req)),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_P(UDPSocketPairTest, MulticastLoopDefault) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  int get = -1;
+  socklen_t get_len = sizeof(get);
+  ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_LOOP,
+                         &get, &get_len),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, kSockOptOn);
+}
+
+TEST_P(UDPSocketPairTest, SetMulticastLoop) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_LOOP,
+                         &kSockOptOff, sizeof(kSockOptOff)),
+              SyscallSucceeds());
+
+  int get = -1;
+  socklen_t get_len = sizeof(get);
+  ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_LOOP,
+                         &get, &get_len),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, kSockOptOff);
+
+  ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_LOOP,
+                         &kSockOptOn, sizeof(kSockOptOn)),
+              SyscallSucceeds());
+
+  ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_LOOP,
+                         &get, &get_len),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, kSockOptOn);
+}
+
+TEST_P(UDPSocketPairTest, SetMulticastLoopChar) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  constexpr char kSockOptOnChar = kSockOptOn;
+  constexpr char kSockOptOffChar = kSockOptOff;
+
+  ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_LOOP,
+                         &kSockOptOffChar, sizeof(kSockOptOffChar)),
+              SyscallSucceeds());
+
+  int get = -1;
+  socklen_t get_len = sizeof(get);
+  ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_LOOP,
+                         &get, &get_len),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, kSockOptOff);
+
+  ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_LOOP,
+                         &kSockOptOnChar, sizeof(kSockOptOnChar)),
+              SyscallSucceeds());
+
+  ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_LOOP,
+                         &get, &get_len),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, kSockOptOn);
+}
+
+TEST_P(UDPSocketPairTest, ReuseAddrDefault) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  int get = -1;
+  socklen_t get_len = sizeof(get);
+  ASSERT_THAT(
+      getsockopt(sockets->first_fd(), SOL_SOCKET, SO_REUSEADDR, &get, &get_len),
+      SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, kSockOptOff);
+}
+
+TEST_P(UDPSocketPairTest, SetReuseAddr) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  ASSERT_THAT(setsockopt(sockets->first_fd(), SOL_SOCKET, SO_REUSEADDR,
+                         &kSockOptOn, sizeof(kSockOptOn)),
+              SyscallSucceeds());
+
+  int get = -1;
+  socklen_t get_len = sizeof(get);
+  ASSERT_THAT(
+      getsockopt(sockets->first_fd(), SOL_SOCKET, SO_REUSEADDR, &get, &get_len),
+      SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, kSockOptOn);
+
+  ASSERT_THAT(setsockopt(sockets->first_fd(), SOL_SOCKET, SO_REUSEADDR,
+                         &kSockOptOff, sizeof(kSockOptOff)),
+              SyscallSucceeds());
+
+  ASSERT_THAT(
+      getsockopt(sockets->first_fd(), SOL_SOCKET, SO_REUSEADDR, &get, &get_len),
+      SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, kSockOptOff);
+}
+
+TEST_P(UDPSocketPairTest, ReusePortDefault) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  int get = -1;
+  socklen_t get_len = sizeof(get);
+  ASSERT_THAT(
+      getsockopt(sockets->first_fd(), SOL_SOCKET, SO_REUSEPORT, &get, &get_len),
+      SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, kSockOptOff);
+}
+
+TEST_P(UDPSocketPairTest, SetReusePort) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  ASSERT_THAT(setsockopt(sockets->first_fd(), SOL_SOCKET, SO_REUSEPORT,
+                         &kSockOptOn, sizeof(kSockOptOn)),
+              SyscallSucceeds());
+
+  int get = -1;
+  socklen_t get_len = sizeof(get);
+  ASSERT_THAT(
+      getsockopt(sockets->first_fd(), SOL_SOCKET, SO_REUSEPORT, &get, &get_len),
+      SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, kSockOptOn);
+
+  ASSERT_THAT(setsockopt(sockets->first_fd(), SOL_SOCKET, SO_REUSEPORT,
+                         &kSockOptOff, sizeof(kSockOptOff)),
+              SyscallSucceeds());
+
+  ASSERT_THAT(
+      getsockopt(sockets->first_fd(), SOL_SOCKET, SO_REUSEPORT, &get, &get_len),
+      SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, kSockOptOff);
+}
+
+TEST_P(UDPSocketPairTest, SetReuseAddrReusePort) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  ASSERT_THAT(setsockopt(sockets->first_fd(), SOL_SOCKET, SO_REUSEADDR,
+                         &kSockOptOn, sizeof(kSockOptOn)),
+              SyscallSucceeds());
+
+  ASSERT_THAT(setsockopt(sockets->first_fd(), SOL_SOCKET, SO_REUSEPORT,
+                         &kSockOptOn, sizeof(kSockOptOn)),
+              SyscallSucceeds());
+
+  int get = -1;
+  socklen_t get_len = sizeof(get);
+  ASSERT_THAT(
+      getsockopt(sockets->first_fd(), SOL_SOCKET, SO_REUSEADDR, &get, &get_len),
+      SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, kSockOptOn);
+
+  ASSERT_THAT(
+      getsockopt(sockets->first_fd(), SOL_SOCKET, SO_REUSEPORT, &get, &get_len),
+      SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, kSockOptOn);
+}
+
+// Test getsockopt for a socket which is not set with IP_PKTINFO option.
+TEST_P(UDPSocketPairTest, IPPKTINFODefault) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  int get = -1;
+  socklen_t get_len = sizeof(get);
+
+  ASSERT_THAT(
+      getsockopt(sockets->first_fd(), SOL_IP, IP_PKTINFO, &get, &get_len),
+      SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, kSockOptOff);
+}
+
+// Test setsockopt and getsockopt for a socket with IP_PKTINFO option.
+TEST_P(UDPSocketPairTest, SetAndGetIPPKTINFO) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  int level = SOL_IP;
+  int type = IP_PKTINFO;
+
+  // Check getsockopt before IP_PKTINFO is set.
+  int get = -1;
+  socklen_t get_len = sizeof(get);
+
+  ASSERT_THAT(setsockopt(sockets->first_fd(), level, type, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceedsWithValue(0));
+
+  ASSERT_THAT(getsockopt(sockets->first_fd(), level, type, &get, &get_len),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get, kSockOptOn);
+  EXPECT_EQ(get_len, sizeof(get));
+
+  ASSERT_THAT(setsockopt(sockets->first_fd(), level, type, &kSockOptOff,
+                         sizeof(kSockOptOff)),
+              SyscallSucceedsWithValue(0));
+
+  ASSERT_THAT(getsockopt(sockets->first_fd(), level, type, &get, &get_len),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get, kSockOptOff);
+  EXPECT_EQ(get_len, sizeof(get));
+}
+
+// Holds TOS or TClass information for IPv4 or IPv6 respectively.
+struct RecvTosOption {
+  int level;
+  int option;
+};
+
+RecvTosOption GetRecvTosOption(int domain) {
+  TEST_CHECK(domain == AF_INET || domain == AF_INET6);
+  RecvTosOption opt;
+  switch (domain) {
+    case AF_INET:
+      opt.level = IPPROTO_IP;
+      opt.option = IP_RECVTOS;
+      break;
+    case AF_INET6:
+      opt.level = IPPROTO_IPV6;
+      opt.option = IPV6_RECVTCLASS;
+      break;
+  }
+  return opt;
+}
+
+// Ensure that Receiving TOS or TCLASS is off by default.
+TEST_P(UDPSocketPairTest, RecvTosDefault) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  RecvTosOption t = GetRecvTosOption(GetParam().domain);
+  int get = -1;
+  socklen_t get_len = sizeof(get);
+  ASSERT_THAT(
+      getsockopt(sockets->first_fd(), t.level, t.option, &get, &get_len),
+      SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, kSockOptOff);
+}
+
+// Test that setting and getting IP_RECVTOS or IPV6_RECVTCLASS works as
+// expected.
+TEST_P(UDPSocketPairTest, SetRecvTos) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  RecvTosOption t = GetRecvTosOption(GetParam().domain);
+
+  ASSERT_THAT(setsockopt(sockets->first_fd(), t.level, t.option, &kSockOptOff,
+                         sizeof(kSockOptOff)),
+              SyscallSucceeds());
+
+  int get = -1;
+  socklen_t get_len = sizeof(get);
+  ASSERT_THAT(
+      getsockopt(sockets->first_fd(), t.level, t.option, &get, &get_len),
+      SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, kSockOptOff);
+
+  ASSERT_THAT(setsockopt(sockets->first_fd(), t.level, t.option, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceeds());
+
+  ASSERT_THAT(
+      getsockopt(sockets->first_fd(), t.level, t.option, &get, &get_len),
+      SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, kSockOptOn);
+}
+
+// Test that any socket (including IPv6 only) accepts the IPv4 TOS option: this
+// mirrors behavior in linux.
+TEST_P(UDPSocketPairTest, TOSRecvMismatch) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  RecvTosOption t = GetRecvTosOption(AF_INET);
+  int get = -1;
+  socklen_t get_len = sizeof(get);
+
+  ASSERT_THAT(
+      getsockopt(sockets->first_fd(), t.level, t.option, &get, &get_len),
+      SyscallSucceedsWithValue(0));
+}
+
+// Test that an IPv4 socket does not support the IPv6 TClass option.
+TEST_P(UDPSocketPairTest, TClassRecvMismatch) {
+  // This should only test AF_INET sockets for the mismatch behavior.
+  SKIP_IF(GetParam().domain != AF_INET);
+
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  int get = -1;
+  socklen_t get_len = sizeof(get);
+
+  ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_IPV6, IPV6_RECVTCLASS,
+                         &get, &get_len),
+              SyscallFailsWithErrno(EOPNOTSUPP));
+}
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_ip_udp_generic.h b/test/syscalls/linux/socket_ip_udp_generic.h
new file mode 100644
index 000000000..106c54e9f
--- /dev/null
+++ b/test/syscalls/linux/socket_ip_udp_generic.h
@@ -0,0 +1,29 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IP_UDP_GENERIC_H_
+#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IP_UDP_GENERIC_H_
+
+#include "test/syscalls/linux/socket_test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// Test fixture for tests that apply to pairs of connected UDP sockets.
+using UDPSocketPairTest = SocketPairTest;
+
+}  // namespace testing
+}  // namespace gvisor
+
+#endif  // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IP_UDP_GENERIC_H_
diff --git a/test/syscalls/linux/socket_ip_udp_loopback.cc b/test/syscalls/linux/socket_ip_udp_loopback.cc
new file mode 100644
index 000000000..c7fa44884
--- /dev/null
+++ b/test/syscalls/linux/socket_ip_udp_loopback.cc
@@ -0,0 +1,50 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <vector>
+
+#include "test/syscalls/linux/ip_socket_test_util.h"
+#include "test/syscalls/linux/socket_generic.h"
+#include "test/syscalls/linux/socket_ip_udp_generic.h"
+#include "test/syscalls/linux/socket_non_stream.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+std::vector<SocketPairKind> GetSocketPairs() {
+  return {
+      IPv6UDPBidirectionalBindSocketPair(0),
+      IPv4UDPBidirectionalBindSocketPair(0),
+      DualStackUDPBidirectionalBindSocketPair(0),
+  };
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    AllUDPSockets, AllSocketPairTest,
+    ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+INSTANTIATE_TEST_SUITE_P(
+    AllUDPSockets, NonStreamSocketPairTest,
+    ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+INSTANTIATE_TEST_SUITE_P(
+    AllUDPSockets, UDPSocketPairTest,
+    ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+}  // namespace
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_ip_udp_loopback_blocking.cc b/test/syscalls/linux/socket_ip_udp_loopback_blocking.cc
new file mode 100644
index 000000000..d6925a8df
--- /dev/null
+++ b/test/syscalls/linux/socket_ip_udp_loopback_blocking.cc
@@ -0,0 +1,39 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <vector>
+
+#include "test/syscalls/linux/ip_socket_test_util.h"
+#include "test/syscalls/linux/socket_non_stream_blocking.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+std::vector<SocketPairKind> GetSocketPairs() {
+  return {
+      IPv6UDPBidirectionalBindSocketPair(0),
+      IPv4UDPBidirectionalBindSocketPair(0),
+  };
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    BlockingUDPSockets, BlockingNonStreamSocketPairTest,
+    ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+}  // namespace
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_ip_udp_loopback_nonblock.cc b/test/syscalls/linux/socket_ip_udp_loopback_nonblock.cc
new file mode 100644
index 000000000..d675eddc6
--- /dev/null
+++ b/test/syscalls/linux/socket_ip_udp_loopback_nonblock.cc
@@ -0,0 +1,39 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <vector>
+
+#include "test/syscalls/linux/ip_socket_test_util.h"
+#include "test/syscalls/linux/socket_non_blocking.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+std::vector<SocketPairKind> GetSocketPairs() {
+  return {
+      IPv6UDPBidirectionalBindSocketPair(SOCK_NONBLOCK),
+      IPv4UDPBidirectionalBindSocketPair(SOCK_NONBLOCK),
+  };
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    NonBlockingUDPSockets, NonBlockingSocketPairTest,
+    ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+}  // namespace
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_ip_unbound.cc b/test/syscalls/linux/socket_ip_unbound.cc
new file mode 100644
index 000000000..1c7b0cf90
--- /dev/null
+++ b/test/syscalls/linux/socket_ip_unbound.cc
@@ -0,0 +1,474 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <arpa/inet.h>
+#include <netinet/in.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+
+#include <cstdio>
+#include <cstring>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/ip_socket_test_util.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// Test fixture for tests that apply to pairs of IP sockets.
+using IPUnboundSocketTest = SimpleSocketTest;
+
+TEST_P(IPUnboundSocketTest, TtlDefault) {
+  auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  int get = -1;
+  socklen_t get_sz = sizeof(get);
+  EXPECT_THAT(getsockopt(socket->get(), IPPROTO_IP, IP_TTL, &get, &get_sz),
+              SyscallSucceedsWithValue(0));
+  EXPECT_TRUE(get == 64 || get == 127);
+  EXPECT_EQ(get_sz, sizeof(get));
+}
+
+TEST_P(IPUnboundSocketTest, SetTtl) {
+  auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  int get1 = -1;
+  socklen_t get1_sz = sizeof(get1);
+  EXPECT_THAT(getsockopt(socket->get(), IPPROTO_IP, IP_TTL, &get1, &get1_sz),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get1_sz, sizeof(get1));
+
+  int set = 100;
+  if (set == get1) {
+    set += 1;
+  }
+  socklen_t set_sz = sizeof(set);
+  EXPECT_THAT(setsockopt(socket->get(), IPPROTO_IP, IP_TTL, &set, set_sz),
+              SyscallSucceedsWithValue(0));
+
+  int get2 = -1;
+  socklen_t get2_sz = sizeof(get2);
+  EXPECT_THAT(getsockopt(socket->get(), IPPROTO_IP, IP_TTL, &get2, &get2_sz),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get2_sz, sizeof(get2));
+  EXPECT_EQ(get2, set);
+}
+
+TEST_P(IPUnboundSocketTest, ResetTtlToDefault) {
+  auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  int get1 = -1;
+  socklen_t get1_sz = sizeof(get1);
+  EXPECT_THAT(getsockopt(socket->get(), IPPROTO_IP, IP_TTL, &get1, &get1_sz),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get1_sz, sizeof(get1));
+
+  int set1 = 100;
+  if (set1 == get1) {
+    set1 += 1;
+  }
+  socklen_t set1_sz = sizeof(set1);
+  EXPECT_THAT(setsockopt(socket->get(), IPPROTO_IP, IP_TTL, &set1, set1_sz),
+              SyscallSucceedsWithValue(0));
+
+  int set2 = -1;
+  socklen_t set2_sz = sizeof(set2);
+  EXPECT_THAT(setsockopt(socket->get(), IPPROTO_IP, IP_TTL, &set2, set2_sz),
+              SyscallSucceedsWithValue(0));
+
+  int get2 = -1;
+  socklen_t get2_sz = sizeof(get2);
+  EXPECT_THAT(getsockopt(socket->get(), IPPROTO_IP, IP_TTL, &get2, &get2_sz),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get2_sz, sizeof(get2));
+  EXPECT_EQ(get2, get1);
+}
+
+TEST_P(IPUnboundSocketTest, ZeroTtl) {
+  auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  int set = 0;
+  socklen_t set_sz = sizeof(set);
+  EXPECT_THAT(setsockopt(socket->get(), IPPROTO_IP, IP_TTL, &set, set_sz),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_P(IPUnboundSocketTest, InvalidLargeTtl) {
+  auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  int set = 256;
+  socklen_t set_sz = sizeof(set);
+  EXPECT_THAT(setsockopt(socket->get(), IPPROTO_IP, IP_TTL, &set, set_sz),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_P(IPUnboundSocketTest, InvalidNegativeTtl) {
+  auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  int set = -2;
+  socklen_t set_sz = sizeof(set);
+  EXPECT_THAT(setsockopt(socket->get(), IPPROTO_IP, IP_TTL, &set, set_sz),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+struct TOSOption {
+  int level;
+  int option;
+  int cmsg_level;
+};
+
+constexpr int INET_ECN_MASK = 3;
+
+static TOSOption GetTOSOption(int domain) {
+  TOSOption opt;
+  switch (domain) {
+    case AF_INET:
+      opt.level = IPPROTO_IP;
+      opt.option = IP_TOS;
+      opt.cmsg_level = SOL_IP;
+      break;
+    case AF_INET6:
+      opt.level = IPPROTO_IPV6;
+      opt.option = IPV6_TCLASS;
+      opt.cmsg_level = SOL_IPV6;
+      break;
+  }
+  return opt;
+}
+
+TEST_P(IPUnboundSocketTest, TOSDefault) {
+  auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  TOSOption t = GetTOSOption(GetParam().domain);
+  int get = -1;
+  socklen_t get_sz = sizeof(get);
+  constexpr int kDefaultTOS = 0;
+  ASSERT_THAT(getsockopt(socket->get(), t.level, t.option, &get, &get_sz),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_sz, sizeof(get));
+  EXPECT_EQ(get, kDefaultTOS);
+}
+
+TEST_P(IPUnboundSocketTest, SetTOS) {
+  auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  int set = 0xC0;
+  socklen_t set_sz = sizeof(set);
+  TOSOption t = GetTOSOption(GetParam().domain);
+  EXPECT_THAT(setsockopt(socket->get(), t.level, t.option, &set, set_sz),
+              SyscallSucceedsWithValue(0));
+
+  int get = -1;
+  socklen_t get_sz = sizeof(get);
+  ASSERT_THAT(getsockopt(socket->get(), t.level, t.option, &get, &get_sz),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_sz, sizeof(get));
+  EXPECT_EQ(get, set);
+}
+
+TEST_P(IPUnboundSocketTest, ZeroTOS) {
+  auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  int set = 0;
+  socklen_t set_sz = sizeof(set);
+  TOSOption t = GetTOSOption(GetParam().domain);
+  EXPECT_THAT(setsockopt(socket->get(), t.level, t.option, &set, set_sz),
+              SyscallSucceedsWithValue(0));
+  int get = -1;
+  socklen_t get_sz = sizeof(get);
+  ASSERT_THAT(getsockopt(socket->get(), t.level, t.option, &get, &get_sz),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_sz, sizeof(get));
+  EXPECT_EQ(get, set);
+}
+
+TEST_P(IPUnboundSocketTest, InvalidLargeTOS) {
+  auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  // Test with exceeding the byte space.
+  int set = 256;
+  constexpr int kDefaultTOS = 0;
+  socklen_t set_sz = sizeof(set);
+  TOSOption t = GetTOSOption(GetParam().domain);
+  if (GetParam().domain == AF_INET) {
+    EXPECT_THAT(setsockopt(socket->get(), t.level, t.option, &set, set_sz),
+                SyscallSucceedsWithValue(0));
+  } else {
+    EXPECT_THAT(setsockopt(socket->get(), t.level, t.option, &set, set_sz),
+                SyscallFailsWithErrno(EINVAL));
+  }
+  int get = -1;
+  socklen_t get_sz = sizeof(get);
+  ASSERT_THAT(getsockopt(socket->get(), t.level, t.option, &get, &get_sz),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_sz, sizeof(get));
+  EXPECT_EQ(get, kDefaultTOS);
+}
+
+TEST_P(IPUnboundSocketTest, CheckSkipECN) {
+  auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  int set = 0xFF;
+  socklen_t set_sz = sizeof(set);
+  TOSOption t = GetTOSOption(GetParam().domain);
+  EXPECT_THAT(setsockopt(socket->get(), t.level, t.option, &set, set_sz),
+              SyscallSucceedsWithValue(0));
+  int expect = static_cast<uint8_t>(set);
+  if (GetParam().protocol == IPPROTO_TCP) {
+    expect &= ~INET_ECN_MASK;
+  }
+  int get = -1;
+  socklen_t get_sz = sizeof(get);
+  ASSERT_THAT(getsockopt(socket->get(), t.level, t.option, &get, &get_sz),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_sz, sizeof(get));
+  EXPECT_EQ(get, expect);
+}
+
+TEST_P(IPUnboundSocketTest, ZeroTOSOptionSize) {
+  auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  int set = 0xC0;
+  socklen_t set_sz = 0;
+  TOSOption t = GetTOSOption(GetParam().domain);
+  if (GetParam().domain == AF_INET) {
+    EXPECT_THAT(setsockopt(socket->get(), t.level, t.option, &set, set_sz),
+                SyscallSucceedsWithValue(0));
+  } else {
+    EXPECT_THAT(setsockopt(socket->get(), t.level, t.option, &set, set_sz),
+                SyscallFailsWithErrno(EINVAL));
+  }
+  int get = -1;
+  socklen_t get_sz = 0;
+  ASSERT_THAT(getsockopt(socket->get(), t.level, t.option, &get, &get_sz),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_sz, 0);
+  EXPECT_EQ(get, -1);
+}
+
+TEST_P(IPUnboundSocketTest, SmallTOSOptionSize) {
+  auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  int set = 0xC0;
+  constexpr int kDefaultTOS = 0;
+  TOSOption t = GetTOSOption(GetParam().domain);
+  for (socklen_t i = 1; i < sizeof(int); i++) {
+    int expect_tos;
+    socklen_t expect_sz;
+    if (GetParam().domain == AF_INET) {
+      EXPECT_THAT(setsockopt(socket->get(), t.level, t.option, &set, i),
+                  SyscallSucceedsWithValue(0));
+      expect_tos = set;
+      expect_sz = sizeof(uint8_t);
+    } else {
+      EXPECT_THAT(setsockopt(socket->get(), t.level, t.option, &set, i),
+                  SyscallFailsWithErrno(EINVAL));
+      expect_tos = kDefaultTOS;
+      expect_sz = i;
+    }
+    uint get = -1;
+    socklen_t get_sz = i;
+    ASSERT_THAT(getsockopt(socket->get(), t.level, t.option, &get, &get_sz),
+                SyscallSucceedsWithValue(0));
+    EXPECT_EQ(get_sz, expect_sz);
+    // Account for partial copies by getsockopt, retrieve the lower
+    // bits specified by get_sz, while comparing against expect_tos.
+    EXPECT_EQ(get & ~(~0 << (get_sz * 8)), expect_tos);
+  }
+}
+
+TEST_P(IPUnboundSocketTest, LargeTOSOptionSize) {
+  auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  int set = 0xC0;
+  TOSOption t = GetTOSOption(GetParam().domain);
+  for (socklen_t i = sizeof(int); i < 10; i++) {
+    EXPECT_THAT(setsockopt(socket->get(), t.level, t.option, &set, i),
+                SyscallSucceedsWithValue(0));
+    int get = -1;
+    socklen_t get_sz = i;
+    // We expect the system call handler to only copy atmost sizeof(int) bytes
+    // as asserted by the check below. Hence, we do not expect the copy to
+    // overflow in getsockopt.
+    ASSERT_THAT(getsockopt(socket->get(), t.level, t.option, &get, &get_sz),
+                SyscallSucceedsWithValue(0));
+    EXPECT_EQ(get_sz, sizeof(int));
+    EXPECT_EQ(get, set);
+  }
+}
+
+TEST_P(IPUnboundSocketTest, NegativeTOS) {
+  auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  int set = -1;
+  socklen_t set_sz = sizeof(set);
+  TOSOption t = GetTOSOption(GetParam().domain);
+  EXPECT_THAT(setsockopt(socket->get(), t.level, t.option, &set, set_sz),
+              SyscallSucceedsWithValue(0));
+  int expect;
+  if (GetParam().domain == AF_INET) {
+    expect = static_cast<uint8_t>(set);
+    if (GetParam().protocol == IPPROTO_TCP) {
+      expect &= ~INET_ECN_MASK;
+    }
+  } else {
+    // On IPv6 TCLASS, setting -1 has the effect of resetting the
+    // TrafficClass.
+    expect = 0;
+  }
+  int get = -1;
+  socklen_t get_sz = sizeof(get);
+  ASSERT_THAT(getsockopt(socket->get(), t.level, t.option, &get, &get_sz),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_sz, sizeof(get));
+  EXPECT_EQ(get, expect);
+}
+
+TEST_P(IPUnboundSocketTest, InvalidNegativeTOS) {
+  auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  int set = -2;
+  socklen_t set_sz = sizeof(set);
+  TOSOption t = GetTOSOption(GetParam().domain);
+  int expect;
+  if (GetParam().domain == AF_INET) {
+    ASSERT_THAT(setsockopt(socket->get(), t.level, t.option, &set, set_sz),
+                SyscallSucceedsWithValue(0));
+    expect = static_cast<uint8_t>(set);
+    if (GetParam().protocol == IPPROTO_TCP) {
+      expect &= ~INET_ECN_MASK;
+    }
+  } else {
+    ASSERT_THAT(setsockopt(socket->get(), t.level, t.option, &set, set_sz),
+                SyscallFailsWithErrno(EINVAL));
+    expect = 0;
+  }
+  int get = 0;
+  socklen_t get_sz = sizeof(get);
+  ASSERT_THAT(getsockopt(socket->get(), t.level, t.option, &get, &get_sz),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_sz, sizeof(get));
+  EXPECT_EQ(get, expect);
+}
+
+TEST_P(IPUnboundSocketTest, NullTOS) {
+  auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  TOSOption t = GetTOSOption(GetParam().domain);
+  int set_sz = sizeof(int);
+  if (GetParam().domain == AF_INET) {
+    EXPECT_THAT(setsockopt(socket->get(), t.level, t.option, nullptr, set_sz),
+                SyscallFailsWithErrno(EFAULT));
+  } else {  // AF_INET6
+    // The AF_INET6 behavior is not yet compatible. gVisor will try to read
+    // optval from user memory at syscall handler, it needs substantial
+    // refactoring to implement this behavior just for IPv6.
+    if (IsRunningOnGvisor()) {
+      EXPECT_THAT(setsockopt(socket->get(), t.level, t.option, nullptr, set_sz),
+                  SyscallFailsWithErrno(EFAULT));
+    } else {
+      // Linux's IPv6 stack treats nullptr optval as input of 0, so the call
+      // succeeds. (net/ipv6/ipv6_sockglue.c, do_ipv6_setsockopt())
+      //
+      // Linux's implementation would need fixing as passing a nullptr as optval
+      // and non-zero optlen may not be valid.
+      // TODO(b/158666797): Combine the gVisor and linux cases for IPv6.
+      // Some kernel versions return EFAULT, so we handle both.
+      EXPECT_THAT(
+          setsockopt(socket->get(), t.level, t.option, nullptr, set_sz),
+          AnyOf(SyscallFailsWithErrno(EFAULT), SyscallSucceedsWithValue(0)));
+    }
+  }
+  socklen_t get_sz = sizeof(int);
+  EXPECT_THAT(getsockopt(socket->get(), t.level, t.option, nullptr, &get_sz),
+              SyscallFailsWithErrno(EFAULT));
+  int get = -1;
+  EXPECT_THAT(getsockopt(socket->get(), t.level, t.option, &get, nullptr),
+              SyscallFailsWithErrno(EFAULT));
+}
+
+TEST_P(IPUnboundSocketTest, InsufficientBufferTOS) {
+  SKIP_IF(GetParam().protocol == IPPROTO_TCP);
+
+  auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  TOSOption t = GetTOSOption(GetParam().domain);
+
+  in_addr addr4;
+  in6_addr addr6;
+  ASSERT_THAT(inet_pton(AF_INET, "127.0.0.1", &addr4), ::testing::Eq(1));
+  ASSERT_THAT(inet_pton(AF_INET6, "fe80::", &addr6), ::testing::Eq(1));
+
+  cmsghdr cmsg = {};
+  cmsg.cmsg_len = sizeof(cmsg);
+  cmsg.cmsg_level = t.cmsg_level;
+  cmsg.cmsg_type = t.option;
+
+  msghdr msg = {};
+  msg.msg_control = &cmsg;
+  msg.msg_controllen = sizeof(cmsg);
+  if (GetParam().domain == AF_INET) {
+    msg.msg_name = &addr4;
+    msg.msg_namelen = sizeof(addr4);
+  } else {
+    msg.msg_name = &addr6;
+    msg.msg_namelen = sizeof(addr6);
+  }
+
+  EXPECT_THAT(sendmsg(socket->get(), &msg, 0), SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_P(IPUnboundSocketTest, ReuseAddrDefault) {
+  auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  int get = -1;
+  socklen_t get_sz = sizeof(get);
+  ASSERT_THAT(
+      getsockopt(socket->get(), SOL_SOCKET, SO_REUSEADDR, &get, &get_sz),
+      SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get, kSockOptOff);
+  EXPECT_EQ(get_sz, sizeof(get));
+}
+
+TEST_P(IPUnboundSocketTest, SetReuseAddr) {
+  auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  ASSERT_THAT(setsockopt(socket->get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceedsWithValue(0));
+
+  int get = -1;
+  socklen_t get_sz = sizeof(get);
+  ASSERT_THAT(
+      getsockopt(socket->get(), SOL_SOCKET, SO_REUSEADDR, &get, &get_sz),
+      SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get, kSockOptOn);
+  EXPECT_EQ(get_sz, sizeof(get));
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    IPUnboundSockets, IPUnboundSocketTest,
+    ::testing::ValuesIn(VecCat<SocketKind>(VecCat<SocketKind>(
+        ApplyVec<SocketKind>(IPv4UDPUnboundSocket,
+                             AllBitwiseCombinations(List<int>{SOCK_DGRAM},
+                                                    List<int>{0,
+                                                              SOCK_NONBLOCK})),
+        ApplyVec<SocketKind>(IPv6UDPUnboundSocket,
+                             AllBitwiseCombinations(List<int>{SOCK_DGRAM},
+                                                    List<int>{0,
+                                                              SOCK_NONBLOCK})),
+        ApplyVec<SocketKind>(IPv4TCPUnboundSocket,
+                             AllBitwiseCombinations(List<int>{SOCK_STREAM},
+                                                    List<int>{0,
+                                                              SOCK_NONBLOCK})),
+        ApplyVec<SocketKind>(IPv6TCPUnboundSocket,
+                             AllBitwiseCombinations(List<int>{SOCK_STREAM},
+                                                    List<int>{
+                                                        0, SOCK_NONBLOCK}))))));
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_ipv4_tcp_unbound_external_networking.cc b/test/syscalls/linux/socket_ipv4_tcp_unbound_external_networking.cc
new file mode 100644
index 000000000..80f12b0a9
--- /dev/null
+++ b/test/syscalls/linux/socket_ipv4_tcp_unbound_external_networking.cc
@@ -0,0 +1,66 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test/syscalls/linux/socket_ipv4_tcp_unbound_external_networking.h"
+
+#include <netinet/in.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+
+#include <cstdio>
+#include <cstring>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// Verifies that a newly instantiated TCP socket does not have the
+// broadcast socket option enabled.
+TEST_P(IPv4TCPUnboundExternalNetworkingSocketTest, TCPBroadcastDefault) {
+  auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  int get = -1;
+  socklen_t get_sz = sizeof(get);
+  EXPECT_THAT(
+      getsockopt(socket->get(), SOL_SOCKET, SO_BROADCAST, &get, &get_sz),
+      SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get, kSockOptOff);
+  EXPECT_EQ(get_sz, sizeof(get));
+}
+
+// Verifies that a newly instantiated TCP socket returns true after enabling
+// the broadcast socket option.
+TEST_P(IPv4TCPUnboundExternalNetworkingSocketTest, SetTCPBroadcast) {
+  auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  EXPECT_THAT(setsockopt(socket->get(), SOL_SOCKET, SO_BROADCAST, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceedsWithValue(0));
+
+  int get = -1;
+  socklen_t get_sz = sizeof(get);
+  EXPECT_THAT(
+      getsockopt(socket->get(), SOL_SOCKET, SO_BROADCAST, &get, &get_sz),
+      SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get, kSockOptOn);
+  EXPECT_EQ(get_sz, sizeof(get));
+}
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_ipv4_tcp_unbound_external_networking.h b/test/syscalls/linux/socket_ipv4_tcp_unbound_external_networking.h
new file mode 100644
index 000000000..fb582b224
--- /dev/null
+++ b/test/syscalls/linux/socket_ipv4_tcp_unbound_external_networking.h
@@ -0,0 +1,30 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IPV4_TCP_UNBOUND_EXTERNAL_NETWORKING_H_
+#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IPV4_TCP_UNBOUND_EXTERNAL_NETWORKING_H_
+
+#include "test/syscalls/linux/socket_test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// Test fixture for tests that apply to unbound IPv4 TCP sockets in a sandbox
+// with external networking support.
+using IPv4TCPUnboundExternalNetworkingSocketTest = SimpleSocketTest;
+
+}  // namespace testing
+}  // namespace gvisor
+
+#endif  // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IPV4_TCP_UNBOUND_EXTERNAL_NETWORKING_H_
diff --git a/test/syscalls/linux/socket_ipv4_tcp_unbound_external_networking_test.cc b/test/syscalls/linux/socket_ipv4_tcp_unbound_external_networking_test.cc
new file mode 100644
index 000000000..797c4174e
--- /dev/null
+++ b/test/syscalls/linux/socket_ipv4_tcp_unbound_external_networking_test.cc
@@ -0,0 +1,39 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test/syscalls/linux/socket_ipv4_tcp_unbound_external_networking.h"
+
+#include <vector>
+
+#include "test/syscalls/linux/ip_socket_test_util.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+std::vector<SocketKind> GetSockets() {
+  return ApplyVec<SocketKind>(
+      IPv4TCPUnboundSocket,
+      AllBitwiseCombinations(List<int>{0, SOCK_NONBLOCK}));
+}
+
+INSTANTIATE_TEST_SUITE_P(IPv4TCPUnboundSockets,
+                         IPv4TCPUnboundExternalNetworkingSocketTest,
+                         ::testing::ValuesIn(GetSockets()));
+
+}  // namespace
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_ipv4_udp_unbound.cc b/test/syscalls/linux/socket_ipv4_udp_unbound.cc
new file mode 100644
index 000000000..de0f5f01b
--- /dev/null
+++ b/test/syscalls/linux/socket_ipv4_udp_unbound.cc
@@ -0,0 +1,2456 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test/syscalls/linux/socket_ipv4_udp_unbound.h"
+
+#include <arpa/inet.h>
+#include <net/if.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+
+#include <cstdio>
+
+#include "gtest/gtest.h"
+#include "absl/memory/memory.h"
+#include "test/syscalls/linux/ip_socket_test_util.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// Check that packets are not received without a group membership. Default send
+// interface configured by bind.
+TEST_P(IPv4UDPUnboundSocketTest, IpMulticastLoopbackNoGroup) {
+  auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  // Bind the first FD to the loopback. This is an alternative to
+  // IP_MULTICAST_IF for setting the default send interface.
+  auto sender_addr = V4Loopback();
+  EXPECT_THAT(
+      bind(socket1->get(), reinterpret_cast<sockaddr*>(&sender_addr.addr),
+           sender_addr.addr_len),
+      SyscallSucceeds());
+
+  // Bind the second FD to the v4 any address. If multicast worked like unicast,
+  // this would ensure that we get the packet.
+  auto receiver_addr = V4Any();
+  EXPECT_THAT(
+      bind(socket2->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+           receiver_addr.addr_len),
+      SyscallSucceeds());
+  socklen_t receiver_addr_len = receiver_addr.addr_len;
+  ASSERT_THAT(getsockname(socket2->get(),
+                          reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+                          &receiver_addr_len),
+              SyscallSucceeds());
+  EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len);
+
+  // Send the multicast packet.
+  auto send_addr = V4Multicast();
+  reinterpret_cast<sockaddr_in*>(&send_addr.addr)->sin_port =
+      reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port;
+  char send_buf[200];
+  RandomizeBuffer(send_buf, sizeof(send_buf));
+  EXPECT_THAT(RetryEINTR(sendto)(socket1->get(), send_buf, sizeof(send_buf), 0,
+                                 reinterpret_cast<sockaddr*>(&send_addr.addr),
+                                 send_addr.addr_len),
+              SyscallSucceedsWithValue(sizeof(send_buf)));
+
+  // Check that we did not receive the multicast packet.
+  char recv_buf[sizeof(send_buf)] = {};
+  EXPECT_THAT(RetryEINTR(recv)(socket2->get(), recv_buf, sizeof(recv_buf),
+                               MSG_DONTWAIT),
+              SyscallFailsWithErrno(EAGAIN));
+}
+
+// Check that not setting a default send interface prevents multicast packets
+// from being sent. Group membership interface configured by address.
+TEST_P(IPv4UDPUnboundSocketTest, IpMulticastLoopbackAddrNoDefaultSendIf) {
+  auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  // Bind the second FD to the v4 any address to ensure that we can receive any
+  // unicast packet.
+  auto receiver_addr = V4Any();
+  EXPECT_THAT(
+      bind(socket2->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+           receiver_addr.addr_len),
+      SyscallSucceeds());
+  socklen_t receiver_addr_len = receiver_addr.addr_len;
+  ASSERT_THAT(getsockname(socket2->get(),
+                          reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+                          &receiver_addr_len),
+              SyscallSucceeds());
+  EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len);
+
+  // Register to receive multicast packets.
+  ip_mreq group = {};
+  group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress);
+  group.imr_interface.s_addr = htonl(INADDR_LOOPBACK);
+  EXPECT_THAT(setsockopt(socket2->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group,
+                         sizeof(group)),
+              SyscallSucceeds());
+
+  // Send a multicast packet.
+  auto send_addr = V4Multicast();
+  reinterpret_cast<sockaddr_in*>(&send_addr.addr)->sin_port =
+      reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port;
+  char send_buf[200];
+  RandomizeBuffer(send_buf, sizeof(send_buf));
+  EXPECT_THAT(RetryEINTR(sendto)(socket1->get(), send_buf, sizeof(send_buf), 0,
+                                 reinterpret_cast<sockaddr*>(&send_addr.addr),
+                                 send_addr.addr_len),
+              SyscallFailsWithErrno(ENETUNREACH));
+}
+
+// Check that not setting a default send interface prevents multicast packets
+// from being sent. Group membership interface configured by NIC ID.
+TEST_P(IPv4UDPUnboundSocketTest, IpMulticastLoopbackNicNoDefaultSendIf) {
+  auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  // Bind the second FD to the v4 any address to ensure that we can receive any
+  // unicast packet.
+  auto receiver_addr = V4Any();
+  ASSERT_THAT(
+      bind(socket2->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+           receiver_addr.addr_len),
+      SyscallSucceeds());
+  socklen_t receiver_addr_len = receiver_addr.addr_len;
+  ASSERT_THAT(getsockname(socket2->get(),
+                          reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+                          &receiver_addr_len),
+              SyscallSucceeds());
+  EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len);
+
+  // Register to receive multicast packets.
+  ip_mreqn group = {};
+  group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress);
+  group.imr_ifindex = ASSERT_NO_ERRNO_AND_VALUE(InterfaceIndex("lo"));
+  EXPECT_THAT(setsockopt(socket2->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group,
+                         sizeof(group)),
+              SyscallSucceeds());
+
+  // Send a multicast packet.
+  auto send_addr = V4Multicast();
+  reinterpret_cast<sockaddr_in*>(&send_addr.addr)->sin_port =
+      reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port;
+  char send_buf[200];
+  RandomizeBuffer(send_buf, sizeof(send_buf));
+  EXPECT_THAT(RetryEINTR(sendto)(socket1->get(), send_buf, sizeof(send_buf), 0,
+                                 reinterpret_cast<sockaddr*>(&send_addr.addr),
+                                 send_addr.addr_len),
+              SyscallFailsWithErrno(ENETUNREACH));
+}
+
+// Check that multicast works when the default send interface is configured by
+// bind and the group membership is configured by address.
+TEST_P(IPv4UDPUnboundSocketTest, IpMulticastLoopbackAddr) {
+  auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  // Bind the first FD to the loopback. This is an alternative to
+  // IP_MULTICAST_IF for setting the default send interface.
+  auto sender_addr = V4Loopback();
+  ASSERT_THAT(
+      bind(socket1->get(), reinterpret_cast<sockaddr*>(&sender_addr.addr),
+           sender_addr.addr_len),
+      SyscallSucceeds());
+
+  // Bind the second FD to the v4 any address to ensure that we can receive the
+  // multicast packet.
+  auto receiver_addr = V4Any();
+  ASSERT_THAT(
+      bind(socket2->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+           receiver_addr.addr_len),
+      SyscallSucceeds());
+  socklen_t receiver_addr_len = receiver_addr.addr_len;
+  ASSERT_THAT(getsockname(socket2->get(),
+                          reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+                          &receiver_addr_len),
+              SyscallSucceeds());
+  EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len);
+
+  // Register to receive multicast packets.
+  ip_mreq group = {};
+  group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress);
+  group.imr_interface.s_addr = htonl(INADDR_LOOPBACK);
+  ASSERT_THAT(setsockopt(socket2->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group,
+                         sizeof(group)),
+              SyscallSucceeds());
+
+  // Send a multicast packet.
+  auto send_addr = V4Multicast();
+  reinterpret_cast<sockaddr_in*>(&send_addr.addr)->sin_port =
+      reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port;
+  char send_buf[200];
+  RandomizeBuffer(send_buf, sizeof(send_buf));
+  ASSERT_THAT(RetryEINTR(sendto)(socket1->get(), send_buf, sizeof(send_buf), 0,
+                                 reinterpret_cast<sockaddr*>(&send_addr.addr),
+                                 send_addr.addr_len),
+              SyscallSucceedsWithValue(sizeof(send_buf)));
+
+  // Check that we received the multicast packet.
+  char recv_buf[sizeof(send_buf)] = {};
+  ASSERT_THAT(RetryEINTR(recv)(socket2->get(), recv_buf, sizeof(recv_buf), 0),
+              SyscallSucceedsWithValue(sizeof(recv_buf)));
+
+  EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf)));
+}
+
+// Check that multicast works when the default send interface is configured by
+// bind and the group membership is configured by NIC ID.
+TEST_P(IPv4UDPUnboundSocketTest, IpMulticastLoopbackNic) {
+  auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  // Bind the first FD to the loopback. This is an alternative to
+  // IP_MULTICAST_IF for setting the default send interface.
+  auto sender_addr = V4Loopback();
+  ASSERT_THAT(
+      bind(socket1->get(), reinterpret_cast<sockaddr*>(&sender_addr.addr),
+           sender_addr.addr_len),
+      SyscallSucceeds());
+
+  // Bind the second FD to the v4 any address to ensure that we can receive the
+  // multicast packet.
+  auto receiver_addr = V4Any();
+  ASSERT_THAT(
+      bind(socket2->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+           receiver_addr.addr_len),
+      SyscallSucceeds());
+  socklen_t receiver_addr_len = receiver_addr.addr_len;
+  ASSERT_THAT(getsockname(socket2->get(),
+                          reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+                          &receiver_addr_len),
+              SyscallSucceeds());
+  EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len);
+
+  // Register to receive multicast packets.
+  ip_mreqn group = {};
+  group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress);
+  group.imr_ifindex = ASSERT_NO_ERRNO_AND_VALUE(InterfaceIndex("lo"));
+  ASSERT_THAT(setsockopt(socket2->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group,
+                         sizeof(group)),
+              SyscallSucceeds());
+
+  // Send a multicast packet.
+  auto send_addr = V4Multicast();
+  reinterpret_cast<sockaddr_in*>(&send_addr.addr)->sin_port =
+      reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port;
+  char send_buf[200];
+  RandomizeBuffer(send_buf, sizeof(send_buf));
+  ASSERT_THAT(RetryEINTR(sendto)(socket1->get(), send_buf, sizeof(send_buf), 0,
+                                 reinterpret_cast<sockaddr*>(&send_addr.addr),
+                                 send_addr.addr_len),
+              SyscallSucceedsWithValue(sizeof(send_buf)));
+
+  // Check that we received the multicast packet.
+  char recv_buf[sizeof(send_buf)] = {};
+  ASSERT_THAT(RetryEINTR(recv)(socket2->get(), recv_buf, sizeof(recv_buf), 0),
+              SyscallSucceedsWithValue(sizeof(recv_buf)));
+
+  EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf)));
+}
+
+// Check that multicast works when the default send interface is configured by
+// IP_MULTICAST_IF, the send address is specified in sendto, and the group
+// membership is configured by address.
+TEST_P(IPv4UDPUnboundSocketTest, IpMulticastLoopbackIfAddr) {
+  auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  // Set the default send interface.
+  ip_mreq iface = {};
+  iface.imr_interface.s_addr = htonl(INADDR_LOOPBACK);
+  ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &iface,
+                         sizeof(iface)),
+              SyscallSucceeds());
+
+  // Bind the second FD to the v4 any address to ensure that we can receive the
+  // multicast packet.
+  auto receiver_addr = V4Any();
+  ASSERT_THAT(
+      bind(socket2->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+           receiver_addr.addr_len),
+      SyscallSucceeds());
+  socklen_t receiver_addr_len = receiver_addr.addr_len;
+  ASSERT_THAT(getsockname(socket2->get(),
+                          reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+                          &receiver_addr_len),
+              SyscallSucceeds());
+  EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len);
+
+  // Register to receive multicast packets.
+  ip_mreq group = {};
+  group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress);
+  group.imr_interface.s_addr = htonl(INADDR_LOOPBACK);
+  ASSERT_THAT(setsockopt(socket2->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group,
+                         sizeof(group)),
+              SyscallSucceeds());
+
+  // Send a multicast packet.
+  auto send_addr = V4Multicast();
+  reinterpret_cast<sockaddr_in*>(&send_addr.addr)->sin_port =
+      reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port;
+  char send_buf[200];
+  RandomizeBuffer(send_buf, sizeof(send_buf));
+  ASSERT_THAT(RetryEINTR(sendto)(socket1->get(), send_buf, sizeof(send_buf), 0,
+                                 reinterpret_cast<sockaddr*>(&send_addr.addr),
+                                 send_addr.addr_len),
+              SyscallSucceedsWithValue(sizeof(send_buf)));
+
+  // Check that we received the multicast packet.
+  char recv_buf[sizeof(send_buf)] = {};
+  ASSERT_THAT(RetryEINTR(recv)(socket2->get(), recv_buf, sizeof(recv_buf), 0),
+              SyscallSucceedsWithValue(sizeof(recv_buf)));
+
+  EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf)));
+}
+
+// Check that multicast works when the default send interface is configured by
+// IP_MULTICAST_IF, the send address is specified in sendto, and the group
+// membership is configured by NIC ID.
+TEST_P(IPv4UDPUnboundSocketTest, IpMulticastLoopbackIfNic) {
+  auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  // Set the default send interface.
+  ip_mreqn iface = {};
+  iface.imr_ifindex = ASSERT_NO_ERRNO_AND_VALUE(InterfaceIndex("lo"));
+  ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &iface,
+                         sizeof(iface)),
+              SyscallSucceeds());
+
+  // Bind the second FD to the v4 any address to ensure that we can receive the
+  // multicast packet.
+  auto receiver_addr = V4Any();
+  ASSERT_THAT(
+      bind(socket2->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+           receiver_addr.addr_len),
+      SyscallSucceeds());
+  socklen_t receiver_addr_len = receiver_addr.addr_len;
+  ASSERT_THAT(getsockname(socket2->get(),
+                          reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+                          &receiver_addr_len),
+              SyscallSucceeds());
+  EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len);
+
+  // Register to receive multicast packets.
+  ip_mreqn group = {};
+  group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress);
+  group.imr_ifindex = ASSERT_NO_ERRNO_AND_VALUE(InterfaceIndex("lo"));
+  ASSERT_THAT(setsockopt(socket2->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group,
+                         sizeof(group)),
+              SyscallSucceeds());
+
+  // Send a multicast packet.
+  auto send_addr = V4Multicast();
+  reinterpret_cast<sockaddr_in*>(&send_addr.addr)->sin_port =
+      reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port;
+  char send_buf[200];
+  RandomizeBuffer(send_buf, sizeof(send_buf));
+  ASSERT_THAT(RetryEINTR(sendto)(socket1->get(), send_buf, sizeof(send_buf), 0,
+                                 reinterpret_cast<sockaddr*>(&send_addr.addr),
+                                 send_addr.addr_len),
+              SyscallSucceedsWithValue(sizeof(send_buf)));
+
+  // Check that we received the multicast packet.
+  char recv_buf[sizeof(send_buf)] = {};
+  ASSERT_THAT(RetryEINTR(recv)(socket2->get(), recv_buf, sizeof(recv_buf), 0),
+              SyscallSucceedsWithValue(sizeof(recv_buf)));
+
+  EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf)));
+}
+
+// Check that multicast works when the default send interface is configured by
+// IP_MULTICAST_IF, the send address is specified in connect, and the group
+// membership is configured by address.
+TEST_P(IPv4UDPUnboundSocketTest, IpMulticastLoopbackIfAddrConnect) {
+  auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  // Set the default send interface.
+  ip_mreq iface = {};
+  iface.imr_interface.s_addr = htonl(INADDR_LOOPBACK);
+  ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &iface,
+                         sizeof(iface)),
+              SyscallSucceeds());
+
+  // Bind the second FD to the v4 any address to ensure that we can receive the
+  // multicast packet.
+  auto receiver_addr = V4Any();
+  ASSERT_THAT(
+      bind(socket2->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+           receiver_addr.addr_len),
+      SyscallSucceeds());
+  socklen_t receiver_addr_len = receiver_addr.addr_len;
+  ASSERT_THAT(getsockname(socket2->get(),
+                          reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+                          &receiver_addr_len),
+              SyscallSucceeds());
+  EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len);
+
+  // Register to receive multicast packets.
+  ip_mreq group = {};
+  group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress);
+  group.imr_interface.s_addr = htonl(INADDR_LOOPBACK);
+  ASSERT_THAT(setsockopt(socket2->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group,
+                         sizeof(group)),
+              SyscallSucceeds());
+
+  // Send a multicast packet.
+  auto connect_addr = V4Multicast();
+  reinterpret_cast<sockaddr_in*>(&connect_addr.addr)->sin_port =
+      reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port;
+  ASSERT_THAT(
+      RetryEINTR(connect)(socket1->get(),
+                          reinterpret_cast<sockaddr*>(&connect_addr.addr),
+                          connect_addr.addr_len),
+      SyscallSucceeds());
+
+  char send_buf[200];
+  RandomizeBuffer(send_buf, sizeof(send_buf));
+  ASSERT_THAT(RetryEINTR(send)(socket1->get(), send_buf, sizeof(send_buf), 0),
+              SyscallSucceedsWithValue(sizeof(send_buf)));
+
+  // Check that we received the multicast packet.
+  char recv_buf[sizeof(send_buf)] = {};
+  ASSERT_THAT(RetryEINTR(recv)(socket2->get(), recv_buf, sizeof(recv_buf), 0),
+              SyscallSucceedsWithValue(sizeof(recv_buf)));
+
+  EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf)));
+}
+
+// Check that multicast works when the default send interface is configured by
+// IP_MULTICAST_IF, the send address is specified in connect, and the group
+// membership is configured by NIC ID.
+TEST_P(IPv4UDPUnboundSocketTest, IpMulticastLoopbackIfNicConnect) {
+  auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  // Set the default send interface.
+  ip_mreqn iface = {};
+  iface.imr_ifindex = ASSERT_NO_ERRNO_AND_VALUE(InterfaceIndex("lo"));
+  ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &iface,
+                         sizeof(iface)),
+              SyscallSucceeds());
+
+  // Bind the second FD to the v4 any address to ensure that we can receive the
+  // multicast packet.
+  auto receiver_addr = V4Any();
+  ASSERT_THAT(
+      bind(socket2->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+           receiver_addr.addr_len),
+      SyscallSucceeds());
+  socklen_t receiver_addr_len = receiver_addr.addr_len;
+  ASSERT_THAT(getsockname(socket2->get(),
+                          reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+                          &receiver_addr_len),
+              SyscallSucceeds());
+  EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len);
+
+  // Register to receive multicast packets.
+  ip_mreqn group = {};
+  group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress);
+  group.imr_ifindex = ASSERT_NO_ERRNO_AND_VALUE(InterfaceIndex("lo"));
+  ASSERT_THAT(setsockopt(socket2->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group,
+                         sizeof(group)),
+              SyscallSucceeds());
+
+  // Send a multicast packet.
+  auto connect_addr = V4Multicast();
+  reinterpret_cast<sockaddr_in*>(&connect_addr.addr)->sin_port =
+      reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port;
+  ASSERT_THAT(
+      RetryEINTR(connect)(socket1->get(),
+                          reinterpret_cast<sockaddr*>(&connect_addr.addr),
+                          connect_addr.addr_len),
+      SyscallSucceeds());
+
+  char send_buf[200];
+  RandomizeBuffer(send_buf, sizeof(send_buf));
+  ASSERT_THAT(RetryEINTR(send)(socket1->get(), send_buf, sizeof(send_buf), 0),
+              SyscallSucceedsWithValue(sizeof(send_buf)));
+
+  // Check that we received the multicast packet.
+  char recv_buf[sizeof(send_buf)] = {};
+  ASSERT_THAT(RetryEINTR(recv)(socket2->get(), recv_buf, sizeof(recv_buf), 0),
+              SyscallSucceedsWithValue(sizeof(recv_buf)));
+
+  EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf)));
+}
+
+// Check that multicast works when the default send interface is configured by
+// IP_MULTICAST_IF, the send address is specified in sendto, and the group
+// membership is configured by address.
+TEST_P(IPv4UDPUnboundSocketTest, IpMulticastLoopbackIfAddrSelf) {
+  auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  // Set the default send interface.
+  ip_mreq iface = {};
+  iface.imr_interface.s_addr = htonl(INADDR_LOOPBACK);
+  ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &iface,
+                         sizeof(iface)),
+              SyscallSucceeds());
+
+  // Bind the first FD to the v4 any address to ensure that we can receive the
+  // multicast packet.
+  auto receiver_addr = V4Any();
+  ASSERT_THAT(
+      bind(socket1->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+           receiver_addr.addr_len),
+      SyscallSucceeds());
+  socklen_t receiver_addr_len = receiver_addr.addr_len;
+  ASSERT_THAT(getsockname(socket1->get(),
+                          reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+                          &receiver_addr_len),
+              SyscallSucceeds());
+  EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len);
+
+  // Register to receive multicast packets.
+  ip_mreq group = {};
+  group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress);
+  group.imr_interface.s_addr = htonl(INADDR_LOOPBACK);
+  ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group,
+                         sizeof(group)),
+              SyscallSucceeds());
+
+  // Send a multicast packet.
+  auto send_addr = V4Multicast();
+  reinterpret_cast<sockaddr_in*>(&send_addr.addr)->sin_port =
+      reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port;
+  char send_buf[200];
+  RandomizeBuffer(send_buf, sizeof(send_buf));
+  ASSERT_THAT(RetryEINTR(sendto)(socket1->get(), send_buf, sizeof(send_buf), 0,
+                                 reinterpret_cast<sockaddr*>(&send_addr.addr),
+                                 send_addr.addr_len),
+              SyscallSucceedsWithValue(sizeof(send_buf)));
+
+  // Check that we received the multicast packet.
+  char recv_buf[sizeof(send_buf)] = {};
+  ASSERT_THAT(RetryEINTR(recv)(socket1->get(), recv_buf, sizeof(recv_buf), 0),
+              SyscallSucceedsWithValue(sizeof(recv_buf)));
+
+  EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf)));
+}
+
+// Check that multicast works when the default send interface is configured by
+// IP_MULTICAST_IF, the send address is specified in sendto, and the group
+// membership is configured by NIC ID.
+TEST_P(IPv4UDPUnboundSocketTest, IpMulticastLoopbackIfNicSelf) {
+  auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  // Set the default send interface.
+  ip_mreqn iface = {};
+  iface.imr_ifindex = ASSERT_NO_ERRNO_AND_VALUE(InterfaceIndex("lo"));
+  ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &iface,
+                         sizeof(iface)),
+              SyscallSucceeds());
+
+  // Bind the first FD to the v4 any address to ensure that we can receive the
+  // multicast packet.
+  auto receiver_addr = V4Any();
+  ASSERT_THAT(
+      bind(socket1->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+           receiver_addr.addr_len),
+      SyscallSucceeds());
+  socklen_t receiver_addr_len = receiver_addr.addr_len;
+  ASSERT_THAT(getsockname(socket1->get(),
+                          reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+                          &receiver_addr_len),
+              SyscallSucceeds());
+  EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len);
+
+  // Register to receive multicast packets.
+  ip_mreqn group = {};
+  group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress);
+  group.imr_ifindex = ASSERT_NO_ERRNO_AND_VALUE(InterfaceIndex("lo"));
+  ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group,
+                         sizeof(group)),
+              SyscallSucceeds());
+
+  // Send a multicast packet.
+  auto send_addr = V4Multicast();
+  reinterpret_cast<sockaddr_in*>(&send_addr.addr)->sin_port =
+      reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port;
+  char send_buf[200];
+  RandomizeBuffer(send_buf, sizeof(send_buf));
+  ASSERT_THAT(RetryEINTR(sendto)(socket1->get(), send_buf, sizeof(send_buf), 0,
+                                 reinterpret_cast<sockaddr*>(&send_addr.addr),
+                                 send_addr.addr_len),
+              SyscallSucceedsWithValue(sizeof(send_buf)));
+
+  // Check that we received the multicast packet.
+  char recv_buf[sizeof(send_buf)] = {};
+  ASSERT_THAT(RetryEINTR(recv)(socket1->get(), recv_buf, sizeof(recv_buf), 0),
+              SyscallSucceedsWithValue(sizeof(recv_buf)));
+
+  EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf)));
+}
+
+// Check that multicast works when the default send interface is configured by
+// IP_MULTICAST_IF, the send address is specified in connect, and the group
+// membership is configured by address.
+TEST_P(IPv4UDPUnboundSocketTest, IpMulticastLoopbackIfAddrSelfConnect) {
+  auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  // Set the default send interface.
+  ip_mreq iface = {};
+  iface.imr_interface.s_addr = htonl(INADDR_LOOPBACK);
+  ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &iface,
+                         sizeof(iface)),
+              SyscallSucceeds());
+
+  // Bind the first FD to the v4 any address to ensure that we can receive the
+  // multicast packet.
+  auto receiver_addr = V4Any();
+  ASSERT_THAT(
+      bind(socket1->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+           receiver_addr.addr_len),
+      SyscallSucceeds());
+  socklen_t receiver_addr_len = receiver_addr.addr_len;
+  ASSERT_THAT(getsockname(socket1->get(),
+                          reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+                          &receiver_addr_len),
+              SyscallSucceeds());
+  EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len);
+
+  // Register to receive multicast packets.
+  ip_mreq group = {};
+  group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress);
+  group.imr_interface.s_addr = htonl(INADDR_LOOPBACK);
+  EXPECT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group,
+                         sizeof(group)),
+              SyscallSucceeds());
+
+  // Send a multicast packet.
+  auto connect_addr = V4Multicast();
+  reinterpret_cast<sockaddr_in*>(&connect_addr.addr)->sin_port =
+      reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port;
+  EXPECT_THAT(
+      RetryEINTR(connect)(socket1->get(),
+                          reinterpret_cast<sockaddr*>(&connect_addr.addr),
+                          connect_addr.addr_len),
+      SyscallSucceeds());
+
+  char send_buf[200];
+  RandomizeBuffer(send_buf, sizeof(send_buf));
+  ASSERT_THAT(RetryEINTR(send)(socket1->get(), send_buf, sizeof(send_buf), 0),
+              SyscallSucceedsWithValue(sizeof(send_buf)));
+
+  // Check that we did not receive the multicast packet.
+  char recv_buf[sizeof(send_buf)] = {};
+  EXPECT_THAT(RetryEINTR(recv)(socket1->get(), recv_buf, sizeof(recv_buf),
+                               MSG_DONTWAIT),
+              SyscallFailsWithErrno(EAGAIN));
+}
+
+// Check that multicast works when the default send interface is configured by
+// IP_MULTICAST_IF, the send address is specified in connect, and the group
+// membership is configured by NIC ID.
+TEST_P(IPv4UDPUnboundSocketTest, IpMulticastLoopbackIfNicSelfConnect) {
+  auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  // Set the default send interface.
+  ip_mreqn iface = {};
+  iface.imr_ifindex = ASSERT_NO_ERRNO_AND_VALUE(InterfaceIndex("lo"));
+  ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &iface,
+                         sizeof(iface)),
+              SyscallSucceeds());
+
+  // Bind the first FD to the v4 any address to ensure that we can receive the
+  // multicast packet.
+  auto receiver_addr = V4Any();
+  ASSERT_THAT(
+      bind(socket1->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+           receiver_addr.addr_len),
+      SyscallSucceeds());
+  socklen_t receiver_addr_len = receiver_addr.addr_len;
+  ASSERT_THAT(getsockname(socket1->get(),
+                          reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+                          &receiver_addr_len),
+              SyscallSucceeds());
+  EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len);
+
+  // Register to receive multicast packets.
+  ip_mreqn group = {};
+  group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress);
+  group.imr_ifindex = ASSERT_NO_ERRNO_AND_VALUE(InterfaceIndex("lo"));
+  ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group,
+                         sizeof(group)),
+              SyscallSucceeds());
+
+  // Send a multicast packet.
+  auto connect_addr = V4Multicast();
+  reinterpret_cast<sockaddr_in*>(&connect_addr.addr)->sin_port =
+      reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port;
+  ASSERT_THAT(
+      RetryEINTR(connect)(socket1->get(),
+                          reinterpret_cast<sockaddr*>(&connect_addr.addr),
+                          connect_addr.addr_len),
+      SyscallSucceeds());
+
+  char send_buf[200];
+  RandomizeBuffer(send_buf, sizeof(send_buf));
+  ASSERT_THAT(RetryEINTR(send)(socket1->get(), send_buf, sizeof(send_buf), 0),
+              SyscallSucceedsWithValue(sizeof(send_buf)));
+
+  // Check that we did not receive the multicast packet.
+  char recv_buf[sizeof(send_buf)] = {};
+  EXPECT_THAT(RetryEINTR(recv)(socket1->get(), recv_buf, sizeof(recv_buf),
+                               MSG_DONTWAIT),
+              SyscallFailsWithErrno(EAGAIN));
+}
+
+// Check that multicast works when the default send interface is configured by
+// IP_MULTICAST_IF, the send address is specified in sendto, and the group
+// membership is configured by address.
+TEST_P(IPv4UDPUnboundSocketTest, IpMulticastLoopbackIfAddrSelfNoLoop) {
+  auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  // Set the default send interface.
+  ip_mreq iface = {};
+  iface.imr_interface.s_addr = htonl(INADDR_LOOPBACK);
+  EXPECT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &iface,
+                         sizeof(iface)),
+              SyscallSucceeds());
+
+  ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_LOOP,
+                         &kSockOptOff, sizeof(kSockOptOff)),
+              SyscallSucceeds());
+
+  // Bind the first FD to the v4 any address to ensure that we can receive the
+  // multicast packet.
+  auto receiver_addr = V4Any();
+  ASSERT_THAT(
+      bind(socket1->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+           receiver_addr.addr_len),
+      SyscallSucceeds());
+  socklen_t receiver_addr_len = receiver_addr.addr_len;
+  ASSERT_THAT(getsockname(socket1->get(),
+                          reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+                          &receiver_addr_len),
+              SyscallSucceeds());
+  EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len);
+
+  // Register to receive multicast packets.
+  ip_mreq group = {};
+  group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress);
+  group.imr_interface.s_addr = htonl(INADDR_LOOPBACK);
+  ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group,
+                         sizeof(group)),
+              SyscallSucceeds());
+
+  // Send a multicast packet.
+  auto send_addr = V4Multicast();
+  reinterpret_cast<sockaddr_in*>(&send_addr.addr)->sin_port =
+      reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port;
+  char send_buf[200];
+  RandomizeBuffer(send_buf, sizeof(send_buf));
+  ASSERT_THAT(RetryEINTR(sendto)(socket1->get(), send_buf, sizeof(send_buf), 0,
+                                 reinterpret_cast<sockaddr*>(&send_addr.addr),
+                                 send_addr.addr_len),
+              SyscallSucceedsWithValue(sizeof(send_buf)));
+
+  // Check that we received the multicast packet.
+  char recv_buf[sizeof(send_buf)] = {};
+  ASSERT_THAT(RetryEINTR(recv)(socket1->get(), recv_buf, sizeof(recv_buf), 0),
+              SyscallSucceedsWithValue(sizeof(recv_buf)));
+
+  EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf)));
+}
+
+// Check that multicast works when the default send interface is configured by
+// IP_MULTICAST_IF, the send address is specified in sendto, and the group
+// membership is configured by NIC ID.
+TEST_P(IPv4UDPUnboundSocketTest, IpMulticastLoopbackIfNicSelfNoLoop) {
+  auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  // Set the default send interface.
+  ip_mreqn iface = {};
+  iface.imr_ifindex = ASSERT_NO_ERRNO_AND_VALUE(InterfaceIndex("lo"));
+  ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &iface,
+                         sizeof(iface)),
+              SyscallSucceeds());
+
+  ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_LOOP,
+                         &kSockOptOff, sizeof(kSockOptOff)),
+              SyscallSucceeds());
+
+  // Bind the second FD to the v4 any address to ensure that we can receive the
+  // multicast packet.
+  auto receiver_addr = V4Any();
+  ASSERT_THAT(
+      bind(socket1->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+           receiver_addr.addr_len),
+      SyscallSucceeds());
+  socklen_t receiver_addr_len = receiver_addr.addr_len;
+  ASSERT_THAT(getsockname(socket1->get(),
+                          reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+                          &receiver_addr_len),
+              SyscallSucceeds());
+  EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len);
+
+  // Register to receive multicast packets.
+  ip_mreqn group = {};
+  group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress);
+  group.imr_ifindex = ASSERT_NO_ERRNO_AND_VALUE(InterfaceIndex("lo"));
+  EXPECT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group,
+                         sizeof(group)),
+              SyscallSucceeds());
+
+  // Send a multicast packet.
+  auto send_addr = V4Multicast();
+  reinterpret_cast<sockaddr_in*>(&send_addr.addr)->sin_port =
+      reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port;
+  char send_buf[200];
+  RandomizeBuffer(send_buf, sizeof(send_buf));
+  ASSERT_THAT(RetryEINTR(sendto)(socket1->get(), send_buf, sizeof(send_buf), 0,
+                                 reinterpret_cast<sockaddr*>(&send_addr.addr),
+                                 send_addr.addr_len),
+              SyscallSucceedsWithValue(sizeof(send_buf)));
+
+  // Check that we received the multicast packet.
+  char recv_buf[sizeof(send_buf)] = {};
+  ASSERT_THAT(RetryEINTR(recv)(socket1->get(), recv_buf, sizeof(recv_buf), 0),
+              SyscallSucceedsWithValue(sizeof(recv_buf)));
+
+  EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf)));
+}
+
+// Check that dropping a group membership that does not exist fails.
+TEST_P(IPv4UDPUnboundSocketTest, IpMulticastInvalidDrop) {
+  auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  // Unregister from a membership that we didn't have.
+  ip_mreq group = {};
+  group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress);
+  group.imr_interface.s_addr = htonl(INADDR_LOOPBACK);
+  EXPECT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_DROP_MEMBERSHIP, &group,
+                         sizeof(group)),
+              SyscallFailsWithErrno(EADDRNOTAVAIL));
+}
+
+// Check that dropping a group membership prevents multicast packets from being
+// delivered. Default send address configured by bind and group membership
+// interface configured by address.
+TEST_P(IPv4UDPUnboundSocketTest, IpMulticastDropAddr) {
+  auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  // Bind the first FD to the loopback. This is an alternative to
+  // IP_MULTICAST_IF for setting the default send interface.
+  auto sender_addr = V4Loopback();
+  EXPECT_THAT(
+      bind(socket1->get(), reinterpret_cast<sockaddr*>(&sender_addr.addr),
+           sender_addr.addr_len),
+      SyscallSucceeds());
+
+  // Bind the second FD to the v4 any address to ensure that we can receive the
+  // multicast packet.
+  auto receiver_addr = V4Any();
+  EXPECT_THAT(
+      bind(socket2->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+           receiver_addr.addr_len),
+      SyscallSucceeds());
+  socklen_t receiver_addr_len = receiver_addr.addr_len;
+  ASSERT_THAT(getsockname(socket2->get(),
+                          reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+                          &receiver_addr_len),
+              SyscallSucceeds());
+  EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len);
+
+  // Register and unregister to receive multicast packets.
+  ip_mreq group = {};
+  group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress);
+  group.imr_interface.s_addr = htonl(INADDR_LOOPBACK);
+  EXPECT_THAT(setsockopt(socket2->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group,
+                         sizeof(group)),
+              SyscallSucceeds());
+  EXPECT_THAT(setsockopt(socket2->get(), IPPROTO_IP, IP_DROP_MEMBERSHIP, &group,
+                         sizeof(group)),
+              SyscallSucceeds());
+
+  // Send a multicast packet.
+  auto send_addr = V4Multicast();
+  reinterpret_cast<sockaddr_in*>(&send_addr.addr)->sin_port =
+      reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port;
+  char send_buf[200];
+  RandomizeBuffer(send_buf, sizeof(send_buf));
+  EXPECT_THAT(RetryEINTR(sendto)(socket1->get(), send_buf, sizeof(send_buf), 0,
+                                 reinterpret_cast<sockaddr*>(&send_addr.addr),
+                                 send_addr.addr_len),
+              SyscallSucceedsWithValue(sizeof(send_buf)));
+
+  // Check that we did not receive the multicast packet.
+  char recv_buf[sizeof(send_buf)] = {};
+  EXPECT_THAT(RetryEINTR(recv)(socket2->get(), recv_buf, sizeof(recv_buf),
+                               MSG_DONTWAIT),
+              SyscallFailsWithErrno(EAGAIN));
+}
+
+// Check that dropping a group membership prevents multicast packets from being
+// delivered. Default send address configured by bind and group membership
+// interface configured by NIC ID.
+TEST_P(IPv4UDPUnboundSocketTest, IpMulticastDropNic) {
+  auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  // Bind the first FD to the loopback. This is an alternative to
+  // IP_MULTICAST_IF for setting the default send interface.
+  auto sender_addr = V4Loopback();
+  EXPECT_THAT(
+      bind(socket1->get(), reinterpret_cast<sockaddr*>(&sender_addr.addr),
+           sender_addr.addr_len),
+      SyscallSucceeds());
+
+  // Bind the second FD to the v4 any address to ensure that we can receive the
+  // multicast packet.
+  auto receiver_addr = V4Any();
+  EXPECT_THAT(
+      bind(socket2->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+           receiver_addr.addr_len),
+      SyscallSucceeds());
+  socklen_t receiver_addr_len = receiver_addr.addr_len;
+  ASSERT_THAT(getsockname(socket2->get(),
+                          reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+                          &receiver_addr_len),
+              SyscallSucceeds());
+  EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len);
+
+  // Register and unregister to receive multicast packets.
+  ip_mreqn group = {};
+  group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress);
+  group.imr_ifindex = ASSERT_NO_ERRNO_AND_VALUE(InterfaceIndex("lo"));
+  EXPECT_THAT(setsockopt(socket2->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group,
+                         sizeof(group)),
+              SyscallSucceeds());
+  EXPECT_THAT(setsockopt(socket2->get(), IPPROTO_IP, IP_DROP_MEMBERSHIP, &group,
+                         sizeof(group)),
+              SyscallSucceeds());
+
+  // Send a multicast packet.
+  auto send_addr = V4Multicast();
+  reinterpret_cast<sockaddr_in*>(&send_addr.addr)->sin_port =
+      reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port;
+  char send_buf[200];
+  RandomizeBuffer(send_buf, sizeof(send_buf));
+  EXPECT_THAT(RetryEINTR(sendto)(socket1->get(), send_buf, sizeof(send_buf), 0,
+                                 reinterpret_cast<sockaddr*>(&send_addr.addr),
+                                 send_addr.addr_len),
+              SyscallSucceedsWithValue(sizeof(send_buf)));
+
+  // Check that we did not receive the multicast packet.
+  char recv_buf[sizeof(send_buf)] = {};
+  EXPECT_THAT(RetryEINTR(recv)(socket2->get(), recv_buf, sizeof(recv_buf),
+                               MSG_DONTWAIT),
+              SyscallFailsWithErrno(EAGAIN));
+}
+
+TEST_P(IPv4UDPUnboundSocketTest, IpMulticastIfZero) {
+  auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  ip_mreqn iface = {};
+  EXPECT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &iface,
+                         sizeof(iface)),
+              SyscallSucceeds());
+}
+
+TEST_P(IPv4UDPUnboundSocketTest, IpMulticastIfInvalidNic) {
+  auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  ip_mreqn iface = {};
+  iface.imr_ifindex = -1;
+  EXPECT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &iface,
+                         sizeof(iface)),
+              SyscallFailsWithErrno(EADDRNOTAVAIL));
+}
+
+TEST_P(IPv4UDPUnboundSocketTest, IpMulticastIfInvalidAddr) {
+  auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  ip_mreq iface = {};
+  iface.imr_interface.s_addr = inet_addr("255.255.255");
+  EXPECT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &iface,
+                         sizeof(iface)),
+              SyscallFailsWithErrno(EADDRNOTAVAIL));
+}
+
+TEST_P(IPv4UDPUnboundSocketTest, IpMulticastIfSetShort) {
+  auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  // Create a valid full-sized request.
+  ip_mreqn iface = {};
+  iface.imr_ifindex = ASSERT_NO_ERRNO_AND_VALUE(InterfaceIndex("lo"));
+
+  // Send an optlen of 1 to check that optlen is enforced.
+  EXPECT_THAT(
+      setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &iface, 1),
+      SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_P(IPv4UDPUnboundSocketTest, IpMulticastIfDefault) {
+  auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  in_addr get = {};
+  socklen_t size = sizeof(get);
+  ASSERT_THAT(
+      getsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &get, &size),
+      SyscallSucceeds());
+  EXPECT_EQ(size, sizeof(get));
+  EXPECT_EQ(get.s_addr, 0);
+}
+
+TEST_P(IPv4UDPUnboundSocketTest, IpMulticastIfDefaultReqn) {
+  auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  ip_mreqn get = {};
+  socklen_t size = sizeof(get);
+  ASSERT_THAT(
+      getsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &get, &size),
+      SyscallSucceeds());
+
+  // getsockopt(IP_MULTICAST_IF) can only return an in_addr, so it treats the
+  // first sizeof(struct in_addr) bytes of struct ip_mreqn as a struct in_addr.
+  // Conveniently, this corresponds to the field ip_mreqn::imr_multiaddr.
+  EXPECT_EQ(size, sizeof(in_addr));
+
+  // getsockopt(IP_MULTICAST_IF) will only return the interface address which
+  // hasn't been set.
+  EXPECT_EQ(get.imr_multiaddr.s_addr, 0);
+  EXPECT_EQ(get.imr_address.s_addr, 0);
+  EXPECT_EQ(get.imr_ifindex, 0);
+}
+
+TEST_P(IPv4UDPUnboundSocketTest, IpMulticastIfSetAddrGetReqn) {
+  auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  in_addr set = {};
+  set.s_addr = htonl(INADDR_LOOPBACK);
+  ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &set,
+                         sizeof(set)),
+              SyscallSucceeds());
+
+  ip_mreqn get = {};
+  socklen_t size = sizeof(get);
+  ASSERT_THAT(
+      getsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &get, &size),
+      SyscallSucceeds());
+
+  // getsockopt(IP_MULTICAST_IF) can only return an in_addr, so it treats the
+  // first sizeof(struct in_addr) bytes of struct ip_mreqn as a struct in_addr.
+  // Conveniently, this corresponds to the field ip_mreqn::imr_multiaddr.
+  EXPECT_EQ(size, sizeof(in_addr));
+  EXPECT_EQ(get.imr_multiaddr.s_addr, set.s_addr);
+  EXPECT_EQ(get.imr_address.s_addr, 0);
+  EXPECT_EQ(get.imr_ifindex, 0);
+}
+
+TEST_P(IPv4UDPUnboundSocketTest, IpMulticastIfSetReqAddrGetReqn) {
+  auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  ip_mreq set = {};
+  set.imr_interface.s_addr = htonl(INADDR_LOOPBACK);
+  ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &set,
+                         sizeof(set)),
+              SyscallSucceeds());
+
+  ip_mreqn get = {};
+  socklen_t size = sizeof(get);
+  ASSERT_THAT(
+      getsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &get, &size),
+      SyscallSucceeds());
+
+  // getsockopt(IP_MULTICAST_IF) can only return an in_addr, so it treats the
+  // first sizeof(struct in_addr) bytes of struct ip_mreqn as a struct in_addr.
+  // Conveniently, this corresponds to the field ip_mreqn::imr_multiaddr.
+  EXPECT_EQ(size, sizeof(in_addr));
+  EXPECT_EQ(get.imr_multiaddr.s_addr, set.imr_interface.s_addr);
+  EXPECT_EQ(get.imr_address.s_addr, 0);
+  EXPECT_EQ(get.imr_ifindex, 0);
+}
+
+TEST_P(IPv4UDPUnboundSocketTest, IpMulticastIfSetNicGetReqn) {
+  auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  ip_mreqn set = {};
+  set.imr_ifindex = ASSERT_NO_ERRNO_AND_VALUE(InterfaceIndex("lo"));
+  ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &set,
+                         sizeof(set)),
+              SyscallSucceeds());
+
+  ip_mreqn get = {};
+  socklen_t size = sizeof(get);
+  ASSERT_THAT(
+      getsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &get, &size),
+      SyscallSucceeds());
+  EXPECT_EQ(size, sizeof(in_addr));
+  EXPECT_EQ(get.imr_multiaddr.s_addr, 0);
+  EXPECT_EQ(get.imr_address.s_addr, 0);
+  EXPECT_EQ(get.imr_ifindex, 0);
+}
+
+TEST_P(IPv4UDPUnboundSocketTest, IpMulticastIfSetAddr) {
+  auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  in_addr set = {};
+  set.s_addr = htonl(INADDR_LOOPBACK);
+  ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &set,
+                         sizeof(set)),
+              SyscallSucceeds());
+
+  in_addr get = {};
+  socklen_t size = sizeof(get);
+  ASSERT_THAT(
+      getsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &get, &size),
+      SyscallSucceeds());
+
+  EXPECT_EQ(size, sizeof(get));
+  EXPECT_EQ(get.s_addr, set.s_addr);
+}
+
+TEST_P(IPv4UDPUnboundSocketTest, IpMulticastIfSetReqAddr) {
+  auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  ip_mreq set = {};
+  set.imr_interface.s_addr = htonl(INADDR_LOOPBACK);
+  ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &set,
+                         sizeof(set)),
+              SyscallSucceeds());
+
+  in_addr get = {};
+  socklen_t size = sizeof(get);
+  ASSERT_THAT(
+      getsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &get, &size),
+      SyscallSucceeds());
+
+  EXPECT_EQ(size, sizeof(get));
+  EXPECT_EQ(get.s_addr, set.imr_interface.s_addr);
+}
+
+TEST_P(IPv4UDPUnboundSocketTest, IpMulticastIfSetNic) {
+  auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  ip_mreqn set = {};
+  set.imr_ifindex = ASSERT_NO_ERRNO_AND_VALUE(InterfaceIndex("lo"));
+  ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &set,
+                         sizeof(set)),
+              SyscallSucceeds());
+
+  in_addr get = {};
+  socklen_t size = sizeof(get);
+  ASSERT_THAT(
+      getsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &get, &size),
+      SyscallSucceeds());
+  EXPECT_EQ(size, sizeof(get));
+  EXPECT_EQ(get.s_addr, 0);
+}
+
+TEST_P(IPv4UDPUnboundSocketTest, TestJoinGroupNoIf) {
+  auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  ip_mreqn group = {};
+  group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress);
+  EXPECT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group,
+                         sizeof(group)),
+              SyscallFailsWithErrno(ENODEV));
+}
+
+TEST_P(IPv4UDPUnboundSocketTest, TestJoinGroupInvalidIf) {
+  auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  ip_mreqn group = {};
+  group.imr_address.s_addr = inet_addr("255.255.255");
+  group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress);
+  EXPECT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group,
+                         sizeof(group)),
+              SyscallFailsWithErrno(ENODEV));
+}
+
+// Check that multiple memberships are not allowed on the same socket.
+TEST_P(IPv4UDPUnboundSocketTest, TestMultipleJoinsOnSingleSocket) {
+  auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto fd = socket1->get();
+  ip_mreqn group = {};
+  group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress);
+  group.imr_ifindex = ASSERT_NO_ERRNO_AND_VALUE(InterfaceIndex("lo"));
+
+  EXPECT_THAT(
+      setsockopt(fd, IPPROTO_IP, IP_ADD_MEMBERSHIP, &group, sizeof(group)),
+      SyscallSucceeds());
+
+  EXPECT_THAT(
+      setsockopt(fd, IPPROTO_IP, IP_ADD_MEMBERSHIP, &group, sizeof(group)),
+      SyscallFailsWithErrno(EADDRINUSE));
+}
+
+// Check that two sockets can join the same multicast group at the same time.
+TEST_P(IPv4UDPUnboundSocketTest, TestTwoSocketsJoinSameMulticastGroup) {
+  auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  ip_mreqn group = {};
+  group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress);
+  group.imr_ifindex = ASSERT_NO_ERRNO_AND_VALUE(InterfaceIndex("lo"));
+  EXPECT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group,
+                         sizeof(group)),
+              SyscallSucceeds());
+  EXPECT_THAT(setsockopt(socket2->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group,
+                         sizeof(group)),
+              SyscallSucceeds());
+
+  // Drop the membership twice on each socket, the second call for each socket
+  // should fail.
+  EXPECT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_DROP_MEMBERSHIP, &group,
+                         sizeof(group)),
+              SyscallSucceeds());
+  EXPECT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_DROP_MEMBERSHIP, &group,
+                         sizeof(group)),
+              SyscallFailsWithErrno(EADDRNOTAVAIL));
+  EXPECT_THAT(setsockopt(socket2->get(), IPPROTO_IP, IP_DROP_MEMBERSHIP, &group,
+                         sizeof(group)),
+              SyscallSucceeds());
+  EXPECT_THAT(setsockopt(socket2->get(), IPPROTO_IP, IP_DROP_MEMBERSHIP, &group,
+                         sizeof(group)),
+              SyscallFailsWithErrno(EADDRNOTAVAIL));
+}
+
+// Check that two sockets can join the same multicast group at the same time,
+// and both will receive data on it.
+TEST_P(IPv4UDPUnboundSocketTest, TestMcastReceptionOnTwoSockets) {
+  std::unique_ptr<SocketPair> socket_pairs[2] = {
+      absl::make_unique<FDSocketPair>(ASSERT_NO_ERRNO_AND_VALUE(NewSocket()),
+                                      ASSERT_NO_ERRNO_AND_VALUE(NewSocket())),
+      absl::make_unique<FDSocketPair>(ASSERT_NO_ERRNO_AND_VALUE(NewSocket()),
+                                      ASSERT_NO_ERRNO_AND_VALUE(NewSocket()))};
+
+  ip_mreq iface = {}, group = {};
+  iface.imr_interface.s_addr = htonl(INADDR_LOOPBACK);
+  group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress);
+  group.imr_interface.s_addr = htonl(INADDR_LOOPBACK);
+  auto receiver_addr = V4Any();
+  int bound_port = 0;
+
+  // Create two socketpairs with the exact same configuration.
+  for (auto& sockets : socket_pairs) {
+    ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_IF,
+                           &iface, sizeof(iface)),
+                SyscallSucceeds());
+    ASSERT_THAT(setsockopt(sockets->second_fd(), SOL_SOCKET, SO_REUSEPORT,
+                           &kSockOptOn, sizeof(kSockOptOn)),
+                SyscallSucceeds());
+    ASSERT_THAT(setsockopt(sockets->second_fd(), IPPROTO_IP, IP_ADD_MEMBERSHIP,
+                           &group, sizeof(group)),
+                SyscallSucceeds());
+    ASSERT_THAT(bind(sockets->second_fd(),
+                     reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+                     receiver_addr.addr_len),
+                SyscallSucceeds());
+    // Get the port assigned.
+    socklen_t receiver_addr_len = receiver_addr.addr_len;
+    ASSERT_THAT(getsockname(sockets->second_fd(),
+                            reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+                            &receiver_addr_len),
+                SyscallSucceeds());
+    EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len);
+    // On the first iteration, save the port we are bound to. On the second
+    // iteration, verify the port is the same as the one from the first
+    // iteration. In other words, both sockets listen on the same port.
+    if (bound_port == 0) {
+      bound_port =
+          reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port;
+    } else {
+      EXPECT_EQ(bound_port,
+                reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port);
+    }
+  }
+
+  // Send a multicast packet to the group from two different sockets and verify
+  // it is received by both sockets that joined that group.
+  auto send_addr = V4Multicast();
+  reinterpret_cast<sockaddr_in*>(&send_addr.addr)->sin_port = bound_port;
+  for (auto& sockets : socket_pairs) {
+    char send_buf[200];
+    RandomizeBuffer(send_buf, sizeof(send_buf));
+    ASSERT_THAT(
+        RetryEINTR(sendto)(sockets->first_fd(), send_buf, sizeof(send_buf), 0,
+                           reinterpret_cast<sockaddr*>(&send_addr.addr),
+                           send_addr.addr_len),
+        SyscallSucceedsWithValue(sizeof(send_buf)));
+
+    // Check that we received the multicast packet on both sockets.
+    for (auto& sockets : socket_pairs) {
+      char recv_buf[sizeof(send_buf)] = {};
+      ASSERT_THAT(
+          RetryEINTR(recv)(sockets->second_fd(), recv_buf, sizeof(recv_buf), 0),
+          SyscallSucceedsWithValue(sizeof(recv_buf)));
+      EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf)));
+    }
+  }
+}
+
+// Check that on two sockets that joined a group and listen on ANY, dropping
+// memberships one by one will continue to deliver packets to both sockets until
+// both memberships have been dropped.
+TEST_P(IPv4UDPUnboundSocketTest, TestMcastReceptionWhenDroppingMemberships) {
+  std::unique_ptr<SocketPair> socket_pairs[2] = {
+      absl::make_unique<FDSocketPair>(ASSERT_NO_ERRNO_AND_VALUE(NewSocket()),
+                                      ASSERT_NO_ERRNO_AND_VALUE(NewSocket())),
+      absl::make_unique<FDSocketPair>(ASSERT_NO_ERRNO_AND_VALUE(NewSocket()),
+                                      ASSERT_NO_ERRNO_AND_VALUE(NewSocket()))};
+
+  ip_mreq iface = {}, group = {};
+  iface.imr_interface.s_addr = htonl(INADDR_LOOPBACK);
+  group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress);
+  group.imr_interface.s_addr = htonl(INADDR_LOOPBACK);
+  auto receiver_addr = V4Any();
+  int bound_port = 0;
+
+  // Create two socketpairs with the exact same configuration.
+  for (auto& sockets : socket_pairs) {
+    ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_IF,
+                           &iface, sizeof(iface)),
+                SyscallSucceeds());
+    ASSERT_THAT(setsockopt(sockets->second_fd(), SOL_SOCKET, SO_REUSEPORT,
+                           &kSockOptOn, sizeof(kSockOptOn)),
+                SyscallSucceeds());
+    ASSERT_THAT(setsockopt(sockets->second_fd(), IPPROTO_IP, IP_ADD_MEMBERSHIP,
+                           &group, sizeof(group)),
+                SyscallSucceeds());
+    ASSERT_THAT(bind(sockets->second_fd(),
+                     reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+                     receiver_addr.addr_len),
+                SyscallSucceeds());
+    // Get the port assigned.
+    socklen_t receiver_addr_len = receiver_addr.addr_len;
+    ASSERT_THAT(getsockname(sockets->second_fd(),
+                            reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+                            &receiver_addr_len),
+                SyscallSucceeds());
+    EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len);
+    // On the first iteration, save the port we are bound to. On the second
+    // iteration, verify the port is the same as the one from the first
+    // iteration. In other words, both sockets listen on the same port.
+    if (bound_port == 0) {
+      bound_port =
+          reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port;
+    } else {
+      EXPECT_EQ(bound_port,
+                reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port);
+    }
+  }
+
+  // Drop the membership of the first socket pair and verify data is still
+  // received.
+  ASSERT_THAT(setsockopt(socket_pairs[0]->second_fd(), IPPROTO_IP,
+                         IP_DROP_MEMBERSHIP, &group, sizeof(group)),
+              SyscallSucceeds());
+  // Send a packet from each socket_pair.
+  auto send_addr = V4Multicast();
+  reinterpret_cast<sockaddr_in*>(&send_addr.addr)->sin_port = bound_port;
+  for (auto& sockets : socket_pairs) {
+    char send_buf[200];
+    RandomizeBuffer(send_buf, sizeof(send_buf));
+    ASSERT_THAT(
+        RetryEINTR(sendto)(sockets->first_fd(), send_buf, sizeof(send_buf), 0,
+                           reinterpret_cast<sockaddr*>(&send_addr.addr),
+                           send_addr.addr_len),
+        SyscallSucceedsWithValue(sizeof(send_buf)));
+
+    // Check that we received the multicast packet on both sockets.
+    for (auto& sockets : socket_pairs) {
+      char recv_buf[sizeof(send_buf)] = {};
+      ASSERT_THAT(
+          RetryEINTR(recv)(sockets->second_fd(), recv_buf, sizeof(recv_buf), 0),
+          SyscallSucceedsWithValue(sizeof(recv_buf)));
+      EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf)));
+    }
+  }
+
+  // Drop the membership of the second socket pair and verify data stops being
+  // received.
+  ASSERT_THAT(setsockopt(socket_pairs[1]->second_fd(), IPPROTO_IP,
+                         IP_DROP_MEMBERSHIP, &group, sizeof(group)),
+              SyscallSucceeds());
+  // Send a packet from each socket_pair.
+  for (auto& sockets : socket_pairs) {
+    char send_buf[200];
+    ASSERT_THAT(
+        RetryEINTR(sendto)(sockets->first_fd(), send_buf, sizeof(send_buf), 0,
+                           reinterpret_cast<sockaddr*>(&send_addr.addr),
+                           send_addr.addr_len),
+        SyscallSucceedsWithValue(sizeof(send_buf)));
+
+    char recv_buf[sizeof(send_buf)] = {};
+    for (auto& sockets : socket_pairs) {
+      ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), recv_buf,
+                                   sizeof(recv_buf), MSG_DONTWAIT),
+                  SyscallFailsWithErrno(EAGAIN));
+    }
+  }
+}
+
+// Check that a receiving socket can bind to the multicast address before
+// joining the group and receive data once the group has been joined.
+TEST_P(IPv4UDPUnboundSocketTest, TestBindToMcastThenJoinThenReceive) {
+  auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  // Bind second socket (receiver) to the multicast address.
+  auto receiver_addr = V4Multicast();
+  ASSERT_THAT(
+      bind(socket2->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+           receiver_addr.addr_len),
+      SyscallSucceeds());
+  // Update receiver_addr with the correct port number.
+  socklen_t receiver_addr_len = receiver_addr.addr_len;
+  ASSERT_THAT(getsockname(socket2->get(),
+                          reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+                          &receiver_addr_len),
+              SyscallSucceeds());
+  EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len);
+
+  // Register to receive multicast packets.
+  ip_mreqn group = {};
+  group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress);
+  group.imr_ifindex = ASSERT_NO_ERRNO_AND_VALUE(InterfaceIndex("lo"));
+  ASSERT_THAT(setsockopt(socket2->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group,
+                         sizeof(group)),
+              SyscallSucceeds());
+
+  // Send a multicast packet on the first socket out the loopback interface.
+  ip_mreq iface = {};
+  iface.imr_interface.s_addr = htonl(INADDR_LOOPBACK);
+  ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &iface,
+                         sizeof(iface)),
+              SyscallSucceeds());
+  auto sendto_addr = V4Multicast();
+  reinterpret_cast<sockaddr_in*>(&sendto_addr.addr)->sin_port =
+      reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port;
+  char send_buf[200];
+  RandomizeBuffer(send_buf, sizeof(send_buf));
+  ASSERT_THAT(RetryEINTR(sendto)(socket1->get(), send_buf, sizeof(send_buf), 0,
+                                 reinterpret_cast<sockaddr*>(&sendto_addr.addr),
+                                 sendto_addr.addr_len),
+              SyscallSucceedsWithValue(sizeof(send_buf)));
+
+  // Check that we received the multicast packet.
+  char recv_buf[sizeof(send_buf)] = {};
+  ASSERT_THAT(RetryEINTR(recv)(socket2->get(), recv_buf, sizeof(recv_buf),
+                               MSG_DONTWAIT),
+              SyscallSucceedsWithValue(sizeof(recv_buf)));
+  EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf)));
+}
+
+// Check that a receiving socket can bind to the multicast address and won't
+// receive multicast data if it hasn't joined the group.
+TEST_P(IPv4UDPUnboundSocketTest, TestBindToMcastThenNoJoinThenNoReceive) {
+  auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  // Bind second socket (receiver) to the multicast address.
+  auto receiver_addr = V4Multicast();
+  ASSERT_THAT(
+      bind(socket2->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+           receiver_addr.addr_len),
+      SyscallSucceeds());
+  // Update receiver_addr with the correct port number.
+  socklen_t receiver_addr_len = receiver_addr.addr_len;
+  ASSERT_THAT(getsockname(socket2->get(),
+                          reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+                          &receiver_addr_len),
+              SyscallSucceeds());
+  EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len);
+
+  // Send a multicast packet on the first socket out the loopback interface.
+  ip_mreq iface = {};
+  iface.imr_interface.s_addr = htonl(INADDR_LOOPBACK);
+  ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &iface,
+                         sizeof(iface)),
+              SyscallSucceeds());
+  auto sendto_addr = V4Multicast();
+  reinterpret_cast<sockaddr_in*>(&sendto_addr.addr)->sin_port =
+      reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port;
+  char send_buf[200];
+  RandomizeBuffer(send_buf, sizeof(send_buf));
+  ASSERT_THAT(RetryEINTR(sendto)(socket1->get(), send_buf, sizeof(send_buf), 0,
+                                 reinterpret_cast<sockaddr*>(&sendto_addr.addr),
+                                 sendto_addr.addr_len),
+              SyscallSucceedsWithValue(sizeof(send_buf)));
+
+  // Check that we don't receive the multicast packet.
+  char recv_buf[sizeof(send_buf)] = {};
+  ASSERT_THAT(RetryEINTR(recv)(socket2->get(), recv_buf, sizeof(recv_buf),
+                               MSG_DONTWAIT),
+              SyscallFailsWithErrno(EAGAIN));
+}
+
+// Check that a socket can bind to a multicast address and still send out
+// packets.
+TEST_P(IPv4UDPUnboundSocketTest, TestBindToMcastThenSend) {
+  auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  // Bind second socket (receiver) to the ANY address.
+  auto receiver_addr = V4Any();
+  ASSERT_THAT(
+      bind(socket2->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+           receiver_addr.addr_len),
+      SyscallSucceeds());
+  socklen_t receiver_addr_len = receiver_addr.addr_len;
+  ASSERT_THAT(getsockname(socket2->get(),
+                          reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+                          &receiver_addr_len),
+              SyscallSucceeds());
+  EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len);
+
+  // Bind the first socket (sender) to the multicast address.
+  auto sender_addr = V4Multicast();
+  ASSERT_THAT(
+      bind(socket1->get(), reinterpret_cast<sockaddr*>(&sender_addr.addr),
+           sender_addr.addr_len),
+      SyscallSucceeds());
+  socklen_t sender_addr_len = sender_addr.addr_len;
+  ASSERT_THAT(getsockname(socket1->get(),
+                          reinterpret_cast<sockaddr*>(&sender_addr.addr),
+                          &sender_addr_len),
+              SyscallSucceeds());
+  EXPECT_EQ(sender_addr_len, sender_addr.addr_len);
+
+  // Send a packet on the first socket to the loopback address.
+  auto sendto_addr = V4Loopback();
+  reinterpret_cast<sockaddr_in*>(&sendto_addr.addr)->sin_port =
+      reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port;
+  char send_buf[200];
+  RandomizeBuffer(send_buf, sizeof(send_buf));
+  ASSERT_THAT(RetryEINTR(sendto)(socket1->get(), send_buf, sizeof(send_buf), 0,
+                                 reinterpret_cast<sockaddr*>(&sendto_addr.addr),
+                                 sendto_addr.addr_len),
+              SyscallSucceedsWithValue(sizeof(send_buf)));
+
+  // Check that we received the packet.
+  char recv_buf[sizeof(send_buf)] = {};
+  ASSERT_THAT(RetryEINTR(recv)(socket2->get(), recv_buf, sizeof(recv_buf),
+                               MSG_DONTWAIT),
+              SyscallSucceedsWithValue(sizeof(recv_buf)));
+  EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf)));
+}
+
+// Check that a receiving socket can bind to the broadcast address and receive
+// broadcast packets.
+TEST_P(IPv4UDPUnboundSocketTest, TestBindToBcastThenReceive) {
+  auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  // Bind second socket (receiver) to the broadcast address.
+  auto receiver_addr = V4Broadcast();
+  ASSERT_THAT(
+      bind(socket2->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+           receiver_addr.addr_len),
+      SyscallSucceeds());
+  socklen_t receiver_addr_len = receiver_addr.addr_len;
+  ASSERT_THAT(getsockname(socket2->get(),
+                          reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+                          &receiver_addr_len),
+              SyscallSucceeds());
+  EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len);
+
+  // Send a broadcast packet on the first socket out the loopback interface.
+  EXPECT_THAT(setsockopt(socket1->get(), SOL_SOCKET, SO_BROADCAST, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceedsWithValue(0));
+  // Note: Binding to the loopback interface makes the broadcast go out of it.
+  auto sender_bind_addr = V4Loopback();
+  ASSERT_THAT(
+      bind(socket1->get(), reinterpret_cast<sockaddr*>(&sender_bind_addr.addr),
+           sender_bind_addr.addr_len),
+      SyscallSucceeds());
+  auto sendto_addr = V4Broadcast();
+  reinterpret_cast<sockaddr_in*>(&sendto_addr.addr)->sin_port =
+      reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port;
+  char send_buf[200];
+  RandomizeBuffer(send_buf, sizeof(send_buf));
+  ASSERT_THAT(RetryEINTR(sendto)(socket1->get(), send_buf, sizeof(send_buf), 0,
+                                 reinterpret_cast<sockaddr*>(&sendto_addr.addr),
+                                 sendto_addr.addr_len),
+              SyscallSucceedsWithValue(sizeof(send_buf)));
+
+  // Check that we received the multicast packet.
+  char recv_buf[sizeof(send_buf)] = {};
+  ASSERT_THAT(RetryEINTR(recv)(socket2->get(), recv_buf, sizeof(recv_buf),
+                               MSG_DONTWAIT),
+              SyscallSucceedsWithValue(sizeof(recv_buf)));
+  EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf)));
+}
+
+// Check that a socket can bind to the broadcast address and still send out
+// packets.
+TEST_P(IPv4UDPUnboundSocketTest, TestBindToBcastThenSend) {
+  auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  // Bind second socket (receiver) to the ANY address.
+  auto receiver_addr = V4Any();
+  ASSERT_THAT(
+      bind(socket2->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+           receiver_addr.addr_len),
+      SyscallSucceeds());
+  socklen_t receiver_addr_len = receiver_addr.addr_len;
+  ASSERT_THAT(getsockname(socket2->get(),
+                          reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+                          &receiver_addr_len),
+              SyscallSucceeds());
+  EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len);
+
+  // Bind the first socket (sender) to the broadcast address.
+  auto sender_addr = V4Broadcast();
+  ASSERT_THAT(
+      bind(socket1->get(), reinterpret_cast<sockaddr*>(&sender_addr.addr),
+           sender_addr.addr_len),
+      SyscallSucceeds());
+  socklen_t sender_addr_len = sender_addr.addr_len;
+  ASSERT_THAT(getsockname(socket1->get(),
+                          reinterpret_cast<sockaddr*>(&sender_addr.addr),
+                          &sender_addr_len),
+              SyscallSucceeds());
+  EXPECT_EQ(sender_addr_len, sender_addr.addr_len);
+
+  // Send a packet on the first socket to the loopback address.
+  auto sendto_addr = V4Loopback();
+  reinterpret_cast<sockaddr_in*>(&sendto_addr.addr)->sin_port =
+      reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port;
+  char send_buf[200];
+  RandomizeBuffer(send_buf, sizeof(send_buf));
+  ASSERT_THAT(RetryEINTR(sendto)(socket1->get(), send_buf, sizeof(send_buf), 0,
+                                 reinterpret_cast<sockaddr*>(&sendto_addr.addr),
+                                 sendto_addr.addr_len),
+              SyscallSucceedsWithValue(sizeof(send_buf)));
+
+  // Check that we received the packet.
+  char recv_buf[sizeof(send_buf)] = {};
+  ASSERT_THAT(RetryEINTR(recv)(socket2->get(), recv_buf, sizeof(recv_buf),
+                               MSG_DONTWAIT),
+              SyscallSucceedsWithValue(sizeof(recv_buf)));
+  EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf)));
+}
+
+// Check that SO_REUSEADDR always delivers to the most recently bound socket.
+//
+// FIXME(gvisor.dev/issue/873): Endpoint order is not restored correctly. Enable
+// random and co-op save (below) once that is fixed.
+TEST_P(IPv4UDPUnboundSocketTest, ReuseAddrDistribution_NoRandomSave) {
+  std::vector<std::unique_ptr<FileDescriptor>> sockets;
+  sockets.emplace_back(ASSERT_NO_ERRNO_AND_VALUE(NewSocket()));
+
+  ASSERT_THAT(setsockopt(sockets[0]->get(), SOL_SOCKET, SO_REUSEADDR,
+                         &kSockOptOn, sizeof(kSockOptOn)),
+              SyscallSucceeds());
+
+  // Bind the first socket to the loopback and take note of the selected port.
+  auto addr = V4Loopback();
+  ASSERT_THAT(bind(sockets[0]->get(), reinterpret_cast<sockaddr*>(&addr.addr),
+                   addr.addr_len),
+              SyscallSucceeds());
+  socklen_t addr_len = addr.addr_len;
+  ASSERT_THAT(getsockname(sockets[0]->get(),
+                          reinterpret_cast<sockaddr*>(&addr.addr), &addr_len),
+              SyscallSucceeds());
+  EXPECT_EQ(addr_len, addr.addr_len);
+
+  constexpr int kMessageSize = 200;
+
+  // FIXME(gvisor.dev/issue/873): Endpoint order is not restored correctly.
+  const DisableSave ds;
+
+  for (int i = 0; i < 10; i++) {
+    // Add a new receiver.
+    sockets.emplace_back(ASSERT_NO_ERRNO_AND_VALUE(NewSocket()));
+    auto& last = sockets.back();
+    ASSERT_THAT(setsockopt(last->get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn,
+                           sizeof(kSockOptOn)),
+                SyscallSucceeds());
+    ASSERT_THAT(bind(last->get(), reinterpret_cast<sockaddr*>(&addr.addr),
+                     addr.addr_len),
+                SyscallSucceeds());
+
+    // Send a new message to the SO_REUSEADDR group. We use a new socket each
+    // time so that a new ephemeral port will be used each time. This ensures
+    // that we aren't doing REUSEPORT-like hash load blancing.
+    auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+    char send_buf[kMessageSize];
+    RandomizeBuffer(send_buf, sizeof(send_buf));
+    EXPECT_THAT(RetryEINTR(sendto)(sender->get(), send_buf, sizeof(send_buf), 0,
+                                   reinterpret_cast<sockaddr*>(&addr.addr),
+                                   addr.addr_len),
+                SyscallSucceedsWithValue(sizeof(send_buf)));
+
+    // Verify that the most recent socket got the message. We don't expect any
+    // of the other sockets to have received it, but we will check that later.
+    char recv_buf[sizeof(send_buf)] = {};
+    EXPECT_THAT(
+        RetryEINTR(recv)(last->get(), recv_buf, sizeof(recv_buf), MSG_DONTWAIT),
+        SyscallSucceedsWithValue(sizeof(send_buf)));
+    EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf)));
+  }
+
+  // Verify that no other messages were received.
+  for (auto& socket : sockets) {
+    char recv_buf[kMessageSize] = {};
+    EXPECT_THAT(RetryEINTR(recv)(socket->get(), recv_buf, sizeof(recv_buf),
+                                 MSG_DONTWAIT),
+                SyscallFailsWithErrno(EAGAIN));
+  }
+}
+
+TEST_P(IPv4UDPUnboundSocketTest, BindReuseAddrThenReusePort) {
+  auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  // Bind socket1 with REUSEADDR.
+  ASSERT_THAT(setsockopt(socket1->get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceeds());
+
+  // Bind the first socket to the loopback and take note of the selected port.
+  auto addr = V4Loopback();
+  ASSERT_THAT(bind(socket1->get(), reinterpret_cast<sockaddr*>(&addr.addr),
+                   addr.addr_len),
+              SyscallSucceeds());
+  socklen_t addr_len = addr.addr_len;
+  ASSERT_THAT(getsockname(socket1->get(),
+                          reinterpret_cast<sockaddr*>(&addr.addr), &addr_len),
+              SyscallSucceeds());
+  EXPECT_EQ(addr_len, addr.addr_len);
+
+  // Bind socket2 to the same address as socket1, only with REUSEPORT.
+  ASSERT_THAT(setsockopt(socket2->get(), SOL_SOCKET, SO_REUSEPORT, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceeds());
+  ASSERT_THAT(bind(socket2->get(), reinterpret_cast<sockaddr*>(&addr.addr),
+                   addr.addr_len),
+              SyscallFailsWithErrno(EADDRINUSE));
+}
+
+TEST_P(IPv4UDPUnboundSocketTest, BindReusePortThenReuseAddr) {
+  auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  // Bind socket1 with REUSEPORT.
+  ASSERT_THAT(setsockopt(socket1->get(), SOL_SOCKET, SO_REUSEPORT, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceeds());
+
+  // Bind the first socket to the loopback and take note of the selected port.
+  auto addr = V4Loopback();
+  ASSERT_THAT(bind(socket1->get(), reinterpret_cast<sockaddr*>(&addr.addr),
+                   addr.addr_len),
+              SyscallSucceeds());
+  socklen_t addr_len = addr.addr_len;
+  ASSERT_THAT(getsockname(socket1->get(),
+                          reinterpret_cast<sockaddr*>(&addr.addr), &addr_len),
+              SyscallSucceeds());
+  EXPECT_EQ(addr_len, addr.addr_len);
+
+  // Bind socket2 to the same address as socket1, only with REUSEADDR.
+  ASSERT_THAT(setsockopt(socket2->get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceeds());
+  ASSERT_THAT(bind(socket2->get(), reinterpret_cast<sockaddr*>(&addr.addr),
+                   addr.addr_len),
+              SyscallFailsWithErrno(EADDRINUSE));
+}
+
+TEST_P(IPv4UDPUnboundSocketTest, BindReuseAddrReusePortConvertibleToReusePort) {
+  auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto socket3 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  // Bind socket1 with REUSEADDR and REUSEPORT.
+  ASSERT_THAT(setsockopt(socket1->get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceeds());
+  ASSERT_THAT(setsockopt(socket1->get(), SOL_SOCKET, SO_REUSEPORT, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceeds());
+
+  // Bind the first socket to the loopback and take note of the selected port.
+  auto addr = V4Loopback();
+  ASSERT_THAT(bind(socket1->get(), reinterpret_cast<sockaddr*>(&addr.addr),
+                   addr.addr_len),
+              SyscallSucceeds());
+  socklen_t addr_len = addr.addr_len;
+  ASSERT_THAT(getsockname(socket1->get(),
+                          reinterpret_cast<sockaddr*>(&addr.addr), &addr_len),
+              SyscallSucceeds());
+  EXPECT_EQ(addr_len, addr.addr_len);
+
+  // Bind socket2 to the same address as socket1, only with REUSEPORT.
+  ASSERT_THAT(setsockopt(socket2->get(), SOL_SOCKET, SO_REUSEPORT, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceeds());
+  ASSERT_THAT(bind(socket2->get(), reinterpret_cast<sockaddr*>(&addr.addr),
+                   addr.addr_len),
+              SyscallSucceeds());
+
+  // Bind socket3 to the same address as socket1, only with REUSEADDR.
+  ASSERT_THAT(setsockopt(socket3->get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceeds());
+  ASSERT_THAT(bind(socket3->get(), reinterpret_cast<sockaddr*>(&addr.addr),
+                   addr.addr_len),
+              SyscallFailsWithErrno(EADDRINUSE));
+}
+
+TEST_P(IPv4UDPUnboundSocketTest, BindReuseAddrReusePortConvertibleToReuseAddr) {
+  auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto socket3 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  // Bind socket1 with REUSEADDR and REUSEPORT.
+  ASSERT_THAT(setsockopt(socket1->get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceeds());
+  ASSERT_THAT(setsockopt(socket1->get(), SOL_SOCKET, SO_REUSEPORT, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceeds());
+
+  // Bind the first socket to the loopback and take note of the selected port.
+  auto addr = V4Loopback();
+  ASSERT_THAT(bind(socket1->get(), reinterpret_cast<sockaddr*>(&addr.addr),
+                   addr.addr_len),
+              SyscallSucceeds());
+  socklen_t addr_len = addr.addr_len;
+  ASSERT_THAT(getsockname(socket1->get(),
+                          reinterpret_cast<sockaddr*>(&addr.addr), &addr_len),
+              SyscallSucceeds());
+  EXPECT_EQ(addr_len, addr.addr_len);
+
+  // Bind socket2 to the same address as socket1, only with REUSEADDR.
+  ASSERT_THAT(setsockopt(socket2->get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceeds());
+  ASSERT_THAT(bind(socket2->get(), reinterpret_cast<sockaddr*>(&addr.addr),
+                   addr.addr_len),
+              SyscallSucceeds());
+
+  // Bind socket3 to the same address as socket1, only with REUSEPORT.
+  ASSERT_THAT(setsockopt(socket3->get(), SOL_SOCKET, SO_REUSEPORT, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceeds());
+  ASSERT_THAT(bind(socket3->get(), reinterpret_cast<sockaddr*>(&addr.addr),
+                   addr.addr_len),
+              SyscallFailsWithErrno(EADDRINUSE));
+}
+
+TEST_P(IPv4UDPUnboundSocketTest, BindReuseAddrReusePortConversionReversable1) {
+  auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto socket3 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  // Bind socket1 with REUSEADDR and REUSEPORT.
+  ASSERT_THAT(setsockopt(socket1->get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceeds());
+  ASSERT_THAT(setsockopt(socket1->get(), SOL_SOCKET, SO_REUSEPORT, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceeds());
+
+  // Bind the first socket to the loopback and take note of the selected port.
+  auto addr = V4Loopback();
+  ASSERT_THAT(bind(socket1->get(), reinterpret_cast<sockaddr*>(&addr.addr),
+                   addr.addr_len),
+              SyscallSucceeds());
+  socklen_t addr_len = addr.addr_len;
+  ASSERT_THAT(getsockname(socket1->get(),
+                          reinterpret_cast<sockaddr*>(&addr.addr), &addr_len),
+              SyscallSucceeds());
+  EXPECT_EQ(addr_len, addr.addr_len);
+
+  // Bind socket2 to the same address as socket1, only with REUSEPORT.
+  ASSERT_THAT(setsockopt(socket2->get(), SOL_SOCKET, SO_REUSEPORT, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceeds());
+  ASSERT_THAT(bind(socket2->get(), reinterpret_cast<sockaddr*>(&addr.addr),
+                   addr.addr_len),
+              SyscallSucceeds());
+
+  // Close socket2 to revert to just socket1 with REUSEADDR and REUSEPORT.
+  socket2->reset();
+
+  // Bind socket3 to the same address as socket1, only with REUSEADDR.
+  ASSERT_THAT(setsockopt(socket3->get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceeds());
+  ASSERT_THAT(bind(socket3->get(), reinterpret_cast<sockaddr*>(&addr.addr),
+                   addr.addr_len),
+              SyscallSucceeds());
+}
+
+TEST_P(IPv4UDPUnboundSocketTest, BindReuseAddrReusePortConversionReversable2) {
+  auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto socket3 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  // Bind socket1 with REUSEADDR and REUSEPORT.
+  ASSERT_THAT(setsockopt(socket1->get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceeds());
+  ASSERT_THAT(setsockopt(socket1->get(), SOL_SOCKET, SO_REUSEPORT, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceeds());
+
+  // Bind the first socket to the loopback and take note of the selected port.
+  auto addr = V4Loopback();
+  ASSERT_THAT(bind(socket1->get(), reinterpret_cast<sockaddr*>(&addr.addr),
+                   addr.addr_len),
+              SyscallSucceeds());
+  socklen_t addr_len = addr.addr_len;
+  ASSERT_THAT(getsockname(socket1->get(),
+                          reinterpret_cast<sockaddr*>(&addr.addr), &addr_len),
+              SyscallSucceeds());
+  EXPECT_EQ(addr_len, addr.addr_len);
+
+  // Bind socket2 to the same address as socket1, only with REUSEADDR.
+  ASSERT_THAT(setsockopt(socket2->get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceeds());
+  ASSERT_THAT(bind(socket2->get(), reinterpret_cast<sockaddr*>(&addr.addr),
+                   addr.addr_len),
+              SyscallSucceeds());
+
+  // Close socket2 to revert to just socket1 with REUSEADDR and REUSEPORT.
+  socket2->reset();
+
+  // Bind socket3 to the same address as socket1, only with REUSEPORT.
+  ASSERT_THAT(setsockopt(socket3->get(), SOL_SOCKET, SO_REUSEPORT, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceeds());
+  ASSERT_THAT(bind(socket3->get(), reinterpret_cast<sockaddr*>(&addr.addr),
+                   addr.addr_len),
+              SyscallSucceeds());
+}
+
+TEST_P(IPv4UDPUnboundSocketTest, BindDoubleReuseAddrReusePortThenReusePort) {
+  auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto socket3 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  // Bind socket1 with REUSEADDR and REUSEPORT.
+  ASSERT_THAT(setsockopt(socket1->get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceeds());
+  ASSERT_THAT(setsockopt(socket1->get(), SOL_SOCKET, SO_REUSEPORT, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceeds());
+
+  // Bind the first socket to the loopback and take note of the selected port.
+  auto addr = V4Loopback();
+  ASSERT_THAT(bind(socket1->get(), reinterpret_cast<sockaddr*>(&addr.addr),
+                   addr.addr_len),
+              SyscallSucceeds());
+  socklen_t addr_len = addr.addr_len;
+  ASSERT_THAT(getsockname(socket1->get(),
+                          reinterpret_cast<sockaddr*>(&addr.addr), &addr_len),
+              SyscallSucceeds());
+  EXPECT_EQ(addr_len, addr.addr_len);
+
+  // Bind socket2 to the same address as socket1, also with REUSEADDR and
+  // REUSEPORT.
+  ASSERT_THAT(setsockopt(socket2->get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceeds());
+  ASSERT_THAT(setsockopt(socket2->get(), SOL_SOCKET, SO_REUSEPORT, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceeds());
+
+  ASSERT_THAT(bind(socket2->get(), reinterpret_cast<sockaddr*>(&addr.addr),
+                   addr.addr_len),
+              SyscallSucceeds());
+
+  // Bind socket3 to the same address as socket1, only with REUSEPORT.
+  ASSERT_THAT(setsockopt(socket3->get(), SOL_SOCKET, SO_REUSEPORT, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceeds());
+  ASSERT_THAT(bind(socket3->get(), reinterpret_cast<sockaddr*>(&addr.addr),
+                   addr.addr_len),
+              SyscallSucceeds());
+}
+
+TEST_P(IPv4UDPUnboundSocketTest, BindDoubleReuseAddrReusePortThenReuseAddr) {
+  auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto socket3 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  // Bind socket1 with REUSEADDR and REUSEPORT.
+  ASSERT_THAT(setsockopt(socket1->get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceeds());
+  ASSERT_THAT(setsockopt(socket1->get(), SOL_SOCKET, SO_REUSEPORT, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceeds());
+
+  // Bind the first socket to the loopback and take note of the selected port.
+  auto addr = V4Loopback();
+  ASSERT_THAT(bind(socket1->get(), reinterpret_cast<sockaddr*>(&addr.addr),
+                   addr.addr_len),
+              SyscallSucceeds());
+  socklen_t addr_len = addr.addr_len;
+  ASSERT_THAT(getsockname(socket1->get(),
+                          reinterpret_cast<sockaddr*>(&addr.addr), &addr_len),
+              SyscallSucceeds());
+  EXPECT_EQ(addr_len, addr.addr_len);
+
+  // Bind socket2 to the same address as socket1, also with REUSEADDR and
+  // REUSEPORT.
+  ASSERT_THAT(setsockopt(socket2->get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceeds());
+  ASSERT_THAT(setsockopt(socket2->get(), SOL_SOCKET, SO_REUSEPORT, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceeds());
+
+  ASSERT_THAT(bind(socket2->get(), reinterpret_cast<sockaddr*>(&addr.addr),
+                   addr.addr_len),
+              SyscallSucceeds());
+
+  // Bind socket3 to the same address as socket1, only with REUSEADDR.
+  ASSERT_THAT(setsockopt(socket3->get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceeds());
+  ASSERT_THAT(bind(socket3->get(), reinterpret_cast<sockaddr*>(&addr.addr),
+                   addr.addr_len),
+              SyscallSucceeds());
+}
+
+// Check that REUSEPORT takes precedence over REUSEADDR.
+TEST_P(IPv4UDPUnboundSocketTest, ReuseAddrReusePortDistribution) {
+  auto receiver1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto receiver2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  ASSERT_THAT(setsockopt(receiver1->get(), SOL_SOCKET, SO_REUSEADDR,
+                         &kSockOptOn, sizeof(kSockOptOn)),
+              SyscallSucceeds());
+  ASSERT_THAT(setsockopt(receiver1->get(), SOL_SOCKET, SO_REUSEPORT,
+                         &kSockOptOn, sizeof(kSockOptOn)),
+              SyscallSucceeds());
+
+  // Bind the first socket to the loopback and take note of the selected port.
+  auto addr = V4Loopback();
+  ASSERT_THAT(bind(receiver1->get(), reinterpret_cast<sockaddr*>(&addr.addr),
+                   addr.addr_len),
+              SyscallSucceeds());
+  socklen_t addr_len = addr.addr_len;
+  ASSERT_THAT(getsockname(receiver1->get(),
+                          reinterpret_cast<sockaddr*>(&addr.addr), &addr_len),
+              SyscallSucceeds());
+  EXPECT_EQ(addr_len, addr.addr_len);
+
+  // Bind receiver2 to the same address as socket1, also with REUSEADDR and
+  // REUSEPORT.
+  ASSERT_THAT(setsockopt(receiver2->get(), SOL_SOCKET, SO_REUSEADDR,
+                         &kSockOptOn, sizeof(kSockOptOn)),
+              SyscallSucceeds());
+  ASSERT_THAT(setsockopt(receiver2->get(), SOL_SOCKET, SO_REUSEPORT,
+                         &kSockOptOn, sizeof(kSockOptOn)),
+              SyscallSucceeds());
+  ASSERT_THAT(bind(receiver2->get(), reinterpret_cast<sockaddr*>(&addr.addr),
+                   addr.addr_len),
+              SyscallSucceeds());
+
+  constexpr int kMessageSize = 10;
+
+  for (int i = 0; i < 100; ++i) {
+    // Send a new message to the REUSEADDR/REUSEPORT group. We use a new socket
+    // each time so that a new ephemerial port will be used each time. This
+    // ensures that we cycle through hashes.
+    auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+    char send_buf[kMessageSize] = {};
+    EXPECT_THAT(RetryEINTR(sendto)(sender->get(), send_buf, sizeof(send_buf), 0,
+                                   reinterpret_cast<sockaddr*>(&addr.addr),
+                                   addr.addr_len),
+                SyscallSucceedsWithValue(sizeof(send_buf)));
+  }
+
+  // Check that both receivers got messages. This checks that we are using load
+  // balancing (REUSEPORT) instead of the most recently bound socket
+  // (REUSEADDR).
+  char recv_buf[kMessageSize] = {};
+  EXPECT_THAT(RetryEINTR(recv)(receiver1->get(), recv_buf, sizeof(recv_buf),
+                               MSG_DONTWAIT),
+              SyscallSucceedsWithValue(kMessageSize));
+  EXPECT_THAT(RetryEINTR(recv)(receiver2->get(), recv_buf, sizeof(recv_buf),
+                               MSG_DONTWAIT),
+              SyscallSucceedsWithValue(kMessageSize));
+}
+
+// Check that connect returns EADDRNOTAVAIL when out of local ephemeral ports.
+// We disable S/R because this test creates a large number of sockets.
+TEST_P(IPv4UDPUnboundSocketTest, UDPConnectPortExhaustion_NoRandomSave) {
+  auto receiver1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  constexpr int kClients = 65536;
+  // Bind the first socket to the loopback and take note of the selected port.
+  auto addr = V4Loopback();
+  ASSERT_THAT(bind(receiver1->get(), reinterpret_cast<sockaddr*>(&addr.addr),
+                   addr.addr_len),
+              SyscallSucceeds());
+  socklen_t addr_len = addr.addr_len;
+  ASSERT_THAT(getsockname(receiver1->get(),
+                          reinterpret_cast<sockaddr*>(&addr.addr), &addr_len),
+              SyscallSucceeds());
+  EXPECT_EQ(addr_len, addr.addr_len);
+
+  // Disable cooperative S/R as we are making too many syscalls.
+  DisableSave ds;
+  std::vector<std::unique_ptr<FileDescriptor>> sockets;
+  for (int i = 0; i < kClients; i++) {
+    auto s = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+    int ret = connect(s->get(), reinterpret_cast<sockaddr*>(&addr.addr),
+                      addr.addr_len);
+    if (ret == 0) {
+      sockets.push_back(std::move(s));
+      continue;
+    }
+    ASSERT_THAT(ret, SyscallFailsWithErrno(EAGAIN));
+    break;
+  }
+}
+
+// Test that socket will receive packet info control message.
+TEST_P(IPv4UDPUnboundSocketTest, SetAndReceiveIPPKTINFO) {
+  // TODO(gvisor.dev/issue/1202): ioctl() is not supported by hostinet.
+  SKIP_IF((IsRunningWithHostinet()));
+
+  auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto receiver = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto sender_addr = V4Loopback();
+  int level = SOL_IP;
+  int type = IP_PKTINFO;
+
+  ASSERT_THAT(
+      bind(receiver->get(), reinterpret_cast<sockaddr*>(&sender_addr.addr),
+           sender_addr.addr_len),
+      SyscallSucceeds());
+  socklen_t sender_addr_len = sender_addr.addr_len;
+  ASSERT_THAT(getsockname(receiver->get(),
+                          reinterpret_cast<sockaddr*>(&sender_addr.addr),
+                          &sender_addr_len),
+              SyscallSucceeds());
+  EXPECT_EQ(sender_addr_len, sender_addr.addr_len);
+
+  auto receiver_addr = V4Loopback();
+  reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port =
+      reinterpret_cast<sockaddr_in*>(&sender_addr.addr)->sin_port;
+  ASSERT_THAT(
+      connect(sender->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+              receiver_addr.addr_len),
+      SyscallSucceeds());
+
+  // Allow socket to receive control message.
+  ASSERT_THAT(
+      setsockopt(receiver->get(), level, type, &kSockOptOn, sizeof(kSockOptOn)),
+      SyscallSucceeds());
+
+  // Prepare message to send.
+  constexpr size_t kDataLength = 1024;
+  msghdr sent_msg = {};
+  iovec sent_iov = {};
+  char sent_data[kDataLength];
+  sent_iov.iov_base = sent_data;
+  sent_iov.iov_len = kDataLength;
+  sent_msg.msg_iov = &sent_iov;
+  sent_msg.msg_iovlen = 1;
+  sent_msg.msg_flags = 0;
+
+  ASSERT_THAT(RetryEINTR(sendmsg)(sender->get(), &sent_msg, 0),
+              SyscallSucceedsWithValue(kDataLength));
+
+  msghdr received_msg = {};
+  iovec received_iov = {};
+  char received_data[kDataLength];
+  char received_cmsg_buf[CMSG_SPACE(sizeof(in_pktinfo))] = {};
+  size_t cmsg_data_len = sizeof(in_pktinfo);
+  received_iov.iov_base = received_data;
+  received_iov.iov_len = kDataLength;
+  received_msg.msg_iov = &received_iov;
+  received_msg.msg_iovlen = 1;
+  received_msg.msg_controllen = CMSG_LEN(cmsg_data_len);
+  received_msg.msg_control = received_cmsg_buf;
+
+  ASSERT_THAT(RetryEINTR(recvmsg)(receiver->get(), &received_msg, 0),
+              SyscallSucceedsWithValue(kDataLength));
+
+  cmsghdr* cmsg = CMSG_FIRSTHDR(&received_msg);
+  ASSERT_NE(cmsg, nullptr);
+  EXPECT_EQ(cmsg->cmsg_len, CMSG_LEN(cmsg_data_len));
+  EXPECT_EQ(cmsg->cmsg_level, level);
+  EXPECT_EQ(cmsg->cmsg_type, type);
+
+  // Get loopback index.
+  ifreq ifr = {};
+  absl::SNPrintF(ifr.ifr_name, IFNAMSIZ, "lo");
+  ASSERT_THAT(ioctl(sender->get(), SIOCGIFINDEX, &ifr), SyscallSucceeds());
+  ASSERT_NE(ifr.ifr_ifindex, 0);
+
+  // Check the data
+  in_pktinfo received_pktinfo = {};
+  memcpy(&received_pktinfo, CMSG_DATA(cmsg), sizeof(in_pktinfo));
+  EXPECT_EQ(received_pktinfo.ipi_ifindex, ifr.ifr_ifindex);
+  EXPECT_EQ(received_pktinfo.ipi_spec_dst.s_addr, htonl(INADDR_LOOPBACK));
+  EXPECT_EQ(received_pktinfo.ipi_addr.s_addr, htonl(INADDR_LOOPBACK));
+}
+
+// Check that setting SO_RCVBUF below min is clamped to the minimum
+// receive buffer size.
+TEST_P(IPv4UDPUnboundSocketTest, SetSocketRecvBufBelowMin) {
+  auto s = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  // Discover minimum buffer size by setting it to zero.
+  constexpr int kRcvBufSz = 0;
+  ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &kRcvBufSz,
+                         sizeof(kRcvBufSz)),
+              SyscallSucceeds());
+
+  int min = 0;
+  socklen_t min_len = sizeof(min);
+  ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &min, &min_len),
+              SyscallSucceeds());
+
+  // Linux doubles the value so let's use a value that when doubled will still
+  // be smaller than min.
+  int below_min = min / 2 - 1;
+  ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &below_min,
+                         sizeof(below_min)),
+              SyscallSucceeds());
+
+  int val = 0;
+  socklen_t val_len = sizeof(val);
+  ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &val, &val_len),
+              SyscallSucceeds());
+
+  ASSERT_EQ(min, val);
+}
+
+// Check that setting SO_RCVBUF above max is clamped to the maximum
+// receive buffer size.
+TEST_P(IPv4UDPUnboundSocketTest, SetSocketRecvBufAboveMax) {
+  auto s = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  // Discover maxmimum buffer size by setting to a really large value.
+  constexpr int kRcvBufSz = 0xffffffff;
+  ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &kRcvBufSz,
+                         sizeof(kRcvBufSz)),
+              SyscallSucceeds());
+
+  int max = 0;
+  socklen_t max_len = sizeof(max);
+  ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &max, &max_len),
+              SyscallSucceeds());
+
+  int above_max = max + 1;
+  ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &above_max,
+                         sizeof(above_max)),
+              SyscallSucceeds());
+
+  int val = 0;
+  socklen_t val_len = sizeof(val);
+  ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &val, &val_len),
+              SyscallSucceeds());
+  ASSERT_EQ(max, val);
+}
+
+// Check that setting SO_RCVBUF min <= rcvBufSz <= max is honored.
+TEST_P(IPv4UDPUnboundSocketTest, SetSocketRecvBuf) {
+  auto s = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  int max = 0;
+  int min = 0;
+  {
+    // Discover maxmimum buffer size by setting to a really large value.
+    constexpr int kRcvBufSz = 0xffffffff;
+    ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &kRcvBufSz,
+                           sizeof(kRcvBufSz)),
+                SyscallSucceeds());
+
+    max = 0;
+    socklen_t max_len = sizeof(max);
+    ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &max, &max_len),
+                SyscallSucceeds());
+  }
+
+  {
+    // Discover minimum buffer size by setting it to zero.
+    constexpr int kRcvBufSz = 0;
+    ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &kRcvBufSz,
+                           sizeof(kRcvBufSz)),
+                SyscallSucceeds());
+
+    socklen_t min_len = sizeof(min);
+    ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &min, &min_len),
+                SyscallSucceeds());
+  }
+
+  int quarter_sz = min + (max - min) / 4;
+  ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &quarter_sz,
+                         sizeof(quarter_sz)),
+              SyscallSucceeds());
+
+  int val = 0;
+  socklen_t val_len = sizeof(val);
+  ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &val, &val_len),
+              SyscallSucceeds());
+
+  // Linux doubles the value set by SO_SNDBUF/SO_RCVBUF.
+  if (!IsRunningOnGvisor()) {
+    quarter_sz *= 2;
+  }
+  ASSERT_EQ(quarter_sz, val);
+}
+
+// Check that setting SO_SNDBUF below min is clamped to the minimum
+// send buffer size.
+TEST_P(IPv4UDPUnboundSocketTest, SetSocketSendBufBelowMin) {
+  auto s = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  // Discover minimum buffer size by setting it to zero.
+  constexpr int kSndBufSz = 0;
+  ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &kSndBufSz,
+                         sizeof(kSndBufSz)),
+              SyscallSucceeds());
+
+  int min = 0;
+  socklen_t min_len = sizeof(min);
+  ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &min, &min_len),
+              SyscallSucceeds());
+
+  // Linux doubles the value so let's use a value that when doubled will still
+  // be smaller than min.
+  int below_min = min / 2 - 1;
+  ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &below_min,
+                         sizeof(below_min)),
+              SyscallSucceeds());
+
+  int val = 0;
+  socklen_t val_len = sizeof(val);
+  ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &val, &val_len),
+              SyscallSucceeds());
+
+  ASSERT_EQ(min, val);
+}
+
+// Check that setting SO_SNDBUF above max is clamped to the maximum
+// send buffer size.
+TEST_P(IPv4UDPUnboundSocketTest, SetSocketSendBufAboveMax) {
+  auto s = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  // Discover maxmimum buffer size by setting to a really large value.
+  constexpr int kSndBufSz = 0xffffffff;
+  ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &kSndBufSz,
+                         sizeof(kSndBufSz)),
+              SyscallSucceeds());
+
+  int max = 0;
+  socklen_t max_len = sizeof(max);
+  ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &max, &max_len),
+              SyscallSucceeds());
+
+  int above_max = max + 1;
+  ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &above_max,
+                         sizeof(above_max)),
+              SyscallSucceeds());
+
+  int val = 0;
+  socklen_t val_len = sizeof(val);
+  ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &val, &val_len),
+              SyscallSucceeds());
+  ASSERT_EQ(max, val);
+}
+
+// Check that setting SO_SNDBUF min <= kSndBufSz <= max is honored.
+TEST_P(IPv4UDPUnboundSocketTest, SetSocketSendBuf) {
+  auto s = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  int max = 0;
+  int min = 0;
+  {
+    // Discover maxmimum buffer size by setting to a really large value.
+    constexpr int kSndBufSz = 0xffffffff;
+    ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &kSndBufSz,
+                           sizeof(kSndBufSz)),
+                SyscallSucceeds());
+
+    max = 0;
+    socklen_t max_len = sizeof(max);
+    ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &max, &max_len),
+                SyscallSucceeds());
+  }
+
+  {
+    // Discover minimum buffer size by setting it to zero.
+    constexpr int kSndBufSz = 0;
+    ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &kSndBufSz,
+                           sizeof(kSndBufSz)),
+                SyscallSucceeds());
+
+    socklen_t min_len = sizeof(min);
+    ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &min, &min_len),
+                SyscallSucceeds());
+  }
+
+  int quarter_sz = min + (max - min) / 4;
+  ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &quarter_sz,
+                         sizeof(quarter_sz)),
+              SyscallSucceeds());
+
+  int val = 0;
+  socklen_t val_len = sizeof(val);
+  ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &val, &val_len),
+              SyscallSucceeds());
+
+  // Linux doubles the value set by SO_SNDBUF/SO_RCVBUF.
+  if (!IsRunningOnGvisor()) {
+    quarter_sz *= 2;
+  }
+
+  ASSERT_EQ(quarter_sz, val);
+}
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_ipv4_udp_unbound.h b/test/syscalls/linux/socket_ipv4_udp_unbound.h
new file mode 100644
index 000000000..f64c57645
--- /dev/null
+++ b/test/syscalls/linux/socket_ipv4_udp_unbound.h
@@ -0,0 +1,29 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IPV4_UDP_UNBOUND_H_
+#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IPV4_UDP_UNBOUND_H_
+
+#include "test/syscalls/linux/socket_test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// Test fixture for tests that apply to IPv4 UDP sockets.
+using IPv4UDPUnboundSocketTest = SimpleSocketTest;
+
+}  // namespace testing
+}  // namespace gvisor
+
+#endif  // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IPV4_UDP_UNBOUND_H_
diff --git a/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking.cc b/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking.cc
new file mode 100644
index 000000000..d690d9564
--- /dev/null
+++ b/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking.cc
@@ -0,0 +1,1099 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test/syscalls/linux/socket_ipv4_udp_unbound_external_networking.h"
+
+#include <arpa/inet.h>
+#include <ifaddrs.h>
+#include <netinet/in.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+
+#include <cstdint>
+#include <cstdio>
+#include <cstring>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/ip_socket_test_util.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+TestAddress V4EmptyAddress() {
+  TestAddress t("V4Empty");
+  t.addr.ss_family = AF_INET;
+  t.addr_len = sizeof(sockaddr_in);
+  return t;
+}
+
+void IPv4UDPUnboundExternalNetworkingSocketTest::SetUp() {
+  got_if_infos_ = false;
+
+  // Get interface list.
+  ASSERT_NO_ERRNO(if_helper_.Load());
+  std::vector<std::string> if_names = if_helper_.InterfaceList(AF_INET);
+  if (if_names.size() != 2) {
+    return;
+  }
+
+  // Figure out which interface is where.
+  std::string lo = if_names[0];
+  std::string eth = if_names[1];
+  if (lo != "lo") std::swap(lo, eth);
+  if (lo != "lo") return;
+
+  lo_if_idx_ = ASSERT_NO_ERRNO_AND_VALUE(if_helper_.GetIndex(lo));
+  auto lo_if_addr = if_helper_.GetAddr(AF_INET, lo);
+  if (lo_if_addr == nullptr) {
+    return;
+  }
+  lo_if_addr_ = *reinterpret_cast<const sockaddr_in*>(lo_if_addr);
+
+  eth_if_idx_ = ASSERT_NO_ERRNO_AND_VALUE(if_helper_.GetIndex(eth));
+  auto eth_if_addr = if_helper_.GetAddr(AF_INET, eth);
+  if (eth_if_addr == nullptr) {
+    return;
+  }
+  eth_if_addr_ = *reinterpret_cast<const sockaddr_in*>(eth_if_addr);
+
+  got_if_infos_ = true;
+}
+
+// Verifies that a newly instantiated UDP socket does not have the
+// broadcast socket option enabled.
+TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, UDPBroadcastDefault) {
+  auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  int get = -1;
+  socklen_t get_sz = sizeof(get);
+  EXPECT_THAT(
+      getsockopt(socket->get(), SOL_SOCKET, SO_BROADCAST, &get, &get_sz),
+      SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get, kSockOptOff);
+  EXPECT_EQ(get_sz, sizeof(get));
+}
+
+// Verifies that a newly instantiated UDP socket returns true after enabling
+// the broadcast socket option.
+TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, SetUDPBroadcast) {
+  auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  EXPECT_THAT(setsockopt(socket->get(), SOL_SOCKET, SO_BROADCAST, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceedsWithValue(0));
+
+  int get = -1;
+  socklen_t get_sz = sizeof(get);
+  EXPECT_THAT(
+      getsockopt(socket->get(), SOL_SOCKET, SO_BROADCAST, &get, &get_sz),
+      SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get, kSockOptOn);
+  EXPECT_EQ(get_sz, sizeof(get));
+}
+
+// Verifies that a broadcast UDP packet will arrive at all UDP sockets with
+// the destination port number.
+TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest,
+       UDPBroadcastReceivedOnExpectedPort) {
+  auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto rcvr1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto rcvr2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto norcv = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  // Enable SO_BROADCAST on the sending socket.
+  ASSERT_THAT(setsockopt(sender->get(), SOL_SOCKET, SO_BROADCAST, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceedsWithValue(0));
+
+  // Enable SO_REUSEPORT on the receiving sockets so that they may both be bound
+  // to the broadcast messages destination port.
+  ASSERT_THAT(setsockopt(rcvr1->get(), SOL_SOCKET, SO_REUSEPORT, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceedsWithValue(0));
+  ASSERT_THAT(setsockopt(rcvr2->get(), SOL_SOCKET, SO_REUSEPORT, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceedsWithValue(0));
+
+  // Bind the first socket to the ANY address and let the system assign a port.
+  auto rcv1_addr = V4Any();
+  ASSERT_THAT(bind(rcvr1->get(), reinterpret_cast<sockaddr*>(&rcv1_addr.addr),
+                   rcv1_addr.addr_len),
+              SyscallSucceedsWithValue(0));
+  // Retrieve port number from first socket so that it can be bound to the
+  // second socket.
+  socklen_t rcv_addr_sz = rcv1_addr.addr_len;
+  ASSERT_THAT(
+      getsockname(rcvr1->get(), reinterpret_cast<sockaddr*>(&rcv1_addr.addr),
+                  &rcv_addr_sz),
+      SyscallSucceedsWithValue(0));
+  EXPECT_EQ(rcv_addr_sz, rcv1_addr.addr_len);
+  auto port = reinterpret_cast<sockaddr_in*>(&rcv1_addr.addr)->sin_port;
+
+  // Bind the second socket to the same address:port as the first.
+  ASSERT_THAT(bind(rcvr2->get(), reinterpret_cast<sockaddr*>(&rcv1_addr.addr),
+                   rcv_addr_sz),
+              SyscallSucceedsWithValue(0));
+
+  // Bind the non-receiving socket to an ephemeral port.
+  auto norecv_addr = V4Any();
+  ASSERT_THAT(bind(norcv->get(), reinterpret_cast<sockaddr*>(&norecv_addr.addr),
+                   norecv_addr.addr_len),
+              SyscallSucceedsWithValue(0));
+
+  // Broadcast a test message.
+  auto dst_addr = V4Broadcast();
+  reinterpret_cast<sockaddr_in*>(&dst_addr.addr)->sin_port = port;
+  constexpr char kTestMsg[] = "hello, world";
+  EXPECT_THAT(
+      sendto(sender->get(), kTestMsg, sizeof(kTestMsg), 0,
+             reinterpret_cast<sockaddr*>(&dst_addr.addr), dst_addr.addr_len),
+      SyscallSucceedsWithValue(sizeof(kTestMsg)));
+
+  // Verify that the receiving sockets received the test message.
+  char buf[sizeof(kTestMsg)] = {};
+  EXPECT_THAT(recv(rcvr1->get(), buf, sizeof(buf), 0),
+              SyscallSucceedsWithValue(sizeof(kTestMsg)));
+  EXPECT_EQ(0, memcmp(buf, kTestMsg, sizeof(kTestMsg)));
+  memset(buf, 0, sizeof(buf));
+  EXPECT_THAT(recv(rcvr2->get(), buf, sizeof(buf), 0),
+              SyscallSucceedsWithValue(sizeof(kTestMsg)));
+  EXPECT_EQ(0, memcmp(buf, kTestMsg, sizeof(kTestMsg)));
+
+  // Verify that the non-receiving socket did not receive the test message.
+  memset(buf, 0, sizeof(buf));
+  EXPECT_THAT(RetryEINTR(recv)(norcv->get(), buf, sizeof(buf), MSG_DONTWAIT),
+              SyscallFailsWithErrno(EAGAIN));
+}
+
+// Verifies that a broadcast UDP packet will arrive at all UDP sockets bound to
+// the destination port number and either INADDR_ANY or INADDR_BROADCAST, but
+// not a unicast address.
+TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest,
+       UDPBroadcastReceivedOnExpectedAddresses) {
+  // FIXME(b/137899561): Linux instance for syscall tests sometimes misses its
+  // IPv4 address on eth0.
+  SKIP_IF(!got_if_infos_);
+
+  auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto rcvr1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto rcvr2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto norcv = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  // Enable SO_BROADCAST on the sending socket.
+  ASSERT_THAT(setsockopt(sender->get(), SOL_SOCKET, SO_BROADCAST, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceedsWithValue(0));
+
+  // Enable SO_REUSEPORT on all sockets so that they may all be bound to the
+  // broadcast messages destination port.
+  ASSERT_THAT(setsockopt(rcvr1->get(), SOL_SOCKET, SO_REUSEPORT, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceedsWithValue(0));
+  ASSERT_THAT(setsockopt(rcvr2->get(), SOL_SOCKET, SO_REUSEPORT, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceedsWithValue(0));
+  ASSERT_THAT(setsockopt(norcv->get(), SOL_SOCKET, SO_REUSEPORT, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceedsWithValue(0));
+
+  // Bind the first socket the ANY address and let the system assign a port.
+  auto rcv1_addr = V4Any();
+  ASSERT_THAT(bind(rcvr1->get(), reinterpret_cast<sockaddr*>(&rcv1_addr.addr),
+                   rcv1_addr.addr_len),
+              SyscallSucceedsWithValue(0));
+  // Retrieve port number from first socket so that it can be bound to the
+  // second socket.
+  socklen_t rcv_addr_sz = rcv1_addr.addr_len;
+  ASSERT_THAT(
+      getsockname(rcvr1->get(), reinterpret_cast<sockaddr*>(&rcv1_addr.addr),
+                  &rcv_addr_sz),
+      SyscallSucceedsWithValue(0));
+  EXPECT_EQ(rcv_addr_sz, rcv1_addr.addr_len);
+  auto port = reinterpret_cast<sockaddr_in*>(&rcv1_addr.addr)->sin_port;
+
+  // Bind the second socket to the broadcast address.
+  auto rcv2_addr = V4Broadcast();
+  reinterpret_cast<sockaddr_in*>(&rcv2_addr.addr)->sin_port = port;
+  ASSERT_THAT(bind(rcvr2->get(), reinterpret_cast<sockaddr*>(&rcv2_addr.addr),
+                   rcv2_addr.addr_len),
+              SyscallSucceedsWithValue(0));
+
+  // Bind the non-receiving socket to the unicast ethernet address.
+  auto norecv_addr = rcv1_addr;
+  reinterpret_cast<sockaddr_in*>(&norecv_addr.addr)->sin_addr =
+      eth_if_addr_.sin_addr;
+  ASSERT_THAT(bind(norcv->get(), reinterpret_cast<sockaddr*>(&norecv_addr.addr),
+                   norecv_addr.addr_len),
+              SyscallSucceedsWithValue(0));
+
+  // Broadcast a test message.
+  auto dst_addr = V4Broadcast();
+  reinterpret_cast<sockaddr_in*>(&dst_addr.addr)->sin_port = port;
+  constexpr char kTestMsg[] = "hello, world";
+  EXPECT_THAT(
+      sendto(sender->get(), kTestMsg, sizeof(kTestMsg), 0,
+             reinterpret_cast<sockaddr*>(&dst_addr.addr), dst_addr.addr_len),
+      SyscallSucceedsWithValue(sizeof(kTestMsg)));
+
+  // Verify that the receiving sockets received the test message.
+  char buf[sizeof(kTestMsg)] = {};
+  EXPECT_THAT(recv(rcvr1->get(), buf, sizeof(buf), 0),
+              SyscallSucceedsWithValue(sizeof(kTestMsg)));
+  EXPECT_EQ(0, memcmp(buf, kTestMsg, sizeof(kTestMsg)));
+  memset(buf, 0, sizeof(buf));
+  EXPECT_THAT(recv(rcvr2->get(), buf, sizeof(buf), 0),
+              SyscallSucceedsWithValue(sizeof(kTestMsg)));
+  EXPECT_EQ(0, memcmp(buf, kTestMsg, sizeof(kTestMsg)));
+
+  // Verify that the non-receiving socket did not receive the test message.
+  memset(buf, 0, sizeof(buf));
+  EXPECT_THAT(RetryEINTR(recv)(norcv->get(), buf, sizeof(buf), MSG_DONTWAIT),
+              SyscallFailsWithErrno(EAGAIN));
+}
+
+// Verifies that a UDP broadcast can be sent and then received back on the same
+// socket that is bound to the broadcast address (255.255.255.255).
+// FIXME(b/141938460): This can be combined with the next test
+//                     (UDPBroadcastSendRecvOnSocketBoundToAny).
+TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest,
+       UDPBroadcastSendRecvOnSocketBoundToBroadcast) {
+  auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  // Enable SO_BROADCAST.
+  ASSERT_THAT(setsockopt(sender->get(), SOL_SOCKET, SO_BROADCAST, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceedsWithValue(0));
+
+  // Bind the sender to the broadcast address.
+  auto src_addr = V4Broadcast();
+  ASSERT_THAT(bind(sender->get(), reinterpret_cast<sockaddr*>(&src_addr.addr),
+                   src_addr.addr_len),
+              SyscallSucceedsWithValue(0));
+  socklen_t src_sz = src_addr.addr_len;
+  ASSERT_THAT(getsockname(sender->get(),
+                          reinterpret_cast<sockaddr*>(&src_addr.addr), &src_sz),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(src_sz, src_addr.addr_len);
+
+  // Send the message.
+  auto dst_addr = V4Broadcast();
+  reinterpret_cast<sockaddr_in*>(&dst_addr.addr)->sin_port =
+      reinterpret_cast<sockaddr_in*>(&src_addr.addr)->sin_port;
+  constexpr char kTestMsg[] = "hello, world";
+  EXPECT_THAT(
+      sendto(sender->get(), kTestMsg, sizeof(kTestMsg), 0,
+             reinterpret_cast<sockaddr*>(&dst_addr.addr), dst_addr.addr_len),
+      SyscallSucceedsWithValue(sizeof(kTestMsg)));
+
+  // Verify that the message was received.
+  char buf[sizeof(kTestMsg)] = {};
+  EXPECT_THAT(RetryEINTR(recv)(sender->get(), buf, sizeof(buf), 0),
+              SyscallSucceedsWithValue(sizeof(kTestMsg)));
+  EXPECT_EQ(0, memcmp(buf, kTestMsg, sizeof(kTestMsg)));
+}
+
+// Verifies that a UDP broadcast can be sent and then received back on the same
+// socket that is bound to the ANY address (0.0.0.0).
+// FIXME(b/141938460): This can be combined with the previous test
+//                     (UDPBroadcastSendRecvOnSocketBoundToBroadcast).
+TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest,
+       UDPBroadcastSendRecvOnSocketBoundToAny) {
+  auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  // Enable SO_BROADCAST.
+  ASSERT_THAT(setsockopt(sender->get(), SOL_SOCKET, SO_BROADCAST, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceedsWithValue(0));
+
+  // Bind the sender to the ANY address.
+  auto src_addr = V4Any();
+  ASSERT_THAT(bind(sender->get(), reinterpret_cast<sockaddr*>(&src_addr.addr),
+                   src_addr.addr_len),
+              SyscallSucceedsWithValue(0));
+  socklen_t src_sz = src_addr.addr_len;
+  ASSERT_THAT(getsockname(sender->get(),
+                          reinterpret_cast<sockaddr*>(&src_addr.addr), &src_sz),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(src_sz, src_addr.addr_len);
+
+  // Send the message.
+  auto dst_addr = V4Broadcast();
+  reinterpret_cast<sockaddr_in*>(&dst_addr.addr)->sin_port =
+      reinterpret_cast<sockaddr_in*>(&src_addr.addr)->sin_port;
+  constexpr char kTestMsg[] = "hello, world";
+  EXPECT_THAT(
+      sendto(sender->get(), kTestMsg, sizeof(kTestMsg), 0,
+             reinterpret_cast<sockaddr*>(&dst_addr.addr), dst_addr.addr_len),
+      SyscallSucceedsWithValue(sizeof(kTestMsg)));
+
+  // Verify that the message was received.
+  char buf[sizeof(kTestMsg)] = {};
+  EXPECT_THAT(RetryEINTR(recv)(sender->get(), buf, sizeof(buf), 0),
+              SyscallSucceedsWithValue(sizeof(kTestMsg)));
+  EXPECT_EQ(0, memcmp(buf, kTestMsg, sizeof(kTestMsg)));
+}
+
+// Verifies that a UDP broadcast fails to send on a socket with SO_BROADCAST
+// disabled.
+TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, TestSendBroadcast) {
+  auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  // Broadcast a test message without having enabled SO_BROADCAST on the sending
+  // socket.
+  auto addr = V4Broadcast();
+  reinterpret_cast<sockaddr_in*>(&addr.addr)->sin_port = htons(12345);
+  constexpr char kTestMsg[] = "hello, world";
+
+  EXPECT_THAT(sendto(sender->get(), kTestMsg, sizeof(kTestMsg), 0,
+                     reinterpret_cast<sockaddr*>(&addr.addr), addr.addr_len),
+              SyscallFailsWithErrno(EACCES));
+}
+
+// Verifies that a UDP unicast on an unbound socket reaches its destination.
+TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, TestSendUnicastOnUnbound) {
+  auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto rcvr = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  // Bind the receiver and retrieve its address and port number.
+  sockaddr_in addr = {};
+  addr.sin_family = AF_INET;
+  addr.sin_addr.s_addr = htonl(INADDR_ANY);
+  addr.sin_port = htons(0);
+  ASSERT_THAT(bind(rcvr->get(), reinterpret_cast<struct sockaddr*>(&addr),
+                   sizeof(addr)),
+              SyscallSucceedsWithValue(0));
+  memset(&addr, 0, sizeof(addr));
+  socklen_t addr_sz = sizeof(addr);
+  ASSERT_THAT(getsockname(rcvr->get(),
+                          reinterpret_cast<struct sockaddr*>(&addr), &addr_sz),
+              SyscallSucceedsWithValue(0));
+
+  // Send a test message to the receiver.
+  constexpr char kTestMsg[] = "hello, world";
+  ASSERT_THAT(sendto(sender->get(), kTestMsg, sizeof(kTestMsg), 0,
+                     reinterpret_cast<struct sockaddr*>(&addr), addr_sz),
+              SyscallSucceedsWithValue(sizeof(kTestMsg)));
+  char buf[sizeof(kTestMsg)] = {};
+  ASSERT_THAT(recv(rcvr->get(), buf, sizeof(buf), 0),
+              SyscallSucceedsWithValue(sizeof(kTestMsg)));
+}
+
+// Check that multicast packets won't be delivered to the sending socket with no
+// set interface or group membership.
+TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest,
+       TestSendMulticastSelfNoGroup) {
+  // FIXME(b/125485338): A group membership is not required for external
+  // multicast on gVisor.
+  SKIP_IF(IsRunningOnGvisor());
+
+  auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  auto bind_addr = V4Any();
+  ASSERT_THAT(bind(socket->get(), reinterpret_cast<sockaddr*>(&bind_addr.addr),
+                   bind_addr.addr_len),
+              SyscallSucceeds());
+  socklen_t bind_addr_len = bind_addr.addr_len;
+  ASSERT_THAT(
+      getsockname(socket->get(), reinterpret_cast<sockaddr*>(&bind_addr.addr),
+                  &bind_addr_len),
+      SyscallSucceeds());
+  EXPECT_EQ(bind_addr_len, bind_addr.addr_len);
+
+  // Send a multicast packet.
+  auto send_addr = V4Multicast();
+  reinterpret_cast<sockaddr_in*>(&send_addr.addr)->sin_port =
+      reinterpret_cast<sockaddr_in*>(&bind_addr.addr)->sin_port;
+  char send_buf[200];
+  RandomizeBuffer(send_buf, sizeof(send_buf));
+  ASSERT_THAT(RetryEINTR(sendto)(socket->get(), send_buf, sizeof(send_buf), 0,
+                                 reinterpret_cast<sockaddr*>(&send_addr.addr),
+                                 send_addr.addr_len),
+              SyscallSucceedsWithValue(sizeof(send_buf)));
+
+  // Check that we did not receive the multicast packet.
+  char recv_buf[sizeof(send_buf)] = {};
+  ASSERT_THAT(
+      RetryEINTR(recv)(socket->get(), recv_buf, sizeof(recv_buf), MSG_DONTWAIT),
+      SyscallFailsWithErrno(EAGAIN));
+}
+
+// Check that multicast packets will be delivered to the sending socket without
+// setting an interface.
+TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, TestSendMulticastSelf) {
+  auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  auto bind_addr = V4Any();
+  ASSERT_THAT(bind(socket->get(), reinterpret_cast<sockaddr*>(&bind_addr.addr),
+                   bind_addr.addr_len),
+              SyscallSucceeds());
+  socklen_t bind_addr_len = bind_addr.addr_len;
+  ASSERT_THAT(
+      getsockname(socket->get(), reinterpret_cast<sockaddr*>(&bind_addr.addr),
+                  &bind_addr_len),
+      SyscallSucceeds());
+  EXPECT_EQ(bind_addr_len, bind_addr.addr_len);
+
+  // Register to receive multicast packets.
+  ip_mreq group = {};
+  group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress);
+  ASSERT_THAT(setsockopt(socket->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group,
+                         sizeof(group)),
+              SyscallSucceeds());
+
+  // Send a multicast packet.
+  auto send_addr = V4Multicast();
+  reinterpret_cast<sockaddr_in*>(&send_addr.addr)->sin_port =
+      reinterpret_cast<sockaddr_in*>(&bind_addr.addr)->sin_port;
+  char send_buf[200];
+  RandomizeBuffer(send_buf, sizeof(send_buf));
+  ASSERT_THAT(RetryEINTR(sendto)(socket->get(), send_buf, sizeof(send_buf), 0,
+                                 reinterpret_cast<sockaddr*>(&send_addr.addr),
+                                 send_addr.addr_len),
+              SyscallSucceedsWithValue(sizeof(send_buf)));
+
+  // Check that we received the multicast packet.
+  char recv_buf[sizeof(send_buf)] = {};
+  ASSERT_THAT(RetryEINTR(recv)(socket->get(), recv_buf, sizeof(recv_buf), 0),
+              SyscallSucceedsWithValue(sizeof(recv_buf)));
+
+  EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf)));
+}
+
+// Check that multicast packets won't be delivered to the sending socket with no
+// set interface and IP_MULTICAST_LOOP disabled.
+TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest,
+       TestSendMulticastSelfLoopOff) {
+  auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  auto bind_addr = V4Any();
+  ASSERT_THAT(bind(socket->get(), reinterpret_cast<sockaddr*>(&bind_addr.addr),
+                   bind_addr.addr_len),
+              SyscallSucceeds());
+  socklen_t bind_addr_len = bind_addr.addr_len;
+  ASSERT_THAT(
+      getsockname(socket->get(), reinterpret_cast<sockaddr*>(&bind_addr.addr),
+                  &bind_addr_len),
+      SyscallSucceeds());
+  EXPECT_EQ(bind_addr_len, bind_addr.addr_len);
+
+  // Disable multicast looping.
+  EXPECT_THAT(setsockopt(socket->get(), IPPROTO_IP, IP_MULTICAST_LOOP,
+                         &kSockOptOff, sizeof(kSockOptOff)),
+              SyscallSucceeds());
+
+  // Register to receive multicast packets.
+  ip_mreq group = {};
+  group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress);
+  EXPECT_THAT(setsockopt(socket->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group,
+                         sizeof(group)),
+              SyscallSucceeds());
+
+  // Send a multicast packet.
+  auto send_addr = V4Multicast();
+  reinterpret_cast<sockaddr_in*>(&send_addr.addr)->sin_port =
+      reinterpret_cast<sockaddr_in*>(&bind_addr.addr)->sin_port;
+  char send_buf[200];
+  RandomizeBuffer(send_buf, sizeof(send_buf));
+  ASSERT_THAT(RetryEINTR(sendto)(socket->get(), send_buf, sizeof(send_buf), 0,
+                                 reinterpret_cast<sockaddr*>(&send_addr.addr),
+                                 send_addr.addr_len),
+              SyscallSucceedsWithValue(sizeof(send_buf)));
+
+  // Check that we did not receive the multicast packet.
+  char recv_buf[sizeof(send_buf)] = {};
+  EXPECT_THAT(
+      RetryEINTR(recv)(socket->get(), recv_buf, sizeof(recv_buf), MSG_DONTWAIT),
+      SyscallFailsWithErrno(EAGAIN));
+}
+
+// Check that multicast packets won't be delivered to another socket with no
+// set interface or group membership.
+TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, TestSendMulticastNoGroup) {
+  // FIXME(b/125485338): A group membership is not required for external
+  // multicast on gVisor.
+  SKIP_IF(IsRunningOnGvisor());
+
+  auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto receiver = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  // Bind the second FD to the v4 any address to ensure that we can receive the
+  // multicast packet.
+  auto receiver_addr = V4Any();
+  ASSERT_THAT(
+      bind(receiver->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+           receiver_addr.addr_len),
+      SyscallSucceeds());
+  socklen_t receiver_addr_len = receiver_addr.addr_len;
+  ASSERT_THAT(getsockname(receiver->get(),
+                          reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+                          &receiver_addr_len),
+              SyscallSucceeds());
+  EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len);
+
+  // Send a multicast packet.
+  auto send_addr = V4Multicast();
+  reinterpret_cast<sockaddr_in*>(&send_addr.addr)->sin_port =
+      reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port;
+  char send_buf[200];
+  RandomizeBuffer(send_buf, sizeof(send_buf));
+  ASSERT_THAT(RetryEINTR(sendto)(sender->get(), send_buf, sizeof(send_buf), 0,
+                                 reinterpret_cast<sockaddr*>(&send_addr.addr),
+                                 send_addr.addr_len),
+              SyscallSucceedsWithValue(sizeof(send_buf)));
+
+  // Check that we did not receive the multicast packet.
+  char recv_buf[sizeof(send_buf)] = {};
+  ASSERT_THAT(RetryEINTR(recv)(receiver->get(), recv_buf, sizeof(recv_buf),
+                               MSG_DONTWAIT),
+              SyscallFailsWithErrno(EAGAIN));
+}
+
+// Check that multicast packets will be delivered to another socket without
+// setting an interface.
+TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, TestSendMulticast) {
+  auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto receiver = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  // Bind the second FD to the v4 any address to ensure that we can receive the
+  // multicast packet.
+  auto receiver_addr = V4Any();
+  ASSERT_THAT(
+      bind(receiver->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+           receiver_addr.addr_len),
+      SyscallSucceeds());
+  socklen_t receiver_addr_len = receiver_addr.addr_len;
+  ASSERT_THAT(getsockname(receiver->get(),
+                          reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+                          &receiver_addr_len),
+              SyscallSucceeds());
+  EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len);
+
+  // Register to receive multicast packets.
+  ip_mreqn group = {};
+  group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress);
+  ASSERT_THAT(setsockopt(receiver->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group,
+                         sizeof(group)),
+              SyscallSucceeds());
+
+  // Send a multicast packet.
+  auto send_addr = V4Multicast();
+  reinterpret_cast<sockaddr_in*>(&send_addr.addr)->sin_port =
+      reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port;
+  char send_buf[200];
+  RandomizeBuffer(send_buf, sizeof(send_buf));
+  ASSERT_THAT(RetryEINTR(sendto)(sender->get(), send_buf, sizeof(send_buf), 0,
+                                 reinterpret_cast<sockaddr*>(&send_addr.addr),
+                                 send_addr.addr_len),
+              SyscallSucceedsWithValue(sizeof(send_buf)));
+
+  // Check that we received the multicast packet.
+  char recv_buf[sizeof(send_buf)] = {};
+  ASSERT_THAT(RetryEINTR(recv)(receiver->get(), recv_buf, sizeof(recv_buf), 0),
+              SyscallSucceedsWithValue(sizeof(recv_buf)));
+
+  EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf)));
+}
+
+// Check that multicast packets won't be delivered to another socket with no
+// set interface and IP_MULTICAST_LOOP disabled on the sending socket.
+TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest,
+       TestSendMulticastSenderNoLoop) {
+  auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto receiver = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  // Bind the second FD to the v4 any address to ensure that we can receive the
+  // multicast packet.
+  auto receiver_addr = V4Any();
+  ASSERT_THAT(
+      bind(receiver->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+           receiver_addr.addr_len),
+      SyscallSucceeds());
+  socklen_t receiver_addr_len = receiver_addr.addr_len;
+  ASSERT_THAT(getsockname(receiver->get(),
+                          reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+                          &receiver_addr_len),
+              SyscallSucceeds());
+  EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len);
+
+  // Disable multicast looping on the sender.
+  EXPECT_THAT(setsockopt(sender->get(), IPPROTO_IP, IP_MULTICAST_LOOP,
+                         &kSockOptOff, sizeof(kSockOptOff)),
+              SyscallSucceeds());
+
+  // Register to receive multicast packets.
+  ip_mreqn group = {};
+  group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress);
+  EXPECT_THAT(setsockopt(receiver->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group,
+                         sizeof(group)),
+              SyscallSucceeds());
+
+  // Send a multicast packet.
+  auto send_addr = V4Multicast();
+  reinterpret_cast<sockaddr_in*>(&send_addr.addr)->sin_port =
+      reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port;
+  char send_buf[200];
+  RandomizeBuffer(send_buf, sizeof(send_buf));
+  ASSERT_THAT(RetryEINTR(sendto)(sender->get(), send_buf, sizeof(send_buf), 0,
+                                 reinterpret_cast<sockaddr*>(&send_addr.addr),
+                                 send_addr.addr_len),
+              SyscallSucceedsWithValue(sizeof(send_buf)));
+
+  // Check that we did not receive the multicast packet.
+  char recv_buf[sizeof(send_buf)] = {};
+  ASSERT_THAT(RetryEINTR(recv)(receiver->get(), recv_buf, sizeof(recv_buf),
+                               MSG_DONTWAIT),
+              SyscallFailsWithErrno(EAGAIN));
+}
+
+// Check that multicast packets will be delivered to the sending socket without
+// setting an interface and IP_MULTICAST_LOOP disabled on the receiving socket.
+TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest,
+       TestSendMulticastReceiverNoLoop) {
+  auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto receiver = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  // Bind the second FD to the v4 any address to ensure that we can receive the
+  // multicast packet.
+  auto receiver_addr = V4Any();
+  ASSERT_THAT(
+      bind(receiver->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+           receiver_addr.addr_len),
+      SyscallSucceeds());
+  socklen_t receiver_addr_len = receiver_addr.addr_len;
+  ASSERT_THAT(getsockname(receiver->get(),
+                          reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+                          &receiver_addr_len),
+              SyscallSucceeds());
+  EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len);
+
+  // Disable multicast looping on the receiver.
+  ASSERT_THAT(setsockopt(receiver->get(), IPPROTO_IP, IP_MULTICAST_LOOP,
+                         &kSockOptOff, sizeof(kSockOptOff)),
+              SyscallSucceeds());
+
+  // Register to receive multicast packets.
+  ip_mreqn group = {};
+  group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress);
+  ASSERT_THAT(setsockopt(receiver->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group,
+                         sizeof(group)),
+              SyscallSucceeds());
+
+  // Send a multicast packet.
+  auto send_addr = V4Multicast();
+  reinterpret_cast<sockaddr_in*>(&send_addr.addr)->sin_port =
+      reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port;
+  char send_buf[200];
+  RandomizeBuffer(send_buf, sizeof(send_buf));
+  ASSERT_THAT(RetryEINTR(sendto)(sender->get(), send_buf, sizeof(send_buf), 0,
+                                 reinterpret_cast<sockaddr*>(&send_addr.addr),
+                                 send_addr.addr_len),
+              SyscallSucceedsWithValue(sizeof(send_buf)));
+
+  // Check that we received the multicast packet.
+  char recv_buf[sizeof(send_buf)] = {};
+  ASSERT_THAT(RetryEINTR(recv)(receiver->get(), recv_buf, sizeof(recv_buf), 0),
+              SyscallSucceedsWithValue(sizeof(recv_buf)));
+
+  EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf)));
+}
+
+// Check that two sockets can join the same multicast group at the same time,
+// and both will receive data on it when bound to the ANY address.
+TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest,
+       TestSendMulticastToTwoBoundToAny) {
+  auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  std::unique_ptr<FileDescriptor> receivers[2] = {
+      ASSERT_NO_ERRNO_AND_VALUE(NewSocket()),
+      ASSERT_NO_ERRNO_AND_VALUE(NewSocket())};
+
+  ip_mreq group = {};
+  group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress);
+  auto receiver_addr = V4Any();
+  int bound_port = 0;
+  for (auto& receiver : receivers) {
+    ASSERT_THAT(setsockopt(receiver->get(), SOL_SOCKET, SO_REUSEPORT,
+                           &kSockOptOn, sizeof(kSockOptOn)),
+                SyscallSucceeds());
+    // Bind to ANY to receive multicast packets.
+    ASSERT_THAT(
+        bind(receiver->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+             receiver_addr.addr_len),
+        SyscallSucceeds());
+    socklen_t receiver_addr_len = receiver_addr.addr_len;
+    ASSERT_THAT(getsockname(receiver->get(),
+                            reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+                            &receiver_addr_len),
+                SyscallSucceeds());
+    EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len);
+    EXPECT_EQ(
+        htonl(INADDR_ANY),
+        reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_addr.s_addr);
+    // On the first iteration, save the port we are bound to. On the second
+    // iteration, verify the port is the same as the one from the first
+    // iteration. In other words, both sockets listen on the same port.
+    if (bound_port == 0) {
+      bound_port =
+          reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port;
+    } else {
+      EXPECT_EQ(bound_port,
+                reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port);
+    }
+
+    // Register to receive multicast packets.
+    ASSERT_THAT(setsockopt(receiver->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP,
+                           &group, sizeof(group)),
+                SyscallSucceeds());
+  }
+
+  // Send a multicast packet to the group and verify both receivers get it.
+  auto send_addr = V4Multicast();
+  reinterpret_cast<sockaddr_in*>(&send_addr.addr)->sin_port = bound_port;
+  char send_buf[200];
+  RandomizeBuffer(send_buf, sizeof(send_buf));
+  ASSERT_THAT(RetryEINTR(sendto)(sender->get(), send_buf, sizeof(send_buf), 0,
+                                 reinterpret_cast<sockaddr*>(&send_addr.addr),
+                                 send_addr.addr_len),
+              SyscallSucceedsWithValue(sizeof(send_buf)));
+  for (auto& receiver : receivers) {
+    char recv_buf[sizeof(send_buf)] = {};
+    ASSERT_THAT(
+        RetryEINTR(recv)(receiver->get(), recv_buf, sizeof(recv_buf), 0),
+        SyscallSucceedsWithValue(sizeof(recv_buf)));
+    EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf)));
+  }
+}
+
+// Check that two sockets can join the same multicast group at the same time,
+// and both will receive data on it when bound to the multicast address.
+TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest,
+       TestSendMulticastToTwoBoundToMulticastAddress) {
+  auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  std::unique_ptr<FileDescriptor> receivers[2] = {
+      ASSERT_NO_ERRNO_AND_VALUE(NewSocket()),
+      ASSERT_NO_ERRNO_AND_VALUE(NewSocket())};
+
+  ip_mreq group = {};
+  group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress);
+  auto receiver_addr = V4Multicast();
+  int bound_port = 0;
+  for (auto& receiver : receivers) {
+    ASSERT_THAT(setsockopt(receiver->get(), SOL_SOCKET, SO_REUSEPORT,
+                           &kSockOptOn, sizeof(kSockOptOn)),
+                SyscallSucceeds());
+    ASSERT_THAT(
+        bind(receiver->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+             receiver_addr.addr_len),
+        SyscallSucceeds());
+    socklen_t receiver_addr_len = receiver_addr.addr_len;
+    ASSERT_THAT(getsockname(receiver->get(),
+                            reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+                            &receiver_addr_len),
+                SyscallSucceeds());
+    EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len);
+    EXPECT_EQ(
+        inet_addr(kMulticastAddress),
+        reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_addr.s_addr);
+    // On the first iteration, save the port we are bound to. On the second
+    // iteration, verify the port is the same as the one from the first
+    // iteration. In other words, both sockets listen on the same port.
+    if (bound_port == 0) {
+      bound_port =
+          reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port;
+    } else {
+      EXPECT_EQ(
+          inet_addr(kMulticastAddress),
+          reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_addr.s_addr);
+      EXPECT_EQ(bound_port,
+                reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port);
+    }
+
+    // Register to receive multicast packets.
+    ASSERT_THAT(setsockopt(receiver->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP,
+                           &group, sizeof(group)),
+                SyscallSucceeds());
+  }
+
+  // Send a multicast packet to the group and verify both receivers get it.
+  auto send_addr = V4Multicast();
+  reinterpret_cast<sockaddr_in*>(&send_addr.addr)->sin_port = bound_port;
+  char send_buf[200];
+  RandomizeBuffer(send_buf, sizeof(send_buf));
+  ASSERT_THAT(RetryEINTR(sendto)(sender->get(), send_buf, sizeof(send_buf), 0,
+                                 reinterpret_cast<sockaddr*>(&send_addr.addr),
+                                 send_addr.addr_len),
+              SyscallSucceedsWithValue(sizeof(send_buf)));
+  for (auto& receiver : receivers) {
+    char recv_buf[sizeof(send_buf)] = {};
+    ASSERT_THAT(
+        RetryEINTR(recv)(receiver->get(), recv_buf, sizeof(recv_buf), 0),
+        SyscallSucceedsWithValue(sizeof(recv_buf)));
+    EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf)));
+  }
+}
+
+// Check that two sockets can join the same multicast group at the same time,
+// and with one bound to the wildcard address and the other bound to the
+// multicast address, both will receive data.
+TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest,
+       TestSendMulticastToTwoBoundToAnyAndMulticastAddress) {
+  auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  std::unique_ptr<FileDescriptor> receivers[2] = {
+      ASSERT_NO_ERRNO_AND_VALUE(NewSocket()),
+      ASSERT_NO_ERRNO_AND_VALUE(NewSocket())};
+
+  ip_mreq group = {};
+  group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress);
+  // The first receiver binds to the wildcard address.
+  auto receiver_addr = V4Any();
+  int bound_port = 0;
+  for (auto& receiver : receivers) {
+    ASSERT_THAT(setsockopt(receiver->get(), SOL_SOCKET, SO_REUSEPORT,
+                           &kSockOptOn, sizeof(kSockOptOn)),
+                SyscallSucceeds());
+    ASSERT_THAT(
+        bind(receiver->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+             receiver_addr.addr_len),
+        SyscallSucceeds());
+    socklen_t receiver_addr_len = receiver_addr.addr_len;
+    ASSERT_THAT(getsockname(receiver->get(),
+                            reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+                            &receiver_addr_len),
+                SyscallSucceeds());
+    EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len);
+    // On the first iteration, save the port we are bound to and change the
+    // receiver address from V4Any to V4Multicast so the second receiver binds
+    // to that. On the second iteration, verify the port is the same as the one
+    // from the first iteration but the address is different.
+    if (bound_port == 0) {
+      EXPECT_EQ(
+          htonl(INADDR_ANY),
+          reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_addr.s_addr);
+      bound_port =
+          reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port;
+      receiver_addr = V4Multicast();
+      reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port =
+          bound_port;
+    } else {
+      EXPECT_EQ(
+          inet_addr(kMulticastAddress),
+          reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_addr.s_addr);
+      EXPECT_EQ(bound_port,
+                reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port);
+    }
+
+    // Register to receive multicast packets.
+    ASSERT_THAT(setsockopt(receiver->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP,
+                           &group, sizeof(group)),
+                SyscallSucceeds());
+  }
+
+  // Send a multicast packet to the group and verify both receivers get it.
+  auto send_addr = V4Multicast();
+  reinterpret_cast<sockaddr_in*>(&send_addr.addr)->sin_port = bound_port;
+  char send_buf[200];
+  RandomizeBuffer(send_buf, sizeof(send_buf));
+  ASSERT_THAT(RetryEINTR(sendto)(sender->get(), send_buf, sizeof(send_buf), 0,
+                                 reinterpret_cast<sockaddr*>(&send_addr.addr),
+                                 send_addr.addr_len),
+              SyscallSucceedsWithValue(sizeof(send_buf)));
+  for (auto& receiver : receivers) {
+    char recv_buf[sizeof(send_buf)] = {};
+    ASSERT_THAT(
+        RetryEINTR(recv)(receiver->get(), recv_buf, sizeof(recv_buf), 0),
+        SyscallSucceedsWithValue(sizeof(recv_buf)));
+    EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf)));
+  }
+}
+
+// Check that when receiving a looped-back multicast packet, its source address
+// is not a multicast address.
+TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest,
+       IpMulticastLoopbackFromAddr) {
+  auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto receiver = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+  auto receiver_addr = V4Any();
+  ASSERT_THAT(
+      bind(receiver->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+           receiver_addr.addr_len),
+      SyscallSucceeds());
+  socklen_t receiver_addr_len = receiver_addr.addr_len;
+  ASSERT_THAT(getsockname(receiver->get(),
+                          reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+                          &receiver_addr_len),
+              SyscallSucceeds());
+  EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len);
+  int receiver_port =
+      reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port;
+
+  ip_mreq group = {};
+  group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress);
+  ASSERT_THAT(setsockopt(receiver->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group,
+                         sizeof(group)),
+              SyscallSucceeds());
+
+  // Connect to the multicast address. This binds us to the outgoing interface
+  // and allows us to get its IP (to be compared against the src-IP on the
+  // receiver side).
+  auto sendto_addr = V4Multicast();
+  reinterpret_cast<sockaddr_in*>(&sendto_addr.addr)->sin_port = receiver_port;
+  ASSERT_THAT(RetryEINTR(connect)(
+                  sender->get(), reinterpret_cast<sockaddr*>(&sendto_addr.addr),
+                  sendto_addr.addr_len),
+              SyscallSucceeds());
+  auto sender_addr = V4EmptyAddress();
+  ASSERT_THAT(
+      getsockname(sender->get(), reinterpret_cast<sockaddr*>(&sender_addr.addr),
+                  &sender_addr.addr_len),
+      SyscallSucceeds());
+  ASSERT_EQ(sizeof(struct sockaddr_in), sender_addr.addr_len);
+  sockaddr_in* sender_addr_in =
+      reinterpret_cast<sockaddr_in*>(&sender_addr.addr);
+
+  // Send a multicast packet.
+  char send_buf[4] = {};
+  ASSERT_THAT(RetryEINTR(send)(sender->get(), send_buf, sizeof(send_buf), 0),
+              SyscallSucceedsWithValue(sizeof(send_buf)));
+
+  // Receive a multicast packet.
+  char recv_buf[sizeof(send_buf)] = {};
+  auto src_addr = V4EmptyAddress();
+  ASSERT_THAT(
+      RetryEINTR(recvfrom)(receiver->get(), recv_buf, sizeof(recv_buf), 0,
+                           reinterpret_cast<sockaddr*>(&src_addr.addr),
+                           &src_addr.addr_len),
+      SyscallSucceedsWithValue(sizeof(recv_buf)));
+  ASSERT_EQ(sizeof(struct sockaddr_in), src_addr.addr_len);
+  sockaddr_in* src_addr_in = reinterpret_cast<sockaddr_in*>(&src_addr.addr);
+
+  // Verify that the received source IP:port matches the sender one.
+  EXPECT_EQ(sender_addr_in->sin_port, src_addr_in->sin_port);
+  EXPECT_EQ(sender_addr_in->sin_addr.s_addr, src_addr_in->sin_addr.s_addr);
+}
+
+// Check that when setting the IP_MULTICAST_IF option to both an index pointing
+// to the loopback interface and an address pointing to the non-loopback
+// interface, a multicast packet sent out uses the latter as its source address.
+TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest,
+       IpMulticastLoopbackIfNicAndAddr) {
+  // FIXME(b/137899561): Linux instance for syscall tests sometimes misses its
+  // IPv4 address on eth0.
+  SKIP_IF(!got_if_infos_);
+
+  // Create receiver, bind to ANY and join the multicast group.
+  auto receiver = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  auto receiver_addr = V4Any();
+  ASSERT_THAT(
+      bind(receiver->get(), reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+           receiver_addr.addr_len),
+      SyscallSucceeds());
+  socklen_t receiver_addr_len = receiver_addr.addr_len;
+  ASSERT_THAT(getsockname(receiver->get(),
+                          reinterpret_cast<sockaddr*>(&receiver_addr.addr),
+                          &receiver_addr_len),
+              SyscallSucceeds());
+  EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len);
+  int receiver_port =
+      reinterpret_cast<sockaddr_in*>(&receiver_addr.addr)->sin_port;
+  ip_mreqn group = {};
+  group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress);
+  group.imr_ifindex = lo_if_idx_;
+  ASSERT_THAT(setsockopt(receiver->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group,
+                         sizeof(group)),
+              SyscallSucceeds());
+
+  // Set outgoing multicast interface config, with NIC and addr pointing to
+  // different interfaces.
+  auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  ip_mreqn iface = {};
+  iface.imr_ifindex = lo_if_idx_;
+  iface.imr_address = eth_if_addr_.sin_addr;
+  ASSERT_THAT(setsockopt(sender->get(), IPPROTO_IP, IP_MULTICAST_IF, &iface,
+                         sizeof(iface)),
+              SyscallSucceeds());
+
+  // Send a multicast packet.
+  auto sendto_addr = V4Multicast();
+  reinterpret_cast<sockaddr_in*>(&sendto_addr.addr)->sin_port = receiver_port;
+  char send_buf[4] = {};
+  ASSERT_THAT(RetryEINTR(sendto)(sender->get(), send_buf, sizeof(send_buf), 0,
+                                 reinterpret_cast<sockaddr*>(&sendto_addr.addr),
+                                 sendto_addr.addr_len),
+              SyscallSucceedsWithValue(sizeof(send_buf)));
+
+  // Receive a multicast packet.
+  char recv_buf[sizeof(send_buf)] = {};
+  auto src_addr = V4EmptyAddress();
+  ASSERT_THAT(
+      RetryEINTR(recvfrom)(receiver->get(), recv_buf, sizeof(recv_buf), 0,
+                           reinterpret_cast<sockaddr*>(&src_addr.addr),
+                           &src_addr.addr_len),
+      SyscallSucceedsWithValue(sizeof(recv_buf)));
+  ASSERT_EQ(sizeof(struct sockaddr_in), src_addr.addr_len);
+  sockaddr_in* src_addr_in = reinterpret_cast<sockaddr_in*>(&src_addr.addr);
+
+  // FIXME (b/137781162): When sending a multicast packet use the proper logic
+  // to determine the packet's src-IP.
+  SKIP_IF(IsRunningOnGvisor());
+
+  // Verify the received source address.
+  EXPECT_EQ(eth_if_addr_.sin_addr.s_addr, src_addr_in->sin_addr.s_addr);
+}
+
+// Check that when we are bound to one interface we can set IP_MULTICAST_IF to
+// another interface.
+TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest,
+       IpMulticastLoopbackBindToOneIfSetMcastIfToAnother) {
+  // FIXME(b/137899561): Linux instance for syscall tests sometimes misses its
+  // IPv4 address on eth0.
+  SKIP_IF(!got_if_infos_);
+
+  // FIXME (b/137790511): When bound to one interface it is not possible to set
+  // IP_MULTICAST_IF to a different interface.
+  SKIP_IF(IsRunningOnGvisor());
+
+  // Create sender and bind to eth interface.
+  auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+  ASSERT_THAT(bind(sender->get(), reinterpret_cast<sockaddr*>(&eth_if_addr_),
+                   sizeof(eth_if_addr_)),
+              SyscallSucceeds());
+
+  // Run through all possible combinations of index and address for
+  // IP_MULTICAST_IF that selects the loopback interface.
+  struct {
+    int imr_ifindex;
+    struct in_addr imr_address;
+  } test_data[] = {
+      {lo_if_idx_, {}},
+      {0, lo_if_addr_.sin_addr},
+      {lo_if_idx_, lo_if_addr_.sin_addr},
+      {lo_if_idx_, eth_if_addr_.sin_addr},
+  };
+  for (auto t : test_data) {
+    ip_mreqn iface = {};
+    iface.imr_ifindex = t.imr_ifindex;
+    iface.imr_address = t.imr_address;
+    EXPECT_THAT(setsockopt(sender->get(), IPPROTO_IP, IP_MULTICAST_IF, &iface,
+                           sizeof(iface)),
+                SyscallSucceeds())
+        << "imr_index=" << iface.imr_ifindex
+        << " imr_address=" << GetAddr4Str(&iface.imr_address);
+  }
+}
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking.h b/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking.h
new file mode 100644
index 000000000..10b90b1e0
--- /dev/null
+++ b/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking.h
@@ -0,0 +1,46 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IPV4_UDP_UNBOUND_EXTERNAL_NETWORKING_H_
+#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IPV4_UDP_UNBOUND_EXTERNAL_NETWORKING_H_
+
+#include "test/syscalls/linux/ip_socket_test_util.h"
+#include "test/syscalls/linux/socket_test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// Test fixture for tests that apply to unbound IPv4 UDP sockets in a sandbox
+// with external networking support.
+class IPv4UDPUnboundExternalNetworkingSocketTest : public SimpleSocketTest {
+ protected:
+  void SetUp();
+
+  IfAddrHelper if_helper_;
+
+  // got_if_infos_ is set to false if SetUp() could not obtain all interface
+  // infos that we need.
+  bool got_if_infos_;
+
+  // Interface infos.
+  int lo_if_idx_;
+  int eth_if_idx_;
+  sockaddr_in lo_if_addr_;
+  sockaddr_in eth_if_addr_;
+};
+
+}  // namespace testing
+}  // namespace gvisor
+
+#endif  // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IPV4_UDP_UNBOUND_EXTERNAL_NETWORKING_H_
diff --git a/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking_test.cc b/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking_test.cc
new file mode 100644
index 000000000..f6e64c157
--- /dev/null
+++ b/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking_test.cc
@@ -0,0 +1,39 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test/syscalls/linux/socket_ipv4_udp_unbound_external_networking.h"
+
+#include <vector>
+
+#include "test/syscalls/linux/ip_socket_test_util.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+std::vector<SocketKind> GetSockets() {
+  return ApplyVec<SocketKind>(
+      IPv4UDPUnboundSocket,
+      AllBitwiseCombinations(List<int>{0, SOCK_NONBLOCK}));
+}
+
+INSTANTIATE_TEST_SUITE_P(IPv4UDPUnboundSockets,
+                         IPv4UDPUnboundExternalNetworkingSocketTest,
+                         ::testing::ValuesIn(GetSockets()));
+
+}  // namespace
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_ipv4_udp_unbound_loopback.cc b/test/syscalls/linux/socket_ipv4_udp_unbound_loopback.cc
new file mode 100644
index 000000000..f121c044d
--- /dev/null
+++ b/test/syscalls/linux/socket_ipv4_udp_unbound_loopback.cc
@@ -0,0 +1,32 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <vector>
+
+#include "test/syscalls/linux/ip_socket_test_util.h"
+#include "test/syscalls/linux/socket_ipv4_udp_unbound.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+INSTANTIATE_TEST_SUITE_P(
+    IPv4UDPSockets, IPv4UDPUnboundSocketTest,
+    ::testing::ValuesIn(ApplyVec<SocketKind>(IPv4UDPUnboundSocket,
+                                             AllBitwiseCombinations(List<int>{
+                                                 0, SOCK_NONBLOCK}))));
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_netdevice.cc b/test/syscalls/linux/socket_netdevice.cc
new file mode 100644
index 000000000..15d4b85a7
--- /dev/null
+++ b/test/syscalls/linux/socket_netdevice.cc
@@ -0,0 +1,184 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <linux/sockios.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+
+#include "gtest/gtest.h"
+#include "absl/base/internal/endian.h"
+#include "test/syscalls/linux/socket_netlink_util.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/test_util.h"
+
+// Tests for netdevice queries.
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+using ::testing::AnyOf;
+using ::testing::Eq;
+
+TEST(NetdeviceTest, Loopback) {
+  FileDescriptor sock =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, 0));
+
+  // Prepare the request.
+  struct ifreq ifr;
+  snprintf(ifr.ifr_name, IFNAMSIZ, "lo");
+
+  // Check for a non-zero interface index.
+  ASSERT_THAT(ioctl(sock.get(), SIOCGIFINDEX, &ifr), SyscallSucceeds());
+  EXPECT_NE(ifr.ifr_ifindex, 0);
+
+  // Check that the loopback is zero hardware address.
+  ASSERT_THAT(ioctl(sock.get(), SIOCGIFHWADDR, &ifr), SyscallSucceeds());
+  EXPECT_EQ(ifr.ifr_hwaddr.sa_data[0], 0);
+  EXPECT_EQ(ifr.ifr_hwaddr.sa_data[1], 0);
+  EXPECT_EQ(ifr.ifr_hwaddr.sa_data[2], 0);
+  EXPECT_EQ(ifr.ifr_hwaddr.sa_data[3], 0);
+  EXPECT_EQ(ifr.ifr_hwaddr.sa_data[4], 0);
+  EXPECT_EQ(ifr.ifr_hwaddr.sa_data[5], 0);
+}
+
+TEST(NetdeviceTest, Netmask) {
+  // We need an interface index to identify the loopback device.
+  FileDescriptor sock =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, 0));
+  struct ifreq ifr;
+  snprintf(ifr.ifr_name, IFNAMSIZ, "lo");
+  ASSERT_THAT(ioctl(sock.get(), SIOCGIFINDEX, &ifr), SyscallSucceeds());
+  EXPECT_NE(ifr.ifr_ifindex, 0);
+
+  // Use a netlink socket to get the netmask, which we'll then compare to the
+  // netmask obtained via ioctl.
+  FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE));
+  uint32_t port = ASSERT_NO_ERRNO_AND_VALUE(NetlinkPortID(fd.get()));
+
+  struct request {
+    struct nlmsghdr hdr;
+    struct rtgenmsg rgm;
+  };
+
+  constexpr uint32_t kSeq = 12345;
+
+  struct request req;
+  req.hdr.nlmsg_len = sizeof(req);
+  req.hdr.nlmsg_type = RTM_GETADDR;
+  req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
+  req.hdr.nlmsg_seq = kSeq;
+  req.rgm.rtgen_family = AF_UNSPEC;
+
+  // Iterate through messages until we find the one containing the prefix length
+  // (i.e. netmask) for the loopback device.
+  int prefixlen = -1;
+  ASSERT_NO_ERRNO(NetlinkRequestResponse(
+      fd, &req, sizeof(req),
+      [&](const struct nlmsghdr* hdr) {
+        EXPECT_THAT(hdr->nlmsg_type, AnyOf(Eq(RTM_NEWADDR), Eq(NLMSG_DONE)));
+
+        EXPECT_TRUE((hdr->nlmsg_flags & NLM_F_MULTI) == NLM_F_MULTI)
+            << std::hex << hdr->nlmsg_flags;
+
+        EXPECT_EQ(hdr->nlmsg_seq, kSeq);
+        EXPECT_EQ(hdr->nlmsg_pid, port);
+
+        if (hdr->nlmsg_type != RTM_NEWADDR) {
+          return;
+        }
+
+        // RTM_NEWADDR contains at least the header and ifaddrmsg.
+        EXPECT_GE(hdr->nlmsg_len, sizeof(*hdr) + sizeof(struct ifaddrmsg));
+
+        struct ifaddrmsg* ifaddrmsg =
+            reinterpret_cast<struct ifaddrmsg*>(NLMSG_DATA(hdr));
+        if (ifaddrmsg->ifa_index == static_cast<uint32_t>(ifr.ifr_ifindex) &&
+            ifaddrmsg->ifa_family == AF_INET) {
+          prefixlen = ifaddrmsg->ifa_prefixlen;
+        }
+      },
+      false));
+
+  ASSERT_GE(prefixlen, 0);
+
+  // Netmask is stored big endian in struct sockaddr_in, so we do the same for
+  // comparison.
+  uint32_t mask = 0xffffffff << (32 - prefixlen);
+  mask = absl::gbswap_32(mask);
+
+  // Check that the loopback interface has the correct subnet mask.
+  snprintf(ifr.ifr_name, IFNAMSIZ, "lo");
+  ASSERT_THAT(ioctl(sock.get(), SIOCGIFNETMASK, &ifr), SyscallSucceeds());
+  EXPECT_EQ(ifr.ifr_netmask.sa_family, AF_INET);
+  struct sockaddr_in* sin =
+      reinterpret_cast<struct sockaddr_in*>(&ifr.ifr_netmask);
+  EXPECT_EQ(sin->sin_addr.s_addr, mask);
+}
+
+TEST(NetdeviceTest, InterfaceName) {
+  FileDescriptor sock =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, 0));
+
+  // Prepare the request.
+  struct ifreq ifr;
+  snprintf(ifr.ifr_name, IFNAMSIZ, "lo");
+
+  // Check for a non-zero interface index.
+  ASSERT_THAT(ioctl(sock.get(), SIOCGIFINDEX, &ifr), SyscallSucceeds());
+  EXPECT_NE(ifr.ifr_ifindex, 0);
+
+  // Check that SIOCGIFNAME finds the loopback interface.
+  snprintf(ifr.ifr_name, IFNAMSIZ, "foo");
+  ASSERT_THAT(ioctl(sock.get(), SIOCGIFNAME, &ifr), SyscallSucceeds());
+  EXPECT_STREQ(ifr.ifr_name, "lo");
+}
+
+TEST(NetdeviceTest, InterfaceFlags) {
+  FileDescriptor sock =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, 0));
+
+  // Prepare the request.
+  struct ifreq ifr;
+  snprintf(ifr.ifr_name, IFNAMSIZ, "lo");
+
+  // Check that SIOCGIFFLAGS marks the interface with IFF_LOOPBACK, IFF_UP, and
+  // IFF_RUNNING.
+  ASSERT_THAT(ioctl(sock.get(), SIOCGIFFLAGS, &ifr), SyscallSucceeds());
+  EXPECT_EQ(ifr.ifr_flags & IFF_UP, IFF_UP);
+  EXPECT_EQ(ifr.ifr_flags & IFF_RUNNING, IFF_RUNNING);
+}
+
+TEST(NetdeviceTest, InterfaceMTU) {
+  FileDescriptor sock =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, 0));
+
+  // Prepare the request.
+  struct ifreq ifr = {};
+  snprintf(ifr.ifr_name, IFNAMSIZ, "lo");
+
+  // Check that SIOCGIFMTU returns a nonzero MTU.
+  ASSERT_THAT(ioctl(sock.get(), SIOCGIFMTU, &ifr), SyscallSucceeds());
+  EXPECT_GT(ifr.ifr_mtu, 0);
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_netlink.cc b/test/syscalls/linux/socket_netlink.cc
new file mode 100644
index 000000000..4ec0fd4fa
--- /dev/null
+++ b/test/syscalls/linux/socket_netlink.cc
@@ -0,0 +1,153 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <linux/netlink.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/test_util.h"
+
+// Tests for all netlink socket protocols.
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// NetlinkTest parameter is the protocol to test.
+using NetlinkTest = ::testing::TestWithParam<int>;
+
+// Netlink sockets must be SOCK_DGRAM or SOCK_RAW.
+TEST_P(NetlinkTest, Types) {
+  const int protocol = GetParam();
+
+  EXPECT_THAT(socket(AF_NETLINK, SOCK_STREAM, protocol),
+              SyscallFailsWithErrno(ESOCKTNOSUPPORT));
+  EXPECT_THAT(socket(AF_NETLINK, SOCK_SEQPACKET, protocol),
+              SyscallFailsWithErrno(ESOCKTNOSUPPORT));
+  EXPECT_THAT(socket(AF_NETLINK, SOCK_RDM, protocol),
+              SyscallFailsWithErrno(ESOCKTNOSUPPORT));
+  EXPECT_THAT(socket(AF_NETLINK, SOCK_DCCP, protocol),
+              SyscallFailsWithErrno(ESOCKTNOSUPPORT));
+  EXPECT_THAT(socket(AF_NETLINK, SOCK_PACKET, protocol),
+              SyscallFailsWithErrno(ESOCKTNOSUPPORT));
+
+  int fd;
+  EXPECT_THAT(fd = socket(AF_NETLINK, SOCK_DGRAM, protocol), SyscallSucceeds());
+  EXPECT_THAT(close(fd), SyscallSucceeds());
+
+  EXPECT_THAT(fd = socket(AF_NETLINK, SOCK_RAW, protocol), SyscallSucceeds());
+  EXPECT_THAT(close(fd), SyscallSucceeds());
+}
+
+TEST_P(NetlinkTest, AutomaticPort) {
+  const int protocol = GetParam();
+
+  FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_NETLINK, SOCK_RAW, protocol));
+
+  struct sockaddr_nl addr = {};
+  addr.nl_family = AF_NETLINK;
+
+  EXPECT_THAT(
+      bind(fd.get(), reinterpret_cast<struct sockaddr*>(&addr), sizeof(addr)),
+      SyscallSucceeds());
+
+  socklen_t addrlen = sizeof(addr);
+  EXPECT_THAT(getsockname(fd.get(), reinterpret_cast<struct sockaddr*>(&addr),
+                          &addrlen),
+              SyscallSucceeds());
+  EXPECT_EQ(addrlen, sizeof(addr));
+  // This is the only netlink socket in the process, so it should get the PID as
+  // the port id.
+  //
+  // N.B. Another process could theoretically have explicitly reserved our pid
+  // as a port ID, but that is very unlikely.
+  EXPECT_EQ(addr.nl_pid, getpid());
+}
+
+// Calling connect automatically binds to an automatic port.
+TEST_P(NetlinkTest, ConnectBinds) {
+  const int protocol = GetParam();
+
+  FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_NETLINK, SOCK_RAW, protocol));
+
+  struct sockaddr_nl addr = {};
+  addr.nl_family = AF_NETLINK;
+
+  EXPECT_THAT(connect(fd.get(), reinterpret_cast<struct sockaddr*>(&addr),
+                      sizeof(addr)),
+              SyscallSucceeds());
+
+  socklen_t addrlen = sizeof(addr);
+  EXPECT_THAT(getsockname(fd.get(), reinterpret_cast<struct sockaddr*>(&addr),
+                          &addrlen),
+              SyscallSucceeds());
+  EXPECT_EQ(addrlen, sizeof(addr));
+
+  // Each test is running in a pid namespace, so another process can explicitly
+  // reserve our pid as a port ID. In this case, a negative portid value will be
+  // set.
+  if (static_cast<pid_t>(addr.nl_pid) > 0) {
+    EXPECT_EQ(addr.nl_pid, getpid());
+  }
+
+  memset(&addr, 0, sizeof(addr));
+  addr.nl_family = AF_NETLINK;
+
+  // Connecting again is allowed, but keeps the same port.
+  EXPECT_THAT(connect(fd.get(), reinterpret_cast<struct sockaddr*>(&addr),
+                      sizeof(addr)),
+              SyscallSucceeds());
+
+  addrlen = sizeof(addr);
+  EXPECT_THAT(getsockname(fd.get(), reinterpret_cast<struct sockaddr*>(&addr),
+                          &addrlen),
+              SyscallSucceeds());
+  EXPECT_EQ(addrlen, sizeof(addr));
+  EXPECT_EQ(addr.nl_pid, getpid());
+}
+
+TEST_P(NetlinkTest, GetPeerName) {
+  const int protocol = GetParam();
+
+  FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_NETLINK, SOCK_RAW, protocol));
+
+  struct sockaddr_nl addr = {};
+  socklen_t addrlen = sizeof(addr);
+
+  EXPECT_THAT(getpeername(fd.get(), reinterpret_cast<struct sockaddr*>(&addr),
+                          &addrlen),
+              SyscallSucceeds());
+
+  EXPECT_EQ(addrlen, sizeof(addr));
+  EXPECT_EQ(addr.nl_family, AF_NETLINK);
+  // Peer is the kernel if we didn't connect elsewhere.
+  EXPECT_EQ(addr.nl_pid, 0);
+}
+
+INSTANTIATE_TEST_SUITE_P(ProtocolTest, NetlinkTest,
+                         ::testing::Values(NETLINK_ROUTE,
+                                           NETLINK_KOBJECT_UEVENT));
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_netlink_route.cc b/test/syscalls/linux/socket_netlink_route.cc
new file mode 100644
index 000000000..e6647a1c3
--- /dev/null
+++ b/test/syscalls/linux/socket_netlink_route.cc
@@ -0,0 +1,935 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <arpa/inet.h>
+#include <ifaddrs.h>
+#include <linux/if.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <iostream>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "absl/strings/str_format.h"
+#include "test/syscalls/linux/socket_netlink_route_util.h"
+#include "test/syscalls/linux/socket_netlink_util.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/capability_util.h"
+#include "test/util/cleanup.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/test_util.h"
+
+// Tests for NETLINK_ROUTE sockets.
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+constexpr uint32_t kSeq = 12345;
+
+using ::testing::AnyOf;
+using ::testing::Eq;
+
+// Parameters for SockOptTest. They are:
+// 0: Socket option to query.
+// 1: A predicate to run on the returned sockopt value. Should return true if
+//    the value is considered ok.
+// 2: A description of what the sockopt value is expected to be. Should complete
+//    the sentence "<value> was unexpected, expected <description>"
+using SockOptTest = ::testing::TestWithParam<
+    std::tuple<int, std::function<bool(int)>, std::string>>;
+
+TEST_P(SockOptTest, GetSockOpt) {
+  int sockopt = std::get<0>(GetParam());
+  auto verifier = std::get<1>(GetParam());
+  std::string verifier_description = std::get<2>(GetParam());
+
+  FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE));
+
+  int res;
+  socklen_t len = sizeof(res);
+
+  EXPECT_THAT(getsockopt(fd.get(), SOL_SOCKET, sockopt, &res, &len),
+              SyscallSucceeds());
+
+  EXPECT_EQ(len, sizeof(res));
+  EXPECT_TRUE(verifier(res)) << absl::StrFormat(
+      "getsockopt(%d, SOL_SOCKET, %d, &res, &len) => res=%d was unexpected, "
+      "expected %s",
+      fd.get(), sockopt, res, verifier_description);
+}
+
+std::function<bool(int)> IsPositive() {
+  return [](int val) { return val > 0; };
+}
+
+std::function<bool(int)> IsEqual(int target) {
+  return [target](int val) { return val == target; };
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    NetlinkRouteTest, SockOptTest,
+    ::testing::Values(
+        std::make_tuple(SO_SNDBUF, IsPositive(), "positive send buffer size"),
+        std::make_tuple(SO_RCVBUF, IsPositive(),
+                        "positive receive buffer size"),
+        std::make_tuple(SO_TYPE, IsEqual(SOCK_RAW),
+                        absl::StrFormat("SOCK_RAW (%d)", SOCK_RAW)),
+        std::make_tuple(SO_DOMAIN, IsEqual(AF_NETLINK),
+                        absl::StrFormat("AF_NETLINK (%d)", AF_NETLINK)),
+        std::make_tuple(SO_PROTOCOL, IsEqual(NETLINK_ROUTE),
+                        absl::StrFormat("NETLINK_ROUTE (%d)", NETLINK_ROUTE)),
+        std::make_tuple(SO_PASSCRED, IsEqual(0), "0")));
+
+// Validates the reponses to RTM_GETLINK + NLM_F_DUMP.
+void CheckGetLinkResponse(const struct nlmsghdr* hdr, int seq, int port) {
+  EXPECT_THAT(hdr->nlmsg_type, AnyOf(Eq(RTM_NEWLINK), Eq(NLMSG_DONE)));
+
+  EXPECT_TRUE((hdr->nlmsg_flags & NLM_F_MULTI) == NLM_F_MULTI)
+      << std::hex << hdr->nlmsg_flags;
+
+  EXPECT_EQ(hdr->nlmsg_seq, seq);
+  EXPECT_EQ(hdr->nlmsg_pid, port);
+
+  if (hdr->nlmsg_type != RTM_NEWLINK) {
+    return;
+  }
+
+  // RTM_NEWLINK contains at least the header and ifinfomsg.
+  EXPECT_GE(hdr->nlmsg_len, NLMSG_SPACE(sizeof(struct ifinfomsg)));
+
+  // TODO(mpratt): Check ifinfomsg contents and following attrs.
+}
+
+TEST(NetlinkRouteTest, GetLinkDump) {
+  FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE));
+  uint32_t port = ASSERT_NO_ERRNO_AND_VALUE(NetlinkPortID(fd.get()));
+
+  // Loopback is common among all tests, check that it's found.
+  bool loopbackFound = false;
+  ASSERT_NO_ERRNO(DumpLinks(fd, kSeq, [&](const struct nlmsghdr* hdr) {
+    CheckGetLinkResponse(hdr, kSeq, port);
+    if (hdr->nlmsg_type != RTM_NEWLINK) {
+      return;
+    }
+    ASSERT_GE(hdr->nlmsg_len, NLMSG_SPACE(sizeof(struct ifinfomsg)));
+    const struct ifinfomsg* msg =
+        reinterpret_cast<const struct ifinfomsg*>(NLMSG_DATA(hdr));
+    std::cout << "Found interface idx=" << msg->ifi_index
+              << ", type=" << std::hex << msg->ifi_type << std::endl;
+    if (msg->ifi_type == ARPHRD_LOOPBACK) {
+      loopbackFound = true;
+      EXPECT_NE(msg->ifi_flags & IFF_LOOPBACK, 0);
+    }
+  }));
+  EXPECT_TRUE(loopbackFound);
+}
+
+// CheckLinkMsg checks a netlink message against an expected link.
+void CheckLinkMsg(const struct nlmsghdr* hdr, const Link& link) {
+  ASSERT_THAT(hdr->nlmsg_type, Eq(RTM_NEWLINK));
+  ASSERT_GE(hdr->nlmsg_len, NLMSG_SPACE(sizeof(struct ifinfomsg)));
+  const struct ifinfomsg* msg =
+      reinterpret_cast<const struct ifinfomsg*>(NLMSG_DATA(hdr));
+  EXPECT_EQ(msg->ifi_index, link.index);
+
+  const struct rtattr* rta = FindRtAttr(hdr, msg, IFLA_IFNAME);
+  EXPECT_NE(nullptr, rta) << "IFLA_IFNAME not found in message.";
+  if (rta != nullptr) {
+    std::string name(reinterpret_cast<const char*>(RTA_DATA(rta)));
+    EXPECT_EQ(name, link.name);
+  }
+}
+
+TEST(NetlinkRouteTest, GetLinkByIndex) {
+  Link loopback_link = ASSERT_NO_ERRNO_AND_VALUE(LoopbackLink());
+
+  FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE));
+
+  struct request {
+    struct nlmsghdr hdr;
+    struct ifinfomsg ifm;
+  };
+
+  struct request req = {};
+  req.hdr.nlmsg_len = sizeof(req);
+  req.hdr.nlmsg_type = RTM_GETLINK;
+  req.hdr.nlmsg_flags = NLM_F_REQUEST;
+  req.hdr.nlmsg_seq = kSeq;
+  req.ifm.ifi_family = AF_UNSPEC;
+  req.ifm.ifi_index = loopback_link.index;
+
+  bool found = false;
+  ASSERT_NO_ERRNO(NetlinkRequestResponse(
+      fd, &req, sizeof(req),
+      [&](const struct nlmsghdr* hdr) {
+        CheckLinkMsg(hdr, loopback_link);
+        found = true;
+      },
+      false));
+  EXPECT_TRUE(found) << "Netlink response does not contain any links.";
+}
+
+TEST(NetlinkRouteTest, GetLinkByName) {
+  Link loopback_link = ASSERT_NO_ERRNO_AND_VALUE(LoopbackLink());
+
+  FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE));
+
+  struct request {
+    struct nlmsghdr hdr;
+    struct ifinfomsg ifm;
+    struct rtattr rtattr;
+    char ifname[IFNAMSIZ];
+    char pad[NLMSG_ALIGNTO + RTA_ALIGNTO];
+  };
+
+  struct request req = {};
+  req.hdr.nlmsg_type = RTM_GETLINK;
+  req.hdr.nlmsg_flags = NLM_F_REQUEST;
+  req.hdr.nlmsg_seq = kSeq;
+  req.ifm.ifi_family = AF_UNSPEC;
+  req.rtattr.rta_type = IFLA_IFNAME;
+  req.rtattr.rta_len = RTA_LENGTH(loopback_link.name.size() + 1);
+  strncpy(req.ifname, loopback_link.name.c_str(), sizeof(req.ifname));
+  req.hdr.nlmsg_len =
+      NLMSG_LENGTH(sizeof(req.ifm)) + NLMSG_ALIGN(req.rtattr.rta_len);
+
+  bool found = false;
+  ASSERT_NO_ERRNO(NetlinkRequestResponse(
+      fd, &req, sizeof(req),
+      [&](const struct nlmsghdr* hdr) {
+        CheckLinkMsg(hdr, loopback_link);
+        found = true;
+      },
+      false));
+  EXPECT_TRUE(found) << "Netlink response does not contain any links.";
+}
+
+TEST(NetlinkRouteTest, GetLinkByIndexNotFound) {
+  FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE));
+
+  struct request {
+    struct nlmsghdr hdr;
+    struct ifinfomsg ifm;
+  };
+
+  struct request req = {};
+  req.hdr.nlmsg_len = sizeof(req);
+  req.hdr.nlmsg_type = RTM_GETLINK;
+  req.hdr.nlmsg_flags = NLM_F_REQUEST;
+  req.hdr.nlmsg_seq = kSeq;
+  req.ifm.ifi_family = AF_UNSPEC;
+  req.ifm.ifi_index = 1234590;
+
+  EXPECT_THAT(NetlinkRequestAckOrError(fd, kSeq, &req, sizeof(req)),
+              PosixErrorIs(ENODEV, ::testing::_));
+}
+
+TEST(NetlinkRouteTest, GetLinkByNameNotFound) {
+  const std::string name = "nodevice?!";
+
+  FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE));
+
+  struct request {
+    struct nlmsghdr hdr;
+    struct ifinfomsg ifm;
+    struct rtattr rtattr;
+    char ifname[IFNAMSIZ];
+    char pad[NLMSG_ALIGNTO + RTA_ALIGNTO];
+  };
+
+  struct request req = {};
+  req.hdr.nlmsg_type = RTM_GETLINK;
+  req.hdr.nlmsg_flags = NLM_F_REQUEST;
+  req.hdr.nlmsg_seq = kSeq;
+  req.ifm.ifi_family = AF_UNSPEC;
+  req.rtattr.rta_type = IFLA_IFNAME;
+  req.rtattr.rta_len = RTA_LENGTH(name.size() + 1);
+  strncpy(req.ifname, name.c_str(), sizeof(req.ifname));
+  req.hdr.nlmsg_len =
+      NLMSG_LENGTH(sizeof(req.ifm)) + NLMSG_ALIGN(req.rtattr.rta_len);
+
+  EXPECT_THAT(NetlinkRequestAckOrError(fd, kSeq, &req, sizeof(req)),
+              PosixErrorIs(ENODEV, ::testing::_));
+}
+
+TEST(NetlinkRouteTest, MsgHdrMsgUnsuppType) {
+  FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE));
+
+  struct request {
+    struct nlmsghdr hdr;
+    struct ifinfomsg ifm;
+  };
+
+  struct request req = {};
+  req.hdr.nlmsg_len = sizeof(req);
+  // If type & 0x3 is equal to 0x2, this means a get request
+  // which doesn't require CAP_SYS_ADMIN.
+  req.hdr.nlmsg_type = ((__RTM_MAX + 1024) & (~0x3)) | 0x2;
+  req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
+  req.hdr.nlmsg_seq = kSeq;
+  req.ifm.ifi_family = AF_UNSPEC;
+
+  EXPECT_THAT(NetlinkRequestAckOrError(fd, kSeq, &req, sizeof(req)),
+              PosixErrorIs(EOPNOTSUPP, ::testing::_));
+}
+
+TEST(NetlinkRouteTest, MsgHdrMsgTrunc) {
+  FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE));
+
+  struct request {
+    struct nlmsghdr hdr;
+    struct ifinfomsg ifm;
+  };
+
+  struct request req = {};
+  req.hdr.nlmsg_len = sizeof(req);
+  req.hdr.nlmsg_type = RTM_GETLINK;
+  req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
+  req.hdr.nlmsg_seq = kSeq;
+  req.ifm.ifi_family = AF_UNSPEC;
+
+  struct iovec iov = {};
+  iov.iov_base = &req;
+  iov.iov_len = sizeof(req);
+
+  struct msghdr msg = {};
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+  // No destination required; it defaults to pid 0, the kernel.
+
+  ASSERT_THAT(RetryEINTR(sendmsg)(fd.get(), &msg, 0), SyscallSucceeds());
+
+  // Small enough to ensure that the response doesn't fit.
+  constexpr size_t kBufferSize = 10;
+  std::vector<char> buf(kBufferSize);
+  iov.iov_base = buf.data();
+  iov.iov_len = buf.size();
+
+  ASSERT_THAT(RetryEINTR(recvmsg)(fd.get(), &msg, 0),
+              SyscallSucceedsWithValue(kBufferSize));
+  EXPECT_EQ((msg.msg_flags & MSG_TRUNC), MSG_TRUNC);
+}
+
+TEST(NetlinkRouteTest, MsgTruncMsgHdrMsgTrunc) {
+  FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE));
+
+  struct request {
+    struct nlmsghdr hdr;
+    struct ifinfomsg ifm;
+  };
+
+  struct request req = {};
+  req.hdr.nlmsg_len = sizeof(req);
+  req.hdr.nlmsg_type = RTM_GETLINK;
+  req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
+  req.hdr.nlmsg_seq = kSeq;
+  req.ifm.ifi_family = AF_UNSPEC;
+
+  struct iovec iov = {};
+  iov.iov_base = &req;
+  iov.iov_len = sizeof(req);
+
+  struct msghdr msg = {};
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+  // No destination required; it defaults to pid 0, the kernel.
+
+  ASSERT_THAT(RetryEINTR(sendmsg)(fd.get(), &msg, 0), SyscallSucceeds());
+
+  // Small enough to ensure that the response doesn't fit.
+  constexpr size_t kBufferSize = 10;
+  std::vector<char> buf(kBufferSize);
+  iov.iov_base = buf.data();
+  iov.iov_len = buf.size();
+
+  int res = 0;
+  ASSERT_THAT(res = RetryEINTR(recvmsg)(fd.get(), &msg, MSG_TRUNC),
+              SyscallSucceeds());
+  EXPECT_GT(res, kBufferSize);
+  EXPECT_EQ((msg.msg_flags & MSG_TRUNC), MSG_TRUNC);
+}
+
+TEST(NetlinkRouteTest, ControlMessageIgnored) {
+  FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE));
+  uint32_t port = ASSERT_NO_ERRNO_AND_VALUE(NetlinkPortID(fd.get()));
+
+  struct request {
+    struct nlmsghdr control_hdr;
+    struct nlmsghdr message_hdr;
+    struct ifinfomsg ifm;
+  };
+
+  struct request req = {};
+
+  // This control message is ignored. We still receive a response for the
+  // following RTM_GETLINK.
+  req.control_hdr.nlmsg_len = sizeof(req.control_hdr);
+  req.control_hdr.nlmsg_type = NLMSG_DONE;
+  req.control_hdr.nlmsg_seq = kSeq;
+
+  req.message_hdr.nlmsg_len = sizeof(req.message_hdr) + sizeof(req.ifm);
+  req.message_hdr.nlmsg_type = RTM_GETLINK;
+  req.message_hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
+  req.message_hdr.nlmsg_seq = kSeq;
+
+  req.ifm.ifi_family = AF_UNSPEC;
+
+  ASSERT_NO_ERRNO(NetlinkRequestResponse(
+      fd, &req, sizeof(req),
+      [&](const struct nlmsghdr* hdr) {
+        CheckGetLinkResponse(hdr, kSeq, port);
+      },
+      false));
+}
+
+TEST(NetlinkRouteTest, GetAddrDump) {
+  FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE));
+  uint32_t port = ASSERT_NO_ERRNO_AND_VALUE(NetlinkPortID(fd.get()));
+
+  struct request {
+    struct nlmsghdr hdr;
+    struct rtgenmsg rgm;
+  };
+
+  struct request req;
+  req.hdr.nlmsg_len = sizeof(req);
+  req.hdr.nlmsg_type = RTM_GETADDR;
+  req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
+  req.hdr.nlmsg_seq = kSeq;
+  req.rgm.rtgen_family = AF_UNSPEC;
+
+  ASSERT_NO_ERRNO(NetlinkRequestResponse(
+      fd, &req, sizeof(req),
+      [&](const struct nlmsghdr* hdr) {
+        EXPECT_THAT(hdr->nlmsg_type, AnyOf(Eq(RTM_NEWADDR), Eq(NLMSG_DONE)));
+
+        EXPECT_TRUE((hdr->nlmsg_flags & NLM_F_MULTI) == NLM_F_MULTI)
+            << std::hex << hdr->nlmsg_flags;
+
+        EXPECT_EQ(hdr->nlmsg_seq, kSeq);
+        EXPECT_EQ(hdr->nlmsg_pid, port);
+
+        if (hdr->nlmsg_type != RTM_NEWADDR) {
+          return;
+        }
+
+        // RTM_NEWADDR contains at least the header and ifaddrmsg.
+        EXPECT_GE(hdr->nlmsg_len, sizeof(*hdr) + sizeof(struct ifaddrmsg));
+
+        // TODO(mpratt): Check ifaddrmsg contents and following attrs.
+      },
+      false));
+}
+
+TEST(NetlinkRouteTest, LookupAll) {
+  struct ifaddrs* if_addr_list = nullptr;
+  auto cleanup = Cleanup([&if_addr_list]() { freeifaddrs(if_addr_list); });
+
+  // Not a syscall but we can use the syscall matcher as glibc sets errno.
+  ASSERT_THAT(getifaddrs(&if_addr_list), SyscallSucceeds());
+
+  int count = 0;
+  for (struct ifaddrs* i = if_addr_list; i; i = i->ifa_next) {
+    if (!i->ifa_addr || (i->ifa_addr->sa_family != AF_INET &&
+                         i->ifa_addr->sa_family != AF_INET6)) {
+      continue;
+    }
+    count++;
+  }
+  ASSERT_GT(count, 0);
+}
+
+TEST(NetlinkRouteTest, AddAddr) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN)));
+
+  Link loopback_link = ASSERT_NO_ERRNO_AND_VALUE(LoopbackLink());
+
+  FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE));
+
+  struct request {
+    struct nlmsghdr hdr;
+    struct ifaddrmsg ifa;
+    struct rtattr rtattr;
+    struct in_addr addr;
+    char pad[NLMSG_ALIGNTO + RTA_ALIGNTO];
+  };
+
+  struct request req = {};
+  req.hdr.nlmsg_type = RTM_NEWADDR;
+  req.hdr.nlmsg_seq = kSeq;
+  req.ifa.ifa_family = AF_INET;
+  req.ifa.ifa_prefixlen = 24;
+  req.ifa.ifa_flags = 0;
+  req.ifa.ifa_scope = 0;
+  req.ifa.ifa_index = loopback_link.index;
+  req.rtattr.rta_type = IFA_LOCAL;
+  req.rtattr.rta_len = RTA_LENGTH(sizeof(req.addr));
+  inet_pton(AF_INET, "10.0.0.1", &req.addr);
+  req.hdr.nlmsg_len =
+      NLMSG_LENGTH(sizeof(req.ifa)) + NLMSG_ALIGN(req.rtattr.rta_len);
+
+  // Create should succeed, as no such address in kernel.
+  req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_ACK;
+  EXPECT_NO_ERRNO(
+      NetlinkRequestAckOrError(fd, req.hdr.nlmsg_seq, &req, req.hdr.nlmsg_len));
+
+  // Replace an existing address should succeed.
+  req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_REPLACE | NLM_F_ACK;
+  req.hdr.nlmsg_seq++;
+  EXPECT_NO_ERRNO(
+      NetlinkRequestAckOrError(fd, req.hdr.nlmsg_seq, &req, req.hdr.nlmsg_len));
+
+  // Create exclusive should fail, as we created the address above.
+  req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
+  req.hdr.nlmsg_seq++;
+  EXPECT_THAT(
+      NetlinkRequestAckOrError(fd, req.hdr.nlmsg_seq, &req, req.hdr.nlmsg_len),
+      PosixErrorIs(EEXIST, ::testing::_));
+}
+
+// GetRouteDump tests a RTM_GETROUTE + NLM_F_DUMP request.
+TEST(NetlinkRouteTest, GetRouteDump) {
+  FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE));
+  uint32_t port = ASSERT_NO_ERRNO_AND_VALUE(NetlinkPortID(fd.get()));
+
+  struct request {
+    struct nlmsghdr hdr;
+    struct rtmsg rtm;
+  };
+
+  struct request req = {};
+  req.hdr.nlmsg_len = sizeof(req);
+  req.hdr.nlmsg_type = RTM_GETROUTE;
+  req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
+  req.hdr.nlmsg_seq = kSeq;
+  req.rtm.rtm_family = AF_UNSPEC;
+
+  bool routeFound = false;
+  bool dstFound = true;
+  ASSERT_NO_ERRNO(NetlinkRequestResponse(
+      fd, &req, sizeof(req),
+      [&](const struct nlmsghdr* hdr) {
+        // Validate the reponse to RTM_GETROUTE + NLM_F_DUMP.
+        EXPECT_THAT(hdr->nlmsg_type, AnyOf(Eq(RTM_NEWROUTE), Eq(NLMSG_DONE)));
+
+        EXPECT_TRUE((hdr->nlmsg_flags & NLM_F_MULTI) == NLM_F_MULTI)
+            << std::hex << hdr->nlmsg_flags;
+
+        EXPECT_EQ(hdr->nlmsg_seq, kSeq);
+        EXPECT_EQ(hdr->nlmsg_pid, port);
+
+        // The test should not proceed if it's not a RTM_NEWROUTE message.
+        if (hdr->nlmsg_type != RTM_NEWROUTE) {
+          return;
+        }
+
+        // RTM_NEWROUTE contains at least the header and rtmsg.
+        ASSERT_GE(hdr->nlmsg_len, NLMSG_SPACE(sizeof(struct rtmsg)));
+        const struct rtmsg* msg =
+            reinterpret_cast<const struct rtmsg*>(NLMSG_DATA(hdr));
+        // NOTE: rtmsg fields are char fields.
+        std::cout << "Found route table=" << static_cast<int>(msg->rtm_table)
+                  << ", protocol=" << static_cast<int>(msg->rtm_protocol)
+                  << ", scope=" << static_cast<int>(msg->rtm_scope)
+                  << ", type=" << static_cast<int>(msg->rtm_type);
+
+        int len = RTM_PAYLOAD(hdr);
+        bool rtDstFound = false;
+        for (struct rtattr* attr = RTM_RTA(msg); RTA_OK(attr, len);
+             attr = RTA_NEXT(attr, len)) {
+          if (attr->rta_type == RTA_DST) {
+            char address[INET_ADDRSTRLEN] = {};
+            inet_ntop(AF_INET, RTA_DATA(attr), address, sizeof(address));
+            std::cout << ", dst=" << address;
+            rtDstFound = true;
+          }
+        }
+
+        std::cout << std::endl;
+
+        if (msg->rtm_table == RT_TABLE_MAIN) {
+          routeFound = true;
+          dstFound = rtDstFound && dstFound;
+        }
+      },
+      false));
+  // At least one route found in main route table.
+  EXPECT_TRUE(routeFound);
+  // Found RTA_DST for each route in main table.
+  EXPECT_TRUE(dstFound);
+}
+
+// GetRouteRequest tests a RTM_GETROUTE request with RTM_F_LOOKUP_TABLE flag.
+TEST(NetlinkRouteTest, GetRouteRequest) {
+  FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE));
+  uint32_t port = ASSERT_NO_ERRNO_AND_VALUE(NetlinkPortID(fd.get()));
+
+  struct request {
+    struct nlmsghdr hdr;
+    struct rtmsg rtm;
+    struct nlattr nla;
+    struct in_addr sin_addr;
+  };
+
+  constexpr uint32_t kSeq = 12345;
+
+  struct request req = {};
+  req.hdr.nlmsg_len = sizeof(req);
+  req.hdr.nlmsg_type = RTM_GETROUTE;
+  req.hdr.nlmsg_flags = NLM_F_REQUEST;
+  req.hdr.nlmsg_seq = kSeq;
+
+  req.rtm.rtm_family = AF_INET;
+  req.rtm.rtm_dst_len = 32;
+  req.rtm.rtm_src_len = 0;
+  req.rtm.rtm_tos = 0;
+  req.rtm.rtm_table = RT_TABLE_UNSPEC;
+  req.rtm.rtm_protocol = RTPROT_UNSPEC;
+  req.rtm.rtm_scope = RT_SCOPE_UNIVERSE;
+  req.rtm.rtm_type = RTN_UNSPEC;
+  req.rtm.rtm_flags = RTM_F_LOOKUP_TABLE;
+
+  req.nla.nla_len = 8;
+  req.nla.nla_type = RTA_DST;
+  inet_aton("127.0.0.2", &req.sin_addr);
+
+  bool rtDstFound = false;
+  ASSERT_NO_ERRNO(NetlinkRequestResponseSingle(
+      fd, &req, sizeof(req), [&](const struct nlmsghdr* hdr) {
+        // Validate the reponse to RTM_GETROUTE request with RTM_F_LOOKUP_TABLE
+        // flag.
+        EXPECT_THAT(hdr->nlmsg_type, RTM_NEWROUTE);
+
+        EXPECT_TRUE(hdr->nlmsg_flags == 0) << std::hex << hdr->nlmsg_flags;
+
+        EXPECT_EQ(hdr->nlmsg_seq, kSeq);
+        EXPECT_EQ(hdr->nlmsg_pid, port);
+
+        // RTM_NEWROUTE contains at least the header and rtmsg.
+        ASSERT_GE(hdr->nlmsg_len, NLMSG_SPACE(sizeof(struct rtmsg)));
+        const struct rtmsg* msg =
+            reinterpret_cast<const struct rtmsg*>(NLMSG_DATA(hdr));
+
+        // NOTE: rtmsg fields are char fields.
+        std::cout << "Found route table=" << static_cast<int>(msg->rtm_table)
+                  << ", protocol=" << static_cast<int>(msg->rtm_protocol)
+                  << ", scope=" << static_cast<int>(msg->rtm_scope)
+                  << ", type=" << static_cast<int>(msg->rtm_type);
+
+        EXPECT_EQ(msg->rtm_family, AF_INET);
+        EXPECT_EQ(msg->rtm_dst_len, 32);
+        EXPECT_TRUE((msg->rtm_flags & RTM_F_CLONED) == RTM_F_CLONED)
+            << std::hex << msg->rtm_flags;
+
+        int len = RTM_PAYLOAD(hdr);
+        std::cout << ", len=" << len;
+        for (struct rtattr* attr = RTM_RTA(msg); RTA_OK(attr, len);
+             attr = RTA_NEXT(attr, len)) {
+          if (attr->rta_type == RTA_DST) {
+            char address[INET_ADDRSTRLEN] = {};
+            inet_ntop(AF_INET, RTA_DATA(attr), address, sizeof(address));
+            std::cout << ", dst=" << address;
+            rtDstFound = true;
+          } else if (attr->rta_type == RTA_OIF) {
+            const char* oif = reinterpret_cast<const char*>(RTA_DATA(attr));
+            std::cout << ", oif=" << oif;
+          }
+        }
+
+        std::cout << std::endl;
+      }));
+  // Found RTA_DST for RTM_F_LOOKUP_TABLE.
+  EXPECT_TRUE(rtDstFound);
+}
+
+// RecvmsgTrunc tests the recvmsg MSG_TRUNC flag with zero length output
+// buffer. MSG_TRUNC with a zero length buffer should consume subsequent
+// messages off the socket.
+TEST(NetlinkRouteTest, RecvmsgTrunc) {
+  FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE));
+
+  struct request {
+    struct nlmsghdr hdr;
+    struct rtgenmsg rgm;
+  };
+
+  struct request req;
+  req.hdr.nlmsg_len = sizeof(req);
+  req.hdr.nlmsg_type = RTM_GETADDR;
+  req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
+  req.hdr.nlmsg_seq = kSeq;
+  req.rgm.rtgen_family = AF_UNSPEC;
+
+  struct iovec iov = {};
+  iov.iov_base = &req;
+  iov.iov_len = sizeof(req);
+
+  struct msghdr msg = {};
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+
+  ASSERT_THAT(RetryEINTR(sendmsg)(fd.get(), &msg, 0), SyscallSucceeds());
+
+  iov.iov_base = NULL;
+  iov.iov_len = 0;
+
+  int trunclen, trunclen2;
+
+  // Note: This test assumes at least two messages are returned by the
+  // RTM_GETADDR request. That means at least one RTM_NEWLINK message and one
+  // NLMSG_DONE message. We cannot read all the messages without blocking
+  // because we would need to read the message into a buffer and check the
+  // nlmsg_type for NLMSG_DONE. However, the test depends on reading into a
+  // zero-length buffer.
+
+  // First, call recvmsg with MSG_TRUNC. This will read the full message from
+  // the socket and return it's full length. Subsequent calls to recvmsg will
+  // read the next messages from the socket.
+  ASSERT_THAT(trunclen = RetryEINTR(recvmsg)(fd.get(), &msg, MSG_TRUNC),
+              SyscallSucceeds());
+
+  // Message should always be truncated. However, While the destination iov is
+  // zero length, MSG_TRUNC returns the size of the next message so it should
+  // not be zero.
+  ASSERT_EQ(msg.msg_flags & MSG_TRUNC, MSG_TRUNC);
+  ASSERT_NE(trunclen, 0);
+  // Returned length is at least the header and ifaddrmsg.
+  EXPECT_GE(trunclen, sizeof(struct nlmsghdr) + sizeof(struct ifaddrmsg));
+
+  // Reset the msg_flags to make sure that the recvmsg call is setting them
+  // properly.
+  msg.msg_flags = 0;
+
+  // Make a second recvvmsg call to get the next message.
+  ASSERT_THAT(trunclen2 = RetryEINTR(recvmsg)(fd.get(), &msg, MSG_TRUNC),
+              SyscallSucceeds());
+  ASSERT_EQ(msg.msg_flags & MSG_TRUNC, MSG_TRUNC);
+  ASSERT_NE(trunclen2, 0);
+
+  // Assert that the received messages are not the same.
+  //
+  // We are calling recvmsg with a zero length buffer so we have no way to
+  // inspect the messages to make sure they are not equal in value. The best
+  // we can do is to compare their lengths.
+  ASSERT_NE(trunclen, trunclen2);
+}
+
+// RecvmsgTruncPeek tests recvmsg with the combination of the MSG_TRUNC and
+// MSG_PEEK flags and a zero length output buffer. This is normally used to
+// read the full length of the next message on the socket without consuming
+// it, so a properly sized buffer can be allocated to store the message. This
+// test tests that scenario.
+TEST(NetlinkRouteTest, RecvmsgTruncPeek) {
+  FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE));
+
+  struct request {
+    struct nlmsghdr hdr;
+    struct rtgenmsg rgm;
+  };
+
+  struct request req;
+  req.hdr.nlmsg_len = sizeof(req);
+  req.hdr.nlmsg_type = RTM_GETADDR;
+  req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
+  req.hdr.nlmsg_seq = kSeq;
+  req.rgm.rtgen_family = AF_UNSPEC;
+
+  struct iovec iov = {};
+  iov.iov_base = &req;
+  iov.iov_len = sizeof(req);
+
+  struct msghdr msg = {};
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+
+  ASSERT_THAT(RetryEINTR(sendmsg)(fd.get(), &msg, 0), SyscallSucceeds());
+
+  int type = -1;
+  do {
+    int peeklen;
+    int len;
+
+    iov.iov_base = NULL;
+    iov.iov_len = 0;
+
+    // Call recvmsg with MSG_PEEK and MSG_TRUNC. This will peek at the message
+    // and return it's full length.
+    // See: MSG_TRUNC http://man7.org/linux/man-pages/man2/recv.2.html
+    ASSERT_THAT(
+        peeklen = RetryEINTR(recvmsg)(fd.get(), &msg, MSG_PEEK | MSG_TRUNC),
+        SyscallSucceeds());
+
+    // Message should always be truncated.
+    ASSERT_EQ(msg.msg_flags & MSG_TRUNC, MSG_TRUNC);
+    ASSERT_NE(peeklen, 0);
+
+    // Reset the message flags for the next call.
+    msg.msg_flags = 0;
+
+    // Make the actual call to recvmsg to get the actual data. We will use
+    // the length returned from the peek call for the allocated buffer size..
+    std::vector<char> buf(peeklen);
+    iov.iov_base = buf.data();
+    iov.iov_len = buf.size();
+    ASSERT_THAT(len = RetryEINTR(recvmsg)(fd.get(), &msg, 0),
+                SyscallSucceeds());
+
+    // Message should not be truncated since we allocated the correct buffer
+    // size.
+    EXPECT_NE(msg.msg_flags & MSG_TRUNC, MSG_TRUNC);
+
+    // MSG_PEEK should have left data on the socket and the subsequent call
+    // with should have retrieved the same data. Both calls should have
+    // returned the message's full length so they should be equal.
+    ASSERT_NE(len, 0);
+    ASSERT_EQ(peeklen, len);
+
+    for (struct nlmsghdr* hdr = reinterpret_cast<struct nlmsghdr*>(buf.data());
+         NLMSG_OK(hdr, len); hdr = NLMSG_NEXT(hdr, len)) {
+      type = hdr->nlmsg_type;
+    }
+  } while (type != NLMSG_DONE && type != NLMSG_ERROR);
+}
+
+// No SCM_CREDENTIALS are received without SO_PASSCRED set.
+TEST(NetlinkRouteTest, NoPasscredNoCreds) {
+  FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE));
+
+  ASSERT_THAT(setsockopt(fd.get(), SOL_SOCKET, SO_PASSCRED, &kSockOptOff,
+                         sizeof(kSockOptOff)),
+              SyscallSucceeds());
+
+  struct request {
+    struct nlmsghdr hdr;
+    struct rtgenmsg rgm;
+  };
+
+  struct request req;
+  req.hdr.nlmsg_len = sizeof(req);
+  req.hdr.nlmsg_type = RTM_GETADDR;
+  req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
+  req.hdr.nlmsg_seq = kSeq;
+  req.rgm.rtgen_family = AF_UNSPEC;
+
+  struct iovec iov = {};
+  iov.iov_base = &req;
+  iov.iov_len = sizeof(req);
+
+  struct msghdr msg = {};
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+
+  ASSERT_THAT(RetryEINTR(sendmsg)(fd.get(), &msg, 0), SyscallSucceeds());
+
+  iov.iov_base = NULL;
+  iov.iov_len = 0;
+
+  char control[CMSG_SPACE(sizeof(struct ucred))] = {};
+  msg.msg_control = control;
+  msg.msg_controllen = sizeof(control);
+
+  // Note: This test assumes at least one message is returned by the
+  // RTM_GETADDR request.
+  ASSERT_THAT(RetryEINTR(recvmsg)(fd.get(), &msg, 0), SyscallSucceeds());
+
+  // No control messages.
+  EXPECT_EQ(CMSG_FIRSTHDR(&msg), nullptr);
+}
+
+// SCM_CREDENTIALS are received with SO_PASSCRED set.
+TEST(NetlinkRouteTest, PasscredCreds) {
+  FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE));
+
+  ASSERT_THAT(setsockopt(fd.get(), SOL_SOCKET, SO_PASSCRED, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceeds());
+
+  struct request {
+    struct nlmsghdr hdr;
+    struct rtgenmsg rgm;
+  };
+
+  struct request req;
+  req.hdr.nlmsg_len = sizeof(req);
+  req.hdr.nlmsg_type = RTM_GETADDR;
+  req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
+  req.hdr.nlmsg_seq = kSeq;
+  req.rgm.rtgen_family = AF_UNSPEC;
+
+  struct iovec iov = {};
+  iov.iov_base = &req;
+  iov.iov_len = sizeof(req);
+
+  struct msghdr msg = {};
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+
+  ASSERT_THAT(RetryEINTR(sendmsg)(fd.get(), &msg, 0), SyscallSucceeds());
+
+  iov.iov_base = NULL;
+  iov.iov_len = 0;
+
+  char control[CMSG_SPACE(sizeof(struct ucred))] = {};
+  msg.msg_control = control;
+  msg.msg_controllen = sizeof(control);
+
+  // Note: This test assumes at least one message is returned by the
+  // RTM_GETADDR request.
+  ASSERT_THAT(RetryEINTR(recvmsg)(fd.get(), &msg, 0), SyscallSucceeds());
+
+  struct ucred creds;
+  struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
+  ASSERT_NE(cmsg, nullptr);
+  ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(creds)));
+  ASSERT_EQ(cmsg->cmsg_level, SOL_SOCKET);
+  ASSERT_EQ(cmsg->cmsg_type, SCM_CREDENTIALS);
+
+  memcpy(&creds, CMSG_DATA(cmsg), sizeof(creds));
+
+  // The peer is the kernel, which is "PID" 0.
+  EXPECT_EQ(creds.pid, 0);
+  // The kernel identifies as root. Also allow nobody in case this test is
+  // running in a userns without root mapped.
+  EXPECT_THAT(creds.uid, AnyOf(Eq(0), Eq(65534)));
+  EXPECT_THAT(creds.gid, AnyOf(Eq(0), Eq(65534)));
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_netlink_route_util.cc b/test/syscalls/linux/socket_netlink_route_util.cc
new file mode 100644
index 000000000..bde1dbb4d
--- /dev/null
+++ b/test/syscalls/linux/socket_netlink_route_util.cc
@@ -0,0 +1,162 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test/syscalls/linux/socket_netlink_route_util.h"
+
+#include <linux/if.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+
+#include "test/syscalls/linux/socket_netlink_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+constexpr uint32_t kSeq = 12345;
+
+}  // namespace
+
+PosixError DumpLinks(
+    const FileDescriptor& fd, uint32_t seq,
+    const std::function<void(const struct nlmsghdr* hdr)>& fn) {
+  struct request {
+    struct nlmsghdr hdr;
+    struct ifinfomsg ifm;
+  };
+
+  struct request req = {};
+  req.hdr.nlmsg_len = sizeof(req);
+  req.hdr.nlmsg_type = RTM_GETLINK;
+  req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
+  req.hdr.nlmsg_seq = seq;
+  req.ifm.ifi_family = AF_UNSPEC;
+
+  return NetlinkRequestResponse(fd, &req, sizeof(req), fn, false);
+}
+
+PosixErrorOr<std::vector<Link>> DumpLinks() {
+  ASSIGN_OR_RETURN_ERRNO(FileDescriptor fd, NetlinkBoundSocket(NETLINK_ROUTE));
+
+  std::vector<Link> links;
+  RETURN_IF_ERRNO(DumpLinks(fd, kSeq, [&](const struct nlmsghdr* hdr) {
+    if (hdr->nlmsg_type != RTM_NEWLINK ||
+        hdr->nlmsg_len < NLMSG_SPACE(sizeof(struct ifinfomsg))) {
+      return;
+    }
+    const struct ifinfomsg* msg =
+        reinterpret_cast<const struct ifinfomsg*>(NLMSG_DATA(hdr));
+    const auto* rta = FindRtAttr(hdr, msg, IFLA_IFNAME);
+    if (rta == nullptr) {
+      // Ignore links that do not have a name.
+      return;
+    }
+
+    links.emplace_back();
+    links.back().index = msg->ifi_index;
+    links.back().type = msg->ifi_type;
+    links.back().name =
+        std::string(reinterpret_cast<const char*>(RTA_DATA(rta)));
+  }));
+  return links;
+}
+
+PosixErrorOr<Link> LoopbackLink() {
+  ASSIGN_OR_RETURN_ERRNO(auto links, DumpLinks());
+  for (const auto& link : links) {
+    if (link.type == ARPHRD_LOOPBACK) {
+      return link;
+    }
+  }
+  return PosixError(ENOENT, "loopback link not found");
+}
+
+PosixError LinkAddLocalAddr(int index, int family, int prefixlen,
+                            const void* addr, int addrlen) {
+  ASSIGN_OR_RETURN_ERRNO(FileDescriptor fd, NetlinkBoundSocket(NETLINK_ROUTE));
+
+  struct request {
+    struct nlmsghdr hdr;
+    struct ifaddrmsg ifaddr;
+    char attrbuf[512];
+  };
+
+  struct request req = {};
+  req.hdr.nlmsg_len = NLMSG_LENGTH(sizeof(req.ifaddr));
+  req.hdr.nlmsg_type = RTM_NEWADDR;
+  req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+  req.hdr.nlmsg_seq = kSeq;
+  req.ifaddr.ifa_index = index;
+  req.ifaddr.ifa_family = family;
+  req.ifaddr.ifa_prefixlen = prefixlen;
+
+  struct rtattr* rta = reinterpret_cast<struct rtattr*>(
+      reinterpret_cast<int8_t*>(&req) + NLMSG_ALIGN(req.hdr.nlmsg_len));
+  rta->rta_type = IFA_LOCAL;
+  rta->rta_len = RTA_LENGTH(addrlen);
+  req.hdr.nlmsg_len = NLMSG_ALIGN(req.hdr.nlmsg_len) + RTA_LENGTH(addrlen);
+  memcpy(RTA_DATA(rta), addr, addrlen);
+
+  return NetlinkRequestAckOrError(fd, kSeq, &req, req.hdr.nlmsg_len);
+}
+
+PosixError LinkChangeFlags(int index, unsigned int flags, unsigned int change) {
+  ASSIGN_OR_RETURN_ERRNO(FileDescriptor fd, NetlinkBoundSocket(NETLINK_ROUTE));
+
+  struct request {
+    struct nlmsghdr hdr;
+    struct ifinfomsg ifinfo;
+    char pad[NLMSG_ALIGNTO];
+  };
+
+  struct request req = {};
+  req.hdr.nlmsg_len = NLMSG_LENGTH(sizeof(req.ifinfo));
+  req.hdr.nlmsg_type = RTM_NEWLINK;
+  req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+  req.hdr.nlmsg_seq = kSeq;
+  req.ifinfo.ifi_index = index;
+  req.ifinfo.ifi_flags = flags;
+  req.ifinfo.ifi_change = change;
+
+  return NetlinkRequestAckOrError(fd, kSeq, &req, req.hdr.nlmsg_len);
+}
+
+PosixError LinkSetMacAddr(int index, const void* addr, int addrlen) {
+  ASSIGN_OR_RETURN_ERRNO(FileDescriptor fd, NetlinkBoundSocket(NETLINK_ROUTE));
+
+  struct request {
+    struct nlmsghdr hdr;
+    struct ifinfomsg ifinfo;
+    char attrbuf[512];
+  };
+
+  struct request req = {};
+  req.hdr.nlmsg_len = NLMSG_LENGTH(sizeof(req.ifinfo));
+  req.hdr.nlmsg_type = RTM_NEWLINK;
+  req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+  req.hdr.nlmsg_seq = kSeq;
+  req.ifinfo.ifi_index = index;
+
+  struct rtattr* rta = reinterpret_cast<struct rtattr*>(
+      reinterpret_cast<int8_t*>(&req) + NLMSG_ALIGN(req.hdr.nlmsg_len));
+  rta->rta_type = IFLA_ADDRESS;
+  rta->rta_len = RTA_LENGTH(addrlen);
+  req.hdr.nlmsg_len = NLMSG_ALIGN(req.hdr.nlmsg_len) + RTA_LENGTH(addrlen);
+  memcpy(RTA_DATA(rta), addr, addrlen);
+
+  return NetlinkRequestAckOrError(fd, kSeq, &req, req.hdr.nlmsg_len);
+}
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_netlink_route_util.h b/test/syscalls/linux/socket_netlink_route_util.h
new file mode 100644
index 000000000..149c4a7f6
--- /dev/null
+++ b/test/syscalls/linux/socket_netlink_route_util.h
@@ -0,0 +1,55 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_NETLINK_ROUTE_UTIL_H_
+#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_NETLINK_ROUTE_UTIL_H_
+
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+
+#include <vector>
+
+#include "test/syscalls/linux/socket_netlink_util.h"
+
+namespace gvisor {
+namespace testing {
+
+struct Link {
+  int index;
+  int16_t type;
+  std::string name;
+};
+
+PosixError DumpLinks(const FileDescriptor& fd, uint32_t seq,
+                     const std::function<void(const struct nlmsghdr* hdr)>& fn);
+
+PosixErrorOr<std::vector<Link>> DumpLinks();
+
+// Returns the loopback link on the system. ENOENT if not found.
+PosixErrorOr<Link> LoopbackLink();
+
+// LinkAddLocalAddr sets IFA_LOCAL attribute on the interface.
+PosixError LinkAddLocalAddr(int index, int family, int prefixlen,
+                            const void* addr, int addrlen);
+
+// LinkChangeFlags changes interface flags. E.g. IFF_UP.
+PosixError LinkChangeFlags(int index, unsigned int flags, unsigned int change);
+
+// LinkSetMacAddr sets IFLA_ADDRESS attribute of the interface.
+PosixError LinkSetMacAddr(int index, const void* addr, int addrlen);
+
+}  // namespace testing
+}  // namespace gvisor
+
+#endif  // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_NETLINK_ROUTE_UTIL_H_
diff --git a/test/syscalls/linux/socket_netlink_uevent.cc b/test/syscalls/linux/socket_netlink_uevent.cc
new file mode 100644
index 000000000..da425bed4
--- /dev/null
+++ b/test/syscalls/linux/socket_netlink_uevent.cc
@@ -0,0 +1,83 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <linux/filter.h>
+#include <linux/netlink.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/socket_netlink_util.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/test_util.h"
+
+// Tests for NETLINK_KOBJECT_UEVENT sockets.
+//
+// gVisor never sends any messages on these sockets, so we don't test the events
+// themselves.
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// SO_PASSCRED can be enabled. Since no messages are sent in gVisor, we don't
+// actually test receiving credentials.
+TEST(NetlinkUeventTest, PassCred) {
+  FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_KOBJECT_UEVENT));
+
+  EXPECT_THAT(setsockopt(fd.get(), SOL_SOCKET, SO_PASSCRED, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceeds());
+}
+
+// SO_DETACH_FILTER fails without a filter already installed.
+TEST(NetlinkUeventTest, DetachNoFilter) {
+  FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_KOBJECT_UEVENT));
+
+  int opt;
+  EXPECT_THAT(
+      setsockopt(fd.get(), SOL_SOCKET, SO_DETACH_FILTER, &opt, sizeof(opt)),
+      SyscallFailsWithErrno(ENOENT));
+}
+
+// We can attach a BPF filter.
+TEST(NetlinkUeventTest, AttachFilter) {
+  FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_KOBJECT_UEVENT));
+
+  // Minimal BPF program: a single ret.
+  struct sock_filter filter = {0x6, 0, 0, 0};
+  struct sock_fprog prog = {};
+  prog.len = 1;
+  prog.filter = &filter;
+
+  EXPECT_THAT(
+      setsockopt(fd.get(), SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)),
+      SyscallSucceeds());
+
+  int opt;
+  EXPECT_THAT(
+      setsockopt(fd.get(), SOL_SOCKET, SO_DETACH_FILTER, &opt, sizeof(opt)),
+      SyscallSucceeds());
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_netlink_util.cc b/test/syscalls/linux/socket_netlink_util.cc
new file mode 100644
index 000000000..952eecfe8
--- /dev/null
+++ b/test/syscalls/linux/socket_netlink_util.cc
@@ -0,0 +1,187 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test/syscalls/linux/socket_netlink_util.h"
+
+#include <linux/if_arp.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <sys/socket.h>
+
+#include <vector>
+
+#include "absl/strings/str_cat.h"
+#include "test/syscalls/linux/socket_test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+PosixErrorOr<FileDescriptor> NetlinkBoundSocket(int protocol) {
+  FileDescriptor fd;
+  ASSIGN_OR_RETURN_ERRNO(fd, Socket(AF_NETLINK, SOCK_RAW, protocol));
+
+  struct sockaddr_nl addr = {};
+  addr.nl_family = AF_NETLINK;
+
+  RETURN_ERROR_IF_SYSCALL_FAIL(
+      bind(fd.get(), reinterpret_cast<struct sockaddr*>(&addr), sizeof(addr)));
+  MaybeSave();
+
+  return std::move(fd);
+}
+
+PosixErrorOr<uint32_t> NetlinkPortID(int fd) {
+  struct sockaddr_nl addr;
+  socklen_t addrlen = sizeof(addr);
+
+  RETURN_ERROR_IF_SYSCALL_FAIL(
+      getsockname(fd, reinterpret_cast<struct sockaddr*>(&addr), &addrlen));
+  MaybeSave();
+
+  return static_cast<uint32_t>(addr.nl_pid);
+}
+
+PosixError NetlinkRequestResponse(
+    const FileDescriptor& fd, void* request, size_t len,
+    const std::function<void(const struct nlmsghdr* hdr)>& fn,
+    bool expect_nlmsgerr) {
+  struct iovec iov = {};
+  iov.iov_base = request;
+  iov.iov_len = len;
+
+  struct msghdr msg = {};
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+  // No destination required; it defaults to pid 0, the kernel.
+
+  RETURN_ERROR_IF_SYSCALL_FAIL(RetryEINTR(sendmsg)(fd.get(), &msg, 0));
+
+  constexpr size_t kBufferSize = 4096;
+  std::vector<char> buf(kBufferSize);
+  iov.iov_base = buf.data();
+  iov.iov_len = buf.size();
+
+  // If NLM_F_MULTI is set, response is a series of messages that ends with a
+  // NLMSG_DONE message.
+  int type = -1;
+  int flags = 0;
+  do {
+    int len;
+    RETURN_ERROR_IF_SYSCALL_FAIL(len = RetryEINTR(recvmsg)(fd.get(), &msg, 0));
+
+    // We don't bother with the complexity of dealing with truncated messages.
+    // We must allocate a large enough buffer up front.
+    if ((msg.msg_flags & MSG_TRUNC) == MSG_TRUNC) {
+      return PosixError(EIO,
+                        absl::StrCat("Received truncated message with flags: ",
+                                     msg.msg_flags));
+    }
+
+    for (struct nlmsghdr* hdr = reinterpret_cast<struct nlmsghdr*>(buf.data());
+         NLMSG_OK(hdr, len); hdr = NLMSG_NEXT(hdr, len)) {
+      fn(hdr);
+      flags = hdr->nlmsg_flags;
+      type = hdr->nlmsg_type;
+      // Done should include an integer payload for dump_done_errno.
+      // See net/netlink/af_netlink.c:netlink_dump
+      // Some tools like the 'ip' tool check the minimum length of the
+      // NLMSG_DONE message.
+      if (type == NLMSG_DONE) {
+        EXPECT_GE(hdr->nlmsg_len, NLMSG_LENGTH(sizeof(int)));
+      }
+    }
+  } while ((flags & NLM_F_MULTI) && type != NLMSG_DONE && type != NLMSG_ERROR);
+
+  if (expect_nlmsgerr) {
+    EXPECT_EQ(type, NLMSG_ERROR);
+  } else if (flags & NLM_F_MULTI) {
+    EXPECT_EQ(type, NLMSG_DONE);
+  }
+  return NoError();
+}
+
+PosixError NetlinkRequestResponseSingle(
+    const FileDescriptor& fd, void* request, size_t len,
+    const std::function<void(const struct nlmsghdr* hdr)>& fn) {
+  struct iovec iov = {};
+  iov.iov_base = request;
+  iov.iov_len = len;
+
+  struct msghdr msg = {};
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+  // No destination required; it defaults to pid 0, the kernel.
+
+  RETURN_ERROR_IF_SYSCALL_FAIL(RetryEINTR(sendmsg)(fd.get(), &msg, 0));
+
+  constexpr size_t kBufferSize = 4096;
+  std::vector<char> buf(kBufferSize);
+  iov.iov_base = buf.data();
+  iov.iov_len = buf.size();
+
+  int ret;
+  RETURN_ERROR_IF_SYSCALL_FAIL(ret = RetryEINTR(recvmsg)(fd.get(), &msg, 0));
+
+  // We don't bother with the complexity of dealing with truncated messages.
+  // We must allocate a large enough buffer up front.
+  if ((msg.msg_flags & MSG_TRUNC) == MSG_TRUNC) {
+    return PosixError(
+        EIO,
+        absl::StrCat("Received truncated message with flags: ", msg.msg_flags));
+  }
+
+  for (struct nlmsghdr* hdr = reinterpret_cast<struct nlmsghdr*>(buf.data());
+       NLMSG_OK(hdr, ret); hdr = NLMSG_NEXT(hdr, ret)) {
+    fn(hdr);
+  }
+
+  return NoError();
+}
+
+PosixError NetlinkRequestAckOrError(const FileDescriptor& fd, uint32_t seq,
+                                    void* request, size_t len) {
+  // Dummy negative number for no error message received.
+  // We won't get a negative error number so there will be no confusion.
+  int err = -42;
+  RETURN_IF_ERRNO(NetlinkRequestResponse(
+      fd, request, len,
+      [&](const struct nlmsghdr* hdr) {
+        EXPECT_EQ(NLMSG_ERROR, hdr->nlmsg_type);
+        EXPECT_EQ(hdr->nlmsg_seq, seq);
+        EXPECT_GE(hdr->nlmsg_len, sizeof(*hdr) + sizeof(struct nlmsgerr));
+
+        const struct nlmsgerr* msg =
+            reinterpret_cast<const struct nlmsgerr*>(NLMSG_DATA(hdr));
+        err = -msg->error;
+      },
+      true));
+  return PosixError(err);
+}
+
+const struct rtattr* FindRtAttr(const struct nlmsghdr* hdr,
+                                const struct ifinfomsg* msg, int16_t attr) {
+  const int ifi_space = NLMSG_SPACE(sizeof(*msg));
+  int attrlen = hdr->nlmsg_len - ifi_space;
+  const struct rtattr* rta = reinterpret_cast<const struct rtattr*>(
+      reinterpret_cast<const uint8_t*>(hdr) + NLMSG_ALIGN(ifi_space));
+  for (; RTA_OK(rta, attrlen); rta = RTA_NEXT(rta, attrlen)) {
+    if (rta->rta_type == attr) {
+      return rta;
+    }
+  }
+  return nullptr;
+}
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_netlink_util.h b/test/syscalls/linux/socket_netlink_util.h
new file mode 100644
index 000000000..e13ead406
--- /dev/null
+++ b/test/syscalls/linux/socket_netlink_util.h
@@ -0,0 +1,62 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_SYSCALLS_SOCKET_NETLINK_UTIL_H_
+#define GVISOR_TEST_SYSCALLS_SOCKET_NETLINK_UTIL_H_
+
+#include <sys/socket.h>
+// socket.h has to be included before if_arp.h.
+#include <linux/if_arp.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+
+#include "test/util/file_descriptor.h"
+#include "test/util/posix_error.h"
+
+namespace gvisor {
+namespace testing {
+
+// Returns a bound netlink socket.
+PosixErrorOr<FileDescriptor> NetlinkBoundSocket(int protocol);
+
+// Returns the port ID of the passed socket.
+PosixErrorOr<uint32_t> NetlinkPortID(int fd);
+
+// Send the passed request and call fn on all response netlink messages.
+//
+// To be used on requests with NLM_F_MULTI reponses.
+PosixError NetlinkRequestResponse(
+    const FileDescriptor& fd, void* request, size_t len,
+    const std::function<void(const struct nlmsghdr* hdr)>& fn,
+    bool expect_nlmsgerr);
+
+// Send the passed request and call fn on all response netlink messages.
+//
+// To be used on requests without NLM_F_MULTI reponses.
+PosixError NetlinkRequestResponseSingle(
+    const FileDescriptor& fd, void* request, size_t len,
+    const std::function<void(const struct nlmsghdr* hdr)>& fn);
+
+// Send the passed request then expect and return an ack or error.
+PosixError NetlinkRequestAckOrError(const FileDescriptor& fd, uint32_t seq,
+                                    void* request, size_t len);
+
+// Find rtnetlink attribute in message.
+const struct rtattr* FindRtAttr(const struct nlmsghdr* hdr,
+                                const struct ifinfomsg* msg, int16_t attr);
+
+}  // namespace testing
+}  // namespace gvisor
+
+#endif  // GVISOR_TEST_SYSCALLS_SOCKET_NETLINK_UTIL_H_
diff --git a/test/syscalls/linux/socket_non_blocking.cc b/test/syscalls/linux/socket_non_blocking.cc
new file mode 100644
index 000000000..c3520cadd
--- /dev/null
+++ b/test/syscalls/linux/socket_non_blocking.cc
@@ -0,0 +1,62 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test/syscalls/linux/socket_non_blocking.h"
+
+#include <stdio.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+TEST_P(NonBlockingSocketPairTest, ReadNothingAvailable) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  char buf[20] = {};
+  ASSERT_THAT(ReadFd(sockets->first_fd(), buf, sizeof(buf)),
+              SyscallFailsWithErrno(EAGAIN));
+}
+
+TEST_P(NonBlockingSocketPairTest, RecvNothingAvailable) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  char buf[20] = {};
+  ASSERT_THAT(RetryEINTR(recv)(sockets->first_fd(), buf, sizeof(buf), 0),
+              SyscallFailsWithErrno(EAGAIN));
+}
+
+TEST_P(NonBlockingSocketPairTest, RecvMsgNothingAvailable) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  struct iovec iov;
+  char buf[20] = {};
+  iov.iov_base = buf;
+  iov.iov_len = sizeof(buf);
+  struct msghdr msg = {};
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+
+  ASSERT_THAT(RetryEINTR(recvmsg)(sockets->first_fd(), &msg, 0),
+              SyscallFailsWithErrno(EAGAIN));
+}
+
+}  // namespace
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_non_blocking.h b/test/syscalls/linux/socket_non_blocking.h
new file mode 100644
index 000000000..bd3e02fd2
--- /dev/null
+++ b/test/syscalls/linux/socket_non_blocking.h
@@ -0,0 +1,29 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_NON_BLOCKING_H_
+#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_NON_BLOCKING_H_
+
+#include "test/syscalls/linux/socket_test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// Test fixture for tests that apply to pairs of connected non-blocking sockets.
+using NonBlockingSocketPairTest = SocketPairTest;
+
+}  // namespace testing
+}  // namespace gvisor
+
+#endif  // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_NON_BLOCKING_H_
diff --git a/test/syscalls/linux/socket_non_stream.cc b/test/syscalls/linux/socket_non_stream.cc
new file mode 100644
index 000000000..c61817f14
--- /dev/null
+++ b/test/syscalls/linux/socket_non_stream.cc
@@ -0,0 +1,337 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test/syscalls/linux/socket_non_stream.h"
+
+#include <stdio.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/ip_socket_test_util.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+TEST_P(NonStreamSocketPairTest, SendMsgTooLarge) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  int sndbuf;
+  socklen_t length = sizeof(sndbuf);
+  ASSERT_THAT(
+      getsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDBUF, &sndbuf, &length),
+      SyscallSucceeds());
+
+  // Make the call too large to fit in the send buffer.
+  const int buffer_size = 3 * sndbuf;
+
+  EXPECT_THAT(SendLargeSendMsg(sockets, buffer_size, false /* reader */),
+              SyscallFailsWithErrno(EMSGSIZE));
+}
+
+// Stream sockets allow data sent with a single (e.g. write, sendmsg) syscall
+// to be read in pieces with multiple (e.g. read, recvmsg) syscalls.
+//
+// SplitRecv checks that control messages can only be read on the first (e.g.
+// read, recvmsg) syscall, even if it doesn't provide space for the control
+// message.
+TEST_P(NonStreamSocketPairTest, SplitRecv) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  char sent_data[512];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+  ASSERT_THAT(
+      RetryEINTR(send)(sockets->first_fd(), sent_data, sizeof(sent_data), 0),
+      SyscallSucceedsWithValue(sizeof(sent_data)));
+  char received_data[sizeof(sent_data) / 2];
+  ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data,
+                               sizeof(received_data), 0),
+              SyscallSucceedsWithValue(sizeof(received_data)));
+  EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(received_data)));
+  ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data,
+                               sizeof(received_data), MSG_DONTWAIT),
+              SyscallFailsWithErrno(EWOULDBLOCK));
+}
+
+// Stream sockets allow data sent with multiple sends to be read in a single
+// recv. Datagram sockets do not.
+//
+// SingleRecv checks that only a single message is readable in a single recv.
+TEST_P(NonStreamSocketPairTest, SingleRecv) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  char sent_data1[20];
+  RandomizeBuffer(sent_data1, sizeof(sent_data1));
+  ASSERT_THAT(
+      RetryEINTR(send)(sockets->first_fd(), sent_data1, sizeof(sent_data1), 0),
+      SyscallSucceedsWithValue(sizeof(sent_data1)));
+  char sent_data2[20];
+  RandomizeBuffer(sent_data2, sizeof(sent_data2));
+  ASSERT_THAT(
+      RetryEINTR(send)(sockets->first_fd(), sent_data2, sizeof(sent_data2), 0),
+      SyscallSucceedsWithValue(sizeof(sent_data2)));
+  char received_data[sizeof(sent_data1) + sizeof(sent_data2)];
+  ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data,
+                               sizeof(received_data), 0),
+              SyscallSucceedsWithValue(sizeof(sent_data1)));
+  EXPECT_EQ(0, memcmp(sent_data1, received_data, sizeof(sent_data1)));
+}
+
+TEST_P(NonStreamSocketPairTest, RecvmsgMsghdrFlagMsgTrunc) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[10];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+  ASSERT_THAT(
+      RetryEINTR(send)(sockets->first_fd(), sent_data, sizeof(sent_data), 0),
+      SyscallSucceedsWithValue(sizeof(sent_data)));
+
+  char received_data[sizeof(sent_data) / 2] = {};
+
+  struct iovec iov;
+  iov.iov_base = received_data;
+  iov.iov_len = sizeof(received_data);
+  struct msghdr msg = {};
+  msg.msg_flags = -1;
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+
+  ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, 0),
+              SyscallSucceedsWithValue(sizeof(received_data)));
+  EXPECT_EQ(0, memcmp(received_data, sent_data, sizeof(received_data)));
+
+  // Check that msghdr flags were updated.
+  EXPECT_EQ(msg.msg_flags & MSG_TRUNC, MSG_TRUNC);
+}
+
+// Stream sockets allow data sent with multiple sends to be peeked at in a
+// single recv. Datagram sockets (except for unix sockets) do not.
+//
+// SinglePeek checks that only a single message is peekable in a single recv.
+TEST_P(NonStreamSocketPairTest, SinglePeek) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  char sent_data1[20];
+  RandomizeBuffer(sent_data1, sizeof(sent_data1));
+  ASSERT_THAT(
+      RetryEINTR(send)(sockets->first_fd(), sent_data1, sizeof(sent_data1), 0),
+      SyscallSucceedsWithValue(sizeof(sent_data1)));
+  char sent_data2[20];
+  RandomizeBuffer(sent_data2, sizeof(sent_data2));
+  ASSERT_THAT(
+      RetryEINTR(send)(sockets->first_fd(), sent_data2, sizeof(sent_data2), 0),
+      SyscallSucceedsWithValue(sizeof(sent_data2)));
+  char received_data[sizeof(sent_data1) + sizeof(sent_data2)];
+  for (int i = 0; i < 3; i++) {
+    memset(received_data, 0, sizeof(received_data));
+    ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data,
+                                 sizeof(received_data), MSG_PEEK),
+                SyscallSucceedsWithValue(sizeof(sent_data1)));
+    EXPECT_EQ(0, memcmp(sent_data1, received_data, sizeof(sent_data1)));
+  }
+  ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data,
+                               sizeof(sent_data1), 0),
+              SyscallSucceedsWithValue(sizeof(sent_data1)));
+  EXPECT_EQ(0, memcmp(sent_data1, received_data, sizeof(sent_data1)));
+  ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data,
+                               sizeof(sent_data2), 0),
+              SyscallSucceedsWithValue(sizeof(sent_data2)));
+  EXPECT_EQ(0, memcmp(sent_data2, received_data, sizeof(sent_data2)));
+}
+
+TEST_P(NonStreamSocketPairTest, MsgTruncTruncation) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  char sent_data[512];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+  ASSERT_THAT(
+      RetryEINTR(send)(sockets->first_fd(), sent_data, sizeof(sent_data), 0),
+      SyscallSucceedsWithValue(sizeof(sent_data)));
+  char received_data[sizeof(sent_data)] = {};
+  ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data,
+                               sizeof(received_data) / 2, MSG_TRUNC),
+              SyscallSucceedsWithValue(sizeof(sent_data)));
+  EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data) / 2));
+
+  // Check that we didn't get any extra data.
+  EXPECT_NE(0, memcmp(sent_data + sizeof(sent_data) / 2,
+                      received_data + sizeof(received_data) / 2,
+                      sizeof(sent_data) / 2));
+}
+
+TEST_P(NonStreamSocketPairTest, MsgTruncTruncationRecvmsgMsghdrFlagMsgTrunc) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[10];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+  ASSERT_THAT(
+      RetryEINTR(send)(sockets->first_fd(), sent_data, sizeof(sent_data), 0),
+      SyscallSucceedsWithValue(sizeof(sent_data)));
+
+  char received_data[sizeof(sent_data) / 2] = {};
+
+  struct iovec iov;
+  iov.iov_base = received_data;
+  iov.iov_len = sizeof(received_data);
+  struct msghdr msg = {};
+  msg.msg_flags = -1;
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+
+  ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, MSG_TRUNC),
+              SyscallSucceedsWithValue(sizeof(sent_data)));
+  EXPECT_EQ(0, memcmp(received_data, sent_data, sizeof(received_data)));
+
+  // Check that msghdr flags were updated.
+  EXPECT_EQ(msg.msg_flags & MSG_TRUNC, MSG_TRUNC);
+}
+
+TEST_P(NonStreamSocketPairTest, MsgTruncSameSize) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  char sent_data[512];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+  ASSERT_THAT(
+      RetryEINTR(send)(sockets->first_fd(), sent_data, sizeof(sent_data), 0),
+      SyscallSucceedsWithValue(sizeof(sent_data)));
+  char received_data[sizeof(sent_data)];
+  ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data,
+                               sizeof(received_data), MSG_TRUNC),
+              SyscallSucceedsWithValue(sizeof(received_data)));
+  EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+}
+
+TEST_P(NonStreamSocketPairTest, MsgTruncNotFull) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  char sent_data[512];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+  ASSERT_THAT(
+      RetryEINTR(send)(sockets->first_fd(), sent_data, sizeof(sent_data), 0),
+      SyscallSucceedsWithValue(sizeof(sent_data)));
+  char received_data[2 * sizeof(sent_data)];
+  ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data,
+                               sizeof(received_data), MSG_TRUNC),
+              SyscallSucceedsWithValue(sizeof(sent_data)));
+  EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+}
+
+// This test tests reading from a socket with MSG_TRUNC and a zero length
+// receive buffer. The user should be able to get the message length.
+TEST_P(NonStreamSocketPairTest, RecvmsgMsgTruncZeroLen) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[10];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+  ASSERT_THAT(
+      RetryEINTR(send)(sockets->first_fd(), sent_data, sizeof(sent_data), 0),
+      SyscallSucceedsWithValue(sizeof(sent_data)));
+
+  // The receive buffer is of zero length.
+  char received_data[0] = {};
+
+  struct iovec iov;
+  iov.iov_base = received_data;
+  iov.iov_len = sizeof(received_data);
+  struct msghdr msg = {};
+  msg.msg_flags = -1;
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+
+  // The syscall succeeds returning the full size of the message on the socket.
+  ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, MSG_TRUNC),
+              SyscallSucceedsWithValue(sizeof(sent_data)));
+
+  // Check that MSG_TRUNC is set on msghdr flags.
+  EXPECT_EQ(msg.msg_flags & MSG_TRUNC, MSG_TRUNC);
+}
+
+// This test tests reading from a socket with MSG_TRUNC | MSG_PEEK and a zero
+// length receive buffer. The user should be able to get the message length
+// without reading data off the socket.
+TEST_P(NonStreamSocketPairTest, RecvmsgMsgTruncMsgPeekZeroLen) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[10];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+  ASSERT_THAT(
+      RetryEINTR(send)(sockets->first_fd(), sent_data, sizeof(sent_data), 0),
+      SyscallSucceedsWithValue(sizeof(sent_data)));
+
+  // The receive buffer is of zero length.
+  char peek_data[0] = {};
+
+  struct iovec peek_iov;
+  peek_iov.iov_base = peek_data;
+  peek_iov.iov_len = sizeof(peek_data);
+  struct msghdr peek_msg = {};
+  peek_msg.msg_flags = -1;
+  peek_msg.msg_iov = &peek_iov;
+  peek_msg.msg_iovlen = 1;
+
+  // The syscall succeeds returning the full size of the message on the socket.
+  ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &peek_msg,
+                                  MSG_TRUNC | MSG_PEEK),
+              SyscallSucceedsWithValue(sizeof(sent_data)));
+
+  // Check that MSG_TRUNC is set on msghdr flags because the receive buffer is
+  // smaller than the message size.
+  EXPECT_EQ(peek_msg.msg_flags & MSG_TRUNC, MSG_TRUNC);
+
+  char received_data[sizeof(sent_data)] = {};
+
+  struct iovec received_iov;
+  received_iov.iov_base = received_data;
+  received_iov.iov_len = sizeof(received_data);
+  struct msghdr received_msg = {};
+  received_msg.msg_flags = -1;
+  received_msg.msg_iov = &received_iov;
+  received_msg.msg_iovlen = 1;
+
+  // Next we can read the actual data.
+  ASSERT_THAT(
+      RetryEINTR(recvmsg)(sockets->second_fd(), &received_msg, MSG_TRUNC),
+      SyscallSucceedsWithValue(sizeof(sent_data)));
+
+  EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+
+  // Check that MSG_TRUNC is not set on msghdr flags because we read the whole
+  // message.
+  EXPECT_EQ(received_msg.msg_flags & MSG_TRUNC, 0);
+}
+
+// This test tests reading from a socket with MSG_TRUNC | MSG_PEEK and a zero
+// length receive buffer and MSG_DONTWAIT. The user should be able to get an
+// EAGAIN or EWOULDBLOCK error response.
+TEST_P(NonStreamSocketPairTest, RecvmsgTruncPeekDontwaitZeroLen) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  // NOTE: We don't send any data on the socket.
+
+  // The receive buffer is of zero length.
+  char peek_data[0] = {};
+
+  struct iovec peek_iov;
+  peek_iov.iov_base = peek_data;
+  peek_iov.iov_len = sizeof(peek_data);
+  struct msghdr peek_msg = {};
+  peek_msg.msg_flags = -1;
+  peek_msg.msg_iov = &peek_iov;
+  peek_msg.msg_iovlen = 1;
+
+  // recvmsg fails with EAGAIN because no data is available on the socket.
+  ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &peek_msg,
+                                  MSG_TRUNC | MSG_PEEK | MSG_DONTWAIT),
+              SyscallFailsWithErrno(EAGAIN));
+}
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_non_stream.h b/test/syscalls/linux/socket_non_stream.h
new file mode 100644
index 000000000..469fbe6a2
--- /dev/null
+++ b/test/syscalls/linux/socket_non_stream.h
@@ -0,0 +1,29 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_NON_STREAM_H_
+#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_NON_STREAM_H_
+
+#include "test/syscalls/linux/socket_test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// Test fixture for tests that apply to pairs of connected non-stream sockets.
+using NonStreamSocketPairTest = SocketPairTest;
+
+}  // namespace testing
+}  // namespace gvisor
+
+#endif  // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_NON_STREAM_H_
diff --git a/test/syscalls/linux/socket_non_stream_blocking.cc b/test/syscalls/linux/socket_non_stream_blocking.cc
new file mode 100644
index 000000000..b052f6e61
--- /dev/null
+++ b/test/syscalls/linux/socket_non_stream_blocking.cc
@@ -0,0 +1,85 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test/syscalls/linux/socket_non_stream_blocking.h"
+
+#include <stdio.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+
+#include "gtest/gtest.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+
+TEST_P(BlockingNonStreamSocketPairTest, RecvLessThanBufferWaitAll) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[100];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  ASSERT_THAT(write(sockets->first_fd(), sent_data, sizeof(sent_data)),
+              SyscallSucceedsWithValue(sizeof(sent_data)));
+
+  char received_data[sizeof(sent_data) * 2] = {};
+  ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data,
+                               sizeof(received_data), MSG_WAITALL),
+              SyscallSucceedsWithValue(sizeof(sent_data)));
+}
+
+// This test tests reading from a socket with MSG_TRUNC | MSG_PEEK and a zero
+// length receive buffer and MSG_DONTWAIT. The recvmsg call should block on
+// reading the data.
+TEST_P(BlockingNonStreamSocketPairTest,
+       RecvmsgTruncPeekDontwaitZeroLenBlocking) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  // NOTE: We don't initially send any data on the socket.
+  const int data_size = 10;
+  char sent_data[data_size];
+  RandomizeBuffer(sent_data, data_size);
+
+  // The receive buffer is of zero length.
+  char peek_data[0] = {};
+
+  struct iovec peek_iov;
+  peek_iov.iov_base = peek_data;
+  peek_iov.iov_len = sizeof(peek_data);
+  struct msghdr peek_msg = {};
+  peek_msg.msg_flags = -1;
+  peek_msg.msg_iov = &peek_iov;
+  peek_msg.msg_iovlen = 1;
+
+  ScopedThread t([&]() {
+    // The syscall succeeds returning the full size of the message on the
+    // socket. This should block until there is data on the socket.
+    ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &peek_msg,
+                                    MSG_TRUNC | MSG_PEEK),
+                SyscallSucceedsWithValue(data_size));
+  });
+
+  absl::SleepFor(absl::Seconds(1));
+  ASSERT_THAT(RetryEINTR(send)(sockets->first_fd(), sent_data, data_size, 0),
+              SyscallSucceedsWithValue(data_size));
+}
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_non_stream_blocking.h b/test/syscalls/linux/socket_non_stream_blocking.h
new file mode 100644
index 000000000..6e205a039
--- /dev/null
+++ b/test/syscalls/linux/socket_non_stream_blocking.h
@@ -0,0 +1,30 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_NON_STREAM_BLOCKING_H_
+#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_NON_STREAM_BLOCKING_H_
+
+#include "test/syscalls/linux/socket_test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// Test fixture for tests that apply to pairs of blocking connected non-stream
+// sockets.
+using BlockingNonStreamSocketPairTest = SocketPairTest;
+
+}  // namespace testing
+}  // namespace gvisor
+
+#endif  // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_NON_STREAM_BLOCKING_H_
diff --git a/test/syscalls/linux/socket_stream.cc b/test/syscalls/linux/socket_stream.cc
new file mode 100644
index 000000000..6522b2e01
--- /dev/null
+++ b/test/syscalls/linux/socket_stream.cc
@@ -0,0 +1,178 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test/syscalls/linux/socket_stream.h"
+
+#include <stdio.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+
+#include "gtest/gtest.h"
+#include "absl/time/clock.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+TEST_P(StreamSocketPairTest, SplitRecv) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  char sent_data[512];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+  ASSERT_THAT(
+      RetryEINTR(send)(sockets->first_fd(), sent_data, sizeof(sent_data), 0),
+      SyscallSucceedsWithValue(sizeof(sent_data)));
+  char received_data[sizeof(sent_data) / 2];
+  ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data,
+                               sizeof(received_data), 0),
+              SyscallSucceedsWithValue(sizeof(received_data)));
+  EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(received_data)));
+  ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data,
+                               sizeof(received_data), 0),
+              SyscallSucceedsWithValue(sizeof(received_data)));
+  EXPECT_EQ(0, memcmp(sent_data + sizeof(received_data), received_data,
+                      sizeof(received_data)));
+}
+
+// Stream sockets allow data sent with multiple sends to be read in a single
+// recv.
+//
+// CoalescedRecv checks that multiple messages are readable in a single recv.
+TEST_P(StreamSocketPairTest, CoalescedRecv) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  char sent_data1[20];
+  RandomizeBuffer(sent_data1, sizeof(sent_data1));
+  ASSERT_THAT(
+      RetryEINTR(send)(sockets->first_fd(), sent_data1, sizeof(sent_data1), 0),
+      SyscallSucceedsWithValue(sizeof(sent_data1)));
+  char sent_data2[20];
+  RandomizeBuffer(sent_data2, sizeof(sent_data2));
+  ASSERT_THAT(
+      RetryEINTR(send)(sockets->first_fd(), sent_data2, sizeof(sent_data2), 0),
+      SyscallSucceedsWithValue(sizeof(sent_data2)));
+  char received_data[sizeof(sent_data1) + sizeof(sent_data2)];
+  ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data,
+                               sizeof(received_data), 0),
+              SyscallSucceedsWithValue(sizeof(received_data)));
+  EXPECT_EQ(0, memcmp(sent_data1, received_data, sizeof(sent_data1)));
+  EXPECT_EQ(0, memcmp(sent_data2, received_data + sizeof(sent_data1),
+                      sizeof(sent_data2)));
+}
+
+TEST_P(StreamSocketPairTest, WriteOneSideClosed) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  ASSERT_THAT(close(sockets->release_first_fd()), SyscallSucceeds());
+  const char str[] = "abc";
+  ASSERT_THAT(write(sockets->second_fd(), str, 3),
+              SyscallFailsWithErrno(EPIPE));
+}
+
+TEST_P(StreamSocketPairTest, RecvmsgMsghdrFlagsNoMsgTrunc) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[10];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+  ASSERT_THAT(
+      RetryEINTR(send)(sockets->first_fd(), sent_data, sizeof(sent_data), 0),
+      SyscallSucceedsWithValue(sizeof(sent_data)));
+
+  char received_data[sizeof(sent_data) / 2] = {};
+
+  struct iovec iov;
+  iov.iov_base = received_data;
+  iov.iov_len = sizeof(received_data);
+  struct msghdr msg = {};
+  msg.msg_flags = -1;
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+
+  ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, 0),
+              SyscallSucceedsWithValue(sizeof(received_data)));
+  EXPECT_EQ(0, memcmp(received_data, sent_data, sizeof(received_data)));
+
+  // Check that msghdr flags were cleared (MSG_TRUNC was not set).
+  ASSERT_EQ(msg.msg_flags & MSG_TRUNC, 0);
+}
+
+TEST_P(StreamSocketPairTest, RecvmsgTruncZeroLen) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[10];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+  ASSERT_THAT(
+      RetryEINTR(send)(sockets->first_fd(), sent_data, sizeof(sent_data), 0),
+      SyscallSucceedsWithValue(sizeof(sent_data)));
+
+  char received_data[0] = {};
+
+  struct iovec iov;
+  iov.iov_base = received_data;
+  iov.iov_len = sizeof(received_data);
+  struct msghdr msg = {};
+  msg.msg_flags = -1;
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+
+  ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, MSG_TRUNC),
+              SyscallSucceedsWithValue(0));
+
+  // Check that msghdr flags were cleared (MSG_TRUNC was not set).
+  ASSERT_EQ(msg.msg_flags & MSG_TRUNC, 0);
+}
+
+TEST_P(StreamSocketPairTest, RecvmsgTruncPeekZeroLen) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[10];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+  ASSERT_THAT(
+      RetryEINTR(send)(sockets->first_fd(), sent_data, sizeof(sent_data), 0),
+      SyscallSucceedsWithValue(sizeof(sent_data)));
+
+  char received_data[0] = {};
+
+  struct iovec iov;
+  iov.iov_base = received_data;
+  iov.iov_len = sizeof(received_data);
+  struct msghdr msg = {};
+  msg.msg_flags = -1;
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+
+  ASSERT_THAT(
+      RetryEINTR(recvmsg)(sockets->second_fd(), &msg, MSG_TRUNC | MSG_PEEK),
+      SyscallSucceedsWithValue(0));
+
+  // Check that msghdr flags were cleared (MSG_TRUNC was not set).
+  ASSERT_EQ(msg.msg_flags & MSG_TRUNC, 0);
+}
+
+TEST_P(StreamSocketPairTest, MsgTrunc) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  char sent_data[512];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+  ASSERT_THAT(
+      RetryEINTR(send)(sockets->first_fd(), sent_data, sizeof(sent_data), 0),
+      SyscallSucceedsWithValue(sizeof(sent_data)));
+  char received_data[sizeof(sent_data)];
+  ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data,
+                               sizeof(received_data) / 2, MSG_TRUNC),
+              SyscallSucceedsWithValue(sizeof(sent_data) / 2));
+  EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data) / 2));
+}
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_stream.h b/test/syscalls/linux/socket_stream.h
new file mode 100644
index 000000000..b837b8f8c
--- /dev/null
+++ b/test/syscalls/linux/socket_stream.h
@@ -0,0 +1,30 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_STREAM_H_
+#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_STREAM_H_
+
+#include "test/syscalls/linux/socket_test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// Test fixture for tests that apply to pairs of blocking and non-blocking
+// connected stream sockets.
+using StreamSocketPairTest = SocketPairTest;
+
+}  // namespace testing
+}  // namespace gvisor
+
+#endif  // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_STREAM_H_
diff --git a/test/syscalls/linux/socket_stream_blocking.cc b/test/syscalls/linux/socket_stream_blocking.cc
new file mode 100644
index 000000000..538ee2268
--- /dev/null
+++ b/test/syscalls/linux/socket_stream_blocking.cc
@@ -0,0 +1,163 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test/syscalls/linux/socket_stream_blocking.h"
+
+#include <stdio.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+
+#include "gtest/gtest.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+#include "test/util/timer_util.h"
+
+namespace gvisor {
+namespace testing {
+
+TEST_P(BlockingStreamSocketPairTest, BlockPartialWriteClosed) {
+  // FIXME(b/35921550): gVisor doesn't support SO_SNDBUF on UDS, nor does it
+  // enforce any limit; it will write arbitrary amounts of data without
+  // blocking.
+  SKIP_IF(IsRunningOnGvisor());
+
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  int buffer_size;
+  socklen_t length = sizeof(buffer_size);
+  ASSERT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDBUF,
+                         &buffer_size, &length),
+              SyscallSucceeds());
+
+  int wfd = sockets->first_fd();
+  ScopedThread t([wfd, buffer_size]() {
+    std::vector<char> buf(2 * buffer_size);
+    // Write more than fits in the buffer. Blocks then returns partial write
+    // when the other end is closed. The next call returns EPIPE.
+    //
+    // N.B. writes occur in chunks, so we may see less than buffer_size from
+    // the first call.
+    ASSERT_THAT(write(wfd, buf.data(), buf.size()),
+                SyscallSucceedsWithValue(::testing::Gt(0)));
+    ASSERT_THAT(write(wfd, buf.data(), buf.size()),
+                ::testing::AnyOf(SyscallFailsWithErrno(EPIPE),
+                                 SyscallFailsWithErrno(ECONNRESET)));
+  });
+
+  // Leave time for write to become blocked.
+  absl::SleepFor(absl::Seconds(1));
+
+  ASSERT_THAT(close(sockets->release_second_fd()), SyscallSucceeds());
+}
+
+// Random save may interrupt the call to sendmsg() in SendLargeSendMsg(),
+// causing the write to be incomplete and the test to hang.
+TEST_P(BlockingStreamSocketPairTest, SendMsgTooLarge_NoRandomSave) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  int sndbuf;
+  socklen_t length = sizeof(sndbuf);
+  ASSERT_THAT(
+      getsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDBUF, &sndbuf, &length),
+      SyscallSucceeds());
+
+  // Make the call too large to fit in the send buffer.
+  const int buffer_size = 3 * sndbuf;
+
+  EXPECT_THAT(SendLargeSendMsg(sockets, buffer_size, true /* reader */),
+              SyscallSucceedsWithValue(buffer_size));
+}
+
+TEST_P(BlockingStreamSocketPairTest, RecvLessThanBuffer) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[100];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  ASSERT_THAT(write(sockets->first_fd(), sent_data, sizeof(sent_data)),
+              SyscallSucceedsWithValue(sizeof(sent_data)));
+
+  char received_data[200] = {};
+  ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data,
+                               sizeof(received_data), 0),
+              SyscallSucceedsWithValue(sizeof(sent_data)));
+}
+
+// Test that MSG_WAITALL causes recv to block until all requested data is
+// received. Random save can interrupt blocking and cause received data to be
+// returned, even if the amount received is less than the full requested amount.
+TEST_P(BlockingStreamSocketPairTest, RecvLessThanBufferWaitAll_NoRandomSave) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[100];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  ASSERT_THAT(write(sockets->first_fd(), sent_data, sizeof(sent_data)),
+              SyscallSucceedsWithValue(sizeof(sent_data)));
+
+  constexpr auto kDuration = absl::Milliseconds(200);
+  auto before = Now(CLOCK_MONOTONIC);
+
+  const ScopedThread t([&]() {
+    absl::SleepFor(kDuration);
+
+    // Don't let saving after the write interrupt the blocking recv.
+    const DisableSave ds;
+
+    ASSERT_THAT(write(sockets->first_fd(), sent_data, sizeof(sent_data)),
+                SyscallSucceedsWithValue(sizeof(sent_data)));
+  });
+
+  char received_data[sizeof(sent_data) * 2] = {};
+  ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data,
+                               sizeof(received_data), MSG_WAITALL),
+              SyscallSucceedsWithValue(sizeof(received_data)));
+
+  auto after = Now(CLOCK_MONOTONIC);
+  EXPECT_GE(after - before, kDuration);
+}
+
+TEST_P(BlockingStreamSocketPairTest, SendTimeout) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  struct timeval tv {
+    .tv_sec = 0, .tv_usec = 10
+  };
+  EXPECT_THAT(
+      setsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)),
+      SyscallSucceeds());
+
+  std::vector<char> buf(kPageSize);
+  // We don't know how much data the socketpair will buffer, so we may do an
+  // arbitrarily large number of writes; saving after each write causes this
+  // test's time to explode.
+  const DisableSave ds;
+  for (;;) {
+    int ret;
+    ASSERT_THAT(
+        ret = RetryEINTR(send)(sockets->first_fd(), buf.data(), buf.size(), 0),
+        ::testing::AnyOf(SyscallSucceeds(), SyscallFailsWithErrno(EAGAIN)));
+    if (ret == -1) {
+      break;
+    }
+  }
+}
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_stream_blocking.h b/test/syscalls/linux/socket_stream_blocking.h
new file mode 100644
index 000000000..9fd19ff90
--- /dev/null
+++ b/test/syscalls/linux/socket_stream_blocking.h
@@ -0,0 +1,30 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_STREAM_BLOCKING_H_
+#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_STREAM_BLOCKING_H_
+
+#include "test/syscalls/linux/socket_test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// Test fixture for tests that apply to pairs of blocking connected stream
+// sockets.
+using BlockingStreamSocketPairTest = SocketPairTest;
+
+}  // namespace testing
+}  // namespace gvisor
+
+#endif  // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_STREAM_BLOCKING_H_
diff --git a/test/syscalls/linux/socket_stream_nonblock.cc b/test/syscalls/linux/socket_stream_nonblock.cc
new file mode 100644
index 000000000..74d608741
--- /dev/null
+++ b/test/syscalls/linux/socket_stream_nonblock.cc
@@ -0,0 +1,49 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test/syscalls/linux/socket_stream_nonblock.h"
+
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <sys/un.h>
+
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+using ::testing::Le;
+
+TEST_P(NonBlockingStreamSocketPairTest, SendMsgTooLarge) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  int sndbuf;
+  socklen_t length = sizeof(sndbuf);
+  ASSERT_THAT(
+      getsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDBUF, &sndbuf, &length),
+      SyscallSucceeds());
+
+  // Make the call too large to fit in the send buffer.
+  const int buffer_size = 3 * sndbuf;
+
+  EXPECT_THAT(SendLargeSendMsg(sockets, buffer_size, false /* reader */),
+              SyscallSucceedsWithValue(Le(buffer_size)));
+}
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_stream_nonblock.h b/test/syscalls/linux/socket_stream_nonblock.h
new file mode 100644
index 000000000..c3b7fad91
--- /dev/null
+++ b/test/syscalls/linux/socket_stream_nonblock.h
@@ -0,0 +1,30 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_STREAM_NONBLOCK_H_
+#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_STREAM_NONBLOCK_H_
+
+#include "test/syscalls/linux/socket_test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// Test fixture for tests that apply to pairs of non-blocking connected stream
+// sockets.
+using NonBlockingStreamSocketPairTest = SocketPairTest;
+
+}  // namespace testing
+}  // namespace gvisor
+
+#endif  // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_STREAM_NONBLOCK_H_
diff --git a/test/syscalls/linux/socket_test_util.cc b/test/syscalls/linux/socket_test_util.cc
new file mode 100644
index 000000000..53b678e94
--- /dev/null
+++ b/test/syscalls/linux/socket_test_util.cc
@@ -0,0 +1,907 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test/syscalls/linux/socket_test_util.h"
+
+#include <arpa/inet.h>
+#include <poll.h>
+#include <sys/socket.h>
+
+#include <memory>
+
+#include "gtest/gtest.h"
+#include "absl/memory/memory.h"
+#include "absl/strings/str_cat.h"
+#include "absl/time/clock.h"
+#include "absl/types/optional.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/posix_error.h"
+#include "test/util/temp_path.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+
+Creator<SocketPair> SyscallSocketPairCreator(int domain, int type,
+                                             int protocol) {
+  return [=]() -> PosixErrorOr<std::unique_ptr<FDSocketPair>> {
+    int pair[2];
+    RETURN_ERROR_IF_SYSCALL_FAIL(socketpair(domain, type, protocol, pair));
+    MaybeSave();  // Save on successful creation.
+    return absl::make_unique<FDSocketPair>(pair[0], pair[1]);
+  };
+}
+
+Creator<FileDescriptor> SyscallSocketCreator(int domain, int type,
+                                             int protocol) {
+  return [=]() -> PosixErrorOr<std::unique_ptr<FileDescriptor>> {
+    int fd = 0;
+    RETURN_ERROR_IF_SYSCALL_FAIL(fd = socket(domain, type, protocol));
+    MaybeSave();  // Save on successful creation.
+    return absl::make_unique<FileDescriptor>(fd);
+  };
+}
+
+PosixErrorOr<struct sockaddr_un> UniqueUnixAddr(bool abstract, int domain) {
+  struct sockaddr_un addr = {};
+  std::string path = NewTempAbsPathInDir("/tmp");
+  if (path.size() >= sizeof(addr.sun_path)) {
+    return PosixError(EINVAL,
+                      "Unable to generate a temp path of appropriate length");
+  }
+
+  if (abstract) {
+    // Indicate that the path is in the abstract namespace.
+    path[0] = 0;
+  }
+  memcpy(addr.sun_path, path.c_str(), path.length());
+  addr.sun_family = domain;
+  return addr;
+}
+
+Creator<SocketPair> AcceptBindSocketPairCreator(bool abstract, int domain,
+                                                int type, int protocol) {
+  return [=]() -> PosixErrorOr<std::unique_ptr<AddrFDSocketPair>> {
+    ASSIGN_OR_RETURN_ERRNO(struct sockaddr_un bind_addr,
+                           UniqueUnixAddr(abstract, domain));
+    ASSIGN_OR_RETURN_ERRNO(struct sockaddr_un extra_addr,
+                           UniqueUnixAddr(abstract, domain));
+
+    int bound;
+    RETURN_ERROR_IF_SYSCALL_FAIL(bound = socket(domain, type, protocol));
+    MaybeSave();  // Successful socket creation.
+    RETURN_ERROR_IF_SYSCALL_FAIL(
+        bind(bound, reinterpret_cast<struct sockaddr*>(&bind_addr),
+             sizeof(bind_addr)));
+    MaybeSave();  // Successful bind.
+    RETURN_ERROR_IF_SYSCALL_FAIL(listen(bound, /* backlog = */ 5));
+    MaybeSave();  // Successful listen.
+
+    int connected;
+    RETURN_ERROR_IF_SYSCALL_FAIL(connected = socket(domain, type, protocol));
+    MaybeSave();  // Successful socket creation.
+    RETURN_ERROR_IF_SYSCALL_FAIL(
+        connect(connected, reinterpret_cast<struct sockaddr*>(&bind_addr),
+                sizeof(bind_addr)));
+    MaybeSave();  // Successful connect.
+
+    int accepted;
+    RETURN_ERROR_IF_SYSCALL_FAIL(
+        accepted = accept4(bound, nullptr, nullptr,
+                           type & (SOCK_NONBLOCK | SOCK_CLOEXEC)));
+    MaybeSave();  // Successful connect.
+
+    // Cleanup no longer needed resources.
+    RETURN_ERROR_IF_SYSCALL_FAIL(close(bound));
+    MaybeSave();  // Dropped original socket.
+
+    // Only unlink if path is not in abstract namespace.
+    if (bind_addr.sun_path[0] != 0) {
+      RETURN_ERROR_IF_SYSCALL_FAIL(unlink(bind_addr.sun_path));
+      MaybeSave();  // Unlinked path.
+    }
+
+    // accepted is before connected to destruct connected before accepted.
+    // Destructors for nonstatic member objects are called in the reverse order
+    // in which they appear in the class declaration.
+    return absl::make_unique<AddrFDSocketPair>(accepted, connected, bind_addr,
+                                               extra_addr);
+  };
+}
+
+Creator<SocketPair> FilesystemAcceptBindSocketPairCreator(int domain, int type,
+                                                          int protocol) {
+  return AcceptBindSocketPairCreator(/* abstract= */ false, domain, type,
+                                     protocol);
+}
+
+Creator<SocketPair> AbstractAcceptBindSocketPairCreator(int domain, int type,
+                                                        int protocol) {
+  return AcceptBindSocketPairCreator(/* abstract= */ true, domain, type,
+                                     protocol);
+}
+
+Creator<SocketPair> BidirectionalBindSocketPairCreator(bool abstract,
+                                                       int domain, int type,
+                                                       int protocol) {
+  return [=]() -> PosixErrorOr<std::unique_ptr<FDSocketPair>> {
+    ASSIGN_OR_RETURN_ERRNO(struct sockaddr_un addr1,
+                           UniqueUnixAddr(abstract, domain));
+    ASSIGN_OR_RETURN_ERRNO(struct sockaddr_un addr2,
+                           UniqueUnixAddr(abstract, domain));
+
+    int sock1;
+    RETURN_ERROR_IF_SYSCALL_FAIL(sock1 = socket(domain, type, protocol));
+    MaybeSave();  // Successful socket creation.
+    RETURN_ERROR_IF_SYSCALL_FAIL(
+        bind(sock1, reinterpret_cast<struct sockaddr*>(&addr1), sizeof(addr1)));
+    MaybeSave();  // Successful bind.
+
+    int sock2;
+    RETURN_ERROR_IF_SYSCALL_FAIL(sock2 = socket(domain, type, protocol));
+    MaybeSave();  // Successful socket creation.
+    RETURN_ERROR_IF_SYSCALL_FAIL(
+        bind(sock2, reinterpret_cast<struct sockaddr*>(&addr2), sizeof(addr2)));
+    MaybeSave();  // Successful bind.
+
+    RETURN_ERROR_IF_SYSCALL_FAIL(connect(
+        sock1, reinterpret_cast<struct sockaddr*>(&addr2), sizeof(addr2)));
+    MaybeSave();  // Successful connect.
+
+    RETURN_ERROR_IF_SYSCALL_FAIL(connect(
+        sock2, reinterpret_cast<struct sockaddr*>(&addr1), sizeof(addr1)));
+    MaybeSave();  // Successful connect.
+
+    // Cleanup no longer needed resources.
+
+    // Only unlink if path is not in abstract namespace.
+    if (addr1.sun_path[0] != 0) {
+      RETURN_ERROR_IF_SYSCALL_FAIL(unlink(addr1.sun_path));
+      MaybeSave();  // Successful unlink.
+    }
+
+    // Only unlink if path is not in abstract namespace.
+    if (addr2.sun_path[0] != 0) {
+      RETURN_ERROR_IF_SYSCALL_FAIL(unlink(addr2.sun_path));
+      MaybeSave();  // Successful unlink.
+    }
+
+    return absl::make_unique<FDSocketPair>(sock1, sock2);
+  };
+}
+
+Creator<SocketPair> FilesystemBidirectionalBindSocketPairCreator(int domain,
+                                                                 int type,
+                                                                 int protocol) {
+  return BidirectionalBindSocketPairCreator(/* abstract= */ false, domain, type,
+                                            protocol);
+}
+
+Creator<SocketPair> AbstractBidirectionalBindSocketPairCreator(int domain,
+                                                               int type,
+                                                               int protocol) {
+  return BidirectionalBindSocketPairCreator(/* abstract= */ true, domain, type,
+                                            protocol);
+}
+
+Creator<SocketPair> SocketpairGoferSocketPairCreator(int domain, int type,
+                                                     int protocol) {
+  return [=]() -> PosixErrorOr<std::unique_ptr<FDSocketPair>> {
+    struct sockaddr_un addr = {};
+    constexpr char kSocketGoferPath[] = "/socket";
+    memcpy(addr.sun_path, kSocketGoferPath, sizeof(kSocketGoferPath));
+    addr.sun_family = domain;
+
+    int sock1;
+    RETURN_ERROR_IF_SYSCALL_FAIL(sock1 = socket(domain, type, protocol));
+    MaybeSave();  // Successful socket creation.
+    RETURN_ERROR_IF_SYSCALL_FAIL(connect(
+        sock1, reinterpret_cast<struct sockaddr*>(&addr), sizeof(addr)));
+    MaybeSave();  // Successful connect.
+
+    int sock2;
+    RETURN_ERROR_IF_SYSCALL_FAIL(sock2 = socket(domain, type, protocol));
+    MaybeSave();  // Successful socket creation.
+    RETURN_ERROR_IF_SYSCALL_FAIL(connect(
+        sock2, reinterpret_cast<struct sockaddr*>(&addr), sizeof(addr)));
+    MaybeSave();  // Successful connect.
+
+    // Make and close another socketpair to ensure that the duped ends of the
+    // first socketpair get closed.
+    //
+    // The problem is that there is no way to atomically send and close an FD.
+    // The closest that we can do is send and then immediately close the FD,
+    // which is what we do in the gofer. The gofer won't respond to another
+    // request until the reply is sent and the FD is closed, so forcing the
+    // gofer to handle another request will ensure that this has happened.
+    for (int i = 0; i < 2; i++) {
+      int sock;
+      RETURN_ERROR_IF_SYSCALL_FAIL(sock = socket(domain, type, protocol));
+      RETURN_ERROR_IF_SYSCALL_FAIL(connect(
+          sock, reinterpret_cast<struct sockaddr*>(&addr), sizeof(addr)));
+      RETURN_ERROR_IF_SYSCALL_FAIL(close(sock));
+    }
+
+    return absl::make_unique<FDSocketPair>(sock1, sock2);
+  };
+}
+
+Creator<SocketPair> SocketpairGoferFileSocketPairCreator(int flags) {
+  return [=]() -> PosixErrorOr<std::unique_ptr<FDSocketPair>> {
+    constexpr char kSocketGoferPath[] = "/socket";
+
+    int sock1;
+    RETURN_ERROR_IF_SYSCALL_FAIL(sock1 =
+                                     open(kSocketGoferPath, O_RDWR | flags));
+    MaybeSave();  // Successful socket creation.
+
+    int sock2;
+    RETURN_ERROR_IF_SYSCALL_FAIL(sock2 =
+                                     open(kSocketGoferPath, O_RDWR | flags));
+    MaybeSave();  // Successful socket creation.
+
+    return absl::make_unique<FDSocketPair>(sock1, sock2);
+  };
+}
+
+Creator<SocketPair> UnboundSocketPairCreator(bool abstract, int domain,
+                                             int type, int protocol) {
+  return [=]() -> PosixErrorOr<std::unique_ptr<AddrFDSocketPair>> {
+    ASSIGN_OR_RETURN_ERRNO(struct sockaddr_un addr1,
+                           UniqueUnixAddr(abstract, domain));
+    ASSIGN_OR_RETURN_ERRNO(struct sockaddr_un addr2,
+                           UniqueUnixAddr(abstract, domain));
+
+    int sock1;
+    RETURN_ERROR_IF_SYSCALL_FAIL(sock1 = socket(domain, type, protocol));
+    MaybeSave();  // Successful socket creation.
+    int sock2;
+    RETURN_ERROR_IF_SYSCALL_FAIL(sock2 = socket(domain, type, protocol));
+    MaybeSave();  // Successful socket creation.
+    return absl::make_unique<AddrFDSocketPair>(sock1, sock2, addr1, addr2);
+  };
+}
+
+Creator<SocketPair> FilesystemUnboundSocketPairCreator(int domain, int type,
+                                                       int protocol) {
+  return UnboundSocketPairCreator(/* abstract= */ false, domain, type,
+                                  protocol);
+}
+
+Creator<SocketPair> AbstractUnboundSocketPairCreator(int domain, int type,
+                                                     int protocol) {
+  return UnboundSocketPairCreator(/* abstract= */ true, domain, type, protocol);
+}
+
+void LocalhostAddr(struct sockaddr_in* addr, bool dual_stack) {
+  addr->sin_family = AF_INET;
+  addr->sin_port = htons(0);
+  inet_pton(AF_INET, "127.0.0.1",
+            reinterpret_cast<void*>(&addr->sin_addr.s_addr));
+}
+
+void LocalhostAddr(struct sockaddr_in6* addr, bool dual_stack) {
+  addr->sin6_family = AF_INET6;
+  addr->sin6_port = htons(0);
+  if (dual_stack) {
+    inet_pton(AF_INET6, "::ffff:127.0.0.1",
+              reinterpret_cast<void*>(&addr->sin6_addr.s6_addr));
+  } else {
+    inet_pton(AF_INET6, "::1",
+              reinterpret_cast<void*>(&addr->sin6_addr.s6_addr));
+  }
+  addr->sin6_scope_id = 0;
+}
+
+template <typename T>
+PosixErrorOr<T> BindIP(int fd, bool dual_stack) {
+  T addr = {};
+  LocalhostAddr(&addr, dual_stack);
+  RETURN_ERROR_IF_SYSCALL_FAIL(
+      bind(fd, reinterpret_cast<struct sockaddr*>(&addr), sizeof(addr)));
+  socklen_t addrlen = sizeof(addr);
+  RETURN_ERROR_IF_SYSCALL_FAIL(
+      getsockname(fd, reinterpret_cast<struct sockaddr*>(&addr), &addrlen));
+  return addr;
+}
+
+template <typename T>
+PosixErrorOr<T> TCPBindAndListen(int fd, bool dual_stack) {
+  ASSIGN_OR_RETURN_ERRNO(T addr, BindIP<T>(fd, dual_stack));
+  RETURN_ERROR_IF_SYSCALL_FAIL(listen(fd, /* backlog = */ 5));
+  return addr;
+}
+
+template <typename T>
+PosixErrorOr<std::unique_ptr<AddrFDSocketPair>>
+CreateTCPConnectAcceptSocketPair(int bound, int connected, int type,
+                                 bool dual_stack, T bind_addr) {
+  int connect_result = 0;
+  RETURN_ERROR_IF_SYSCALL_FAIL(
+      (connect_result = RetryEINTR(connect)(
+           connected, reinterpret_cast<struct sockaddr*>(&bind_addr),
+           sizeof(bind_addr))) == -1 &&
+              errno == EINPROGRESS
+          ? 0
+          : connect_result);
+  MaybeSave();  // Successful connect.
+
+  if (connect_result == -1) {
+    struct pollfd connect_poll = {connected, POLLOUT | POLLERR | POLLHUP, 0};
+    RETURN_ERROR_IF_SYSCALL_FAIL(RetryEINTR(poll)(&connect_poll, 1, 0));
+    int error = 0;
+    socklen_t errorlen = sizeof(error);
+    RETURN_ERROR_IF_SYSCALL_FAIL(
+        getsockopt(connected, SOL_SOCKET, SO_ERROR, &error, &errorlen));
+    errno = error;
+    RETURN_ERROR_IF_SYSCALL_FAIL(
+        /* connect */ error == 0 ? 0 : -1);
+  }
+
+  int accepted = -1;
+  struct pollfd accept_poll = {bound, POLLIN, 0};
+  while (accepted == -1) {
+    RETURN_ERROR_IF_SYSCALL_FAIL(RetryEINTR(poll)(&accept_poll, 1, 0));
+
+    RETURN_ERROR_IF_SYSCALL_FAIL(
+        (accepted = RetryEINTR(accept4)(
+             bound, nullptr, nullptr, type & (SOCK_NONBLOCK | SOCK_CLOEXEC))) ==
+                    -1 &&
+                errno == EAGAIN
+            ? 0
+            : accepted);
+  }
+  MaybeSave();  // Successful accept.
+
+  T extra_addr = {};
+  LocalhostAddr(&extra_addr, dual_stack);
+  return absl::make_unique<AddrFDSocketPair>(connected, accepted, bind_addr,
+                                             extra_addr);
+}
+
+template <typename T>
+PosixErrorOr<std::unique_ptr<AddrFDSocketPair>> CreateTCPAcceptBindSocketPair(
+    int bound, int connected, int type, bool dual_stack) {
+  ASSIGN_OR_RETURN_ERRNO(T bind_addr, TCPBindAndListen<T>(bound, dual_stack));
+
+  auto result = CreateTCPConnectAcceptSocketPair(bound, connected, type,
+                                                 dual_stack, bind_addr);
+
+  // Cleanup no longer needed resources.
+  RETURN_ERROR_IF_SYSCALL_FAIL(close(bound));
+  MaybeSave();  // Successful close.
+
+  return result;
+}
+
+Creator<SocketPair> TCPAcceptBindSocketPairCreator(int domain, int type,
+                                                   int protocol,
+                                                   bool dual_stack) {
+  return [=]() -> PosixErrorOr<std::unique_ptr<AddrFDSocketPair>> {
+    int bound;
+    RETURN_ERROR_IF_SYSCALL_FAIL(bound = socket(domain, type, protocol));
+    MaybeSave();  // Successful socket creation.
+
+    int connected;
+    RETURN_ERROR_IF_SYSCALL_FAIL(connected = socket(domain, type, protocol));
+    MaybeSave();  // Successful socket creation.
+
+    if (domain == AF_INET) {
+      return CreateTCPAcceptBindSocketPair<sockaddr_in>(bound, connected, type,
+                                                        dual_stack);
+    }
+    return CreateTCPAcceptBindSocketPair<sockaddr_in6>(bound, connected, type,
+                                                       dual_stack);
+  };
+}
+
+Creator<SocketPair> TCPAcceptBindPersistentListenerSocketPairCreator(
+    int domain, int type, int protocol, bool dual_stack) {
+  // These are lazily initialized below, on the first call to the returned
+  // lambda. These values are private to each returned lambda, but shared across
+  // invocations of a specific lambda.
+  //
+  // The sharing allows pairs created with the same parameters to share a
+  // listener. This prevents future connects from failing if the connecting
+  // socket selects a port which had previously been used by a listening socket
+  // that still has some connections in TIME-WAIT.
+  //
+  // The lazy initialization is to avoid creating sockets during parameter
+  // enumeration. This is important because parameters are enumerated during the
+  // build process where networking may not be available.
+  auto listener = std::make_shared<absl::optional<int>>(absl::optional<int>());
+  auto addr4 = std::make_shared<absl::optional<sockaddr_in>>(
+      absl::optional<sockaddr_in>());
+  auto addr6 = std::make_shared<absl::optional<sockaddr_in6>>(
+      absl::optional<sockaddr_in6>());
+
+  return [=]() -> PosixErrorOr<std::unique_ptr<AddrFDSocketPair>> {
+    int connected;
+    RETURN_ERROR_IF_SYSCALL_FAIL(connected = socket(domain, type, protocol));
+    MaybeSave();  // Successful socket creation.
+
+    // Share the listener across invocations.
+    if (!listener->has_value()) {
+      int fd = socket(domain, type, protocol);
+      if (fd < 0) {
+        return PosixError(errno, absl::StrCat("socket(", domain, ", ", type,
+                                              ", ", protocol, ")"));
+      }
+      listener->emplace(fd);
+      MaybeSave();  // Successful socket creation.
+    }
+
+    // Bind the listener once, but create a new connect/accept pair each
+    // time.
+    if (domain == AF_INET) {
+      if (!addr4->has_value()) {
+        addr4->emplace(
+            TCPBindAndListen<sockaddr_in>(listener->value(), dual_stack)
+                .ValueOrDie());
+      }
+      return CreateTCPConnectAcceptSocketPair(listener->value(), connected,
+                                              type, dual_stack, addr4->value());
+    }
+    if (!addr6->has_value()) {
+      addr6->emplace(
+          TCPBindAndListen<sockaddr_in6>(listener->value(), dual_stack)
+              .ValueOrDie());
+    }
+    return CreateTCPConnectAcceptSocketPair(listener->value(), connected, type,
+                                            dual_stack, addr6->value());
+  };
+}
+
+template <typename T>
+PosixErrorOr<std::unique_ptr<AddrFDSocketPair>> CreateUDPBoundSocketPair(
+    int sock1, int sock2, int type, bool dual_stack) {
+  ASSIGN_OR_RETURN_ERRNO(T addr1, BindIP<T>(sock1, dual_stack));
+  ASSIGN_OR_RETURN_ERRNO(T addr2, BindIP<T>(sock2, dual_stack));
+
+  return absl::make_unique<AddrFDSocketPair>(sock1, sock2, addr1, addr2);
+}
+
+template <typename T>
+PosixErrorOr<std::unique_ptr<AddrFDSocketPair>>
+CreateUDPBidirectionalBindSocketPair(int sock1, int sock2, int type,
+                                     bool dual_stack) {
+  ASSIGN_OR_RETURN_ERRNO(
+      auto socks, CreateUDPBoundSocketPair<T>(sock1, sock2, type, dual_stack));
+
+  // Connect sock1 to sock2.
+  RETURN_ERROR_IF_SYSCALL_FAIL(connect(socks->first_fd(), socks->second_addr(),
+                                       socks->second_addr_size()));
+  MaybeSave();  // Successful connection.
+
+  // Connect sock2 to sock1.
+  RETURN_ERROR_IF_SYSCALL_FAIL(connect(socks->second_fd(), socks->first_addr(),
+                                       socks->first_addr_size()));
+  MaybeSave();  // Successful connection.
+
+  return socks;
+}
+
+Creator<SocketPair> UDPBidirectionalBindSocketPairCreator(int domain, int type,
+                                                          int protocol,
+                                                          bool dual_stack) {
+  return [=]() -> PosixErrorOr<std::unique_ptr<AddrFDSocketPair>> {
+    int sock1;
+    RETURN_ERROR_IF_SYSCALL_FAIL(sock1 = socket(domain, type, protocol));
+    MaybeSave();  // Successful socket creation.
+
+    int sock2;
+    RETURN_ERROR_IF_SYSCALL_FAIL(sock2 = socket(domain, type, protocol));
+    MaybeSave();  // Successful socket creation.
+
+    if (domain == AF_INET) {
+      return CreateUDPBidirectionalBindSocketPair<sockaddr_in>(
+          sock1, sock2, type, dual_stack);
+    }
+    return CreateUDPBidirectionalBindSocketPair<sockaddr_in6>(sock1, sock2,
+                                                              type, dual_stack);
+  };
+}
+
+Creator<SocketPair> UDPUnboundSocketPairCreator(int domain, int type,
+                                                int protocol, bool dual_stack) {
+  return [=]() -> PosixErrorOr<std::unique_ptr<FDSocketPair>> {
+    int sock1;
+    RETURN_ERROR_IF_SYSCALL_FAIL(sock1 = socket(domain, type, protocol));
+    MaybeSave();  // Successful socket creation.
+
+    int sock2;
+    RETURN_ERROR_IF_SYSCALL_FAIL(sock2 = socket(domain, type, protocol));
+    MaybeSave();  // Successful socket creation.
+
+    return absl::make_unique<FDSocketPair>(sock1, sock2);
+  };
+}
+
+SocketPairKind Reversed(SocketPairKind const& base) {
+  auto const& creator = base.creator;
+  return SocketPairKind{
+      absl::StrCat("reversed ", base.description), base.domain, base.type,
+      base.protocol,
+      [creator]() -> PosixErrorOr<std::unique_ptr<ReversedSocketPair>> {
+        ASSIGN_OR_RETURN_ERRNO(auto creator_value, creator());
+        return absl::make_unique<ReversedSocketPair>(std::move(creator_value));
+      }};
+}
+
+Creator<FileDescriptor> UnboundSocketCreator(int domain, int type,
+                                             int protocol) {
+  return [=]() -> PosixErrorOr<std::unique_ptr<FileDescriptor>> {
+    int sock;
+    RETURN_ERROR_IF_SYSCALL_FAIL(sock = socket(domain, type, protocol));
+    MaybeSave();  // Successful socket creation.
+
+    return absl::make_unique<FileDescriptor>(sock);
+  };
+}
+
+std::vector<SocketPairKind> IncludeReversals(std::vector<SocketPairKind> vec) {
+  return ApplyVecToVec<SocketPairKind>(std::vector<Middleware>{NoOp, Reversed},
+                                       vec);
+}
+
+SocketPairKind NoOp(SocketPairKind const& base) { return base; }
+
+void TransferTest(int fd1, int fd2) {
+  char buf1[20];
+  RandomizeBuffer(buf1, sizeof(buf1));
+  ASSERT_THAT(WriteFd(fd1, buf1, sizeof(buf1)),
+              SyscallSucceedsWithValue(sizeof(buf1)));
+
+  char buf2[20];
+  ASSERT_THAT(ReadFd(fd2, buf2, sizeof(buf2)),
+              SyscallSucceedsWithValue(sizeof(buf2)));
+
+  EXPECT_EQ(0, memcmp(buf1, buf2, sizeof(buf1)));
+
+  RandomizeBuffer(buf1, sizeof(buf1));
+  ASSERT_THAT(WriteFd(fd2, buf1, sizeof(buf1)),
+              SyscallSucceedsWithValue(sizeof(buf1)));
+
+  ASSERT_THAT(ReadFd(fd1, buf2, sizeof(buf2)),
+              SyscallSucceedsWithValue(sizeof(buf2)));
+
+  EXPECT_EQ(0, memcmp(buf1, buf2, sizeof(buf1)));
+}
+
+// Initializes the given buffer with random data.
+void RandomizeBuffer(char* ptr, size_t len) {
+  uint32_t seed = time(nullptr);
+  for (size_t i = 0; i < len; ++i) {
+    ptr[i] = static_cast<char>(rand_r(&seed));
+  }
+}
+
+size_t CalculateUnixSockAddrLen(const char* sun_path) {
+  // Abstract addresses always return the full length.
+  if (sun_path[0] == 0) {
+    return sizeof(sockaddr_un);
+  }
+  // Filesystem addresses use the address length plus the 2 byte sun_family
+  // and null terminator.
+  return strlen(sun_path) + 3;
+}
+
+struct sockaddr_storage AddrFDSocketPair::to_storage(const sockaddr_un& addr) {
+  struct sockaddr_storage addr_storage = {};
+  memcpy(&addr_storage, &addr, sizeof(addr));
+  return addr_storage;
+}
+
+struct sockaddr_storage AddrFDSocketPair::to_storage(const sockaddr_in& addr) {
+  struct sockaddr_storage addr_storage = {};
+  memcpy(&addr_storage, &addr, sizeof(addr));
+  return addr_storage;
+}
+
+struct sockaddr_storage AddrFDSocketPair::to_storage(const sockaddr_in6& addr) {
+  struct sockaddr_storage addr_storage = {};
+  memcpy(&addr_storage, &addr, sizeof(addr));
+  return addr_storage;
+}
+
+SocketKind SimpleSocket(int fam, int type, int proto) {
+  return SocketKind{
+      absl::StrCat("Family ", fam, ", type ", type, ", proto ", proto), fam,
+      type, proto, SyscallSocketCreator(fam, type, proto)};
+}
+
+ssize_t SendLargeSendMsg(const std::unique_ptr<SocketPair>& sockets,
+                         size_t size, bool reader) {
+  const int rfd = sockets->second_fd();
+  ScopedThread t([rfd, size, reader] {
+    if (!reader) {
+      return;
+    }
+
+    // Potentially too many syscalls in the loop.
+    const DisableSave ds;
+
+    std::vector<char> buf(size);
+    size_t total = 0;
+
+    while (total < size) {
+      int ret = read(rfd, buf.data(), buf.size());
+      if (ret == -1 && errno == EAGAIN) {
+        continue;
+      }
+      if (ret > 0) {
+        total += ret;
+      }
+
+      // Assert to return on first failure.
+      ASSERT_THAT(ret, SyscallSucceeds());
+    }
+  });
+
+  std::vector<char> buf(size);
+
+  struct iovec iov = {};
+  iov.iov_base = buf.data();
+  iov.iov_len = buf.size();
+
+  struct msghdr msg = {};
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+
+  return RetryEINTR(sendmsg)(sockets->first_fd(), &msg, 0);
+}
+
+namespace internal {
+PosixErrorOr<int> TryPortAvailable(int port, AddressFamily family,
+                                   SocketType type, bool reuse_addr) {
+  if (port < 0) {
+    return PosixError(EINVAL, "Invalid port");
+  }
+
+  // Both Ipv6 and Dualstack are AF_INET6.
+  int sock_fam = (family == AddressFamily::kIpv4 ? AF_INET : AF_INET6);
+  int sock_type = (type == SocketType::kTcp ? SOCK_STREAM : SOCK_DGRAM);
+  ASSIGN_OR_RETURN_ERRNO(auto fd, Socket(sock_fam, sock_type, 0));
+
+  if (reuse_addr) {
+    int one = 1;
+    RETURN_ERROR_IF_SYSCALL_FAIL(
+        setsockopt(fd.get(), SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)));
+  }
+
+  // Try to bind.
+  sockaddr_storage storage = {};
+  int storage_size = 0;
+  if (family == AddressFamily::kIpv4) {
+    sockaddr_in* addr = reinterpret_cast<sockaddr_in*>(&storage);
+    storage_size = sizeof(*addr);
+    addr->sin_family = AF_INET;
+    addr->sin_port = htons(port);
+    addr->sin_addr.s_addr = htonl(INADDR_ANY);
+  } else {
+    sockaddr_in6* addr = reinterpret_cast<sockaddr_in6*>(&storage);
+    storage_size = sizeof(*addr);
+    addr->sin6_family = AF_INET6;
+    addr->sin6_port = htons(port);
+    if (family == AddressFamily::kDualStack) {
+      inet_pton(AF_INET6, "::ffff:0.0.0.0",
+                reinterpret_cast<void*>(&addr->sin6_addr.s6_addr));
+    } else {
+      addr->sin6_addr = in6addr_any;
+    }
+  }
+
+  RETURN_ERROR_IF_SYSCALL_FAIL(
+      bind(fd.get(), reinterpret_cast<sockaddr*>(&storage), storage_size));
+
+  // If the user specified 0 as the port, we will return the port that the
+  // kernel gave us, otherwise we will validate that this socket bound to the
+  // requested port.
+  sockaddr_storage bound_storage = {};
+  socklen_t bound_storage_size = sizeof(bound_storage);
+  RETURN_ERROR_IF_SYSCALL_FAIL(
+      getsockname(fd.get(), reinterpret_cast<sockaddr*>(&bound_storage),
+                  &bound_storage_size));
+
+  int available_port = -1;
+  if (bound_storage.ss_family == AF_INET) {
+    sockaddr_in* addr = reinterpret_cast<sockaddr_in*>(&bound_storage);
+    available_port = ntohs(addr->sin_port);
+  } else if (bound_storage.ss_family == AF_INET6) {
+    sockaddr_in6* addr = reinterpret_cast<sockaddr_in6*>(&bound_storage);
+    available_port = ntohs(addr->sin6_port);
+  } else {
+    return PosixError(EPROTOTYPE, "Getsockname returned invalid family");
+  }
+
+  // If we requested a specific port make sure our bound port is that port.
+  if (port != 0 && available_port != port) {
+    return PosixError(EINVAL,
+                      absl::StrCat("Bound port ", available_port,
+                                   " was not equal to requested port ", port));
+  }
+
+  // If we're trying to do a TCP socket, let's also try to listen.
+  if (type == SocketType::kTcp) {
+    RETURN_ERROR_IF_SYSCALL_FAIL(listen(fd.get(), 1));
+  }
+
+  return available_port;
+}
+}  // namespace internal
+
+PosixErrorOr<int> SendMsg(int sock, msghdr* msg, char buf[], int buf_size) {
+  struct iovec iov;
+  iov.iov_base = buf;
+  iov.iov_len = buf_size;
+  msg->msg_iov = &iov;
+  msg->msg_iovlen = 1;
+
+  int ret;
+  RETURN_ERROR_IF_SYSCALL_FAIL(ret = RetryEINTR(sendmsg)(sock, msg, 0));
+  return ret;
+}
+
+void RecvNoData(int sock) {
+  char data = 0;
+  struct iovec iov;
+  iov.iov_base = &data;
+  iov.iov_len = 1;
+  struct msghdr msg = {};
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+  ASSERT_THAT(RetryEINTR(recvmsg)(sock, &msg, MSG_DONTWAIT),
+              SyscallFailsWithErrno(EAGAIN));
+}
+
+TestAddress V4Any() {
+  TestAddress t("V4Any");
+  t.addr.ss_family = AF_INET;
+  t.addr_len = sizeof(sockaddr_in);
+  reinterpret_cast<sockaddr_in*>(&t.addr)->sin_addr.s_addr = htonl(INADDR_ANY);
+  return t;
+}
+
+TestAddress V4Loopback() {
+  TestAddress t("V4Loopback");
+  t.addr.ss_family = AF_INET;
+  t.addr_len = sizeof(sockaddr_in);
+  reinterpret_cast<sockaddr_in*>(&t.addr)->sin_addr.s_addr =
+      htonl(INADDR_LOOPBACK);
+  return t;
+}
+
+TestAddress V4MappedAny() {
+  TestAddress t("V4MappedAny");
+  t.addr.ss_family = AF_INET6;
+  t.addr_len = sizeof(sockaddr_in6);
+  inet_pton(AF_INET6, "::ffff:0.0.0.0",
+            reinterpret_cast<sockaddr_in6*>(&t.addr)->sin6_addr.s6_addr);
+  return t;
+}
+
+TestAddress V4MappedLoopback() {
+  TestAddress t("V4MappedLoopback");
+  t.addr.ss_family = AF_INET6;
+  t.addr_len = sizeof(sockaddr_in6);
+  inet_pton(AF_INET6, "::ffff:127.0.0.1",
+            reinterpret_cast<sockaddr_in6*>(&t.addr)->sin6_addr.s6_addr);
+  return t;
+}
+
+TestAddress V4Multicast() {
+  TestAddress t("V4Multicast");
+  t.addr.ss_family = AF_INET;
+  t.addr_len = sizeof(sockaddr_in);
+  reinterpret_cast<sockaddr_in*>(&t.addr)->sin_addr.s_addr =
+      inet_addr(kMulticastAddress);
+  return t;
+}
+
+TestAddress V4Broadcast() {
+  TestAddress t("V4Broadcast");
+  t.addr.ss_family = AF_INET;
+  t.addr_len = sizeof(sockaddr_in);
+  reinterpret_cast<sockaddr_in*>(&t.addr)->sin_addr.s_addr =
+      htonl(INADDR_BROADCAST);
+  return t;
+}
+
+TestAddress V6Any() {
+  TestAddress t("V6Any");
+  t.addr.ss_family = AF_INET6;
+  t.addr_len = sizeof(sockaddr_in6);
+  reinterpret_cast<sockaddr_in6*>(&t.addr)->sin6_addr = in6addr_any;
+  return t;
+}
+
+TestAddress V6Loopback() {
+  TestAddress t("V6Loopback");
+  t.addr.ss_family = AF_INET6;
+  t.addr_len = sizeof(sockaddr_in6);
+  reinterpret_cast<sockaddr_in6*>(&t.addr)->sin6_addr = in6addr_loopback;
+  return t;
+}
+
+// Checksum computes the internet checksum of a buffer.
+uint16_t Checksum(uint16_t* buf, ssize_t buf_size) {
+  // Add up the 16-bit values in the buffer.
+  uint32_t total = 0;
+  for (unsigned int i = 0; i < buf_size; i += sizeof(*buf)) {
+    total += *buf;
+    buf++;
+  }
+
+  // If buf has an odd size, add the remaining byte.
+  if (buf_size % 2) {
+    total += *(reinterpret_cast<unsigned char*>(buf) - 1);
+  }
+
+  // This carries any bits past the lower 16 until everything fits in 16 bits.
+  while (total >> 16) {
+    uint16_t lower = total & 0xffff;
+    uint16_t upper = total >> 16;
+    total = lower + upper;
+  }
+
+  return ~total;
+}
+
+uint16_t IPChecksum(struct iphdr ip) {
+  return Checksum(reinterpret_cast<uint16_t*>(&ip), sizeof(ip));
+}
+
+// The pseudo-header defined in RFC 768 for calculating the UDP checksum.
+struct udp_pseudo_hdr {
+  uint32_t srcip;
+  uint32_t destip;
+  char zero;
+  char protocol;
+  uint16_t udplen;
+};
+
+uint16_t UDPChecksum(struct iphdr iphdr, struct udphdr udphdr,
+                     const char* payload, ssize_t payload_len) {
+  struct udp_pseudo_hdr phdr = {};
+  phdr.srcip = iphdr.saddr;
+  phdr.destip = iphdr.daddr;
+  phdr.zero = 0;
+  phdr.protocol = IPPROTO_UDP;
+  phdr.udplen = udphdr.len;
+
+  ssize_t buf_size = sizeof(phdr) + sizeof(udphdr) + payload_len;
+  char* buf = static_cast<char*>(malloc(buf_size));
+  memcpy(buf, &phdr, sizeof(phdr));
+  memcpy(buf + sizeof(phdr), &udphdr, sizeof(udphdr));
+  memcpy(buf + sizeof(phdr) + sizeof(udphdr), payload, payload_len);
+
+  uint16_t csum = Checksum(reinterpret_cast<uint16_t*>(buf), buf_size);
+  free(buf);
+  return csum;
+}
+
+uint16_t ICMPChecksum(struct icmphdr icmphdr, const char* payload,
+                      ssize_t payload_len) {
+  ssize_t buf_size = sizeof(icmphdr) + payload_len;
+  char* buf = static_cast<char*>(malloc(buf_size));
+  memcpy(buf, &icmphdr, sizeof(icmphdr));
+  memcpy(buf + sizeof(icmphdr), payload, payload_len);
+
+  uint16_t csum = Checksum(reinterpret_cast<uint16_t*>(buf), buf_size);
+  free(buf);
+  return csum;
+}
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_test_util.h b/test/syscalls/linux/socket_test_util.h
new file mode 100644
index 000000000..734b48b96
--- /dev/null
+++ b/test/syscalls/linux/socket_test_util.h
@@ -0,0 +1,518 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_SYSCALLS_SOCKET_TEST_UTIL_H_
+#define GVISOR_TEST_SYSCALLS_SOCKET_TEST_UTIL_H_
+
+#include <errno.h>
+#include <netinet/ip.h>
+#include <netinet/ip_icmp.h>
+#include <netinet/udp.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+
+#include <functional>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "absl/strings/str_format.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/posix_error.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// Wrapper for socket(2) that returns a FileDescriptor.
+inline PosixErrorOr<FileDescriptor> Socket(int family, int type, int protocol) {
+  int fd = socket(family, type, protocol);
+  MaybeSave();
+  if (fd < 0) {
+    return PosixError(
+        errno, absl::StrFormat("socket(%d, %d, %d)", family, type, protocol));
+  }
+  return FileDescriptor(fd);
+}
+
+// Wrapper for accept(2) that returns a FileDescriptor.
+inline PosixErrorOr<FileDescriptor> Accept(int sockfd, sockaddr* addr,
+                                           socklen_t* addrlen) {
+  int fd = RetryEINTR(accept)(sockfd, addr, addrlen);
+  MaybeSave();
+  if (fd < 0) {
+    return PosixError(
+        errno, absl::StrFormat("accept(%d, %p, %p)", sockfd, addr, addrlen));
+  }
+  return FileDescriptor(fd);
+}
+
+// Wrapper for accept4(2) that returns a FileDescriptor.
+inline PosixErrorOr<FileDescriptor> Accept4(int sockfd, sockaddr* addr,
+                                            socklen_t* addrlen, int flags) {
+  int fd = RetryEINTR(accept4)(sockfd, addr, addrlen, flags);
+  MaybeSave();
+  if (fd < 0) {
+    return PosixError(errno, absl::StrFormat("accept4(%d, %p, %p, %#x)", sockfd,
+                                             addr, addrlen, flags));
+  }
+  return FileDescriptor(fd);
+}
+
+inline ssize_t SendFd(int fd, void* buf, size_t count, int flags) {
+  return internal::ApplyFileIoSyscall(
+      [&](size_t completed) {
+        return sendto(fd, static_cast<char*>(buf) + completed,
+                      count - completed, flags, nullptr, 0);
+      },
+      count);
+}
+
+PosixErrorOr<struct sockaddr_un> UniqueUnixAddr(bool abstract, int domain);
+
+// A Creator<T> is a function that attempts to create and return a new T. (This
+// is copy/pasted from cloud/gvisor/api/sandbox_util.h and is just duplicated
+// here for clarity.)
+template <typename T>
+using Creator = std::function<PosixErrorOr<std::unique_ptr<T>>()>;
+
+// A SocketPair represents a pair of socket file descriptors owned by the
+// SocketPair.
+class SocketPair {
+ public:
+  virtual ~SocketPair() = default;
+
+  virtual int first_fd() const = 0;
+  virtual int second_fd() const = 0;
+  virtual int release_first_fd() = 0;
+  virtual int release_second_fd() = 0;
+  virtual const struct sockaddr* first_addr() const = 0;
+  virtual const struct sockaddr* second_addr() const = 0;
+  virtual size_t first_addr_size() const = 0;
+  virtual size_t second_addr_size() const = 0;
+  virtual size_t first_addr_len() const = 0;
+  virtual size_t second_addr_len() const = 0;
+};
+
+// A FDSocketPair is a SocketPair that consists of only a pair of file
+// descriptors.
+class FDSocketPair : public SocketPair {
+ public:
+  FDSocketPair(int first_fd, int second_fd)
+      : first_(first_fd), second_(second_fd) {}
+  FDSocketPair(std::unique_ptr<FileDescriptor> first_fd,
+               std::unique_ptr<FileDescriptor> second_fd)
+      : first_(first_fd->release()), second_(second_fd->release()) {}
+
+  int first_fd() const override { return first_.get(); }
+  int second_fd() const override { return second_.get(); }
+  int release_first_fd() override { return first_.release(); }
+  int release_second_fd() override { return second_.release(); }
+  const struct sockaddr* first_addr() const override { return nullptr; }
+  const struct sockaddr* second_addr() const override { return nullptr; }
+  size_t first_addr_size() const override { return 0; }
+  size_t second_addr_size() const override { return 0; }
+  size_t first_addr_len() const override { return 0; }
+  size_t second_addr_len() const override { return 0; }
+
+ private:
+  FileDescriptor first_;
+  FileDescriptor second_;
+};
+
+// CalculateUnixSockAddrLen calculates the length returned by recvfrom(2) and
+// recvmsg(2) for Unix sockets.
+size_t CalculateUnixSockAddrLen(const char* sun_path);
+
+// A AddrFDSocketPair is a SocketPair that consists of a pair of file
+// descriptors in addition to a pair of socket addresses.
+class AddrFDSocketPair : public SocketPair {
+ public:
+  AddrFDSocketPair(int first_fd, int second_fd,
+                   const struct sockaddr_un& first_address,
+                   const struct sockaddr_un& second_address)
+      : first_(first_fd),
+        second_(second_fd),
+        first_addr_(to_storage(first_address)),
+        second_addr_(to_storage(second_address)),
+        first_len_(CalculateUnixSockAddrLen(first_address.sun_path)),
+        second_len_(CalculateUnixSockAddrLen(second_address.sun_path)),
+        first_size_(sizeof(first_address)),
+        second_size_(sizeof(second_address)) {}
+
+  AddrFDSocketPair(int first_fd, int second_fd,
+                   const struct sockaddr_in& first_address,
+                   const struct sockaddr_in& second_address)
+      : first_(first_fd),
+        second_(second_fd),
+        first_addr_(to_storage(first_address)),
+        second_addr_(to_storage(second_address)),
+        first_len_(sizeof(first_address)),
+        second_len_(sizeof(second_address)),
+        first_size_(sizeof(first_address)),
+        second_size_(sizeof(second_address)) {}
+
+  AddrFDSocketPair(int first_fd, int second_fd,
+                   const struct sockaddr_in6& first_address,
+                   const struct sockaddr_in6& second_address)
+      : first_(first_fd),
+        second_(second_fd),
+        first_addr_(to_storage(first_address)),
+        second_addr_(to_storage(second_address)),
+        first_len_(sizeof(first_address)),
+        second_len_(sizeof(second_address)),
+        first_size_(sizeof(first_address)),
+        second_size_(sizeof(second_address)) {}
+
+  int first_fd() const override { return first_.get(); }
+  int second_fd() const override { return second_.get(); }
+  int release_first_fd() override { return first_.release(); }
+  int release_second_fd() override { return second_.release(); }
+  const struct sockaddr* first_addr() const override {
+    return reinterpret_cast<const struct sockaddr*>(&first_addr_);
+  }
+  const struct sockaddr* second_addr() const override {
+    return reinterpret_cast<const struct sockaddr*>(&second_addr_);
+  }
+  size_t first_addr_size() const override { return first_size_; }
+  size_t second_addr_size() const override { return second_size_; }
+  size_t first_addr_len() const override { return first_len_; }
+  size_t second_addr_len() const override { return second_len_; }
+
+ private:
+  // to_storage coverts a sockaddr_* to a sockaddr_storage.
+  static struct sockaddr_storage to_storage(const sockaddr_un& addr);
+  static struct sockaddr_storage to_storage(const sockaddr_in& addr);
+  static struct sockaddr_storage to_storage(const sockaddr_in6& addr);
+
+  FileDescriptor first_;
+  FileDescriptor second_;
+  const struct sockaddr_storage first_addr_;
+  const struct sockaddr_storage second_addr_;
+  const size_t first_len_;
+  const size_t second_len_;
+  const size_t first_size_;
+  const size_t second_size_;
+};
+
+// SyscallSocketPairCreator returns a Creator<SocketPair> that obtains file
+// descriptors by invoking the socketpair() syscall.
+Creator<SocketPair> SyscallSocketPairCreator(int domain, int type,
+                                             int protocol);
+
+// SyscallSocketCreator returns a Creator<FileDescriptor> that obtains a file
+// descriptor by invoking the socket() syscall.
+Creator<FileDescriptor> SyscallSocketCreator(int domain, int type,
+                                             int protocol);
+
+// FilesystemBidirectionalBindSocketPairCreator returns a Creator<SocketPair>
+// that obtains file descriptors by invoking the bind() and connect() syscalls
+// on filesystem paths. Only works for DGRAM sockets.
+Creator<SocketPair> FilesystemBidirectionalBindSocketPairCreator(int domain,
+                                                                 int type,
+                                                                 int protocol);
+
+// AbstractBidirectionalBindSocketPairCreator returns a Creator<SocketPair> that
+// obtains file descriptors by invoking the bind() and connect() syscalls on
+// abstract namespace paths. Only works for DGRAM sockets.
+Creator<SocketPair> AbstractBidirectionalBindSocketPairCreator(int domain,
+                                                               int type,
+                                                               int protocol);
+
+// SocketpairGoferSocketPairCreator returns a Creator<SocketPair> that
+// obtains file descriptors by connect() syscalls on two sockets with socketpair
+// gofer paths.
+Creator<SocketPair> SocketpairGoferSocketPairCreator(int domain, int type,
+                                                     int protocol);
+
+// SocketpairGoferFileSocketPairCreator returns a Creator<SocketPair> that
+// obtains file descriptors by open() syscalls on socketpair gofer paths.
+Creator<SocketPair> SocketpairGoferFileSocketPairCreator(int flags);
+
+// FilesystemAcceptBindSocketPairCreator returns a Creator<SocketPair> that
+// obtains file descriptors by invoking the accept() and bind() syscalls on
+// a filesystem path. Only works for STREAM and SEQPACKET sockets.
+Creator<SocketPair> FilesystemAcceptBindSocketPairCreator(int domain, int type,
+                                                          int protocol);
+
+// AbstractAcceptBindSocketPairCreator returns a Creator<SocketPair> that
+// obtains file descriptors by invoking the accept() and bind() syscalls on a
+// abstract namespace path. Only works for STREAM and SEQPACKET sockets.
+Creator<SocketPair> AbstractAcceptBindSocketPairCreator(int domain, int type,
+                                                        int protocol);
+
+// FilesystemUnboundSocketPairCreator returns a Creator<SocketPair> that obtains
+// file descriptors by invoking the socket() syscall and generates a filesystem
+// path for binding.
+Creator<SocketPair> FilesystemUnboundSocketPairCreator(int domain, int type,
+                                                       int protocol);
+
+// AbstractUnboundSocketPairCreator returns a Creator<SocketPair> that obtains
+// file descriptors by invoking the socket() syscall and generates an abstract
+// path for binding.
+Creator<SocketPair> AbstractUnboundSocketPairCreator(int domain, int type,
+                                                     int protocol);
+
+// TCPAcceptBindSocketPairCreator returns a Creator<SocketPair> that obtains
+// file descriptors by invoking the accept() and bind() syscalls on TCP sockets.
+Creator<SocketPair> TCPAcceptBindSocketPairCreator(int domain, int type,
+                                                   int protocol,
+                                                   bool dual_stack);
+
+// TCPAcceptBindPersistentListenerSocketPairCreator is like
+// TCPAcceptBindSocketPairCreator, except it uses the same listening socket to
+// create all SocketPairs.
+Creator<SocketPair> TCPAcceptBindPersistentListenerSocketPairCreator(
+    int domain, int type, int protocol, bool dual_stack);
+
+// UDPBidirectionalBindSocketPairCreator returns a Creator<SocketPair> that
+// obtains file descriptors by invoking the bind() and connect() syscalls on UDP
+// sockets.
+Creator<SocketPair> UDPBidirectionalBindSocketPairCreator(int domain, int type,
+                                                          int protocol,
+                                                          bool dual_stack);
+
+// UDPUnboundSocketPairCreator returns a Creator<SocketPair> that obtains file
+// descriptors by creating UDP sockets.
+Creator<SocketPair> UDPUnboundSocketPairCreator(int domain, int type,
+                                                int protocol, bool dual_stack);
+
+// UnboundSocketCreator returns a Creator<FileDescriptor> that obtains a file
+// descriptor by creating a socket.
+Creator<FileDescriptor> UnboundSocketCreator(int domain, int type,
+                                             int protocol);
+
+// A SocketPairKind couples a human-readable description of a socket pair with
+// a function that creates such a socket pair.
+struct SocketPairKind {
+  std::string description;
+  int domain;
+  int type;
+  int protocol;
+  Creator<SocketPair> creator;
+
+  // Create creates a socket pair of this kind.
+  PosixErrorOr<std::unique_ptr<SocketPair>> Create() const { return creator(); }
+};
+
+// A SocketKind couples a human-readable description of a socket with
+// a function that creates such a socket.
+struct SocketKind {
+  std::string description;
+  int domain;
+  int type;
+  int protocol;
+  Creator<FileDescriptor> creator;
+
+  // Create creates a socket pair of this kind.
+  PosixErrorOr<std::unique_ptr<FileDescriptor>> Create() const {
+    return creator();
+  }
+};
+
+// A ReversedSocketPair wraps another SocketPair but flips the first and second
+// file descriptors. ReversedSocketPair is used to test socket pairs that
+// should be symmetric.
+class ReversedSocketPair : public SocketPair {
+ public:
+  explicit ReversedSocketPair(std::unique_ptr<SocketPair> base)
+      : base_(std::move(base)) {}
+
+  int first_fd() const override { return base_->second_fd(); }
+  int second_fd() const override { return base_->first_fd(); }
+  int release_first_fd() override { return base_->release_second_fd(); }
+  int release_second_fd() override { return base_->release_first_fd(); }
+  const struct sockaddr* first_addr() const override {
+    return base_->second_addr();
+  }
+  const struct sockaddr* second_addr() const override {
+    return base_->first_addr();
+  }
+  size_t first_addr_size() const override { return base_->second_addr_size(); }
+  size_t second_addr_size() const override { return base_->first_addr_size(); }
+  size_t first_addr_len() const override { return base_->second_addr_len(); }
+  size_t second_addr_len() const override { return base_->first_addr_len(); }
+
+ private:
+  std::unique_ptr<SocketPair> base_;
+};
+
+// Reversed returns a SocketPairKind that represents SocketPairs created by
+// flipping the file descriptors provided by another SocketPair.
+SocketPairKind Reversed(SocketPairKind const& base);
+
+// IncludeReversals returns a vector<SocketPairKind> that returns all
+// SocketPairKinds in `vec` as well as all SocketPairKinds obtained by flipping
+// the file descriptors provided by the kinds in `vec`.
+std::vector<SocketPairKind> IncludeReversals(std::vector<SocketPairKind> vec);
+
+// A Middleware is a function wraps a SocketPairKind.
+using Middleware = std::function<SocketPairKind(SocketPairKind)>;
+
+// Reversed returns a SocketPairKind that represents SocketPairs created by
+// flipping the file descriptors provided by another SocketPair.
+template <typename T>
+Middleware SetSockOpt(int level, int optname, T* value) {
+  return [=](SocketPairKind const& base) {
+    auto const& creator = base.creator;
+    return SocketPairKind{
+        absl::StrCat("setsockopt(", level, ", ", optname, ", ", *value, ") ",
+                     base.description),
+        base.domain, base.type, base.protocol,
+        [creator, level, optname,
+         value]() -> PosixErrorOr<std::unique_ptr<SocketPair>> {
+          ASSIGN_OR_RETURN_ERRNO(auto creator_value, creator());
+          if (creator_value->first_fd() >= 0) {
+            RETURN_ERROR_IF_SYSCALL_FAIL(setsockopt(
+                creator_value->first_fd(), level, optname, value, sizeof(T)));
+          }
+          if (creator_value->second_fd() >= 0) {
+            RETURN_ERROR_IF_SYSCALL_FAIL(setsockopt(
+                creator_value->second_fd(), level, optname, value, sizeof(T)));
+          }
+          return creator_value;
+        }};
+  };
+}
+
+constexpr int kSockOptOn = 1;
+constexpr int kSockOptOff = 0;
+
+// NoOp returns the same SocketPairKind that it is passed.
+SocketPairKind NoOp(SocketPairKind const& base);
+
+// TransferTest tests that data can be send back and fourth between two
+// specified FDs. Note that calls to this function should be wrapped in
+// ASSERT_NO_FATAL_FAILURE().
+void TransferTest(int fd1, int fd2);
+
+// Fills [buf, buf+len) with random bytes.
+void RandomizeBuffer(char* buf, size_t len);
+
+// Base test fixture for tests that operate on pairs of connected sockets.
+class SocketPairTest : public ::testing::TestWithParam<SocketPairKind> {
+ protected:
+  SocketPairTest() {
+    // gUnit uses printf, so so will we.
+    printf("Testing with %s\n", GetParam().description.c_str());
+    fflush(stdout);
+  }
+
+  PosixErrorOr<std::unique_ptr<SocketPair>> NewSocketPair() const {
+    return GetParam().Create();
+  }
+};
+
+// Base test fixture for tests that operate on simple Sockets.
+class SimpleSocketTest : public ::testing::TestWithParam<SocketKind> {
+ protected:
+  SimpleSocketTest() {
+    // gUnit uses printf, so so will we.
+    printf("Testing with %s\n", GetParam().description.c_str());
+  }
+
+  PosixErrorOr<std::unique_ptr<FileDescriptor>> NewSocket() const {
+    return GetParam().Create();
+  }
+};
+
+SocketKind SimpleSocket(int fam, int type, int proto);
+
+// Send a buffer of size 'size' to sockets->first_fd(), returning the result of
+// sendmsg.
+//
+// If reader, read from second_fd() until size bytes have been read.
+ssize_t SendLargeSendMsg(const std::unique_ptr<SocketPair>& sockets,
+                         size_t size, bool reader);
+
+// Initializes the given buffer with random data.
+void RandomizeBuffer(char* ptr, size_t len);
+
+enum class AddressFamily { kIpv4 = 1, kIpv6 = 2, kDualStack = 3 };
+enum class SocketType { kUdp = 1, kTcp = 2 };
+
+// Returns a PosixError or a port that is available. If 0 is specified as the
+// port it will bind port 0 (and allow the kernel to select any free port).
+// Otherwise, it will try to bind the specified port and validate that it can be
+// used for the requested family and socket type. The final option is
+// reuse_addr. This specifies whether SO_REUSEADDR should be applied before a
+// bind(2) attempt. SO_REUSEADDR means that sockets in TIME_WAIT states or other
+// bound UDP sockets would not cause an error on bind(2). This option should be
+// set if subsequent calls to bind on the returned port will also use
+// SO_REUSEADDR.
+//
+// Note: That this test will attempt to bind the ANY address for the respective
+// protocol.
+PosixErrorOr<int> PortAvailable(int port, AddressFamily family, SocketType type,
+                                bool reuse_addr);
+
+// FreeAvailablePort is used to return a port that was obtained by using
+// the PortAvailable helper with port 0.
+PosixError FreeAvailablePort(int port);
+
+// SendMsg converts a buffer to an iovec and adds it to msg before sending it.
+PosixErrorOr<int> SendMsg(int sock, msghdr* msg, char buf[], int buf_size);
+
+// RecvNoData checks that no data is receivable on sock.
+void RecvNoData(int sock);
+
+// Base test fixture for tests that apply to all kinds of pairs of connected
+// sockets.
+using AllSocketPairTest = SocketPairTest;
+
+struct TestAddress {
+  std::string description;
+  sockaddr_storage addr;
+  socklen_t addr_len;
+
+  int family() const { return addr.ss_family; }
+  explicit TestAddress(std::string description = "")
+      : description(std::move(description)), addr(), addr_len() {}
+};
+
+constexpr char kMulticastAddress[] = "224.0.2.1";
+constexpr char kBroadcastAddress[] = "255.255.255.255";
+
+TestAddress V4Any();
+TestAddress V4Broadcast();
+TestAddress V4Loopback();
+TestAddress V4MappedAny();
+TestAddress V4MappedLoopback();
+TestAddress V4Multicast();
+TestAddress V6Any();
+TestAddress V6Loopback();
+
+// Compute the internet checksum of an IP header.
+uint16_t IPChecksum(struct iphdr ip);
+
+// Compute the internet checksum of a UDP header.
+uint16_t UDPChecksum(struct iphdr iphdr, struct udphdr udphdr,
+                     const char* payload, ssize_t payload_len);
+
+// Compute the internet checksum of an ICMP header.
+uint16_t ICMPChecksum(struct icmphdr icmphdr, const char* payload,
+                      ssize_t payload_len);
+
+namespace internal {
+PosixErrorOr<int> TryPortAvailable(int port, AddressFamily family,
+                                   SocketType type, bool reuse_addr);
+}  // namespace internal
+
+}  // namespace testing
+}  // namespace gvisor
+
+#endif  // GVISOR_TEST_SYSCALLS_SOCKET_TEST_UTIL_H_
diff --git a/test/syscalls/linux/socket_test_util_impl.cc b/test/syscalls/linux/socket_test_util_impl.cc
new file mode 100644
index 000000000..ef661a0e3
--- /dev/null
+++ b/test/syscalls/linux/socket_test_util_impl.cc
@@ -0,0 +1,28 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test/syscalls/linux/socket_test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+PosixErrorOr<int> PortAvailable(int port, AddressFamily family, SocketType type,
+                                bool reuse_addr) {
+  return internal::TryPortAvailable(port, family, type, reuse_addr);
+}
+
+PosixError FreeAvailablePort(int port) { return NoError(); }
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_unix.cc b/test/syscalls/linux/socket_unix.cc
new file mode 100644
index 000000000..591cab3fd
--- /dev/null
+++ b/test/syscalls/linux/socket_unix.cc
@@ -0,0 +1,274 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test/syscalls/linux/socket_unix.h"
+
+#include <errno.h>
+#include <net/if.h>
+#include <stdio.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "absl/strings/string_view.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+// This file contains tests specific to Unix domain sockets. It does not contain
+// tests for UDS control messages. Those belong in socket_unix_cmsg.cc.
+//
+// This file is a generic socket test file. It must be built with another file
+// that provides the test types.
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST_P(UnixSocketPairTest, InvalidGetSockOpt) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  int opt;
+  socklen_t optlen = sizeof(opt);
+  EXPECT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, -1, &opt, &optlen),
+              SyscallFailsWithErrno(ENOPROTOOPT));
+}
+
+TEST_P(UnixSocketPairTest, BindToBadName) {
+  auto pair =
+      ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+  constexpr char kBadName[] = "/some/path/that/does/not/exist";
+  sockaddr_un sockaddr;
+  sockaddr.sun_family = AF_LOCAL;
+  memcpy(sockaddr.sun_path, kBadName, sizeof(kBadName));
+
+  EXPECT_THAT(
+      bind(pair->first_fd(), reinterpret_cast<struct sockaddr*>(&sockaddr),
+           sizeof(sockaddr)),
+      SyscallFailsWithErrno(ENOENT));
+}
+
+TEST_P(UnixSocketPairTest, BindToBadFamily) {
+  auto pair =
+      ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+  constexpr char kBadName[] = "/some/path/that/does/not/exist";
+  sockaddr_un sockaddr;
+  sockaddr.sun_family = AF_INET;
+  memcpy(sockaddr.sun_path, kBadName, sizeof(kBadName));
+
+  EXPECT_THAT(
+      bind(pair->first_fd(), reinterpret_cast<struct sockaddr*>(&sockaddr),
+           sizeof(sockaddr)),
+      SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_P(UnixSocketPairTest, RecvmmsgTimeoutAfterRecv) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  char sent_data[10];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  char received_data[sizeof(sent_data) * 2];
+  std::vector<struct mmsghdr> msgs(2);
+  std::vector<struct iovec> iovs(msgs.size());
+  const int chunk_size = sizeof(received_data) / msgs.size();
+  for (size_t i = 0; i < msgs.size(); i++) {
+    iovs[i].iov_len = chunk_size;
+    iovs[i].iov_base = &received_data[i * chunk_size];
+    msgs[i].msg_hdr.msg_iov = &iovs[i];
+    msgs[i].msg_hdr.msg_iovlen = 1;
+  }
+
+  ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data, sizeof(sent_data)),
+              SyscallSucceedsWithValue(sizeof(sent_data)));
+
+  struct timespec timeout = {0, 1};
+  ASSERT_THAT(RetryEINTR(recvmmsg)(sockets->second_fd(), &msgs[0], msgs.size(),
+                                   0, &timeout),
+              SyscallSucceedsWithValue(1));
+
+  EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+
+  EXPECT_EQ(chunk_size, msgs[0].msg_len);
+}
+
+TEST_P(UnixSocketPairTest, TIOCINQSucceeds) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  if (IsRunningOnGvisor()) {
+    // TODO(gvisor.dev/issue/273): Inherited host UDS don't support TIOCINQ.
+    // Skip the test.
+    int size = -1;
+    int ret = ioctl(sockets->first_fd(), TIOCINQ, &size);
+    SKIP_IF(ret == -1 && errno == ENOTTY);
+  }
+
+  int size = -1;
+  EXPECT_THAT(ioctl(sockets->first_fd(), TIOCINQ, &size), SyscallSucceeds());
+  EXPECT_EQ(size, 0);
+
+  const char some_data[] = "dangerzone";
+  ASSERT_THAT(
+      RetryEINTR(send)(sockets->second_fd(), &some_data, sizeof(some_data), 0),
+      SyscallSucceeds());
+  EXPECT_THAT(ioctl(sockets->first_fd(), TIOCINQ, &size), SyscallSucceeds());
+  EXPECT_EQ(size, sizeof(some_data));
+
+  // Linux only reports the first message's size, which is wrong. We test for
+  // the behavior described in the man page.
+  SKIP_IF(!IsRunningOnGvisor());
+
+  ASSERT_THAT(
+      RetryEINTR(send)(sockets->second_fd(), &some_data, sizeof(some_data), 0),
+      SyscallSucceeds());
+  EXPECT_THAT(ioctl(sockets->first_fd(), TIOCINQ, &size), SyscallSucceeds());
+  EXPECT_EQ(size, sizeof(some_data) * 2);
+}
+
+TEST_P(UnixSocketPairTest, TIOCOUTQSucceeds) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  if (IsRunningOnGvisor()) {
+    // TODO(gvisor.dev/issue/273): Inherited host UDS don't support TIOCOUTQ.
+    // Skip the test.
+    int size = -1;
+    int ret = ioctl(sockets->second_fd(), TIOCOUTQ, &size);
+    SKIP_IF(ret == -1 && errno == ENOTTY);
+  }
+
+  int size = -1;
+  EXPECT_THAT(ioctl(sockets->second_fd(), TIOCOUTQ, &size), SyscallSucceeds());
+  EXPECT_EQ(size, 0);
+
+  // Linux reports bogus numbers which are related to its internal allocations.
+  // We test for the behavior described in the man page.
+  SKIP_IF(!IsRunningOnGvisor());
+
+  const char some_data[] = "dangerzone";
+  ASSERT_THAT(
+      RetryEINTR(send)(sockets->second_fd(), &some_data, sizeof(some_data), 0),
+      SyscallSucceeds());
+  EXPECT_THAT(ioctl(sockets->second_fd(), TIOCOUTQ, &size), SyscallSucceeds());
+  EXPECT_EQ(size, sizeof(some_data));
+
+  ASSERT_THAT(
+      RetryEINTR(send)(sockets->second_fd(), &some_data, sizeof(some_data), 0),
+      SyscallSucceeds());
+  EXPECT_THAT(ioctl(sockets->second_fd(), TIOCOUTQ, &size), SyscallSucceeds());
+  EXPECT_EQ(size, sizeof(some_data) * 2);
+}
+
+TEST_P(UnixSocketPairTest, NetdeviceIoctlsSucceed) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  // Prepare the request.
+  struct ifreq ifr;
+  snprintf(ifr.ifr_name, IFNAMSIZ, "lo");
+
+  // Check that the ioctl either succeeds or fails with ENODEV.
+  int err = ioctl(sockets->first_fd(), SIOCGIFINDEX, &ifr);
+  if (err < 0) {
+    ASSERT_EQ(errno, ENODEV);
+  }
+}
+
+TEST_P(UnixSocketPairTest, Shutdown) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  const std::string data = "abc";
+  ASSERT_THAT(WriteFd(sockets->first_fd(), data.c_str(), data.size()),
+              SyscallSucceedsWithValue(data.size()));
+
+  ASSERT_THAT(shutdown(sockets->first_fd(), SHUT_RDWR), SyscallSucceeds());
+  ASSERT_THAT(shutdown(sockets->second_fd(), SHUT_RDWR), SyscallSucceeds());
+
+  // Shutting down a socket does not clear the buffer.
+  char buf[3];
+  ASSERT_THAT(ReadFd(sockets->second_fd(), buf, data.size()),
+              SyscallSucceedsWithValue(data.size()));
+  EXPECT_EQ(data, absl::string_view(buf, data.size()));
+}
+
+TEST_P(UnixSocketPairTest, ShutdownRead) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  ASSERT_THAT(shutdown(sockets->first_fd(), SHUT_RD), SyscallSucceeds());
+
+  // When the socket is shutdown for read, read behavior varies between
+  // different socket types. This is covered by the various ReadOneSideClosed
+  // test cases.
+
+  // ... and the peer cannot write.
+  const std::string data = "abc";
+  EXPECT_THAT(WriteFd(sockets->second_fd(), data.c_str(), data.size()),
+              SyscallFailsWithErrno(EPIPE));
+
+  // ... but the socket can still write.
+  ASSERT_THAT(WriteFd(sockets->first_fd(), data.c_str(), data.size()),
+              SyscallSucceedsWithValue(data.size()));
+
+  // ... and the peer can still read.
+  char buf[3];
+  EXPECT_THAT(ReadFd(sockets->second_fd(), buf, data.size()),
+              SyscallSucceedsWithValue(data.size()));
+  EXPECT_EQ(data, absl::string_view(buf, data.size()));
+}
+
+TEST_P(UnixSocketPairTest, ShutdownWrite) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  ASSERT_THAT(shutdown(sockets->first_fd(), SHUT_WR), SyscallSucceeds());
+
+  // When the socket is shutdown for write, it cannot write.
+  const std::string data = "abc";
+  EXPECT_THAT(WriteFd(sockets->first_fd(), data.c_str(), data.size()),
+              SyscallFailsWithErrno(EPIPE));
+
+  // ... and the peer read behavior varies between different socket types. This
+  // is covered by the various ReadOneSideClosed test cases.
+
+  // ... but the peer can still write.
+  char buf[3];
+  ASSERT_THAT(WriteFd(sockets->second_fd(), data.c_str(), data.size()),
+              SyscallSucceedsWithValue(data.size()));
+
+  // ... and the socket can still read.
+  EXPECT_THAT(ReadFd(sockets->first_fd(), buf, data.size()),
+              SyscallSucceedsWithValue(data.size()));
+  EXPECT_EQ(data, absl::string_view(buf, data.size()));
+}
+
+TEST_P(UnixSocketPairTest, SocketReopenFromProcfs) {
+  // TODO(gvisor.dev/issue/1624): In VFS1, we return EIO instead of ENXIO (see
+  // b/122310852). Remove this skip once VFS1 is deleted.
+  SKIP_IF(IsRunningWithVFS1());
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  // Opening a socket pair via /proc/self/fd/X is a ENXIO.
+  for (const int fd : {sockets->first_fd(), sockets->second_fd()}) {
+    ASSERT_THAT(Open(absl::StrCat("/proc/self/fd/", fd), O_WRONLY),
+                PosixErrorIs(ENXIO, ::testing::_));
+  }
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_unix.h b/test/syscalls/linux/socket_unix.h
new file mode 100644
index 000000000..3625cc404
--- /dev/null
+++ b/test/syscalls/linux/socket_unix.h
@@ -0,0 +1,29 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_H_
+#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_H_
+
+#include "test/syscalls/linux/socket_test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// Test fixture for tests that apply to pairs of connected unix sockets.
+using UnixSocketPairTest = SocketPairTest;
+
+}  // namespace testing
+}  // namespace gvisor
+
+#endif  // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_H_
diff --git a/test/syscalls/linux/socket_unix_abstract_nonblock.cc b/test/syscalls/linux/socket_unix_abstract_nonblock.cc
new file mode 100644
index 000000000..8bef76b67
--- /dev/null
+++ b/test/syscalls/linux/socket_unix_abstract_nonblock.cc
@@ -0,0 +1,39 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <vector>
+
+#include "test/syscalls/linux/socket_non_blocking.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+std::vector<SocketPairKind> GetSocketPairs() {
+  return ApplyVec<SocketPairKind>(
+      AbstractBoundUnixDomainSocketPair,
+      AllBitwiseCombinations(List<int>{SOCK_STREAM, SOCK_DGRAM, SOCK_SEQPACKET},
+                             List<int>{SOCK_NONBLOCK}));
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    NonBlockingAbstractUnixSockets, NonBlockingSocketPairTest,
+    ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+}  // namespace
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_unix_blocking_local.cc b/test/syscalls/linux/socket_unix_blocking_local.cc
new file mode 100644
index 000000000..77cb8c6d6
--- /dev/null
+++ b/test/syscalls/linux/socket_unix_blocking_local.cc
@@ -0,0 +1,45 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <vector>
+
+#include "test/syscalls/linux/socket_blocking.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+std::vector<SocketPairKind> GetSocketPairs() {
+  return VecCat<SocketPairKind>(
+      ApplyVec<SocketPairKind>(
+          UnixDomainSocketPair,
+          std::vector<int>{SOCK_STREAM, SOCK_SEQPACKET, SOCK_DGRAM}),
+      ApplyVec<SocketPairKind>(
+          FilesystemBoundUnixDomainSocketPair,
+          std::vector<int>{SOCK_STREAM, SOCK_SEQPACKET, SOCK_DGRAM}),
+      ApplyVec<SocketPairKind>(
+          AbstractBoundUnixDomainSocketPair,
+          std::vector<int>{SOCK_STREAM, SOCK_SEQPACKET, SOCK_DGRAM}));
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    NonBlockingUnixDomainSockets, BlockingSocketPairTest,
+    ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+}  // namespace
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_unix_cmsg.cc b/test/syscalls/linux/socket_unix_cmsg.cc
new file mode 100644
index 000000000..a16899493
--- /dev/null
+++ b/test/syscalls/linux/socket_unix_cmsg.cc
@@ -0,0 +1,1501 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test/syscalls/linux/socket_unix_cmsg.h"
+
+#include <errno.h>
+#include <net/if.h>
+#include <stdio.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "absl/strings/string_view.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+// This file contains tests for control message in Unix domain sockets.
+//
+// This file is a generic socket test file. It must be built with another file
+// that provides the test types.
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST_P(UnixSocketPairCmsgTest, BasicFDPass) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[20];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  auto pair =
+      ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+  ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(),
+                                       sent_data, sizeof(sent_data)));
+
+  char received_data[20];
+  int fd = -1;
+  ASSERT_NO_FATAL_FAILURE(RecvSingleFD(sockets->second_fd(), &fd, received_data,
+                                       sizeof(received_data)));
+
+  EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+
+  ASSERT_NO_FATAL_FAILURE(TransferTest(fd, pair->first_fd()));
+}
+
+TEST_P(UnixSocketPairCmsgTest, BasicTwoFDPass) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[20];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  auto pair1 =
+      ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+  auto pair2 =
+      ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+  int sent_fds[] = {pair1->second_fd(), pair2->second_fd()};
+
+  ASSERT_NO_FATAL_FAILURE(
+      SendFDs(sockets->first_fd(), sent_fds, 2, sent_data, sizeof(sent_data)));
+
+  char received_data[20];
+  int received_fds[] = {-1, -1};
+
+  ASSERT_NO_FATAL_FAILURE(RecvFDs(sockets->second_fd(), received_fds, 2,
+                                  received_data, sizeof(received_data)));
+
+  EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+
+  ASSERT_NO_FATAL_FAILURE(TransferTest(received_fds[0], pair1->first_fd()));
+  ASSERT_NO_FATAL_FAILURE(TransferTest(received_fds[1], pair2->first_fd()));
+}
+
+TEST_P(UnixSocketPairCmsgTest, BasicThreeFDPass) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[20];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  auto pair1 =
+      ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+  auto pair2 =
+      ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+  auto pair3 =
+      ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+  int sent_fds[] = {pair1->second_fd(), pair2->second_fd(), pair3->second_fd()};
+
+  ASSERT_NO_FATAL_FAILURE(
+      SendFDs(sockets->first_fd(), sent_fds, 3, sent_data, sizeof(sent_data)));
+
+  char received_data[20];
+  int received_fds[] = {-1, -1, -1};
+
+  ASSERT_NO_FATAL_FAILURE(RecvFDs(sockets->second_fd(), received_fds, 3,
+                                  received_data, sizeof(received_data)));
+
+  EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+
+  ASSERT_NO_FATAL_FAILURE(TransferTest(received_fds[0], pair1->first_fd()));
+  ASSERT_NO_FATAL_FAILURE(TransferTest(received_fds[1], pair2->first_fd()));
+  ASSERT_NO_FATAL_FAILURE(TransferTest(received_fds[2], pair3->first_fd()));
+}
+
+TEST_P(UnixSocketPairCmsgTest, BadFDPass) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[20];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  int sent_fd = -1;
+
+  struct msghdr msg = {};
+  char control[CMSG_SPACE(sizeof(sent_fd))];
+  msg.msg_control = control;
+  msg.msg_controllen = sizeof(control);
+
+  struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
+  cmsg->cmsg_len = CMSG_LEN(sizeof(sent_fd));
+  cmsg->cmsg_level = SOL_SOCKET;
+  cmsg->cmsg_type = SCM_RIGHTS;
+  memcpy(CMSG_DATA(cmsg), &sent_fd, sizeof(sent_fd));
+
+  struct iovec iov;
+  iov.iov_base = sent_data;
+  iov.iov_len = sizeof(sent_data);
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+
+  ASSERT_THAT(RetryEINTR(sendmsg)(sockets->first_fd(), &msg, 0),
+              SyscallFailsWithErrno(EBADF));
+}
+
+TEST_P(UnixSocketPairCmsgTest, ShortCmsg) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[20];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  int sent_fd = -1;
+
+  struct msghdr msg = {};
+  char control[CMSG_SPACE(sizeof(sent_fd))];
+  msg.msg_control = control;
+  msg.msg_controllen = sizeof(control);
+
+  struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
+  cmsg->cmsg_len = 1;
+  cmsg->cmsg_level = SOL_SOCKET;
+  cmsg->cmsg_type = SCM_RIGHTS;
+  memcpy(CMSG_DATA(cmsg), &sent_fd, sizeof(sent_fd));
+
+  struct iovec iov;
+  iov.iov_base = sent_data;
+  iov.iov_len = sizeof(sent_data);
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+
+  ASSERT_THAT(RetryEINTR(sendmsg)(sockets->first_fd(), &msg, 0),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+// BasicFDPassNoSpace starts off by sending a single FD just like BasicFDPass.
+// The difference is that when calling recvmsg, no space for FDs is provided,
+// only space for the cmsg header.
+TEST_P(UnixSocketPairCmsgTest, BasicFDPassNoSpace) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[20];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  auto pair =
+      ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+  ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(),
+                                       sent_data, sizeof(sent_data)));
+
+  char received_data[20];
+
+  struct msghdr msg = {};
+  std::vector<char> control(CMSG_SPACE(0));
+  msg.msg_control = &control[0];
+  msg.msg_controllen = control.size();
+
+  struct iovec iov;
+  iov.iov_base = received_data;
+  iov.iov_len = sizeof(received_data);
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+
+  ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, 0),
+              SyscallSucceedsWithValue(sizeof(received_data)));
+
+  EXPECT_EQ(msg.msg_controllen, 0);
+  EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+}
+
+// BasicFDPassNoSpaceMsgCtrunc sends an FD, but does not provide any space to
+// receive it. It then verifies that the MSG_CTRUNC flag is set in the msghdr.
+TEST_P(UnixSocketPairCmsgTest, BasicFDPassNoSpaceMsgCtrunc) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[20];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  auto pair =
+      ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+  ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(),
+                                       sent_data, sizeof(sent_data)));
+
+  struct msghdr msg = {};
+  std::vector<char> control(CMSG_SPACE(0));
+  msg.msg_control = &control[0];
+  msg.msg_controllen = control.size();
+
+  char received_data[sizeof(sent_data)];
+  struct iovec iov;
+  iov.iov_base = received_data;
+  iov.iov_len = sizeof(received_data);
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+
+  ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, 0),
+              SyscallSucceedsWithValue(sizeof(received_data)));
+
+  EXPECT_EQ(msg.msg_controllen, 0);
+  EXPECT_EQ(msg.msg_flags, MSG_CTRUNC);
+}
+
+// BasicFDPassNullControlMsgCtrunc sends an FD and sets contradictory values for
+// msg_controllen and msg_control. msg_controllen is set to the correct size to
+// accommodate the FD, but msg_control is set to NULL. In this case, msg_control
+// should override msg_controllen.
+TEST_P(UnixSocketPairCmsgTest, BasicFDPassNullControlMsgCtrunc) {
+  // FIXME(gvisor.dev/issue/207): Fix handling of NULL msg_control.
+  SKIP_IF(IsRunningOnGvisor());
+
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[20];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  auto pair =
+      ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+  ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(),
+                                       sent_data, sizeof(sent_data)));
+
+  struct msghdr msg = {};
+  msg.msg_controllen = CMSG_SPACE(1);
+
+  char received_data[sizeof(sent_data)];
+  struct iovec iov;
+  iov.iov_base = received_data;
+  iov.iov_len = sizeof(received_data);
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+
+  ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, 0),
+              SyscallSucceedsWithValue(sizeof(received_data)));
+
+  EXPECT_EQ(msg.msg_controllen, 0);
+  EXPECT_EQ(msg.msg_flags, MSG_CTRUNC);
+}
+
+// BasicFDPassNotEnoughSpaceMsgCtrunc sends an FD, but does not provide enough
+// space to receive it. It then verifies that the MSG_CTRUNC flag is set in the
+// msghdr.
+TEST_P(UnixSocketPairCmsgTest, BasicFDPassNotEnoughSpaceMsgCtrunc) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[20];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  auto pair =
+      ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+  ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(),
+                                       sent_data, sizeof(sent_data)));
+
+  struct msghdr msg = {};
+  std::vector<char> control(CMSG_SPACE(0) + 1);
+  msg.msg_control = &control[0];
+  msg.msg_controllen = control.size();
+
+  char received_data[sizeof(sent_data)];
+  struct iovec iov;
+  iov.iov_base = received_data;
+  iov.iov_len = sizeof(received_data);
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+
+  ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, 0),
+              SyscallSucceedsWithValue(sizeof(received_data)));
+
+  EXPECT_EQ(msg.msg_controllen, 0);
+  EXPECT_EQ(msg.msg_flags, MSG_CTRUNC);
+}
+
+// BasicThreeFDPassTruncationMsgCtrunc sends three FDs, but only provides enough
+// space to receive two of them. It then verifies that the MSG_CTRUNC flag is
+// set in the msghdr.
+TEST_P(UnixSocketPairCmsgTest, BasicThreeFDPassTruncationMsgCtrunc) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[20];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  auto pair1 =
+      ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+  auto pair2 =
+      ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+  auto pair3 =
+      ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+  int sent_fds[] = {pair1->second_fd(), pair2->second_fd(), pair3->second_fd()};
+
+  ASSERT_NO_FATAL_FAILURE(
+      SendFDs(sockets->first_fd(), sent_fds, 3, sent_data, sizeof(sent_data)));
+
+  struct msghdr msg = {};
+  std::vector<char> control(CMSG_SPACE(2 * sizeof(int)));
+  msg.msg_control = &control[0];
+  msg.msg_controllen = control.size();
+
+  char received_data[sizeof(sent_data)];
+  struct iovec iov;
+  iov.iov_base = received_data;
+  iov.iov_len = sizeof(received_data);
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+
+  ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, 0),
+              SyscallSucceedsWithValue(sizeof(received_data)));
+
+  EXPECT_EQ(msg.msg_flags, MSG_CTRUNC);
+
+  struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
+  ASSERT_NE(cmsg, nullptr);
+  EXPECT_EQ(cmsg->cmsg_len, CMSG_LEN(2 * sizeof(int)));
+  EXPECT_EQ(cmsg->cmsg_level, SOL_SOCKET);
+  EXPECT_EQ(cmsg->cmsg_type, SCM_RIGHTS);
+}
+
+// BasicFDPassUnalignedRecv starts off by sending a single FD just like
+// BasicFDPass. The difference is that when calling recvmsg, the length of the
+// receive data is only aligned on a 4 byte boundry instead of the normal 8.
+TEST_P(UnixSocketPairCmsgTest, BasicFDPassUnalignedRecv) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[20];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  auto pair =
+      ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+  ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(),
+                                       sent_data, sizeof(sent_data)));
+
+  char received_data[20];
+  int fd = -1;
+  ASSERT_NO_FATAL_FAILURE(RecvSingleFDUnaligned(
+      sockets->second_fd(), &fd, received_data, sizeof(received_data)));
+
+  EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+
+  ASSERT_NO_FATAL_FAILURE(TransferTest(fd, pair->first_fd()));
+}
+
+// BasicFDPassUnalignedRecvNoMsgTrunc sends one FD and only provides enough
+// space to receive just it. (Normally the minimum amount of space one would
+// provide would be enough space for two FDs.) It then verifies that the
+// MSG_CTRUNC flag is not set in the msghdr.
+TEST_P(UnixSocketPairCmsgTest, BasicFDPassUnalignedRecvNoMsgTrunc) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[20];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  auto pair =
+      ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+  ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(),
+                                       sent_data, sizeof(sent_data)));
+
+  struct msghdr msg = {};
+  char control[CMSG_SPACE(sizeof(int)) - sizeof(int)];
+  msg.msg_control = control;
+  msg.msg_controllen = sizeof(control);
+
+  char received_data[sizeof(sent_data)] = {};
+  struct iovec iov;
+  iov.iov_base = received_data;
+  iov.iov_len = sizeof(received_data);
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+
+  ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, 0),
+              SyscallSucceedsWithValue(sizeof(received_data)));
+
+  EXPECT_EQ(msg.msg_flags, 0);
+
+  struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
+  ASSERT_NE(cmsg, nullptr);
+  EXPECT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(int)));
+  EXPECT_EQ(cmsg->cmsg_level, SOL_SOCKET);
+  EXPECT_EQ(cmsg->cmsg_type, SCM_RIGHTS);
+}
+
+// BasicTwoFDPassUnalignedRecvTruncationMsgTrunc sends two FDs, but only
+// provides enough space to receive one of them. It then verifies that the
+// MSG_CTRUNC flag is set in the msghdr.
+TEST_P(UnixSocketPairCmsgTest, BasicTwoFDPassUnalignedRecvTruncationMsgTrunc) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[20];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  auto pair =
+      ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+  int sent_fds[] = {pair->first_fd(), pair->second_fd()};
+
+  ASSERT_NO_FATAL_FAILURE(
+      SendFDs(sockets->first_fd(), sent_fds, 2, sent_data, sizeof(sent_data)));
+
+  struct msghdr msg = {};
+  // CMSG_SPACE rounds up to two FDs, we only want one.
+  char control[CMSG_SPACE(sizeof(int)) - sizeof(int)];
+  msg.msg_control = control;
+  msg.msg_controllen = sizeof(control);
+
+  char received_data[sizeof(sent_data)] = {};
+  struct iovec iov;
+  iov.iov_base = received_data;
+  iov.iov_len = sizeof(received_data);
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+
+  ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, 0),
+              SyscallSucceedsWithValue(sizeof(received_data)));
+
+  EXPECT_EQ(msg.msg_flags, MSG_CTRUNC);
+
+  struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
+  ASSERT_NE(cmsg, nullptr);
+  EXPECT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(int)));
+  EXPECT_EQ(cmsg->cmsg_level, SOL_SOCKET);
+  EXPECT_EQ(cmsg->cmsg_type, SCM_RIGHTS);
+}
+
+TEST_P(UnixSocketPairCmsgTest, ConcurrentBasicFDPass) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[20];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  int sockfd1 = sockets->first_fd();
+  auto recv_func = [sockfd1, sent_data]() {
+    char received_data[20];
+    int fd = -1;
+    RecvSingleFD(sockfd1, &fd, received_data, sizeof(received_data));
+    ASSERT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+    char buf[20];
+    ASSERT_THAT(ReadFd(fd, buf, sizeof(buf)),
+                SyscallSucceedsWithValue(sizeof(buf)));
+    ASSERT_THAT(WriteFd(fd, buf, sizeof(buf)),
+                SyscallSucceedsWithValue(sizeof(buf)));
+  };
+
+  auto pair =
+      ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+  ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->second_fd(), pair->second_fd(),
+                                       sent_data, sizeof(sent_data)));
+
+  ScopedThread t(recv_func);
+
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+  ASSERT_THAT(WriteFd(pair->first_fd(), sent_data, sizeof(sent_data)),
+              SyscallSucceedsWithValue(sizeof(sent_data)));
+
+  char received_data[20];
+  ASSERT_THAT(ReadFd(pair->first_fd(), received_data, sizeof(received_data)),
+              SyscallSucceedsWithValue(sizeof(received_data)));
+
+  t.Join();
+
+  EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+}
+
+// FDPassNoRecv checks that the control message can be safely ignored by using
+// read(2) instead of recvmsg(2).
+TEST_P(UnixSocketPairCmsgTest, FDPassNoRecv) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[20];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  auto pair =
+      ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+  ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(),
+                                       sent_data, sizeof(sent_data)));
+
+  // Read while ignoring the passed FD.
+  char received_data[20];
+  ASSERT_THAT(
+      ReadFd(sockets->second_fd(), received_data, sizeof(received_data)),
+      SyscallSucceedsWithValue(sizeof(received_data)));
+
+  EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+
+  // Check that the socket still works for reads and writes.
+  ASSERT_NO_FATAL_FAILURE(
+      TransferTest(sockets->first_fd(), sockets->second_fd()));
+}
+
+// FDPassInterspersed1 checks that sent control messages cannot be read before
+// their associated data has been read.
+TEST_P(UnixSocketPairCmsgTest, FDPassInterspersed1) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char written_data[20];
+  RandomizeBuffer(written_data, sizeof(written_data));
+
+  ASSERT_THAT(WriteFd(sockets->first_fd(), written_data, sizeof(written_data)),
+              SyscallSucceedsWithValue(sizeof(written_data)));
+
+  char sent_data[20];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  auto pair =
+      ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+  ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(),
+                                       sent_data, sizeof(sent_data)));
+
+  // Check that we don't get a control message, but do get the data.
+  char received_data[20];
+  RecvNoCmsg(sockets->second_fd(), received_data, sizeof(received_data));
+  EXPECT_EQ(0, memcmp(written_data, received_data, sizeof(written_data)));
+}
+
+// FDPassInterspersed2 checks that sent control messages cannot be read after
+// their associated data has been read while ignoring the control message by
+// using read(2) instead of recvmsg(2).
+TEST_P(UnixSocketPairCmsgTest, FDPassInterspersed2) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[20];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  auto pair =
+      ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+  ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(),
+                                       sent_data, sizeof(sent_data)));
+
+  char written_data[20];
+  RandomizeBuffer(written_data, sizeof(written_data));
+  ASSERT_THAT(WriteFd(sockets->first_fd(), written_data, sizeof(written_data)),
+              SyscallSucceedsWithValue(sizeof(written_data)));
+
+  char received_data[20];
+  ASSERT_THAT(
+      ReadFd(sockets->second_fd(), received_data, sizeof(received_data)),
+      SyscallSucceedsWithValue(sizeof(received_data)));
+
+  EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+
+  ASSERT_NO_FATAL_FAILURE(
+      RecvNoCmsg(sockets->second_fd(), received_data, sizeof(received_data)));
+  EXPECT_EQ(0, memcmp(written_data, received_data, sizeof(written_data)));
+}
+
+TEST_P(UnixSocketPairCmsgTest, FDPassNotCoalesced) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data1[20];
+  RandomizeBuffer(sent_data1, sizeof(sent_data1));
+
+  auto pair1 =
+      ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+  ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair1->second_fd(),
+                                       sent_data1, sizeof(sent_data1)));
+
+  char sent_data2[20];
+  RandomizeBuffer(sent_data2, sizeof(sent_data2));
+
+  auto pair2 =
+      ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+  ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair2->second_fd(),
+                                       sent_data2, sizeof(sent_data2)));
+
+  char received_data1[sizeof(sent_data1) + sizeof(sent_data2)];
+  int received_fd1 = -1;
+
+  RecvSingleFD(sockets->second_fd(), &received_fd1, received_data1,
+               sizeof(received_data1), sizeof(sent_data1));
+
+  EXPECT_EQ(0, memcmp(sent_data1, received_data1, sizeof(sent_data1)));
+  TransferTest(pair1->first_fd(), pair1->second_fd());
+
+  char received_data2[sizeof(sent_data1) + sizeof(sent_data2)];
+  int received_fd2 = -1;
+
+  RecvSingleFD(sockets->second_fd(), &received_fd2, received_data2,
+               sizeof(received_data2), sizeof(sent_data2));
+
+  EXPECT_EQ(0, memcmp(sent_data2, received_data2, sizeof(sent_data2)));
+  TransferTest(pair2->first_fd(), pair2->second_fd());
+}
+
+TEST_P(UnixSocketPairCmsgTest, FDPassPeek) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[20];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  auto pair =
+      ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+  ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(),
+                                       sent_data, sizeof(sent_data)));
+
+  char peek_data[20];
+  int peek_fd = -1;
+  PeekSingleFD(sockets->second_fd(), &peek_fd, peek_data, sizeof(peek_data));
+  EXPECT_EQ(0, memcmp(sent_data, peek_data, sizeof(sent_data)));
+  TransferTest(peek_fd, pair->first_fd());
+  EXPECT_THAT(close(peek_fd), SyscallSucceeds());
+
+  char received_data[20];
+  int received_fd = -1;
+  RecvSingleFD(sockets->second_fd(), &received_fd, received_data,
+               sizeof(received_data));
+  EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+  TransferTest(received_fd, pair->first_fd());
+  EXPECT_THAT(close(received_fd), SyscallSucceeds());
+}
+
+TEST_P(UnixSocketPairCmsgTest, BasicCredPass) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[20];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  struct ucred sent_creds;
+
+  ASSERT_THAT(sent_creds.pid = getpid(), SyscallSucceeds());
+  ASSERT_THAT(sent_creds.uid = getuid(), SyscallSucceeds());
+  ASSERT_THAT(sent_creds.gid = getgid(), SyscallSucceeds());
+
+  ASSERT_NO_FATAL_FAILURE(
+      SendCreds(sockets->first_fd(), sent_creds, sent_data, sizeof(sent_data)));
+
+  SetSoPassCred(sockets->second_fd());
+
+  char received_data[20];
+  struct ucred received_creds;
+  ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds,
+                                    received_data, sizeof(received_data)));
+
+  EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+  EXPECT_EQ(sent_creds.pid, received_creds.pid);
+  EXPECT_EQ(sent_creds.uid, received_creds.uid);
+  EXPECT_EQ(sent_creds.gid, received_creds.gid);
+}
+
+TEST_P(UnixSocketPairCmsgTest, SendNullCredsBeforeSoPassCredRecvEnd) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[20];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  ASSERT_NO_FATAL_FAILURE(
+      SendNullCmsg(sockets->first_fd(), sent_data, sizeof(sent_data)));
+
+  SetSoPassCred(sockets->second_fd());
+
+  char received_data[20];
+  struct ucred received_creds;
+  ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds,
+                                    received_data, sizeof(received_data)));
+
+  EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+
+  struct ucred want_creds {
+    0, 65534, 65534
+  };
+
+  EXPECT_EQ(want_creds.pid, received_creds.pid);
+  EXPECT_EQ(want_creds.uid, received_creds.uid);
+  EXPECT_EQ(want_creds.gid, received_creds.gid);
+}
+
+TEST_P(UnixSocketPairCmsgTest, SendNullCredsAfterSoPassCredRecvEnd) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[20];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  SetSoPassCred(sockets->second_fd());
+
+  ASSERT_NO_FATAL_FAILURE(
+      SendNullCmsg(sockets->first_fd(), sent_data, sizeof(sent_data)));
+
+  char received_data[20];
+  struct ucred received_creds;
+  ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds,
+                                    received_data, sizeof(received_data)));
+
+  EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+
+  struct ucred want_creds;
+  ASSERT_THAT(want_creds.pid = getpid(), SyscallSucceeds());
+  ASSERT_THAT(want_creds.uid = getuid(), SyscallSucceeds());
+  ASSERT_THAT(want_creds.gid = getgid(), SyscallSucceeds());
+
+  EXPECT_EQ(want_creds.pid, received_creds.pid);
+  EXPECT_EQ(want_creds.uid, received_creds.uid);
+  EXPECT_EQ(want_creds.gid, received_creds.gid);
+}
+
+TEST_P(UnixSocketPairCmsgTest, SendNullCredsBeforeSoPassCredSendEnd) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[20];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  ASSERT_NO_FATAL_FAILURE(
+      SendNullCmsg(sockets->first_fd(), sent_data, sizeof(sent_data)));
+
+  SetSoPassCred(sockets->first_fd());
+
+  char received_data[20];
+  ASSERT_NO_FATAL_FAILURE(
+      RecvNoCmsg(sockets->second_fd(), received_data, sizeof(received_data)));
+
+  EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+}
+
+TEST_P(UnixSocketPairCmsgTest, SendNullCredsAfterSoPassCredSendEnd) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[20];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  SetSoPassCred(sockets->first_fd());
+
+  ASSERT_NO_FATAL_FAILURE(
+      SendNullCmsg(sockets->first_fd(), sent_data, sizeof(sent_data)));
+
+  char received_data[20];
+  ASSERT_NO_FATAL_FAILURE(
+      RecvNoCmsg(sockets->second_fd(), received_data, sizeof(received_data)));
+
+  EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+}
+
+TEST_P(UnixSocketPairCmsgTest,
+       SendNullCredsBeforeSoPassCredRecvEndAfterSendEnd) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[20];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  SetSoPassCred(sockets->first_fd());
+
+  ASSERT_NO_FATAL_FAILURE(
+      SendNullCmsg(sockets->first_fd(), sent_data, sizeof(sent_data)));
+
+  SetSoPassCred(sockets->second_fd());
+
+  char received_data[20];
+  struct ucred received_creds;
+  ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds,
+                                    received_data, sizeof(received_data)));
+
+  EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+
+  struct ucred want_creds;
+  ASSERT_THAT(want_creds.pid = getpid(), SyscallSucceeds());
+  ASSERT_THAT(want_creds.uid = getuid(), SyscallSucceeds());
+  ASSERT_THAT(want_creds.gid = getgid(), SyscallSucceeds());
+
+  EXPECT_EQ(want_creds.pid, received_creds.pid);
+  EXPECT_EQ(want_creds.uid, received_creds.uid);
+  EXPECT_EQ(want_creds.gid, received_creds.gid);
+}
+
+TEST_P(UnixSocketPairCmsgTest, WriteBeforeSoPassCredRecvEnd) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[20];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data, sizeof(sent_data)),
+              SyscallSucceedsWithValue(sizeof(sent_data)));
+
+  SetSoPassCred(sockets->second_fd());
+
+  char received_data[20];
+
+  struct ucred received_creds;
+  ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds,
+                                    received_data, sizeof(received_data)));
+
+  EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+
+  struct ucred want_creds {
+    0, 65534, 65534
+  };
+
+  EXPECT_EQ(want_creds.pid, received_creds.pid);
+  EXPECT_EQ(want_creds.uid, received_creds.uid);
+  EXPECT_EQ(want_creds.gid, received_creds.gid);
+}
+
+TEST_P(UnixSocketPairCmsgTest, WriteAfterSoPassCredRecvEnd) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  SetSoPassCred(sockets->second_fd());
+
+  char sent_data[20];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+  ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data, sizeof(sent_data)),
+              SyscallSucceedsWithValue(sizeof(sent_data)));
+
+  char received_data[20];
+
+  struct ucred received_creds;
+  ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds,
+                                    received_data, sizeof(received_data)));
+
+  EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+
+  struct ucred want_creds;
+  ASSERT_THAT(want_creds.pid = getpid(), SyscallSucceeds());
+  ASSERT_THAT(want_creds.uid = getuid(), SyscallSucceeds());
+  ASSERT_THAT(want_creds.gid = getgid(), SyscallSucceeds());
+
+  EXPECT_EQ(want_creds.pid, received_creds.pid);
+  EXPECT_EQ(want_creds.uid, received_creds.uid);
+  EXPECT_EQ(want_creds.gid, received_creds.gid);
+}
+
+TEST_P(UnixSocketPairCmsgTest, WriteBeforeSoPassCredSendEnd) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[20];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data, sizeof(sent_data)),
+              SyscallSucceedsWithValue(sizeof(sent_data)));
+
+  SetSoPassCred(sockets->first_fd());
+
+  char received_data[20];
+  ASSERT_NO_FATAL_FAILURE(
+      RecvNoCmsg(sockets->second_fd(), received_data, sizeof(received_data)));
+
+  EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+}
+
+TEST_P(UnixSocketPairCmsgTest, WriteAfterSoPassCredSendEnd) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  SetSoPassCred(sockets->first_fd());
+
+  char sent_data[20];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data, sizeof(sent_data)),
+              SyscallSucceedsWithValue(sizeof(sent_data)));
+
+  char received_data[20];
+  ASSERT_NO_FATAL_FAILURE(
+      RecvNoCmsg(sockets->second_fd(), received_data, sizeof(received_data)));
+
+  EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+}
+
+TEST_P(UnixSocketPairCmsgTest, WriteBeforeSoPassCredRecvEndAfterSendEnd) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[20];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  SetSoPassCred(sockets->first_fd());
+
+  ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data, sizeof(sent_data)),
+              SyscallSucceedsWithValue(sizeof(sent_data)));
+
+  SetSoPassCred(sockets->second_fd());
+
+  char received_data[20];
+
+  struct ucred received_creds;
+  ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds,
+                                    received_data, sizeof(received_data)));
+
+  EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+
+  struct ucred want_creds;
+  ASSERT_THAT(want_creds.pid = getpid(), SyscallSucceeds());
+  ASSERT_THAT(want_creds.uid = getuid(), SyscallSucceeds());
+  ASSERT_THAT(want_creds.gid = getgid(), SyscallSucceeds());
+
+  EXPECT_EQ(want_creds.pid, received_creds.pid);
+  EXPECT_EQ(want_creds.uid, received_creds.uid);
+  EXPECT_EQ(want_creds.gid, received_creds.gid);
+}
+
+TEST_P(UnixSocketPairCmsgTest, CredPassTruncated) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[20];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  struct ucred sent_creds;
+
+  ASSERT_THAT(sent_creds.pid = getpid(), SyscallSucceeds());
+  ASSERT_THAT(sent_creds.uid = getuid(), SyscallSucceeds());
+  ASSERT_THAT(sent_creds.gid = getgid(), SyscallSucceeds());
+
+  ASSERT_NO_FATAL_FAILURE(
+      SendCreds(sockets->first_fd(), sent_creds, sent_data, sizeof(sent_data)));
+
+  SetSoPassCred(sockets->second_fd());
+
+  struct msghdr msg = {};
+  char control[CMSG_SPACE(0) + sizeof(pid_t)];
+  msg.msg_control = control;
+  msg.msg_controllen = sizeof(control);
+
+  char received_data[sizeof(sent_data)] = {};
+  struct iovec iov;
+  iov.iov_base = received_data;
+  iov.iov_len = sizeof(received_data);
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+
+  ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, 0),
+              SyscallSucceedsWithValue(sizeof(received_data)));
+
+  EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+
+  EXPECT_EQ(msg.msg_controllen, sizeof(control));
+
+  struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
+  ASSERT_NE(cmsg, nullptr);
+  EXPECT_EQ(cmsg->cmsg_len, sizeof(control));
+  EXPECT_EQ(cmsg->cmsg_level, SOL_SOCKET);
+  EXPECT_EQ(cmsg->cmsg_type, SCM_CREDENTIALS);
+
+  pid_t pid = 0;
+  memcpy(&pid, CMSG_DATA(cmsg), sizeof(pid));
+  EXPECT_EQ(pid, sent_creds.pid);
+}
+
+// CredPassNoMsgCtrunc passes a full set of credentials. It then verifies that
+// receiving the full set does not result in MSG_CTRUNC being set in the msghdr.
+TEST_P(UnixSocketPairCmsgTest, CredPassNoMsgCtrunc) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[20];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  struct ucred sent_creds;
+
+  ASSERT_THAT(sent_creds.pid = getpid(), SyscallSucceeds());
+  ASSERT_THAT(sent_creds.uid = getuid(), SyscallSucceeds());
+  ASSERT_THAT(sent_creds.gid = getgid(), SyscallSucceeds());
+
+  ASSERT_NO_FATAL_FAILURE(
+      SendCreds(sockets->first_fd(), sent_creds, sent_data, sizeof(sent_data)));
+
+  SetSoPassCred(sockets->second_fd());
+
+  struct msghdr msg = {};
+  char control[CMSG_SPACE(sizeof(struct ucred))];
+  msg.msg_control = control;
+  msg.msg_controllen = sizeof(control);
+
+  char received_data[sizeof(sent_data)] = {};
+  struct iovec iov;
+  iov.iov_base = received_data;
+  iov.iov_len = sizeof(received_data);
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+
+  ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, 0),
+              SyscallSucceedsWithValue(sizeof(received_data)));
+
+  EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+
+  // The control message should not be truncated.
+  EXPECT_EQ(msg.msg_flags, 0);
+  EXPECT_EQ(msg.msg_controllen, sizeof(control));
+
+  struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
+  ASSERT_NE(cmsg, nullptr);
+  EXPECT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(struct ucred)));
+  EXPECT_EQ(cmsg->cmsg_level, SOL_SOCKET);
+  EXPECT_EQ(cmsg->cmsg_type, SCM_CREDENTIALS);
+}
+
+// CredPassNoSpaceMsgCtrunc passes a full set of credentials. It then receives
+// the data without providing space for any credentials and verifies that
+// MSG_CTRUNC is set in the msghdr.
+TEST_P(UnixSocketPairCmsgTest, CredPassNoSpaceMsgCtrunc) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[20];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  struct ucred sent_creds;
+
+  ASSERT_THAT(sent_creds.pid = getpid(), SyscallSucceeds());
+  ASSERT_THAT(sent_creds.uid = getuid(), SyscallSucceeds());
+  ASSERT_THAT(sent_creds.gid = getgid(), SyscallSucceeds());
+
+  ASSERT_NO_FATAL_FAILURE(
+      SendCreds(sockets->first_fd(), sent_creds, sent_data, sizeof(sent_data)));
+
+  SetSoPassCred(sockets->second_fd());
+
+  struct msghdr msg = {};
+  char control[CMSG_SPACE(0)];
+  msg.msg_control = control;
+  msg.msg_controllen = sizeof(control);
+
+  char received_data[sizeof(sent_data)] = {};
+  struct iovec iov;
+  iov.iov_base = received_data;
+  iov.iov_len = sizeof(received_data);
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+
+  ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, 0),
+              SyscallSucceedsWithValue(sizeof(received_data)));
+
+  EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+
+  // The control message should be truncated.
+  EXPECT_EQ(msg.msg_flags, MSG_CTRUNC);
+  EXPECT_EQ(msg.msg_controllen, sizeof(control));
+
+  struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
+  ASSERT_NE(cmsg, nullptr);
+  EXPECT_EQ(cmsg->cmsg_len, sizeof(control));
+  EXPECT_EQ(cmsg->cmsg_level, SOL_SOCKET);
+  EXPECT_EQ(cmsg->cmsg_type, SCM_CREDENTIALS);
+}
+
+// CredPassTruncatedMsgCtrunc passes a full set of credentials. It then receives
+// the data while providing enough space for only the first field of the
+// credentials and verifies that MSG_CTRUNC is set in the msghdr.
+TEST_P(UnixSocketPairCmsgTest, CredPassTruncatedMsgCtrunc) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[20];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  struct ucred sent_creds;
+
+  ASSERT_THAT(sent_creds.pid = getpid(), SyscallSucceeds());
+  ASSERT_THAT(sent_creds.uid = getuid(), SyscallSucceeds());
+  ASSERT_THAT(sent_creds.gid = getgid(), SyscallSucceeds());
+
+  ASSERT_NO_FATAL_FAILURE(
+      SendCreds(sockets->first_fd(), sent_creds, sent_data, sizeof(sent_data)));
+
+  SetSoPassCred(sockets->second_fd());
+
+  struct msghdr msg = {};
+  char control[CMSG_SPACE(0) + sizeof(pid_t)];
+  msg.msg_control = control;
+  msg.msg_controllen = sizeof(control);
+
+  char received_data[sizeof(sent_data)] = {};
+  struct iovec iov;
+  iov.iov_base = received_data;
+  iov.iov_len = sizeof(received_data);
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+
+  ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, 0),
+              SyscallSucceedsWithValue(sizeof(received_data)));
+
+  EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+
+  // The control message should be truncated.
+  EXPECT_EQ(msg.msg_flags, MSG_CTRUNC);
+  EXPECT_EQ(msg.msg_controllen, sizeof(control));
+
+  struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
+  ASSERT_NE(cmsg, nullptr);
+  EXPECT_EQ(cmsg->cmsg_len, sizeof(control));
+  EXPECT_EQ(cmsg->cmsg_level, SOL_SOCKET);
+  EXPECT_EQ(cmsg->cmsg_type, SCM_CREDENTIALS);
+}
+
+TEST_P(UnixSocketPairCmsgTest, SoPassCred) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  int opt;
+  socklen_t optLen = sizeof(opt);
+  EXPECT_THAT(
+      getsockopt(sockets->first_fd(), SOL_SOCKET, SO_PASSCRED, &opt, &optLen),
+      SyscallSucceeds());
+  EXPECT_FALSE(opt);
+
+  optLen = sizeof(opt);
+  EXPECT_THAT(
+      getsockopt(sockets->second_fd(), SOL_SOCKET, SO_PASSCRED, &opt, &optLen),
+      SyscallSucceeds());
+  EXPECT_FALSE(opt);
+
+  SetSoPassCred(sockets->first_fd());
+
+  optLen = sizeof(opt);
+  EXPECT_THAT(
+      getsockopt(sockets->first_fd(), SOL_SOCKET, SO_PASSCRED, &opt, &optLen),
+      SyscallSucceeds());
+  EXPECT_TRUE(opt);
+
+  optLen = sizeof(opt);
+  EXPECT_THAT(
+      getsockopt(sockets->second_fd(), SOL_SOCKET, SO_PASSCRED, &opt, &optLen),
+      SyscallSucceeds());
+  EXPECT_FALSE(opt);
+
+  int zero = 0;
+  EXPECT_THAT(setsockopt(sockets->first_fd(), SOL_SOCKET, SO_PASSCRED, &zero,
+                         sizeof(zero)),
+              SyscallSucceeds());
+
+  optLen = sizeof(opt);
+  EXPECT_THAT(
+      getsockopt(sockets->first_fd(), SOL_SOCKET, SO_PASSCRED, &opt, &optLen),
+      SyscallSucceeds());
+  EXPECT_FALSE(opt);
+
+  optLen = sizeof(opt);
+  EXPECT_THAT(
+      getsockopt(sockets->second_fd(), SOL_SOCKET, SO_PASSCRED, &opt, &optLen),
+      SyscallSucceeds());
+  EXPECT_FALSE(opt);
+}
+
+TEST_P(UnixSocketPairCmsgTest, NoDataCredPass) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[20];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  struct msghdr msg = {};
+
+  struct iovec iov;
+  iov.iov_base = sent_data;
+  iov.iov_len = sizeof(sent_data);
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+
+  char control[CMSG_SPACE(0)];
+  msg.msg_control = control;
+  msg.msg_controllen = sizeof(control);
+
+  struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
+  cmsg->cmsg_level = SOL_SOCKET;
+  cmsg->cmsg_type = SCM_CREDENTIALS;
+  cmsg->cmsg_len = CMSG_LEN(0);
+
+  ASSERT_THAT(RetryEINTR(sendmsg)(sockets->first_fd(), &msg, 0),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_P(UnixSocketPairCmsgTest, NoPassCred) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[20];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  struct ucred sent_creds;
+
+  ASSERT_THAT(sent_creds.pid = getpid(), SyscallSucceeds());
+  ASSERT_THAT(sent_creds.uid = getuid(), SyscallSucceeds());
+  ASSERT_THAT(sent_creds.gid = getgid(), SyscallSucceeds());
+
+  ASSERT_NO_FATAL_FAILURE(
+      SendCreds(sockets->first_fd(), sent_creds, sent_data, sizeof(sent_data)));
+
+  char received_data[20];
+
+  ASSERT_NO_FATAL_FAILURE(
+      RecvNoCmsg(sockets->second_fd(), received_data, sizeof(received_data)));
+
+  EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+}
+
+TEST_P(UnixSocketPairCmsgTest, CredAndFDPass) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[20];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  struct ucred sent_creds;
+
+  ASSERT_THAT(sent_creds.pid = getpid(), SyscallSucceeds());
+  ASSERT_THAT(sent_creds.uid = getuid(), SyscallSucceeds());
+  ASSERT_THAT(sent_creds.gid = getgid(), SyscallSucceeds());
+
+  auto pair =
+      ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+  ASSERT_NO_FATAL_FAILURE(SendCredsAndFD(sockets->first_fd(), sent_creds,
+                                         pair->second_fd(), sent_data,
+                                         sizeof(sent_data)));
+
+  SetSoPassCred(sockets->second_fd());
+
+  char received_data[20];
+  struct ucred received_creds;
+  int fd = -1;
+  ASSERT_NO_FATAL_FAILURE(RecvCredsAndFD(sockets->second_fd(), &received_creds,
+                                         &fd, received_data,
+                                         sizeof(received_data)));
+
+  EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+
+  EXPECT_EQ(sent_creds.pid, received_creds.pid);
+  EXPECT_EQ(sent_creds.uid, received_creds.uid);
+  EXPECT_EQ(sent_creds.gid, received_creds.gid);
+
+  ASSERT_NO_FATAL_FAILURE(TransferTest(fd, pair->first_fd()));
+}
+
+TEST_P(UnixSocketPairCmsgTest, FDPassBeforeSoPassCred) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[20];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  auto pair =
+      ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+  ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(),
+                                       sent_data, sizeof(sent_data)));
+
+  SetSoPassCred(sockets->second_fd());
+
+  char received_data[20];
+  struct ucred received_creds;
+  int fd = -1;
+  ASSERT_NO_FATAL_FAILURE(RecvCredsAndFD(sockets->second_fd(), &received_creds,
+                                         &fd, received_data,
+                                         sizeof(received_data)));
+
+  EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+
+  struct ucred want_creds {
+    0, 65534, 65534
+  };
+
+  EXPECT_EQ(want_creds.pid, received_creds.pid);
+  EXPECT_EQ(want_creds.uid, received_creds.uid);
+  EXPECT_EQ(want_creds.gid, received_creds.gid);
+
+  ASSERT_NO_FATAL_FAILURE(TransferTest(fd, pair->first_fd()));
+}
+
+TEST_P(UnixSocketPairCmsgTest, FDPassAfterSoPassCred) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[20];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  auto pair =
+      ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+  SetSoPassCred(sockets->second_fd());
+
+  ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(),
+                                       sent_data, sizeof(sent_data)));
+
+  char received_data[20];
+  struct ucred received_creds;
+  int fd = -1;
+  ASSERT_NO_FATAL_FAILURE(RecvCredsAndFD(sockets->second_fd(), &received_creds,
+                                         &fd, received_data,
+                                         sizeof(received_data)));
+
+  EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+
+  struct ucred want_creds;
+  ASSERT_THAT(want_creds.pid = getpid(), SyscallSucceeds());
+  ASSERT_THAT(want_creds.uid = getuid(), SyscallSucceeds());
+  ASSERT_THAT(want_creds.gid = getgid(), SyscallSucceeds());
+
+  EXPECT_EQ(want_creds.pid, received_creds.pid);
+  EXPECT_EQ(want_creds.uid, received_creds.uid);
+  EXPECT_EQ(want_creds.gid, received_creds.gid);
+
+  ASSERT_NO_FATAL_FAILURE(TransferTest(fd, pair->first_fd()));
+}
+
+TEST_P(UnixSocketPairCmsgTest, CloexecDroppedWhenFDPassed) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[20];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  auto pair = ASSERT_NO_ERRNO_AND_VALUE(
+      UnixDomainSocketPair(SOCK_SEQPACKET | SOCK_CLOEXEC).Create());
+
+  ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(),
+                                       sent_data, sizeof(sent_data)));
+
+  char received_data[20];
+  int fd = -1;
+  ASSERT_NO_FATAL_FAILURE(RecvSingleFD(sockets->second_fd(), &fd, received_data,
+                                       sizeof(received_data)));
+
+  EXPECT_THAT(fcntl(fd, F_GETFD), SyscallSucceedsWithValue(0));
+}
+
+TEST_P(UnixSocketPairCmsgTest, CloexecRecvFDPass) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[20];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  auto pair =
+      ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+  ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(),
+                                       sent_data, sizeof(sent_data)));
+
+  struct msghdr msg = {};
+  char control[CMSG_SPACE(sizeof(int))];
+  msg.msg_control = control;
+  msg.msg_controllen = sizeof(control);
+
+  struct iovec iov;
+  char received_data[20];
+  iov.iov_base = received_data;
+  iov.iov_len = sizeof(received_data);
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+
+  ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, MSG_CMSG_CLOEXEC),
+              SyscallSucceedsWithValue(sizeof(received_data)));
+  struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
+  ASSERT_NE(cmsg, nullptr);
+  ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(int)));
+  ASSERT_EQ(cmsg->cmsg_level, SOL_SOCKET);
+  ASSERT_EQ(cmsg->cmsg_type, SCM_RIGHTS);
+
+  int fd = -1;
+  memcpy(&fd, CMSG_DATA(cmsg), sizeof(int));
+
+  EXPECT_THAT(fcntl(fd, F_GETFD), SyscallSucceedsWithValue(FD_CLOEXEC));
+}
+
+TEST_P(UnixSocketPairCmsgTest, FDPassAfterSoPassCredWithoutCredSpace) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[20];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  auto pair =
+      ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+  SetSoPassCred(sockets->second_fd());
+
+  ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(),
+                                       sent_data, sizeof(sent_data)));
+
+  struct msghdr msg = {};
+  char control[CMSG_LEN(0)];
+  msg.msg_control = control;
+  msg.msg_controllen = sizeof(control);
+
+  char received_data[20];
+  struct iovec iov;
+  iov.iov_base = received_data;
+  iov.iov_len = sizeof(received_data);
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+
+  ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, 0),
+              SyscallSucceedsWithValue(sizeof(received_data)));
+
+  EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+
+  EXPECT_EQ(msg.msg_controllen, sizeof(control));
+
+  struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
+  ASSERT_NE(cmsg, nullptr);
+  EXPECT_EQ(cmsg->cmsg_len, sizeof(control));
+  EXPECT_EQ(cmsg->cmsg_level, SOL_SOCKET);
+  EXPECT_EQ(cmsg->cmsg_type, SCM_CREDENTIALS);
+}
+
+// This test will validate that MSG_CTRUNC as an input flag to recvmsg will
+// not appear as an output flag on the control message when truncation doesn't
+// happen.
+TEST_P(UnixSocketPairCmsgTest, MsgCtruncInputIsNoop) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[20];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  auto pair =
+      ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+  ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(),
+                                       sent_data, sizeof(sent_data)));
+
+  struct msghdr msg = {};
+  char control[CMSG_SPACE(sizeof(int)) /* we're passing a single fd */];
+  msg.msg_control = control;
+  msg.msg_controllen = sizeof(control);
+
+  struct iovec iov;
+  char received_data[20];
+  iov.iov_base = received_data;
+  iov.iov_len = sizeof(received_data);
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+
+  ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, MSG_CTRUNC),
+              SyscallSucceedsWithValue(sizeof(received_data)));
+  struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
+  ASSERT_NE(cmsg, nullptr);
+  ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(int)));
+  ASSERT_EQ(cmsg->cmsg_level, SOL_SOCKET);
+  ASSERT_EQ(cmsg->cmsg_type, SCM_RIGHTS);
+
+  // Now we should verify that MSG_CTRUNC wasn't set as an output flag.
+  EXPECT_EQ(msg.msg_flags & MSG_CTRUNC, 0);
+}
+
+TEST_P(UnixSocketPairCmsgTest, FDPassAfterSoPassCredWithoutCredHeaderSpace) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[20];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  auto pair =
+      ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+  SetSoPassCred(sockets->second_fd());
+
+  ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(),
+                                       sent_data, sizeof(sent_data)));
+
+  struct msghdr msg = {};
+  char control[CMSG_LEN(0) / 2];
+  msg.msg_control = control;
+  msg.msg_controllen = sizeof(control);
+
+  char received_data[20];
+  struct iovec iov;
+  iov.iov_base = received_data;
+  iov.iov_len = sizeof(received_data);
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+
+  ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, 0),
+              SyscallSucceedsWithValue(sizeof(received_data)));
+
+  EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+  EXPECT_EQ(msg.msg_controllen, 0);
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_unix_cmsg.h b/test/syscalls/linux/socket_unix_cmsg.h
new file mode 100644
index 000000000..431606903
--- /dev/null
+++ b/test/syscalls/linux/socket_unix_cmsg.h
@@ -0,0 +1,30 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_CMSG_H_
+#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_CMSG_H_
+
+#include "test/syscalls/linux/socket_test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// Test fixture for tests that apply to pairs of connected unix sockets about
+// control messages.
+using UnixSocketPairCmsgTest = SocketPairTest;
+
+}  // namespace testing
+}  // namespace gvisor
+
+#endif  // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_CMSG_H_
diff --git a/test/syscalls/linux/socket_unix_dgram.cc b/test/syscalls/linux/socket_unix_dgram.cc
new file mode 100644
index 000000000..af0df4fb4
--- /dev/null
+++ b/test/syscalls/linux/socket_unix_dgram.cc
@@ -0,0 +1,45 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test/syscalls/linux/socket_unix_dgram.h"
+
+#include <stdio.h>
+#include <sys/un.h>
+
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST_P(DgramUnixSocketPairTest, WriteOneSideClosed) {
+  // FIXME(b/35925052): gVisor datagram sockets return EPIPE instead of
+  // ECONNREFUSED.
+  SKIP_IF(IsRunningOnGvisor());
+
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  ASSERT_THAT(close(sockets->release_first_fd()), SyscallSucceeds());
+  constexpr char kStr[] = "abc";
+  ASSERT_THAT(write(sockets->second_fd(), kStr, 3),
+              SyscallFailsWithErrno(ECONNREFUSED));
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_unix_dgram.h b/test/syscalls/linux/socket_unix_dgram.h
new file mode 100644
index 000000000..0764ef85b
--- /dev/null
+++ b/test/syscalls/linux/socket_unix_dgram.h
@@ -0,0 +1,29 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_DGRAM_H_
+#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_DGRAM_H_
+
+#include "test/syscalls/linux/socket_test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// Test fixture for tests that apply to pairs of connected dgram unix sockets.
+using DgramUnixSocketPairTest = SocketPairTest;
+
+}  // namespace testing
+}  // namespace gvisor
+
+#endif  // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_DGRAM_H_
diff --git a/test/syscalls/linux/socket_unix_dgram_local.cc b/test/syscalls/linux/socket_unix_dgram_local.cc
new file mode 100644
index 000000000..31d2d5216
--- /dev/null
+++ b/test/syscalls/linux/socket_unix_dgram_local.cc
@@ -0,0 +1,58 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <vector>
+
+#include "test/syscalls/linux/socket_non_stream.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/socket_unix_dgram.h"
+#include "test/syscalls/linux/socket_unix_non_stream.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+std::vector<SocketPairKind> GetSocketPairs() {
+  return VecCat<SocketPairKind>(VecCat<SocketPairKind>(
+      ApplyVec<SocketPairKind>(
+          UnixDomainSocketPair,
+          AllBitwiseCombinations(List<int>{SOCK_DGRAM, SOCK_RAW},
+                                 List<int>{0, SOCK_NONBLOCK})),
+      ApplyVec<SocketPairKind>(
+          FilesystemBoundUnixDomainSocketPair,
+          AllBitwiseCombinations(List<int>{SOCK_DGRAM, SOCK_RAW},
+                                 List<int>{0, SOCK_NONBLOCK})),
+      ApplyVec<SocketPairKind>(
+          AbstractBoundUnixDomainSocketPair,
+          AllBitwiseCombinations(List<int>{SOCK_DGRAM, SOCK_RAW},
+                                 List<int>{0, SOCK_NONBLOCK}))));
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    DgramUnixSockets, DgramUnixSocketPairTest,
+    ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+INSTANTIATE_TEST_SUITE_P(
+    DgramUnixSockets, UnixNonStreamSocketPairTest,
+    ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+INSTANTIATE_TEST_SUITE_P(
+    DgramUnixSockets, NonStreamSocketPairTest,
+    ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+}  // namespace
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_unix_dgram_non_blocking.cc b/test/syscalls/linux/socket_unix_dgram_non_blocking.cc
new file mode 100644
index 000000000..2db8b68d3
--- /dev/null
+++ b/test/syscalls/linux/socket_unix_dgram_non_blocking.cc
@@ -0,0 +1,57 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdio.h>
+#include <sys/un.h>
+
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// Test fixture for tests that apply to pairs of connected non-blocking dgram
+// unix sockets.
+using NonBlockingDgramUnixSocketPairTest = SocketPairTest;
+
+TEST_P(NonBlockingDgramUnixSocketPairTest, ReadOneSideClosed) {
+  if (IsRunningOnGvisor()) {
+    // FIXME(b/70803293): gVisor datagram sockets return 0 instead of
+    // EAGAIN.
+    return;
+  }
+
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  ASSERT_THAT(close(sockets->release_first_fd()), SyscallSucceeds());
+  char data[10] = {};
+  ASSERT_THAT(read(sockets->second_fd(), data, sizeof(data)),
+              SyscallFailsWithErrno(EAGAIN));
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    NonBlockingDgramUnixSockets, NonBlockingDgramUnixSocketPairTest,
+    ::testing::ValuesIn(IncludeReversals(std::vector<SocketPairKind>{
+        UnixDomainSocketPair(SOCK_DGRAM | SOCK_NONBLOCK),
+        FilesystemBoundUnixDomainSocketPair(SOCK_DGRAM | SOCK_NONBLOCK),
+        AbstractBoundUnixDomainSocketPair(SOCK_DGRAM | SOCK_NONBLOCK),
+    })));
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_unix_domain.cc b/test/syscalls/linux/socket_unix_domain.cc
new file mode 100644
index 000000000..f7dff8b4d
--- /dev/null
+++ b/test/syscalls/linux/socket_unix_domain.cc
@@ -0,0 +1,39 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <vector>
+
+#include "test/syscalls/linux/socket_generic.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+std::vector<SocketPairKind> GetSocketPairs() {
+  return ApplyVec<SocketPairKind>(
+      UnixDomainSocketPair,
+      AllBitwiseCombinations(List<int>{SOCK_STREAM, SOCK_DGRAM, SOCK_SEQPACKET},
+                             List<int>{0, SOCK_NONBLOCK}));
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    AllUnixDomainSockets, AllSocketPairTest,
+    ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+}  // namespace
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_unix_filesystem_nonblock.cc b/test/syscalls/linux/socket_unix_filesystem_nonblock.cc
new file mode 100644
index 000000000..6700b4d90
--- /dev/null
+++ b/test/syscalls/linux/socket_unix_filesystem_nonblock.cc
@@ -0,0 +1,39 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <vector>
+
+#include "test/syscalls/linux/socket_non_blocking.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+std::vector<SocketPairKind> GetSocketPairs() {
+  return ApplyVec<SocketPairKind>(
+      FilesystemBoundUnixDomainSocketPair,
+      AllBitwiseCombinations(List<int>{SOCK_STREAM, SOCK_DGRAM, SOCK_SEQPACKET},
+                             List<int>{SOCK_NONBLOCK}));
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    NonBlockingFilesystemUnixSockets, NonBlockingSocketPairTest,
+    ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+}  // namespace
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_unix_non_stream.cc b/test/syscalls/linux/socket_unix_non_stream.cc
new file mode 100644
index 000000000..884319e1d
--- /dev/null
+++ b/test/syscalls/linux/socket_unix_non_stream.cc
@@ -0,0 +1,256 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test/syscalls/linux/socket_unix_non_stream.h"
+
+#include <stdio.h>
+#include <sys/mman.h>
+#include <sys/un.h>
+
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/memory_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+TEST_P(UnixNonStreamSocketPairTest, RecvMsgTooLarge) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  int rcvbuf;
+  socklen_t length = sizeof(rcvbuf);
+  ASSERT_THAT(
+      getsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVBUF, &rcvbuf, &length),
+      SyscallSucceeds());
+
+  // Make the call larger than the receive buffer.
+  const int recv_size = 3 * rcvbuf;
+
+  // Write a message that does fit in the receive buffer.
+  const int write_size = rcvbuf - kPageSize;
+
+  std::vector<char> write_buf(write_size, 'a');
+  const int ret = RetryEINTR(write)(sockets->second_fd(), write_buf.data(),
+                                    write_buf.size());
+  if (ret < 0 && errno == ENOBUFS) {
+    // NOTE(b/116636318): Linux may stall the write for a long time and
+    // ultimately return ENOBUFS. Allow this error, since a retry will likely
+    // result in the same error.
+    return;
+  }
+  ASSERT_THAT(ret, SyscallSucceeds());
+
+  std::vector<char> recv_buf(recv_size);
+
+  ASSERT_NO_FATAL_FAILURE(RecvNoCmsg(sockets->first_fd(), recv_buf.data(),
+                                     recv_buf.size(), write_size));
+
+  recv_buf.resize(write_size);
+  EXPECT_EQ(recv_buf, write_buf);
+}
+
+// Create a region of anonymous memory of size 'size', which is fragmented in
+// FileMem.
+//
+// ptr contains the start address of the region. The returned vector contains
+// all of the mappings to be unmapped when done.
+PosixErrorOr<std::vector<Mapping>> CreateFragmentedRegion(const int size,
+                                                          void** ptr) {
+  Mapping region;
+  ASSIGN_OR_RETURN_ERRNO(region, Mmap(nullptr, size, PROT_NONE,
+                                      MAP_ANONYMOUS | MAP_PRIVATE, -1, 0));
+
+  *ptr = region.ptr();
+
+  // Don't save hundreds of times for all of these mmaps.
+  DisableSave ds;
+
+  std::vector<Mapping> pages;
+
+  // Map and commit a single page at a time, mapping and committing an unrelated
+  // page between each call to force FileMem fragmentation.
+  for (uintptr_t addr = region.addr(); addr < region.endaddr();
+       addr += kPageSize) {
+    Mapping page;
+    ASSIGN_OR_RETURN_ERRNO(
+        page,
+        Mmap(reinterpret_cast<void*>(addr), kPageSize, PROT_READ | PROT_WRITE,
+             MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0));
+    *reinterpret_cast<volatile char*>(page.ptr()) = 42;
+
+    pages.emplace_back(std::move(page));
+
+    // Unrelated page elsewhere.
+    ASSIGN_OR_RETURN_ERRNO(page,
+                           Mmap(nullptr, kPageSize, PROT_READ | PROT_WRITE,
+                                MAP_ANONYMOUS | MAP_PRIVATE, -1, 0));
+    *reinterpret_cast<volatile char*>(page.ptr()) = 42;
+
+    pages.emplace_back(std::move(page));
+  }
+
+  // The mappings above have taken ownership of the region.
+  region.release();
+
+  return std::move(pages);
+}
+
+// A contiguous iov that is heavily fragmented in FileMem can still be sent
+// successfully. See b/115833655.
+TEST_P(UnixNonStreamSocketPairTest, FragmentedSendMsg) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  const int buffer_size = UIO_MAXIOV * kPageSize;
+  // Extra page for message header overhead.
+  const int sndbuf = buffer_size + kPageSize;
+  // N.B. setsockopt(SO_SNDBUF) doubles the passed value.
+  const int set_sndbuf = sndbuf / 2;
+
+  EXPECT_THAT(setsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDBUF,
+                         &set_sndbuf, sizeof(set_sndbuf)),
+              SyscallSucceeds());
+
+  int actual_sndbuf = 0;
+  socklen_t length = sizeof(actual_sndbuf);
+  ASSERT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDBUF,
+                         &actual_sndbuf, &length),
+              SyscallSucceeds());
+
+  if (actual_sndbuf != sndbuf) {
+    // Unable to get the sndbuf we want.
+    //
+    // N.B. At minimum, the socketpair gofer should provide a socket that is
+    // already the correct size.
+    //
+    // TODO(b/35921550): When internal UDS support SO_SNDBUF, we can assert that
+    // we always get the right SO_SNDBUF on gVisor.
+    GTEST_SKIP() << "SO_SNDBUF = " << actual_sndbuf << ", want " << sndbuf;
+  }
+
+  // Create a contiguous region of memory of 2*UIO_MAXIOV*PAGE_SIZE. We'll call
+  // sendmsg with a single iov, but the goal is to get the sentry to split this
+  // into > UIO_MAXIOV iovs when calling the kernel.
+  void* ptr;
+  std::vector<Mapping> pages =
+      ASSERT_NO_ERRNO_AND_VALUE(CreateFragmentedRegion(buffer_size, &ptr));
+
+  struct iovec iov = {};
+  iov.iov_base = ptr;
+  iov.iov_len = buffer_size;
+
+  struct msghdr msg = {};
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+
+  // NOTE(b/116636318,b/115833655): Linux has poor behavior in the presence of
+  // physical memory fragmentation. As a result, this may stall for a long time
+  // and ultimately return ENOBUFS. Allow this error, since it means that we
+  // made it to the host kernel and started the sendmsg.
+  EXPECT_THAT(RetryEINTR(sendmsg)(sockets->first_fd(), &msg, 0),
+              AnyOf(SyscallSucceedsWithValue(buffer_size),
+                    SyscallFailsWithErrno(ENOBUFS)));
+}
+
+// A contiguous iov that is heavily fragmented in FileMem can still be received
+// into successfully. Regression test for b/115833655.
+TEST_P(UnixNonStreamSocketPairTest, FragmentedRecvMsg) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  const int buffer_size = UIO_MAXIOV * kPageSize;
+  // Extra page for message header overhead.
+  const int sndbuf = buffer_size + kPageSize;
+  // N.B. setsockopt(SO_SNDBUF) doubles the passed value.
+  const int set_sndbuf = sndbuf / 2;
+
+  EXPECT_THAT(setsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDBUF,
+                         &set_sndbuf, sizeof(set_sndbuf)),
+              SyscallSucceeds());
+
+  int actual_sndbuf = 0;
+  socklen_t length = sizeof(actual_sndbuf);
+  ASSERT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDBUF,
+                         &actual_sndbuf, &length),
+              SyscallSucceeds());
+
+  if (actual_sndbuf != sndbuf) {
+    // Unable to get the sndbuf we want.
+    //
+    // N.B. At minimum, the socketpair gofer should provide a socket that is
+    // already the correct size.
+    //
+    // TODO(b/35921550): When internal UDS support SO_SNDBUF, we can assert that
+    // we always get the right SO_SNDBUF on gVisor.
+    GTEST_SKIP() << "SO_SNDBUF = " << actual_sndbuf << ", want " << sndbuf;
+  }
+
+  std::vector<char> write_buf(buffer_size, 'a');
+  const int ret = RetryEINTR(write)(sockets->first_fd(), write_buf.data(),
+                                    write_buf.size());
+  if (ret < 0 && errno == ENOBUFS) {
+    // NOTE(b/116636318): Linux may stall the write for a long time and
+    // ultimately return ENOBUFS. Allow this error, since a retry will likely
+    // result in the same error.
+    return;
+  }
+  ASSERT_THAT(ret, SyscallSucceeds());
+
+  // Create a contiguous region of memory of 2*UIO_MAXIOV*PAGE_SIZE. We'll call
+  // sendmsg with a single iov, but the goal is to get the sentry to split this
+  // into > UIO_MAXIOV iovs when calling the kernel.
+  void* ptr;
+  std::vector<Mapping> pages =
+      ASSERT_NO_ERRNO_AND_VALUE(CreateFragmentedRegion(buffer_size, &ptr));
+
+  ASSERT_NO_FATAL_FAILURE(RecvNoCmsg(
+      sockets->second_fd(), reinterpret_cast<char*>(ptr), buffer_size));
+
+  EXPECT_EQ(0, memcmp(write_buf.data(), ptr, buffer_size));
+}
+
+TEST_P(UnixNonStreamSocketPairTest, SendTimeout) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  struct timeval tv {
+    .tv_sec = 0, .tv_usec = 10
+  };
+  EXPECT_THAT(
+      setsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)),
+      SyscallSucceeds());
+
+  const int buf_size = 5 * kPageSize;
+  EXPECT_THAT(setsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDBUF, &buf_size,
+                         sizeof(buf_size)),
+              SyscallSucceeds());
+  EXPECT_THAT(setsockopt(sockets->second_fd(), SOL_SOCKET, SO_RCVBUF, &buf_size,
+                         sizeof(buf_size)),
+              SyscallSucceeds());
+
+  // The buffer size should be big enough to avoid many iterations in the next
+  // loop. Otherwise, this will slow down cooperative_save tests.
+  std::vector<char> buf(kPageSize);
+  for (;;) {
+    int ret;
+    ASSERT_THAT(
+        ret = RetryEINTR(send)(sockets->first_fd(), buf.data(), buf.size(), 0),
+        ::testing::AnyOf(SyscallSucceeds(), SyscallFailsWithErrno(EAGAIN)));
+    if (ret == -1) {
+      break;
+    }
+  }
+}
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_unix_non_stream.h b/test/syscalls/linux/socket_unix_non_stream.h
new file mode 100644
index 000000000..7478ab172
--- /dev/null
+++ b/test/syscalls/linux/socket_unix_non_stream.h
@@ -0,0 +1,30 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_NON_STREAM_H_
+#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_NON_STREAM_H_
+
+#include "test/syscalls/linux/socket_test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// Test fixture for tests that apply to pairs of connected non-stream
+// unix-domain sockets.
+using UnixNonStreamSocketPairTest = SocketPairTest;
+
+}  // namespace testing
+}  // namespace gvisor
+
+#endif  // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_NON_STREAM_H_
diff --git a/test/syscalls/linux/socket_unix_non_stream_blocking_local.cc b/test/syscalls/linux/socket_unix_non_stream_blocking_local.cc
new file mode 100644
index 000000000..fddcdf1c5
--- /dev/null
+++ b/test/syscalls/linux/socket_unix_non_stream_blocking_local.cc
@@ -0,0 +1,42 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <vector>
+
+#include "test/syscalls/linux/socket_non_stream_blocking.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+std::vector<SocketPairKind> GetSocketPairs() {
+  return VecCat<SocketPairKind>(
+      ApplyVec<SocketPairKind>(UnixDomainSocketPair,
+                               std::vector<int>{SOCK_DGRAM, SOCK_SEQPACKET}),
+      ApplyVec<SocketPairKind>(FilesystemBoundUnixDomainSocketPair,
+                               std::vector<int>{SOCK_DGRAM, SOCK_SEQPACKET}),
+      ApplyVec<SocketPairKind>(AbstractBoundUnixDomainSocketPair,
+                               std::vector<int>{SOCK_DGRAM, SOCK_SEQPACKET}));
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    BlockingNonStreamUnixSockets, BlockingNonStreamSocketPairTest,
+    ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+}  // namespace
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_unix_pair.cc b/test/syscalls/linux/socket_unix_pair.cc
new file mode 100644
index 000000000..85999db04
--- /dev/null
+++ b/test/syscalls/linux/socket_unix_pair.cc
@@ -0,0 +1,44 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <vector>
+
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/socket_unix.h"
+#include "test/syscalls/linux/socket_unix_cmsg.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+std::vector<SocketPairKind> GetSocketPairs() {
+  return VecCat<SocketPairKind>(ApplyVec<SocketPairKind>(
+      UnixDomainSocketPair,
+      AllBitwiseCombinations(List<int>{SOCK_STREAM, SOCK_DGRAM, SOCK_SEQPACKET},
+                             List<int>{0, SOCK_NONBLOCK})));
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    AllUnixDomainSockets, UnixSocketPairTest,
+    ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+INSTANTIATE_TEST_SUITE_P(
+    AllUnixDomainSockets, UnixSocketPairCmsgTest,
+    ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+}  // namespace
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_unix_pair_nonblock.cc b/test/syscalls/linux/socket_unix_pair_nonblock.cc
new file mode 100644
index 000000000..281410a9a
--- /dev/null
+++ b/test/syscalls/linux/socket_unix_pair_nonblock.cc
@@ -0,0 +1,39 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <vector>
+
+#include "test/syscalls/linux/socket_non_blocking.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+std::vector<SocketPairKind> GetSocketPairs() {
+  return ApplyVec<SocketPairKind>(
+      UnixDomainSocketPair,
+      AllBitwiseCombinations(List<int>{SOCK_STREAM, SOCK_DGRAM, SOCK_SEQPACKET},
+                             List<int>{SOCK_NONBLOCK}));
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    NonBlockingUnixSockets, NonBlockingSocketPairTest,
+    ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+}  // namespace
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_unix_seqpacket.cc b/test/syscalls/linux/socket_unix_seqpacket.cc
new file mode 100644
index 000000000..6d03df4d9
--- /dev/null
+++ b/test/syscalls/linux/socket_unix_seqpacket.cc
@@ -0,0 +1,67 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test/syscalls/linux/socket_unix_seqpacket.h"
+
+#include <stdio.h>
+#include <sys/un.h>
+
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST_P(SeqpacketUnixSocketPairTest, WriteOneSideClosed) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  ASSERT_THAT(close(sockets->release_first_fd()), SyscallSucceeds());
+  constexpr char kStr[] = "abc";
+  ASSERT_THAT(write(sockets->second_fd(), kStr, 3),
+              SyscallFailsWithErrno(EPIPE));
+}
+
+TEST_P(SeqpacketUnixSocketPairTest, ReadOneSideClosed) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  ASSERT_THAT(close(sockets->release_first_fd()), SyscallSucceeds());
+  char data[10] = {};
+  ASSERT_THAT(read(sockets->second_fd(), data, sizeof(data)),
+              SyscallSucceedsWithValue(0));
+}
+
+TEST_P(SeqpacketUnixSocketPairTest, Sendto) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  struct sockaddr_un addr = {};
+  addr.sun_family = AF_UNIX;
+  constexpr char kPath[] = "\0nonexistent";
+  memcpy(addr.sun_path, kPath, sizeof(kPath));
+
+  constexpr char kStr[] = "abc";
+  ASSERT_THAT(sendto(sockets->second_fd(), kStr, 3, 0, (struct sockaddr*)&addr,
+                     sizeof(addr)),
+              SyscallSucceedsWithValue(3));
+
+  char data[10] = {};
+  ASSERT_THAT(read(sockets->first_fd(), data, sizeof(data)),
+              SyscallSucceedsWithValue(3));
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_unix_seqpacket.h b/test/syscalls/linux/socket_unix_seqpacket.h
new file mode 100644
index 000000000..30d9b9edf
--- /dev/null
+++ b/test/syscalls/linux/socket_unix_seqpacket.h
@@ -0,0 +1,30 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_SEQPACKET_H_
+#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_SEQPACKET_H_
+
+#include "test/syscalls/linux/socket_test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// Test fixture for tests that apply to pairs of connected seqpacket unix
+// sockets.
+using SeqpacketUnixSocketPairTest = SocketPairTest;
+
+}  // namespace testing
+}  // namespace gvisor
+
+#endif  // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_SEQPACKET_H_
diff --git a/test/syscalls/linux/socket_unix_seqpacket_local.cc b/test/syscalls/linux/socket_unix_seqpacket_local.cc
new file mode 100644
index 000000000..69a5f150d
--- /dev/null
+++ b/test/syscalls/linux/socket_unix_seqpacket_local.cc
@@ -0,0 +1,58 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <vector>
+
+#include "test/syscalls/linux/socket_non_stream.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/socket_unix_non_stream.h"
+#include "test/syscalls/linux/socket_unix_seqpacket.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+std::vector<SocketPairKind> GetSocketPairs() {
+  return VecCat<SocketPairKind>(VecCat<SocketPairKind>(
+      ApplyVec<SocketPairKind>(
+          UnixDomainSocketPair,
+          AllBitwiseCombinations(List<int>{SOCK_SEQPACKET},
+                                 List<int>{0, SOCK_NONBLOCK})),
+      ApplyVec<SocketPairKind>(
+          FilesystemBoundUnixDomainSocketPair,
+          AllBitwiseCombinations(List<int>{SOCK_SEQPACKET},
+                                 List<int>{0, SOCK_NONBLOCK})),
+      ApplyVec<SocketPairKind>(
+          AbstractBoundUnixDomainSocketPair,
+          AllBitwiseCombinations(List<int>{SOCK_SEQPACKET},
+                                 List<int>{0, SOCK_NONBLOCK}))));
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    SeqpacketUnixSockets, NonStreamSocketPairTest,
+    ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+INSTANTIATE_TEST_SUITE_P(
+    SeqpacketUnixSockets, SeqpacketUnixSocketPairTest,
+    ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+INSTANTIATE_TEST_SUITE_P(
+    SeqpacketUnixSockets, UnixNonStreamSocketPairTest,
+    ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+}  // namespace
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_unix_stream.cc b/test/syscalls/linux/socket_unix_stream.cc
new file mode 100644
index 000000000..99e77b89e
--- /dev/null
+++ b/test/syscalls/linux/socket_unix_stream.cc
@@ -0,0 +1,125 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <poll.h>
+#include <stdio.h>
+#include <sys/un.h>
+
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// Test fixture for tests that apply to pairs of connected stream unix sockets.
+using StreamUnixSocketPairTest = SocketPairTest;
+
+TEST_P(StreamUnixSocketPairTest, WriteOneSideClosed) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  ASSERT_THAT(close(sockets->release_first_fd()), SyscallSucceeds());
+  constexpr char kStr[] = "abc";
+  ASSERT_THAT(write(sockets->second_fd(), kStr, 3),
+              SyscallFailsWithErrno(EPIPE));
+}
+
+TEST_P(StreamUnixSocketPairTest, ReadOneSideClosed) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  ASSERT_THAT(close(sockets->release_first_fd()), SyscallSucceeds());
+  char data[10] = {};
+  ASSERT_THAT(read(sockets->second_fd(), data, sizeof(data)),
+              SyscallSucceedsWithValue(0));
+}
+
+TEST_P(StreamUnixSocketPairTest, RecvmsgOneSideClosed) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  // Set timeout so that it will not wait for ever.
+  struct timeval tv {
+    .tv_sec = 0, .tv_usec = 10
+  };
+  EXPECT_THAT(setsockopt(sockets->second_fd(), SOL_SOCKET, SO_RCVTIMEO, &tv,
+                         sizeof(tv)),
+              SyscallSucceeds());
+
+  ASSERT_THAT(close(sockets->release_first_fd()), SyscallSucceeds());
+
+  char received_data[10] = {};
+  struct iovec iov;
+  iov.iov_base = received_data;
+  iov.iov_len = sizeof(received_data);
+  struct msghdr msg = {};
+  msg.msg_flags = -1;
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+
+  ASSERT_THAT(recvmsg(sockets->second_fd(), &msg, MSG_WAITALL),
+              SyscallSucceedsWithValue(0));
+}
+
+TEST_P(StreamUnixSocketPairTest, ReadOneSideClosedWithUnreadData) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char buf[10] = {};
+  ASSERT_THAT(RetryEINTR(write)(sockets->second_fd(), buf, sizeof(buf)),
+              SyscallSucceedsWithValue(sizeof(buf)));
+
+  ASSERT_THAT(shutdown(sockets->first_fd(), SHUT_RDWR), SyscallSucceeds());
+
+  ASSERT_THAT(RetryEINTR(read)(sockets->second_fd(), buf, sizeof(buf)),
+              SyscallSucceedsWithValue(0));
+
+  ASSERT_THAT(close(sockets->release_first_fd()), SyscallSucceeds());
+
+  ASSERT_THAT(RetryEINTR(read)(sockets->second_fd(), buf, sizeof(buf)),
+              SyscallFailsWithErrno(ECONNRESET));
+}
+
+TEST_P(StreamUnixSocketPairTest, Sendto) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  struct sockaddr_un addr = {};
+  addr.sun_family = AF_UNIX;
+  constexpr char kPath[] = "\0nonexistent";
+  memcpy(addr.sun_path, kPath, sizeof(kPath));
+
+  constexpr char kStr[] = "abc";
+  ASSERT_THAT(sendto(sockets->second_fd(), kStr, 3, 0, (struct sockaddr*)&addr,
+                     sizeof(addr)),
+              SyscallFailsWithErrno(EISCONN));
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    AllUnixDomainSockets, StreamUnixSocketPairTest,
+    ::testing::ValuesIn(IncludeReversals(VecCat<SocketPairKind>(
+        ApplyVec<SocketPairKind>(UnixDomainSocketPair,
+                                 AllBitwiseCombinations(List<int>{SOCK_STREAM},
+                                                        List<int>{
+                                                            0, SOCK_NONBLOCK})),
+        ApplyVec<SocketPairKind>(FilesystemBoundUnixDomainSocketPair,
+                                 AllBitwiseCombinations(List<int>{SOCK_STREAM},
+                                                        List<int>{
+                                                            0, SOCK_NONBLOCK})),
+        ApplyVec<SocketPairKind>(
+            AbstractBoundUnixDomainSocketPair,
+            AllBitwiseCombinations(List<int>{SOCK_STREAM},
+                                   List<int>{0, SOCK_NONBLOCK}))))));
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_unix_stream_blocking_local.cc b/test/syscalls/linux/socket_unix_stream_blocking_local.cc
new file mode 100644
index 000000000..8429bd429
--- /dev/null
+++ b/test/syscalls/linux/socket_unix_stream_blocking_local.cc
@@ -0,0 +1,40 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <vector>
+
+#include "test/syscalls/linux/socket_stream_blocking.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+std::vector<SocketPairKind> GetSocketPairs() {
+  return {
+      UnixDomainSocketPair(SOCK_STREAM),
+      FilesystemBoundUnixDomainSocketPair(SOCK_STREAM),
+      AbstractBoundUnixDomainSocketPair(SOCK_STREAM),
+  };
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    BlockingStreamUnixSockets, BlockingStreamSocketPairTest,
+    ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+}  // namespace
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_unix_stream_local.cc b/test/syscalls/linux/socket_unix_stream_local.cc
new file mode 100644
index 000000000..a7e3449a9
--- /dev/null
+++ b/test/syscalls/linux/socket_unix_stream_local.cc
@@ -0,0 +1,48 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <vector>
+
+#include "test/syscalls/linux/socket_stream.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+std::vector<SocketPairKind> GetSocketPairs() {
+  return VecCat<SocketPairKind>(
+      ApplyVec<SocketPairKind>(
+          UnixDomainSocketPair,
+          AllBitwiseCombinations(List<int>{SOCK_STREAM},
+                                 List<int>{0, SOCK_NONBLOCK})),
+      ApplyVec<SocketPairKind>(
+          FilesystemBoundUnixDomainSocketPair,
+          AllBitwiseCombinations(List<int>{SOCK_STREAM},
+                                 List<int>{0, SOCK_NONBLOCK})),
+      ApplyVec<SocketPairKind>(
+          AbstractBoundUnixDomainSocketPair,
+          AllBitwiseCombinations(List<int>{SOCK_STREAM},
+                                 List<int>{0, SOCK_NONBLOCK})));
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    StreamUnixSockets, StreamSocketPairTest,
+    ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+}  // namespace
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_unix_stream_nonblock_local.cc b/test/syscalls/linux/socket_unix_stream_nonblock_local.cc
new file mode 100644
index 000000000..4b763c8e2
--- /dev/null
+++ b/test/syscalls/linux/socket_unix_stream_nonblock_local.cc
@@ -0,0 +1,39 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <vector>
+
+#include "test/syscalls/linux/socket_stream_nonblock.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+std::vector<SocketPairKind> GetSocketPairs() {
+  return {
+      UnixDomainSocketPair(SOCK_STREAM | SOCK_NONBLOCK),
+      FilesystemBoundUnixDomainSocketPair(SOCK_STREAM | SOCK_NONBLOCK),
+      AbstractBoundUnixDomainSocketPair(SOCK_STREAM | SOCK_NONBLOCK),
+  };
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    NonBlockingStreamUnixSockets, NonBlockingStreamSocketPairTest,
+    ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+}  // namespace
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_unix_unbound_abstract.cc b/test/syscalls/linux/socket_unix_unbound_abstract.cc
new file mode 100644
index 000000000..8b1762000
--- /dev/null
+++ b/test/syscalls/linux/socket_unix_unbound_abstract.cc
@@ -0,0 +1,116 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdio.h>
+#include <sys/un.h>
+
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// Test fixture for tests that apply to pairs of unbound abstract unix sockets.
+using UnboundAbstractUnixSocketPairTest = SocketPairTest;
+
+TEST_P(UnboundAbstractUnixSocketPairTest, AddressAfterNull) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  struct sockaddr_un addr =
+      *reinterpret_cast<const struct sockaddr_un*>(sockets->first_addr());
+  ASSERT_EQ(addr.sun_path[sizeof(addr.sun_path) - 1], 0);
+  SKIP_IF(addr.sun_path[sizeof(addr.sun_path) - 2] != 0 ||
+          addr.sun_path[sizeof(addr.sun_path) - 3] != 0);
+
+  addr.sun_path[sizeof(addr.sun_path) - 2] = 'a';
+
+  ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+                   sockets->first_addr_size()),
+              SyscallSucceeds());
+
+  ASSERT_THAT(bind(sockets->second_fd(),
+                   reinterpret_cast<struct sockaddr*>(&addr), sizeof(addr)),
+              SyscallSucceeds());
+}
+
+TEST_P(UnboundAbstractUnixSocketPairTest, ShortAddressNotExtended) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  struct sockaddr_un addr =
+      *reinterpret_cast<const struct sockaddr_un*>(sockets->first_addr());
+  ASSERT_EQ(addr.sun_path[sizeof(addr.sun_path) - 1], 0);
+
+  ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+                   sockets->first_addr_size() - 1),
+              SyscallSucceeds());
+
+  ASSERT_THAT(bind(sockets->second_fd(), sockets->first_addr(),
+                   sockets->first_addr_size()),
+              SyscallSucceeds());
+}
+
+TEST_P(UnboundAbstractUnixSocketPairTest, BindNothing) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  struct sockaddr_un addr = {.sun_family = AF_UNIX};
+  ASSERT_THAT(bind(sockets->first_fd(),
+                   reinterpret_cast<struct sockaddr*>(&addr), sizeof(addr)),
+              SyscallSucceeds());
+}
+
+TEST_P(UnboundAbstractUnixSocketPairTest, GetSockNameFullLength) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+                   sockets->first_addr_size()),
+              SyscallSucceeds());
+
+  sockaddr_storage addr = {};
+  socklen_t addr_len = sizeof(addr);
+  ASSERT_THAT(getsockname(sockets->first_fd(),
+                          reinterpret_cast<struct sockaddr*>(&addr), &addr_len),
+              SyscallSucceeds());
+  EXPECT_EQ(addr_len, sockets->first_addr_size());
+}
+
+TEST_P(UnboundAbstractUnixSocketPairTest, GetSockNamePartialLength) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+                   sockets->first_addr_size() - 1),
+              SyscallSucceeds());
+
+  sockaddr_storage addr = {};
+  socklen_t addr_len = sizeof(addr);
+  ASSERT_THAT(getsockname(sockets->first_fd(),
+                          reinterpret_cast<struct sockaddr*>(&addr), &addr_len),
+              SyscallSucceeds());
+  EXPECT_EQ(addr_len, sockets->first_addr_size() - 1);
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    AllUnixDomainSockets, UnboundAbstractUnixSocketPairTest,
+    ::testing::ValuesIn(ApplyVec<SocketPairKind>(
+        AbstractUnboundUnixDomainSocketPair,
+        AllBitwiseCombinations(List<int>{SOCK_STREAM, SOCK_SEQPACKET,
+                                         SOCK_DGRAM},
+                               List<int>{0, SOCK_NONBLOCK}))));
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_unix_unbound_dgram.cc b/test/syscalls/linux/socket_unix_unbound_dgram.cc
new file mode 100644
index 000000000..907dca0f1
--- /dev/null
+++ b/test/syscalls/linux/socket_unix_unbound_dgram.cc
@@ -0,0 +1,183 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdio.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// Test fixture for tests that apply to pairs of unbound dgram unix sockets.
+using UnboundDgramUnixSocketPairTest = SocketPairTest;
+
+TEST_P(UnboundDgramUnixSocketPairTest, BindConnect) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+                   sockets->first_addr_size()),
+              SyscallSucceeds());
+  ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(),
+                      sockets->first_addr_size()),
+              SyscallSucceeds());
+}
+
+TEST_P(UnboundDgramUnixSocketPairTest, SelfConnect) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+                   sockets->first_addr_size()),
+              SyscallSucceeds());
+  ASSERT_THAT(connect(sockets->first_fd(), sockets->first_addr(),
+                      sockets->first_addr_size()),
+              SyscallSucceeds());
+}
+
+TEST_P(UnboundDgramUnixSocketPairTest, DoubleConnect) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+                   sockets->first_addr_size()),
+              SyscallSucceeds());
+  ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(),
+                      sockets->first_addr_size()),
+              SyscallSucceeds());
+  ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(),
+                      sockets->first_addr_size()),
+              SyscallSucceeds());
+}
+
+TEST_P(UnboundDgramUnixSocketPairTest, GetRemoteAddress) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+                   sockets->first_addr_size()),
+              SyscallSucceeds());
+  ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(),
+                      sockets->first_addr_size()),
+              SyscallSucceeds());
+
+  socklen_t addressLength = sockets->first_addr_size();
+  struct sockaddr_storage address = {};
+  ASSERT_THAT(getpeername(sockets->second_fd(), (struct sockaddr*)(&address),
+                          &addressLength),
+              SyscallSucceeds());
+  EXPECT_EQ(
+      0, memcmp(&address, sockets->first_addr(), sockets->first_addr_size()));
+}
+
+TEST_P(UnboundDgramUnixSocketPairTest, Sendto) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+                   sockets->first_addr_size()),
+              SyscallSucceeds());
+
+  char sent_data[20];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  ASSERT_THAT(sendto(sockets->second_fd(), sent_data, sizeof(sent_data), 0,
+                     sockets->first_addr(), sockets->first_addr_size()),
+              SyscallSucceedsWithValue(sizeof(sent_data)));
+
+  char received_data[sizeof(sent_data)];
+  ASSERT_THAT(ReadFd(sockets->first_fd(), received_data, sizeof(received_data)),
+              SyscallSucceedsWithValue(sizeof(received_data)));
+  EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(received_data)));
+}
+
+TEST_P(UnboundDgramUnixSocketPairTest, ZeroWriteAllowed) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+                   sockets->first_addr_size()),
+              SyscallSucceeds());
+  ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(),
+                      sockets->first_addr_size()),
+              SyscallSucceeds());
+
+  char sent_data[3];
+  // Send a zero length packet.
+  ASSERT_THAT(write(sockets->second_fd(), sent_data, 0),
+              SyscallSucceedsWithValue(0));
+  // Receive the packet.
+  char received_data[sizeof(sent_data)];
+  ASSERT_THAT(read(sockets->first_fd(), received_data, sizeof(received_data)),
+              SyscallSucceedsWithValue(0));
+}
+
+TEST_P(UnboundDgramUnixSocketPairTest, Listen) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  ASSERT_THAT(listen(sockets->first_fd(), 0), SyscallFailsWithErrno(ENOTSUP));
+}
+
+TEST_P(UnboundDgramUnixSocketPairTest, Accept) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  ASSERT_THAT(accept(sockets->first_fd(), nullptr, nullptr),
+              SyscallFailsWithErrno(ENOTSUP));
+}
+
+TEST_P(UnboundDgramUnixSocketPairTest, SendtoWithoutConnect) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+                   sockets->first_addr_size()),
+              SyscallSucceeds());
+
+  char data = 'a';
+  ASSERT_THAT(
+      RetryEINTR(sendto)(sockets->second_fd(), &data, sizeof(data), 0,
+                         sockets->first_addr(), sockets->first_addr_size()),
+      SyscallSucceedsWithValue(sizeof(data)));
+}
+
+TEST_P(UnboundDgramUnixSocketPairTest, SendtoWithoutConnectPassCreds) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+                   sockets->first_addr_size()),
+              SyscallSucceeds());
+
+  SetSoPassCred(sockets->first_fd());
+  char data = 'a';
+  ASSERT_THAT(
+      RetryEINTR(sendto)(sockets->second_fd(), &data, sizeof(data), 0,
+                         sockets->first_addr(), sockets->first_addr_size()),
+      SyscallSucceedsWithValue(sizeof(data)));
+  ucred creds;
+  creds.pid = -1;
+  char buf[sizeof(data) + 1];
+  ASSERT_NO_FATAL_FAILURE(
+      RecvCreds(sockets->first_fd(), &creds, buf, sizeof(buf), sizeof(data)));
+  EXPECT_EQ(0, memcmp(&data, buf, sizeof(data)));
+  EXPECT_THAT(getpid(), SyscallSucceedsWithValue(creds.pid));
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    AllUnixDomainSockets, UnboundDgramUnixSocketPairTest,
+    ::testing::ValuesIn(VecCat<SocketPairKind>(
+        ApplyVec<SocketPairKind>(FilesystemUnboundUnixDomainSocketPair,
+                                 AllBitwiseCombinations(List<int>{SOCK_DGRAM},
+                                                        List<int>{
+                                                            0, SOCK_NONBLOCK})),
+        ApplyVec<SocketPairKind>(
+            AbstractUnboundUnixDomainSocketPair,
+            AllBitwiseCombinations(List<int>{SOCK_DGRAM},
+                                   List<int>{0, SOCK_NONBLOCK})))));
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_unix_unbound_filesystem.cc b/test/syscalls/linux/socket_unix_unbound_filesystem.cc
new file mode 100644
index 000000000..cab912152
--- /dev/null
+++ b/test/syscalls/linux/socket_unix_unbound_filesystem.cc
@@ -0,0 +1,84 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdio.h>
+#include <sys/un.h>
+
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// Test fixture for tests that apply to pairs of unbound filesystem unix
+// sockets.
+using UnboundFilesystemUnixSocketPairTest = SocketPairTest;
+
+TEST_P(UnboundFilesystemUnixSocketPairTest, AddressAfterNull) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  struct sockaddr_un addr =
+      *reinterpret_cast<const struct sockaddr_un*>(sockets->first_addr());
+  ASSERT_EQ(addr.sun_path[sizeof(addr.sun_path) - 1], 0);
+  SKIP_IF(addr.sun_path[sizeof(addr.sun_path) - 2] != 0 ||
+          addr.sun_path[sizeof(addr.sun_path) - 3] != 0);
+
+  addr.sun_path[sizeof(addr.sun_path) - 2] = 'a';
+
+  ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+                   sockets->first_addr_size()),
+              SyscallSucceeds());
+
+  ASSERT_THAT(bind(sockets->second_fd(),
+                   reinterpret_cast<struct sockaddr*>(&addr), sizeof(addr)),
+              SyscallFailsWithErrno(EADDRINUSE));
+}
+
+TEST_P(UnboundFilesystemUnixSocketPairTest, GetSockNameLength) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+                   sockets->first_addr_size()),
+              SyscallSucceeds());
+
+  sockaddr_storage got_addr = {};
+  socklen_t got_addr_len = sizeof(got_addr);
+  ASSERT_THAT(
+      getsockname(sockets->first_fd(),
+                  reinterpret_cast<struct sockaddr*>(&got_addr), &got_addr_len),
+      SyscallSucceeds());
+
+  sockaddr_un want_addr =
+      *reinterpret_cast<const struct sockaddr_un*>(sockets->first_addr());
+
+  EXPECT_EQ(got_addr_len,
+            strlen(want_addr.sun_path) + 1 + sizeof(want_addr.sun_family));
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    AllUnixDomainSockets, UnboundFilesystemUnixSocketPairTest,
+    ::testing::ValuesIn(ApplyVec<SocketPairKind>(
+        FilesystemUnboundUnixDomainSocketPair,
+        AllBitwiseCombinations(List<int>{SOCK_STREAM, SOCK_SEQPACKET,
+                                         SOCK_DGRAM},
+                               List<int>{0, SOCK_NONBLOCK}))));
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_unix_unbound_seqpacket.cc b/test/syscalls/linux/socket_unix_unbound_seqpacket.cc
new file mode 100644
index 000000000..cb99030f5
--- /dev/null
+++ b/test/syscalls/linux/socket_unix_unbound_seqpacket.cc
@@ -0,0 +1,89 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdio.h>
+#include <sys/un.h>
+
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// Test fixture for tests that apply to pairs of unbound seqpacket unix sockets.
+using UnboundUnixSeqpacketSocketPairTest = SocketPairTest;
+
+TEST_P(UnboundUnixSeqpacketSocketPairTest, SendtoWithoutConnect) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+                   sockets->first_addr_size()),
+              SyscallSucceeds());
+
+  char data = 'a';
+  ASSERT_THAT(sendto(sockets->second_fd(), &data, sizeof(data), 0,
+                     sockets->first_addr(), sockets->first_addr_size()),
+              SyscallFailsWithErrno(ENOTCONN));
+}
+
+TEST_P(UnboundUnixSeqpacketSocketPairTest, SendtoWithoutConnectIgnoresAddr) {
+  // FIXME(b/68223466): gVisor tries to find /foo/bar and thus returns ENOENT.
+  if (IsRunningOnGvisor()) {
+    return;
+  }
+
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+                   sockets->first_addr_size()),
+              SyscallSucceeds());
+
+  // Even a bogus address is completely ignored.
+  constexpr char kPath[] = "/foo/bar";
+
+  // Sanity check that kPath doesn't exist.
+  struct stat s;
+  ASSERT_THAT(stat(kPath, &s), SyscallFailsWithErrno(ENOENT));
+
+  struct sockaddr_un addr = {};
+  addr.sun_family = AF_UNIX;
+  memcpy(addr.sun_path, kPath, sizeof(kPath));
+
+  char data = 'a';
+  ASSERT_THAT(
+      sendto(sockets->second_fd(), &data, sizeof(data), 0,
+             reinterpret_cast<const struct sockaddr*>(&addr), sizeof(addr)),
+      SyscallFailsWithErrno(ENOTCONN));
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    AllUnixDomainSockets, UnboundUnixSeqpacketSocketPairTest,
+    ::testing::ValuesIn(IncludeReversals(VecCat<SocketPairKind>(
+        ApplyVec<SocketPairKind>(
+            FilesystemUnboundUnixDomainSocketPair,
+            AllBitwiseCombinations(List<int>{SOCK_SEQPACKET},
+                                   List<int>{0, SOCK_NONBLOCK})),
+        ApplyVec<SocketPairKind>(
+            AbstractUnboundUnixDomainSocketPair,
+            AllBitwiseCombinations(List<int>{SOCK_SEQPACKET},
+                                   List<int>{0, SOCK_NONBLOCK}))))));
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/socket_unix_unbound_stream.cc b/test/syscalls/linux/socket_unix_unbound_stream.cc
new file mode 100644
index 000000000..f185dded3
--- /dev/null
+++ b/test/syscalls/linux/socket_unix_unbound_stream.cc
@@ -0,0 +1,733 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdio.h>
+#include <sys/un.h>
+
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// Test fixture for tests that apply to pairs of connected unix stream sockets.
+using UnixStreamSocketPairTest = SocketPairTest;
+
+// FDPassPartialRead checks that sent control messages cannot be read after
+// any of their associated data has been read while ignoring the control message
+// by using read(2) instead of recvmsg(2).
+TEST_P(UnixStreamSocketPairTest, FDPassPartialRead) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[20];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  auto pair =
+      ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+  ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(),
+                                       sent_data, sizeof(sent_data)));
+
+  char received_data[sizeof(sent_data) / 2];
+  ASSERT_THAT(
+      ReadFd(sockets->second_fd(), received_data, sizeof(received_data)),
+      SyscallSucceedsWithValue(sizeof(received_data)));
+
+  EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(received_data)));
+
+  RecvNoCmsg(sockets->second_fd(), received_data, sizeof(received_data));
+  EXPECT_EQ(0, memcmp(sent_data + sizeof(received_data), received_data,
+                      sizeof(received_data)));
+}
+
+TEST_P(UnixStreamSocketPairTest, FDPassCoalescedRead) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data1[20];
+  RandomizeBuffer(sent_data1, sizeof(sent_data1));
+
+  auto pair1 =
+      ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+  ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair1->second_fd(),
+                                       sent_data1, sizeof(sent_data1)));
+
+  char sent_data2[20];
+  RandomizeBuffer(sent_data2, sizeof(sent_data2));
+
+  auto pair2 =
+      ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+  ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair2->second_fd(),
+                                       sent_data2, sizeof(sent_data2)));
+
+  char received_data[sizeof(sent_data1) + sizeof(sent_data2)];
+  ASSERT_THAT(
+      ReadFd(sockets->second_fd(), received_data, sizeof(received_data)),
+      SyscallSucceedsWithValue(sizeof(received_data)));
+
+  EXPECT_EQ(0, memcmp(sent_data1, received_data, sizeof(sent_data1)));
+  EXPECT_EQ(0, memcmp(sent_data2, received_data + sizeof(sent_data1),
+                      sizeof(sent_data2)));
+}
+
+// ZeroLengthMessageFDDiscarded checks that control messages associated with
+// zero length messages are discarded.
+TEST_P(UnixStreamSocketPairTest, ZeroLengthMessageFDDiscarded) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  // Zero length arrays are invalid in ISO C++, so allocate one of size 1 and
+  // send a length of 0.
+  char sent_data1[1] = {};
+
+  auto pair =
+      ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+  ASSERT_NO_FATAL_FAILURE(
+      SendSingleFD(sockets->first_fd(), pair->second_fd(), sent_data1, 0));
+
+  char sent_data2[20];
+  RandomizeBuffer(sent_data2, sizeof(sent_data2));
+
+  ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data2, sizeof(sent_data2)),
+              SyscallSucceedsWithValue(sizeof(sent_data2)));
+
+  char received_data[sizeof(sent_data2)] = {};
+
+  RecvNoCmsg(sockets->second_fd(), received_data, sizeof(received_data));
+  EXPECT_EQ(0, memcmp(sent_data2, received_data, sizeof(received_data)));
+}
+
+// FDPassCoalescedRecv checks that control messages not in the first message are
+// preserved in a coalesced recv.
+TEST_P(UnixStreamSocketPairTest, FDPassCoalescedRecv) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[20];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data, sizeof(sent_data) / 2),
+              SyscallSucceedsWithValue(sizeof(sent_data) / 2));
+
+  auto pair =
+      ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+  ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(),
+                                       sent_data + sizeof(sent_data) / 2,
+                                       sizeof(sent_data) / 2));
+
+  char received_data[sizeof(sent_data)];
+
+  int fd = -1;
+  ASSERT_NO_FATAL_FAILURE(RecvSingleFD(sockets->second_fd(), &fd, received_data,
+                                       sizeof(received_data)));
+
+  EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+
+  ASSERT_NO_FATAL_FAILURE(TransferTest(fd, pair->first_fd()));
+}
+
+// ReadsNotCoalescedAfterFDPass checks that messages after a message containing
+// an FD control message are not coalesced.
+TEST_P(UnixStreamSocketPairTest, ReadsNotCoalescedAfterFDPass) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[20];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  auto pair =
+      ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+  ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(),
+                                       sent_data, sizeof(sent_data) / 2));
+
+  ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data + sizeof(sent_data) / 2,
+                      sizeof(sent_data) / 2),
+              SyscallSucceedsWithValue(sizeof(sent_data) / 2));
+
+  char received_data[sizeof(sent_data)];
+
+  int fd = -1;
+  ASSERT_NO_FATAL_FAILURE(RecvSingleFD(sockets->second_fd(), &fd, received_data,
+                                       sizeof(received_data),
+                                       sizeof(sent_data) / 2));
+
+  EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data) / 2));
+
+  ASSERT_NO_FATAL_FAILURE(TransferTest(fd, pair->first_fd()));
+  EXPECT_THAT(close(fd), SyscallSucceeds());
+
+  ASSERT_NO_FATAL_FAILURE(
+      RecvNoCmsg(sockets->second_fd(), received_data, sizeof(sent_data) / 2));
+
+  EXPECT_EQ(0, memcmp(sent_data + sizeof(sent_data) / 2, received_data,
+                      sizeof(sent_data) / 2));
+}
+
+// FDPassNotCombined checks that FD control messages are not combined in a
+// coalesced read.
+TEST_P(UnixStreamSocketPairTest, FDPassNotCombined) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[20];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  auto pair1 =
+      ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+  ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair1->second_fd(),
+                                       sent_data, sizeof(sent_data) / 2));
+
+  auto pair2 =
+      ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+  ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair2->second_fd(),
+                                       sent_data + sizeof(sent_data) / 2,
+                                       sizeof(sent_data) / 2));
+
+  char received_data[sizeof(sent_data)];
+
+  int fd = -1;
+  ASSERT_NO_FATAL_FAILURE(RecvSingleFD(sockets->second_fd(), &fd, received_data,
+                                       sizeof(received_data),
+                                       sizeof(sent_data) / 2));
+
+  EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data) / 2));
+
+  ASSERT_NO_FATAL_FAILURE(TransferTest(fd, pair1->first_fd()));
+
+  EXPECT_THAT(close(fd), SyscallSucceeds());
+  fd = -1;
+
+  ASSERT_NO_FATAL_FAILURE(RecvSingleFD(sockets->second_fd(), &fd, received_data,
+                                       sizeof(received_data),
+                                       sizeof(sent_data) / 2));
+
+  EXPECT_EQ(0, memcmp(sent_data + sizeof(sent_data) / 2, received_data,
+                      sizeof(sent_data) / 2));
+
+  ASSERT_NO_FATAL_FAILURE(TransferTest(fd, pair2->first_fd()));
+  EXPECT_THAT(close(fd), SyscallSucceeds());
+}
+
+TEST_P(UnixStreamSocketPairTest, CredPassPartialRead) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data[20];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+
+  struct ucred sent_creds;
+
+  ASSERT_THAT(sent_creds.pid = getpid(), SyscallSucceeds());
+  ASSERT_THAT(sent_creds.uid = getuid(), SyscallSucceeds());
+  ASSERT_THAT(sent_creds.gid = getgid(), SyscallSucceeds());
+
+  ASSERT_NO_FATAL_FAILURE(
+      SendCreds(sockets->first_fd(), sent_creds, sent_data, sizeof(sent_data)));
+
+  int one = 1;
+  ASSERT_THAT(setsockopt(sockets->second_fd(), SOL_SOCKET, SO_PASSCRED, &one,
+                         sizeof(one)),
+              SyscallSucceeds());
+
+  for (int i = 0; i < 2; i++) {
+    char received_data[10];
+    struct ucred received_creds;
+    ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds,
+                                      received_data, sizeof(received_data),
+                                      sizeof(received_data)));
+
+    EXPECT_EQ(0, memcmp(sent_data + i * sizeof(received_data), received_data,
+                        sizeof(received_data)));
+    EXPECT_EQ(sent_creds.pid, received_creds.pid);
+    EXPECT_EQ(sent_creds.uid, received_creds.uid);
+    EXPECT_EQ(sent_creds.gid, received_creds.gid);
+  }
+}
+
+// Unix stream sockets peek in the same way as datagram sockets.
+//
+// SinglePeek checks that only a single message is peekable in a single recv.
+TEST_P(UnixStreamSocketPairTest, SinglePeek) {
+  if (!IsRunningOnGvisor()) {
+    // Don't run this test on linux kernels newer than 4.3.x Linux kernel commit
+    // 9f389e35674f5b086edd70ed524ca0f287259725 which changes this behavior. We
+    // used to target 3.11 compatibility, so disable this test on newer kernels.
+    //
+    // NOTE(b/118902768): Bring this up to Linux 4.4 compatibility.
+    auto version = ASSERT_NO_ERRNO_AND_VALUE(GetKernelVersion());
+    SKIP_IF(version.major > 4 || (version.major == 4 && version.minor >= 3));
+  }
+
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+  char sent_data[40];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+  ASSERT_THAT(RetryEINTR(send)(sockets->first_fd(), sent_data,
+                               sizeof(sent_data) / 2, 0),
+              SyscallSucceedsWithValue(sizeof(sent_data) / 2));
+  ASSERT_THAT(
+      RetryEINTR(send)(sockets->first_fd(), sent_data + sizeof(sent_data) / 2,
+                       sizeof(sent_data) / 2, 0),
+      SyscallSucceedsWithValue(sizeof(sent_data) / 2));
+  char received_data[sizeof(sent_data)];
+  for (int i = 0; i < 3; i++) {
+    memset(received_data, 0, sizeof(received_data));
+    ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data,
+                                 sizeof(received_data), MSG_PEEK),
+                SyscallSucceedsWithValue(sizeof(sent_data) / 2));
+    EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data) / 2));
+  }
+  memset(received_data, 0, sizeof(received_data));
+  ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data,
+                               sizeof(sent_data) / 2, 0),
+              SyscallSucceedsWithValue(sizeof(sent_data) / 2));
+  EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data) / 2));
+  memset(received_data, 0, sizeof(received_data));
+  ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data,
+                               sizeof(sent_data) / 2, 0),
+              SyscallSucceedsWithValue(sizeof(sent_data) / 2));
+  EXPECT_EQ(0, memcmp(sent_data + sizeof(sent_data) / 2, received_data,
+                      sizeof(sent_data) / 2));
+}
+
+TEST_P(UnixStreamSocketPairTest, CredsNotCoalescedUp) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data1[20];
+  RandomizeBuffer(sent_data1, sizeof(sent_data1));
+
+  ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data1, sizeof(sent_data1)),
+              SyscallSucceedsWithValue(sizeof(sent_data1)));
+
+  SetSoPassCred(sockets->second_fd());
+
+  char sent_data2[20];
+  RandomizeBuffer(sent_data2, sizeof(sent_data2));
+
+  ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data2, sizeof(sent_data2)),
+              SyscallSucceedsWithValue(sizeof(sent_data2)));
+
+  char received_data[sizeof(sent_data1) + sizeof(sent_data2)];
+
+  struct ucred received_creds;
+  ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds,
+                                    received_data, sizeof(received_data),
+                                    sizeof(sent_data1)));
+
+  EXPECT_EQ(0, memcmp(sent_data1, received_data, sizeof(sent_data1)));
+
+  struct ucred want_creds {
+    0, 65534, 65534
+  };
+
+  EXPECT_EQ(want_creds.pid, received_creds.pid);
+  EXPECT_EQ(want_creds.uid, received_creds.uid);
+  EXPECT_EQ(want_creds.gid, received_creds.gid);
+
+  ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds,
+                                    received_data, sizeof(received_data),
+                                    sizeof(sent_data2)));
+
+  EXPECT_EQ(0, memcmp(sent_data2, received_data, sizeof(sent_data2)));
+
+  ASSERT_THAT(want_creds.pid = getpid(), SyscallSucceeds());
+  ASSERT_THAT(want_creds.uid = getuid(), SyscallSucceeds());
+  ASSERT_THAT(want_creds.gid = getgid(), SyscallSucceeds());
+
+  EXPECT_EQ(want_creds.pid, received_creds.pid);
+  EXPECT_EQ(want_creds.uid, received_creds.uid);
+  EXPECT_EQ(want_creds.gid, received_creds.gid);
+}
+
+TEST_P(UnixStreamSocketPairTest, CredsNotCoalescedDown) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  SetSoPassCred(sockets->second_fd());
+
+  char sent_data1[20];
+  RandomizeBuffer(sent_data1, sizeof(sent_data1));
+
+  ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data1, sizeof(sent_data1)),
+              SyscallSucceedsWithValue(sizeof(sent_data1)));
+
+  UnsetSoPassCred(sockets->second_fd());
+
+  char sent_data2[20];
+  RandomizeBuffer(sent_data2, sizeof(sent_data2));
+
+  ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data2, sizeof(sent_data2)),
+              SyscallSucceedsWithValue(sizeof(sent_data2)));
+
+  SetSoPassCred(sockets->second_fd());
+
+  char received_data[sizeof(sent_data1) + sizeof(sent_data2)];
+  struct ucred received_creds;
+
+  ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds,
+                                    received_data, sizeof(received_data),
+                                    sizeof(sent_data1)));
+
+  EXPECT_EQ(0, memcmp(sent_data1, received_data, sizeof(sent_data1)));
+
+  struct ucred want_creds;
+  ASSERT_THAT(want_creds.pid = getpid(), SyscallSucceeds());
+  ASSERT_THAT(want_creds.uid = getuid(), SyscallSucceeds());
+  ASSERT_THAT(want_creds.gid = getgid(), SyscallSucceeds());
+
+  EXPECT_EQ(want_creds.pid, received_creds.pid);
+  EXPECT_EQ(want_creds.uid, received_creds.uid);
+  EXPECT_EQ(want_creds.gid, received_creds.gid);
+
+  ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds,
+                                    received_data, sizeof(received_data),
+                                    sizeof(sent_data2)));
+
+  EXPECT_EQ(0, memcmp(sent_data2, received_data, sizeof(sent_data2)));
+
+  want_creds = {0, 65534, 65534};
+
+  EXPECT_EQ(want_creds.pid, received_creds.pid);
+  EXPECT_EQ(want_creds.uid, received_creds.uid);
+  EXPECT_EQ(want_creds.gid, received_creds.gid);
+}
+
+TEST_P(UnixStreamSocketPairTest, CoalescedCredsNoPasscred) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  SetSoPassCred(sockets->second_fd());
+
+  char sent_data1[20];
+  RandomizeBuffer(sent_data1, sizeof(sent_data1));
+
+  ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data1, sizeof(sent_data1)),
+              SyscallSucceedsWithValue(sizeof(sent_data1)));
+
+  UnsetSoPassCred(sockets->second_fd());
+
+  char sent_data2[20];
+  RandomizeBuffer(sent_data2, sizeof(sent_data2));
+
+  ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data2, sizeof(sent_data2)),
+              SyscallSucceedsWithValue(sizeof(sent_data2)));
+
+  char received_data[sizeof(sent_data1) + sizeof(sent_data2)];
+
+  ASSERT_NO_FATAL_FAILURE(
+      RecvNoCmsg(sockets->second_fd(), received_data, sizeof(received_data)));
+
+  EXPECT_EQ(0, memcmp(sent_data1, received_data, sizeof(sent_data1)));
+  EXPECT_EQ(0, memcmp(sent_data2, received_data + sizeof(sent_data1),
+                      sizeof(sent_data2)));
+}
+
+TEST_P(UnixStreamSocketPairTest, CoalescedCreds1) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data1[20];
+  RandomizeBuffer(sent_data1, sizeof(sent_data1));
+
+  ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data1, sizeof(sent_data1)),
+              SyscallSucceedsWithValue(sizeof(sent_data1)));
+
+  char sent_data2[20];
+  RandomizeBuffer(sent_data2, sizeof(sent_data2));
+
+  ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data2, sizeof(sent_data2)),
+              SyscallSucceedsWithValue(sizeof(sent_data2)));
+
+  SetSoPassCred(sockets->second_fd());
+
+  char received_data[sizeof(sent_data1) + sizeof(sent_data2)];
+  struct ucred received_creds;
+
+  ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds,
+                                    received_data, sizeof(received_data)));
+
+  EXPECT_EQ(0, memcmp(sent_data1, received_data, sizeof(sent_data1)));
+  EXPECT_EQ(0, memcmp(sent_data2, received_data + sizeof(sent_data1),
+                      sizeof(sent_data2)));
+
+  struct ucred want_creds {
+    0, 65534, 65534
+  };
+
+  EXPECT_EQ(want_creds.pid, received_creds.pid);
+  EXPECT_EQ(want_creds.uid, received_creds.uid);
+  EXPECT_EQ(want_creds.gid, received_creds.gid);
+}
+
+TEST_P(UnixStreamSocketPairTest, CoalescedCreds2) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  SetSoPassCred(sockets->second_fd());
+
+  char sent_data1[20];
+  RandomizeBuffer(sent_data1, sizeof(sent_data1));
+
+  ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data1, sizeof(sent_data1)),
+              SyscallSucceedsWithValue(sizeof(sent_data1)));
+
+  char sent_data2[20];
+  RandomizeBuffer(sent_data2, sizeof(sent_data2));
+
+  ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data2, sizeof(sent_data2)),
+              SyscallSucceedsWithValue(sizeof(sent_data2)));
+
+  char received_data[sizeof(sent_data1) + sizeof(sent_data2)];
+  struct ucred received_creds;
+
+  ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds,
+                                    received_data, sizeof(received_data)));
+
+  EXPECT_EQ(0, memcmp(sent_data1, received_data, sizeof(sent_data1)));
+  EXPECT_EQ(0, memcmp(sent_data2, received_data + sizeof(sent_data1),
+                      sizeof(sent_data2)));
+
+  struct ucred want_creds;
+  ASSERT_THAT(want_creds.pid = getpid(), SyscallSucceeds());
+  ASSERT_THAT(want_creds.uid = getuid(), SyscallSucceeds());
+  ASSERT_THAT(want_creds.gid = getgid(), SyscallSucceeds());
+
+  EXPECT_EQ(want_creds.pid, received_creds.pid);
+  EXPECT_EQ(want_creds.uid, received_creds.uid);
+  EXPECT_EQ(want_creds.gid, received_creds.gid);
+}
+
+TEST_P(UnixStreamSocketPairTest, NonCoalescedDifferingCreds1) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  char sent_data1[20];
+  RandomizeBuffer(sent_data1, sizeof(sent_data1));
+
+  ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data1, sizeof(sent_data1)),
+              SyscallSucceedsWithValue(sizeof(sent_data1)));
+
+  SetSoPassCred(sockets->second_fd());
+
+  char sent_data2[20];
+  RandomizeBuffer(sent_data2, sizeof(sent_data2));
+
+  ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data2, sizeof(sent_data2)),
+              SyscallSucceedsWithValue(sizeof(sent_data2)));
+
+  char received_data1[sizeof(sent_data1) + sizeof(sent_data2)];
+  struct ucred received_creds1;
+
+  ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds1,
+                                    received_data1, sizeof(sent_data1)));
+
+  EXPECT_EQ(0, memcmp(sent_data1, received_data1, sizeof(sent_data1)));
+
+  struct ucred want_creds1 {
+    0, 65534, 65534
+  };
+
+  EXPECT_EQ(want_creds1.pid, received_creds1.pid);
+  EXPECT_EQ(want_creds1.uid, received_creds1.uid);
+  EXPECT_EQ(want_creds1.gid, received_creds1.gid);
+
+  char received_data2[sizeof(sent_data1) + sizeof(sent_data2)];
+  struct ucred received_creds2;
+
+  ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds2,
+                                    received_data2, sizeof(sent_data2)));
+
+  EXPECT_EQ(0, memcmp(sent_data2, received_data2, sizeof(sent_data2)));
+
+  struct ucred want_creds2;
+  ASSERT_THAT(want_creds2.pid = getpid(), SyscallSucceeds());
+  ASSERT_THAT(want_creds2.uid = getuid(), SyscallSucceeds());
+  ASSERT_THAT(want_creds2.gid = getgid(), SyscallSucceeds());
+
+  EXPECT_EQ(want_creds2.pid, received_creds2.pid);
+  EXPECT_EQ(want_creds2.uid, received_creds2.uid);
+  EXPECT_EQ(want_creds2.gid, received_creds2.gid);
+}
+
+TEST_P(UnixStreamSocketPairTest, NonCoalescedDifferingCreds2) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  SetSoPassCred(sockets->second_fd());
+
+  char sent_data1[20];
+  RandomizeBuffer(sent_data1, sizeof(sent_data1));
+
+  ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data1, sizeof(sent_data1)),
+              SyscallSucceedsWithValue(sizeof(sent_data1)));
+
+  UnsetSoPassCred(sockets->second_fd());
+
+  char sent_data2[20];
+  RandomizeBuffer(sent_data2, sizeof(sent_data2));
+
+  ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data2, sizeof(sent_data2)),
+              SyscallSucceedsWithValue(sizeof(sent_data2)));
+
+  SetSoPassCred(sockets->second_fd());
+
+  char received_data1[sizeof(sent_data1) + sizeof(sent_data2)];
+  struct ucred received_creds1;
+
+  ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds1,
+                                    received_data1, sizeof(sent_data1)));
+
+  EXPECT_EQ(0, memcmp(sent_data1, received_data1, sizeof(sent_data1)));
+
+  struct ucred want_creds1;
+  ASSERT_THAT(want_creds1.pid = getpid(), SyscallSucceeds());
+  ASSERT_THAT(want_creds1.uid = getuid(), SyscallSucceeds());
+  ASSERT_THAT(want_creds1.gid = getgid(), SyscallSucceeds());
+
+  EXPECT_EQ(want_creds1.pid, received_creds1.pid);
+  EXPECT_EQ(want_creds1.uid, received_creds1.uid);
+  EXPECT_EQ(want_creds1.gid, received_creds1.gid);
+
+  char received_data2[sizeof(sent_data1) + sizeof(sent_data2)];
+  struct ucred received_creds2;
+
+  ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds2,
+                                    received_data2, sizeof(sent_data2)));
+
+  EXPECT_EQ(0, memcmp(sent_data2, received_data2, sizeof(sent_data2)));
+
+  struct ucred want_creds2 {
+    0, 65534, 65534
+  };
+
+  EXPECT_EQ(want_creds2.pid, received_creds2.pid);
+  EXPECT_EQ(want_creds2.uid, received_creds2.uid);
+  EXPECT_EQ(want_creds2.gid, received_creds2.gid);
+}
+
+TEST_P(UnixStreamSocketPairTest, CoalescedDifferingCreds) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  SetSoPassCred(sockets->second_fd());
+
+  char sent_data1[20];
+  RandomizeBuffer(sent_data1, sizeof(sent_data1));
+
+  ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data1, sizeof(sent_data1)),
+              SyscallSucceedsWithValue(sizeof(sent_data1)));
+
+  char sent_data2[20];
+  RandomizeBuffer(sent_data2, sizeof(sent_data2));
+
+  ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data2, sizeof(sent_data2)),
+              SyscallSucceedsWithValue(sizeof(sent_data2)));
+
+  UnsetSoPassCred(sockets->second_fd());
+
+  char sent_data3[20];
+  RandomizeBuffer(sent_data3, sizeof(sent_data3));
+
+  ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data3, sizeof(sent_data3)),
+              SyscallSucceedsWithValue(sizeof(sent_data3)));
+
+  char received_data[sizeof(sent_data1) + sizeof(sent_data2) +
+                     sizeof(sent_data3)];
+
+  ASSERT_NO_FATAL_FAILURE(
+      RecvNoCmsg(sockets->second_fd(), received_data, sizeof(received_data)));
+
+  EXPECT_EQ(0, memcmp(sent_data1, received_data, sizeof(sent_data1)));
+  EXPECT_EQ(0, memcmp(sent_data2, received_data + sizeof(sent_data1),
+                      sizeof(sent_data2)));
+  EXPECT_EQ(0, memcmp(sent_data3,
+                      received_data + sizeof(sent_data1) + sizeof(sent_data2),
+                      sizeof(sent_data3)));
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    AllUnixDomainSockets, UnixStreamSocketPairTest,
+    ::testing::ValuesIn(IncludeReversals(VecCat<SocketPairKind>(
+        ApplyVec<SocketPairKind>(UnixDomainSocketPair,
+                                 AllBitwiseCombinations(List<int>{SOCK_STREAM},
+                                                        List<int>{
+                                                            0, SOCK_NONBLOCK})),
+        ApplyVec<SocketPairKind>(FilesystemBoundUnixDomainSocketPair,
+                                 AllBitwiseCombinations(List<int>{SOCK_STREAM},
+                                                        List<int>{
+                                                            0, SOCK_NONBLOCK})),
+        ApplyVec<SocketPairKind>(
+            AbstractBoundUnixDomainSocketPair,
+            AllBitwiseCombinations(List<int>{SOCK_STREAM},
+                                   List<int>{0, SOCK_NONBLOCK}))))));
+
+// Test fixture for tests that apply to pairs of unbound unix stream sockets.
+using UnboundUnixStreamSocketPairTest = SocketPairTest;
+
+TEST_P(UnboundUnixStreamSocketPairTest, SendtoWithoutConnect) {
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+                   sockets->first_addr_size()),
+              SyscallSucceeds());
+
+  char data = 'a';
+  ASSERT_THAT(sendto(sockets->second_fd(), &data, sizeof(data), 0,
+                     sockets->first_addr(), sockets->first_addr_size()),
+              SyscallFailsWithErrno(EOPNOTSUPP));
+}
+
+TEST_P(UnboundUnixStreamSocketPairTest, SendtoWithoutConnectIgnoresAddr) {
+  // FIXME(b/68223466): gVisor tries to find /foo/bar and thus returns ENOENT.
+  if (IsRunningOnGvisor()) {
+    return;
+  }
+
+  auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+  ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+                   sockets->first_addr_size()),
+              SyscallSucceeds());
+
+  // Even a bogus address is completely ignored.
+  constexpr char kPath[] = "/foo/bar";
+
+  // Sanity check that kPath doesn't exist.
+  struct stat s;
+  ASSERT_THAT(stat(kPath, &s), SyscallFailsWithErrno(ENOENT));
+
+  struct sockaddr_un addr = {};
+  addr.sun_family = AF_UNIX;
+  memcpy(addr.sun_path, kPath, sizeof(kPath));
+
+  char data = 'a';
+  ASSERT_THAT(
+      sendto(sockets->second_fd(), &data, sizeof(data), 0,
+             reinterpret_cast<const struct sockaddr*>(&addr), sizeof(addr)),
+      SyscallFailsWithErrno(EOPNOTSUPP));
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    AllUnixDomainSockets, UnboundUnixStreamSocketPairTest,
+    ::testing::ValuesIn(IncludeReversals(VecCat<SocketPairKind>(
+        ApplyVec<SocketPairKind>(FilesystemUnboundUnixDomainSocketPair,
+                                 AllBitwiseCombinations(List<int>{SOCK_STREAM},
+                                                        List<int>{
+                                                            0, SOCK_NONBLOCK})),
+        ApplyVec<SocketPairKind>(
+            AbstractUnboundUnixDomainSocketPair,
+            AllBitwiseCombinations(List<int>{SOCK_STREAM},
+                                   List<int>{0, SOCK_NONBLOCK}))))));
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/splice.cc b/test/syscalls/linux/splice.cc
new file mode 100644
index 000000000..08fc4b1b7
--- /dev/null
+++ b/test/syscalls/linux/splice.cc
@@ -0,0 +1,699 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <linux/unistd.h>
+#include <sys/eventfd.h>
+#include <sys/resource.h>
+#include <sys/sendfile.h>
+#include <sys/time.h>
+#include <unistd.h>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/strings/string_view.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(SpliceTest, TwoRegularFiles) {
+  // Create temp files.
+  const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+
+  // Open the input file as read only.
+  const FileDescriptor in_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY));
+
+  // Open the output file as write only.
+  const FileDescriptor out_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_WRONLY));
+
+  // Verify that it is rejected as expected; regardless of offsets.
+  loff_t in_offset = 0;
+  loff_t out_offset = 0;
+  EXPECT_THAT(splice(in_fd.get(), &in_offset, out_fd.get(), &out_offset, 1, 0),
+              SyscallFailsWithErrno(EINVAL));
+  EXPECT_THAT(splice(in_fd.get(), nullptr, out_fd.get(), &out_offset, 1, 0),
+              SyscallFailsWithErrno(EINVAL));
+  EXPECT_THAT(splice(in_fd.get(), &in_offset, out_fd.get(), nullptr, 1, 0),
+              SyscallFailsWithErrno(EINVAL));
+  EXPECT_THAT(splice(in_fd.get(), nullptr, out_fd.get(), nullptr, 1, 0),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+int memfd_create(const std::string& name, unsigned int flags) {
+  return syscall(__NR_memfd_create, name.c_str(), flags);
+}
+
+TEST(SpliceTest, NegativeOffset) {
+  // Create a new pipe.
+  int fds[2];
+  ASSERT_THAT(pipe(fds), SyscallSucceeds());
+  const FileDescriptor rfd(fds[0]);
+  const FileDescriptor wfd(fds[1]);
+
+  // Fill the pipe.
+  std::vector<char> buf(kPageSize);
+  RandomizeBuffer(buf.data(), buf.size());
+  ASSERT_THAT(write(wfd.get(), buf.data(), buf.size()),
+              SyscallSucceedsWithValue(kPageSize));
+
+  // Open the output file as write only.
+  int fd;
+  EXPECT_THAT(fd = memfd_create("negative", 0), SyscallSucceeds());
+  const FileDescriptor out_fd(fd);
+
+  loff_t out_offset = 0xffffffffffffffffull;
+  constexpr int kSize = 2;
+  EXPECT_THAT(splice(rfd.get(), nullptr, out_fd.get(), &out_offset, kSize, 0),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+// Write offset + size overflows int64.
+//
+// This is a regression test for b/148041624.
+TEST(SpliceTest, WriteOverflow) {
+  // Create a new pipe.
+  int fds[2];
+  ASSERT_THAT(pipe(fds), SyscallSucceeds());
+  const FileDescriptor rfd(fds[0]);
+  const FileDescriptor wfd(fds[1]);
+
+  // Fill the pipe.
+  std::vector<char> buf(kPageSize);
+  RandomizeBuffer(buf.data(), buf.size());
+  ASSERT_THAT(write(wfd.get(), buf.data(), buf.size()),
+              SyscallSucceedsWithValue(kPageSize));
+
+  // Open the output file.
+  int fd;
+  EXPECT_THAT(fd = memfd_create("overflow", 0), SyscallSucceeds());
+  const FileDescriptor out_fd(fd);
+
+  // out_offset + kSize overflows INT64_MAX.
+  loff_t out_offset = 0x7ffffffffffffffeull;
+  constexpr int kSize = 3;
+  EXPECT_THAT(splice(rfd.get(), nullptr, out_fd.get(), &out_offset, kSize, 0),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(SpliceTest, SamePipe) {
+  // Create a new pipe.
+  int fds[2];
+  ASSERT_THAT(pipe(fds), SyscallSucceeds());
+  const FileDescriptor rfd(fds[0]);
+  const FileDescriptor wfd(fds[1]);
+
+  // Fill the pipe.
+  std::vector<char> buf(kPageSize);
+  RandomizeBuffer(buf.data(), buf.size());
+  ASSERT_THAT(write(wfd.get(), buf.data(), buf.size()),
+              SyscallSucceedsWithValue(kPageSize));
+
+  // Attempt to splice to itself.
+  EXPECT_THAT(splice(rfd.get(), nullptr, wfd.get(), nullptr, kPageSize, 0),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(TeeTest, SamePipe) {
+  // Create a new pipe.
+  int fds[2];
+  ASSERT_THAT(pipe(fds), SyscallSucceeds());
+  const FileDescriptor rfd(fds[0]);
+  const FileDescriptor wfd(fds[1]);
+
+  // Fill the pipe.
+  std::vector<char> buf(kPageSize);
+  RandomizeBuffer(buf.data(), buf.size());
+  ASSERT_THAT(write(wfd.get(), buf.data(), buf.size()),
+              SyscallSucceedsWithValue(kPageSize));
+
+  // Attempt to tee to itself.
+  EXPECT_THAT(tee(rfd.get(), wfd.get(), kPageSize, 0),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(TeeTest, RegularFile) {
+  // Open some file.
+  const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  const FileDescriptor in_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDWR));
+
+  // Create a new pipe.
+  int fds[2];
+  ASSERT_THAT(pipe(fds), SyscallSucceeds());
+  const FileDescriptor rfd(fds[0]);
+  const FileDescriptor wfd(fds[1]);
+
+  // Attempt to tee from the file.
+  EXPECT_THAT(tee(in_fd.get(), wfd.get(), kPageSize, 0),
+              SyscallFailsWithErrno(EINVAL));
+  EXPECT_THAT(tee(rfd.get(), in_fd.get(), kPageSize, 0),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(SpliceTest, PipeOffsets) {
+  // Create two new pipes.
+  int first[2], second[2];
+  ASSERT_THAT(pipe(first), SyscallSucceeds());
+  const FileDescriptor rfd1(first[0]);
+  const FileDescriptor wfd1(first[1]);
+  ASSERT_THAT(pipe(second), SyscallSucceeds());
+  const FileDescriptor rfd2(second[0]);
+  const FileDescriptor wfd2(second[1]);
+
+  // All pipe offsets should be rejected.
+  loff_t in_offset = 0;
+  loff_t out_offset = 0;
+  EXPECT_THAT(splice(rfd1.get(), &in_offset, wfd2.get(), &out_offset, 1, 0),
+              SyscallFailsWithErrno(ESPIPE));
+  EXPECT_THAT(splice(rfd1.get(), nullptr, wfd2.get(), &out_offset, 1, 0),
+              SyscallFailsWithErrno(ESPIPE));
+  EXPECT_THAT(splice(rfd1.get(), &in_offset, wfd2.get(), nullptr, 1, 0),
+              SyscallFailsWithErrno(ESPIPE));
+}
+
+// Event FDs may be used with splice without an offset.
+TEST(SpliceTest, FromEventFD) {
+  // Open the input eventfd with an initial value so that it is readable.
+  constexpr uint64_t kEventFDValue = 1;
+  int efd;
+  ASSERT_THAT(efd = eventfd(kEventFDValue, 0), SyscallSucceeds());
+  const FileDescriptor in_fd(efd);
+
+  // Create a new pipe.
+  int fds[2];
+  ASSERT_THAT(pipe(fds), SyscallSucceeds());
+  const FileDescriptor rfd(fds[0]);
+  const FileDescriptor wfd(fds[1]);
+
+  // Splice 8-byte eventfd value to pipe.
+  constexpr int kEventFDSize = 8;
+  EXPECT_THAT(splice(in_fd.get(), nullptr, wfd.get(), nullptr, kEventFDSize, 0),
+              SyscallSucceedsWithValue(kEventFDSize));
+
+  // Contents should be equal.
+  std::vector<char> rbuf(kEventFDSize);
+  ASSERT_THAT(read(rfd.get(), rbuf.data(), rbuf.size()),
+              SyscallSucceedsWithValue(kEventFDSize));
+  EXPECT_EQ(memcmp(rbuf.data(), &kEventFDValue, rbuf.size()), 0);
+}
+
+// Event FDs may not be used with splice with an offset.
+TEST(SpliceTest, FromEventFDOffset) {
+  int efd;
+  ASSERT_THAT(efd = eventfd(0, 0), SyscallSucceeds());
+  const FileDescriptor in_fd(efd);
+
+  // Create a new pipe.
+  int fds[2];
+  ASSERT_THAT(pipe(fds), SyscallSucceeds());
+  const FileDescriptor rfd(fds[0]);
+  const FileDescriptor wfd(fds[1]);
+
+  // Attempt to splice 8-byte eventfd value to pipe with offset.
+  //
+  // This is not allowed because eventfd doesn't support pread.
+  constexpr int kEventFDSize = 8;
+  loff_t in_off = 0;
+  EXPECT_THAT(splice(in_fd.get(), &in_off, wfd.get(), nullptr, kEventFDSize, 0),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+// Event FDs may not be used with splice with an offset.
+TEST(SpliceTest, ToEventFDOffset) {
+  // Create a new pipe.
+  int fds[2];
+  ASSERT_THAT(pipe(fds), SyscallSucceeds());
+  const FileDescriptor rfd(fds[0]);
+  const FileDescriptor wfd(fds[1]);
+
+  // Fill with a value.
+  constexpr int kEventFDSize = 8;
+  std::vector<char> buf(kEventFDSize);
+  buf[0] = 1;
+  ASSERT_THAT(write(wfd.get(), buf.data(), buf.size()),
+              SyscallSucceedsWithValue(kEventFDSize));
+
+  int efd;
+  ASSERT_THAT(efd = eventfd(0, 0), SyscallSucceeds());
+  const FileDescriptor out_fd(efd);
+
+  // Attempt to splice 8-byte eventfd value to pipe with offset.
+  //
+  // This is not allowed because eventfd doesn't support pwrite.
+  loff_t out_off = 0;
+  EXPECT_THAT(
+      splice(rfd.get(), nullptr, out_fd.get(), &out_off, kEventFDSize, 0),
+      SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(SpliceTest, ToPipe) {
+  // Open the input file.
+  const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  const FileDescriptor in_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDWR));
+
+  // Fill with some random data.
+  std::vector<char> buf(kPageSize);
+  RandomizeBuffer(buf.data(), buf.size());
+  ASSERT_THAT(write(in_fd.get(), buf.data(), buf.size()),
+              SyscallSucceedsWithValue(kPageSize));
+  ASSERT_THAT(lseek(in_fd.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0));
+
+  // Create a new pipe.
+  int fds[2];
+  ASSERT_THAT(pipe(fds), SyscallSucceeds());
+  const FileDescriptor rfd(fds[0]);
+  const FileDescriptor wfd(fds[1]);
+
+  // Splice to the pipe.
+  EXPECT_THAT(splice(in_fd.get(), nullptr, wfd.get(), nullptr, kPageSize, 0),
+              SyscallSucceedsWithValue(kPageSize));
+
+  // Contents should be equal.
+  std::vector<char> rbuf(kPageSize);
+  ASSERT_THAT(read(rfd.get(), rbuf.data(), rbuf.size()),
+              SyscallSucceedsWithValue(kPageSize));
+  EXPECT_EQ(memcmp(rbuf.data(), buf.data(), buf.size()), 0);
+}
+
+TEST(SpliceTest, ToPipeOffset) {
+  // Open the input file.
+  const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  const FileDescriptor in_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDWR));
+
+  // Fill with some random data.
+  std::vector<char> buf(kPageSize);
+  RandomizeBuffer(buf.data(), buf.size());
+  ASSERT_THAT(write(in_fd.get(), buf.data(), buf.size()),
+              SyscallSucceedsWithValue(kPageSize));
+
+  // Create a new pipe.
+  int fds[2];
+  ASSERT_THAT(pipe(fds), SyscallSucceeds());
+  const FileDescriptor rfd(fds[0]);
+  const FileDescriptor wfd(fds[1]);
+
+  // Splice to the pipe.
+  loff_t in_offset = kPageSize / 2;
+  EXPECT_THAT(
+      splice(in_fd.get(), &in_offset, wfd.get(), nullptr, kPageSize / 2, 0),
+      SyscallSucceedsWithValue(kPageSize / 2));
+
+  // Contents should be equal to only the second part.
+  std::vector<char> rbuf(kPageSize / 2);
+  ASSERT_THAT(read(rfd.get(), rbuf.data(), rbuf.size()),
+              SyscallSucceedsWithValue(kPageSize / 2));
+  EXPECT_EQ(memcmp(rbuf.data(), buf.data() + (kPageSize / 2), rbuf.size()), 0);
+}
+
+TEST(SpliceTest, FromPipe) {
+  // Create a new pipe.
+  int fds[2];
+  ASSERT_THAT(pipe(fds), SyscallSucceeds());
+  const FileDescriptor rfd(fds[0]);
+  const FileDescriptor wfd(fds[1]);
+
+  // Fill with some random data.
+  std::vector<char> buf(kPageSize);
+  RandomizeBuffer(buf.data(), buf.size());
+  ASSERT_THAT(write(wfd.get(), buf.data(), buf.size()),
+              SyscallSucceedsWithValue(kPageSize));
+
+  // Open the input file.
+  const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  const FileDescriptor out_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_RDWR));
+
+  // Splice to the output file.
+  EXPECT_THAT(splice(rfd.get(), nullptr, out_fd.get(), nullptr, kPageSize, 0),
+              SyscallSucceedsWithValue(kPageSize));
+
+  // The offset of the output should be equal to kPageSize. We assert that and
+  // reset to zero so that we can read the contents and ensure they match.
+  EXPECT_THAT(lseek(out_fd.get(), 0, SEEK_CUR),
+              SyscallSucceedsWithValue(kPageSize));
+  ASSERT_THAT(lseek(out_fd.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0));
+
+  // Contents should be equal.
+  std::vector<char> rbuf(kPageSize);
+  ASSERT_THAT(read(out_fd.get(), rbuf.data(), rbuf.size()),
+              SyscallSucceedsWithValue(kPageSize));
+  EXPECT_EQ(memcmp(rbuf.data(), buf.data(), buf.size()), 0);
+}
+
+TEST(SpliceTest, FromPipeOffset) {
+  // Create a new pipe.
+  int fds[2];
+  ASSERT_THAT(pipe(fds), SyscallSucceeds());
+  const FileDescriptor rfd(fds[0]);
+  const FileDescriptor wfd(fds[1]);
+
+  // Fill with some random data.
+  std::vector<char> buf(kPageSize);
+  RandomizeBuffer(buf.data(), buf.size());
+  ASSERT_THAT(write(wfd.get(), buf.data(), buf.size()),
+              SyscallSucceedsWithValue(kPageSize));
+
+  // Open the input file.
+  const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  const FileDescriptor out_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_RDWR));
+
+  // Splice to the output file.
+  loff_t out_offset = kPageSize / 2;
+  EXPECT_THAT(
+      splice(rfd.get(), nullptr, out_fd.get(), &out_offset, kPageSize, 0),
+      SyscallSucceedsWithValue(kPageSize));
+
+  // Content should reflect the splice. We write to a specific offset in the
+  // file, so the internals should now be allocated sparsely.
+  std::vector<char> rbuf(kPageSize);
+  ASSERT_THAT(read(out_fd.get(), rbuf.data(), rbuf.size()),
+              SyscallSucceedsWithValue(kPageSize));
+  std::vector<char> zbuf(kPageSize / 2);
+  memset(zbuf.data(), 0, zbuf.size());
+  EXPECT_EQ(memcmp(rbuf.data(), zbuf.data(), zbuf.size()), 0);
+  EXPECT_EQ(memcmp(rbuf.data() + kPageSize / 2, buf.data(), kPageSize / 2), 0);
+}
+
+TEST(SpliceTest, TwoPipes) {
+  // Create two new pipes.
+  int first[2], second[2];
+  ASSERT_THAT(pipe(first), SyscallSucceeds());
+  const FileDescriptor rfd1(first[0]);
+  const FileDescriptor wfd1(first[1]);
+  ASSERT_THAT(pipe(second), SyscallSucceeds());
+  const FileDescriptor rfd2(second[0]);
+  const FileDescriptor wfd2(second[1]);
+
+  // Fill with some random data.
+  std::vector<char> buf(kPageSize);
+  RandomizeBuffer(buf.data(), buf.size());
+  ASSERT_THAT(write(wfd1.get(), buf.data(), buf.size()),
+              SyscallSucceedsWithValue(kPageSize));
+
+  // Splice to the second pipe, using two operations.
+  EXPECT_THAT(
+      splice(rfd1.get(), nullptr, wfd2.get(), nullptr, kPageSize / 2, 0),
+      SyscallSucceedsWithValue(kPageSize / 2));
+  EXPECT_THAT(
+      splice(rfd1.get(), nullptr, wfd2.get(), nullptr, kPageSize / 2, 0),
+      SyscallSucceedsWithValue(kPageSize / 2));
+
+  // Content should reflect the splice.
+  std::vector<char> rbuf(kPageSize);
+  ASSERT_THAT(read(rfd2.get(), rbuf.data(), rbuf.size()),
+              SyscallSucceedsWithValue(kPageSize));
+  EXPECT_EQ(memcmp(rbuf.data(), buf.data(), kPageSize), 0);
+}
+
+TEST(SpliceTest, TwoPipesCircular) {
+  // This test deadlocks the sentry on VFS1 because VFS1 splice ordering is
+  // based on fs.File.UniqueID, which does not prevent circular ordering between
+  // e.g. inode-level locks taken by fs.FileOperations.
+  SKIP_IF(IsRunningWithVFS1());
+
+  // Create two pipes.
+  int fds[2];
+  ASSERT_THAT(pipe(fds), SyscallSucceeds());
+  const FileDescriptor first_rfd(fds[0]);
+  const FileDescriptor first_wfd(fds[1]);
+  ASSERT_THAT(pipe(fds), SyscallSucceeds());
+  const FileDescriptor second_rfd(fds[0]);
+  const FileDescriptor second_wfd(fds[1]);
+
+  // On Linux, each pipe is normally limited to
+  // include/linux/pipe_fs_i.h:PIPE_DEF_BUFFERS buffers worth of data.
+  constexpr size_t PIPE_DEF_BUFFERS = 16;
+
+  // Write some data to each pipe. Below we splice 1 byte at a time between
+  // pipes, which very quickly causes each byte to be stored in a separate
+  // buffer, so we must ensure that the total amount of data in the system is <=
+  // PIPE_DEF_BUFFERS bytes.
+  std::vector<char> buf(PIPE_DEF_BUFFERS / 2);
+  RandomizeBuffer(buf.data(), buf.size());
+  ASSERT_THAT(write(first_wfd.get(), buf.data(), buf.size()),
+              SyscallSucceedsWithValue(buf.size()));
+  ASSERT_THAT(write(second_wfd.get(), buf.data(), buf.size()),
+              SyscallSucceedsWithValue(buf.size()));
+
+  // Have another thread splice from the second pipe to the first, while we
+  // splice from the first to the second. The test passes if this does not
+  // deadlock.
+  const int kIterations = 1000;
+  DisableSave ds;
+  ScopedThread t([&]() {
+    for (int i = 0; i < kIterations; i++) {
+      ASSERT_THAT(
+          splice(second_rfd.get(), nullptr, first_wfd.get(), nullptr, 1, 0),
+          SyscallSucceedsWithValue(1));
+    }
+  });
+  for (int i = 0; i < kIterations; i++) {
+    ASSERT_THAT(
+        splice(first_rfd.get(), nullptr, second_wfd.get(), nullptr, 1, 0),
+        SyscallSucceedsWithValue(1));
+  }
+}
+
+TEST(SpliceTest, Blocking) {
+  // Create two new pipes.
+  int first[2], second[2];
+  ASSERT_THAT(pipe(first), SyscallSucceeds());
+  const FileDescriptor rfd1(first[0]);
+  const FileDescriptor wfd1(first[1]);
+  ASSERT_THAT(pipe(second), SyscallSucceeds());
+  const FileDescriptor rfd2(second[0]);
+  const FileDescriptor wfd2(second[1]);
+
+  // This thread writes to the main pipe.
+  std::vector<char> buf(kPageSize);
+  RandomizeBuffer(buf.data(), buf.size());
+  ScopedThread t([&]() {
+    ASSERT_THAT(write(wfd1.get(), buf.data(), buf.size()),
+                SyscallSucceedsWithValue(kPageSize));
+  });
+
+  // Attempt a splice immediately; it should block.
+  EXPECT_THAT(splice(rfd1.get(), nullptr, wfd2.get(), nullptr, kPageSize, 0),
+              SyscallSucceedsWithValue(kPageSize));
+
+  // Thread should be joinable.
+  t.Join();
+
+  // Content should reflect the splice.
+  std::vector<char> rbuf(kPageSize);
+  ASSERT_THAT(read(rfd2.get(), rbuf.data(), rbuf.size()),
+              SyscallSucceedsWithValue(kPageSize));
+  EXPECT_EQ(memcmp(rbuf.data(), buf.data(), kPageSize), 0);
+}
+
+TEST(TeeTest, Blocking) {
+  // Create two new pipes.
+  int first[2], second[2];
+  ASSERT_THAT(pipe(first), SyscallSucceeds());
+  const FileDescriptor rfd1(first[0]);
+  const FileDescriptor wfd1(first[1]);
+  ASSERT_THAT(pipe(second), SyscallSucceeds());
+  const FileDescriptor rfd2(second[0]);
+  const FileDescriptor wfd2(second[1]);
+
+  // This thread writes to the main pipe.
+  std::vector<char> buf(kPageSize);
+  RandomizeBuffer(buf.data(), buf.size());
+  ScopedThread t([&]() {
+    ASSERT_THAT(write(wfd1.get(), buf.data(), buf.size()),
+                SyscallSucceedsWithValue(kPageSize));
+  });
+
+  // Attempt a tee immediately; it should block.
+  EXPECT_THAT(tee(rfd1.get(), wfd2.get(), kPageSize, 0),
+              SyscallSucceedsWithValue(kPageSize));
+
+  // Thread should be joinable.
+  t.Join();
+
+  // Content should reflect the splice, in both pipes.
+  std::vector<char> rbuf(kPageSize);
+  ASSERT_THAT(read(rfd2.get(), rbuf.data(), rbuf.size()),
+              SyscallSucceedsWithValue(kPageSize));
+  EXPECT_EQ(memcmp(rbuf.data(), buf.data(), kPageSize), 0);
+  ASSERT_THAT(read(rfd1.get(), rbuf.data(), rbuf.size()),
+              SyscallSucceedsWithValue(kPageSize));
+  EXPECT_EQ(memcmp(rbuf.data(), buf.data(), kPageSize), 0);
+}
+
+TEST(TeeTest, BlockingWrite) {
+  // Create two new pipes.
+  int first[2], second[2];
+  ASSERT_THAT(pipe(first), SyscallSucceeds());
+  const FileDescriptor rfd1(first[0]);
+  const FileDescriptor wfd1(first[1]);
+  ASSERT_THAT(pipe(second), SyscallSucceeds());
+  const FileDescriptor rfd2(second[0]);
+  const FileDescriptor wfd2(second[1]);
+
+  // Make some data available to be read.
+  std::vector<char> buf1(kPageSize);
+  RandomizeBuffer(buf1.data(), buf1.size());
+  ASSERT_THAT(write(wfd1.get(), buf1.data(), buf1.size()),
+              SyscallSucceedsWithValue(kPageSize));
+
+  // Fill up the write pipe's buffer.
+  int pipe_size = -1;
+  ASSERT_THAT(pipe_size = fcntl(wfd2.get(), F_GETPIPE_SZ), SyscallSucceeds());
+  std::vector<char> buf2(pipe_size);
+  ASSERT_THAT(write(wfd2.get(), buf2.data(), buf2.size()),
+              SyscallSucceedsWithValue(pipe_size));
+
+  ScopedThread t([&]() {
+    absl::SleepFor(absl::Milliseconds(100));
+    ASSERT_THAT(read(rfd2.get(), buf2.data(), buf2.size()),
+                SyscallSucceedsWithValue(pipe_size));
+  });
+
+  // Attempt a tee immediately; it should block.
+  EXPECT_THAT(tee(rfd1.get(), wfd2.get(), kPageSize, 0),
+              SyscallSucceedsWithValue(kPageSize));
+
+  // Thread should be joinable.
+  t.Join();
+
+  // Content should reflect the tee.
+  std::vector<char> rbuf(kPageSize);
+  ASSERT_THAT(read(rfd2.get(), rbuf.data(), rbuf.size()),
+              SyscallSucceedsWithValue(kPageSize));
+  EXPECT_EQ(memcmp(rbuf.data(), buf1.data(), kPageSize), 0);
+}
+
+TEST(SpliceTest, NonBlocking) {
+  // Create two new pipes.
+  int first[2], second[2];
+  ASSERT_THAT(pipe(first), SyscallSucceeds());
+  const FileDescriptor rfd1(first[0]);
+  const FileDescriptor wfd1(first[1]);
+  ASSERT_THAT(pipe(second), SyscallSucceeds());
+  const FileDescriptor rfd2(second[0]);
+  const FileDescriptor wfd2(second[1]);
+
+  // Splice with no data to back it.
+  EXPECT_THAT(splice(rfd1.get(), nullptr, wfd2.get(), nullptr, kPageSize,
+                     SPLICE_F_NONBLOCK),
+              SyscallFailsWithErrno(EAGAIN));
+}
+
+TEST(TeeTest, NonBlocking) {
+  // Create two new pipes.
+  int first[2], second[2];
+  ASSERT_THAT(pipe(first), SyscallSucceeds());
+  const FileDescriptor rfd1(first[0]);
+  const FileDescriptor wfd1(first[1]);
+  ASSERT_THAT(pipe(second), SyscallSucceeds());
+  const FileDescriptor rfd2(second[0]);
+  const FileDescriptor wfd2(second[1]);
+
+  // Splice with no data to back it.
+  EXPECT_THAT(tee(rfd1.get(), wfd2.get(), kPageSize, SPLICE_F_NONBLOCK),
+              SyscallFailsWithErrno(EAGAIN));
+}
+
+TEST(TeeTest, MultiPage) {
+  // Create two new pipes.
+  int first[2], second[2];
+  ASSERT_THAT(pipe(first), SyscallSucceeds());
+  const FileDescriptor rfd1(first[0]);
+  const FileDescriptor wfd1(first[1]);
+  ASSERT_THAT(pipe(second), SyscallSucceeds());
+  const FileDescriptor rfd2(second[0]);
+  const FileDescriptor wfd2(second[1]);
+
+  // Make some data available to be read.
+  std::vector<char> wbuf(8 * kPageSize);
+  RandomizeBuffer(wbuf.data(), wbuf.size());
+  ASSERT_THAT(write(wfd1.get(), wbuf.data(), wbuf.size()),
+              SyscallSucceedsWithValue(wbuf.size()));
+
+  // Attempt a tee immediately; it should complete.
+  EXPECT_THAT(tee(rfd1.get(), wfd2.get(), wbuf.size(), 0),
+              SyscallSucceedsWithValue(wbuf.size()));
+
+  // Content should reflect the tee.
+  std::vector<char> rbuf(wbuf.size());
+  ASSERT_THAT(read(rfd2.get(), rbuf.data(), rbuf.size()),
+              SyscallSucceedsWithValue(rbuf.size()));
+  EXPECT_EQ(memcmp(rbuf.data(), wbuf.data(), rbuf.size()), 0);
+  ASSERT_THAT(read(rfd1.get(), rbuf.data(), rbuf.size()),
+              SyscallSucceedsWithValue(rbuf.size()));
+  EXPECT_EQ(memcmp(rbuf.data(), wbuf.data(), rbuf.size()), 0);
+}
+
+TEST(SpliceTest, FromPipeMaxFileSize) {
+  // Create a new pipe.
+  int fds[2];
+  ASSERT_THAT(pipe(fds), SyscallSucceeds());
+  const FileDescriptor rfd(fds[0]);
+  const FileDescriptor wfd(fds[1]);
+
+  // Fill with some random data.
+  std::vector<char> buf(kPageSize);
+  RandomizeBuffer(buf.data(), buf.size());
+  ASSERT_THAT(write(wfd.get(), buf.data(), buf.size()),
+              SyscallSucceedsWithValue(kPageSize));
+
+  // Open the input file.
+  const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  const FileDescriptor out_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_RDWR));
+
+  EXPECT_THAT(ftruncate(out_fd.get(), 13 << 20), SyscallSucceeds());
+  EXPECT_THAT(lseek(out_fd.get(), 0, SEEK_END),
+              SyscallSucceedsWithValue(13 << 20));
+
+  // Set our file size limit.
+  sigset_t set;
+  sigemptyset(&set);
+  sigaddset(&set, SIGXFSZ);
+  TEST_PCHECK(sigprocmask(SIG_BLOCK, &set, nullptr) == 0);
+  rlimit rlim = {};
+  rlim.rlim_cur = rlim.rlim_max = (13 << 20);
+  EXPECT_THAT(setrlimit(RLIMIT_FSIZE, &rlim), SyscallSucceeds());
+
+  // Splice to the output file.
+  EXPECT_THAT(
+      splice(rfd.get(), nullptr, out_fd.get(), nullptr, 3 * kPageSize, 0),
+      SyscallFailsWithErrno(EFBIG));
+
+  // Contents should be equal.
+  std::vector<char> rbuf(kPageSize);
+  ASSERT_THAT(read(rfd.get(), rbuf.data(), rbuf.size()),
+              SyscallSucceedsWithValue(kPageSize));
+  EXPECT_EQ(memcmp(rbuf.data(), buf.data(), buf.size()), 0);
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/stat.cc b/test/syscalls/linux/stat.cc
new file mode 100644
index 000000000..2503960f3
--- /dev/null
+++ b/test/syscalls/linux/stat.cc
@@ -0,0 +1,720 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/statfs.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <string>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/strings/match.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/string_view.h"
+#include "test/syscalls/linux/file_base.h"
+#include "test/util/cleanup.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/fs_util.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+#ifndef AT_STATX_FORCE_SYNC
+#define AT_STATX_FORCE_SYNC 0x2000
+#endif
+#ifndef AT_STATX_DONT_SYNC
+#define AT_STATX_DONT_SYNC 0x4000
+#endif
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+class StatTest : public FileTest {};
+
+TEST_F(StatTest, FstatatAbs) {
+  struct stat st;
+
+  // Check that the stat works.
+  EXPECT_THAT(fstatat(AT_FDCWD, test_file_name_.c_str(), &st, 0),
+              SyscallSucceeds());
+  EXPECT_TRUE(S_ISREG(st.st_mode));
+}
+
+TEST_F(StatTest, FstatatEmptyPath) {
+  struct stat st;
+  const auto fd = ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDONLY));
+
+  // Check that the stat works.
+  EXPECT_THAT(fstatat(fd.get(), "", &st, AT_EMPTY_PATH), SyscallSucceeds());
+  EXPECT_TRUE(S_ISREG(st.st_mode));
+}
+
+TEST_F(StatTest, FstatatRel) {
+  struct stat st;
+  int dirfd;
+  auto filename = std::string(Basename(test_file_name_));
+
+  // Open the temporary directory read-only.
+  ASSERT_THAT(dirfd = open(GetAbsoluteTestTmpdir().c_str(), O_RDONLY),
+              SyscallSucceeds());
+
+  // Check that the stat works.
+  EXPECT_THAT(fstatat(dirfd, filename.c_str(), &st, 0), SyscallSucceeds());
+  EXPECT_TRUE(S_ISREG(st.st_mode));
+  close(dirfd);
+}
+
+TEST_F(StatTest, FstatatSymlink) {
+  struct stat st;
+
+  // Check that the link is followed.
+  EXPECT_THAT(fstatat(AT_FDCWD, "/proc/self", &st, 0), SyscallSucceeds());
+  EXPECT_TRUE(S_ISDIR(st.st_mode));
+  EXPECT_FALSE(S_ISLNK(st.st_mode));
+
+  // Check that the flag works.
+  EXPECT_THAT(fstatat(AT_FDCWD, "/proc/self", &st, AT_SYMLINK_NOFOLLOW),
+              SyscallSucceeds());
+  EXPECT_TRUE(S_ISLNK(st.st_mode));
+  EXPECT_FALSE(S_ISDIR(st.st_mode));
+}
+
+TEST_F(StatTest, Nlinks) {
+  TempPath basedir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+
+  // Directory is initially empty, it should contain 2 links (one from itself,
+  // one from ".").
+  EXPECT_THAT(Links(basedir.path()), IsPosixErrorOkAndHolds(2));
+
+  // Create a file in the test directory. Files shouldn't increase the link
+  // count on the base directory.
+  TempPath file1 =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(basedir.path()));
+  EXPECT_THAT(Links(basedir.path()), IsPosixErrorOkAndHolds(2));
+
+  // Create subdirectories. This should increase the link count by 1 per
+  // subdirectory.
+  TempPath dir1 =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(basedir.path()));
+  EXPECT_THAT(Links(basedir.path()), IsPosixErrorOkAndHolds(3));
+  TempPath dir2 =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(basedir.path()));
+  EXPECT_THAT(Links(basedir.path()), IsPosixErrorOkAndHolds(4));
+
+  // Removing directories should reduce the link count.
+  dir1.reset();
+  EXPECT_THAT(Links(basedir.path()), IsPosixErrorOkAndHolds(3));
+  dir2.reset();
+  EXPECT_THAT(Links(basedir.path()), IsPosixErrorOkAndHolds(2));
+
+  // Removing files should have no effect on link count.
+  file1.reset();
+  EXPECT_THAT(Links(basedir.path()), IsPosixErrorOkAndHolds(2));
+}
+
+TEST_F(StatTest, BlocksIncreaseOnWrite) {
+  struct stat st;
+
+  // Stat the empty file.
+  ASSERT_THAT(fstat(test_file_fd_.get(), &st), SyscallSucceeds());
+
+  const int initial_blocks = st.st_blocks;
+
+  // Write to the file, making sure to exceed the block size.
+  std::vector<char> buf(2 * st.st_blksize, 'a');
+  ASSERT_THAT(write(test_file_fd_.get(), buf.data(), buf.size()),
+              SyscallSucceedsWithValue(buf.size()));
+
+  // Stat the file again, and verify that number of allocated blocks has
+  // increased.
+  ASSERT_THAT(fstat(test_file_fd_.get(), &st), SyscallSucceeds());
+  EXPECT_GT(st.st_blocks, initial_blocks);
+}
+
+TEST_F(StatTest, PathNotCleaned) {
+  TempPath basedir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+
+  // Create a file in the basedir.
+  TempPath file =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(basedir.path()));
+
+  // Stating the file directly should succeed.
+  struct stat buf;
+  EXPECT_THAT(lstat(file.path().c_str(), &buf), SyscallSucceeds());
+
+  // Try to stat the file using a directory that does not exist followed by
+  // "..".  If the path is cleaned prior to stating (which it should not be)
+  // then this will succeed.
+  const std::string bad_path = JoinPath("/does_not_exist/..", file.path());
+  EXPECT_THAT(lstat(bad_path.c_str(), &buf), SyscallFailsWithErrno(ENOENT));
+}
+
+TEST_F(StatTest, PathCanContainDotDot) {
+  TempPath basedir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  TempPath subdir =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(basedir.path()));
+  const std::string subdir_name = std::string(Basename(subdir.path()));
+
+  // Create a file in the subdir.
+  TempPath file =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(subdir.path()));
+  const std::string file_name = std::string(Basename(file.path()));
+
+  // Stat the file through a path that includes '..' and '.' but still resolves
+  // to the file.
+  const std::string good_path =
+      JoinPath(basedir.path(), subdir_name, "..", subdir_name, ".", file_name);
+  struct stat buf;
+  EXPECT_THAT(lstat(good_path.c_str(), &buf), SyscallSucceeds());
+}
+
+TEST_F(StatTest, PathCanContainEmptyComponent) {
+  TempPath basedir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+
+  // Create a file in the basedir.
+  TempPath file =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(basedir.path()));
+  const std::string file_name = std::string(Basename(file.path()));
+
+  // Stat the file through a path that includes an empty component.  We have to
+  // build this ourselves because JoinPath automatically removes empty
+  // components.
+  const std::string good_path = absl::StrCat(basedir.path(), "//", file_name);
+  struct stat buf;
+  EXPECT_THAT(lstat(good_path.c_str(), &buf), SyscallSucceeds());
+}
+
+TEST_F(StatTest, TrailingSlashNotCleanedReturnsENOTDIR) {
+  TempPath basedir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+
+  // Create a file in the basedir.
+  TempPath file =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(basedir.path()));
+
+  // Stat the file with an extra "/" on the end of it.  Since file is not a
+  // directory, this should return ENOTDIR.
+  const std::string bad_path = absl::StrCat(file.path(), "/");
+  struct stat buf;
+  EXPECT_THAT(lstat(bad_path.c_str(), &buf), SyscallFailsWithErrno(ENOTDIR));
+}
+
+// Test fstatating a symlink directory.
+TEST_F(StatTest, FstatatSymlinkDir) {
+  // Create a directory and symlink to it.
+  const auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+
+  const std::string symlink_to_dir = NewTempAbsPath();
+  EXPECT_THAT(symlink(dir.path().c_str(), symlink_to_dir.c_str()),
+              SyscallSucceeds());
+  auto cleanup = Cleanup([&symlink_to_dir]() {
+    EXPECT_THAT(unlink(symlink_to_dir.c_str()), SyscallSucceeds());
+  });
+
+  // Fstatat the link with AT_SYMLINK_NOFOLLOW should return symlink data.
+  struct stat st = {};
+  EXPECT_THAT(
+      fstatat(AT_FDCWD, symlink_to_dir.c_str(), &st, AT_SYMLINK_NOFOLLOW),
+      SyscallSucceeds());
+  EXPECT_FALSE(S_ISDIR(st.st_mode));
+  EXPECT_TRUE(S_ISLNK(st.st_mode));
+
+  // Fstatat the link should return dir data.
+  EXPECT_THAT(fstatat(AT_FDCWD, symlink_to_dir.c_str(), &st, 0),
+              SyscallSucceeds());
+  EXPECT_TRUE(S_ISDIR(st.st_mode));
+  EXPECT_FALSE(S_ISLNK(st.st_mode));
+}
+
+// Test fstatating a symlink directory with trailing slash.
+TEST_F(StatTest, FstatatSymlinkDirWithTrailingSlash) {
+  // Create a directory and symlink to it.
+  const auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const std::string symlink_to_dir = NewTempAbsPath();
+  EXPECT_THAT(symlink(dir.path().c_str(), symlink_to_dir.c_str()),
+              SyscallSucceeds());
+  auto cleanup = Cleanup([&symlink_to_dir]() {
+    EXPECT_THAT(unlink(symlink_to_dir.c_str()), SyscallSucceeds());
+  });
+
+  // Fstatat on the symlink with a trailing slash should return the directory
+  // data.
+  struct stat st = {};
+  EXPECT_THAT(
+      fstatat(AT_FDCWD, absl::StrCat(symlink_to_dir, "/").c_str(), &st, 0),
+      SyscallSucceeds());
+  EXPECT_TRUE(S_ISDIR(st.st_mode));
+  EXPECT_FALSE(S_ISLNK(st.st_mode));
+
+  // Fstatat on the symlink with a trailing slash with AT_SYMLINK_NOFOLLOW
+  // should return the directory data.
+  // Symlink to directory with trailing slash will ignore AT_SYMLINK_NOFOLLOW.
+  EXPECT_THAT(fstatat(AT_FDCWD, absl::StrCat(symlink_to_dir, "/").c_str(), &st,
+                      AT_SYMLINK_NOFOLLOW),
+              SyscallSucceeds());
+  EXPECT_TRUE(S_ISDIR(st.st_mode));
+  EXPECT_FALSE(S_ISLNK(st.st_mode));
+}
+
+// Test fstatating a symlink directory with a trailing slash
+// should return same stat data with fstatating directory.
+TEST_F(StatTest, FstatatSymlinkDirWithTrailingSlashSameInode) {
+  // Create a directory and symlink to it.
+  const auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+
+  // We are going to assert that the symlink inode id is the same as the linked
+  // dir's inode id. In order for the inode id to be stable across
+  // save/restore, it must be kept open. The FileDescriptor type will do that
+  // for us automatically.
+  auto fd = ASSERT_NO_ERRNO_AND_VALUE(Open(dir.path(), O_RDONLY | O_DIRECTORY));
+
+  const std::string symlink_to_dir = NewTempAbsPath();
+  EXPECT_THAT(symlink(dir.path().c_str(), symlink_to_dir.c_str()),
+              SyscallSucceeds());
+  auto cleanup = Cleanup([&symlink_to_dir]() {
+    EXPECT_THAT(unlink(symlink_to_dir.c_str()), SyscallSucceeds());
+  });
+
+  // Fstatat on the symlink with a trailing slash should return the directory
+  // data.
+  struct stat st = {};
+  EXPECT_THAT(fstatat(AT_FDCWD, absl::StrCat(symlink_to_dir, "/").c_str(), &st,
+                      AT_SYMLINK_NOFOLLOW),
+              SyscallSucceeds());
+  EXPECT_TRUE(S_ISDIR(st.st_mode));
+
+  // Dir and symlink should point to same inode.
+  struct stat st_dir = {};
+  EXPECT_THAT(
+      fstatat(AT_FDCWD, dir.path().c_str(), &st_dir, AT_SYMLINK_NOFOLLOW),
+      SyscallSucceeds());
+  EXPECT_EQ(st.st_ino, st_dir.st_ino);
+}
+
+TEST_F(StatTest, LeadingDoubleSlash) {
+  // Create a file, and make sure we can stat it.
+  TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  struct stat st;
+  ASSERT_THAT(lstat(file.path().c_str(), &st), SyscallSucceeds());
+
+  // Now add an extra leading slash.
+  const std::string double_slash_path = absl::StrCat("/", file.path());
+  ASSERT_TRUE(absl::StartsWith(double_slash_path, "//"));
+
+  // We should be able to stat the new path, and it should resolve to the same
+  // file (same device and inode).
+  struct stat double_slash_st;
+  ASSERT_THAT(lstat(double_slash_path.c_str(), &double_slash_st),
+              SyscallSucceeds());
+  EXPECT_EQ(st.st_dev, double_slash_st.st_dev);
+  EXPECT_EQ(st.st_ino, double_slash_st.st_ino);
+}
+
+// Test that a rename doesn't change the underlying file.
+TEST_F(StatTest, StatDoesntChangeAfterRename) {
+  const TempPath old_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const TempPath new_path(NewTempAbsPath());
+
+  struct stat st_old = {};
+  struct stat st_new = {};
+
+  ASSERT_THAT(stat(old_dir.path().c_str(), &st_old), SyscallSucceeds());
+  ASSERT_THAT(rename(old_dir.path().c_str(), new_path.path().c_str()),
+              SyscallSucceeds());
+  ASSERT_THAT(stat(new_path.path().c_str(), &st_new), SyscallSucceeds());
+
+  EXPECT_EQ(st_old.st_nlink, st_new.st_nlink);
+  EXPECT_EQ(st_old.st_dev, st_new.st_dev);
+  EXPECT_EQ(st_old.st_ino, st_new.st_ino);
+  EXPECT_EQ(st_old.st_mode, st_new.st_mode);
+  EXPECT_EQ(st_old.st_uid, st_new.st_uid);
+  EXPECT_EQ(st_old.st_gid, st_new.st_gid);
+  EXPECT_EQ(st_old.st_size, st_new.st_size);
+}
+
+// Test link counts with a regular file as the child.
+TEST_F(StatTest, LinkCountsWithRegularFileChild) {
+  const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+
+  struct stat st_parent_before = {};
+  ASSERT_THAT(stat(dir.path().c_str(), &st_parent_before), SyscallSucceeds());
+  EXPECT_EQ(st_parent_before.st_nlink, 2);
+
+  // Adding a regular file doesn't adjust the parent's link count.
+  const TempPath child =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir.path()));
+
+  struct stat st_parent_after = {};
+  ASSERT_THAT(stat(dir.path().c_str(), &st_parent_after), SyscallSucceeds());
+  EXPECT_EQ(st_parent_after.st_nlink, 2);
+
+  // The child should have a single link from the parent.
+  struct stat st_child = {};
+  ASSERT_THAT(stat(child.path().c_str(), &st_child), SyscallSucceeds());
+  EXPECT_TRUE(S_ISREG(st_child.st_mode));
+  EXPECT_EQ(st_child.st_nlink, 1);
+
+  // Finally unlinking the child should not affect the parent's link count.
+  ASSERT_THAT(unlink(child.path().c_str()), SyscallSucceeds());
+  ASSERT_THAT(stat(dir.path().c_str(), &st_parent_after), SyscallSucceeds());
+  EXPECT_EQ(st_parent_after.st_nlink, 2);
+}
+
+// This test verifies that inodes remain around when there is an open fd
+// after link count hits 0.
+TEST_F(StatTest, ZeroLinksOpenFdRegularFileChild_NoRandomSave) {
+  // Setting the enviornment variable GVISOR_GOFER_UNCACHED to any value
+  // will prevent this test from running, see the tmpfs lifecycle.
+  //
+  // We need to support this because when a file is unlinked and we forward
+  // the stat to the gofer it would return ENOENT.
+  const char* uncached_gofer = getenv("GVISOR_GOFER_UNCACHED");
+  SKIP_IF(uncached_gofer != nullptr);
+
+  // We don't support saving unlinked files.
+  const DisableSave ds;
+
+  const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const TempPath child = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      dir.path(), "hello", TempPath::kDefaultFileMode));
+
+  // The child should have a single link from the parent.
+  struct stat st_child_before = {};
+  ASSERT_THAT(stat(child.path().c_str(), &st_child_before), SyscallSucceeds());
+  EXPECT_TRUE(S_ISREG(st_child_before.st_mode));
+  EXPECT_EQ(st_child_before.st_nlink, 1);
+  EXPECT_EQ(st_child_before.st_size, 5);  // Hello is 5 bytes.
+
+  // Open the file so we can fstat after unlinking.
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(child.path(), O_RDONLY));
+
+  // Now a stat should return ENOENT but we should still be able to stat
+  // via the open fd and fstat.
+  ASSERT_THAT(unlink(child.path().c_str()), SyscallSucceeds());
+
+  // Since the file has no more links stat should fail.
+  struct stat st_child_after = {};
+  ASSERT_THAT(stat(child.path().c_str(), &st_child_after),
+              SyscallFailsWithErrno(ENOENT));
+
+  // Fstat should still allow us to access the same file via the fd.
+  struct stat st_child_fd = {};
+  ASSERT_THAT(fstat(fd.get(), &st_child_fd), SyscallSucceeds());
+  EXPECT_EQ(st_child_before.st_dev, st_child_fd.st_dev);
+  EXPECT_EQ(st_child_before.st_ino, st_child_fd.st_ino);
+  EXPECT_EQ(st_child_before.st_mode, st_child_fd.st_mode);
+  EXPECT_EQ(st_child_before.st_uid, st_child_fd.st_uid);
+  EXPECT_EQ(st_child_before.st_gid, st_child_fd.st_gid);
+  EXPECT_EQ(st_child_before.st_size, st_child_fd.st_size);
+
+  // TODO(b/34861058): This isn't ideal but since fstatfs(2) will always return
+  // OVERLAYFS_SUPER_MAGIC we have no way to know if this fs is backed by a
+  // gofer which doesn't support links.
+  EXPECT_TRUE(st_child_fd.st_nlink == 0 || st_child_fd.st_nlink == 1);
+}
+
+// Test link counts with a directory as the child.
+TEST_F(StatTest, LinkCountsWithDirChild) {
+  const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+
+  // Before a child is added the two links are "." and the link from the parent.
+  struct stat st_parent_before = {};
+  ASSERT_THAT(stat(dir.path().c_str(), &st_parent_before), SyscallSucceeds());
+  EXPECT_EQ(st_parent_before.st_nlink, 2);
+
+  // Create a subdirectory and stat for the parent link counts.
+  const TempPath sub_dir =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(dir.path()));
+
+  // The three links are ".", the link from the parent, and the link from
+  // the child as "..".
+  struct stat st_parent_after = {};
+  ASSERT_THAT(stat(dir.path().c_str(), &st_parent_after), SyscallSucceeds());
+  EXPECT_EQ(st_parent_after.st_nlink, 3);
+
+  // The child will have 1 link from the parent and 1 link which represents ".".
+  struct stat st_child = {};
+  ASSERT_THAT(stat(sub_dir.path().c_str(), &st_child), SyscallSucceeds());
+  EXPECT_TRUE(S_ISDIR(st_child.st_mode));
+  EXPECT_EQ(st_child.st_nlink, 2);
+
+  // Finally delete the child dir and the parent link count should return to 2.
+  ASSERT_THAT(rmdir(sub_dir.path().c_str()), SyscallSucceeds());
+  ASSERT_THAT(stat(dir.path().c_str(), &st_parent_after), SyscallSucceeds());
+
+  // Now we should only have links from the parent and "." since the subdir
+  // has been removed.
+  EXPECT_EQ(st_parent_after.st_nlink, 2);
+}
+
+// Test statting a child of a non-directory.
+TEST_F(StatTest, ChildOfNonDir) {
+  // Create a path that has a child of a regular file.
+  const std::string filename = JoinPath(test_file_name_, "child");
+
+  // Statting the path should return ENOTDIR.
+  struct stat st;
+  EXPECT_THAT(lstat(filename.c_str(), &st), SyscallFailsWithErrno(ENOTDIR));
+}
+
+// Test lstating a symlink directory.
+TEST_F(StatTest, LstatSymlinkDir) {
+  // Create a directory and symlink to it.
+  const auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const std::string symlink_to_dir = NewTempAbsPath();
+  EXPECT_THAT(symlink(dir.path().c_str(), symlink_to_dir.c_str()),
+              SyscallSucceeds());
+  auto cleanup = Cleanup([&symlink_to_dir]() {
+    EXPECT_THAT(unlink(symlink_to_dir.c_str()), SyscallSucceeds());
+  });
+
+  // Lstat on the symlink should return symlink data.
+  struct stat st = {};
+  ASSERT_THAT(lstat(symlink_to_dir.c_str(), &st), SyscallSucceeds());
+  EXPECT_FALSE(S_ISDIR(st.st_mode));
+  EXPECT_TRUE(S_ISLNK(st.st_mode));
+
+  // Lstat on the symlink with a trailing slash should return the directory
+  // data.
+  ASSERT_THAT(lstat(absl::StrCat(symlink_to_dir, "/").c_str(), &st),
+              SyscallSucceeds());
+  EXPECT_TRUE(S_ISDIR(st.st_mode));
+  EXPECT_FALSE(S_ISLNK(st.st_mode));
+}
+
+// Verify that we get an ELOOP from too many symbolic links even when there
+// are directories in the middle.
+TEST_F(StatTest, LstatELOOPPath) {
+  const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  std::string subdir_base = "subdir";
+  ASSERT_THAT(mkdir(JoinPath(dir.path(), subdir_base).c_str(), 0755),
+              SyscallSucceeds());
+
+  std::string target = JoinPath(dir.path(), subdir_base, subdir_base);
+  std::string dst = JoinPath("..", subdir_base);
+  ASSERT_THAT(symlink(dst.c_str(), target.c_str()), SyscallSucceeds());
+  auto cleanup = Cleanup(
+      [&target]() { EXPECT_THAT(unlink(target.c_str()), SyscallSucceeds()); });
+
+  // Now build a path which is /subdir/subdir/... repeated many times so that
+  // we can build a path that is shorter than PATH_MAX but can still cause
+  // too many symbolic links. Note: Every other subdir is actually a directory
+  // so we're not in a situation where it's a -> b -> a -> b, where a and b
+  // are symbolic links.
+  std::string path = dir.path();
+  std::string subdir_append = absl::StrCat("/", subdir_base);
+  do {
+    absl::StrAppend(&path, subdir_append);
+    // Keep appending /subdir until we would overflow PATH_MAX.
+  } while ((path.size() + subdir_append.size()) < PATH_MAX);
+
+  struct stat s = {};
+  ASSERT_THAT(lstat(path.c_str(), &s), SyscallFailsWithErrno(ELOOP));
+}
+
+// Ensure that inode allocation for anonymous devices work correctly across
+// save/restore. In particular, inode numbers should be unique across S/R.
+TEST(SimpleStatTest, AnonDeviceAllocatesUniqueInodesAcrossSaveRestore) {
+  // Use sockets as a convenient way to create inodes on an anonymous device.
+  int fd;
+  ASSERT_THAT(fd = socket(AF_UNIX, SOCK_STREAM, 0), SyscallSucceeds());
+  FileDescriptor fd1(fd);
+  MaybeSave();
+  ASSERT_THAT(fd = socket(AF_UNIX, SOCK_STREAM, 0), SyscallSucceeds());
+  FileDescriptor fd2(fd);
+
+  struct stat st1;
+  struct stat st2;
+  ASSERT_THAT(fstat(fd1.get(), &st1), SyscallSucceeds());
+  ASSERT_THAT(fstat(fd2.get(), &st2), SyscallSucceeds());
+
+  // The two fds should have different inode numbers.
+  EXPECT_NE(st2.st_ino, st1.st_ino);
+
+  // Verify again after another S/R cycle. The inode numbers should remain the
+  // same.
+  MaybeSave();
+
+  struct stat st1_after;
+  struct stat st2_after;
+  ASSERT_THAT(fstat(fd1.get(), &st1_after), SyscallSucceeds());
+  ASSERT_THAT(fstat(fd2.get(), &st2_after), SyscallSucceeds());
+
+  EXPECT_EQ(st1_after.st_ino, st1.st_ino);
+  EXPECT_EQ(st2_after.st_ino, st2.st_ino);
+}
+
+#ifndef SYS_statx
+#if defined(__x86_64__)
+#define SYS_statx 332
+#elif defined(__aarch64__)
+#define SYS_statx 291
+#else
+#error "Unknown architecture"
+#endif
+#endif  // SYS_statx
+
+#ifndef STATX_ALL
+#define STATX_ALL 0x00000fffU
+#endif  // STATX_ALL
+
+// struct kernel_statx_timestamp is a Linux statx_timestamp struct.
+struct kernel_statx_timestamp {
+  int64_t tv_sec;
+  uint32_t tv_nsec;
+  int32_t __reserved;
+};
+
+// struct kernel_statx is a Linux statx struct. Old versions of glibc do not
+// expose it. See include/uapi/linux/stat.h
+struct kernel_statx {
+  uint32_t stx_mask;
+  uint32_t stx_blksize;
+  uint64_t stx_attributes;
+  uint32_t stx_nlink;
+  uint32_t stx_uid;
+  uint32_t stx_gid;
+  uint16_t stx_mode;
+  uint16_t __spare0[1];
+  uint64_t stx_ino;
+  uint64_t stx_size;
+  uint64_t stx_blocks;
+  uint64_t stx_attributes_mask;
+  struct kernel_statx_timestamp stx_atime;
+  struct kernel_statx_timestamp stx_btime;
+  struct kernel_statx_timestamp stx_ctime;
+  struct kernel_statx_timestamp stx_mtime;
+  uint32_t stx_rdev_major;
+  uint32_t stx_rdev_minor;
+  uint32_t stx_dev_major;
+  uint32_t stx_dev_minor;
+  uint64_t __spare2[14];
+};
+
+int statx(int dirfd, const char* pathname, int flags, unsigned int mask,
+          struct kernel_statx* statxbuf) {
+  return syscall(SYS_statx, dirfd, pathname, flags, mask, statxbuf);
+}
+
+TEST_F(StatTest, StatxAbsPath) {
+  SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, nullptr) < 0 &&
+          errno == ENOSYS);
+
+  struct kernel_statx stx;
+  EXPECT_THAT(statx(-1, test_file_name_.c_str(), 0, STATX_ALL, &stx),
+              SyscallSucceeds());
+  EXPECT_TRUE(S_ISREG(stx.stx_mode));
+}
+
+TEST_F(StatTest, StatxRelPathDirFD) {
+  SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, nullptr) < 0 &&
+          errno == ENOSYS);
+
+  struct kernel_statx stx;
+  auto const dirfd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(GetAbsoluteTestTmpdir(), O_RDONLY));
+  auto filename = std::string(Basename(test_file_name_));
+
+  EXPECT_THAT(statx(dirfd.get(), filename.c_str(), 0, STATX_ALL, &stx),
+              SyscallSucceeds());
+  EXPECT_TRUE(S_ISREG(stx.stx_mode));
+}
+
+TEST_F(StatTest, StatxRelPathCwd) {
+  SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, nullptr) < 0 &&
+          errno == ENOSYS);
+
+  ASSERT_THAT(chdir(GetAbsoluteTestTmpdir().c_str()), SyscallSucceeds());
+  auto filename = std::string(Basename(test_file_name_));
+  struct kernel_statx stx;
+  EXPECT_THAT(statx(AT_FDCWD, filename.c_str(), 0, STATX_ALL, &stx),
+              SyscallSucceeds());
+  EXPECT_TRUE(S_ISREG(stx.stx_mode));
+}
+
+TEST_F(StatTest, StatxEmptyPath) {
+  SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, nullptr) < 0 &&
+          errno == ENOSYS);
+
+  const auto fd = ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDONLY));
+  struct kernel_statx stx;
+  EXPECT_THAT(statx(fd.get(), "", AT_EMPTY_PATH, STATX_ALL, &stx),
+              SyscallSucceeds());
+  EXPECT_TRUE(S_ISREG(stx.stx_mode));
+}
+
+TEST_F(StatTest, StatxDoesNotRejectExtraneousMaskBits) {
+  SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, nullptr) < 0 &&
+          errno == ENOSYS);
+
+  struct kernel_statx stx;
+  // Set all mask bits except for STATX__RESERVED.
+  uint mask = 0xffffffff & ~0x80000000;
+  EXPECT_THAT(statx(-1, test_file_name_.c_str(), 0, mask, &stx),
+              SyscallSucceeds());
+  EXPECT_TRUE(S_ISREG(stx.stx_mode));
+}
+
+TEST_F(StatTest, StatxRejectsReservedMaskBit) {
+  SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, nullptr) < 0 &&
+          errno == ENOSYS);
+
+  struct kernel_statx stx;
+  // Set STATX__RESERVED in the mask.
+  EXPECT_THAT(statx(-1, test_file_name_.c_str(), 0, 0x80000000, &stx),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_F(StatTest, StatxSymlink) {
+  SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, nullptr) < 0 &&
+          errno == ENOSYS);
+
+  std::string parent_dir = "/tmp";
+  TempPath link = ASSERT_NO_ERRNO_AND_VALUE(
+      TempPath::CreateSymlinkTo(parent_dir, test_file_name_));
+  std::string p = link.path();
+
+  struct kernel_statx stx;
+  EXPECT_THAT(statx(AT_FDCWD, p.c_str(), AT_SYMLINK_NOFOLLOW, STATX_ALL, &stx),
+              SyscallSucceeds());
+  EXPECT_TRUE(S_ISLNK(stx.stx_mode));
+  EXPECT_THAT(statx(AT_FDCWD, p.c_str(), 0, STATX_ALL, &stx),
+              SyscallSucceeds());
+  EXPECT_TRUE(S_ISREG(stx.stx_mode));
+}
+
+TEST_F(StatTest, StatxInvalidFlags) {
+  SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, nullptr) < 0 &&
+          errno == ENOSYS);
+
+  struct kernel_statx stx;
+  EXPECT_THAT(statx(AT_FDCWD, test_file_name_.c_str(), 12345, 0, &stx),
+              SyscallFailsWithErrno(EINVAL));
+
+  // Sync flags are mutually exclusive.
+  EXPECT_THAT(statx(AT_FDCWD, test_file_name_.c_str(),
+                    AT_STATX_FORCE_SYNC | AT_STATX_DONT_SYNC, 0, &stx),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/stat_times.cc b/test/syscalls/linux/stat_times.cc
new file mode 100644
index 000000000..68c0bef09
--- /dev/null
+++ b/test/syscalls/linux/stat_times.cc
@@ -0,0 +1,303 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <sys/stat.h>
+
+#include <tuple>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+using ::testing::IsEmpty;
+using ::testing::Not;
+
+std::tuple<absl::Time, absl::Time, absl::Time> GetTime(const TempPath& file) {
+  struct stat statbuf = {};
+  EXPECT_THAT(stat(file.path().c_str(), &statbuf), SyscallSucceeds());
+
+  const auto atime = absl::TimeFromTimespec(statbuf.st_atim);
+  const auto mtime = absl::TimeFromTimespec(statbuf.st_mtim);
+  const auto ctime = absl::TimeFromTimespec(statbuf.st_ctim);
+  return std::make_tuple(atime, mtime, ctime);
+}
+
+enum class AtimeEffect {
+  Unchanged,
+  Changed,
+};
+
+enum class MtimeEffect {
+  Unchanged,
+  Changed,
+};
+
+enum class CtimeEffect {
+  Unchanged,
+  Changed,
+};
+
+// Tests that fn modifies the atime/mtime/ctime of path as specified.
+void CheckTimes(const TempPath& path, std::function<void()> fn,
+                AtimeEffect atime_effect, MtimeEffect mtime_effect,
+                CtimeEffect ctime_effect) {
+  absl::Time atime, mtime, ctime;
+  std::tie(atime, mtime, ctime) = GetTime(path);
+
+  // FIXME(b/132819225): gVisor filesystem timestamps inconsistently use the
+  // internal or host clock, which may diverge slightly. Allow some slack on
+  // times to account for the difference.
+  //
+  // Here we sleep for 1s so that initial creation of path doesn't fall within
+  // the before slack window.
+  absl::SleepFor(absl::Seconds(1));
+
+  const absl::Time before = absl::Now() - absl::Seconds(1);
+
+  // Perform the op.
+  fn();
+
+  const absl::Time after = absl::Now() + absl::Seconds(1);
+
+  absl::Time atime2, mtime2, ctime2;
+  std::tie(atime2, mtime2, ctime2) = GetTime(path);
+
+  if (atime_effect == AtimeEffect::Changed) {
+    EXPECT_LE(before, atime2);
+    EXPECT_GE(after, atime2);
+    EXPECT_GT(atime2, atime);
+  } else {
+    EXPECT_EQ(atime2, atime);
+  }
+
+  if (mtime_effect == MtimeEffect::Changed) {
+    EXPECT_LE(before, mtime2);
+    EXPECT_GE(after, mtime2);
+    EXPECT_GT(mtime2, mtime);
+  } else {
+    EXPECT_EQ(mtime2, mtime);
+  }
+
+  if (ctime_effect == CtimeEffect::Changed) {
+    EXPECT_LE(before, ctime2);
+    EXPECT_GE(after, ctime2);
+    EXPECT_GT(ctime2, ctime);
+  } else {
+    EXPECT_EQ(ctime2, ctime);
+  }
+}
+
+// File creation time is reflected in atime, mtime, and ctime.
+TEST(StatTimesTest, FileCreation) {
+  const DisableSave ds;  // Timing-related test.
+
+  // Get a time for when the file is created.
+  //
+  // FIXME(b/132819225): See above.
+  const absl::Time before = absl::Now() - absl::Seconds(1);
+  const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  const absl::Time after = absl::Now() + absl::Seconds(1);
+
+  absl::Time atime, mtime, ctime;
+  std::tie(atime, mtime, ctime) = GetTime(file);
+
+  EXPECT_LE(before, atime);
+  EXPECT_LE(before, mtime);
+  EXPECT_LE(before, ctime);
+  EXPECT_GE(after, atime);
+  EXPECT_GE(after, mtime);
+  EXPECT_GE(after, ctime);
+}
+
+// Calling chmod on a file changes ctime.
+TEST(StatTimesTest, FileChmod) {
+  TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+
+  auto fn = [&] {
+    EXPECT_THAT(chmod(file.path().c_str(), 0666), SyscallSucceeds());
+  };
+  CheckTimes(file, fn, AtimeEffect::Unchanged, MtimeEffect::Unchanged,
+             CtimeEffect::Changed);
+}
+
+// Renaming a file changes ctime.
+TEST(StatTimesTest, FileRename) {
+  TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+
+  const std::string newpath = NewTempAbsPath();
+
+  auto fn = [&] {
+    ASSERT_THAT(rename(file.release().c_str(), newpath.c_str()),
+                SyscallSucceeds());
+    file.reset(newpath);
+  };
+  CheckTimes(file, fn, AtimeEffect::Unchanged, MtimeEffect::Unchanged,
+             CtimeEffect::Changed);
+}
+
+// Renaming a file changes ctime, even with an open FD.
+//
+// NOTE(b/132732387): This is a regression test for fs/gofer failing to update
+// cached ctime.
+TEST(StatTimesTest, FileRenameOpenFD) {
+  TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+
+  // Holding an FD shouldn't affect behavior.
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY));
+
+  const std::string newpath = NewTempAbsPath();
+
+  // FIXME(b/132814682): Restore fails with an uncached gofer and an open FD
+  // across rename.
+  //
+  // N.B. The logic here looks backwards because it isn't possible to
+  // conditionally disable save, only conditionally re-enable it.
+  DisableSave ds;
+  if (!getenv("GVISOR_GOFER_UNCACHED")) {
+    ds.reset();
+  }
+
+  auto fn = [&] {
+    ASSERT_THAT(rename(file.release().c_str(), newpath.c_str()),
+                SyscallSucceeds());
+    file.reset(newpath);
+  };
+  CheckTimes(file, fn, AtimeEffect::Unchanged, MtimeEffect::Unchanged,
+             CtimeEffect::Changed);
+}
+
+// Calling utimes on a file changes ctime and the time that we ask to change
+// (atime to now in this case).
+TEST(StatTimesTest, FileUtimes) {
+  TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+
+  auto fn = [&] {
+    const struct timespec ts[2] = {{0, UTIME_NOW}, {0, UTIME_OMIT}};
+    ASSERT_THAT(utimensat(AT_FDCWD, file.path().c_str(), ts, 0),
+                SyscallSucceeds());
+  };
+  CheckTimes(file, fn, AtimeEffect::Changed, MtimeEffect::Unchanged,
+             CtimeEffect::Changed);
+}
+
+// Truncating a file changes mtime and ctime.
+TEST(StatTimesTest, FileTruncate) {
+  const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(
+      TempPath::CreateFileWith(GetAbsoluteTestTmpdir(), "yaaass", 0666));
+
+  auto fn = [&] {
+    EXPECT_THAT(truncate(file.path().c_str(), 0), SyscallSucceeds());
+  };
+  CheckTimes(file, fn, AtimeEffect::Unchanged, MtimeEffect::Changed,
+             CtimeEffect::Changed);
+}
+
+// Writing a file changes mtime and ctime.
+TEST(StatTimesTest, FileWrite) {
+  const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(
+      TempPath::CreateFileWith(GetAbsoluteTestTmpdir(), "yaaass", 0666));
+
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0));
+
+  auto fn = [&] {
+    const std::string contents = "all the single dollars";
+    EXPECT_THAT(WriteFd(fd.get(), contents.data(), contents.size()),
+                SyscallSucceeds());
+  };
+  CheckTimes(file, fn, AtimeEffect::Unchanged, MtimeEffect::Changed,
+             CtimeEffect::Changed);
+}
+
+// Reading a file changes atime.
+TEST(StatTimesTest, FileRead) {
+  const std::string contents = "bills bills bills";
+  const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(
+      TempPath::CreateFileWith(GetAbsoluteTestTmpdir(), contents, 0666));
+
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY, 0));
+
+  auto fn = [&] {
+    char buf[20];
+    ASSERT_THAT(ReadFd(fd.get(), buf, sizeof(buf)),
+                SyscallSucceedsWithValue(contents.size()));
+  };
+  CheckTimes(file, fn, AtimeEffect::Changed, MtimeEffect::Unchanged,
+             CtimeEffect::Unchanged);
+}
+
+// Listing files in a directory changes atime.
+TEST(StatTimesTest, DirList) {
+  const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const TempPath file =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir.path()));
+
+  auto fn = [&] {
+    const auto contents = ASSERT_NO_ERRNO_AND_VALUE(ListDir(dir.path(), false));
+    EXPECT_THAT(contents, Not(IsEmpty()));
+  };
+  CheckTimes(dir, fn, AtimeEffect::Changed, MtimeEffect::Unchanged,
+             CtimeEffect::Unchanged);
+}
+
+// Creating a file in a directory changes mtime and ctime.
+TEST(StatTimesTest, DirCreateFile) {
+  const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+
+  TempPath file;
+  auto fn = [&] {
+    file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir.path()));
+  };
+  CheckTimes(dir, fn, AtimeEffect::Unchanged, MtimeEffect::Changed,
+             CtimeEffect::Changed);
+}
+
+// Creating a directory in a directory changes mtime and ctime.
+TEST(StatTimesTest, DirCreateDir) {
+  const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+
+  TempPath dir2;
+  auto fn = [&] {
+    dir2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(dir.path()));
+  };
+  CheckTimes(dir, fn, AtimeEffect::Unchanged, MtimeEffect::Changed,
+             CtimeEffect::Changed);
+}
+
+// Removing a file from a directory changes mtime and ctime.
+TEST(StatTimesTest, DirRemoveFile) {
+  const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+
+  TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir.path()));
+  auto fn = [&] { file.reset(); };
+  CheckTimes(dir, fn, AtimeEffect::Unchanged, MtimeEffect::Changed,
+             CtimeEffect::Changed);
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/statfs.cc b/test/syscalls/linux/statfs.cc
new file mode 100644
index 000000000..aca51d30f
--- /dev/null
+++ b/test/syscalls/linux/statfs.cc
@@ -0,0 +1,82 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <sys/statfs.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(StatfsTest, CannotStatBadPath) {
+  auto temp_file = NewTempAbsPathInDir("/tmp");
+
+  struct statfs st;
+  EXPECT_THAT(statfs(temp_file.c_str(), &st), SyscallFailsWithErrno(ENOENT));
+}
+
+TEST(StatfsTest, InternalTmpfs) {
+  auto temp_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+
+  struct statfs st;
+  EXPECT_THAT(statfs(temp_file.path().c_str(), &st), SyscallSucceeds());
+}
+
+TEST(StatfsTest, InternalDevShm) {
+  struct statfs st;
+  EXPECT_THAT(statfs("/dev/shm", &st), SyscallSucceeds());
+}
+
+TEST(StatfsTest, NameLen) {
+  struct statfs st;
+  EXPECT_THAT(statfs("/dev/shm", &st), SyscallSucceeds());
+
+  // This assumes that /dev/shm is tmpfs.
+  EXPECT_EQ(st.f_namelen, NAME_MAX);
+}
+
+TEST(FstatfsTest, CannotStatBadFd) {
+  struct statfs st;
+  EXPECT_THAT(fstatfs(-1, &st), SyscallFailsWithErrno(EBADF));
+}
+
+TEST(FstatfsTest, InternalTmpfs) {
+  auto temp_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(temp_file.path(), O_RDONLY));
+
+  struct statfs st;
+  EXPECT_THAT(fstatfs(fd.get(), &st), SyscallSucceeds());
+}
+
+TEST(FstatfsTest, InternalDevShm) {
+  auto temp_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/shm", O_RDONLY));
+
+  struct statfs st;
+  EXPECT_THAT(fstatfs(fd.get(), &st), SyscallSucceeds());
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/sticky.cc b/test/syscalls/linux/sticky.cc
new file mode 100644
index 000000000..4afed6d08
--- /dev/null
+++ b/test/syscalls/linux/sticky.cc
@@ -0,0 +1,161 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <grp.h>
+#include <sys/prctl.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "absl/flags/flag.h"
+#include "test/util/capability_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/fs_util.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+ABSL_FLAG(int32_t, scratch_uid, 65534, "first scratch UID");
+ABSL_FLAG(int32_t, scratch_gid, 65534, "first scratch GID");
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(StickyTest, StickyBitPermDenied) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SETUID)));
+
+  const TempPath parent = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  EXPECT_THAT(chmod(parent.path().c_str(), 0777 | S_ISVTX), SyscallSucceeds());
+
+  // After changing credentials below, we need to use an open fd to make
+  // modifications in the parent dir, because there is no guarantee that we will
+  // still have the ability to open it.
+  const FileDescriptor parent_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(parent.path(), O_DIRECTORY));
+  ASSERT_THAT(openat(parent_fd.get(), "file", O_CREAT), SyscallSucceeds());
+  ASSERT_THAT(mkdirat(parent_fd.get(), "dir", 0777), SyscallSucceeds());
+  ASSERT_THAT(symlinkat("xyz", parent_fd.get(), "link"), SyscallSucceeds());
+
+  // Drop privileges and change IDs only in child thread, or else this parent
+  // thread won't be able to open some log files after the test ends.
+  ScopedThread([&] {
+    // Drop privileges.
+    if (HaveCapability(CAP_FOWNER).ValueOrDie()) {
+      EXPECT_NO_ERRNO(SetCapability(CAP_FOWNER, false));
+    }
+
+    // Change EUID and EGID.
+    EXPECT_THAT(
+        syscall(SYS_setresgid, -1, absl::GetFlag(FLAGS_scratch_gid), -1),
+        SyscallSucceeds());
+    EXPECT_THAT(
+        syscall(SYS_setresuid, -1, absl::GetFlag(FLAGS_scratch_uid), -1),
+        SyscallSucceeds());
+
+    EXPECT_THAT(renameat(parent_fd.get(), "file", parent_fd.get(), "file2"),
+                SyscallFailsWithErrno(EPERM));
+    EXPECT_THAT(unlinkat(parent_fd.get(), "file", 0),
+                SyscallFailsWithErrno(EPERM));
+    EXPECT_THAT(unlinkat(parent_fd.get(), "dir", AT_REMOVEDIR),
+                SyscallFailsWithErrno(EPERM));
+    EXPECT_THAT(unlinkat(parent_fd.get(), "link", 0),
+                SyscallFailsWithErrno(EPERM));
+  });
+}
+
+TEST(StickyTest, StickyBitSameUID) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SETUID)));
+
+  const TempPath parent = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  EXPECT_THAT(chmod(parent.path().c_str(), 0777 | S_ISVTX), SyscallSucceeds());
+
+  // After changing credentials below, we need to use an open fd to make
+  // modifications in the parent dir, because there is no guarantee that we will
+  // still have the ability to open it.
+  const FileDescriptor parent_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(parent.path(), O_DIRECTORY));
+  ASSERT_THAT(openat(parent_fd.get(), "file", O_CREAT), SyscallSucceeds());
+  ASSERT_THAT(mkdirat(parent_fd.get(), "dir", 0777), SyscallSucceeds());
+  ASSERT_THAT(symlinkat("xyz", parent_fd.get(), "link"), SyscallSucceeds());
+
+  // Drop privileges and change IDs only in child thread, or else this parent
+  // thread won't be able to open some log files after the test ends.
+  ScopedThread([&] {
+    // Drop privileges.
+    if (HaveCapability(CAP_FOWNER).ValueOrDie()) {
+      EXPECT_NO_ERRNO(SetCapability(CAP_FOWNER, false));
+    }
+
+    // Change EGID.
+    EXPECT_THAT(
+        syscall(SYS_setresgid, -1, absl::GetFlag(FLAGS_scratch_gid), -1),
+        SyscallSucceeds());
+
+    // We still have the same EUID.
+    EXPECT_THAT(renameat(parent_fd.get(), "file", parent_fd.get(), "file2"),
+                SyscallSucceeds());
+    EXPECT_THAT(unlinkat(parent_fd.get(), "file2", 0), SyscallSucceeds());
+    EXPECT_THAT(unlinkat(parent_fd.get(), "dir", AT_REMOVEDIR),
+                SyscallSucceeds());
+    EXPECT_THAT(unlinkat(parent_fd.get(), "link", 0), SyscallSucceeds());
+  });
+}
+
+TEST(StickyTest, StickyBitCapFOWNER) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SETUID)));
+
+  const TempPath parent = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  EXPECT_THAT(chmod(parent.path().c_str(), 0777 | S_ISVTX), SyscallSucceeds());
+
+  // After changing credentials below, we need to use an open fd to make
+  // modifications in the parent dir, because there is no guarantee that we will
+  // still have the ability to open it.
+  const FileDescriptor parent_fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(parent.path(), O_DIRECTORY));
+  ASSERT_THAT(openat(parent_fd.get(), "file", O_CREAT), SyscallSucceeds());
+  ASSERT_THAT(mkdirat(parent_fd.get(), "dir", 0777), SyscallSucceeds());
+  ASSERT_THAT(symlinkat("xyz", parent_fd.get(), "link"), SyscallSucceeds());
+
+  // Drop privileges and change IDs only in child thread, or else this parent
+  // thread won't be able to open some log files after the test ends.
+  ScopedThread([&] {
+    // Set PR_SET_KEEPCAPS.
+    EXPECT_THAT(prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0), SyscallSucceeds());
+
+    // Change EUID and EGID.
+    EXPECT_THAT(
+        syscall(SYS_setresgid, -1, absl::GetFlag(FLAGS_scratch_gid), -1),
+        SyscallSucceeds());
+    EXPECT_THAT(
+        syscall(SYS_setresuid, -1, absl::GetFlag(FLAGS_scratch_uid), -1),
+        SyscallSucceeds());
+
+    EXPECT_NO_ERRNO(SetCapability(CAP_FOWNER, true));
+    EXPECT_THAT(renameat(parent_fd.get(), "file", parent_fd.get(), "file2"),
+                SyscallSucceeds());
+    EXPECT_THAT(unlinkat(parent_fd.get(), "file2", 0), SyscallSucceeds());
+    EXPECT_THAT(unlinkat(parent_fd.get(), "dir", AT_REMOVEDIR),
+                SyscallSucceeds());
+    EXPECT_THAT(unlinkat(parent_fd.get(), "link", 0), SyscallSucceeds());
+  });
+}
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/symlink.cc b/test/syscalls/linux/symlink.cc
new file mode 100644
index 000000000..a17ff62e9
--- /dev/null
+++ b/test/syscalls/linux/symlink.cc
@@ -0,0 +1,402 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <fcntl.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <string>
+
+#include "gtest/gtest.h"
+#include "absl/time/clock.h"
+#include "test/util/capability_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/fs_util.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+mode_t FilePermission(const std::string& path) {
+  struct stat buf = {0};
+  TEST_CHECK(lstat(path.c_str(), &buf) == 0);
+  return buf.st_mode & 0777;
+}
+
+// Test that name collisions are checked on the new link path, not the source
+// path. Regression test for b/31782115.
+TEST(SymlinkTest, CanCreateSymlinkWithCachedSourceDirent) {
+  const std::string srcname = NewTempAbsPath();
+  const std::string newname = NewTempAbsPath();
+  const std::string basedir = std::string(Dirname(srcname));
+  ASSERT_EQ(basedir, Dirname(newname));
+
+  ASSERT_THAT(chdir(basedir.c_str()), SyscallSucceeds());
+
+  // Open the source node to cause the underlying dirent to be cached. It will
+  // remain cached while we have the file open.
+  int fd;
+  ASSERT_THAT(fd = open(srcname.c_str(), O_CREAT | O_RDWR, 0666),
+              SyscallSucceeds());
+  FileDescriptor fd_closer(fd);
+
+  // Attempt to create a symlink. If the bug exists, this will fail since the
+  // dirent link creation code will check for a name collision on the source
+  // link name.
+  EXPECT_THAT(symlink(std::string(Basename(srcname)).c_str(),
+                      std::string(Basename(newname)).c_str()),
+              SyscallSucceeds());
+}
+
+TEST(SymlinkTest, CanCreateSymlinkFile) {
+  const std::string oldname = NewTempAbsPath();
+  const std::string newname = NewTempAbsPath();
+
+  int fd;
+  ASSERT_THAT(fd = open(oldname.c_str(), O_CREAT | O_RDWR, 0666),
+              SyscallSucceeds());
+  EXPECT_THAT(close(fd), SyscallSucceeds());
+
+  EXPECT_THAT(symlink(oldname.c_str(), newname.c_str()), SyscallSucceeds());
+  EXPECT_EQ(FilePermission(newname), 0777);
+
+  auto link = ASSERT_NO_ERRNO_AND_VALUE(ReadLink(newname));
+  EXPECT_EQ(oldname, link);
+
+  EXPECT_THAT(unlink(newname.c_str()), SyscallSucceeds());
+  EXPECT_THAT(unlink(oldname.c_str()), SyscallSucceeds());
+}
+
+TEST(SymlinkTest, CanCreateSymlinkDir) {
+  const std::string olddir = NewTempAbsPath();
+  const std::string newdir = NewTempAbsPath();
+
+  EXPECT_THAT(mkdir(olddir.c_str(), 0777), SyscallSucceeds());
+  EXPECT_THAT(symlink(olddir.c_str(), newdir.c_str()), SyscallSucceeds());
+  EXPECT_EQ(FilePermission(newdir), 0777);
+
+  auto link = ASSERT_NO_ERRNO_AND_VALUE(ReadLink(newdir));
+  EXPECT_EQ(olddir, link);
+
+  EXPECT_THAT(unlink(newdir.c_str()), SyscallSucceeds());
+
+  ASSERT_THAT(rmdir(olddir.c_str()), SyscallSucceeds());
+}
+
+TEST(SymlinkTest, CannotCreateSymlinkInReadOnlyDir) {
+  // Drop capabilities that allow us to override file and directory permissions.
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false));
+
+  const std::string olddir = NewTempAbsPath();
+  ASSERT_THAT(mkdir(olddir.c_str(), 0444), SyscallSucceeds());
+
+  const std::string newdir = NewTempAbsPathInDir(olddir);
+  EXPECT_THAT(symlink(olddir.c_str(), newdir.c_str()),
+              SyscallFailsWithErrno(EACCES));
+
+  ASSERT_THAT(rmdir(olddir.c_str()), SyscallSucceeds());
+}
+
+TEST(SymlinkTest, CannotSymlinkOverExistingFile) {
+  const auto oldfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  const auto newfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+
+  EXPECT_THAT(symlink(oldfile.path().c_str(), newfile.path().c_str()),
+              SyscallFailsWithErrno(EEXIST));
+}
+
+TEST(SymlinkTest, CannotSymlinkOverExistingDir) {
+  const auto oldfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  const auto newdir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+
+  EXPECT_THAT(symlink(oldfile.path().c_str(), newdir.path().c_str()),
+              SyscallFailsWithErrno(EEXIST));
+}
+
+TEST(SymlinkTest, OldnameIsEmpty) {
+  const std::string newname = NewTempAbsPath();
+  EXPECT_THAT(symlink("", newname.c_str()), SyscallFailsWithErrno(ENOENT));
+}
+
+TEST(SymlinkTest, OldnameIsDangling) {
+  const std::string newname = NewTempAbsPath();
+  EXPECT_THAT(symlink("/dangling", newname.c_str()), SyscallSucceeds());
+
+  // This is required for S/R random save tests, which pre-run this test
+  // in the same TEST_TMPDIR, which means that we need to clean it for any
+  // operations exclusively creating files, like symlink above.
+  EXPECT_THAT(unlink(newname.c_str()), SyscallSucceeds());
+}
+
+TEST(SymlinkTest, NewnameCannotExist) {
+  const std::string newname =
+      JoinPath(GetAbsoluteTestTmpdir(), "thisdoesnotexist", "foo");
+  EXPECT_THAT(symlink("/thisdoesnotmatter", newname.c_str()),
+              SyscallFailsWithErrno(ENOENT));
+}
+
+TEST(SymlinkTest, CanEvaluateLink) {
+  const auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+
+  // We are going to assert that the symlink inode id is the same as the linked
+  // file's inode id. In order for the inode id to be stable across
+  // save/restore, it must be kept open. The FileDescriptor type will do that
+  // for us automatically.
+  auto fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR));
+  struct stat file_st;
+  EXPECT_THAT(fstat(fd.get(), &file_st), SyscallSucceeds());
+
+  const std::string link = NewTempAbsPath();
+  EXPECT_THAT(symlink(file.path().c_str(), link.c_str()), SyscallSucceeds());
+  EXPECT_EQ(FilePermission(link), 0777);
+
+  auto linkfd = ASSERT_NO_ERRNO_AND_VALUE(Open(link.c_str(), O_RDWR));
+  struct stat link_st;
+  EXPECT_THAT(fstat(linkfd.get(), &link_st), SyscallSucceeds());
+
+  // Check that in fact newname points to the file we expect.
+  EXPECT_EQ(file_st.st_dev, link_st.st_dev);
+  EXPECT_EQ(file_st.st_ino, link_st.st_ino);
+}
+
+TEST(SymlinkTest, TargetIsNotMapped) {
+  const std::string oldname = NewTempAbsPath();
+  const std::string newname = NewTempAbsPath();
+
+  int fd;
+  // Create the target so that when we read the link, it exists.
+  ASSERT_THAT(fd = open(oldname.c_str(), O_CREAT | O_RDWR, 0666),
+              SyscallSucceeds());
+  EXPECT_THAT(close(fd), SyscallSucceeds());
+
+  // Create a symlink called newname that points to oldname.
+  EXPECT_THAT(symlink(oldname.c_str(), newname.c_str()), SyscallSucceeds());
+
+  std::vector<char> buf(1024);
+  int linksize;
+  // Read the link and assert that the oldname is still the same.
+  EXPECT_THAT(linksize = readlink(newname.c_str(), buf.data(), 1024),
+              SyscallSucceeds());
+  EXPECT_EQ(0, strncmp(oldname.c_str(), buf.data(), linksize));
+
+  EXPECT_THAT(unlink(newname.c_str()), SyscallSucceeds());
+  EXPECT_THAT(unlink(oldname.c_str()), SyscallSucceeds());
+}
+
+TEST(SymlinkTest, PreadFromSymlink) {
+  std::string name = NewTempAbsPath();
+  int fd;
+  ASSERT_THAT(fd = open(name.c_str(), O_CREAT, 0644), SyscallSucceeds());
+  ASSERT_THAT(close(fd), SyscallSucceeds());
+
+  std::string linkname = NewTempAbsPath();
+  ASSERT_THAT(symlink(name.c_str(), linkname.c_str()), SyscallSucceeds());
+
+  ASSERT_THAT(fd = open(linkname.c_str(), O_RDONLY), SyscallSucceeds());
+
+  char buf[1024];
+  EXPECT_THAT(pread64(fd, buf, 1024, 0), SyscallSucceeds());
+  EXPECT_THAT(close(fd), SyscallSucceeds());
+
+  EXPECT_THAT(unlink(name.c_str()), SyscallSucceeds());
+  EXPECT_THAT(unlink(linkname.c_str()), SyscallSucceeds());
+}
+
+TEST(SymlinkTest, SymlinkAtDegradedPermissions_NoRandomSave) {
+  // Drop capabilities that allow us to override file and directory permissions.
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false));
+
+  auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir.path()));
+
+  int dirfd;
+  ASSERT_THAT(dirfd = open(dir.path().c_str(), O_DIRECTORY, 0),
+              SyscallSucceeds());
+
+  const DisableSave ds;  // Permissions are dropped.
+  EXPECT_THAT(fchmod(dirfd, 0), SyscallSucceeds());
+
+  std::string basename = std::string(Basename(file.path()));
+  EXPECT_THAT(symlinkat("/dangling", dirfd, basename.c_str()),
+              SyscallFailsWithErrno(EACCES));
+  EXPECT_THAT(close(dirfd), SyscallSucceeds());
+}
+
+TEST(SymlinkTest, ReadlinkAtDegradedPermissions_NoRandomSave) {
+  // Drop capabilities that allow us to override file and directory permissions.
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false));
+
+  auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const std::string oldpath = NewTempAbsPathInDir(dir.path());
+  const std::string oldbase = std::string(Basename(oldpath));
+  ASSERT_THAT(symlink("/dangling", oldpath.c_str()), SyscallSucceeds());
+
+  int dirfd;
+  EXPECT_THAT(dirfd = open(dir.path().c_str(), O_DIRECTORY, 0),
+              SyscallSucceeds());
+
+  const DisableSave ds;  // Permissions are dropped.
+  EXPECT_THAT(fchmod(dirfd, 0), SyscallSucceeds());
+
+  char buf[1024];
+  int linksize;
+  EXPECT_THAT(linksize = readlinkat(dirfd, oldbase.c_str(), buf, 1024),
+              SyscallFailsWithErrno(EACCES));
+  EXPECT_THAT(close(dirfd), SyscallSucceeds());
+}
+
+TEST(SymlinkTest, ChmodSymlink) {
+  auto target = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  const std::string newpath = NewTempAbsPath();
+  ASSERT_THAT(symlink(target.path().c_str(), newpath.c_str()),
+              SyscallSucceeds());
+  EXPECT_EQ(FilePermission(newpath), 0777);
+  EXPECT_THAT(chmod(newpath.c_str(), 0666), SyscallSucceeds());
+  EXPECT_EQ(FilePermission(newpath), 0777);
+}
+
+// Test that following a symlink updates the atime on the symlink.
+TEST(SymlinkTest, FollowUpdatesATime) {
+  const auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  const std::string link = NewTempAbsPath();
+  EXPECT_THAT(symlink(file.path().c_str(), link.c_str()), SyscallSucceeds());
+
+  // Lstat the symlink.
+  struct stat st_before_follow;
+  ASSERT_THAT(lstat(link.c_str(), &st_before_follow), SyscallSucceeds());
+
+  // Let the clock advance.
+  absl::SleepFor(absl::Seconds(1));
+
+  // Open the file via the symlink.
+  int fd;
+  ASSERT_THAT(fd = open(link.c_str(), O_RDWR, 0666), SyscallSucceeds());
+  FileDescriptor fd_closer(fd);
+
+  // Lstat the symlink again, and check that atime is updated.
+  struct stat st_after_follow;
+  ASSERT_THAT(lstat(link.c_str(), &st_after_follow), SyscallSucceeds());
+  EXPECT_LT(st_before_follow.st_atime, st_after_follow.st_atime);
+}
+
+class ParamSymlinkTest : public ::testing::TestWithParam<std::string> {};
+
+// Test that creating an existing symlink with creat will create the target.
+TEST_P(ParamSymlinkTest, CreatLinkCreatesTarget) {
+  const std::string target = GetParam();
+  const std::string linkpath = NewTempAbsPath();
+
+  ASSERT_THAT(symlink(target.c_str(), linkpath.c_str()), SyscallSucceeds());
+
+  int fd;
+  EXPECT_THAT(fd = creat(linkpath.c_str(), 0666), SyscallSucceeds());
+  ASSERT_THAT(close(fd), SyscallSucceeds());
+
+  ASSERT_THAT(chdir(GetAbsoluteTestTmpdir().c_str()), SyscallSucceeds());
+  struct stat st;
+  EXPECT_THAT(stat(target.c_str(), &st), SyscallSucceeds());
+
+  ASSERT_THAT(unlink(linkpath.c_str()), SyscallSucceeds());
+  ASSERT_THAT(unlink(target.c_str()), SyscallSucceeds());
+}
+
+// Test that opening an existing symlink with O_CREAT will create the target.
+TEST_P(ParamSymlinkTest, OpenLinkCreatesTarget) {
+  const std::string target = GetParam();
+  const std::string linkpath = NewTempAbsPath();
+
+  ASSERT_THAT(symlink(target.c_str(), linkpath.c_str()), SyscallSucceeds());
+
+  int fd;
+  EXPECT_THAT(fd = open(linkpath.c_str(), O_CREAT, 0666), SyscallSucceeds());
+  ASSERT_THAT(close(fd), SyscallSucceeds());
+
+  ASSERT_THAT(chdir(GetAbsoluteTestTmpdir().c_str()), SyscallSucceeds());
+  struct stat st;
+  EXPECT_THAT(stat(target.c_str(), &st), SyscallSucceeds());
+
+  ASSERT_THAT(unlink(linkpath.c_str()), SyscallSucceeds());
+  ASSERT_THAT(unlink(target.c_str()), SyscallSucceeds());
+}
+
+// Test that opening a self-symlink with O_CREAT will fail with ELOOP.
+TEST_P(ParamSymlinkTest, CreateExistingSelfLink) {
+  ASSERT_THAT(chdir(GetAbsoluteTestTmpdir().c_str()), SyscallSucceeds());
+
+  const std::string linkpath = GetParam();
+  ASSERT_THAT(symlink(linkpath.c_str(), linkpath.c_str()), SyscallSucceeds());
+
+  EXPECT_THAT(open(linkpath.c_str(), O_CREAT, 0666),
+              SyscallFailsWithErrno(ELOOP));
+
+  ASSERT_THAT(unlink(linkpath.c_str()), SyscallSucceeds());
+}
+
+// Test that opening a file that is a symlink to its parent directory fails
+// with ELOOP.
+TEST_P(ParamSymlinkTest, CreateExistingParentLink) {
+  ASSERT_THAT(chdir(GetAbsoluteTestTmpdir().c_str()), SyscallSucceeds());
+
+  const std::string linkpath = GetParam();
+  const std::string target = JoinPath(linkpath, "child");
+  ASSERT_THAT(symlink(target.c_str(), linkpath.c_str()), SyscallSucceeds());
+
+  EXPECT_THAT(open(linkpath.c_str(), O_CREAT, 0666),
+              SyscallFailsWithErrno(ELOOP));
+
+  ASSERT_THAT(unlink(linkpath.c_str()), SyscallSucceeds());
+}
+
+// Test that opening an existing symlink with O_CREAT|O_EXCL will fail with
+// EEXIST.
+TEST_P(ParamSymlinkTest, OpenLinkExclFails) {
+  const std::string target = GetParam();
+  const std::string linkpath = NewTempAbsPath();
+
+  ASSERT_THAT(symlink(target.c_str(), linkpath.c_str()), SyscallSucceeds());
+
+  EXPECT_THAT(open(linkpath.c_str(), O_CREAT | O_EXCL, 0666),
+              SyscallFailsWithErrno(EEXIST));
+
+  ASSERT_THAT(unlink(linkpath.c_str()), SyscallSucceeds());
+}
+
+// Test that opening an existing symlink with O_CREAT|O_NOFOLLOW will fail with
+// ELOOP.
+TEST_P(ParamSymlinkTest, OpenLinkNoFollowFails) {
+  const std::string target = GetParam();
+  const std::string linkpath = NewTempAbsPath();
+
+  ASSERT_THAT(symlink(target.c_str(), linkpath.c_str()), SyscallSucceeds());
+
+  EXPECT_THAT(open(linkpath.c_str(), O_CREAT | O_NOFOLLOW, 0666),
+              SyscallFailsWithErrno(ELOOP));
+
+  ASSERT_THAT(unlink(linkpath.c_str()), SyscallSucceeds());
+}
+
+INSTANTIATE_TEST_SUITE_P(AbsAndRelTarget, ParamSymlinkTest,
+                         ::testing::Values(NewTempAbsPath(), NewTempRelPath()));
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/sync.cc b/test/syscalls/linux/sync.cc
new file mode 100644
index 000000000..8aa2525a9
--- /dev/null
+++ b/test/syscalls/linux/sync.cc
@@ -0,0 +1,59 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#include <string>
+
+#include "gtest/gtest.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(SyncTest, SyncEverything) {
+  ASSERT_THAT(syscall(SYS_sync), SyscallSucceeds());
+}
+
+TEST(SyncTest, SyncFileSytem) {
+  int fd;
+  auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  ASSERT_THAT(fd = open(f.path().c_str(), O_RDONLY), SyscallSucceeds());
+  EXPECT_THAT(syncfs(fd), SyscallSucceeds());
+  EXPECT_THAT(close(fd), SyscallSucceeds());
+}
+
+TEST(SyncTest, SyncFromPipe) {
+  int pipes[2];
+  EXPECT_THAT(pipe(pipes), SyscallSucceeds());
+  EXPECT_THAT(syncfs(pipes[0]), SyscallSucceeds());
+  EXPECT_THAT(syncfs(pipes[1]), SyscallSucceeds());
+  EXPECT_THAT(close(pipes[0]), SyscallSucceeds());
+  EXPECT_THAT(close(pipes[1]), SyscallSucceeds());
+}
+
+TEST(SyncTest, CannotSyncFileSytemAtBadFd) {
+  EXPECT_THAT(syncfs(-1), SyscallFailsWithErrno(EBADF));
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/sync_file_range.cc b/test/syscalls/linux/sync_file_range.cc
new file mode 100644
index 000000000..36cc42043
--- /dev/null
+++ b/test/syscalls/linux/sync_file_range.cc
@@ -0,0 +1,112 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <string>
+
+#include "gtest/gtest.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(SyncFileRangeTest, TempFileSucceeds) {
+  auto tmpfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  auto f = ASSERT_NO_ERRNO_AND_VALUE(Open(tmpfile.path(), O_RDWR));
+  constexpr char data[] = "some data to sync";
+  int fd = f.get();
+
+  EXPECT_THAT(write(fd, data, sizeof(data)),
+              SyscallSucceedsWithValue(sizeof(data)));
+  EXPECT_THAT(sync_file_range(fd, 0, 0, SYNC_FILE_RANGE_WRITE),
+              SyscallSucceeds());
+  EXPECT_THAT(sync_file_range(fd, 0, 0, 0), SyscallSucceeds());
+  EXPECT_THAT(
+      sync_file_range(fd, 0, 0,
+                      SYNC_FILE_RANGE_WRITE | SYNC_FILE_RANGE_WAIT_AFTER |
+                          SYNC_FILE_RANGE_WAIT_BEFORE),
+      SyscallSucceeds());
+  EXPECT_THAT(sync_file_range(
+                  fd, 0, 1, SYNC_FILE_RANGE_WRITE | SYNC_FILE_RANGE_WAIT_AFTER),
+              SyscallSucceeds());
+  EXPECT_THAT(sync_file_range(
+                  fd, 1, 0, SYNC_FILE_RANGE_WRITE | SYNC_FILE_RANGE_WAIT_AFTER),
+              SyscallSucceeds());
+}
+
+TEST(SyncFileRangeTest, CannotSyncFileRangeOnUnopenedFd) {
+  auto tmpfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  auto f = ASSERT_NO_ERRNO_AND_VALUE(Open(tmpfile.path(), O_RDWR));
+  constexpr char data[] = "some data to sync";
+  int fd = f.get();
+
+  EXPECT_THAT(write(fd, data, sizeof(data)),
+              SyscallSucceedsWithValue(sizeof(data)));
+
+  pid_t pid = fork();
+  if (pid == 0) {
+    f.reset();
+
+    // fd is now invalid.
+    TEST_CHECK(sync_file_range(fd, 0, 0, SYNC_FILE_RANGE_WRITE) == -1);
+    TEST_PCHECK(errno == EBADF);
+    _exit(0);
+  }
+  ASSERT_THAT(pid, SyscallSucceeds());
+
+  int status = 0;
+  ASSERT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid));
+  EXPECT_TRUE(WIFEXITED(status));
+  EXPECT_EQ(WEXITSTATUS(status), 0);
+}
+
+TEST(SyncFileRangeTest, BadArgs) {
+  auto tmpfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  auto f = ASSERT_NO_ERRNO_AND_VALUE(Open(tmpfile.path(), O_RDWR));
+  int fd = f.get();
+
+  EXPECT_THAT(sync_file_range(fd, -1, 0, 0), SyscallFailsWithErrno(EINVAL));
+  EXPECT_THAT(sync_file_range(fd, 0, -1, 0), SyscallFailsWithErrno(EINVAL));
+  EXPECT_THAT(sync_file_range(fd, 8912, INT64_MAX - 4096, 0),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(SyncFileRangeTest, CannotSyncFileRangeWithWaitBefore) {
+  auto tmpfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  auto f = ASSERT_NO_ERRNO_AND_VALUE(Open(tmpfile.path(), O_RDWR));
+  constexpr char data[] = "some data to sync";
+  int fd = f.get();
+
+  EXPECT_THAT(write(fd, data, sizeof(data)),
+              SyscallSucceedsWithValue(sizeof(data)));
+  if (IsRunningOnGvisor()) {
+    EXPECT_THAT(sync_file_range(fd, 0, 0, SYNC_FILE_RANGE_WAIT_BEFORE),
+                SyscallFailsWithErrno(ENOSYS));
+    EXPECT_THAT(
+        sync_file_range(fd, 0, 0,
+                        SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE),
+        SyscallFailsWithErrno(ENOSYS));
+  }
+}
+
+}  // namespace
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/sysinfo.cc b/test/syscalls/linux/sysinfo.cc
new file mode 100644
index 000000000..1a71256da
--- /dev/null
+++ b/test/syscalls/linux/sysinfo.cc
@@ -0,0 +1,86 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// This is a very simple sanity test to validate that the sysinfo syscall is
+// supported by gvisor and returns sane values.
+#include <sys/syscall.h>
+#include <sys/sysinfo.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(SysinfoTest, SysinfoIsCallable) {
+  struct sysinfo ignored = {};
+  EXPECT_THAT(syscall(SYS_sysinfo, &ignored), SyscallSucceedsWithValue(0));
+}
+
+TEST(SysinfoTest, EfaultProducedOnBadAddress) {
+  // Validate that we return EFAULT when a bad address is provided.
+  // specified by man 2 sysinfo
+  EXPECT_THAT(syscall(SYS_sysinfo, nullptr), SyscallFailsWithErrno(EFAULT));
+}
+
+TEST(SysinfoTest, TotalRamSaneValue) {
+  struct sysinfo s = {};
+  EXPECT_THAT(sysinfo(&s), SyscallSucceedsWithValue(0));
+  EXPECT_GT(s.totalram, 0);
+}
+
+TEST(SysinfoTest, MemunitSet) {
+  struct sysinfo s = {};
+  EXPECT_THAT(sysinfo(&s), SyscallSucceedsWithValue(0));
+  EXPECT_GE(s.mem_unit, 1);
+}
+
+TEST(SysinfoTest, UptimeSaneValue) {
+  struct sysinfo s = {};
+  EXPECT_THAT(sysinfo(&s), SyscallSucceedsWithValue(0));
+  EXPECT_GE(s.uptime, 0);
+}
+
+TEST(SysinfoTest, UptimeIncreasingValue) {
+  struct sysinfo s = {};
+  EXPECT_THAT(sysinfo(&s), SyscallSucceedsWithValue(0));
+  absl::SleepFor(absl::Seconds(2));
+  struct sysinfo s2 = {};
+  EXPECT_THAT(sysinfo(&s2), SyscallSucceedsWithValue(0));
+  EXPECT_LT(s.uptime, s2.uptime);
+}
+
+TEST(SysinfoTest, FreeRamSaneValue) {
+  struct sysinfo s = {};
+  EXPECT_THAT(sysinfo(&s), SyscallSucceedsWithValue(0));
+  EXPECT_GT(s.freeram, 0);
+  EXPECT_LT(s.freeram, s.totalram);
+}
+
+TEST(SysinfoTest, NumProcsSaneValue) {
+  struct sysinfo s = {};
+  EXPECT_THAT(sysinfo(&s), SyscallSucceedsWithValue(0));
+  EXPECT_GT(s.procs, 0);
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/syslog.cc b/test/syscalls/linux/syslog.cc
new file mode 100644
index 000000000..9a7407d96
--- /dev/null
+++ b/test/syscalls/linux/syslog.cc
@@ -0,0 +1,51 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sys/klog.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+constexpr int SYSLOG_ACTION_READ_ALL = 3;
+constexpr int SYSLOG_ACTION_SIZE_BUFFER = 10;
+
+int Syslog(int type, char* buf, int len) {
+  return syscall(__NR_syslog, type, buf, len);
+}
+
+// Only SYSLOG_ACTION_SIZE_BUFFER and SYSLOG_ACTION_READ_ALL are implemented in
+// gVisor.
+
+TEST(Syslog, Size) {
+  EXPECT_THAT(Syslog(SYSLOG_ACTION_SIZE_BUFFER, nullptr, 0), SyscallSucceeds());
+}
+
+TEST(Syslog, ReadAll) {
+  // There might not be anything to read, so we can't check the write count.
+  char buf[100];
+  EXPECT_THAT(Syslog(SYSLOG_ACTION_READ_ALL, buf, sizeof(buf)),
+              SyscallSucceeds());
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/sysret.cc b/test/syscalls/linux/sysret.cc
new file mode 100644
index 000000000..19ffbd85b
--- /dev/null
+++ b/test/syscalls/linux/sysret.cc
@@ -0,0 +1,142 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Tests to verify that the behavior of linux and gvisor matches when
+// 'sysret' returns to bad (aka non-canonical) %rip or %rsp.
+
+#include <linux/elf.h>
+#include <sys/ptrace.h>
+#include <sys/user.h>
+
+#include "gtest/gtest.h"
+#include "test/util/logging.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+constexpr uint64_t kNonCanonicalRip = 0xCCCC000000000000;
+constexpr uint64_t kNonCanonicalRsp = 0xFFFF000000000000;
+
+class SysretTest : public ::testing::Test {
+ protected:
+  struct user_regs_struct regs_;
+  struct iovec iov;
+  pid_t child_;
+
+  void SetUp() override {
+    pid_t pid = fork();
+
+    // Child.
+    if (pid == 0) {
+      TEST_PCHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) == 0);
+      MaybeSave();
+      TEST_PCHECK(raise(SIGSTOP) == 0);
+      MaybeSave();
+      _exit(0);
+    }
+
+    // Parent.
+    int status;
+    memset(&iov, 0, sizeof(iov));
+    ASSERT_THAT(pid, SyscallSucceeds());  // Might still be < 0.
+    ASSERT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid));
+    EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP);
+
+    iov.iov_base = &regs_;
+    iov.iov_len = sizeof(regs_);
+    ASSERT_THAT(ptrace(PTRACE_GETREGSET, pid, NT_PRSTATUS, &iov),
+                SyscallSucceeds());
+
+    child_ = pid;
+  }
+
+  void Detach() {
+    ASSERT_THAT(ptrace(PTRACE_DETACH, child_, 0, 0), SyscallSucceeds());
+  }
+
+  void SetRip(uint64_t newrip) {
+#if defined(__x86_64__)
+    regs_.rip = newrip;
+#elif defined(__aarch64__)
+    regs_.pc = newrip;
+#else
+#error "Unknown architecture"
+#endif
+    ASSERT_THAT(ptrace(PTRACE_SETREGSET, child_, NT_PRSTATUS, &iov),
+                SyscallSucceeds());
+  }
+
+  void SetRsp(uint64_t newrsp) {
+#if defined(__x86_64__)
+    regs_.rsp = newrsp;
+#elif defined(__aarch64__)
+    regs_.sp = newrsp;
+#else
+#error "Unknown architecture"
+#endif
+    ASSERT_THAT(ptrace(PTRACE_SETREGSET, child_, NT_PRSTATUS, &iov),
+                SyscallSucceeds());
+  }
+
+  // Wait waits for the child pid and returns the exit status.
+  int Wait() {
+    int status;
+    while (true) {
+      int rval = wait4(child_, &status, 0, NULL);
+      if (rval < 0) {
+        return rval;
+      }
+      if (rval == child_) {
+        return status;
+      }
+    }
+  }
+};
+
+TEST_F(SysretTest, JustDetach) {
+  Detach();
+  int status = Wait();
+  EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+      << "status = " << status;
+}
+
+TEST_F(SysretTest, BadRip) {
+  SetRip(kNonCanonicalRip);
+  Detach();
+  int status = Wait();
+  EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGSEGV)
+      << "status = " << status;
+}
+
+TEST_F(SysretTest, BadRsp) {
+  SetRsp(kNonCanonicalRsp);
+  Detach();
+  int status = Wait();
+#if defined(__x86_64__)
+  EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGBUS)
+      << "status = " << status;
+#elif defined(__aarch64__)
+  EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGSEGV)
+      << "status = " << status;
+#else
+#error "Unknown architecture"
+#endif
+}
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/tcp_socket.cc b/test/syscalls/linux/tcp_socket.cc
new file mode 100644
index 000000000..a4d2953e1
--- /dev/null
+++ b/test/syscalls/linux/tcp_socket.cc
@@ -0,0 +1,1568 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <poll.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <unistd.h>
+
+#include <limits>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/posix_error.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+PosixErrorOr<sockaddr_storage> InetLoopbackAddr(int family) {
+  struct sockaddr_storage addr;
+  memset(&addr, 0, sizeof(addr));
+  addr.ss_family = family;
+  switch (family) {
+    case AF_INET:
+      reinterpret_cast<struct sockaddr_in*>(&addr)->sin_addr.s_addr =
+          htonl(INADDR_LOOPBACK);
+      break;
+    case AF_INET6:
+      reinterpret_cast<struct sockaddr_in6*>(&addr)->sin6_addr =
+          in6addr_loopback;
+      break;
+    default:
+      return PosixError(EINVAL,
+                        absl::StrCat("unknown socket family: ", family));
+  }
+  return addr;
+}
+
+// Fixture for tests parameterized by the address family to use (AF_INET and
+// AF_INET6) when creating sockets.
+class TcpSocketTest : public ::testing::TestWithParam<int> {
+ protected:
+  // Creates three sockets that will be used by test cases -- a listener, one
+  // that connects, and the accepted one.
+  void SetUp() override;
+
+  // Closes the sockets created by SetUp().
+  void TearDown() override;
+
+  // Listening socket.
+  int listener_ = -1;
+
+  // Socket connected via connect().
+  int s_ = -1;
+
+  // Socket connected via accept().
+  int t_ = -1;
+
+  // Initial size of the send buffer.
+  int sendbuf_size_ = -1;
+};
+
+void TcpSocketTest::SetUp() {
+  ASSERT_THAT(listener_ = socket(GetParam(), SOCK_STREAM, IPPROTO_TCP),
+              SyscallSucceeds());
+
+  ASSERT_THAT(s_ = socket(GetParam(), SOCK_STREAM, IPPROTO_TCP),
+              SyscallSucceeds());
+
+  // Initialize address to the loopback one.
+  sockaddr_storage addr =
+      ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(GetParam()));
+  socklen_t addrlen = sizeof(addr);
+
+  // Bind to some port then start listening.
+  ASSERT_THAT(
+      bind(listener_, reinterpret_cast<struct sockaddr*>(&addr), addrlen),
+      SyscallSucceeds());
+
+  ASSERT_THAT(listen(listener_, SOMAXCONN), SyscallSucceeds());
+
+  // Get the address we're listening on, then connect to it. We need to do this
+  // because we're allowing the stack to pick a port for us.
+  ASSERT_THAT(getsockname(listener_, reinterpret_cast<struct sockaddr*>(&addr),
+                          &addrlen),
+              SyscallSucceeds());
+
+  ASSERT_THAT(RetryEINTR(connect)(s_, reinterpret_cast<struct sockaddr*>(&addr),
+                                  addrlen),
+              SyscallSucceeds());
+
+  // Get the initial send buffer size.
+  socklen_t optlen = sizeof(sendbuf_size_);
+  ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &sendbuf_size_, &optlen),
+              SyscallSucceeds());
+
+  // Accept the connection.
+  ASSERT_THAT(t_ = RetryEINTR(accept)(listener_, nullptr, nullptr),
+              SyscallSucceeds());
+}
+
+void TcpSocketTest::TearDown() {
+  EXPECT_THAT(close(listener_), SyscallSucceeds());
+  if (s_ >= 0) {
+    EXPECT_THAT(close(s_), SyscallSucceeds());
+  }
+  if (t_ >= 0) {
+    EXPECT_THAT(close(t_), SyscallSucceeds());
+  }
+}
+
+TEST_P(TcpSocketTest, ConnectOnEstablishedConnection) {
+  sockaddr_storage addr =
+      ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(GetParam()));
+  socklen_t addrlen = sizeof(addr);
+
+  ASSERT_THAT(
+      connect(s_, reinterpret_cast<const struct sockaddr*>(&addr), addrlen),
+      SyscallFailsWithErrno(EISCONN));
+  ASSERT_THAT(
+      connect(t_, reinterpret_cast<const struct sockaddr*>(&addr), addrlen),
+      SyscallFailsWithErrno(EISCONN));
+}
+
+TEST_P(TcpSocketTest, ShutdownWriteInTimeWait) {
+  EXPECT_THAT(shutdown(t_, SHUT_WR), SyscallSucceeds());
+  EXPECT_THAT(shutdown(s_, SHUT_RDWR), SyscallSucceeds());
+  absl::SleepFor(absl::Seconds(1));  // Wait to enter TIME_WAIT.
+  EXPECT_THAT(shutdown(t_, SHUT_WR), SyscallFailsWithErrno(ENOTCONN));
+}
+
+TEST_P(TcpSocketTest, ShutdownWriteInFinWait1) {
+  EXPECT_THAT(shutdown(t_, SHUT_WR), SyscallSucceeds());
+  EXPECT_THAT(shutdown(t_, SHUT_WR), SyscallSucceeds());
+  absl::SleepFor(absl::Seconds(1));  // Wait to enter FIN-WAIT2.
+  EXPECT_THAT(shutdown(t_, SHUT_WR), SyscallSucceeds());
+}
+
+TEST_P(TcpSocketTest, DataCoalesced) {
+  char buf[10];
+
+  // Write in two steps.
+  ASSERT_THAT(RetryEINTR(write)(s_, buf, sizeof(buf) / 2),
+              SyscallSucceedsWithValue(sizeof(buf) / 2));
+  ASSERT_THAT(RetryEINTR(write)(s_, buf, sizeof(buf) / 2),
+              SyscallSucceedsWithValue(sizeof(buf) / 2));
+
+  // Allow stack to process both packets.
+  absl::SleepFor(absl::Seconds(1));
+
+  // Read in one shot.
+  EXPECT_THAT(RetryEINTR(recv)(t_, buf, sizeof(buf), 0),
+              SyscallSucceedsWithValue(sizeof(buf)));
+}
+
+TEST_P(TcpSocketTest, SenderAddressIgnored) {
+  char buf[3];
+  ASSERT_THAT(RetryEINTR(write)(s_, buf, sizeof(buf)),
+              SyscallSucceedsWithValue(sizeof(buf)));
+
+  struct sockaddr_storage addr;
+  socklen_t addrlen = sizeof(addr);
+  memset(&addr, 0, sizeof(addr));
+
+  ASSERT_THAT(
+      RetryEINTR(recvfrom)(t_, buf, sizeof(buf), 0,
+                           reinterpret_cast<struct sockaddr*>(&addr), &addrlen),
+      SyscallSucceedsWithValue(3));
+
+  // Check that addr remains zeroed-out.
+  const char* ptr = reinterpret_cast<char*>(&addr);
+  for (size_t i = 0; i < sizeof(addr); i++) {
+    EXPECT_EQ(ptr[i], 0);
+  }
+}
+
+TEST_P(TcpSocketTest, SenderAddressIgnoredOnPeek) {
+  char buf[3];
+  ASSERT_THAT(RetryEINTR(write)(s_, buf, sizeof(buf)),
+              SyscallSucceedsWithValue(sizeof(buf)));
+
+  struct sockaddr_storage addr;
+  socklen_t addrlen = sizeof(addr);
+  memset(&addr, 0, sizeof(addr));
+
+  ASSERT_THAT(
+      RetryEINTR(recvfrom)(t_, buf, sizeof(buf), MSG_PEEK,
+                           reinterpret_cast<struct sockaddr*>(&addr), &addrlen),
+      SyscallSucceedsWithValue(3));
+
+  // Check that addr remains zeroed-out.
+  const char* ptr = reinterpret_cast<char*>(&addr);
+  for (size_t i = 0; i < sizeof(addr); i++) {
+    EXPECT_EQ(ptr[i], 0);
+  }
+}
+
+TEST_P(TcpSocketTest, SendtoAddressIgnored) {
+  struct sockaddr_storage addr;
+  memset(&addr, 0, sizeof(addr));
+  addr.ss_family = GetParam();  // FIXME(b/63803955)
+
+  char data = '\0';
+  EXPECT_THAT(
+      RetryEINTR(sendto)(s_, &data, sizeof(data), 0,
+                         reinterpret_cast<sockaddr*>(&addr), sizeof(addr)),
+      SyscallSucceedsWithValue(1));
+}
+
+TEST_P(TcpSocketTest, WritevZeroIovec) {
+  // 2 bytes just to be safe and have vecs[1] not point to something random
+  // (even though length is 0).
+  char buf[2];
+  char recv_buf[1];
+
+  // Construct a vec where the final vector is of length 0.
+  iovec vecs[2] = {};
+  vecs[0].iov_base = buf;
+  vecs[0].iov_len = 1;
+  vecs[1].iov_base = buf + 1;
+  vecs[1].iov_len = 0;
+
+  EXPECT_THAT(RetryEINTR(writev)(s_, vecs, 2), SyscallSucceedsWithValue(1));
+
+  EXPECT_THAT(RetryEINTR(recv)(t_, recv_buf, 1, 0),
+              SyscallSucceedsWithValue(1));
+  EXPECT_EQ(memcmp(recv_buf, buf, 1), 0);
+}
+
+TEST_P(TcpSocketTest, ZeroWriteAllowed) {
+  char buf[3];
+  // Send a zero length packet.
+  ASSERT_THAT(RetryEINTR(write)(s_, buf, 0), SyscallSucceedsWithValue(0));
+  // Verify that there is no packet available.
+  EXPECT_THAT(RetryEINTR(recv)(t_, buf, sizeof(buf), MSG_DONTWAIT),
+              SyscallFailsWithErrno(EAGAIN));
+}
+
+// Test that a non-blocking write with a buffer that is larger than the send
+// buffer size will not actually write the whole thing at once. Regression test
+// for b/64438887.
+TEST_P(TcpSocketTest, NonblockingLargeWrite) {
+  // Set the FD to O_NONBLOCK.
+  int opts;
+  ASSERT_THAT(opts = fcntl(s_, F_GETFL), SyscallSucceeds());
+  opts |= O_NONBLOCK;
+  ASSERT_THAT(fcntl(s_, F_SETFL, opts), SyscallSucceeds());
+
+  // Allocate a buffer three times the size of the send buffer. We do this with
+  // a vector to avoid allocating on the stack.
+  int size = 3 * sendbuf_size_;
+  std::vector<char> buf(size);
+
+  // Try to write the whole thing.
+  int n;
+  ASSERT_THAT(n = RetryEINTR(write)(s_, buf.data(), size), SyscallSucceeds());
+
+  // We should have written something, but not the whole thing.
+  EXPECT_GT(n, 0);
+  EXPECT_LT(n, size);
+}
+
+// Test that a blocking write with a buffer that is larger than the send buffer
+// will block until the entire buffer is sent.
+TEST_P(TcpSocketTest, BlockingLargeWrite_NoRandomSave) {
+  // Allocate a buffer three times the size of the send buffer on the heap. We
+  // do this as a vector to avoid allocating on the stack.
+  int size = 3 * sendbuf_size_;
+  std::vector<char> writebuf(size);
+
+  // Start reading the response in a loop.
+  int read_bytes = 0;
+  ScopedThread t([this, &read_bytes]() {
+    // Avoid interrupting the blocking write in main thread.
+    const DisableSave ds;
+
+    // Take ownership of the FD so that we close it on failure. This will
+    // unblock the blocking write below.
+    FileDescriptor fd(t_);
+    t_ = -1;
+
+    char readbuf[2500] = {};
+    int n = -1;
+    while (n != 0) {
+      ASSERT_THAT(n = RetryEINTR(read)(fd.get(), &readbuf, sizeof(readbuf)),
+                  SyscallSucceeds());
+      read_bytes += n;
+    }
+  });
+
+  // Try to write the whole thing.
+  int n;
+  ASSERT_THAT(n = WriteFd(s_, writebuf.data(), size), SyscallSucceeds());
+
+  // We should have written the whole thing.
+  EXPECT_EQ(n, size);
+  EXPECT_THAT(close(s_), SyscallSucceedsWithValue(0));
+  s_ = -1;
+  t.Join();
+
+  // We should have read the whole thing.
+  EXPECT_EQ(read_bytes, size);
+}
+
+// Test that a send with MSG_DONTWAIT flag and buffer that larger than the send
+// buffer size will not write the whole thing.
+TEST_P(TcpSocketTest, LargeSendDontWait) {
+  // Allocate a buffer three times the size of the send buffer. We do this on
+  // with a vector to avoid allocating on the stack.
+  int size = 3 * sendbuf_size_;
+  std::vector<char> buf(size);
+
+  // Try to write the whole thing with MSG_DONTWAIT flag, which can
+  // return a partial write.
+  int n;
+  ASSERT_THAT(n = RetryEINTR(send)(s_, buf.data(), size, MSG_DONTWAIT),
+              SyscallSucceeds());
+
+  // We should have written something, but not the whole thing.
+  EXPECT_GT(n, 0);
+  EXPECT_LT(n, size);
+}
+
+// Test that a send on a non-blocking socket with a buffer that larger than the
+// send buffer will not write the whole thing at once.
+TEST_P(TcpSocketTest, NonblockingLargeSend) {
+  // Set the FD to O_NONBLOCK.
+  int opts;
+  ASSERT_THAT(opts = fcntl(s_, F_GETFL), SyscallSucceeds());
+  opts |= O_NONBLOCK;
+  ASSERT_THAT(fcntl(s_, F_SETFL, opts), SyscallSucceeds());
+
+  // Allocate a buffer three times the size of the send buffer. We do this on
+  // with a vector to avoid allocating on the stack.
+  int size = 3 * sendbuf_size_;
+  std::vector<char> buf(size);
+
+  // Try to write the whole thing.
+  int n;
+  ASSERT_THAT(n = RetryEINTR(send)(s_, buf.data(), size, 0), SyscallSucceeds());
+
+  // We should have written something, but not the whole thing.
+  EXPECT_GT(n, 0);
+  EXPECT_LT(n, size);
+}
+
+// Same test as above, but calls send instead of write.
+TEST_P(TcpSocketTest, BlockingLargeSend_NoRandomSave) {
+  // Allocate a buffer three times the size of the send buffer. We do this on
+  // with a vector to avoid allocating on the stack.
+  int size = 3 * sendbuf_size_;
+  std::vector<char> writebuf(size);
+
+  // Start reading the response in a loop.
+  int read_bytes = 0;
+  ScopedThread t([this, &read_bytes]() {
+    // Avoid interrupting the blocking write in main thread.
+    const DisableSave ds;
+
+    // Take ownership of the FD so that we close it on failure. This will
+    // unblock the blocking write below.
+    FileDescriptor fd(t_);
+    t_ = -1;
+
+    char readbuf[2500] = {};
+    int n = -1;
+    while (n != 0) {
+      ASSERT_THAT(n = RetryEINTR(read)(fd.get(), &readbuf, sizeof(readbuf)),
+                  SyscallSucceeds());
+      read_bytes += n;
+    }
+  });
+
+  // Try to send the whole thing.
+  int n;
+  ASSERT_THAT(n = SendFd(s_, writebuf.data(), size, 0), SyscallSucceeds());
+
+  // We should have written the whole thing.
+  EXPECT_EQ(n, size);
+  EXPECT_THAT(close(s_), SyscallSucceedsWithValue(0));
+  s_ = -1;
+  t.Join();
+
+  // We should have read the whole thing.
+  EXPECT_EQ(read_bytes, size);
+}
+
+// Test that polling on a socket with a full send buffer will block.
+TEST_P(TcpSocketTest, PollWithFullBufferBlocks) {
+  // Set the FD to O_NONBLOCK.
+  int opts;
+  ASSERT_THAT(opts = fcntl(s_, F_GETFL), SyscallSucceeds());
+  opts |= O_NONBLOCK;
+  ASSERT_THAT(fcntl(s_, F_SETFL, opts), SyscallSucceeds());
+
+  // Set TCP_NODELAY, which will cause linux to fill the receive buffer from the
+  // send buffer as quickly as possibly. This way we can fill up both buffers
+  // faster.
+  constexpr int tcp_nodelay_flag = 1;
+  ASSERT_THAT(setsockopt(s_, IPPROTO_TCP, TCP_NODELAY, &tcp_nodelay_flag,
+                         sizeof(tcp_nodelay_flag)),
+              SyscallSucceeds());
+
+  // Set a 256KB send/receive buffer.
+  int buf_sz = 1 << 18;
+  EXPECT_THAT(setsockopt(t_, SOL_SOCKET, SO_RCVBUF, &buf_sz, sizeof(buf_sz)),
+              SyscallSucceedsWithValue(0));
+  EXPECT_THAT(setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &buf_sz, sizeof(buf_sz)),
+              SyscallSucceedsWithValue(0));
+
+  // Create a large buffer that will be used for sending.
+  std::vector<char> buf(1 << 16);
+
+  // Write until we receive an error.
+  while (RetryEINTR(send)(s_, buf.data(), buf.size(), 0) != -1) {
+    // Sleep to give linux a chance to move data from the send buffer to the
+    // receive buffer.
+    usleep(10000);  // 10ms.
+  }
+  // The last error should have been EWOULDBLOCK.
+  ASSERT_EQ(errno, EWOULDBLOCK);
+
+  // Now polling on the FD with a timeout should return 0 corresponding to no
+  // FDs ready.
+  struct pollfd poll_fd = {s_, POLLOUT, 0};
+  EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, 10), SyscallSucceedsWithValue(0));
+}
+
+TEST_P(TcpSocketTest, MsgTrunc) {
+  char sent_data[512];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+  ASSERT_THAT(RetryEINTR(send)(s_, sent_data, sizeof(sent_data), 0),
+              SyscallSucceedsWithValue(sizeof(sent_data)));
+  char received_data[sizeof(sent_data)] = {};
+  ASSERT_THAT(
+      RetryEINTR(recv)(t_, received_data, sizeof(received_data) / 2, MSG_TRUNC),
+      SyscallSucceedsWithValue(sizeof(sent_data) / 2));
+
+  // Check that we didn't get anything.
+  char zeros[sizeof(received_data)] = {};
+  EXPECT_EQ(0, memcmp(zeros, received_data, sizeof(received_data)));
+}
+
+// MSG_CTRUNC is a return flag but linux allows it to be set on input flags
+// without returning an error.
+TEST_P(TcpSocketTest, MsgTruncWithCtrunc) {
+  char sent_data[512];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+  ASSERT_THAT(RetryEINTR(send)(s_, sent_data, sizeof(sent_data), 0),
+              SyscallSucceedsWithValue(sizeof(sent_data)));
+  char received_data[sizeof(sent_data)] = {};
+  ASSERT_THAT(RetryEINTR(recv)(t_, received_data, sizeof(received_data) / 2,
+                               MSG_TRUNC | MSG_CTRUNC),
+              SyscallSucceedsWithValue(sizeof(sent_data) / 2));
+
+  // Check that we didn't get anything.
+  char zeros[sizeof(received_data)] = {};
+  EXPECT_EQ(0, memcmp(zeros, received_data, sizeof(received_data)));
+}
+
+// This test will verify that MSG_CTRUNC doesn't do anything when specified
+// on input.
+TEST_P(TcpSocketTest, MsgTruncWithCtruncOnly) {
+  char sent_data[512];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+  ASSERT_THAT(RetryEINTR(send)(s_, sent_data, sizeof(sent_data), 0),
+              SyscallSucceedsWithValue(sizeof(sent_data)));
+  char received_data[sizeof(sent_data)] = {};
+  ASSERT_THAT(RetryEINTR(recv)(t_, received_data, sizeof(received_data) / 2,
+                               MSG_CTRUNC),
+              SyscallSucceedsWithValue(sizeof(sent_data) / 2));
+
+  // Since MSG_CTRUNC here had no affect, it should not behave like MSG_TRUNC.
+  EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data) / 2));
+}
+
+TEST_P(TcpSocketTest, MsgTruncLargeSize) {
+  char sent_data[512];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+  ASSERT_THAT(RetryEINTR(send)(s_, sent_data, sizeof(sent_data), 0),
+              SyscallSucceedsWithValue(sizeof(sent_data)));
+  char received_data[sizeof(sent_data) * 2] = {};
+  ASSERT_THAT(
+      RetryEINTR(recv)(t_, received_data, sizeof(received_data), MSG_TRUNC),
+      SyscallSucceedsWithValue(sizeof(sent_data)));
+
+  // Check that we didn't get anything.
+  char zeros[sizeof(received_data)] = {};
+  EXPECT_EQ(0, memcmp(zeros, received_data, sizeof(received_data)));
+}
+
+TEST_P(TcpSocketTest, MsgTruncPeek) {
+  char sent_data[512];
+  RandomizeBuffer(sent_data, sizeof(sent_data));
+  ASSERT_THAT(RetryEINTR(send)(s_, sent_data, sizeof(sent_data), 0),
+              SyscallSucceedsWithValue(sizeof(sent_data)));
+  char received_data[sizeof(sent_data)] = {};
+  ASSERT_THAT(RetryEINTR(recv)(t_, received_data, sizeof(received_data) / 2,
+                               MSG_TRUNC | MSG_PEEK),
+              SyscallSucceedsWithValue(sizeof(sent_data) / 2));
+
+  // Check that we didn't get anything.
+  char zeros[sizeof(received_data)] = {};
+  EXPECT_EQ(0, memcmp(zeros, received_data, sizeof(received_data)));
+
+  // Check that we can still get all of the data.
+  ASSERT_THAT(RetryEINTR(recv)(t_, received_data, sizeof(received_data), 0),
+              SyscallSucceedsWithValue(sizeof(sent_data)));
+  EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+}
+
+TEST_P(TcpSocketTest, NoDelayDefault) {
+  int get = -1;
+  socklen_t get_len = sizeof(get);
+  EXPECT_THAT(getsockopt(s_, IPPROTO_TCP, TCP_NODELAY, &get, &get_len),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, kSockOptOff);
+}
+
+TEST_P(TcpSocketTest, SetNoDelay) {
+  ASSERT_THAT(
+      setsockopt(s_, IPPROTO_TCP, TCP_NODELAY, &kSockOptOn, sizeof(kSockOptOn)),
+      SyscallSucceeds());
+
+  int get = -1;
+  socklen_t get_len = sizeof(get);
+  EXPECT_THAT(getsockopt(s_, IPPROTO_TCP, TCP_NODELAY, &get, &get_len),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, kSockOptOn);
+
+  ASSERT_THAT(setsockopt(s_, IPPROTO_TCP, TCP_NODELAY, &kSockOptOff,
+                         sizeof(kSockOptOff)),
+              SyscallSucceeds());
+
+  EXPECT_THAT(getsockopt(s_, IPPROTO_TCP, TCP_NODELAY, &get, &get_len),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, kSockOptOff);
+}
+
+#ifndef TCP_INQ
+#define TCP_INQ 36
+#endif
+
+TEST_P(TcpSocketTest, TcpInqSetSockOpt) {
+  char buf[1024];
+  ASSERT_THAT(RetryEINTR(write)(s_, buf, sizeof(buf)),
+              SyscallSucceedsWithValue(sizeof(buf)));
+
+  // TCP_INQ is disabled by default.
+  int val = -1;
+  socklen_t slen = sizeof(val);
+  EXPECT_THAT(getsockopt(t_, SOL_TCP, TCP_INQ, &val, &slen),
+              SyscallSucceedsWithValue(0));
+  ASSERT_EQ(val, 0);
+
+  // Try to set TCP_INQ.
+  val = 1;
+  EXPECT_THAT(setsockopt(t_, SOL_TCP, TCP_INQ, &val, sizeof(val)),
+              SyscallSucceedsWithValue(0));
+  val = -1;
+  slen = sizeof(val);
+  EXPECT_THAT(getsockopt(t_, SOL_TCP, TCP_INQ, &val, &slen),
+              SyscallSucceedsWithValue(0));
+  ASSERT_EQ(val, 1);
+
+  // Try to unset TCP_INQ.
+  val = 0;
+  EXPECT_THAT(setsockopt(t_, SOL_TCP, TCP_INQ, &val, sizeof(val)),
+              SyscallSucceedsWithValue(0));
+  val = -1;
+  slen = sizeof(val);
+  EXPECT_THAT(getsockopt(t_, SOL_TCP, TCP_INQ, &val, &slen),
+              SyscallSucceedsWithValue(0));
+  ASSERT_EQ(val, 0);
+}
+
+TEST_P(TcpSocketTest, TcpInq) {
+  char buf[1024];
+  // Write more than one TCP segment.
+  int size = sizeof(buf);
+  int kChunk = sizeof(buf) / 4;
+  for (int i = 0; i < size; i += kChunk) {
+    ASSERT_THAT(RetryEINTR(write)(s_, buf, kChunk),
+                SyscallSucceedsWithValue(kChunk));
+  }
+
+  int val = 1;
+  kChunk = sizeof(buf) / 2;
+  EXPECT_THAT(setsockopt(t_, SOL_TCP, TCP_INQ, &val, sizeof(val)),
+              SyscallSucceedsWithValue(0));
+
+  // Wait when all data will be in the received queue.
+  while (true) {
+    ASSERT_THAT(ioctl(t_, TIOCINQ, &size), SyscallSucceeds());
+    if (size == sizeof(buf)) {
+      break;
+    }
+    absl::SleepFor(absl::Milliseconds(10));
+  }
+
+  struct msghdr msg = {};
+  std::vector<char> control(CMSG_SPACE(sizeof(int)));
+  size = sizeof(buf);
+  struct iovec iov;
+  for (int i = 0; size != 0; i += kChunk) {
+    msg.msg_control = &control[0];
+    msg.msg_controllen = control.size();
+
+    iov.iov_base = buf;
+    iov.iov_len = kChunk;
+    msg.msg_iov = &iov;
+    msg.msg_iovlen = 1;
+    ASSERT_THAT(RetryEINTR(recvmsg)(t_, &msg, 0),
+                SyscallSucceedsWithValue(kChunk));
+    size -= kChunk;
+
+    struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
+    ASSERT_NE(cmsg, nullptr);
+    ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(int)));
+    ASSERT_EQ(cmsg->cmsg_level, SOL_TCP);
+    ASSERT_EQ(cmsg->cmsg_type, TCP_INQ);
+
+    int inq = 0;
+    memcpy(&inq, CMSG_DATA(cmsg), sizeof(int));
+    ASSERT_EQ(inq, size);
+  }
+}
+
+TEST_P(TcpSocketTest, Tiocinq) {
+  char buf[1024];
+  size_t size = sizeof(buf);
+  ASSERT_THAT(RetryEINTR(write)(s_, buf, size), SyscallSucceedsWithValue(size));
+
+  uint32_t seed = time(nullptr);
+  const size_t max_chunk = size / 10;
+  while (size > 0) {
+    size_t chunk = (rand_r(&seed) % max_chunk) + 1;
+    ssize_t read = RetryEINTR(recvfrom)(t_, buf, chunk, 0, nullptr, nullptr);
+    ASSERT_THAT(read, SyscallSucceeds());
+    size -= read;
+
+    int inq = 0;
+    ASSERT_THAT(ioctl(t_, TIOCINQ, &inq), SyscallSucceeds());
+    ASSERT_EQ(inq, size);
+  }
+}
+
+TEST_P(TcpSocketTest, TcpSCMPriority) {
+  char buf[1024];
+  ASSERT_THAT(RetryEINTR(write)(s_, buf, sizeof(buf)),
+              SyscallSucceedsWithValue(sizeof(buf)));
+
+  int val = 1;
+  EXPECT_THAT(setsockopt(t_, SOL_TCP, TCP_INQ, &val, sizeof(val)),
+              SyscallSucceedsWithValue(0));
+  EXPECT_THAT(setsockopt(t_, SOL_SOCKET, SO_TIMESTAMP, &val, sizeof(val)),
+              SyscallSucceedsWithValue(0));
+
+  struct msghdr msg = {};
+  std::vector<char> control(
+      CMSG_SPACE(sizeof(struct timeval) + CMSG_SPACE(sizeof(int))));
+  struct iovec iov;
+  msg.msg_control = &control[0];
+  msg.msg_controllen = control.size();
+
+  iov.iov_base = buf;
+  iov.iov_len = sizeof(buf);
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+  ASSERT_THAT(RetryEINTR(recvmsg)(t_, &msg, 0),
+              SyscallSucceedsWithValue(sizeof(buf)));
+
+  struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
+  ASSERT_NE(cmsg, nullptr);
+  // TODO(b/78348848): SO_TIMESTAMP isn't implemented for TCP sockets.
+  if (!IsRunningOnGvisor() || cmsg->cmsg_level == SOL_SOCKET) {
+    ASSERT_EQ(cmsg->cmsg_level, SOL_SOCKET);
+    ASSERT_EQ(cmsg->cmsg_type, SO_TIMESTAMP);
+    ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(struct timeval)));
+
+    cmsg = CMSG_NXTHDR(&msg, cmsg);
+    ASSERT_NE(cmsg, nullptr);
+  }
+  ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(int)));
+  ASSERT_EQ(cmsg->cmsg_level, SOL_TCP);
+  ASSERT_EQ(cmsg->cmsg_type, TCP_INQ);
+
+  int inq = 0;
+  memcpy(&inq, CMSG_DATA(cmsg), sizeof(int));
+  ASSERT_EQ(inq, 0);
+
+  cmsg = CMSG_NXTHDR(&msg, cmsg);
+  ASSERT_EQ(cmsg, nullptr);
+}
+
+INSTANTIATE_TEST_SUITE_P(AllInetTests, TcpSocketTest,
+                         ::testing::Values(AF_INET, AF_INET6));
+
+// Fixture for tests parameterized by address family that don't want the fixture
+// to do things.
+using SimpleTcpSocketTest = ::testing::TestWithParam<int>;
+
+TEST_P(SimpleTcpSocketTest, SendUnconnected) {
+  int fd;
+  ASSERT_THAT(fd = socket(GetParam(), SOCK_STREAM, IPPROTO_TCP),
+              SyscallSucceeds());
+  FileDescriptor sock_fd(fd);
+
+  char data = '\0';
+  EXPECT_THAT(RetryEINTR(send)(fd, &data, sizeof(data), 0),
+              SyscallFailsWithErrno(EPIPE));
+}
+
+TEST_P(SimpleTcpSocketTest, SendtoWithoutAddressUnconnected) {
+  int fd;
+  ASSERT_THAT(fd = socket(GetParam(), SOCK_STREAM, IPPROTO_TCP),
+              SyscallSucceeds());
+  FileDescriptor sock_fd(fd);
+
+  char data = '\0';
+  EXPECT_THAT(RetryEINTR(sendto)(fd, &data, sizeof(data), 0, nullptr, 0),
+              SyscallFailsWithErrno(EPIPE));
+}
+
+TEST_P(SimpleTcpSocketTest, SendtoWithAddressUnconnected) {
+  int fd;
+  ASSERT_THAT(fd = socket(GetParam(), SOCK_STREAM, IPPROTO_TCP),
+              SyscallSucceeds());
+  FileDescriptor sock_fd(fd);
+
+  sockaddr_storage addr =
+      ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(GetParam()));
+  char data = '\0';
+  EXPECT_THAT(
+      RetryEINTR(sendto)(fd, &data, sizeof(data), 0,
+                         reinterpret_cast<sockaddr*>(&addr), sizeof(addr)),
+      SyscallFailsWithErrno(EPIPE));
+}
+
+TEST_P(SimpleTcpSocketTest, GetPeerNameUnconnected) {
+  int fd;
+  ASSERT_THAT(fd = socket(GetParam(), SOCK_STREAM, IPPROTO_TCP),
+              SyscallSucceeds());
+  FileDescriptor sock_fd(fd);
+
+  sockaddr_storage addr;
+  socklen_t addrlen = sizeof(addr);
+  EXPECT_THAT(getpeername(fd, reinterpret_cast<sockaddr*>(&addr), &addrlen),
+              SyscallFailsWithErrno(ENOTCONN));
+}
+
+TEST_P(TcpSocketTest, FullBuffer) {
+  // Set both FDs to be blocking.
+  int flags = 0;
+  ASSERT_THAT(flags = fcntl(s_, F_GETFL), SyscallSucceeds());
+  EXPECT_THAT(fcntl(s_, F_SETFL, flags & ~O_NONBLOCK), SyscallSucceeds());
+  flags = 0;
+  ASSERT_THAT(flags = fcntl(t_, F_GETFL), SyscallSucceeds());
+  EXPECT_THAT(fcntl(t_, F_SETFL, flags & ~O_NONBLOCK), SyscallSucceeds());
+
+  // 2500 was chosen as a small value that can be set on Linux.
+  int set_snd = 2500;
+  EXPECT_THAT(setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &set_snd, sizeof(set_snd)),
+              SyscallSucceedsWithValue(0));
+  int get_snd = -1;
+  socklen_t get_snd_len = sizeof(get_snd);
+  EXPECT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &get_snd, &get_snd_len),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_snd_len, sizeof(get_snd));
+  EXPECT_GT(get_snd, 0);
+
+  // 2500 was chosen as a small value that can be set on Linux and gVisor.
+  int set_rcv = 2500;
+  EXPECT_THAT(setsockopt(t_, SOL_SOCKET, SO_RCVBUF, &set_rcv, sizeof(set_rcv)),
+              SyscallSucceedsWithValue(0));
+  int get_rcv = -1;
+  socklen_t get_rcv_len = sizeof(get_rcv);
+  EXPECT_THAT(getsockopt(t_, SOL_SOCKET, SO_RCVBUF, &get_rcv, &get_rcv_len),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_rcv_len, sizeof(get_rcv));
+  EXPECT_GE(get_rcv, 2500);
+
+  // Quick sanity test.
+  EXPECT_LT(get_snd + get_rcv, 2500 * IOV_MAX);
+
+  char data[2500] = {};
+  std::vector<struct iovec> iovecs;
+  for (int i = 0; i < IOV_MAX; i++) {
+    struct iovec iov = {};
+    iov.iov_base = data;
+    iov.iov_len = sizeof(data);
+    iovecs.push_back(iov);
+  }
+  ScopedThread t([this, &iovecs]() {
+    int result = -1;
+    EXPECT_THAT(result = RetryEINTR(writev)(s_, iovecs.data(), iovecs.size()),
+                SyscallSucceeds());
+    EXPECT_GT(result, 1);
+    EXPECT_LT(result, sizeof(data) * iovecs.size());
+  });
+
+  char recv = 0;
+  EXPECT_THAT(RetryEINTR(read)(t_, &recv, 1), SyscallSucceedsWithValue(1));
+  EXPECT_THAT(close(t_), SyscallSucceedsWithValue(0));
+  t_ = -1;
+}
+
+TEST_P(TcpSocketTest, PollAfterShutdown) {
+  ScopedThread client_thread([this]() {
+    EXPECT_THAT(shutdown(s_, SHUT_WR), SyscallSucceedsWithValue(0));
+    struct pollfd poll_fd = {s_, POLLIN | POLLERR | POLLHUP, 0};
+    EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, 10000),
+                SyscallSucceedsWithValue(1));
+  });
+
+  EXPECT_THAT(shutdown(t_, SHUT_WR), SyscallSucceedsWithValue(0));
+  struct pollfd poll_fd = {t_, POLLIN | POLLERR | POLLHUP, 0};
+  EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, 10000),
+              SyscallSucceedsWithValue(1));
+}
+
+TEST_P(SimpleTcpSocketTest, NonBlockingConnectNoListener) {
+  // Initialize address to the loopback one.
+  sockaddr_storage addr =
+      ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(GetParam()));
+  socklen_t addrlen = sizeof(addr);
+
+  const FileDescriptor s =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
+
+  // Set the FD to O_NONBLOCK.
+  int opts;
+  ASSERT_THAT(opts = fcntl(s.get(), F_GETFL), SyscallSucceeds());
+  opts |= O_NONBLOCK;
+  ASSERT_THAT(fcntl(s.get(), F_SETFL, opts), SyscallSucceeds());
+
+  ASSERT_THAT(RetryEINTR(connect)(
+                  s.get(), reinterpret_cast<struct sockaddr*>(&addr), addrlen),
+              SyscallFailsWithErrno(EINPROGRESS));
+
+  // Now polling on the FD with a timeout should return 0 corresponding to no
+  // FDs ready.
+  struct pollfd poll_fd = {s.get(), POLLOUT, 0};
+  EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, 10000),
+              SyscallSucceedsWithValue(1));
+
+  int err;
+  socklen_t optlen = sizeof(err);
+  ASSERT_THAT(getsockopt(s.get(), SOL_SOCKET, SO_ERROR, &err, &optlen),
+              SyscallSucceeds());
+
+  EXPECT_EQ(err, ECONNREFUSED);
+}
+
+TEST_P(SimpleTcpSocketTest, NonBlockingConnect) {
+  const FileDescriptor listener =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
+
+  // Initialize address to the loopback one.
+  sockaddr_storage addr =
+      ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(GetParam()));
+  socklen_t addrlen = sizeof(addr);
+
+  // Bind to some port then start listening.
+  ASSERT_THAT(
+      bind(listener.get(), reinterpret_cast<struct sockaddr*>(&addr), addrlen),
+      SyscallSucceeds());
+
+  ASSERT_THAT(listen(listener.get(), SOMAXCONN), SyscallSucceeds());
+
+  FileDescriptor s =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
+
+  // Set the FD to O_NONBLOCK.
+  int opts;
+  ASSERT_THAT(opts = fcntl(s.get(), F_GETFL), SyscallSucceeds());
+  opts |= O_NONBLOCK;
+  ASSERT_THAT(fcntl(s.get(), F_SETFL, opts), SyscallSucceeds());
+
+  ASSERT_THAT(getsockname(listener.get(),
+                          reinterpret_cast<struct sockaddr*>(&addr), &addrlen),
+              SyscallSucceeds());
+
+  ASSERT_THAT(RetryEINTR(connect)(
+                  s.get(), reinterpret_cast<struct sockaddr*>(&addr), addrlen),
+              SyscallFailsWithErrno(EINPROGRESS));
+
+  int t;
+  ASSERT_THAT(t = RetryEINTR(accept)(listener.get(), nullptr, nullptr),
+              SyscallSucceeds());
+
+  // Now polling on the FD with a timeout should return 0 corresponding to no
+  // FDs ready.
+  struct pollfd poll_fd = {s.get(), POLLOUT, 0};
+  EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, 10000),
+              SyscallSucceedsWithValue(1));
+
+  int err;
+  socklen_t optlen = sizeof(err);
+  ASSERT_THAT(getsockopt(s.get(), SOL_SOCKET, SO_ERROR, &err, &optlen),
+              SyscallSucceeds());
+
+  EXPECT_EQ(err, 0);
+
+  EXPECT_THAT(close(t), SyscallSucceeds());
+}
+
+TEST_P(SimpleTcpSocketTest, NonBlockingConnectRemoteClose) {
+  const FileDescriptor listener =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
+
+  // Initialize address to the loopback one.
+  sockaddr_storage addr =
+      ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(GetParam()));
+  socklen_t addrlen = sizeof(addr);
+
+  // Bind to some port then start listening.
+  ASSERT_THAT(
+      bind(listener.get(), reinterpret_cast<struct sockaddr*>(&addr), addrlen),
+      SyscallSucceeds());
+
+  ASSERT_THAT(listen(listener.get(), SOMAXCONN), SyscallSucceeds());
+
+  FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(GetParam(), SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP));
+
+  ASSERT_THAT(getsockname(listener.get(),
+                          reinterpret_cast<struct sockaddr*>(&addr), &addrlen),
+              SyscallSucceeds());
+
+  ASSERT_THAT(RetryEINTR(connect)(
+                  s.get(), reinterpret_cast<struct sockaddr*>(&addr), addrlen),
+              SyscallFailsWithErrno(EINPROGRESS));
+
+  int t;
+  ASSERT_THAT(t = RetryEINTR(accept)(listener.get(), nullptr, nullptr),
+              SyscallSucceeds());
+
+  EXPECT_THAT(close(t), SyscallSucceeds());
+
+  // Now polling on the FD with a timeout should return 0 corresponding to no
+  // FDs ready.
+  struct pollfd poll_fd = {s.get(), POLLOUT, 0};
+  EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, 10000),
+              SyscallSucceedsWithValue(1));
+
+  ASSERT_THAT(RetryEINTR(connect)(
+                  s.get(), reinterpret_cast<struct sockaddr*>(&addr), addrlen),
+              SyscallSucceeds());
+
+  ASSERT_THAT(RetryEINTR(connect)(
+                  s.get(), reinterpret_cast<struct sockaddr*>(&addr), addrlen),
+              SyscallFailsWithErrno(EISCONN));
+}
+
+// Test that we get an ECONNREFUSED with a blocking socket when no one is
+// listening on the other end.
+TEST_P(SimpleTcpSocketTest, BlockingConnectRefused) {
+  FileDescriptor s =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
+
+  // Initialize address to the loopback one.
+  sockaddr_storage addr =
+      ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(GetParam()));
+  socklen_t addrlen = sizeof(addr);
+
+  ASSERT_THAT(RetryEINTR(connect)(
+                  s.get(), reinterpret_cast<struct sockaddr*>(&addr), addrlen),
+              SyscallFailsWithErrno(ECONNREFUSED));
+
+  // Avoiding triggering save in destructor of s.
+  EXPECT_THAT(close(s.release()), SyscallSucceeds());
+}
+
+// Test that connecting to a non-listening port and thus receiving a RST is
+// handled appropriately by the socket - the port that the socket was bound to
+// is released and the expected error is returned.
+TEST_P(SimpleTcpSocketTest, CleanupOnConnectionRefused) {
+  // Create a socket that is known to not be listening. As is it bound but not
+  // listening, when another socket connects to the port, it will refuse..
+  FileDescriptor bound_s =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
+
+  sockaddr_storage bound_addr =
+      ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(GetParam()));
+  socklen_t bound_addrlen = sizeof(bound_addr);
+
+  ASSERT_THAT(
+      bind(bound_s.get(), reinterpret_cast<struct sockaddr*>(&bound_addr),
+           bound_addrlen),
+      SyscallSucceeds());
+
+  // Get the addresses the socket is bound to because the port is chosen by the
+  // stack.
+  ASSERT_THAT(getsockname(bound_s.get(),
+                          reinterpret_cast<struct sockaddr*>(&bound_addr),
+                          &bound_addrlen),
+              SyscallSucceeds());
+
+  // Create, initialize, and bind the socket that is used to test connecting to
+  // the non-listening port.
+  FileDescriptor client_s =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
+  // Initialize client address to the loopback one.
+  sockaddr_storage client_addr =
+      ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(GetParam()));
+  socklen_t client_addrlen = sizeof(client_addr);
+
+  ASSERT_THAT(
+      bind(client_s.get(), reinterpret_cast<struct sockaddr*>(&client_addr),
+           client_addrlen),
+      SyscallSucceeds());
+
+  ASSERT_THAT(getsockname(client_s.get(),
+                          reinterpret_cast<struct sockaddr*>(&client_addr),
+                          &client_addrlen),
+              SyscallSucceeds());
+
+  // Now the test: connect to the bound but not listening socket with the
+  // client socket. The bound socket should return a RST and cause the client
+  // socket to return an error and clean itself up immediately.
+  // The error being ECONNREFUSED diverges with RFC 793, page 37, but does what
+  // Linux does.
+  ASSERT_THAT(connect(client_s.get(),
+                      reinterpret_cast<const struct sockaddr*>(&bound_addr),
+                      bound_addrlen),
+              SyscallFailsWithErrno(ECONNREFUSED));
+
+  FileDescriptor new_s =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
+
+  // Test binding to the address from the client socket. This should be okay
+  // if it was dropped correctly.
+  ASSERT_THAT(
+      bind(new_s.get(), reinterpret_cast<struct sockaddr*>(&client_addr),
+           client_addrlen),
+      SyscallSucceeds());
+
+  // Attempt #2, with the new socket and reused addr our connect should fail in
+  // the same way as before, not with an EADDRINUSE.
+  ASSERT_THAT(connect(client_s.get(),
+                      reinterpret_cast<const struct sockaddr*>(&bound_addr),
+                      bound_addrlen),
+              SyscallFailsWithErrno(ECONNREFUSED));
+}
+
+// Test that we get an ECONNREFUSED with a nonblocking socket.
+TEST_P(SimpleTcpSocketTest, NonBlockingConnectRefused) {
+  FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(GetParam(), SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP));
+
+  // Initialize address to the loopback one.
+  sockaddr_storage addr =
+      ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(GetParam()));
+  socklen_t addrlen = sizeof(addr);
+
+  ASSERT_THAT(RetryEINTR(connect)(
+                  s.get(), reinterpret_cast<struct sockaddr*>(&addr), addrlen),
+              SyscallFailsWithErrno(EINPROGRESS));
+
+  // We don't need to specify any events to get POLLHUP or POLLERR as these
+  // are added before the poll.
+  struct pollfd poll_fd = {s.get(), /*events=*/0, 0};
+  EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, 1000), SyscallSucceedsWithValue(1));
+
+  // The ECONNREFUSED should cause us to be woken up with POLLHUP.
+  EXPECT_NE(poll_fd.revents & (POLLHUP | POLLERR), 0);
+
+  // Avoiding triggering save in destructor of s.
+  EXPECT_THAT(close(s.release()), SyscallSucceeds());
+}
+
+// Test that setting a supported congestion control algorithm succeeds for an
+// unconnected TCP socket
+TEST_P(SimpleTcpSocketTest, SetCongestionControlSucceedsForSupported) {
+  // This is Linux's net/tcp.h TCP_CA_NAME_MAX.
+  const int kTcpCaNameMax = 16;
+
+  FileDescriptor s =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
+  {
+    const char kSetCC[kTcpCaNameMax] = "reno";
+    ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_CONGESTION, &kSetCC,
+                           strlen(kSetCC)),
+                SyscallSucceedsWithValue(0));
+
+    char got_cc[kTcpCaNameMax];
+    memset(got_cc, '1', sizeof(got_cc));
+    socklen_t optlen = sizeof(got_cc);
+    ASSERT_THAT(
+        getsockopt(s.get(), IPPROTO_TCP, TCP_CONGESTION, &got_cc, &optlen),
+        SyscallSucceedsWithValue(0));
+    // We ignore optlen here as the linux kernel sets optlen to the lower of the
+    // size of the buffer passed in or kTcpCaNameMax and not the length of the
+    // congestion control algorithm's actual name.
+    EXPECT_EQ(0, memcmp(got_cc, kSetCC, sizeof(kTcpCaNameMax)));
+  }
+  {
+    const char kSetCC[kTcpCaNameMax] = "cubic";
+    ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_CONGESTION, &kSetCC,
+                           strlen(kSetCC)),
+                SyscallSucceedsWithValue(0));
+
+    char got_cc[kTcpCaNameMax];
+    memset(got_cc, '1', sizeof(got_cc));
+    socklen_t optlen = sizeof(got_cc);
+    ASSERT_THAT(
+        getsockopt(s.get(), IPPROTO_TCP, TCP_CONGESTION, &got_cc, &optlen),
+        SyscallSucceedsWithValue(0));
+    // We ignore optlen here as the linux kernel sets optlen to the lower of the
+    // size of the buffer passed in or kTcpCaNameMax and not the length of the
+    // congestion control algorithm's actual name.
+    EXPECT_EQ(0, memcmp(got_cc, kSetCC, sizeof(kTcpCaNameMax)));
+  }
+}
+
+// This test verifies that a getsockopt(...TCP_CONGESTION) behaviour is
+// consistent between linux and gvisor when the passed in buffer is smaller than
+// kTcpCaNameMax.
+TEST_P(SimpleTcpSocketTest, SetGetTCPCongestionShortReadBuffer) {
+  FileDescriptor s =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
+  {
+    // Verify that getsockopt/setsockopt work with buffers smaller than
+    // kTcpCaNameMax.
+    const char kSetCC[] = "cubic";
+    ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_CONGESTION, &kSetCC,
+                           strlen(kSetCC)),
+                SyscallSucceedsWithValue(0));
+
+    char got_cc[sizeof(kSetCC)];
+    socklen_t optlen = sizeof(got_cc);
+    ASSERT_THAT(
+        getsockopt(s.get(), IPPROTO_TCP, TCP_CONGESTION, &got_cc, &optlen),
+        SyscallSucceedsWithValue(0));
+    EXPECT_EQ(sizeof(got_cc), optlen);
+    EXPECT_EQ(0, memcmp(got_cc, kSetCC, sizeof(got_cc)));
+  }
+}
+
+// This test verifies that a getsockopt(...TCP_CONGESTION) behaviour is
+// consistent between linux and gvisor when the passed in buffer is larger than
+// kTcpCaNameMax.
+TEST_P(SimpleTcpSocketTest, SetGetTCPCongestionLargeReadBuffer) {
+  // This is Linux's net/tcp.h TCP_CA_NAME_MAX.
+  const int kTcpCaNameMax = 16;
+
+  FileDescriptor s =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
+  {
+    // Verify that getsockopt works with buffers larger than
+    // kTcpCaNameMax.
+    const char kSetCC[] = "cubic";
+    ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_CONGESTION, &kSetCC,
+                           strlen(kSetCC)),
+                SyscallSucceedsWithValue(0));
+
+    char got_cc[kTcpCaNameMax + 5];
+    socklen_t optlen = sizeof(got_cc);
+    ASSERT_THAT(
+        getsockopt(s.get(), IPPROTO_TCP, TCP_CONGESTION, &got_cc, &optlen),
+        SyscallSucceedsWithValue(0));
+    // Linux copies the minimum of kTcpCaNameMax or the length of the passed in
+    // buffer and sets optlen to the number of bytes actually copied
+    // irrespective of the actual length of the congestion control name.
+    EXPECT_EQ(kTcpCaNameMax, optlen);
+    EXPECT_EQ(0, memcmp(got_cc, kSetCC, sizeof(kSetCC)));
+  }
+}
+
+// Test that setting an unsupported congestion control algorithm fails for an
+// unconnected TCP socket.
+TEST_P(SimpleTcpSocketTest, SetCongestionControlFailsForUnsupported) {
+  // This is Linux's net/tcp.h TCP_CA_NAME_MAX.
+  const int kTcpCaNameMax = 16;
+
+  FileDescriptor s =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
+  char old_cc[kTcpCaNameMax];
+  socklen_t optlen = sizeof(old_cc);
+  ASSERT_THAT(
+      getsockopt(s.get(), IPPROTO_TCP, TCP_CONGESTION, &old_cc, &optlen),
+      SyscallSucceedsWithValue(0));
+
+  const char kSetCC[] = "invalid_ca_kSetCC";
+  ASSERT_THAT(
+      setsockopt(s.get(), SOL_TCP, TCP_CONGESTION, &kSetCC, strlen(kSetCC)),
+      SyscallFailsWithErrno(ENOENT));
+
+  char got_cc[kTcpCaNameMax];
+  ASSERT_THAT(
+      getsockopt(s.get(), IPPROTO_TCP, TCP_CONGESTION, &got_cc, &optlen),
+      SyscallSucceedsWithValue(0));
+  // We ignore optlen here as the linux kernel sets optlen to the lower of the
+  // size of the buffer passed in or kTcpCaNameMax and not the length of the
+  // congestion control algorithm's actual name.
+  EXPECT_EQ(0, memcmp(got_cc, old_cc, sizeof(kTcpCaNameMax)));
+}
+
+TEST_P(SimpleTcpSocketTest, MaxSegDefault) {
+  FileDescriptor s =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
+
+  constexpr int kDefaultMSS = 536;
+  int tcp_max_seg;
+  socklen_t optlen = sizeof(tcp_max_seg);
+  ASSERT_THAT(
+      getsockopt(s.get(), IPPROTO_TCP, TCP_MAXSEG, &tcp_max_seg, &optlen),
+      SyscallSucceedsWithValue(0));
+
+  EXPECT_EQ(kDefaultMSS, tcp_max_seg);
+  EXPECT_EQ(sizeof(tcp_max_seg), optlen);
+}
+
+TEST_P(SimpleTcpSocketTest, SetMaxSeg) {
+  FileDescriptor s =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
+
+  constexpr int kDefaultMSS = 536;
+  constexpr int kTCPMaxSeg = 1024;
+  ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_MAXSEG, &kTCPMaxSeg,
+                         sizeof(kTCPMaxSeg)),
+              SyscallSucceedsWithValue(0));
+
+  // Linux actually never returns the user_mss value. It will always return the
+  // default MSS value defined above for an unconnected socket and always return
+  // the actual current MSS for a connected one.
+  int optval;
+  socklen_t optlen = sizeof(optval);
+  ASSERT_THAT(getsockopt(s.get(), IPPROTO_TCP, TCP_MAXSEG, &optval, &optlen),
+              SyscallSucceedsWithValue(0));
+
+  EXPECT_EQ(kDefaultMSS, optval);
+  EXPECT_EQ(sizeof(optval), optlen);
+}
+
+TEST_P(SimpleTcpSocketTest, SetMaxSegFailsForInvalidMSSValues) {
+  FileDescriptor s =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
+
+  {
+    constexpr int tcp_max_seg = 10;
+    ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_MAXSEG, &tcp_max_seg,
+                           sizeof(tcp_max_seg)),
+                SyscallFailsWithErrno(EINVAL));
+  }
+  {
+    constexpr int tcp_max_seg = 75000;
+    ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_MAXSEG, &tcp_max_seg,
+                           sizeof(tcp_max_seg)),
+                SyscallFailsWithErrno(EINVAL));
+  }
+}
+
+TEST_P(SimpleTcpSocketTest, SetTCPUserTimeout) {
+  FileDescriptor s =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
+
+  {
+    constexpr int kTCPUserTimeout = -1;
+    EXPECT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_USER_TIMEOUT,
+                           &kTCPUserTimeout, sizeof(kTCPUserTimeout)),
+                SyscallFailsWithErrno(EINVAL));
+  }
+
+  // kTCPUserTimeout is in milliseconds.
+  constexpr int kTCPUserTimeout = 100;
+  ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_USER_TIMEOUT,
+                         &kTCPUserTimeout, sizeof(kTCPUserTimeout)),
+              SyscallSucceedsWithValue(0));
+  int get = -1;
+  socklen_t get_len = sizeof(get);
+  ASSERT_THAT(
+      getsockopt(s.get(), IPPROTO_TCP, TCP_USER_TIMEOUT, &get, &get_len),
+      SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, kTCPUserTimeout);
+}
+
+TEST_P(SimpleTcpSocketTest, SetTCPDeferAcceptNeg) {
+  FileDescriptor s =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
+
+  // -ve TCP_DEFER_ACCEPT is same as setting it to zero.
+  constexpr int kNeg = -1;
+  EXPECT_THAT(
+      setsockopt(s.get(), IPPROTO_TCP, TCP_DEFER_ACCEPT, &kNeg, sizeof(kNeg)),
+      SyscallSucceeds());
+  int get = -1;
+  socklen_t get_len = sizeof(get);
+  ASSERT_THAT(
+      getsockopt(s.get(), IPPROTO_TCP, TCP_DEFER_ACCEPT, &get, &get_len),
+      SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, 0);
+}
+
+TEST_P(SimpleTcpSocketTest, GetTCPDeferAcceptDefault) {
+  FileDescriptor s =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
+
+  int get = -1;
+  socklen_t get_len = sizeof(get);
+  ASSERT_THAT(
+      getsockopt(s.get(), IPPROTO_TCP, TCP_DEFER_ACCEPT, &get, &get_len),
+      SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, 0);
+}
+
+TEST_P(SimpleTcpSocketTest, SetTCPDeferAcceptGreaterThanZero) {
+  FileDescriptor s =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
+  // kTCPDeferAccept is in seconds.
+  // NOTE: linux translates seconds to # of retries and back from
+  //   #of retries to seconds. Which means only certain values
+  //   translate back exactly. That's why we use 3 here, a value of
+  //   5 will result in us getting back 7 instead of 5 in the
+  //   getsockopt.
+  constexpr int kTCPDeferAccept = 3;
+  ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_DEFER_ACCEPT,
+                         &kTCPDeferAccept, sizeof(kTCPDeferAccept)),
+              SyscallSucceeds());
+  int get = -1;
+  socklen_t get_len = sizeof(get);
+  ASSERT_THAT(
+      getsockopt(s.get(), IPPROTO_TCP, TCP_DEFER_ACCEPT, &get, &get_len),
+      SyscallSucceeds());
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, kTCPDeferAccept);
+}
+
+TEST_P(SimpleTcpSocketTest, RecvOnClosedSocket) {
+  auto s =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
+  char buf[1];
+  EXPECT_THAT(recv(s.get(), buf, 0, 0), SyscallFailsWithErrno(ENOTCONN));
+  EXPECT_THAT(recv(s.get(), buf, sizeof(buf), 0),
+              SyscallFailsWithErrno(ENOTCONN));
+}
+
+TEST_P(SimpleTcpSocketTest, TCPConnectSoRcvBufRace) {
+  auto s = ASSERT_NO_ERRNO_AND_VALUE(
+      Socket(GetParam(), SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP));
+  sockaddr_storage addr =
+      ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(GetParam()));
+  socklen_t addrlen = sizeof(addr);
+
+  RetryEINTR(connect)(s.get(), reinterpret_cast<struct sockaddr*>(&addr),
+                      addrlen);
+  int buf_sz = 1 << 18;
+  EXPECT_THAT(
+      setsockopt(s.get(), SOL_SOCKET, SO_RCVBUF, &buf_sz, sizeof(buf_sz)),
+      SyscallSucceedsWithValue(0));
+}
+
+TEST_P(SimpleTcpSocketTest, SetTCPSynCntLessThanOne) {
+  FileDescriptor s =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
+
+  int get = -1;
+  socklen_t get_len = sizeof(get);
+  ASSERT_THAT(getsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &get, &get_len),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_len, sizeof(get));
+  int default_syn_cnt = get;
+
+  {
+    // TCP_SYNCNT less than 1 should be rejected with an EINVAL.
+    constexpr int kZero = 0;
+    EXPECT_THAT(
+        setsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &kZero, sizeof(kZero)),
+        SyscallFailsWithErrno(EINVAL));
+
+    // TCP_SYNCNT less than 1 should be rejected with an EINVAL.
+    constexpr int kNeg = -1;
+    EXPECT_THAT(
+        setsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &kNeg, sizeof(kNeg)),
+        SyscallFailsWithErrno(EINVAL));
+
+    int get = -1;
+    socklen_t get_len = sizeof(get);
+
+    ASSERT_THAT(getsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &get, &get_len),
+                SyscallSucceedsWithValue(0));
+    EXPECT_EQ(get_len, sizeof(get));
+    EXPECT_EQ(default_syn_cnt, get);
+  }
+}
+
+TEST_P(SimpleTcpSocketTest, GetTCPSynCntDefault) {
+  FileDescriptor s =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
+
+  int get = -1;
+  socklen_t get_len = sizeof(get);
+  constexpr int kDefaultSynCnt = 6;
+
+  ASSERT_THAT(getsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &get, &get_len),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, kDefaultSynCnt);
+}
+
+TEST_P(SimpleTcpSocketTest, SetTCPSynCntGreaterThanOne) {
+  FileDescriptor s =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
+  constexpr int kTCPSynCnt = 20;
+  ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &kTCPSynCnt,
+                         sizeof(kTCPSynCnt)),
+              SyscallSucceeds());
+
+  int get = -1;
+  socklen_t get_len = sizeof(get);
+  ASSERT_THAT(getsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &get, &get_len),
+              SyscallSucceeds());
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, kTCPSynCnt);
+}
+
+TEST_P(SimpleTcpSocketTest, SetTCPSynCntAboveMax) {
+  FileDescriptor s =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
+  int get = -1;
+  socklen_t get_len = sizeof(get);
+  ASSERT_THAT(getsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &get, &get_len),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_len, sizeof(get));
+  int default_syn_cnt = get;
+  {
+    constexpr int kTCPSynCnt = 256;
+    ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &kTCPSynCnt,
+                           sizeof(kTCPSynCnt)),
+                SyscallFailsWithErrno(EINVAL));
+
+    int get = -1;
+    socklen_t get_len = sizeof(get);
+    ASSERT_THAT(getsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &get, &get_len),
+                SyscallSucceeds());
+    EXPECT_EQ(get_len, sizeof(get));
+    EXPECT_EQ(get, default_syn_cnt);
+  }
+}
+
+TEST_P(SimpleTcpSocketTest, SetTCPWindowClampBelowMinRcvBuf) {
+  FileDescriptor s =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
+
+  // Discover minimum receive buf by setting a really low value
+  // for the receive buffer.
+  constexpr int kZero = 0;
+  EXPECT_THAT(setsockopt(s.get(), SOL_SOCKET, SO_RCVBUF, &kZero, sizeof(kZero)),
+              SyscallSucceeds());
+
+  // Now retrieve the minimum value for SO_RCVBUF as the set above should
+  // have caused SO_RCVBUF for the socket to be set to the minimum.
+  int get = -1;
+  socklen_t get_len = sizeof(get);
+  ASSERT_THAT(getsockopt(s.get(), SOL_SOCKET, SO_RCVBUF, &get, &get_len),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_len, sizeof(get));
+  int min_so_rcvbuf = get;
+
+  {
+    // TCP_WINDOW_CLAMP less than min_so_rcvbuf/2 should be set to
+    // min_so_rcvbuf/2.
+    int below_half_min_rcvbuf = min_so_rcvbuf / 2 - 1;
+    EXPECT_THAT(
+        setsockopt(s.get(), IPPROTO_TCP, TCP_WINDOW_CLAMP,
+                   &below_half_min_rcvbuf, sizeof(below_half_min_rcvbuf)),
+        SyscallSucceeds());
+
+    int get = -1;
+    socklen_t get_len = sizeof(get);
+
+    ASSERT_THAT(
+        getsockopt(s.get(), IPPROTO_TCP, TCP_WINDOW_CLAMP, &get, &get_len),
+        SyscallSucceedsWithValue(0));
+    EXPECT_EQ(get_len, sizeof(get));
+    EXPECT_EQ(min_so_rcvbuf / 2, get);
+  }
+}
+
+TEST_P(SimpleTcpSocketTest, SetTCPWindowClampZeroClosedSocket) {
+  FileDescriptor s =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
+  constexpr int kZero = 0;
+  ASSERT_THAT(
+      setsockopt(s.get(), IPPROTO_TCP, TCP_WINDOW_CLAMP, &kZero, sizeof(kZero)),
+      SyscallSucceeds());
+
+  int get = -1;
+  socklen_t get_len = sizeof(get);
+  ASSERT_THAT(
+      getsockopt(s.get(), IPPROTO_TCP, TCP_WINDOW_CLAMP, &get, &get_len),
+      SyscallSucceeds());
+  EXPECT_EQ(get_len, sizeof(get));
+  EXPECT_EQ(get, kZero);
+}
+
+TEST_P(SimpleTcpSocketTest, SetTCPWindowClampAboveHalfMinRcvBuf) {
+  FileDescriptor s =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
+
+  // Discover minimum receive buf by setting a really low value
+  // for the receive buffer.
+  constexpr int kZero = 0;
+  EXPECT_THAT(setsockopt(s.get(), SOL_SOCKET, SO_RCVBUF, &kZero, sizeof(kZero)),
+              SyscallSucceeds());
+
+  // Now retrieve the minimum value for SO_RCVBUF as the set above should
+  // have caused SO_RCVBUF for the socket to be set to the minimum.
+  int get = -1;
+  socklen_t get_len = sizeof(get);
+  ASSERT_THAT(getsockopt(s.get(), SOL_SOCKET, SO_RCVBUF, &get, &get_len),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(get_len, sizeof(get));
+  int min_so_rcvbuf = get;
+
+  {
+    int above_half_min_rcv_buf = min_so_rcvbuf / 2 + 1;
+    EXPECT_THAT(
+        setsockopt(s.get(), IPPROTO_TCP, TCP_WINDOW_CLAMP,
+                   &above_half_min_rcv_buf, sizeof(above_half_min_rcv_buf)),
+        SyscallSucceeds());
+
+    int get = -1;
+    socklen_t get_len = sizeof(get);
+
+    ASSERT_THAT(
+        getsockopt(s.get(), IPPROTO_TCP, TCP_WINDOW_CLAMP, &get, &get_len),
+        SyscallSucceedsWithValue(0));
+    EXPECT_EQ(get_len, sizeof(get));
+    EXPECT_EQ(above_half_min_rcv_buf, get);
+  }
+}
+
+INSTANTIATE_TEST_SUITE_P(AllInetTests, SimpleTcpSocketTest,
+                         ::testing::Values(AF_INET, AF_INET6));
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/tgkill.cc b/test/syscalls/linux/tgkill.cc
new file mode 100644
index 000000000..80acae5de
--- /dev/null
+++ b/test/syscalls/linux/tgkill.cc
@@ -0,0 +1,48 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "test/util/signal_util.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(TgkillTest, InvalidTID) {
+  EXPECT_THAT(tgkill(getpid(), -1, 0), SyscallFailsWithErrno(EINVAL));
+  EXPECT_THAT(tgkill(getpid(), 0, 0), SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(TgkillTest, InvalidTGID) {
+  EXPECT_THAT(tgkill(-1, gettid(), 0), SyscallFailsWithErrno(EINVAL));
+  EXPECT_THAT(tgkill(0, gettid(), 0), SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(TgkillTest, ValidInput) {
+  EXPECT_THAT(tgkill(getpid(), gettid(), 0), SyscallSucceeds());
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/time.cc b/test/syscalls/linux/time.cc
new file mode 100644
index 000000000..e75bba669
--- /dev/null
+++ b/test/syscalls/linux/time.cc
@@ -0,0 +1,107 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <time.h>
+
+#include "gtest/gtest.h"
+#include "test/util/proc_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+constexpr long kFudgeSeconds = 5;
+
+#if defined(__x86_64__) || defined(__i386__)
+// Mimics the time(2) wrapper from glibc prior to 2.15.
+time_t vsyscall_time(time_t* t) {
+  constexpr uint64_t kVsyscallTimeEntry = 0xffffffffff600400;
+  return reinterpret_cast<time_t (*)(time_t*)>(kVsyscallTimeEntry)(t);
+}
+
+TEST(TimeTest, VsyscallTime_Succeeds) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsVsyscallEnabled()));
+
+  time_t t1, t2;
+
+  {
+    const DisableSave ds;  // Timing assertions.
+    EXPECT_THAT(time(&t1), SyscallSucceeds());
+    EXPECT_THAT(vsyscall_time(&t2), SyscallSucceeds());
+  }
+
+  // Time should be monotonic.
+  EXPECT_LE(static_cast<long>(t1), static_cast<long>(t2));
+
+  // Check that it's within kFudge seconds.
+  EXPECT_LE(static_cast<long>(t2), static_cast<long>(t1) + kFudgeSeconds);
+
+  // Redo with save.
+  EXPECT_THAT(time(&t1), SyscallSucceeds());
+  EXPECT_THAT(vsyscall_time(&t2), SyscallSucceeds());
+
+  // Time should be monotonic.
+  EXPECT_LE(static_cast<long>(t1), static_cast<long>(t2));
+}
+
+TEST(TimeTest, VsyscallTime_InvalidAddressSIGSEGV) {
+  EXPECT_EXIT(vsyscall_time(reinterpret_cast<time_t*>(0x1)),
+              ::testing::KilledBySignal(SIGSEGV), "");
+}
+
+// Mimics the gettimeofday(2) wrapper from the Go runtime <= 1.2.
+int vsyscall_gettimeofday(struct timeval* tv, struct timezone* tz) {
+  constexpr uint64_t kVsyscallGettimeofdayEntry = 0xffffffffff600000;
+  return reinterpret_cast<int (*)(struct timeval*, struct timezone*)>(
+      kVsyscallGettimeofdayEntry)(tv, tz);
+}
+
+TEST(TimeTest, VsyscallGettimeofday_Succeeds) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsVsyscallEnabled()));
+
+  struct timeval tv1, tv2;
+  struct timezone tz1, tz2;
+
+  {
+    const DisableSave ds;  // Timing assertions.
+    EXPECT_THAT(gettimeofday(&tv1, &tz1), SyscallSucceeds());
+    EXPECT_THAT(vsyscall_gettimeofday(&tv2, &tz2), SyscallSucceeds());
+  }
+
+  // See above.
+  EXPECT_LE(static_cast<long>(tv1.tv_sec), static_cast<long>(tv2.tv_sec));
+  EXPECT_LE(static_cast<long>(tv2.tv_sec),
+            static_cast<long>(tv1.tv_sec) + kFudgeSeconds);
+
+  // Redo with save.
+  EXPECT_THAT(gettimeofday(&tv1, &tz1), SyscallSucceeds());
+  EXPECT_THAT(vsyscall_gettimeofday(&tv2, &tz2), SyscallSucceeds());
+}
+
+TEST(TimeTest, VsyscallGettimeofday_InvalidAddressSIGSEGV) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsVsyscallEnabled()));
+
+  EXPECT_EXIT(vsyscall_gettimeofday(reinterpret_cast<struct timeval*>(0x1),
+                                    reinterpret_cast<struct timezone*>(0x1)),
+              ::testing::KilledBySignal(SIGSEGV), "");
+}
+#endif
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/timerfd.cc b/test/syscalls/linux/timerfd.cc
new file mode 100644
index 000000000..c4f8fdd7a
--- /dev/null
+++ b/test/syscalls/linux/timerfd.cc
@@ -0,0 +1,273 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <poll.h>
+#include <sys/timerfd.h>
+#include <time.h>
+
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/posix_error.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// Wrapper around timerfd_create(2) that returns a FileDescriptor.
+PosixErrorOr<FileDescriptor> TimerfdCreate(int clockid, int flags) {
+  int fd = timerfd_create(clockid, flags);
+  MaybeSave();
+  if (fd < 0) {
+    return PosixError(errno, "timerfd_create failed");
+  }
+  return FileDescriptor(fd);
+}
+
+// In tests that race a timerfd with a sleep, some slack is required because:
+//
+// - Timerfd expirations are asynchronous with respect to nanosleeps.
+//
+// - Because clock_gettime(CLOCK_MONOTONIC) is implemented through the VDSO,
+// it technically uses a closely-related, but distinct, time domain from the
+// CLOCK_MONOTONIC used to trigger timerfd expirations. The same applies to
+// CLOCK_BOOTTIME which is an alias for CLOCK_MONOTONIC.
+absl::Duration TimerSlack() { return absl::Milliseconds(500); }
+
+class TimerfdTest : public ::testing::TestWithParam<int> {};
+
+TEST_P(TimerfdTest, IsInitiallyStopped) {
+  auto const tfd = ASSERT_NO_ERRNO_AND_VALUE(TimerfdCreate(GetParam(), 0));
+  struct itimerspec its = {};
+  ASSERT_THAT(timerfd_gettime(tfd.get(), &its), SyscallSucceeds());
+  EXPECT_EQ(0, its.it_value.tv_sec);
+  EXPECT_EQ(0, its.it_value.tv_nsec);
+}
+
+TEST_P(TimerfdTest, SingleShot) {
+  constexpr absl::Duration kDelay = absl::Seconds(1);
+
+  auto const tfd = ASSERT_NO_ERRNO_AND_VALUE(TimerfdCreate(GetParam(), 0));
+  struct itimerspec its = {};
+  its.it_value = absl::ToTimespec(kDelay);
+  ASSERT_THAT(timerfd_settime(tfd.get(), /* flags = */ 0, &its, nullptr),
+              SyscallSucceeds());
+
+  // The timer should fire exactly once since the interval is zero.
+  absl::SleepFor(kDelay + TimerSlack());
+  uint64_t val = 0;
+  ASSERT_THAT(ReadFd(tfd.get(), &val, sizeof(uint64_t)),
+              SyscallSucceedsWithValue(sizeof(uint64_t)));
+  EXPECT_EQ(1, val);
+}
+
+TEST_P(TimerfdTest, Periodic) {
+  constexpr absl::Duration kDelay = absl::Seconds(1);
+  constexpr int kPeriods = 3;
+
+  auto const tfd = ASSERT_NO_ERRNO_AND_VALUE(TimerfdCreate(GetParam(), 0));
+  struct itimerspec its = {};
+  its.it_value = absl::ToTimespec(kDelay);
+  its.it_interval = absl::ToTimespec(kDelay);
+  ASSERT_THAT(timerfd_settime(tfd.get(), /* flags = */ 0, &its, nullptr),
+              SyscallSucceeds());
+
+  // Expect to see at least kPeriods expirations. More may occur due to the
+  // timer slack, or due to delays from scheduling or save/restore.
+  absl::SleepFor(kPeriods * kDelay + TimerSlack());
+  uint64_t val = 0;
+  ASSERT_THAT(ReadFd(tfd.get(), &val, sizeof(uint64_t)),
+              SyscallSucceedsWithValue(sizeof(uint64_t)));
+  EXPECT_GE(val, kPeriods);
+}
+
+TEST_P(TimerfdTest, BlockingRead) {
+  constexpr absl::Duration kDelay = absl::Seconds(3);
+
+  auto const tfd = ASSERT_NO_ERRNO_AND_VALUE(TimerfdCreate(GetParam(), 0));
+  struct itimerspec its = {};
+  its.it_value.tv_sec = absl::ToInt64Seconds(kDelay);
+  auto const start_time = absl::Now();
+  ASSERT_THAT(timerfd_settime(tfd.get(), /* flags = */ 0, &its, nullptr),
+              SyscallSucceeds());
+
+  // read should block until the timer fires.
+  uint64_t val = 0;
+  ASSERT_THAT(ReadFd(tfd.get(), &val, sizeof(uint64_t)),
+              SyscallSucceedsWithValue(sizeof(uint64_t)));
+  auto const end_time = absl::Now();
+  EXPECT_EQ(1, val);
+  EXPECT_GE((end_time - start_time) + TimerSlack(), kDelay);
+}
+
+TEST_P(TimerfdTest, NonblockingRead_NoRandomSave) {
+  constexpr absl::Duration kDelay = absl::Seconds(5);
+
+  auto const tfd =
+      ASSERT_NO_ERRNO_AND_VALUE(TimerfdCreate(GetParam(), TFD_NONBLOCK));
+
+  // Since the timer is initially disabled and has never fired, read should
+  // return EAGAIN.
+  uint64_t val = 0;
+  ASSERT_THAT(ReadFd(tfd.get(), &val, sizeof(uint64_t)),
+              SyscallFailsWithErrno(EAGAIN));
+
+  DisableSave ds;  // Timing-sensitive.
+
+  // Arm the timer.
+  struct itimerspec its = {};
+  its.it_value.tv_sec = absl::ToInt64Seconds(kDelay);
+  ASSERT_THAT(timerfd_settime(tfd.get(), /* flags = */ 0, &its, nullptr),
+              SyscallSucceeds());
+
+  // Since the timer has not yet fired, read should return EAGAIN.
+  ASSERT_THAT(ReadFd(tfd.get(), &val, sizeof(uint64_t)),
+              SyscallFailsWithErrno(EAGAIN));
+
+  ds.reset();  // No longer timing-sensitive.
+
+  // After the timer fires, read should indicate 1 expiration.
+  absl::SleepFor(kDelay + TimerSlack());
+  ASSERT_THAT(ReadFd(tfd.get(), &val, sizeof(uint64_t)),
+              SyscallSucceedsWithValue(sizeof(uint64_t)));
+  EXPECT_EQ(1, val);
+
+  // The successful read should have reset the number of expirations.
+  ASSERT_THAT(ReadFd(tfd.get(), &val, sizeof(uint64_t)),
+              SyscallFailsWithErrno(EAGAIN));
+}
+
+TEST_P(TimerfdTest, BlockingPoll_SetTimeResetsExpirations) {
+  constexpr absl::Duration kDelay = absl::Seconds(3);
+
+  auto const tfd =
+      ASSERT_NO_ERRNO_AND_VALUE(TimerfdCreate(GetParam(), TFD_NONBLOCK));
+  struct itimerspec its = {};
+  its.it_value.tv_sec = absl::ToInt64Seconds(kDelay);
+  auto const start_time = absl::Now();
+  ASSERT_THAT(timerfd_settime(tfd.get(), /* flags = */ 0, &its, nullptr),
+              SyscallSucceeds());
+
+  // poll should block until the timer fires.
+  struct pollfd pfd = {};
+  pfd.fd = tfd.get();
+  pfd.events = POLLIN;
+  ASSERT_THAT(poll(&pfd, /* nfds = */ 1,
+                   /* timeout = */ 2 * absl::ToInt64Seconds(kDelay) * 1000),
+              SyscallSucceedsWithValue(1));
+  auto const end_time = absl::Now();
+  EXPECT_EQ(POLLIN, pfd.revents);
+  EXPECT_GE((end_time - start_time) + TimerSlack(), kDelay);
+
+  // Call timerfd_settime again with a value of 0. This should reset the number
+  // of expirations to 0, causing read to return EAGAIN since the timerfd is
+  // non-blocking.
+  its.it_value.tv_sec = 0;
+  ASSERT_THAT(timerfd_settime(tfd.get(), /* flags = */ 0, &its, nullptr),
+              SyscallSucceeds());
+  uint64_t val = 0;
+  ASSERT_THAT(ReadFd(tfd.get(), &val, sizeof(uint64_t)),
+              SyscallFailsWithErrno(EAGAIN));
+}
+
+TEST_P(TimerfdTest, SetAbsoluteTime) {
+  constexpr absl::Duration kDelay = absl::Seconds(3);
+
+  // Use a non-blocking timerfd so that if TFD_TIMER_ABSTIME is incorrectly
+  // non-functional, we get EAGAIN rather than a test timeout.
+  auto const tfd =
+      ASSERT_NO_ERRNO_AND_VALUE(TimerfdCreate(GetParam(), TFD_NONBLOCK));
+  struct itimerspec its = {};
+  ASSERT_THAT(clock_gettime(GetParam(), &its.it_value), SyscallSucceeds());
+  its.it_value.tv_sec += absl::ToInt64Seconds(kDelay);
+  ASSERT_THAT(timerfd_settime(tfd.get(), TFD_TIMER_ABSTIME, &its, nullptr),
+              SyscallSucceeds());
+
+  absl::SleepFor(kDelay + TimerSlack());
+  uint64_t val = 0;
+  ASSERT_THAT(ReadFd(tfd.get(), &val, sizeof(uint64_t)),
+              SyscallSucceedsWithValue(sizeof(uint64_t)));
+  EXPECT_EQ(1, val);
+}
+
+TEST_P(TimerfdTest, IllegalSeek) {
+  auto const tfd = ASSERT_NO_ERRNO_AND_VALUE(TimerfdCreate(GetParam(), 0));
+  if (!IsRunningWithVFS1()) {
+    EXPECT_THAT(lseek(tfd.get(), 0, SEEK_SET), SyscallFailsWithErrno(ESPIPE));
+  }
+}
+
+TEST_P(TimerfdTest, IllegalPread) {
+  auto const tfd = ASSERT_NO_ERRNO_AND_VALUE(TimerfdCreate(GetParam(), 0));
+  int val;
+  EXPECT_THAT(pread(tfd.get(), &val, sizeof(val), 0),
+              SyscallFailsWithErrno(ESPIPE));
+}
+
+TEST_P(TimerfdTest, IllegalPwrite) {
+  auto const tfd = ASSERT_NO_ERRNO_AND_VALUE(TimerfdCreate(GetParam(), 0));
+  EXPECT_THAT(pwrite(tfd.get(), "x", 1, 0), SyscallFailsWithErrno(ESPIPE));
+  if (!IsRunningWithVFS1()) {
+  }
+}
+
+TEST_P(TimerfdTest, IllegalWrite) {
+  auto const tfd =
+      ASSERT_NO_ERRNO_AND_VALUE(TimerfdCreate(GetParam(), TFD_NONBLOCK));
+  uint64_t val = 0;
+  EXPECT_THAT(write(tfd.get(), &val, sizeof(val)),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+std::string PrintClockId(::testing::TestParamInfo<int> info) {
+  switch (info.param) {
+    case CLOCK_MONOTONIC:
+      return "CLOCK_MONOTONIC";
+    case CLOCK_BOOTTIME:
+      return "CLOCK_BOOTTIME";
+    default:
+      return absl::StrCat(info.param);
+  }
+}
+
+INSTANTIATE_TEST_SUITE_P(AllTimerTypes, TimerfdTest,
+                         ::testing::Values(CLOCK_MONOTONIC, CLOCK_BOOTTIME),
+                         PrintClockId);
+
+TEST(TimerfdClockRealtimeTest, ClockRealtime) {
+  // Since CLOCK_REALTIME can, by definition, change, we can't make any
+  // non-flaky assertions about the amount of time it takes for a
+  // CLOCK_REALTIME-based timer to expire. Just check that it expires at all,
+  // and hope it happens before the test times out.
+  constexpr int kDelaySecs = 1;
+
+  auto const tfd = ASSERT_NO_ERRNO_AND_VALUE(TimerfdCreate(CLOCK_REALTIME, 0));
+  struct itimerspec its = {};
+  its.it_value.tv_sec = kDelaySecs;
+  ASSERT_THAT(timerfd_settime(tfd.get(), /* flags = */ 0, &its, nullptr),
+              SyscallSucceeds());
+
+  uint64_t val = 0;
+  ASSERT_THAT(ReadFd(tfd.get(), &val, sizeof(uint64_t)),
+              SyscallSucceedsWithValue(sizeof(uint64_t)));
+  EXPECT_EQ(1, val);
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/timers.cc b/test/syscalls/linux/timers.cc
new file mode 100644
index 000000000..4b3c44527
--- /dev/null
+++ b/test/syscalls/linux/timers.cc
@@ -0,0 +1,662 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <signal.h>
+#include <sys/resource.h>
+#include <sys/time.h>
+#include <syscall.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <atomic>
+
+#include "gtest/gtest.h"
+#include "absl/flags/flag.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/util/cleanup.h"
+#include "test/util/logging.h"
+#include "test/util/multiprocess_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/signal_util.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+ABSL_FLAG(bool, timers_test_sleep, false,
+          "If true, sleep forever instead of running tests.");
+
+using ::testing::_;
+using ::testing::AnyOf;
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+#ifndef CPUCLOCK_PROF
+#define CPUCLOCK_PROF 0
+#endif  // CPUCLOCK_PROF
+
+PosixErrorOr<absl::Duration> ProcessCPUTime(pid_t pid) {
+  // Use pid-specific CPUCLOCK_PROF, which is the clock used to enforce
+  // RLIMIT_CPU.
+  clockid_t clockid = (~static_cast<clockid_t>(pid) << 3) | CPUCLOCK_PROF;
+
+  struct timespec ts;
+  int ret = clock_gettime(clockid, &ts);
+  if (ret < 0) {
+    return PosixError(errno, "clock_gettime failed");
+  }
+
+  return absl::DurationFromTimespec(ts);
+}
+
+void NoopSignalHandler(int signo) {
+  TEST_CHECK_MSG(SIGXCPU == signo,
+                 "NoopSigHandler did not receive expected signal");
+}
+
+void UninstallingSignalHandler(int signo) {
+  TEST_CHECK_MSG(SIGXCPU == signo,
+                 "UninstallingSignalHandler did not receive expected signal");
+  struct sigaction rev_action;
+  rev_action.sa_handler = SIG_DFL;
+  rev_action.sa_flags = 0;
+  sigemptyset(&rev_action.sa_mask);
+  sigaction(SIGXCPU, &rev_action, nullptr);
+}
+
+TEST(TimerTest, ProcessKilledOnCPUSoftLimit) {
+  constexpr absl::Duration kSoftLimit = absl::Seconds(1);
+  constexpr absl::Duration kHardLimit = absl::Seconds(3);
+
+  struct rlimit cpu_limits;
+  cpu_limits.rlim_cur = absl::ToInt64Seconds(kSoftLimit);
+  cpu_limits.rlim_max = absl::ToInt64Seconds(kHardLimit);
+
+  int pid = fork();
+  MaybeSave();
+  if (pid == 0) {
+    TEST_PCHECK(setrlimit(RLIMIT_CPU, &cpu_limits) == 0);
+    MaybeSave();
+    for (;;) {
+    }
+  }
+  ASSERT_THAT(pid, SyscallSucceeds());
+  auto c = Cleanup([pid] {
+    int status;
+    EXPECT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid));
+    EXPECT_TRUE(WIFSIGNALED(status));
+    EXPECT_EQ(WTERMSIG(status), SIGXCPU);
+  });
+
+  // Wait for the child to exit, but do not reap it. This will allow us to check
+  // its CPU usage while it is zombied.
+  EXPECT_THAT(waitid(P_PID, pid, nullptr, WEXITED | WNOWAIT),
+              SyscallSucceeds());
+
+  // Assert that the child spent 1s of CPU before getting killed.
+  //
+  // We must be careful to use CPUCLOCK_PROF, the same clock used for RLIMIT_CPU
+  // enforcement, to get correct results. Note that this is slightly different
+  // from rusage-reported CPU usage:
+  //
+  // RLIMIT_CPU, CPUCLOCK_PROF use kernel/sched/cputime.c:thread_group_cputime.
+  // rusage uses kernel/sched/cputime.c:thread_group_cputime_adjusted.
+  absl::Duration cpu = ASSERT_NO_ERRNO_AND_VALUE(ProcessCPUTime(pid));
+  EXPECT_GE(cpu, kSoftLimit);
+
+  // Child did not make it to the hard limit.
+  //
+  // Linux sends SIGXCPU synchronously with CPU tick updates. See
+  // kernel/time/timer.c:update_process_times:
+  //   => account_process_tick  // update task CPU usage.
+  //   => run_posix_cpu_timers  // enforce RLIMIT_CPU, sending signal.
+  //
+  // Thus, only chance for this to flake is if the system time required to
+  // deliver the signal exceeds 2s.
+  EXPECT_LT(cpu, kHardLimit);
+}
+
+TEST(TimerTest, ProcessPingedRepeatedlyAfterCPUSoftLimit) {
+  struct sigaction new_action;
+  new_action.sa_handler = UninstallingSignalHandler;
+  new_action.sa_flags = 0;
+  sigemptyset(&new_action.sa_mask);
+
+  constexpr absl::Duration kSoftLimit = absl::Seconds(1);
+  constexpr absl::Duration kHardLimit = absl::Seconds(10);
+
+  struct rlimit cpu_limits;
+  cpu_limits.rlim_cur = absl::ToInt64Seconds(kSoftLimit);
+  cpu_limits.rlim_max = absl::ToInt64Seconds(kHardLimit);
+
+  int pid = fork();
+  MaybeSave();
+  if (pid == 0) {
+    TEST_PCHECK(sigaction(SIGXCPU, &new_action, nullptr) == 0);
+    MaybeSave();
+    TEST_PCHECK(setrlimit(RLIMIT_CPU, &cpu_limits) == 0);
+    MaybeSave();
+    for (;;) {
+    }
+  }
+  ASSERT_THAT(pid, SyscallSucceeds());
+  auto c = Cleanup([pid] {
+    int status;
+    EXPECT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid));
+    EXPECT_TRUE(WIFSIGNALED(status));
+    EXPECT_EQ(WTERMSIG(status), SIGXCPU);
+  });
+
+  // Wait for the child to exit, but do not reap it. This will allow us to check
+  // its CPU usage while it is zombied.
+  EXPECT_THAT(waitid(P_PID, pid, nullptr, WEXITED | WNOWAIT),
+              SyscallSucceeds());
+
+  absl::Duration cpu = ASSERT_NO_ERRNO_AND_VALUE(ProcessCPUTime(pid));
+  // Following signals come every CPU second.
+  EXPECT_GE(cpu, kSoftLimit + absl::Seconds(1));
+
+  // Child did not make it to the hard limit.
+  //
+  // As above, should not flake.
+  EXPECT_LT(cpu, kHardLimit);
+}
+
+TEST(TimerTest, ProcessKilledOnCPUHardLimit) {
+  struct sigaction new_action;
+  new_action.sa_handler = NoopSignalHandler;
+  new_action.sa_flags = 0;
+  sigemptyset(&new_action.sa_mask);
+
+  constexpr absl::Duration kSoftLimit = absl::Seconds(1);
+  constexpr absl::Duration kHardLimit = absl::Seconds(3);
+
+  struct rlimit cpu_limits;
+  cpu_limits.rlim_cur = absl::ToInt64Seconds(kSoftLimit);
+  cpu_limits.rlim_max = absl::ToInt64Seconds(kHardLimit);
+
+  int pid = fork();
+  MaybeSave();
+  if (pid == 0) {
+    TEST_PCHECK(sigaction(SIGXCPU, &new_action, nullptr) == 0);
+    MaybeSave();
+    TEST_PCHECK(setrlimit(RLIMIT_CPU, &cpu_limits) == 0);
+    MaybeSave();
+    for (;;) {
+    }
+  }
+  ASSERT_THAT(pid, SyscallSucceeds());
+  auto c = Cleanup([pid] {
+    int status;
+    EXPECT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid));
+    EXPECT_TRUE(WIFSIGNALED(status));
+    EXPECT_EQ(WTERMSIG(status), SIGKILL);
+  });
+
+  // Wait for the child to exit, but do not reap it. This will allow us to check
+  // its CPU usage while it is zombied.
+  EXPECT_THAT(waitid(P_PID, pid, nullptr, WEXITED | WNOWAIT),
+              SyscallSucceeds());
+
+  absl::Duration cpu = ASSERT_NO_ERRNO_AND_VALUE(ProcessCPUTime(pid));
+  EXPECT_GE(cpu, kHardLimit);
+}
+
+// RAII type for a kernel "POSIX" interval timer. (The kernel provides system
+// calls such as timer_create that behave very similarly, but not identically,
+// to those described by timer_create(2); in particular, the kernel does not
+// implement SIGEV_THREAD. glibc builds POSIX-compliant interval timers based on
+// these kernel interval timers.)
+//
+// Compare implementation to FileDescriptor.
+class IntervalTimer {
+ public:
+  IntervalTimer() = default;
+
+  explicit IntervalTimer(int id) { set_id(id); }
+
+  IntervalTimer(IntervalTimer&& orig) : id_(orig.release()) {}
+
+  IntervalTimer& operator=(IntervalTimer&& orig) {
+    if (this == &orig) return *this;
+    reset(orig.release());
+    return *this;
+  }
+
+  IntervalTimer(const IntervalTimer& other) = delete;
+  IntervalTimer& operator=(const IntervalTimer& other) = delete;
+
+  ~IntervalTimer() { reset(); }
+
+  int get() const { return id_; }
+
+  int release() {
+    int const id = id_;
+    id_ = -1;
+    return id;
+  }
+
+  void reset() { reset(-1); }
+
+  void reset(int id) {
+    if (id_ >= 0) {
+      TEST_PCHECK(syscall(SYS_timer_delete, id_) == 0);
+      MaybeSave();
+    }
+    set_id(id);
+  }
+
+  PosixErrorOr<struct itimerspec> Set(
+      int flags, const struct itimerspec& new_value) const {
+    struct itimerspec old_value = {};
+    if (syscall(SYS_timer_settime, id_, flags, &new_value, &old_value) < 0) {
+      return PosixError(errno, "timer_settime");
+    }
+    MaybeSave();
+    return old_value;
+  }
+
+  PosixErrorOr<struct itimerspec> Get() const {
+    struct itimerspec curr_value = {};
+    if (syscall(SYS_timer_gettime, id_, &curr_value) < 0) {
+      return PosixError(errno, "timer_gettime");
+    }
+    MaybeSave();
+    return curr_value;
+  }
+
+  PosixErrorOr<int> Overruns() const {
+    int rv = syscall(SYS_timer_getoverrun, id_);
+    if (rv < 0) {
+      return PosixError(errno, "timer_getoverrun");
+    }
+    MaybeSave();
+    return rv;
+  }
+
+ private:
+  void set_id(int id) { id_ = std::max(id, -1); }
+
+  // Kernel timer_t is int; glibc timer_t is void*.
+  int id_ = -1;
+};
+
+PosixErrorOr<IntervalTimer> TimerCreate(clockid_t clockid,
+                                        const struct sigevent& sev) {
+  int timerid;
+  int ret = syscall(SYS_timer_create, clockid, &sev, &timerid);
+  if (ret < 0) {
+    return PosixError(errno, "timer_create");
+  }
+  if (ret > 0) {
+    return PosixError(EINVAL, "timer_create should never return positive");
+  }
+  MaybeSave();
+  return IntervalTimer(timerid);
+}
+
+// See timerfd.cc:TimerSlack() for rationale.
+constexpr absl::Duration kTimerSlack = absl::Milliseconds(500);
+
+TEST(IntervalTimerTest, IsInitiallyStopped) {
+  struct sigevent sev = {};
+  sev.sigev_notify = SIGEV_NONE;
+  const auto timer =
+      ASSERT_NO_ERRNO_AND_VALUE(TimerCreate(CLOCK_MONOTONIC, sev));
+  const struct itimerspec its = ASSERT_NO_ERRNO_AND_VALUE(timer.Get());
+  EXPECT_EQ(0, its.it_value.tv_sec);
+  EXPECT_EQ(0, its.it_value.tv_nsec);
+}
+
+// Kernel can create multiple timers without issue.
+//
+// Regression test for gvisor.dev/issue/1738.
+TEST(IntervalTimerTest, MultipleTimers) {
+  struct sigevent sev = {};
+  sev.sigev_notify = SIGEV_NONE;
+  const auto timer1 =
+      ASSERT_NO_ERRNO_AND_VALUE(TimerCreate(CLOCK_MONOTONIC, sev));
+  const auto timer2 =
+      ASSERT_NO_ERRNO_AND_VALUE(TimerCreate(CLOCK_MONOTONIC, sev));
+}
+
+TEST(IntervalTimerTest, SingleShotSilent) {
+  struct sigevent sev = {};
+  sev.sigev_notify = SIGEV_NONE;
+  const auto timer =
+      ASSERT_NO_ERRNO_AND_VALUE(TimerCreate(CLOCK_MONOTONIC, sev));
+
+  constexpr absl::Duration kDelay = absl::Seconds(1);
+  struct itimerspec its = {};
+  its.it_value = absl::ToTimespec(kDelay);
+  ASSERT_NO_ERRNO(timer.Set(0, its));
+
+  // The timer should count down to 0 and stop since the interval is zero. No
+  // overruns should be counted.
+  absl::SleepFor(kDelay + kTimerSlack);
+  its = ASSERT_NO_ERRNO_AND_VALUE(timer.Get());
+  EXPECT_EQ(0, its.it_value.tv_sec);
+  EXPECT_EQ(0, its.it_value.tv_nsec);
+  EXPECT_THAT(timer.Overruns(), IsPosixErrorOkAndHolds(0));
+}
+
+TEST(IntervalTimerTest, PeriodicSilent) {
+  struct sigevent sev = {};
+  sev.sigev_notify = SIGEV_NONE;
+  const auto timer =
+      ASSERT_NO_ERRNO_AND_VALUE(TimerCreate(CLOCK_MONOTONIC, sev));
+
+  constexpr absl::Duration kPeriod = absl::Seconds(1);
+  struct itimerspec its = {};
+  its.it_value = its.it_interval = absl::ToTimespec(kPeriod);
+  ASSERT_NO_ERRNO(timer.Set(0, its));
+
+  absl::SleepFor(kPeriod * 3 + kTimerSlack);
+
+  // The timer should still be running.
+  its = ASSERT_NO_ERRNO_AND_VALUE(timer.Get());
+  EXPECT_TRUE(its.it_value.tv_nsec != 0 || its.it_value.tv_sec != 0);
+
+  // Timer expirations are not counted as overruns under SIGEV_NONE.
+  EXPECT_THAT(timer.Overruns(), IsPosixErrorOkAndHolds(0));
+}
+
+std::atomic<int> counted_signals;
+
+void IntervalTimerCountingSignalHandler(int sig, siginfo_t* info,
+                                        void* ucontext) {
+  counted_signals.fetch_add(1 + info->si_overrun);
+}
+
+TEST(IntervalTimerTest, PeriodicGroupDirectedSignal) {
+  constexpr int kSigno = SIGUSR1;
+  constexpr int kSigvalue = 42;
+
+  // Install our signal handler.
+  counted_signals.store(0);
+  struct sigaction sa = {};
+  sa.sa_sigaction = IntervalTimerCountingSignalHandler;
+  sigemptyset(&sa.sa_mask);
+  sa.sa_flags = SA_SIGINFO;
+  const auto scoped_sigaction =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(kSigno, sa));
+
+  // Ensure that kSigno is unblocked on at least one thread.
+  const auto scoped_sigmask =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, kSigno));
+
+  struct sigevent sev = {};
+  sev.sigev_notify = SIGEV_SIGNAL;
+  sev.sigev_signo = kSigno;
+  sev.sigev_value.sival_int = kSigvalue;
+  auto timer = ASSERT_NO_ERRNO_AND_VALUE(TimerCreate(CLOCK_MONOTONIC, sev));
+
+  constexpr absl::Duration kPeriod = absl::Seconds(1);
+  constexpr int kCycles = 3;
+  struct itimerspec its = {};
+  its.it_value = its.it_interval = absl::ToTimespec(kPeriod);
+  ASSERT_NO_ERRNO(timer.Set(0, its));
+
+  absl::SleepFor(kPeriod * kCycles + kTimerSlack);
+  EXPECT_GE(counted_signals.load(), kCycles);
+}
+
+// From Linux's include/uapi/asm-generic/siginfo.h.
+#ifndef sigev_notify_thread_id
+#define sigev_notify_thread_id _sigev_un._tid
+#endif
+
+TEST(IntervalTimerTest, PeriodicThreadDirectedSignal) {
+  constexpr int kSigno = SIGUSR1;
+  constexpr int kSigvalue = 42;
+
+  // Block kSigno so that we can accumulate overruns.
+  sigset_t mask;
+  sigemptyset(&mask);
+  sigaddset(&mask, kSigno);
+  const auto scoped_sigmask =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_BLOCK, mask));
+
+  struct sigevent sev = {};
+  sev.sigev_notify = SIGEV_THREAD_ID;
+  sev.sigev_signo = kSigno;
+  sev.sigev_value.sival_int = kSigvalue;
+  sev.sigev_notify_thread_id = gettid();
+  auto timer = ASSERT_NO_ERRNO_AND_VALUE(TimerCreate(CLOCK_MONOTONIC, sev));
+
+  constexpr absl::Duration kPeriod = absl::Seconds(1);
+  constexpr int kCycles = 3;
+  struct itimerspec its = {};
+  its.it_value = its.it_interval = absl::ToTimespec(kPeriod);
+  ASSERT_NO_ERRNO(timer.Set(0, its));
+  absl::SleepFor(kPeriod * kCycles + kTimerSlack);
+
+  // At least kCycles expirations should have occurred, resulting in kCycles-1
+  // overruns (the first expiration sent the signal successfully).
+  siginfo_t si;
+  struct timespec zero_ts = absl::ToTimespec(absl::ZeroDuration());
+  ASSERT_THAT(sigtimedwait(&mask, &si, &zero_ts),
+              SyscallSucceedsWithValue(kSigno));
+  EXPECT_EQ(si.si_signo, kSigno);
+  EXPECT_EQ(si.si_code, SI_TIMER);
+  EXPECT_EQ(si.si_timerid, timer.get());
+  EXPECT_GE(si.si_overrun, kCycles - 1);
+  EXPECT_EQ(si.si_int, kSigvalue);
+
+  // Kill the timer, then drain any additional signal it may have enqueued. We
+  // can't do this before the preceding sigtimedwait because stopping or
+  // deleting the timer resets si_overrun to 0.
+  timer.reset();
+  sigtimedwait(&mask, &si, &zero_ts);
+}
+
+TEST(IntervalTimerTest, OtherThreadGroup) {
+  constexpr int kSigno = SIGUSR1;
+
+  // Create a subprocess that does nothing until killed.
+  pid_t child_pid;
+  const auto sp = ASSERT_NO_ERRNO_AND_VALUE(ForkAndExec(
+      "/proc/self/exe", ExecveArray({"timers", "--timers_test_sleep"}),
+      ExecveArray(), &child_pid, nullptr));
+
+  // Verify that we can't create a timer that would send signals to it.
+  struct sigevent sev = {};
+  sev.sigev_notify = SIGEV_THREAD_ID;
+  sev.sigev_signo = kSigno;
+  sev.sigev_notify_thread_id = child_pid;
+  EXPECT_THAT(TimerCreate(CLOCK_MONOTONIC, sev), PosixErrorIs(EINVAL, _));
+}
+
+TEST(IntervalTimerTest, RealTimeSignalsAreNotDuplicated) {
+  const int kSigno = SIGRTMIN;
+  constexpr int kSigvalue = 42;
+
+  // Block signo so that we can accumulate overruns.
+  sigset_t mask;
+  sigemptyset(&mask);
+  sigaddset(&mask, kSigno);
+  const auto scoped_sigmask = ScopedSignalMask(SIG_BLOCK, mask);
+
+  struct sigevent sev = {};
+  sev.sigev_notify = SIGEV_THREAD_ID;
+  sev.sigev_signo = kSigno;
+  sev.sigev_value.sival_int = kSigvalue;
+  sev.sigev_notify_thread_id = gettid();
+  const auto timer =
+      ASSERT_NO_ERRNO_AND_VALUE(TimerCreate(CLOCK_MONOTONIC, sev));
+
+  constexpr absl::Duration kPeriod = absl::Seconds(1);
+  constexpr int kCycles = 3;
+  struct itimerspec its = {};
+  its.it_value = its.it_interval = absl::ToTimespec(kPeriod);
+  ASSERT_NO_ERRNO(timer.Set(0, its));
+  absl::SleepFor(kPeriod * kCycles + kTimerSlack);
+
+  // Stop the timer so that no further signals are enqueued after sigtimedwait.
+  struct timespec zero_ts = absl::ToTimespec(absl::ZeroDuration());
+  its.it_value = its.it_interval = zero_ts;
+  ASSERT_NO_ERRNO(timer.Set(0, its));
+
+  // The timer should have sent only a single signal, even though the kernel
+  // supports enqueueing of multiple RT signals.
+  siginfo_t si;
+  ASSERT_THAT(sigtimedwait(&mask, &si, &zero_ts),
+              SyscallSucceedsWithValue(kSigno));
+  EXPECT_EQ(si.si_signo, kSigno);
+  EXPECT_EQ(si.si_code, SI_TIMER);
+  EXPECT_EQ(si.si_timerid, timer.get());
+  // si_overrun was reset by timer_settime.
+  EXPECT_EQ(si.si_overrun, 0);
+  EXPECT_EQ(si.si_int, kSigvalue);
+  EXPECT_THAT(sigtimedwait(&mask, &si, &zero_ts),
+              SyscallFailsWithErrno(EAGAIN));
+}
+
+TEST(IntervalTimerTest, AlreadyPendingSignal) {
+  constexpr int kSigno = SIGUSR1;
+  constexpr int kSigvalue = 42;
+
+  // Block kSigno so that we can accumulate overruns.
+  sigset_t mask;
+  sigemptyset(&mask);
+  sigaddset(&mask, kSigno);
+  const auto scoped_sigmask =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_BLOCK, mask));
+
+  // Send ourselves a signal, preventing the timer from enqueuing.
+  ASSERT_THAT(tgkill(getpid(), gettid(), kSigno), SyscallSucceeds());
+
+  struct sigevent sev = {};
+  sev.sigev_notify = SIGEV_THREAD_ID;
+  sev.sigev_signo = kSigno;
+  sev.sigev_value.sival_int = kSigvalue;
+  sev.sigev_notify_thread_id = gettid();
+  auto timer = ASSERT_NO_ERRNO_AND_VALUE(TimerCreate(CLOCK_MONOTONIC, sev));
+
+  constexpr absl::Duration kPeriod = absl::Seconds(1);
+  constexpr int kCycles = 3;
+  struct itimerspec its = {};
+  its.it_value = its.it_interval = absl::ToTimespec(kPeriod);
+  ASSERT_NO_ERRNO(timer.Set(0, its));
+
+  // End the sleep one cycle short; we will sleep for one more cycle below.
+  absl::SleepFor(kPeriod * (kCycles - 1));
+
+  // Dequeue the first signal, which we sent to ourselves with tgkill.
+  siginfo_t si;
+  struct timespec zero_ts = absl::ToTimespec(absl::ZeroDuration());
+  ASSERT_THAT(sigtimedwait(&mask, &si, &zero_ts),
+              SyscallSucceedsWithValue(kSigno));
+  EXPECT_EQ(si.si_signo, kSigno);
+  // glibc sigtimedwait silently replaces SI_TKILL with SI_USER:
+  // sysdeps/unix/sysv/linux/sigtimedwait.c:__sigtimedwait(). This isn't
+  // documented, so we don't depend on it.
+  EXPECT_THAT(si.si_code, AnyOf(SI_USER, SI_TKILL));
+
+  // Sleep for 1 more cycle to give the timer time to send a signal.
+  absl::SleepFor(kPeriod + kTimerSlack);
+
+  // At least kCycles expirations should have occurred, resulting in kCycles-1
+  // overruns (the last expiration sent the signal successfully).
+  ASSERT_THAT(sigtimedwait(&mask, &si, &zero_ts),
+              SyscallSucceedsWithValue(kSigno));
+  EXPECT_EQ(si.si_signo, kSigno);
+  EXPECT_EQ(si.si_code, SI_TIMER);
+  EXPECT_EQ(si.si_timerid, timer.get());
+  EXPECT_GE(si.si_overrun, kCycles - 1);
+  EXPECT_EQ(si.si_int, kSigvalue);
+
+  // Kill the timer, then drain any additional signal it may have enqueued. We
+  // can't do this before the preceding sigtimedwait because stopping or
+  // deleting the timer resets si_overrun to 0.
+  timer.reset();
+  sigtimedwait(&mask, &si, &zero_ts);
+}
+
+TEST(IntervalTimerTest, IgnoredSignalCountsAsOverrun) {
+  constexpr int kSigno = SIGUSR1;
+  constexpr int kSigvalue = 42;
+
+  // Ignore kSigno.
+  struct sigaction sa = {};
+  sa.sa_handler = SIG_IGN;
+  const auto scoped_sigaction =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(kSigno, sa));
+
+  // Unblock kSigno so that ignored signals will be discarded.
+  sigset_t mask;
+  sigemptyset(&mask);
+  sigaddset(&mask, kSigno);
+  auto scoped_sigmask =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, mask));
+
+  struct sigevent sev = {};
+  sev.sigev_notify = SIGEV_THREAD_ID;
+  sev.sigev_signo = kSigno;
+  sev.sigev_value.sival_int = kSigvalue;
+  sev.sigev_notify_thread_id = gettid();
+  auto timer = ASSERT_NO_ERRNO_AND_VALUE(TimerCreate(CLOCK_MONOTONIC, sev));
+
+  constexpr absl::Duration kPeriod = absl::Seconds(1);
+  constexpr int kCycles = 3;
+  struct itimerspec its = {};
+  its.it_value = its.it_interval = absl::ToTimespec(kPeriod);
+  ASSERT_NO_ERRNO(timer.Set(0, its));
+
+  // End the sleep one cycle short; we will sleep for one more cycle below.
+  absl::SleepFor(kPeriod * (kCycles - 1));
+
+  // Block kSigno so that ignored signals will be enqueued.
+  scoped_sigmask.Release()();
+  scoped_sigmask = ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_BLOCK, mask));
+
+  // Sleep for 1 more cycle to give the timer time to send a signal.
+  absl::SleepFor(kPeriod + kTimerSlack);
+
+  // At least kCycles expirations should have occurred, resulting in kCycles-1
+  // overruns (the last expiration sent the signal successfully).
+  siginfo_t si;
+  struct timespec zero_ts = absl::ToTimespec(absl::ZeroDuration());
+  ASSERT_THAT(sigtimedwait(&mask, &si, &zero_ts),
+              SyscallSucceedsWithValue(kSigno));
+  EXPECT_EQ(si.si_signo, kSigno);
+  EXPECT_EQ(si.si_code, SI_TIMER);
+  EXPECT_EQ(si.si_timerid, timer.get());
+  EXPECT_GE(si.si_overrun, kCycles - 1);
+  EXPECT_EQ(si.si_int, kSigvalue);
+
+  // Kill the timer, then drain any additional signal it may have enqueued. We
+  // can't do this before the preceding sigtimedwait because stopping or
+  // deleting the timer resets si_overrun to 0.
+  timer.reset();
+  sigtimedwait(&mask, &si, &zero_ts);
+}
+
+}  // namespace
+}  // namespace testing
+}  // namespace gvisor
+
+int main(int argc, char** argv) {
+  gvisor::testing::TestInit(&argc, &argv);
+
+  if (absl::GetFlag(FLAGS_timers_test_sleep)) {
+    while (true) {
+      absl::SleepFor(absl::Seconds(10));
+    }
+  }
+
+  return gvisor::testing::RunAllTests();
+}
diff --git a/test/syscalls/linux/tkill.cc b/test/syscalls/linux/tkill.cc
new file mode 100644
index 000000000..8d8ebbb24
--- /dev/null
+++ b/test/syscalls/linux/tkill.cc
@@ -0,0 +1,75 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <cerrno>
+#include <csignal>
+
+#include "gtest/gtest.h"
+#include "test/util/logging.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+static int tkill(pid_t tid, int sig) {
+  int ret;
+  do {
+    // NOTE(b/25434735): tkill(2) could return EAGAIN for RT signals.
+    ret = syscall(SYS_tkill, tid, sig);
+  } while (ret == -1 && errno == EAGAIN);
+  return ret;
+}
+
+TEST(TkillTest, InvalidTID) {
+  EXPECT_THAT(tkill(-1, 0), SyscallFailsWithErrno(EINVAL));
+  EXPECT_THAT(tkill(0, 0), SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(TkillTest, ValidTID) {
+  EXPECT_THAT(tkill(gettid(), 0), SyscallSucceeds());
+}
+
+void SigHandler(int sig, siginfo_t* info, void* context) {
+  TEST_CHECK(sig == SIGRTMAX);
+  TEST_CHECK(info->si_pid == getpid());
+  TEST_CHECK(info->si_uid == getuid());
+  TEST_CHECK(info->si_code == SI_TKILL);
+}
+
+// Test with a real signal. Regression test for b/24790092.
+TEST(TkillTest, ValidTIDAndRealSignal) {
+  struct sigaction sa;
+  sa.sa_sigaction = SigHandler;
+  sigfillset(&sa.sa_mask);
+  sa.sa_flags = SA_SIGINFO;
+  ASSERT_THAT(sigaction(SIGRTMAX, &sa, nullptr), SyscallSucceeds());
+  // InitGoogle blocks all RT signals, so we need undo it.
+  sigset_t unblock;
+  sigemptyset(&unblock);
+  sigaddset(&unblock, SIGRTMAX);
+  ASSERT_THAT(sigprocmask(SIG_UNBLOCK, &unblock, nullptr), SyscallSucceeds());
+  EXPECT_THAT(tkill(gettid(), SIGRTMAX), SyscallSucceeds());
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/truncate.cc b/test/syscalls/linux/truncate.cc
new file mode 100644
index 000000000..c988c6380
--- /dev/null
+++ b/test/syscalls/linux/truncate.cc
@@ -0,0 +1,218 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <signal.h>
+#include <sys/resource.h>
+#include <sys/stat.h>
+#include <sys/vfs.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <iostream>
+#include <string>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/strings/string_view.h"
+#include "test/syscalls/linux/file_base.h"
+#include "test/util/capability_util.h"
+#include "test/util/cleanup.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+class FixtureTruncateTest : public FileTest {
+  void SetUp() override { FileTest::SetUp(); }
+};
+
+TEST_F(FixtureTruncateTest, Truncate) {
+  // Get the current rlimit and restore after test run.
+  struct rlimit initial_lim;
+  ASSERT_THAT(getrlimit(RLIMIT_FSIZE, &initial_lim), SyscallSucceeds());
+  auto cleanup = Cleanup([&initial_lim] {
+    EXPECT_THAT(setrlimit(RLIMIT_FSIZE, &initial_lim), SyscallSucceeds());
+  });
+
+  // Check that it starts at size zero.
+  struct stat buf;
+  ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds());
+  EXPECT_EQ(buf.st_size, 0);
+
+  // Stay at size zero.
+  EXPECT_THAT(truncate(test_file_name_.c_str(), 0), SyscallSucceeds());
+  ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds());
+  EXPECT_EQ(buf.st_size, 0);
+
+  // Grow to ten bytes.
+  EXPECT_THAT(truncate(test_file_name_.c_str(), 10), SyscallSucceeds());
+  ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds());
+  EXPECT_EQ(buf.st_size, 10);
+
+  // Can't be truncated to a negative number.
+  EXPECT_THAT(truncate(test_file_name_.c_str(), -1),
+              SyscallFailsWithErrno(EINVAL));
+
+  // Try growing past the file size limit.
+  sigset_t new_mask;
+  sigemptyset(&new_mask);
+  sigaddset(&new_mask, SIGXFSZ);
+  sigprocmask(SIG_BLOCK, &new_mask, nullptr);
+  struct timespec timelimit;
+  timelimit.tv_sec = 10;
+  timelimit.tv_nsec = 0;
+
+  struct rlimit setlim;
+  setlim.rlim_cur = 1024;
+  setlim.rlim_max = RLIM_INFINITY;
+  ASSERT_THAT(setrlimit(RLIMIT_FSIZE, &setlim), SyscallSucceeds());
+  EXPECT_THAT(truncate(test_file_name_.c_str(), 1025),
+              SyscallFailsWithErrno(EFBIG));
+  EXPECT_EQ(sigtimedwait(&new_mask, nullptr, &timelimit), SIGXFSZ);
+  ASSERT_THAT(sigprocmask(SIG_UNBLOCK, &new_mask, nullptr), SyscallSucceeds());
+
+  // Shrink back down to zero.
+  EXPECT_THAT(truncate(test_file_name_.c_str(), 0), SyscallSucceeds());
+  ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds());
+  EXPECT_EQ(buf.st_size, 0);
+}
+
+TEST_F(FixtureTruncateTest, Ftruncate) {
+  // Get the current rlimit and restore after test run.
+  struct rlimit initial_lim;
+  ASSERT_THAT(getrlimit(RLIMIT_FSIZE, &initial_lim), SyscallSucceeds());
+  auto cleanup = Cleanup([&initial_lim] {
+    EXPECT_THAT(setrlimit(RLIMIT_FSIZE, &initial_lim), SyscallSucceeds());
+  });
+
+  // Check that it starts at size zero.
+  struct stat buf;
+  ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds());
+  EXPECT_EQ(buf.st_size, 0);
+
+  // Stay at size zero.
+  EXPECT_THAT(ftruncate(test_file_fd_.get(), 0), SyscallSucceeds());
+  ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds());
+  EXPECT_EQ(buf.st_size, 0);
+
+  // Grow to ten bytes.
+  EXPECT_THAT(ftruncate(test_file_fd_.get(), 10), SyscallSucceeds());
+  ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds());
+  EXPECT_EQ(buf.st_size, 10);
+
+  // Can't be truncated to a negative number.
+  EXPECT_THAT(ftruncate(test_file_fd_.get(), -1),
+              SyscallFailsWithErrno(EINVAL));
+
+  // Try growing past the file size limit.
+  sigset_t new_mask;
+  sigemptyset(&new_mask);
+  sigaddset(&new_mask, SIGXFSZ);
+  sigprocmask(SIG_BLOCK, &new_mask, nullptr);
+  struct timespec timelimit;
+  timelimit.tv_sec = 10;
+  timelimit.tv_nsec = 0;
+
+  struct rlimit setlim;
+  setlim.rlim_cur = 1024;
+  setlim.rlim_max = RLIM_INFINITY;
+  ASSERT_THAT(setrlimit(RLIMIT_FSIZE, &setlim), SyscallSucceeds());
+  EXPECT_THAT(ftruncate(test_file_fd_.get(), 1025),
+              SyscallFailsWithErrno(EFBIG));
+  EXPECT_EQ(sigtimedwait(&new_mask, nullptr, &timelimit), SIGXFSZ);
+  ASSERT_THAT(sigprocmask(SIG_UNBLOCK, &new_mask, nullptr), SyscallSucceeds());
+
+  // Shrink back down to zero.
+  EXPECT_THAT(ftruncate(test_file_fd_.get(), 0), SyscallSucceeds());
+  ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds());
+  EXPECT_EQ(buf.st_size, 0);
+}
+
+// Truncating a file down clears that portion of the file.
+TEST_F(FixtureTruncateTest, FtruncateShrinkGrow) {
+  std::vector<char> buf(10, 'a');
+  EXPECT_THAT(WriteFd(test_file_fd_.get(), buf.data(), buf.size()),
+              SyscallSucceedsWithValue(buf.size()));
+
+  // Shrink then regrow the file. This should clear the second half of the file.
+  EXPECT_THAT(ftruncate(test_file_fd_.get(), 5), SyscallSucceeds());
+  EXPECT_THAT(ftruncate(test_file_fd_.get(), 10), SyscallSucceeds());
+
+  EXPECT_THAT(lseek(test_file_fd_.get(), 0, SEEK_SET), SyscallSucceeds());
+
+  std::vector<char> buf2(10);
+  EXPECT_THAT(ReadFd(test_file_fd_.get(), buf2.data(), buf2.size()),
+              SyscallSucceedsWithValue(buf2.size()));
+
+  std::vector<char> expect = {'a',  'a',  'a',  'a',  'a',
+                              '\0', '\0', '\0', '\0', '\0'};
+  EXPECT_EQ(expect, buf2);
+}
+
+TEST(TruncateTest, TruncateDir) {
+  auto temp_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  EXPECT_THAT(truncate(temp_dir.path().c_str(), 0),
+              SyscallFailsWithErrno(EISDIR));
+}
+
+TEST(TruncateTest, FtruncateDir) {
+  auto temp_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(temp_dir.path(), O_DIRECTORY | O_RDONLY));
+  EXPECT_THAT(ftruncate(fd.get(), 0), SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(TruncateTest, TruncateNonWriteable) {
+  // Make sure we don't have CAP_DAC_OVERRIDE, since that allows the user to
+  // always override write permissions.
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+  auto temp_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), absl::string_view(), 0555 /* mode */));
+  EXPECT_THAT(truncate(temp_file.path().c_str(), 0),
+              SyscallFailsWithErrno(EACCES));
+}
+
+TEST(TruncateTest, FtruncateNonWriteable) {
+  auto temp_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+      GetAbsoluteTestTmpdir(), absl::string_view(), 0555 /* mode */));
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(temp_file.path(), O_RDONLY));
+  EXPECT_THAT(ftruncate(fd.get(), 0), SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(TruncateTest, TruncateNonExist) {
+  EXPECT_THAT(truncate("/foo/bar", 0), SyscallFailsWithErrno(ENOENT));
+}
+
+TEST(TruncateTest, FtruncateVirtualTmp_NoRandomSave) {
+  auto temp_file = NewTempAbsPathInDir("/dev/shm");
+  const DisableSave ds;  // Incompatible permissions.
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(temp_file, O_RDWR | O_CREAT | O_EXCL, 0));
+  EXPECT_THAT(ftruncate(fd.get(), 100), SyscallSucceeds());
+}
+
+// NOTE: There are additional truncate(2)/ftruncate(2) tests in mknod.cc
+// which are there to avoid running the tests on a number of different
+// filesystems which may not support mknod.
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/tuntap.cc b/test/syscalls/linux/tuntap.cc
new file mode 100644
index 000000000..97d554e72
--- /dev/null
+++ b/test/syscalls/linux/tuntap.cc
@@ -0,0 +1,422 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <arpa/inet.h>
+#include <linux/capability.h>
+#include <linux/if_arp.h>
+#include <linux/if_ether.h>
+#include <linux/if_tun.h>
+#include <netinet/ip.h>
+#include <netinet/ip_icmp.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/strings/ascii.h"
+#include "absl/strings/str_split.h"
+#include "test/syscalls/linux/socket_netlink_route_util.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/capability_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/fs_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+constexpr int kIPLen = 4;
+
+constexpr const char kDevNetTun[] = "/dev/net/tun";
+constexpr const char kTapName[] = "tap0";
+
+constexpr const uint8_t kMacA[ETH_ALEN] = {0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA};
+constexpr const uint8_t kMacB[ETH_ALEN] = {0xBB, 0xBB, 0xBB, 0xBB, 0xBB, 0xBB};
+
+PosixErrorOr<std::set<std::string>> DumpLinkNames() {
+  ASSIGN_OR_RETURN_ERRNO(auto links, DumpLinks());
+  std::set<std::string> names;
+  for (const auto& link : links) {
+    names.emplace(link.name);
+  }
+  return names;
+}
+
+PosixErrorOr<Link> GetLinkByName(const std::string& name) {
+  ASSIGN_OR_RETURN_ERRNO(auto links, DumpLinks());
+  for (const auto& link : links) {
+    if (link.name == name) {
+      return link;
+    }
+  }
+  return PosixError(ENOENT, "interface not found");
+}
+
+struct pihdr {
+  uint16_t pi_flags;
+  uint16_t pi_protocol;
+} __attribute__((packed));
+
+struct ping_pkt {
+  pihdr pi;
+  struct ethhdr eth;
+  struct iphdr ip;
+  struct icmphdr icmp;
+  char payload[64];
+} __attribute__((packed));
+
+ping_pkt CreatePingPacket(const uint8_t srcmac[ETH_ALEN], const char* srcip,
+                          const uint8_t dstmac[ETH_ALEN], const char* dstip) {
+  ping_pkt pkt = {};
+
+  pkt.pi.pi_protocol = htons(ETH_P_IP);
+
+  memcpy(pkt.eth.h_dest, dstmac, sizeof(pkt.eth.h_dest));
+  memcpy(pkt.eth.h_source, srcmac, sizeof(pkt.eth.h_source));
+  pkt.eth.h_proto = htons(ETH_P_IP);
+
+  pkt.ip.ihl = 5;
+  pkt.ip.version = 4;
+  pkt.ip.tos = 0;
+  pkt.ip.tot_len = htons(sizeof(struct iphdr) + sizeof(struct icmphdr) +
+                         sizeof(pkt.payload));
+  pkt.ip.id = 1;
+  pkt.ip.frag_off = 1 << 6;  // Do not fragment
+  pkt.ip.ttl = 64;
+  pkt.ip.protocol = IPPROTO_ICMP;
+  inet_pton(AF_INET, dstip, &pkt.ip.daddr);
+  inet_pton(AF_INET, srcip, &pkt.ip.saddr);
+  pkt.ip.check = IPChecksum(pkt.ip);
+
+  pkt.icmp.type = ICMP_ECHO;
+  pkt.icmp.code = 0;
+  pkt.icmp.checksum = 0;
+  pkt.icmp.un.echo.sequence = 1;
+  pkt.icmp.un.echo.id = 1;
+
+  strncpy(pkt.payload, "abcd", sizeof(pkt.payload));
+  pkt.icmp.checksum = ICMPChecksum(pkt.icmp, pkt.payload, sizeof(pkt.payload));
+
+  return pkt;
+}
+
+struct arp_pkt {
+  pihdr pi;
+  struct ethhdr eth;
+  struct arphdr arp;
+  uint8_t arp_sha[ETH_ALEN];
+  uint8_t arp_spa[kIPLen];
+  uint8_t arp_tha[ETH_ALEN];
+  uint8_t arp_tpa[kIPLen];
+} __attribute__((packed));
+
+std::string CreateArpPacket(const uint8_t srcmac[ETH_ALEN], const char* srcip,
+                            const uint8_t dstmac[ETH_ALEN], const char* dstip) {
+  std::string buffer;
+  buffer.resize(sizeof(arp_pkt));
+
+  arp_pkt* pkt = reinterpret_cast<arp_pkt*>(&buffer[0]);
+  {
+    pkt->pi.pi_protocol = htons(ETH_P_ARP);
+
+    memcpy(pkt->eth.h_dest, kMacA, sizeof(pkt->eth.h_dest));
+    memcpy(pkt->eth.h_source, kMacB, sizeof(pkt->eth.h_source));
+    pkt->eth.h_proto = htons(ETH_P_ARP);
+
+    pkt->arp.ar_hrd = htons(ARPHRD_ETHER);
+    pkt->arp.ar_pro = htons(ETH_P_IP);
+    pkt->arp.ar_hln = ETH_ALEN;
+    pkt->arp.ar_pln = kIPLen;
+    pkt->arp.ar_op = htons(ARPOP_REPLY);
+
+    memcpy(pkt->arp_sha, srcmac, sizeof(pkt->arp_sha));
+    inet_pton(AF_INET, srcip, pkt->arp_spa);
+    memcpy(pkt->arp_tha, dstmac, sizeof(pkt->arp_tha));
+    inet_pton(AF_INET, dstip, pkt->arp_tpa);
+  }
+  return buffer;
+}
+
+}  // namespace
+
+TEST(TuntapStaticTest, NetTunExists) {
+  struct stat statbuf;
+  ASSERT_THAT(stat(kDevNetTun, &statbuf), SyscallSucceeds());
+  // Check that it's a character device with rw-rw-rw- permissions.
+  EXPECT_EQ(statbuf.st_mode, S_IFCHR | 0666);
+}
+
+class TuntapTest : public ::testing::Test {
+ protected:
+  void TearDown() override {
+    if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN))) {
+      // Bring back capability if we had dropped it in test case.
+      ASSERT_NO_ERRNO(SetCapability(CAP_NET_ADMIN, true));
+    }
+  }
+};
+
+TEST_F(TuntapTest, CreateInterfaceNoCap) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN)));
+
+  ASSERT_NO_ERRNO(SetCapability(CAP_NET_ADMIN, false));
+
+  FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(kDevNetTun, O_RDWR));
+
+  struct ifreq ifr = {};
+  ifr.ifr_flags = IFF_TAP;
+  strncpy(ifr.ifr_name, kTapName, IFNAMSIZ);
+
+  EXPECT_THAT(ioctl(fd.get(), TUNSETIFF, &ifr), SyscallFailsWithErrno(EPERM));
+}
+
+TEST_F(TuntapTest, CreateFixedNameInterface) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN)));
+
+  FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(kDevNetTun, O_RDWR));
+
+  struct ifreq ifr_set = {};
+  ifr_set.ifr_flags = IFF_TAP;
+  strncpy(ifr_set.ifr_name, kTapName, IFNAMSIZ);
+  EXPECT_THAT(ioctl(fd.get(), TUNSETIFF, &ifr_set),
+              SyscallSucceedsWithValue(0));
+
+  struct ifreq ifr_get = {};
+  EXPECT_THAT(ioctl(fd.get(), TUNGETIFF, &ifr_get),
+              SyscallSucceedsWithValue(0));
+
+  struct ifreq ifr_expect = ifr_set;
+  // See __tun_chr_ioctl() in net/drivers/tun.c.
+  ifr_expect.ifr_flags |= IFF_NOFILTER;
+
+  EXPECT_THAT(DumpLinkNames(),
+              IsPosixErrorOkAndHolds(::testing::Contains(kTapName)));
+  EXPECT_THAT(memcmp(&ifr_expect, &ifr_get, sizeof(ifr_get)), ::testing::Eq(0));
+}
+
+TEST_F(TuntapTest, CreateInterface) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN)));
+
+  FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(kDevNetTun, O_RDWR));
+
+  struct ifreq ifr = {};
+  ifr.ifr_flags = IFF_TAP;
+  // Empty ifr.ifr_name. Let kernel assign.
+
+  EXPECT_THAT(ioctl(fd.get(), TUNSETIFF, &ifr), SyscallSucceedsWithValue(0));
+
+  struct ifreq ifr_get = {};
+  EXPECT_THAT(ioctl(fd.get(), TUNGETIFF, &ifr_get),
+              SyscallSucceedsWithValue(0));
+
+  std::string ifname = ifr_get.ifr_name;
+  EXPECT_THAT(ifname, ::testing::StartsWith("tap"));
+  EXPECT_THAT(DumpLinkNames(),
+              IsPosixErrorOkAndHolds(::testing::Contains(ifname)));
+}
+
+TEST_F(TuntapTest, InvalidReadWrite) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN)));
+
+  FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(kDevNetTun, O_RDWR));
+
+  char buf[128] = {};
+  EXPECT_THAT(read(fd.get(), buf, sizeof(buf)), SyscallFailsWithErrno(EBADFD));
+  EXPECT_THAT(write(fd.get(), buf, sizeof(buf)), SyscallFailsWithErrno(EBADFD));
+}
+
+TEST_F(TuntapTest, WriteToDownDevice) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN)));
+
+  // FIXME(b/110961832): gVisor always creates enabled/up'd interfaces.
+  SKIP_IF(IsRunningOnGvisor());
+
+  FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(kDevNetTun, O_RDWR));
+
+  // Device created should be down by default.
+  struct ifreq ifr = {};
+  ifr.ifr_flags = IFF_TAP;
+  EXPECT_THAT(ioctl(fd.get(), TUNSETIFF, &ifr), SyscallSucceedsWithValue(0));
+
+  char buf[128] = {};
+  EXPECT_THAT(write(fd.get(), buf, sizeof(buf)), SyscallFailsWithErrno(EIO));
+}
+
+PosixErrorOr<FileDescriptor> OpenAndAttachTap(
+    const std::string& dev_name, const std::string& dev_ipv4_addr) {
+  // Interface creation.
+  ASSIGN_OR_RETURN_ERRNO(FileDescriptor fd, Open(kDevNetTun, O_RDWR));
+
+  struct ifreq ifr_set = {};
+  ifr_set.ifr_flags = IFF_TAP;
+  strncpy(ifr_set.ifr_name, dev_name.c_str(), IFNAMSIZ);
+  if (ioctl(fd.get(), TUNSETIFF, &ifr_set) < 0) {
+    return PosixError(errno);
+  }
+
+  ASSIGN_OR_RETURN_ERRNO(auto link, GetLinkByName(dev_name));
+
+  // Interface setup.
+  struct in_addr addr;
+  inet_pton(AF_INET, dev_ipv4_addr.c_str(), &addr);
+  EXPECT_NO_ERRNO(LinkAddLocalAddr(link.index, AF_INET, /*prefixlen=*/24, &addr,
+                                   sizeof(addr)));
+
+  if (!IsRunningOnGvisor()) {
+    // FIXME(b/110961832): gVisor doesn't support setting MAC address on
+    // interfaces yet.
+    RETURN_IF_ERRNO(LinkSetMacAddr(link.index, kMacA, sizeof(kMacA)));
+
+    // FIXME(b/110961832): gVisor always creates enabled/up'd interfaces.
+    RETURN_IF_ERRNO(LinkChangeFlags(link.index, IFF_UP, IFF_UP));
+  }
+
+  return fd;
+}
+
+// This test sets up a TAP device and pings kernel by sending ICMP echo request.
+//
+// It works as the following:
+// * Open /dev/net/tun, and create kTapName interface.
+// * Use rtnetlink to do initial setup of the interface:
+//   * Assign IP address 10.0.0.1/24 to kernel.
+//   * MAC address: kMacA
+//   * Bring up the interface.
+// * Send an ICMP echo reqest (ping) packet from 10.0.0.2 (kMacB) to kernel.
+// * Loop to receive packets from TAP device/fd:
+//   * If packet is an ICMP echo reply, it stops and passes the test.
+//   * If packet is an ARP request, it responds with canned reply and resends
+//   the
+//     ICMP request packet.
+TEST_F(TuntapTest, PingKernel) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN)));
+
+  FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(OpenAndAttachTap(kTapName, "10.0.0.1"));
+  ping_pkt ping_req = CreatePingPacket(kMacB, "10.0.0.2", kMacA, "10.0.0.1");
+  std::string arp_rep = CreateArpPacket(kMacB, "10.0.0.2", kMacA, "10.0.0.1");
+
+  // Send ping, this would trigger an ARP request on Linux.
+  EXPECT_THAT(write(fd.get(), &ping_req, sizeof(ping_req)),
+              SyscallSucceedsWithValue(sizeof(ping_req)));
+
+  // Receive loop to process inbound packets.
+  struct inpkt {
+    union {
+      pihdr pi;
+      ping_pkt ping;
+      arp_pkt arp;
+    };
+  };
+  while (1) {
+    inpkt r = {};
+    int n = read(fd.get(), &r, sizeof(r));
+    EXPECT_THAT(n, SyscallSucceeds());
+
+    if (n < sizeof(pihdr)) {
+      std::cerr << "Ignored packet, protocol: " << r.pi.pi_protocol
+                << " len: " << n << std::endl;
+      continue;
+    }
+
+    // Process ARP packet.
+    if (n >= sizeof(arp_pkt) && r.pi.pi_protocol == htons(ETH_P_ARP)) {
+      // Respond with canned ARP reply.
+      EXPECT_THAT(write(fd.get(), arp_rep.data(), arp_rep.size()),
+                  SyscallSucceedsWithValue(arp_rep.size()));
+      // First ping request might have been dropped due to mac address not in
+      // ARP cache. Send it again.
+      EXPECT_THAT(write(fd.get(), &ping_req, sizeof(ping_req)),
+                  SyscallSucceedsWithValue(sizeof(ping_req)));
+    }
+
+    // Process ping response packet.
+    if (n >= sizeof(ping_pkt) && r.pi.pi_protocol == ping_req.pi.pi_protocol &&
+        r.ping.ip.protocol == ping_req.ip.protocol &&
+        !memcmp(&r.ping.ip.saddr, &ping_req.ip.daddr, kIPLen) &&
+        !memcmp(&r.ping.ip.daddr, &ping_req.ip.saddr, kIPLen) &&
+        r.ping.icmp.type == 0 && r.ping.icmp.code == 0) {
+      // Ends and passes the test.
+      break;
+    }
+  }
+}
+
+TEST_F(TuntapTest, SendUdpTriggersArpResolution) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN)));
+
+  FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(OpenAndAttachTap(kTapName, "10.0.0.1"));
+
+  // Send a UDP packet to remote.
+  int sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_IP);
+  ASSERT_THAT(sock, SyscallSucceeds());
+
+  struct sockaddr_in remote = {};
+  remote.sin_family = AF_INET;
+  remote.sin_port = htons(42);
+  inet_pton(AF_INET, "10.0.0.2", &remote.sin_addr);
+  int ret = sendto(sock, "hello", 5, 0, reinterpret_cast<sockaddr*>(&remote),
+                   sizeof(remote));
+  ASSERT_THAT(ret, ::testing::AnyOf(SyscallSucceeds(),
+                                    SyscallFailsWithErrno(EHOSTDOWN)));
+
+  struct inpkt {
+    union {
+      pihdr pi;
+      arp_pkt arp;
+    };
+  };
+  while (1) {
+    inpkt r = {};
+    int n = read(fd.get(), &r, sizeof(r));
+    EXPECT_THAT(n, SyscallSucceeds());
+
+    if (n < sizeof(pihdr)) {
+      std::cerr << "Ignored packet, protocol: " << r.pi.pi_protocol
+                << " len: " << n << std::endl;
+      continue;
+    }
+
+    if (n >= sizeof(arp_pkt) && r.pi.pi_protocol == htons(ETH_P_ARP)) {
+      break;
+    }
+  }
+}
+
+// Write hang bug found by syskaller: b/155928773
+// https://syzkaller.appspot.com/bug?id=065b893bd8d1d04a4e0a1d53c578537cde1efe99
+TEST_F(TuntapTest, WriteHangBug155928773) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN)));
+
+  FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(OpenAndAttachTap(kTapName, "10.0.0.1"));
+
+  int sock = socket(AF_INET, SOCK_DGRAM, 0);
+  ASSERT_THAT(sock, SyscallSucceeds());
+
+  struct sockaddr_in remote = {};
+  remote.sin_family = AF_INET;
+  remote.sin_port = htons(42);
+  inet_pton(AF_INET, "10.0.0.1", &remote.sin_addr);
+  // Return values do not matter in this test.
+  connect(sock, reinterpret_cast<struct sockaddr*>(&remote), sizeof(remote));
+  write(sock, "hello", 5);
+}
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/tuntap_hostinet.cc b/test/syscalls/linux/tuntap_hostinet.cc
new file mode 100644
index 000000000..1513fb9d5
--- /dev/null
+++ b/test/syscalls/linux/tuntap_hostinet.cc
@@ -0,0 +1,38 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(TuntapHostInetTest, NoNetTun) {
+  SKIP_IF(!IsRunningOnGvisor());
+  SKIP_IF(!IsRunningWithHostinet());
+
+  struct stat statbuf;
+  ASSERT_THAT(stat("/dev/net/tun", &statbuf), SyscallFailsWithErrno(ENOENT));
+}
+
+}  // namespace
+}  // namespace testing
+
+}  // namespace gvisor
diff --git a/test/syscalls/linux/udp_bind.cc b/test/syscalls/linux/udp_bind.cc
new file mode 100644
index 000000000..6d92bdbeb
--- /dev/null
+++ b/test/syscalls/linux/udp_bind.cc
@@ -0,0 +1,316 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <arpa/inet.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+struct sockaddr_in_common {
+  sa_family_t sin_family;
+  in_port_t sin_port;
+};
+
+struct SendtoTestParam {
+  // Human readable description of test parameter.
+  std::string description;
+
+  // Test is broken in gVisor, skip.
+  bool skip_on_gvisor;
+
+  // Domain for the socket that will do the sending.
+  int send_domain;
+
+  // Address to bind for the socket that will do the sending.
+  struct sockaddr_storage send_addr;
+  socklen_t send_addr_len;  // 0 for unbound.
+
+  // Address to connect to for the socket that will do the sending.
+  struct sockaddr_storage connect_addr;
+  socklen_t connect_addr_len;  // 0 for no connection.
+
+  // Domain for the socket that will do the receiving.
+  int recv_domain;
+
+  // Address to bind for the socket that will do the receiving.
+  struct sockaddr_storage recv_addr;
+  socklen_t recv_addr_len;
+
+  // Address to send to.
+  struct sockaddr_storage sendto_addr;
+  socklen_t sendto_addr_len;
+
+  // Expected errno for the sendto call.
+  std::vector<int> sendto_errnos;  // empty on success.
+};
+
+class SendtoTest : public ::testing::TestWithParam<SendtoTestParam> {
+ protected:
+  SendtoTest() {
+    // gUnit uses printf, so so will we.
+    printf("Testing with %s\n", GetParam().description.c_str());
+  }
+};
+
+TEST_P(SendtoTest, Sendto) {
+  auto param = GetParam();
+
+  SKIP_IF(param.skip_on_gvisor && IsRunningOnGvisor());
+
+  const FileDescriptor s1 =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(param.send_domain, SOCK_DGRAM, 0));
+  const FileDescriptor s2 =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(param.recv_domain, SOCK_DGRAM, 0));
+
+  if (param.send_addr_len > 0) {
+    ASSERT_THAT(bind(s1.get(), reinterpret_cast<sockaddr*>(&param.send_addr),
+                     param.send_addr_len),
+                SyscallSucceeds());
+  }
+
+  if (param.connect_addr_len > 0) {
+    ASSERT_THAT(
+        connect(s1.get(), reinterpret_cast<sockaddr*>(&param.connect_addr),
+                param.connect_addr_len),
+        SyscallSucceeds());
+  }
+
+  ASSERT_THAT(bind(s2.get(), reinterpret_cast<sockaddr*>(&param.recv_addr),
+                   param.recv_addr_len),
+              SyscallSucceeds());
+
+  struct sockaddr_storage real_recv_addr = {};
+  socklen_t real_recv_addr_len = param.recv_addr_len;
+  ASSERT_THAT(
+      getsockname(s2.get(), reinterpret_cast<sockaddr*>(&real_recv_addr),
+                  &real_recv_addr_len),
+      SyscallSucceeds());
+
+  ASSERT_EQ(real_recv_addr_len, param.recv_addr_len);
+
+  int recv_port =
+      reinterpret_cast<sockaddr_in_common*>(&real_recv_addr)->sin_port;
+
+  struct sockaddr_storage sendto_addr = param.sendto_addr;
+  reinterpret_cast<sockaddr_in_common*>(&sendto_addr)->sin_port = recv_port;
+
+  char buf[20] = {};
+  if (!param.sendto_errnos.empty()) {
+    ASSERT_THAT(RetryEINTR(sendto)(s1.get(), buf, sizeof(buf), 0,
+                                   reinterpret_cast<sockaddr*>(&sendto_addr),
+                                   param.sendto_addr_len),
+                SyscallFailsWithErrno(ElementOf(param.sendto_errnos)));
+    return;
+  }
+
+  ASSERT_THAT(RetryEINTR(sendto)(s1.get(), buf, sizeof(buf), 0,
+                                 reinterpret_cast<sockaddr*>(&sendto_addr),
+                                 param.sendto_addr_len),
+              SyscallSucceedsWithValue(sizeof(buf)));
+
+  struct sockaddr_storage got_addr = {};
+  socklen_t got_addr_len = sizeof(sockaddr_storage);
+  ASSERT_THAT(RetryEINTR(recvfrom)(s2.get(), buf, sizeof(buf), 0,
+                                   reinterpret_cast<sockaddr*>(&got_addr),
+                                   &got_addr_len),
+              SyscallSucceedsWithValue(sizeof(buf)));
+
+  ASSERT_GT(got_addr_len, sizeof(sockaddr_in_common));
+  int got_port = reinterpret_cast<sockaddr_in_common*>(&got_addr)->sin_port;
+
+  struct sockaddr_storage sender_addr = {};
+  socklen_t sender_addr_len = sizeof(sockaddr_storage);
+  ASSERT_THAT(getsockname(s1.get(), reinterpret_cast<sockaddr*>(&sender_addr),
+                          &sender_addr_len),
+              SyscallSucceeds());
+
+  ASSERT_GT(sender_addr_len, sizeof(sockaddr_in_common));
+  int sender_port =
+      reinterpret_cast<sockaddr_in_common*>(&sender_addr)->sin_port;
+
+  EXPECT_EQ(got_port, sender_port);
+}
+
+socklen_t Ipv4Addr(sockaddr_storage* addr, int port = 0) {
+  auto addr4 = reinterpret_cast<sockaddr_in*>(addr);
+  addr4->sin_family = AF_INET;
+  addr4->sin_port = port;
+  inet_pton(AF_INET, "127.0.0.1", &addr4->sin_addr.s_addr);
+  return sizeof(struct sockaddr_in);
+}
+
+socklen_t Ipv6Addr(sockaddr_storage* addr, int port = 0) {
+  auto addr6 = reinterpret_cast<sockaddr_in6*>(addr);
+  addr6->sin6_family = AF_INET6;
+  addr6->sin6_port = port;
+  inet_pton(AF_INET6, "::1", &addr6->sin6_addr.s6_addr);
+  return sizeof(struct sockaddr_in6);
+}
+
+socklen_t Ipv4MappedIpv6Addr(sockaddr_storage* addr, int port = 0) {
+  auto addr6 = reinterpret_cast<sockaddr_in6*>(addr);
+  addr6->sin6_family = AF_INET6;
+  addr6->sin6_port = port;
+  inet_pton(AF_INET6, "::ffff:127.0.0.1", &addr6->sin6_addr.s6_addr);
+  return sizeof(struct sockaddr_in6);
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    UdpBindTest, SendtoTest,
+    ::testing::Values(
+        []() {
+          SendtoTestParam param = {};
+          param.description = "IPv4 mapped IPv6 sendto IPv4 mapped IPv6";
+          param.send_domain = AF_INET6;
+          param.send_addr_len = Ipv4MappedIpv6Addr(&param.send_addr);
+          param.recv_domain = AF_INET6;
+          param.recv_addr_len = Ipv4MappedIpv6Addr(&param.recv_addr);
+          param.sendto_addr_len = Ipv4MappedIpv6Addr(&param.sendto_addr);
+          return param;
+        }(),
+        []() {
+          SendtoTestParam param = {};
+          param.description = "IPv6 sendto IPv6";
+          param.send_domain = AF_INET6;
+          param.send_addr_len = Ipv6Addr(&param.send_addr);
+          param.recv_domain = AF_INET6;
+          param.recv_addr_len = Ipv6Addr(&param.recv_addr);
+          param.sendto_addr_len = Ipv6Addr(&param.sendto_addr);
+          return param;
+        }(),
+        []() {
+          SendtoTestParam param = {};
+          param.description = "IPv4 sendto IPv4";
+          param.send_domain = AF_INET;
+          param.send_addr_len = Ipv4Addr(&param.send_addr);
+          param.recv_domain = AF_INET;
+          param.recv_addr_len = Ipv4Addr(&param.recv_addr);
+          param.sendto_addr_len = Ipv4Addr(&param.sendto_addr);
+          return param;
+        }(),
+        []() {
+          SendtoTestParam param = {};
+          param.description = "IPv4 mapped IPv6 sendto IPv4";
+          param.send_domain = AF_INET6;
+          param.send_addr_len = Ipv4MappedIpv6Addr(&param.send_addr);
+          param.recv_domain = AF_INET;
+          param.recv_addr_len = Ipv4Addr(&param.recv_addr);
+          param.sendto_addr_len = Ipv4MappedIpv6Addr(&param.sendto_addr);
+          return param;
+        }(),
+        []() {
+          SendtoTestParam param = {};
+          param.description = "IPv4 sendto IPv4 mapped IPv6";
+          param.send_domain = AF_INET;
+          param.send_addr_len = Ipv4Addr(&param.send_addr);
+          param.recv_domain = AF_INET6;
+          param.recv_addr_len = Ipv4MappedIpv6Addr(&param.recv_addr);
+          param.sendto_addr_len = Ipv4Addr(&param.sendto_addr);
+          return param;
+        }(),
+        []() {
+          SendtoTestParam param = {};
+          param.description = "unbound IPv6 sendto IPv4 mapped IPv6";
+          param.send_domain = AF_INET6;
+          param.recv_domain = AF_INET6;
+          param.recv_addr_len = Ipv4MappedIpv6Addr(&param.recv_addr);
+          param.sendto_addr_len = Ipv4MappedIpv6Addr(&param.sendto_addr);
+          return param;
+        }(),
+        []() {
+          SendtoTestParam param = {};
+          param.description = "unbound IPv6 sendto IPv4";
+          param.send_domain = AF_INET6;
+          param.recv_domain = AF_INET;
+          param.recv_addr_len = Ipv4Addr(&param.recv_addr);
+          param.sendto_addr_len = Ipv4MappedIpv6Addr(&param.sendto_addr);
+          return param;
+        }(),
+        []() {
+          SendtoTestParam param = {};
+          param.description = "IPv6 sendto IPv4";
+          param.send_domain = AF_INET6;
+          param.send_addr_len = Ipv6Addr(&param.send_addr);
+          param.recv_domain = AF_INET;
+          param.recv_addr_len = Ipv4Addr(&param.recv_addr);
+          param.sendto_addr_len = Ipv4MappedIpv6Addr(&param.sendto_addr);
+          param.sendto_errnos = {ENETUNREACH};
+          return param;
+        }(),
+        []() {
+          SendtoTestParam param = {};
+          param.description = "IPv4 mapped IPv6 sendto IPv6";
+          param.send_domain = AF_INET6;
+          param.send_addr_len = Ipv4MappedIpv6Addr(&param.send_addr);
+          param.recv_domain = AF_INET6;
+          param.recv_addr_len = Ipv6Addr(&param.recv_addr);
+          param.sendto_addr_len = Ipv6Addr(&param.sendto_addr);
+          param.sendto_errnos = {EAFNOSUPPORT};
+          // The errno returned changed in Linux commit c8e6ad0829a723.
+          param.sendto_errnos = {EINVAL, EAFNOSUPPORT};
+          return param;
+        }(),
+        []() {
+          SendtoTestParam param = {};
+          param.description = "connected IPv4 mapped IPv6 sendto IPv6";
+          param.send_domain = AF_INET6;
+          param.connect_addr_len =
+              Ipv4MappedIpv6Addr(&param.connect_addr, 5000);
+          param.recv_domain = AF_INET6;
+          param.recv_addr_len = Ipv6Addr(&param.recv_addr);
+          param.sendto_addr_len = Ipv6Addr(&param.sendto_addr);
+          // The errno returned changed in Linux commit c8e6ad0829a723.
+          param.sendto_errnos = {EINVAL, EAFNOSUPPORT};
+          return param;
+        }(),
+        []() {
+          SendtoTestParam param = {};
+          param.description = "connected IPv6 sendto IPv4 mapped IPv6";
+          // TODO(igudger): Determine if this inconsistent behavior is worth
+          // implementing.
+          param.skip_on_gvisor = true;
+          param.send_domain = AF_INET6;
+          param.connect_addr_len = Ipv6Addr(&param.connect_addr, 5000);
+          param.recv_domain = AF_INET6;
+          param.recv_addr_len = Ipv4MappedIpv6Addr(&param.recv_addr);
+          param.sendto_addr_len = Ipv4MappedIpv6Addr(&param.sendto_addr);
+          return param;
+        }(),
+        []() {
+          SendtoTestParam param = {};
+          param.description = "connected IPv6 sendto IPv4";
+          // TODO(igudger): Determine if this inconsistent behavior is worth
+          // implementing.
+          param.skip_on_gvisor = true;
+          param.send_domain = AF_INET6;
+          param.connect_addr_len = Ipv6Addr(&param.connect_addr, 5000);
+          param.recv_domain = AF_INET;
+          param.recv_addr_len = Ipv4Addr(&param.recv_addr);
+          param.sendto_addr_len = Ipv4MappedIpv6Addr(&param.sendto_addr);
+          return param;
+        }()));
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/udp_socket.cc b/test/syscalls/linux/udp_socket.cc
new file mode 100644
index 000000000..7a8ac30a4
--- /dev/null
+++ b/test/syscalls/linux/udp_socket.cc
@@ -0,0 +1,30 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test/syscalls/linux/udp_socket_test_cases.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+INSTANTIATE_TEST_SUITE_P(AllInetTests, UdpSocketTest,
+                         ::testing::Values(AddressFamily::kIpv4,
+                                           AddressFamily::kIpv6,
+                                           AddressFamily::kDualStack));
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/udp_socket_errqueue_test_case.cc b/test/syscalls/linux/udp_socket_errqueue_test_case.cc
new file mode 100644
index 000000000..54a0594f7
--- /dev/null
+++ b/test/syscalls/linux/udp_socket_errqueue_test_case.cc
@@ -0,0 +1,57 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef __fuchsia__
+
+#include <arpa/inet.h>
+#include <fcntl.h>
+#include <linux/errqueue.h>
+#include <netinet/in.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include "gtest/gtest.h"
+#include "absl/base/macros.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/udp_socket_test_cases.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+
+TEST_P(UdpSocketTest, ErrorQueue) {
+  char cmsgbuf[CMSG_SPACE(sizeof(sock_extended_err))];
+  msghdr msg;
+  memset(&msg, 0, sizeof(msg));
+  iovec iov;
+  memset(&iov, 0, sizeof(iov));
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+  msg.msg_control = cmsgbuf;
+  msg.msg_controllen = sizeof(cmsgbuf);
+
+  // recv*(MSG_ERRQUEUE) never blocks, even without MSG_DONTWAIT.
+  EXPECT_THAT(RetryEINTR(recvmsg)(bind_.get(), &msg, MSG_ERRQUEUE),
+              SyscallFailsWithErrno(EAGAIN));
+}
+
+}  // namespace testing
+}  // namespace gvisor
+
+#endif  // __fuchsia__
diff --git a/test/syscalls/linux/udp_socket_test_cases.cc b/test/syscalls/linux/udp_socket_test_cases.cc
new file mode 100644
index 000000000..9cc6be4fb
--- /dev/null
+++ b/test/syscalls/linux/udp_socket_test_cases.cc
@@ -0,0 +1,1727 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test/syscalls/linux/udp_socket_test_cases.h"
+
+#include <arpa/inet.h>
+#include <fcntl.h>
+#include <netinet/in.h>
+#include <poll.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include "absl/strings/str_format.h"
+#ifndef SIOCGSTAMP
+#include <linux/sockios.h>
+#endif
+
+#include "gtest/gtest.h"
+#include "absl/base/macros.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/syscalls/linux/ip_socket_test_util.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/posix_error.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// Gets a pointer to the port component of the given address.
+uint16_t* Port(struct sockaddr_storage* addr) {
+  switch (addr->ss_family) {
+    case AF_INET: {
+      auto sin = reinterpret_cast<struct sockaddr_in*>(addr);
+      return &sin->sin_port;
+    }
+    case AF_INET6: {
+      auto sin6 = reinterpret_cast<struct sockaddr_in6*>(addr);
+      return &sin6->sin6_port;
+    }
+  }
+
+  return nullptr;
+}
+
+// Sets addr port to "port".
+void SetPort(struct sockaddr_storage* addr, uint16_t port) {
+  switch (addr->ss_family) {
+    case AF_INET: {
+      auto sin = reinterpret_cast<struct sockaddr_in*>(addr);
+      sin->sin_port = port;
+      break;
+    }
+    case AF_INET6: {
+      auto sin6 = reinterpret_cast<struct sockaddr_in6*>(addr);
+      sin6->sin6_port = port;
+      break;
+    }
+  }
+}
+
+void UdpSocketTest::SetUp() {
+  addrlen_ = GetAddrLength();
+
+  bind_ =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(GetFamily(), SOCK_DGRAM, IPPROTO_UDP));
+  memset(&bind_addr_storage_, 0, sizeof(bind_addr_storage_));
+  bind_addr_ = reinterpret_cast<struct sockaddr*>(&bind_addr_storage_);
+
+  sock_ =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(GetFamily(), SOCK_DGRAM, IPPROTO_UDP));
+}
+
+int UdpSocketTest::GetFamily() {
+  if (GetParam() == AddressFamily::kIpv4) {
+    return AF_INET;
+  }
+  return AF_INET6;
+}
+
+PosixError UdpSocketTest::BindLoopback() {
+  bind_addr_storage_ = InetLoopbackAddr();
+  struct sockaddr* bind_addr_ =
+      reinterpret_cast<struct sockaddr*>(&bind_addr_storage_);
+  return BindSocket(bind_.get(), bind_addr_);
+}
+
+PosixError UdpSocketTest::BindAny() {
+  bind_addr_storage_ = InetAnyAddr();
+  struct sockaddr* bind_addr_ =
+      reinterpret_cast<struct sockaddr*>(&bind_addr_storage_);
+  return BindSocket(bind_.get(), bind_addr_);
+}
+
+PosixError UdpSocketTest::BindSocket(int socket, struct sockaddr* addr) {
+  socklen_t len = sizeof(bind_addr_storage_);
+
+  // Bind, then check that we get the right address.
+  RETURN_ERROR_IF_SYSCALL_FAIL(bind(socket, addr, addrlen_));
+
+  RETURN_ERROR_IF_SYSCALL_FAIL(getsockname(socket, addr, &len));
+
+  if (addrlen_ != len) {
+    return PosixError(
+        EINVAL,
+        absl::StrFormat("getsockname len: %u expected: %u", len, addrlen_));
+  }
+  return PosixError(0);
+}
+
+socklen_t UdpSocketTest::GetAddrLength() {
+  struct sockaddr_storage addr;
+  if (GetFamily() == AF_INET) {
+    auto sin = reinterpret_cast<struct sockaddr_in*>(&addr);
+    return sizeof(*sin);
+  }
+
+  auto sin6 = reinterpret_cast<struct sockaddr_in6*>(&addr);
+  return sizeof(*sin6);
+}
+
+sockaddr_storage UdpSocketTest::InetAnyAddr() {
+  struct sockaddr_storage addr;
+  memset(&addr, 0, sizeof(addr));
+  reinterpret_cast<struct sockaddr*>(&addr)->sa_family = GetFamily();
+
+  if (GetFamily() == AF_INET) {
+    auto sin = reinterpret_cast<struct sockaddr_in*>(&addr);
+    sin->sin_addr.s_addr = htonl(INADDR_ANY);
+    sin->sin_port = htons(0);
+    return addr;
+  }
+
+  auto sin6 = reinterpret_cast<struct sockaddr_in6*>(&addr);
+  sin6->sin6_addr = IN6ADDR_ANY_INIT;
+  sin6->sin6_port = htons(0);
+  return addr;
+}
+
+sockaddr_storage UdpSocketTest::InetLoopbackAddr() {
+  struct sockaddr_storage addr;
+  memset(&addr, 0, sizeof(addr));
+  reinterpret_cast<struct sockaddr*>(&addr)->sa_family = GetFamily();
+
+  if (GetFamily() == AF_INET) {
+    auto sin = reinterpret_cast<struct sockaddr_in*>(&addr);
+    sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+    sin->sin_port = htons(0);
+    return addr;
+  }
+  auto sin6 = reinterpret_cast<struct sockaddr_in6*>(&addr);
+  sin6->sin6_addr = in6addr_loopback;
+  sin6->sin6_port = htons(0);
+  return addr;
+}
+
+void UdpSocketTest::Disconnect(int sockfd) {
+  sockaddr_storage addr_storage = InetAnyAddr();
+  sockaddr* addr = reinterpret_cast<struct sockaddr*>(&addr_storage);
+  socklen_t addrlen = sizeof(addr_storage);
+
+  addr->sa_family = AF_UNSPEC;
+  ASSERT_THAT(connect(sockfd, addr, addrlen), SyscallSucceeds());
+
+  // Check that after disconnect the socket is bound to the ANY address.
+  EXPECT_THAT(getsockname(sockfd, addr, &addrlen), SyscallSucceeds());
+  if (GetParam() == AddressFamily::kIpv4) {
+    auto addr_out = reinterpret_cast<struct sockaddr_in*>(addr);
+    EXPECT_EQ(addrlen, sizeof(*addr_out));
+    EXPECT_EQ(addr_out->sin_addr.s_addr, htonl(INADDR_ANY));
+  } else {
+    auto addr_out = reinterpret_cast<struct sockaddr_in6*>(addr);
+    EXPECT_EQ(addrlen, sizeof(*addr_out));
+    struct in6_addr loopback = IN6ADDR_ANY_INIT;
+
+    EXPECT_EQ(memcmp(&addr_out->sin6_addr, &loopback, sizeof(in6_addr)), 0);
+  }
+}
+
+TEST_P(UdpSocketTest, Creation) {
+  FileDescriptor sock =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(GetFamily(), SOCK_DGRAM, IPPROTO_UDP));
+  EXPECT_THAT(close(sock.release()), SyscallSucceeds());
+
+  sock = ASSERT_NO_ERRNO_AND_VALUE(Socket(GetFamily(), SOCK_DGRAM, 0));
+  EXPECT_THAT(close(sock.release()), SyscallSucceeds());
+
+  ASSERT_THAT(socket(GetFamily(), SOCK_STREAM, IPPROTO_UDP), SyscallFails());
+}
+
+TEST_P(UdpSocketTest, Getsockname) {
+  // Check that we're not bound.
+  struct sockaddr_storage addr;
+  socklen_t addrlen = sizeof(addr);
+  EXPECT_THAT(
+      getsockname(bind_.get(), reinterpret_cast<sockaddr*>(&addr), &addrlen),
+      SyscallSucceeds());
+  EXPECT_EQ(addrlen, addrlen_);
+  struct sockaddr_storage any = InetAnyAddr();
+  EXPECT_EQ(memcmp(&addr, reinterpret_cast<struct sockaddr*>(&any), addrlen_),
+            0);
+
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  EXPECT_THAT(
+      getsockname(bind_.get(), reinterpret_cast<sockaddr*>(&addr), &addrlen),
+      SyscallSucceeds());
+
+  EXPECT_EQ(addrlen, addrlen_);
+  EXPECT_EQ(memcmp(&addr, bind_addr_, addrlen_), 0);
+}
+
+TEST_P(UdpSocketTest, Getpeername) {
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  // Check that we're not connected.
+  struct sockaddr_storage addr;
+  socklen_t addrlen = sizeof(addr);
+  EXPECT_THAT(
+      getpeername(sock_.get(), reinterpret_cast<sockaddr*>(&addr), &addrlen),
+      SyscallFailsWithErrno(ENOTCONN));
+
+  // Connect, then check that we get the right address.
+  ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
+
+  addrlen = sizeof(addr);
+  EXPECT_THAT(
+      getpeername(sock_.get(), reinterpret_cast<sockaddr*>(&addr), &addrlen),
+      SyscallSucceeds());
+  EXPECT_EQ(addrlen, addrlen_);
+  EXPECT_EQ(memcmp(&addr, bind_addr_, addrlen_), 0);
+}
+
+TEST_P(UdpSocketTest, SendNotConnected) {
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  // Do send & write, they must fail.
+  char buf[512];
+  EXPECT_THAT(send(sock_.get(), buf, sizeof(buf), 0),
+              SyscallFailsWithErrno(EDESTADDRREQ));
+
+  EXPECT_THAT(write(sock_.get(), buf, sizeof(buf)),
+              SyscallFailsWithErrno(EDESTADDRREQ));
+
+  // Use sendto.
+  ASSERT_THAT(sendto(sock_.get(), buf, sizeof(buf), 0, bind_addr_, addrlen_),
+              SyscallSucceedsWithValue(sizeof(buf)));
+
+  // Check that we're bound now.
+  struct sockaddr_storage addr;
+  socklen_t addrlen = sizeof(addr);
+  EXPECT_THAT(
+      getsockname(sock_.get(), reinterpret_cast<sockaddr*>(&addr), &addrlen),
+      SyscallSucceeds());
+  EXPECT_EQ(addrlen, addrlen_);
+  EXPECT_NE(*Port(&addr), 0);
+}
+
+TEST_P(UdpSocketTest, ConnectBinds) {
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  // Connect the socket.
+  ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
+
+  // Check that we're bound now.
+  struct sockaddr_storage addr;
+  socklen_t addrlen = sizeof(addr);
+  EXPECT_THAT(
+      getsockname(sock_.get(), reinterpret_cast<sockaddr*>(&addr), &addrlen),
+      SyscallSucceeds());
+  EXPECT_EQ(addrlen, addrlen_);
+  EXPECT_NE(*Port(&addr), 0);
+}
+
+TEST_P(UdpSocketTest, ReceiveNotBound) {
+  char buf[512];
+  EXPECT_THAT(recv(sock_.get(), buf, sizeof(buf), MSG_DONTWAIT),
+              SyscallFailsWithErrno(EWOULDBLOCK));
+}
+
+TEST_P(UdpSocketTest, Bind) {
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  // Try to bind again.
+  EXPECT_THAT(bind(bind_.get(), bind_addr_, addrlen_),
+              SyscallFailsWithErrno(EINVAL));
+
+  // Check that we're still bound to the original address.
+  struct sockaddr_storage addr;
+  socklen_t addrlen = sizeof(addr);
+  EXPECT_THAT(
+      getsockname(bind_.get(), reinterpret_cast<sockaddr*>(&addr), &addrlen),
+      SyscallSucceeds());
+  EXPECT_EQ(addrlen, addrlen_);
+  EXPECT_EQ(memcmp(&addr, bind_addr_, addrlen_), 0);
+}
+
+TEST_P(UdpSocketTest, BindInUse) {
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  // Try to bind again.
+  EXPECT_THAT(bind(sock_.get(), bind_addr_, addrlen_),
+              SyscallFailsWithErrno(EADDRINUSE));
+}
+
+TEST_P(UdpSocketTest, ReceiveAfterConnect) {
+  ASSERT_NO_ERRNO(BindLoopback());
+  ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
+
+  // Send from sock_ to bind_
+  char buf[512];
+  RandomizeBuffer(buf, sizeof(buf));
+  ASSERT_THAT(sendto(sock_.get(), buf, sizeof(buf), 0, bind_addr_, addrlen_),
+              SyscallSucceedsWithValue(sizeof(buf)));
+
+  // Receive the data.
+  char received[sizeof(buf)];
+  EXPECT_THAT(recv(bind_.get(), received, sizeof(received), 0),
+              SyscallSucceedsWithValue(sizeof(received)));
+  EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0);
+}
+
+TEST_P(UdpSocketTest, ReceiveAfterDisconnect) {
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  for (int i = 0; i < 2; i++) {
+    // Connet sock_ to bound address.
+    ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
+
+    struct sockaddr_storage addr;
+    socklen_t addrlen = sizeof(addr);
+    EXPECT_THAT(
+        getsockname(sock_.get(), reinterpret_cast<sockaddr*>(&addr), &addrlen),
+        SyscallSucceeds());
+    EXPECT_EQ(addrlen, addrlen_);
+
+    // Send from sock to bind_.
+    char buf[512];
+    RandomizeBuffer(buf, sizeof(buf));
+
+    ASSERT_THAT(sendto(bind_.get(), buf, sizeof(buf), 0,
+                       reinterpret_cast<sockaddr*>(&addr), addrlen),
+                SyscallSucceedsWithValue(sizeof(buf)));
+
+    // Receive the data.
+    char received[sizeof(buf)];
+    EXPECT_THAT(recv(sock_.get(), received, sizeof(received), 0),
+                SyscallSucceedsWithValue(sizeof(received)));
+    EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0);
+
+    // Disconnect sock_.
+    struct sockaddr unspec = {};
+    unspec.sa_family = AF_UNSPEC;
+    ASSERT_THAT(connect(sock_.get(), &unspec, sizeof(unspec.sa_family)),
+                SyscallSucceeds());
+  }
+}
+
+TEST_P(UdpSocketTest, Connect) {
+  ASSERT_NO_ERRNO(BindLoopback());
+  ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
+
+  // Check that we're connected to the right peer.
+  struct sockaddr_storage peer;
+  socklen_t peerlen = sizeof(peer);
+  EXPECT_THAT(
+      getpeername(sock_.get(), reinterpret_cast<sockaddr*>(&peer), &peerlen),
+      SyscallSucceeds());
+  EXPECT_EQ(peerlen, addrlen_);
+  EXPECT_EQ(memcmp(&peer, bind_addr_, addrlen_), 0);
+
+  // Try to bind after connect.
+  struct sockaddr_storage any = InetAnyAddr();
+  EXPECT_THAT(
+      bind(sock_.get(), reinterpret_cast<struct sockaddr*>(&any), addrlen_),
+      SyscallFailsWithErrno(EINVAL));
+
+  struct sockaddr_storage bind2_storage = InetLoopbackAddr();
+  struct sockaddr* bind2_addr =
+      reinterpret_cast<struct sockaddr*>(&bind2_storage);
+  FileDescriptor bind2 =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(GetFamily(), SOCK_DGRAM, IPPROTO_UDP));
+  ASSERT_NO_ERRNO(BindSocket(bind2.get(), bind2_addr));
+
+  // Try to connect again.
+  EXPECT_THAT(connect(sock_.get(), bind2_addr, addrlen_), SyscallSucceeds());
+
+  // Check that peer name changed.
+  peerlen = sizeof(peer);
+  EXPECT_THAT(
+      getpeername(sock_.get(), reinterpret_cast<sockaddr*>(&peer), &peerlen),
+      SyscallSucceeds());
+  EXPECT_EQ(peerlen, addrlen_);
+  EXPECT_EQ(memcmp(&peer, bind2_addr, addrlen_), 0);
+}
+
+TEST_P(UdpSocketTest, ConnectAnyZero) {
+  // TODO(138658473): Enable when we can connect to port 0 with gVisor.
+  SKIP_IF(IsRunningOnGvisor());
+
+  struct sockaddr_storage any = InetAnyAddr();
+  EXPECT_THAT(
+      connect(sock_.get(), reinterpret_cast<struct sockaddr*>(&any), addrlen_),
+      SyscallSucceeds());
+
+  struct sockaddr_storage addr;
+  socklen_t addrlen = sizeof(addr);
+  EXPECT_THAT(
+      getpeername(sock_.get(), reinterpret_cast<sockaddr*>(&addr), &addrlen),
+      SyscallFailsWithErrno(ENOTCONN));
+}
+
+TEST_P(UdpSocketTest, ConnectAnyWithPort) {
+  ASSERT_NO_ERRNO(BindAny());
+  ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
+
+  struct sockaddr_storage addr;
+  socklen_t addrlen = sizeof(addr);
+  EXPECT_THAT(
+      getpeername(sock_.get(), reinterpret_cast<sockaddr*>(&addr), &addrlen),
+      SyscallSucceeds());
+}
+
+TEST_P(UdpSocketTest, DisconnectAfterConnectAny) {
+  // TODO(138658473): Enable when we can connect to port 0 with gVisor.
+  SKIP_IF(IsRunningOnGvisor());
+  struct sockaddr_storage any = InetAnyAddr();
+  EXPECT_THAT(
+      connect(sock_.get(), reinterpret_cast<struct sockaddr*>(&any), addrlen_),
+      SyscallSucceeds());
+
+  struct sockaddr_storage addr;
+  socklen_t addrlen = sizeof(addr);
+  EXPECT_THAT(
+      getpeername(sock_.get(), reinterpret_cast<sockaddr*>(&addr), &addrlen),
+      SyscallFailsWithErrno(ENOTCONN));
+
+  Disconnect(sock_.get());
+}
+
+TEST_P(UdpSocketTest, DisconnectAfterConnectAnyWithPort) {
+  ASSERT_NO_ERRNO(BindAny());
+  EXPECT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
+
+  struct sockaddr_storage addr;
+  socklen_t addrlen = sizeof(addr);
+  EXPECT_THAT(
+      getpeername(sock_.get(), reinterpret_cast<sockaddr*>(&addr), &addrlen),
+      SyscallSucceeds());
+
+  EXPECT_EQ(addrlen, addrlen_);
+  EXPECT_EQ(*Port(&bind_addr_storage_), *Port(&addr));
+
+  Disconnect(sock_.get());
+}
+
+TEST_P(UdpSocketTest, DisconnectAfterBind) {
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  // Bind to the next port above bind_.
+  struct sockaddr_storage addr_storage = InetLoopbackAddr();
+  struct sockaddr* addr = reinterpret_cast<struct sockaddr*>(&addr_storage);
+  SetPort(&addr_storage, *Port(&bind_addr_storage_) + 1);
+  ASSERT_NO_ERRNO(BindSocket(sock_.get(), addr));
+
+  // Connect the socket.
+  ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
+
+  struct sockaddr_storage unspec = {};
+  unspec.ss_family = AF_UNSPEC;
+  EXPECT_THAT(connect(sock_.get(), reinterpret_cast<sockaddr*>(&unspec),
+                      sizeof(unspec.ss_family)),
+              SyscallSucceeds());
+
+  // Check that we're still bound.
+  socklen_t addrlen = sizeof(unspec);
+  EXPECT_THAT(
+      getsockname(sock_.get(), reinterpret_cast<sockaddr*>(&unspec), &addrlen),
+      SyscallSucceeds());
+
+  EXPECT_EQ(addrlen, addrlen_);
+  EXPECT_EQ(memcmp(addr, &unspec, addrlen_), 0);
+
+  addrlen = sizeof(addr);
+  EXPECT_THAT(getpeername(sock_.get(), addr, &addrlen),
+              SyscallFailsWithErrno(ENOTCONN));
+}
+
+TEST_P(UdpSocketTest, BindToAnyConnnectToLocalhost) {
+  ASSERT_NO_ERRNO(BindAny());
+
+  struct sockaddr_storage addr_storage = InetLoopbackAddr();
+  struct sockaddr* addr = reinterpret_cast<struct sockaddr*>(&addr_storage);
+  SetPort(&addr_storage, *Port(&bind_addr_storage_) + 1);
+  socklen_t addrlen = sizeof(addr);
+
+  // Connect the socket.
+  ASSERT_THAT(connect(bind_.get(), addr, addrlen_), SyscallSucceeds());
+
+  EXPECT_THAT(getsockname(bind_.get(), addr, &addrlen), SyscallSucceeds());
+
+  // If the socket is bound to ANY and connected to a loopback address,
+  // getsockname() has to return the loopback address.
+  if (GetParam() == AddressFamily::kIpv4) {
+    auto addr_out = reinterpret_cast<struct sockaddr_in*>(addr);
+    EXPECT_EQ(addrlen, sizeof(*addr_out));
+    EXPECT_EQ(addr_out->sin_addr.s_addr, htonl(INADDR_LOOPBACK));
+  } else {
+    auto addr_out = reinterpret_cast<struct sockaddr_in6*>(addr);
+    struct in6_addr loopback = IN6ADDR_LOOPBACK_INIT;
+    EXPECT_EQ(addrlen, sizeof(*addr_out));
+    EXPECT_EQ(memcmp(&addr_out->sin6_addr, &loopback, sizeof(in6_addr)), 0);
+  }
+}
+
+TEST_P(UdpSocketTest, DisconnectAfterBindToAny) {
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  struct sockaddr_storage any_storage = InetAnyAddr();
+  struct sockaddr* any = reinterpret_cast<struct sockaddr*>(&any_storage);
+  SetPort(&any_storage, *Port(&bind_addr_storage_) + 1);
+
+  ASSERT_NO_ERRNO(BindSocket(sock_.get(), any));
+
+  // Connect the socket.
+  ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
+
+  Disconnect(sock_.get());
+
+  // Check that we're still bound.
+  struct sockaddr_storage addr;
+  socklen_t addrlen = sizeof(addr);
+  EXPECT_THAT(
+      getsockname(sock_.get(), reinterpret_cast<sockaddr*>(&addr), &addrlen),
+      SyscallSucceeds());
+
+  EXPECT_EQ(addrlen, addrlen_);
+  EXPECT_EQ(memcmp(&addr, any, addrlen), 0);
+
+  addrlen = sizeof(addr);
+  EXPECT_THAT(
+      getpeername(sock_.get(), reinterpret_cast<sockaddr*>(&addr), &addrlen),
+      SyscallFailsWithErrno(ENOTCONN));
+}
+
+TEST_P(UdpSocketTest, Disconnect) {
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  struct sockaddr_storage any_storage = InetAnyAddr();
+  struct sockaddr* any = reinterpret_cast<struct sockaddr*>(&any_storage);
+  SetPort(&any_storage, *Port(&bind_addr_storage_) + 1);
+  ASSERT_NO_ERRNO(BindSocket(sock_.get(), any));
+
+  for (int i = 0; i < 2; i++) {
+    // Try to connect again.
+    EXPECT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
+
+    // Check that we're connected to the right peer.
+    struct sockaddr_storage peer;
+    socklen_t peerlen = sizeof(peer);
+    EXPECT_THAT(
+        getpeername(sock_.get(), reinterpret_cast<sockaddr*>(&peer), &peerlen),
+        SyscallSucceeds());
+    EXPECT_EQ(peerlen, addrlen_);
+    EXPECT_EQ(memcmp(&peer, bind_addr_, addrlen_), 0);
+
+    // Try to disconnect.
+    struct sockaddr_storage addr = {};
+    addr.ss_family = AF_UNSPEC;
+    EXPECT_THAT(connect(sock_.get(), reinterpret_cast<sockaddr*>(&addr),
+                        sizeof(addr.ss_family)),
+                SyscallSucceeds());
+
+    peerlen = sizeof(peer);
+    EXPECT_THAT(
+        getpeername(sock_.get(), reinterpret_cast<sockaddr*>(&peer), &peerlen),
+        SyscallFailsWithErrno(ENOTCONN));
+
+    // Check that we're still bound.
+    socklen_t addrlen = sizeof(addr);
+    EXPECT_THAT(
+        getsockname(sock_.get(), reinterpret_cast<sockaddr*>(&addr), &addrlen),
+        SyscallSucceeds());
+    EXPECT_EQ(addrlen, addrlen_);
+    EXPECT_EQ(*Port(&addr), *Port(&any_storage));
+  }
+}
+
+TEST_P(UdpSocketTest, ConnectBadAddress) {
+  struct sockaddr addr = {};
+  addr.sa_family = GetFamily();
+  ASSERT_THAT(connect(sock_.get(), &addr, sizeof(addr.sa_family)),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_P(UdpSocketTest, SendToAddressOtherThanConnected) {
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  struct sockaddr_storage addr_storage = InetAnyAddr();
+  struct sockaddr* addr = reinterpret_cast<struct sockaddr*>(&addr_storage);
+  SetPort(&addr_storage, *Port(&bind_addr_storage_) + 1);
+
+  ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
+
+  // Send to a different destination than we're connected to.
+  char buf[512];
+  EXPECT_THAT(sendto(sock_.get(), buf, sizeof(buf), 0, addr, addrlen_),
+              SyscallSucceedsWithValue(sizeof(buf)));
+}
+
+TEST_P(UdpSocketTest, ZerolengthWriteAllowed) {
+  // TODO(gvisor.dev/issue/1202): Hostinet does not support zero length writes.
+  SKIP_IF(IsRunningWithHostinet());
+
+  ASSERT_NO_ERRNO(BindLoopback());
+  // Connect to loopback:bind_addr_+1.
+  struct sockaddr_storage addr_storage = InetLoopbackAddr();
+  struct sockaddr* addr = reinterpret_cast<struct sockaddr*>(&addr_storage);
+  SetPort(&addr_storage, *Port(&bind_addr_storage_) + 1);
+  ASSERT_THAT(connect(bind_.get(), addr, addrlen_), SyscallSucceeds());
+
+  // Bind sock to loopback:bind_addr_+1.
+  ASSERT_THAT(bind(sock_.get(), addr, addrlen_), SyscallSucceeds());
+
+  char buf[3];
+  // Send zero length packet from bind_ to sock_.
+  ASSERT_THAT(write(bind_.get(), buf, 0), SyscallSucceedsWithValue(0));
+
+  struct pollfd pfd = {sock_.get(), POLLIN, 0};
+  ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, /*timeout*/ 1000),
+              SyscallSucceedsWithValue(1));
+
+  // Receive the packet.
+  char received[3];
+  EXPECT_THAT(read(sock_.get(), received, sizeof(received)),
+              SyscallSucceedsWithValue(0));
+}
+
+TEST_P(UdpSocketTest, ZerolengthWriteAllowedNonBlockRead) {
+  // TODO(gvisor.dev/issue/1202): Hostinet does not support zero length writes.
+  SKIP_IF(IsRunningWithHostinet());
+
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  // Connect to loopback:bind_addr_port+1.
+  struct sockaddr_storage addr_storage = InetLoopbackAddr();
+  struct sockaddr* addr = reinterpret_cast<struct sockaddr*>(&addr_storage);
+  SetPort(&addr_storage, *Port(&bind_addr_storage_) + 1);
+  ASSERT_THAT(connect(bind_.get(), addr, addrlen_), SyscallSucceeds());
+
+  // Bind sock to loopback:bind_addr_port+1.
+  ASSERT_THAT(bind(sock_.get(), addr, addrlen_), SyscallSucceeds());
+
+  // Set sock to non-blocking.
+  int opts = 0;
+  ASSERT_THAT(opts = fcntl(sock_.get(), F_GETFL), SyscallSucceeds());
+  ASSERT_THAT(fcntl(sock_.get(), F_SETFL, opts | O_NONBLOCK),
+              SyscallSucceeds());
+
+  char buf[3];
+  // Send zero length packet from bind_ to sock_.
+  ASSERT_THAT(write(bind_.get(), buf, 0), SyscallSucceedsWithValue(0));
+
+  struct pollfd pfd = {sock_.get(), POLLIN, 0};
+  ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, /*timeout=*/1000),
+              SyscallSucceedsWithValue(1));
+
+  // Receive the packet.
+  char received[3];
+  EXPECT_THAT(read(sock_.get(), received, sizeof(received)),
+              SyscallSucceedsWithValue(0));
+  EXPECT_THAT(read(sock_.get(), received, sizeof(received)),
+              SyscallFailsWithErrno(EAGAIN));
+}
+
+TEST_P(UdpSocketTest, SendAndReceiveNotConnected) {
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  // Send some data to bind_.
+  char buf[512];
+  RandomizeBuffer(buf, sizeof(buf));
+
+  ASSERT_THAT(sendto(sock_.get(), buf, sizeof(buf), 0, bind_addr_, addrlen_),
+              SyscallSucceedsWithValue(sizeof(buf)));
+
+  // Receive the data.
+  char received[sizeof(buf)];
+  EXPECT_THAT(recv(bind_.get(), received, sizeof(received), 0),
+              SyscallSucceedsWithValue(sizeof(received)));
+  EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0);
+}
+
+TEST_P(UdpSocketTest, SendAndReceiveConnected) {
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  // Connect to loopback:bind_addr_port+1.
+  struct sockaddr_storage addr_storage = InetLoopbackAddr();
+  struct sockaddr* addr = reinterpret_cast<struct sockaddr*>(&addr_storage);
+  SetPort(&addr_storage, *Port(&bind_addr_storage_) + 1);
+  ASSERT_THAT(connect(bind_.get(), addr, addrlen_), SyscallSucceeds());
+
+  // Bind sock to loopback:TestPort+1.
+  ASSERT_THAT(bind(sock_.get(), addr, addrlen_), SyscallSucceeds());
+
+  // Send some data from sock to bind_.
+  char buf[512];
+  RandomizeBuffer(buf, sizeof(buf));
+
+  ASSERT_THAT(sendto(sock_.get(), buf, sizeof(buf), 0, bind_addr_, addrlen_),
+              SyscallSucceedsWithValue(sizeof(buf)));
+
+  // Receive the data.
+  char received[sizeof(buf)];
+  EXPECT_THAT(recv(bind_.get(), received, sizeof(received), 0),
+              SyscallSucceedsWithValue(sizeof(received)));
+  EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0);
+}
+
+TEST_P(UdpSocketTest, ReceiveFromNotConnected) {
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  // Connect to loopback:bind_addr_port+1.
+  struct sockaddr_storage addr_storage = InetLoopbackAddr();
+  struct sockaddr* addr = reinterpret_cast<struct sockaddr*>(&addr_storage);
+  SetPort(&addr_storage, *Port(&bind_addr_storage_) + 1);
+  ASSERT_THAT(connect(bind_.get(), addr, addrlen_), SyscallSucceeds());
+
+  // Bind sock to loopback:bind_addr_port+2.
+  struct sockaddr_storage addr2_storage = InetLoopbackAddr();
+  struct sockaddr* addr2 = reinterpret_cast<struct sockaddr*>(&addr2_storage);
+  SetPort(&addr2_storage, *Port(&bind_addr_storage_) + 2);
+  ASSERT_THAT(bind(sock_.get(), addr2, addrlen_), SyscallSucceeds());
+
+  // Send some data from sock to bind_.
+  char buf[512];
+  ASSERT_THAT(sendto(sock_.get(), buf, sizeof(buf), 0, bind_addr_, addrlen_),
+              SyscallSucceedsWithValue(sizeof(buf)));
+
+  // Check that the data isn't received because it was sent from a different
+  // address than we're connected.
+  EXPECT_THAT(recv(sock_.get(), buf, sizeof(buf), MSG_DONTWAIT),
+              SyscallFailsWithErrno(EWOULDBLOCK));
+}
+
+TEST_P(UdpSocketTest, ReceiveBeforeConnect) {
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  // Bind sock to loopback:bind_addr_port+2.
+  struct sockaddr_storage addr2_storage = InetLoopbackAddr();
+  struct sockaddr* addr2 = reinterpret_cast<struct sockaddr*>(&addr2_storage);
+  SetPort(&addr2_storage, *Port(&bind_addr_storage_) + 2);
+  ASSERT_THAT(bind(sock_.get(), addr2, addrlen_), SyscallSucceeds());
+
+  // Send some data from sock to bind_.
+  char buf[512];
+  RandomizeBuffer(buf, sizeof(buf));
+
+  ASSERT_THAT(sendto(sock_.get(), buf, sizeof(buf), 0, bind_addr_, addrlen_),
+              SyscallSucceedsWithValue(sizeof(buf)));
+
+  // Connect to loopback:TestPort+1.
+  struct sockaddr_storage addr_storage = InetLoopbackAddr();
+  struct sockaddr* addr = reinterpret_cast<struct sockaddr*>(&addr_storage);
+  SetPort(&addr_storage, *Port(&bind_addr_storage_) + 1);
+  ASSERT_THAT(connect(bind_.get(), addr, addrlen_), SyscallSucceeds());
+
+  // Receive the data. It works because it was sent before the connect.
+  char received[sizeof(buf)];
+  EXPECT_THAT(recv(bind_.get(), received, sizeof(received), 0),
+              SyscallSucceedsWithValue(sizeof(received)));
+  EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0);
+
+  // Send again. This time it should not be received.
+  ASSERT_THAT(sendto(sock_.get(), buf, sizeof(buf), 0, bind_addr_, addrlen_),
+              SyscallSucceedsWithValue(sizeof(buf)));
+
+  EXPECT_THAT(recv(bind_.get(), buf, sizeof(buf), MSG_DONTWAIT),
+              SyscallFailsWithErrno(EWOULDBLOCK));
+}
+
+TEST_P(UdpSocketTest, ReceiveFrom) {
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  // Connect to loopback:bind_addr_port+1.
+  struct sockaddr_storage addr_storage = InetLoopbackAddr();
+  struct sockaddr* addr = reinterpret_cast<struct sockaddr*>(&addr_storage);
+  SetPort(&addr_storage, *Port(&bind_addr_storage_) + 1);
+  ASSERT_THAT(connect(bind_.get(), addr, addrlen_), SyscallSucceeds());
+
+  // Bind sock to loopback:TestPort+1.
+  ASSERT_THAT(bind(sock_.get(), addr, addrlen_), SyscallSucceeds());
+
+  // Send some data from sock to bind_.
+  char buf[512];
+  RandomizeBuffer(buf, sizeof(buf));
+
+  ASSERT_THAT(sendto(sock_.get(), buf, sizeof(buf), 0, bind_addr_, addrlen_),
+              SyscallSucceedsWithValue(sizeof(buf)));
+
+  // Receive the data and sender address.
+  char received[sizeof(buf)];
+  struct sockaddr_storage addr2;
+  socklen_t addr2len = sizeof(addr2);
+  EXPECT_THAT(recvfrom(bind_.get(), received, sizeof(received), 0,
+                       reinterpret_cast<sockaddr*>(&addr2), &addr2len),
+              SyscallSucceedsWithValue(sizeof(received)));
+  EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0);
+  EXPECT_EQ(addr2len, addrlen_);
+  EXPECT_EQ(memcmp(addr, &addr2, addrlen_), 0);
+}
+
+TEST_P(UdpSocketTest, Listen) {
+  ASSERT_THAT(listen(sock_.get(), SOMAXCONN),
+              SyscallFailsWithErrno(EOPNOTSUPP));
+}
+
+TEST_P(UdpSocketTest, Accept) {
+  ASSERT_THAT(accept(sock_.get(), nullptr, nullptr),
+              SyscallFailsWithErrno(EOPNOTSUPP));
+}
+
+// This test validates that a read shutdown with pending data allows the read
+// to proceed with the data before returning EAGAIN.
+TEST_P(UdpSocketTest, ReadShutdownNonblockPendingData) {
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  // Connect to loopback:bind_addr_port+1.
+  struct sockaddr_storage addr_storage = InetLoopbackAddr();
+  struct sockaddr* addr = reinterpret_cast<struct sockaddr*>(&addr_storage);
+  SetPort(&addr_storage, *Port(&bind_addr_storage_) + 1);
+  ASSERT_THAT(connect(bind_.get(), addr, addrlen_), SyscallSucceeds());
+
+  // Bind to loopback:bind_addr_port+1 and connect to bind_addr_.
+  ASSERT_THAT(bind(sock_.get(), addr, addrlen_), SyscallSucceeds());
+  ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
+
+  // Verify that we get EWOULDBLOCK when there is nothing to read.
+  char received[512];
+  EXPECT_THAT(recv(bind_.get(), received, sizeof(received), MSG_DONTWAIT),
+              SyscallFailsWithErrno(EWOULDBLOCK));
+
+  const char* buf = "abc";
+  EXPECT_THAT(write(sock_.get(), buf, 3), SyscallSucceedsWithValue(3));
+
+  int opts = 0;
+  ASSERT_THAT(opts = fcntl(bind_.get(), F_GETFL), SyscallSucceeds());
+  ASSERT_THAT(fcntl(bind_.get(), F_SETFL, opts | O_NONBLOCK),
+              SyscallSucceeds());
+  ASSERT_THAT(opts = fcntl(bind_.get(), F_GETFL), SyscallSucceeds());
+  ASSERT_NE(opts & O_NONBLOCK, 0);
+
+  EXPECT_THAT(shutdown(bind_.get(), SHUT_RD), SyscallSucceeds());
+
+  struct pollfd pfd = {bind_.get(), POLLIN, 0};
+  ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, /*timeout=*/1000),
+              SyscallSucceedsWithValue(1));
+
+  // We should get the data even though read has been shutdown.
+  EXPECT_THAT(recv(bind_.get(), received, 2, 0), SyscallSucceedsWithValue(2));
+
+  // Because we read less than the entire packet length, since it's a packet
+  // based socket any subsequent reads should return EWOULDBLOCK.
+  EXPECT_THAT(recv(bind_.get(), received, 1, 0),
+              SyscallFailsWithErrno(EWOULDBLOCK));
+}
+
+// This test is validating that even after a socket is shutdown if it's
+// reconnected it will reset the shutdown state.
+TEST_P(UdpSocketTest, ReadShutdownSameSocketResetsShutdownState) {
+  char received[512];
+  EXPECT_THAT(recv(bind_.get(), received, sizeof(received), MSG_DONTWAIT),
+              SyscallFailsWithErrno(EWOULDBLOCK));
+
+  EXPECT_THAT(shutdown(bind_.get(), SHUT_RD), SyscallFailsWithErrno(ENOTCONN));
+
+  EXPECT_THAT(recv(bind_.get(), received, sizeof(received), MSG_DONTWAIT),
+              SyscallFailsWithErrno(EWOULDBLOCK));
+
+  // Connect the socket, then try to shutdown again.
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  // Connect to loopback:bind_addr_port+1.
+  struct sockaddr_storage addr_storage = InetLoopbackAddr();
+  struct sockaddr* addr = reinterpret_cast<struct sockaddr*>(&addr_storage);
+  SetPort(&addr_storage, *Port(&bind_addr_storage_) + 1);
+  ASSERT_THAT(connect(bind_.get(), addr, addrlen_), SyscallSucceeds());
+
+  EXPECT_THAT(recv(bind_.get(), received, sizeof(received), MSG_DONTWAIT),
+              SyscallFailsWithErrno(EWOULDBLOCK));
+}
+
+TEST_P(UdpSocketTest, ReadShutdown) {
+  // TODO(gvisor.dev/issue/1202): Calling recv() after shutdown without
+  // MSG_DONTWAIT blocks indefinitely.
+  SKIP_IF(IsRunningWithHostinet());
+
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  char received[512];
+  EXPECT_THAT(recv(sock_.get(), received, sizeof(received), MSG_DONTWAIT),
+              SyscallFailsWithErrno(EWOULDBLOCK));
+
+  EXPECT_THAT(shutdown(sock_.get(), SHUT_RD), SyscallFailsWithErrno(ENOTCONN));
+
+  EXPECT_THAT(recv(sock_.get(), received, sizeof(received), MSG_DONTWAIT),
+              SyscallFailsWithErrno(EWOULDBLOCK));
+
+  // Connect the socket, then try to shutdown again.
+  ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
+
+  EXPECT_THAT(recv(sock_.get(), received, sizeof(received), MSG_DONTWAIT),
+              SyscallFailsWithErrno(EWOULDBLOCK));
+
+  EXPECT_THAT(shutdown(sock_.get(), SHUT_RD), SyscallSucceeds());
+
+  EXPECT_THAT(recv(sock_.get(), received, sizeof(received), 0),
+              SyscallSucceedsWithValue(0));
+}
+
+TEST_P(UdpSocketTest, ReadShutdownDifferentThread) {
+  // TODO(gvisor.dev/issue/1202): Calling recv() after shutdown without
+  // MSG_DONTWAIT blocks indefinitely.
+  SKIP_IF(IsRunningWithHostinet());
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  char received[512];
+  EXPECT_THAT(recv(sock_.get(), received, sizeof(received), MSG_DONTWAIT),
+              SyscallFailsWithErrno(EWOULDBLOCK));
+
+  // Connect the socket, then shutdown from another thread.
+  ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
+
+  EXPECT_THAT(recv(sock_.get(), received, sizeof(received), MSG_DONTWAIT),
+              SyscallFailsWithErrno(EWOULDBLOCK));
+
+  ScopedThread t([&] {
+    absl::SleepFor(absl::Milliseconds(200));
+    EXPECT_THAT(shutdown(sock_.get(), SHUT_RD), SyscallSucceeds());
+  });
+  EXPECT_THAT(RetryEINTR(recv)(sock_.get(), received, sizeof(received), 0),
+              SyscallSucceedsWithValue(0));
+  t.Join();
+
+  EXPECT_THAT(RetryEINTR(recv)(sock_.get(), received, sizeof(received), 0),
+              SyscallSucceedsWithValue(0));
+}
+
+TEST_P(UdpSocketTest, WriteShutdown) {
+  ASSERT_NO_ERRNO(BindLoopback());
+  EXPECT_THAT(shutdown(sock_.get(), SHUT_WR), SyscallFailsWithErrno(ENOTCONN));
+  ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
+  EXPECT_THAT(shutdown(sock_.get(), SHUT_WR), SyscallSucceeds());
+}
+
+TEST_P(UdpSocketTest, SynchronousReceive) {
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  // Send some data to bind_ from another thread.
+  char buf[512];
+  RandomizeBuffer(buf, sizeof(buf));
+
+  // Receive the data prior to actually starting the other thread.
+  char received[512];
+  EXPECT_THAT(
+      RetryEINTR(recv)(bind_.get(), received, sizeof(received), MSG_DONTWAIT),
+      SyscallFailsWithErrno(EWOULDBLOCK));
+
+  // Start the thread.
+  ScopedThread t([&] {
+    absl::SleepFor(absl::Milliseconds(200));
+    ASSERT_THAT(sendto(sock_.get(), buf, sizeof(buf), 0, this->bind_addr_,
+                       this->addrlen_),
+                SyscallSucceedsWithValue(sizeof(buf)));
+  });
+
+  EXPECT_THAT(RetryEINTR(recv)(bind_.get(), received, sizeof(received), 0),
+              SyscallSucceedsWithValue(512));
+  EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0);
+}
+
+TEST_P(UdpSocketTest, BoundaryPreserved_SendRecv) {
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  // Send 3 packets from sock to bind_.
+  constexpr int psize = 100;
+  char buf[3 * psize];
+  RandomizeBuffer(buf, sizeof(buf));
+
+  for (int i = 0; i < 3; ++i) {
+    ASSERT_THAT(
+        sendto(sock_.get(), buf + i * psize, psize, 0, bind_addr_, addrlen_),
+        SyscallSucceedsWithValue(psize));
+  }
+
+  // Receive the data as 3 separate packets.
+  char received[6 * psize];
+  for (int i = 0; i < 3; ++i) {
+    EXPECT_THAT(recv(bind_.get(), received + i * psize, 3 * psize, 0),
+                SyscallSucceedsWithValue(psize));
+  }
+  EXPECT_EQ(memcmp(buf, received, 3 * psize), 0);
+}
+
+TEST_P(UdpSocketTest, BoundaryPreserved_WritevReadv) {
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  // Direct writes from sock to bind_.
+  ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
+
+  // Send 2 packets from sock to bind_, where each packet's data consists of
+  // 2 discontiguous iovecs.
+  constexpr size_t kPieceSize = 100;
+  char buf[4 * kPieceSize];
+  RandomizeBuffer(buf, sizeof(buf));
+
+  for (int i = 0; i < 2; i++) {
+    struct iovec iov[2];
+    for (int j = 0; j < 2; j++) {
+      iov[j].iov_base = reinterpret_cast<void*>(
+          reinterpret_cast<uintptr_t>(buf) + (i + 2 * j) * kPieceSize);
+      iov[j].iov_len = kPieceSize;
+    }
+    ASSERT_THAT(writev(sock_.get(), iov, 2),
+                SyscallSucceedsWithValue(2 * kPieceSize));
+  }
+
+  // Receive the data as 2 separate packets.
+  char received[6 * kPieceSize];
+  for (int i = 0; i < 2; i++) {
+    struct iovec iov[3];
+    for (int j = 0; j < 3; j++) {
+      iov[j].iov_base = reinterpret_cast<void*>(
+          reinterpret_cast<uintptr_t>(received) + (i + 2 * j) * kPieceSize);
+      iov[j].iov_len = kPieceSize;
+    }
+    ASSERT_THAT(readv(bind_.get(), iov, 3),
+                SyscallSucceedsWithValue(2 * kPieceSize));
+  }
+  EXPECT_EQ(memcmp(buf, received, 4 * kPieceSize), 0);
+}
+
+TEST_P(UdpSocketTest, BoundaryPreserved_SendMsgRecvMsg) {
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  // Send 2 packets from sock to bind_, where each packet's data consists of
+  // 2 discontiguous iovecs.
+  constexpr size_t kPieceSize = 100;
+  char buf[4 * kPieceSize];
+  RandomizeBuffer(buf, sizeof(buf));
+
+  for (int i = 0; i < 2; i++) {
+    struct iovec iov[2];
+    for (int j = 0; j < 2; j++) {
+      iov[j].iov_base = reinterpret_cast<void*>(
+          reinterpret_cast<uintptr_t>(buf) + (i + 2 * j) * kPieceSize);
+      iov[j].iov_len = kPieceSize;
+    }
+    struct msghdr msg = {};
+    msg.msg_name = bind_addr_;
+    msg.msg_namelen = addrlen_;
+    msg.msg_iov = iov;
+    msg.msg_iovlen = 2;
+    ASSERT_THAT(sendmsg(sock_.get(), &msg, 0),
+                SyscallSucceedsWithValue(2 * kPieceSize));
+  }
+
+  // Receive the data as 2 separate packets.
+  char received[6 * kPieceSize];
+  for (int i = 0; i < 2; i++) {
+    struct iovec iov[3];
+    for (int j = 0; j < 3; j++) {
+      iov[j].iov_base = reinterpret_cast<void*>(
+          reinterpret_cast<uintptr_t>(received) + (i + 2 * j) * kPieceSize);
+      iov[j].iov_len = kPieceSize;
+    }
+    struct msghdr msg = {};
+    msg.msg_iov = iov;
+    msg.msg_iovlen = 3;
+    ASSERT_THAT(recvmsg(bind_.get(), &msg, 0),
+                SyscallSucceedsWithValue(2 * kPieceSize));
+  }
+  EXPECT_EQ(memcmp(buf, received, 4 * kPieceSize), 0);
+}
+
+TEST_P(UdpSocketTest, FIONREADShutdown) {
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  int n = -1;
+  EXPECT_THAT(ioctl(sock_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0));
+  EXPECT_EQ(n, 0);
+
+  // A UDP socket must be connected before it can be shutdown.
+  ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
+
+  n = -1;
+  EXPECT_THAT(ioctl(sock_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0));
+  EXPECT_EQ(n, 0);
+
+  EXPECT_THAT(shutdown(sock_.get(), SHUT_RD), SyscallSucceeds());
+
+  n = -1;
+  EXPECT_THAT(ioctl(sock_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0));
+  EXPECT_EQ(n, 0);
+}
+
+TEST_P(UdpSocketTest, FIONREADWriteShutdown) {
+  int n = -1;
+  EXPECT_THAT(ioctl(bind_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0));
+  EXPECT_EQ(n, 0);
+
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  // A UDP socket must be connected before it can be shutdown.
+  ASSERT_THAT(connect(bind_.get(), bind_addr_, addrlen_), SyscallSucceeds());
+
+  n = -1;
+  EXPECT_THAT(ioctl(bind_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0));
+  EXPECT_EQ(n, 0);
+
+  const char str[] = "abc";
+  ASSERT_THAT(send(bind_.get(), str, sizeof(str), 0),
+              SyscallSucceedsWithValue(sizeof(str)));
+
+  struct pollfd pfd = {bind_.get(), POLLIN, 0};
+  ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, /*timeout=*/1000),
+              SyscallSucceedsWithValue(1));
+
+  n = -1;
+  EXPECT_THAT(ioctl(bind_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0));
+  EXPECT_EQ(n, sizeof(str));
+
+  EXPECT_THAT(shutdown(bind_.get(), SHUT_RD), SyscallSucceeds());
+
+  n = -1;
+  EXPECT_THAT(ioctl(bind_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0));
+  EXPECT_EQ(n, sizeof(str));
+}
+
+// NOTE: Do not use `FIONREAD` as test name because it will be replaced by the
+// corresponding macro and become `0x541B`.
+TEST_P(UdpSocketTest, Fionread) {
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  // Check that the bound socket with an empty buffer reports an empty first
+  // packet.
+  int n = -1;
+  EXPECT_THAT(ioctl(bind_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0));
+  EXPECT_EQ(n, 0);
+
+  // Send 3 packets from sock to bind_.
+  constexpr int psize = 100;
+  char buf[3 * psize];
+  RandomizeBuffer(buf, sizeof(buf));
+
+  struct pollfd pfd = {bind_.get(), POLLIN, 0};
+  for (int i = 0; i < 3; ++i) {
+    ASSERT_THAT(
+        sendto(sock_.get(), buf + i * psize, psize, 0, bind_addr_, addrlen_),
+        SyscallSucceedsWithValue(psize));
+
+    ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, /*timeout=*/1000),
+                SyscallSucceedsWithValue(1));
+
+    // Check that regardless of how many packets are in the queue, the size
+    // reported is that of a single packet.
+    n = -1;
+    EXPECT_THAT(ioctl(bind_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0));
+    EXPECT_EQ(n, psize);
+  }
+}
+
+TEST_P(UdpSocketTest, FIONREADZeroLengthPacket) {
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  // Check that the bound socket with an empty buffer reports an empty first
+  // packet.
+  int n = -1;
+  EXPECT_THAT(ioctl(bind_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0));
+  EXPECT_EQ(n, 0);
+
+  // Send 3 packets from sock to bind_.
+  constexpr int psize = 100;
+  char buf[3 * psize];
+  RandomizeBuffer(buf, sizeof(buf));
+
+  struct pollfd pfd = {bind_.get(), POLLIN, 0};
+  for (int i = 0; i < 3; ++i) {
+    ASSERT_THAT(
+        sendto(sock_.get(), buf + i * psize, 0, 0, bind_addr_, addrlen_),
+        SyscallSucceedsWithValue(0));
+
+    // TODO(gvisor.dev/issue/2726): sending a zero-length message to a hostinet
+    // socket does not cause a poll event to be triggered.
+    if (!IsRunningWithHostinet()) {
+      ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, /*timeout=*/1000),
+                  SyscallSucceedsWithValue(1));
+    }
+
+    // Check that regardless of how many packets are in the queue, the size
+    // reported is that of a single packet.
+    n = -1;
+    EXPECT_THAT(ioctl(bind_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0));
+    EXPECT_EQ(n, 0);
+  }
+}
+
+TEST_P(UdpSocketTest, FIONREADZeroLengthWriteShutdown) {
+  int n = -1;
+  EXPECT_THAT(ioctl(bind_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0));
+  EXPECT_EQ(n, 0);
+
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  // A UDP socket must be connected before it can be shutdown.
+  ASSERT_THAT(connect(bind_.get(), bind_addr_, addrlen_), SyscallSucceeds());
+
+  n = -1;
+  EXPECT_THAT(ioctl(bind_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0));
+  EXPECT_EQ(n, 0);
+
+  const char str[] = "abc";
+  ASSERT_THAT(send(bind_.get(), str, 0, 0), SyscallSucceedsWithValue(0));
+
+  n = -1;
+  EXPECT_THAT(ioctl(bind_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0));
+  EXPECT_EQ(n, 0);
+
+  EXPECT_THAT(shutdown(bind_.get(), SHUT_RD), SyscallSucceeds());
+
+  n = -1;
+  EXPECT_THAT(ioctl(bind_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0));
+  EXPECT_EQ(n, 0);
+}
+
+TEST_P(UdpSocketTest, SoNoCheckOffByDefault) {
+  // TODO(gvisor.dev/issue/1202): SO_NO_CHECK socket option not supported by
+  // hostinet.
+  SKIP_IF(IsRunningWithHostinet());
+
+  int v = -1;
+  socklen_t optlen = sizeof(v);
+  ASSERT_THAT(getsockopt(bind_.get(), SOL_SOCKET, SO_NO_CHECK, &v, &optlen),
+              SyscallSucceeds());
+  ASSERT_EQ(v, kSockOptOff);
+  ASSERT_EQ(optlen, sizeof(v));
+}
+
+TEST_P(UdpSocketTest, SoNoCheck) {
+  // TODO(gvisor.dev/issue/1202): SO_NO_CHECK socket option not supported by
+  // hostinet.
+  SKIP_IF(IsRunningWithHostinet());
+
+  int v = kSockOptOn;
+  socklen_t optlen = sizeof(v);
+  ASSERT_THAT(setsockopt(bind_.get(), SOL_SOCKET, SO_NO_CHECK, &v, optlen),
+              SyscallSucceeds());
+  v = -1;
+  ASSERT_THAT(getsockopt(bind_.get(), SOL_SOCKET, SO_NO_CHECK, &v, &optlen),
+              SyscallSucceeds());
+  ASSERT_EQ(v, kSockOptOn);
+  ASSERT_EQ(optlen, sizeof(v));
+
+  v = kSockOptOff;
+  ASSERT_THAT(setsockopt(bind_.get(), SOL_SOCKET, SO_NO_CHECK, &v, optlen),
+              SyscallSucceeds());
+  v = -1;
+  ASSERT_THAT(getsockopt(bind_.get(), SOL_SOCKET, SO_NO_CHECK, &v, &optlen),
+              SyscallSucceeds());
+  ASSERT_EQ(v, kSockOptOff);
+  ASSERT_EQ(optlen, sizeof(v));
+}
+
+TEST_P(UdpSocketTest, SoTimestampOffByDefault) {
+  // TODO(gvisor.dev/issue/1202): SO_TIMESTAMP socket option not supported by
+  // hostinet.
+  SKIP_IF(IsRunningWithHostinet());
+
+  int v = -1;
+  socklen_t optlen = sizeof(v);
+  ASSERT_THAT(getsockopt(bind_.get(), SOL_SOCKET, SO_TIMESTAMP, &v, &optlen),
+              SyscallSucceeds());
+  ASSERT_EQ(v, kSockOptOff);
+  ASSERT_EQ(optlen, sizeof(v));
+}
+
+TEST_P(UdpSocketTest, SoTimestamp) {
+  // TODO(gvisor.dev/issue/1202): ioctl() and SO_TIMESTAMP socket option are not
+  // supported by hostinet.
+  SKIP_IF(IsRunningWithHostinet());
+
+  ASSERT_NO_ERRNO(BindLoopback());
+  ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
+
+  int v = 1;
+  ASSERT_THAT(setsockopt(bind_.get(), SOL_SOCKET, SO_TIMESTAMP, &v, sizeof(v)),
+              SyscallSucceeds());
+
+  char buf[3];
+  // Send zero length packet from sock to bind_.
+  ASSERT_THAT(RetryEINTR(write)(sock_.get(), buf, 0),
+              SyscallSucceedsWithValue(0));
+
+  struct pollfd pfd = {bind_.get(), POLLIN, 0};
+  ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, /*timeout=*/1000),
+              SyscallSucceedsWithValue(1));
+
+  char cmsgbuf[CMSG_SPACE(sizeof(struct timeval))];
+  msghdr msg;
+  memset(&msg, 0, sizeof(msg));
+  iovec iov;
+  memset(&iov, 0, sizeof(iov));
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+  msg.msg_control = cmsgbuf;
+  msg.msg_controllen = sizeof(cmsgbuf);
+
+  ASSERT_THAT(RetryEINTR(recvmsg)(bind_.get(), &msg, 0),
+              SyscallSucceedsWithValue(0));
+
+  struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
+  ASSERT_NE(cmsg, nullptr);
+  ASSERT_EQ(cmsg->cmsg_level, SOL_SOCKET);
+  ASSERT_EQ(cmsg->cmsg_type, SO_TIMESTAMP);
+  ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(struct timeval)));
+
+  struct timeval tv = {};
+  memcpy(&tv, CMSG_DATA(cmsg), sizeof(struct timeval));
+
+  ASSERT_TRUE(tv.tv_sec != 0 || tv.tv_usec != 0);
+
+  // There should be nothing to get via ioctl.
+  ASSERT_THAT(ioctl(bind_.get(), SIOCGSTAMP, &tv),
+              SyscallFailsWithErrno(ENOENT));
+}
+
+TEST_P(UdpSocketTest, WriteShutdownNotConnected) {
+  EXPECT_THAT(shutdown(bind_.get(), SHUT_WR), SyscallFailsWithErrno(ENOTCONN));
+}
+
+TEST_P(UdpSocketTest, TimestampIoctl) {
+  // TODO(gvisor.dev/issue/1202): ioctl() is not supported by hostinet.
+  SKIP_IF(IsRunningWithHostinet());
+
+  ASSERT_NO_ERRNO(BindLoopback());
+  ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
+
+  char buf[3];
+  // Send packet from sock to bind_.
+  ASSERT_THAT(RetryEINTR(write)(sock_.get(), buf, sizeof(buf)),
+              SyscallSucceedsWithValue(sizeof(buf)));
+
+  struct pollfd pfd = {bind_.get(), POLLIN, 0};
+  ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, /*timeout=*/1000),
+              SyscallSucceedsWithValue(1));
+
+  // There should be no control messages.
+  char recv_buf[sizeof(buf)];
+  ASSERT_NO_FATAL_FAILURE(RecvNoCmsg(bind_.get(), recv_buf, sizeof(recv_buf)));
+
+  // A nonzero timeval should be available via ioctl.
+  struct timeval tv = {};
+  ASSERT_THAT(ioctl(bind_.get(), SIOCGSTAMP, &tv), SyscallSucceeds());
+  ASSERT_TRUE(tv.tv_sec != 0 || tv.tv_usec != 0);
+}
+
+TEST_P(UdpSocketTest, TimestampIoctlNothingRead) {
+  // TODO(gvisor.dev/issue/1202): ioctl() is not supported by hostinet.
+  SKIP_IF(IsRunningWithHostinet());
+
+  ASSERT_NO_ERRNO(BindLoopback());
+  ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
+
+  struct timeval tv = {};
+  ASSERT_THAT(ioctl(sock_.get(), SIOCGSTAMP, &tv),
+              SyscallFailsWithErrno(ENOENT));
+}
+
+// Test that the timestamp accessed via SIOCGSTAMP is still accessible after
+// SO_TIMESTAMP is enabled and used to retrieve a timestamp.
+TEST_P(UdpSocketTest, TimestampIoctlPersistence) {
+  // TODO(gvisor.dev/issue/1202): ioctl() and SO_TIMESTAMP socket option are not
+  // supported by hostinet.
+  SKIP_IF(IsRunningWithHostinet());
+
+  ASSERT_NO_ERRNO(BindLoopback());
+  ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
+
+  char buf[3];
+  // Send packet from sock to bind_.
+  ASSERT_THAT(RetryEINTR(write)(sock_.get(), buf, sizeof(buf)),
+              SyscallSucceedsWithValue(sizeof(buf)));
+  ASSERT_THAT(RetryEINTR(write)(sock_.get(), buf, 0),
+              SyscallSucceedsWithValue(0));
+
+  struct pollfd pfd = {bind_.get(), POLLIN, 0};
+  ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, /*timeout=*/1000),
+              SyscallSucceedsWithValue(1));
+
+  // There should be no control messages.
+  char recv_buf[sizeof(buf)];
+  ASSERT_NO_FATAL_FAILURE(RecvNoCmsg(bind_.get(), recv_buf, sizeof(recv_buf)));
+
+  // A nonzero timeval should be available via ioctl.
+  struct timeval tv = {};
+  ASSERT_THAT(ioctl(bind_.get(), SIOCGSTAMP, &tv), SyscallSucceeds());
+  ASSERT_TRUE(tv.tv_sec != 0 || tv.tv_usec != 0);
+
+  // Enable SO_TIMESTAMP and send a message.
+  int v = 1;
+  EXPECT_THAT(setsockopt(bind_.get(), SOL_SOCKET, SO_TIMESTAMP, &v, sizeof(v)),
+              SyscallSucceeds());
+  ASSERT_THAT(RetryEINTR(write)(sock_.get(), buf, 0),
+              SyscallSucceedsWithValue(0));
+
+  ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, /*timeout=*/1000),
+              SyscallSucceedsWithValue(1));
+
+  // There should be a message for SO_TIMESTAMP.
+  char cmsgbuf[CMSG_SPACE(sizeof(struct timeval))];
+  msghdr msg = {};
+  iovec iov = {};
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+  msg.msg_control = cmsgbuf;
+  msg.msg_controllen = sizeof(cmsgbuf);
+  ASSERT_THAT(RetryEINTR(recvmsg)(bind_.get(), &msg, 0),
+              SyscallSucceedsWithValue(0));
+  struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
+  ASSERT_NE(cmsg, nullptr);
+
+  // The ioctl should return the exact same values as before.
+  struct timeval tv2 = {};
+  ASSERT_THAT(ioctl(bind_.get(), SIOCGSTAMP, &tv2), SyscallSucceeds());
+  ASSERT_EQ(tv.tv_sec, tv2.tv_sec);
+  ASSERT_EQ(tv.tv_usec, tv2.tv_usec);
+}
+
+// Test that a socket with IP_TOS or IPV6_TCLASS set will set the TOS byte on
+// outgoing packets, and that a receiving socket with IP_RECVTOS or
+// IPV6_RECVTCLASS will create the corresponding control message.
+TEST_P(UdpSocketTest, SetAndReceiveTOS) {
+  ASSERT_NO_ERRNO(BindLoopback());
+  ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
+
+  // Allow socket to receive control message.
+  int recv_level = SOL_IP;
+  int recv_type = IP_RECVTOS;
+  if (GetParam() != AddressFamily::kIpv4) {
+    recv_level = SOL_IPV6;
+    recv_type = IPV6_RECVTCLASS;
+  }
+  ASSERT_THAT(setsockopt(bind_.get(), recv_level, recv_type, &kSockOptOn,
+                         sizeof(kSockOptOn)),
+              SyscallSucceeds());
+
+  // Set socket TOS.
+  int sent_level = recv_level;
+  int sent_type = IP_TOS;
+  if (sent_level == SOL_IPV6) {
+    sent_type = IPV6_TCLASS;
+  }
+  int sent_tos = IPTOS_LOWDELAY;  // Choose some TOS value.
+  ASSERT_THAT(setsockopt(sock_.get(), sent_level, sent_type, &sent_tos,
+                         sizeof(sent_tos)),
+              SyscallSucceeds());
+
+  // Prepare message to send.
+  constexpr size_t kDataLength = 1024;
+  struct msghdr sent_msg = {};
+  struct iovec sent_iov = {};
+  char sent_data[kDataLength];
+  sent_iov.iov_base = &sent_data[0];
+  sent_iov.iov_len = kDataLength;
+  sent_msg.msg_iov = &sent_iov;
+  sent_msg.msg_iovlen = 1;
+
+  ASSERT_THAT(RetryEINTR(sendmsg)(sock_.get(), &sent_msg, 0),
+              SyscallSucceedsWithValue(kDataLength));
+
+  // Receive message.
+  struct msghdr received_msg = {};
+  struct iovec received_iov = {};
+  char received_data[kDataLength];
+  received_iov.iov_base = &received_data[0];
+  received_iov.iov_len = kDataLength;
+  received_msg.msg_iov = &received_iov;
+  received_msg.msg_iovlen = 1;
+  size_t cmsg_data_len = sizeof(int8_t);
+  if (sent_type == IPV6_TCLASS) {
+    cmsg_data_len = sizeof(int);
+  }
+  std::vector<char> received_cmsgbuf(CMSG_SPACE(cmsg_data_len));
+  received_msg.msg_control = &received_cmsgbuf[0];
+  received_msg.msg_controllen = received_cmsgbuf.size();
+  ASSERT_THAT(RetryEINTR(recvmsg)(bind_.get(), &received_msg, 0),
+              SyscallSucceedsWithValue(kDataLength));
+
+  struct cmsghdr* cmsg = CMSG_FIRSTHDR(&received_msg);
+  ASSERT_NE(cmsg, nullptr);
+  EXPECT_EQ(cmsg->cmsg_len, CMSG_LEN(cmsg_data_len));
+  EXPECT_EQ(cmsg->cmsg_level, sent_level);
+  EXPECT_EQ(cmsg->cmsg_type, sent_type);
+  int8_t received_tos = 0;
+  memcpy(&received_tos, CMSG_DATA(cmsg), sizeof(received_tos));
+  EXPECT_EQ(received_tos, sent_tos);
+}
+
+// Test that sendmsg with IP_TOS and IPV6_TCLASS control messages will set the
+// TOS byte on outgoing packets, and that a receiving socket with IP_RECVTOS or
+// IPV6_RECVTCLASS will create the corresponding control message.
+TEST_P(UdpSocketTest, SendAndReceiveTOS) {
+  // TODO(b/146661005): Setting TOS via cmsg not supported for netstack.
+  SKIP_IF(IsRunningOnGvisor() && !IsRunningWithHostinet());
+
+  ASSERT_NO_ERRNO(BindLoopback());
+  ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds());
+
+  // Allow socket to receive control message.
+  int recv_level = SOL_IP;
+  int recv_type = IP_RECVTOS;
+  if (GetParam() != AddressFamily::kIpv4) {
+    recv_level = SOL_IPV6;
+    recv_type = IPV6_RECVTCLASS;
+  }
+  int recv_opt = kSockOptOn;
+  ASSERT_THAT(setsockopt(bind_.get(), recv_level, recv_type, &recv_opt,
+                         sizeof(recv_opt)),
+              SyscallSucceeds());
+
+  // Prepare message to send.
+  constexpr size_t kDataLength = 1024;
+  int sent_level = recv_level;
+  int sent_type = IP_TOS;
+  int sent_tos = IPTOS_LOWDELAY;  // Choose some TOS value.
+
+  struct msghdr sent_msg = {};
+  struct iovec sent_iov = {};
+  char sent_data[kDataLength];
+  sent_iov.iov_base = &sent_data[0];
+  sent_iov.iov_len = kDataLength;
+  sent_msg.msg_iov = &sent_iov;
+  sent_msg.msg_iovlen = 1;
+  size_t cmsg_data_len = sizeof(int8_t);
+  if (sent_level == SOL_IPV6) {
+    sent_type = IPV6_TCLASS;
+    cmsg_data_len = sizeof(int);
+  }
+  std::vector<char> sent_cmsgbuf(CMSG_SPACE(cmsg_data_len));
+  sent_msg.msg_control = &sent_cmsgbuf[0];
+  sent_msg.msg_controllen = CMSG_LEN(cmsg_data_len);
+
+  // Manually add control message.
+  struct cmsghdr* sent_cmsg = CMSG_FIRSTHDR(&sent_msg);
+  sent_cmsg->cmsg_len = CMSG_LEN(cmsg_data_len);
+  sent_cmsg->cmsg_level = sent_level;
+  sent_cmsg->cmsg_type = sent_type;
+  *(int8_t*)CMSG_DATA(sent_cmsg) = sent_tos;
+
+  ASSERT_THAT(RetryEINTR(sendmsg)(sock_.get(), &sent_msg, 0),
+              SyscallSucceedsWithValue(kDataLength));
+
+  // Receive message.
+  struct msghdr received_msg = {};
+  struct iovec received_iov = {};
+  char received_data[kDataLength];
+  received_iov.iov_base = &received_data[0];
+  received_iov.iov_len = kDataLength;
+  received_msg.msg_iov = &received_iov;
+  received_msg.msg_iovlen = 1;
+  std::vector<char> received_cmsgbuf(CMSG_SPACE(cmsg_data_len));
+  received_msg.msg_control = &received_cmsgbuf[0];
+  received_msg.msg_controllen = CMSG_LEN(cmsg_data_len);
+  ASSERT_THAT(RetryEINTR(recvmsg)(bind_.get(), &received_msg, 0),
+              SyscallSucceedsWithValue(kDataLength));
+
+  struct cmsghdr* cmsg = CMSG_FIRSTHDR(&received_msg);
+  ASSERT_NE(cmsg, nullptr);
+  EXPECT_EQ(cmsg->cmsg_len, CMSG_LEN(cmsg_data_len));
+  EXPECT_EQ(cmsg->cmsg_level, sent_level);
+  EXPECT_EQ(cmsg->cmsg_type, sent_type);
+  int8_t received_tos = 0;
+  memcpy(&received_tos, CMSG_DATA(cmsg), sizeof(received_tos));
+  EXPECT_EQ(received_tos, sent_tos);
+}
+
+TEST_P(UdpSocketTest, RecvBufLimitsEmptyRcvBuf) {
+  // Discover minimum buffer size by setting it to zero.
+  constexpr int kRcvBufSz = 0;
+  ASSERT_THAT(setsockopt(bind_.get(), SOL_SOCKET, SO_RCVBUF, &kRcvBufSz,
+                         sizeof(kRcvBufSz)),
+              SyscallSucceeds());
+
+  int min = 0;
+  socklen_t min_len = sizeof(min);
+  ASSERT_THAT(getsockopt(bind_.get(), SOL_SOCKET, SO_RCVBUF, &min, &min_len),
+              SyscallSucceeds());
+
+  // Bind bind_ to loopback.
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  {
+    // Send data of size min and verify that it's received.
+    std::vector<char> buf(min);
+    RandomizeBuffer(buf.data(), buf.size());
+    ASSERT_THAT(
+        sendto(sock_.get(), buf.data(), buf.size(), 0, bind_addr_, addrlen_),
+        SyscallSucceedsWithValue(buf.size()));
+    std::vector<char> received(buf.size());
+    EXPECT_THAT(
+        recv(bind_.get(), received.data(), received.size(), MSG_DONTWAIT),
+        SyscallSucceedsWithValue(received.size()));
+  }
+
+  {
+    // Send data of size min + 1 and verify that its received. Both linux and
+    // Netstack accept a dgram that exceeds rcvBuf limits if the receive buffer
+    // is currently empty.
+    std::vector<char> buf(min + 1);
+    RandomizeBuffer(buf.data(), buf.size());
+    ASSERT_THAT(
+        sendto(sock_.get(), buf.data(), buf.size(), 0, bind_addr_, addrlen_),
+        SyscallSucceedsWithValue(buf.size()));
+
+    std::vector<char> received(buf.size());
+    EXPECT_THAT(
+        recv(bind_.get(), received.data(), received.size(), MSG_DONTWAIT),
+        SyscallSucceedsWithValue(received.size()));
+  }
+}
+
+// Test that receive buffer limits are enforced.
+TEST_P(UdpSocketTest, RecvBufLimits) {
+  // Bind s_ to loopback.
+  ASSERT_NO_ERRNO(BindLoopback());
+
+  int min = 0;
+  {
+    // Discover minimum buffer size by trying to set it to zero.
+    constexpr int kRcvBufSz = 0;
+    ASSERT_THAT(setsockopt(bind_.get(), SOL_SOCKET, SO_RCVBUF, &kRcvBufSz,
+                           sizeof(kRcvBufSz)),
+                SyscallSucceeds());
+
+    socklen_t min_len = sizeof(min);
+    ASSERT_THAT(getsockopt(bind_.get(), SOL_SOCKET, SO_RCVBUF, &min, &min_len),
+                SyscallSucceeds());
+  }
+
+  // Now set the limit to min * 4.
+  int new_rcv_buf_sz = min * 4;
+  if (!IsRunningOnGvisor() || IsRunningWithHostinet()) {
+    // Linux doubles the value specified so just set to min * 2.
+    new_rcv_buf_sz = min * 2;
+  }
+
+  ASSERT_THAT(setsockopt(bind_.get(), SOL_SOCKET, SO_RCVBUF, &new_rcv_buf_sz,
+                         sizeof(new_rcv_buf_sz)),
+              SyscallSucceeds());
+  int rcv_buf_sz = 0;
+  {
+    socklen_t rcv_buf_len = sizeof(rcv_buf_sz);
+    ASSERT_THAT(getsockopt(bind_.get(), SOL_SOCKET, SO_RCVBUF, &rcv_buf_sz,
+                           &rcv_buf_len),
+                SyscallSucceeds());
+  }
+
+  {
+    std::vector<char> buf(min);
+    RandomizeBuffer(buf.data(), buf.size());
+
+    ASSERT_THAT(
+        sendto(sock_.get(), buf.data(), buf.size(), 0, bind_addr_, addrlen_),
+        SyscallSucceedsWithValue(buf.size()));
+    ASSERT_THAT(
+        sendto(sock_.get(), buf.data(), buf.size(), 0, bind_addr_, addrlen_),
+        SyscallSucceedsWithValue(buf.size()));
+    ASSERT_THAT(
+        sendto(sock_.get(), buf.data(), buf.size(), 0, bind_addr_, addrlen_),
+        SyscallSucceedsWithValue(buf.size()));
+    ASSERT_THAT(
+        sendto(sock_.get(), buf.data(), buf.size(), 0, bind_addr_, addrlen_),
+        SyscallSucceedsWithValue(buf.size()));
+    int sent = 4;
+    if (IsRunningOnGvisor() && !IsRunningWithHostinet()) {
+      // Linux seems to drop the 4th packet even though technically it should
+      // fit in the receive buffer.
+      ASSERT_THAT(
+          sendto(sock_.get(), buf.data(), buf.size(), 0, bind_addr_, addrlen_),
+          SyscallSucceedsWithValue(buf.size()));
+      sent++;
+    }
+
+    for (int i = 0; i < sent - 1; i++) {
+      // Receive the data.
+      std::vector<char> received(buf.size());
+      EXPECT_THAT(
+          recv(bind_.get(), received.data(), received.size(), MSG_DONTWAIT),
+          SyscallSucceedsWithValue(received.size()));
+      EXPECT_EQ(memcmp(buf.data(), received.data(), buf.size()), 0);
+    }
+
+    // The last receive should fail with EAGAIN as the last packet should have
+    // been dropped due to lack of space in the receive buffer.
+    std::vector<char> received(buf.size());
+    EXPECT_THAT(
+        recv(bind_.get(), received.data(), received.size(), MSG_DONTWAIT),
+        SyscallFailsWithErrno(EAGAIN));
+  }
+}
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/udp_socket_test_cases.h b/test/syscalls/linux/udp_socket_test_cases.h
new file mode 100644
index 000000000..f7e25c805
--- /dev/null
+++ b/test/syscalls/linux/udp_socket_test_cases.h
@@ -0,0 +1,82 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef THIRD_PARTY_GOLANG_GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IPV4_UDP_UNBOUND_H_
+#define THIRD_PARTY_GOLANG_GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IPV4_UDP_UNBOUND_H_
+
+#include <sys/socket.h>
+
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/posix_error.h"
+
+namespace gvisor {
+namespace testing {
+
+// The initial port to be be used on gvisor.
+constexpr int TestPort = 40000;
+
+// Fixture for tests parameterized by the address family to use (AF_INET and
+// AF_INET6) when creating sockets.
+class UdpSocketTest
+    : public ::testing::TestWithParam<gvisor::testing::AddressFamily> {
+ protected:
+  // Creates two sockets that will be used by test cases.
+  void SetUp() override;
+
+  // Binds the socket bind_ to the loopback and updates bind_addr_.
+  PosixError BindLoopback();
+
+  // Binds the socket bind_ to Any and updates bind_addr_.
+  PosixError BindAny();
+
+  // Binds given socket to address addr and updates.
+  PosixError BindSocket(int socket, struct sockaddr* addr);
+
+  // Return initialized Any address to port 0.
+  struct sockaddr_storage InetAnyAddr();
+
+  // Return initialized Loopback address to port 0.
+  struct sockaddr_storage InetLoopbackAddr();
+
+  // Disconnects socket sockfd.
+  void Disconnect(int sockfd);
+
+  // Get family for the test.
+  int GetFamily();
+
+  // Socket used by Bind methods
+  FileDescriptor bind_;
+
+  // Second socket used for tests.
+  FileDescriptor sock_;
+
+  // Address for bind_ socket.
+  struct sockaddr* bind_addr_;
+
+  // Initialized to the length based on GetFamily().
+  socklen_t addrlen_;
+
+  // Storage for bind_addr_.
+  struct sockaddr_storage bind_addr_storage_;
+
+ private:
+  // Helper to initialize addrlen_ for the test case.
+  socklen_t GetAddrLength();
+};
+}  // namespace testing
+}  // namespace gvisor
+
+#endif  // THIRD_PARTY_GOLANG_GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IPV4_UDP_UNBOUND_H_
diff --git a/test/syscalls/linux/uidgid.cc b/test/syscalls/linux/uidgid.cc
new file mode 100644
index 000000000..64d6d0b8f
--- /dev/null
+++ b/test/syscalls/linux/uidgid.cc
@@ -0,0 +1,276 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <grp.h>
+#include <sys/resource.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "absl/flags/flag.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/str_join.h"
+#include "test/util/capability_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+#include "test/util/uid_util.h"
+
+ABSL_FLAG(int32_t, scratch_uid1, 65534, "first scratch UID");
+ABSL_FLAG(int32_t, scratch_uid2, 65533, "second scratch UID");
+ABSL_FLAG(int32_t, scratch_gid1, 65534, "first scratch GID");
+ABSL_FLAG(int32_t, scratch_gid2, 65533, "second scratch GID");
+
+using ::testing::UnorderedElementsAreArray;
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(UidGidTest, Getuid) {
+  uid_t ruid, euid, suid;
+  EXPECT_THAT(getresuid(&ruid, &euid, &suid), SyscallSucceeds());
+  EXPECT_THAT(getuid(), SyscallSucceedsWithValue(ruid));
+  EXPECT_THAT(geteuid(), SyscallSucceedsWithValue(euid));
+}
+
+TEST(UidGidTest, Getgid) {
+  gid_t rgid, egid, sgid;
+  EXPECT_THAT(getresgid(&rgid, &egid, &sgid), SyscallSucceeds());
+  EXPECT_THAT(getgid(), SyscallSucceedsWithValue(rgid));
+  EXPECT_THAT(getegid(), SyscallSucceedsWithValue(egid));
+}
+
+TEST(UidGidTest, Getgroups) {
+  // "If size is zero, list is not modified, but the total number of
+  // supplementary group IDs for the process is returned." - getgroups(2)
+  int nr_groups;
+  ASSERT_THAT(nr_groups = getgroups(0, nullptr), SyscallSucceeds());
+  std::vector<gid_t> list(nr_groups);
+  EXPECT_THAT(getgroups(list.size(), list.data()), SyscallSucceeds());
+
+  // "EINVAL: size is less than the number of supplementary group IDs, but is
+  // not zero."
+  EXPECT_THAT(getgroups(-1, nullptr), SyscallFailsWithErrno(EINVAL));
+
+  // Testing for EFAULT requires actually having groups, which isn't guaranteed
+  // here; see the setgroups test below.
+}
+
+// Checks that the calling process' real/effective/saved user IDs are
+// ruid/euid/suid respectively.
+PosixError CheckUIDs(uid_t ruid, uid_t euid, uid_t suid) {
+  uid_t actual_ruid, actual_euid, actual_suid;
+  int rc = getresuid(&actual_ruid, &actual_euid, &actual_suid);
+  MaybeSave();
+  if (rc < 0) {
+    return PosixError(errno, "getresuid");
+  }
+  if (ruid != actual_ruid || euid != actual_euid || suid != actual_suid) {
+    return PosixError(
+        EPERM, absl::StrCat(
+                   "incorrect user IDs: got (",
+                   absl::StrJoin({actual_ruid, actual_euid, actual_suid}, ", "),
+                   ", wanted (", absl::StrJoin({ruid, euid, suid}, ", "), ")"));
+  }
+  return NoError();
+}
+
+PosixError CheckGIDs(gid_t rgid, gid_t egid, gid_t sgid) {
+  gid_t actual_rgid, actual_egid, actual_sgid;
+  int rc = getresgid(&actual_rgid, &actual_egid, &actual_sgid);
+  MaybeSave();
+  if (rc < 0) {
+    return PosixError(errno, "getresgid");
+  }
+  if (rgid != actual_rgid || egid != actual_egid || sgid != actual_sgid) {
+    return PosixError(
+        EPERM, absl::StrCat(
+                   "incorrect group IDs: got (",
+                   absl::StrJoin({actual_rgid, actual_egid, actual_sgid}, ", "),
+                   ", wanted (", absl::StrJoin({rgid, egid, sgid}, ", "), ")"));
+  }
+  return NoError();
+}
+
+// N.B. These tests may break horribly unless run via a gVisor test runner,
+// because changing UID in one test may forfeit permissions required by other
+// tests. (The test runner runs each test in a separate process.)
+
+TEST(UidGidRootTest, Setuid) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsRoot()));
+
+  // Do setuid in a separate thread so that after finishing this test, the
+  // process can still open files the test harness created before starting this
+  // test. Otherwise, the files are created by root (UID before the test), but
+  // cannot be opened by the `uid` set below after the test. After calling
+  // setuid(non-zero-UID), there is no way to get root privileges back.
+  ScopedThread([&] {
+    // Use syscall instead of glibc setuid wrapper because we want this setuid
+    // call to only apply to this task. POSIX threads, however, require that all
+    // threads have the same UIDs, so using the setuid wrapper sets all threads'
+    // real UID.
+    EXPECT_THAT(syscall(SYS_setuid, -1), SyscallFailsWithErrno(EINVAL));
+
+    const uid_t uid = absl::GetFlag(FLAGS_scratch_uid1);
+    EXPECT_THAT(syscall(SYS_setuid, uid), SyscallSucceeds());
+    // "If the effective UID of the caller is root (more precisely: if the
+    // caller has the CAP_SETUID capability), the real UID and saved set-user-ID
+    // are also set." - setuid(2)
+    EXPECT_NO_ERRNO(CheckUIDs(uid, uid, uid));
+  });
+}
+
+TEST(UidGidRootTest, Setgid) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsRoot()));
+
+  EXPECT_THAT(setgid(-1), SyscallFailsWithErrno(EINVAL));
+
+  const gid_t gid = absl::GetFlag(FLAGS_scratch_gid1);
+  ASSERT_THAT(setgid(gid), SyscallSucceeds());
+  EXPECT_NO_ERRNO(CheckGIDs(gid, gid, gid));
+}
+
+TEST(UidGidRootTest, SetgidNotFromThreadGroupLeader) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsRoot()));
+
+  const gid_t gid = absl::GetFlag(FLAGS_scratch_gid1);
+  // NOTE(b/64676707): Do setgid in a separate thread so that we can test if
+  // info.si_pid is set correctly.
+  ScopedThread([gid] { ASSERT_THAT(setgid(gid), SyscallSucceeds()); });
+  EXPECT_NO_ERRNO(CheckGIDs(gid, gid, gid));
+}
+
+TEST(UidGidRootTest, Setreuid) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsRoot()));
+
+  // "Supplying a value of -1 for either the real or effective user ID forces
+  // the system to leave that ID unchanged." - setreuid(2)
+  EXPECT_THAT(setreuid(-1, -1), SyscallSucceeds());
+  EXPECT_NO_ERRNO(CheckUIDs(0, 0, 0));
+
+  // Do setuid in a separate thread so that after finishing this test, the
+  // process can still open files the test harness created before starting this
+  // test. Otherwise, the files are created by root (UID before the test), but
+  // cannot be opened by the `uid` set below after the test. After calling
+  // setuid(non-zero-UID), there is no way to get root privileges back.
+  ScopedThread([&] {
+    const uid_t ruid = absl::GetFlag(FLAGS_scratch_uid1);
+    const uid_t euid = absl::GetFlag(FLAGS_scratch_uid2);
+
+    // Use syscall instead of glibc setuid wrapper because we want this setuid
+    // call to only apply to this task. posix threads, however, require that all
+    // threads have the same UIDs, so using the setuid wrapper sets all threads'
+    // real UID.
+    EXPECT_THAT(syscall(SYS_setreuid, ruid, euid), SyscallSucceeds());
+
+    // "If the real user ID is set or the effective user ID is set to a value
+    // not equal to the previous real user ID, the saved set-user-ID will be set
+    // to the new effective user ID." - setreuid(2)
+    EXPECT_NO_ERRNO(CheckUIDs(ruid, euid, euid));
+  });
+}
+
+TEST(UidGidRootTest, Setregid) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsRoot()));
+
+  EXPECT_THAT(setregid(-1, -1), SyscallSucceeds());
+  EXPECT_NO_ERRNO(CheckGIDs(0, 0, 0));
+
+  const gid_t rgid = absl::GetFlag(FLAGS_scratch_gid1);
+  const gid_t egid = absl::GetFlag(FLAGS_scratch_gid2);
+  ASSERT_THAT(setregid(rgid, egid), SyscallSucceeds());
+  EXPECT_NO_ERRNO(CheckGIDs(rgid, egid, egid));
+}
+
+TEST(UidGidRootTest, Setresuid) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsRoot()));
+
+  // "If one of the arguments equals -1, the corresponding value is not
+  // changed." - setresuid(2)
+  EXPECT_THAT(setresuid(-1, -1, -1), SyscallSucceeds());
+  EXPECT_NO_ERRNO(CheckUIDs(0, 0, 0));
+
+  // Do setuid in a separate thread so that after finishing this test, the
+  // process can still open files the test harness created before starting this
+  // test. Otherwise, the files are created by root (UID before the test), but
+  // cannot be opened by the `uid` set below after the test. After calling
+  // setuid(non-zero-UID), there is no way to get root privileges back.
+  ScopedThread([&] {
+    const uid_t ruid = 12345;
+    const uid_t euid = 23456;
+    const uid_t suid = 34567;
+
+    // Use syscall instead of glibc setuid wrapper because we want this setuid
+    // call to only apply to this task. posix threads, however, require that all
+    // threads have the same UIDs, so using the setuid wrapper sets all threads'
+    // real UID.
+    EXPECT_THAT(syscall(SYS_setresuid, ruid, euid, suid), SyscallSucceeds());
+    EXPECT_NO_ERRNO(CheckUIDs(ruid, euid, suid));
+  });
+}
+
+TEST(UidGidRootTest, Setresgid) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsRoot()));
+
+  EXPECT_THAT(setresgid(-1, -1, -1), SyscallSucceeds());
+  EXPECT_NO_ERRNO(CheckGIDs(0, 0, 0));
+
+  const gid_t rgid = 12345;
+  const gid_t egid = 23456;
+  const gid_t sgid = 34567;
+  ASSERT_THAT(setresgid(rgid, egid, sgid), SyscallSucceeds());
+  EXPECT_NO_ERRNO(CheckGIDs(rgid, egid, sgid));
+}
+
+TEST(UidGidRootTest, Setgroups) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsRoot()));
+
+  std::vector<gid_t> list = {123, 500};
+  ASSERT_THAT(setgroups(list.size(), list.data()), SyscallSucceeds());
+  std::vector<gid_t> list2(list.size());
+  ASSERT_THAT(getgroups(list2.size(), list2.data()), SyscallSucceeds());
+  EXPECT_THAT(list, UnorderedElementsAreArray(list2));
+
+  // "EFAULT: list has an invalid address."
+  EXPECT_THAT(getgroups(100, reinterpret_cast<gid_t*>(-1)),
+              SyscallFailsWithErrno(EFAULT));
+}
+
+TEST(UidGidRootTest, Setuid_prlimit) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsRoot()));
+
+  // Do seteuid in a separate thread so that after finishing this test, the
+  // process can still open files the test harness created before starting this
+  // test. Otherwise, the files are created by root (UID before the test), but
+  // cannot be opened by the `uid` set below after the test.
+  ScopedThread([&] {
+    // Use syscall instead of glibc setuid wrapper because we want this seteuid
+    // call to only apply to this task. POSIX threads, however, require that all
+    // threads have the same UIDs, so using the seteuid wrapper sets all
+    // threads' UID.
+    EXPECT_THAT(syscall(SYS_setreuid, -1, 65534), SyscallSucceeds());
+
+    // Despite the UID change, we should be able to get our own limits.
+    struct rlimit rl = {};
+    EXPECT_THAT(prlimit(0, RLIMIT_NOFILE, NULL, &rl), SyscallSucceeds());
+  });
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/uname.cc b/test/syscalls/linux/uname.cc
new file mode 100644
index 000000000..d8824b171
--- /dev/null
+++ b/test/syscalls/linux/uname.cc
@@ -0,0 +1,111 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sched.h>
+#include <sys/utsname.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "absl/strings/string_view.h"
+#include "test/util/capability_util.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(UnameTest, Sanity) {
+  struct utsname buf;
+  ASSERT_THAT(uname(&buf), SyscallSucceeds());
+  EXPECT_NE(strlen(buf.release), 0);
+  EXPECT_NE(strlen(buf.version), 0);
+  EXPECT_NE(strlen(buf.machine), 0);
+  EXPECT_NE(strlen(buf.sysname), 0);
+  EXPECT_NE(strlen(buf.nodename), 0);
+  EXPECT_NE(strlen(buf.domainname), 0);
+}
+
+TEST(UnameTest, SetNames) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN)));
+
+  char hostname[65];
+  ASSERT_THAT(sethostname("0123456789", 3), SyscallSucceeds());
+  EXPECT_THAT(gethostname(hostname, sizeof(hostname)), SyscallSucceeds());
+  EXPECT_EQ(absl::string_view(hostname), "012");
+
+  ASSERT_THAT(sethostname("0123456789\0xxx", 11), SyscallSucceeds());
+  EXPECT_THAT(gethostname(hostname, sizeof(hostname)), SyscallSucceeds());
+  EXPECT_EQ(absl::string_view(hostname), "0123456789");
+
+  ASSERT_THAT(sethostname("0123456789\0xxx", 12), SyscallSucceeds());
+  EXPECT_THAT(gethostname(hostname, sizeof(hostname)), SyscallSucceeds());
+  EXPECT_EQ(absl::string_view(hostname), "0123456789");
+
+  constexpr char kHostname[] = "wubbalubba";
+  ASSERT_THAT(sethostname(kHostname, sizeof(kHostname)), SyscallSucceeds());
+
+  constexpr char kDomainname[] = "dubdub.com";
+  ASSERT_THAT(setdomainname(kDomainname, sizeof(kDomainname)),
+              SyscallSucceeds());
+
+  struct utsname buf;
+  EXPECT_THAT(uname(&buf), SyscallSucceeds());
+  EXPECT_EQ(absl::string_view(buf.nodename), kHostname);
+  EXPECT_EQ(absl::string_view(buf.domainname), kDomainname);
+
+  // These should just be glibc wrappers that also call uname(2).
+  EXPECT_THAT(gethostname(hostname, sizeof(hostname)), SyscallSucceeds());
+  EXPECT_EQ(absl::string_view(hostname), kHostname);
+
+  char domainname[65];
+  EXPECT_THAT(getdomainname(domainname, sizeof(domainname)), SyscallSucceeds());
+  EXPECT_EQ(absl::string_view(domainname), kDomainname);
+}
+
+TEST(UnameTest, UnprivilegedSetNames) {
+  if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))) {
+    EXPECT_NO_ERRNO(SetCapability(CAP_SYS_ADMIN, false));
+  }
+
+  EXPECT_THAT(sethostname("", 0), SyscallFailsWithErrno(EPERM));
+  EXPECT_THAT(setdomainname("", 0), SyscallFailsWithErrno(EPERM));
+}
+
+TEST(UnameTest, UnshareUTS) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN)));
+
+  struct utsname init;
+  ASSERT_THAT(uname(&init), SyscallSucceeds());
+
+  ScopedThread([&]() {
+    EXPECT_THAT(unshare(CLONE_NEWUTS), SyscallSucceeds());
+
+    constexpr char kHostname[] = "wubbalubba";
+    EXPECT_THAT(sethostname(kHostname, sizeof(kHostname)), SyscallSucceeds());
+
+    char hostname[65];
+    EXPECT_THAT(gethostname(hostname, sizeof(hostname)), SyscallSucceeds());
+  });
+
+  struct utsname after;
+  EXPECT_THAT(uname(&after), SyscallSucceeds());
+  EXPECT_EQ(absl::string_view(after.nodename), init.nodename);
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/unix_domain_socket_test_util.cc b/test/syscalls/linux/unix_domain_socket_test_util.cc
new file mode 100644
index 000000000..b05ab2900
--- /dev/null
+++ b/test/syscalls/linux/unix_domain_socket_test_util.cc
@@ -0,0 +1,351 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+
+#include <sys/un.h>
+
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "absl/strings/str_cat.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+std::string DescribeUnixDomainSocketType(int type) {
+  const char* type_str = nullptr;
+  switch (type & ~(SOCK_NONBLOCK | SOCK_CLOEXEC)) {
+    case SOCK_STREAM:
+      type_str = "SOCK_STREAM";
+      break;
+    case SOCK_DGRAM:
+      type_str = "SOCK_DGRAM";
+      break;
+    case SOCK_SEQPACKET:
+      type_str = "SOCK_SEQPACKET";
+      break;
+  }
+  if (!type_str) {
+    return absl::StrCat("Unix domain socket with unknown type ", type);
+  } else {
+    return absl::StrCat(((type & SOCK_NONBLOCK) != 0) ? "non-blocking " : "",
+                        ((type & SOCK_CLOEXEC) != 0) ? "close-on-exec " : "",
+                        type_str, " Unix domain socket");
+  }
+}
+
+SocketPairKind UnixDomainSocketPair(int type) {
+  return SocketPairKind{DescribeUnixDomainSocketType(type), AF_UNIX, type, 0,
+                        SyscallSocketPairCreator(AF_UNIX, type, 0)};
+}
+
+SocketPairKind FilesystemBoundUnixDomainSocketPair(int type) {
+  std::string description = absl::StrCat(DescribeUnixDomainSocketType(type),
+                                         " created with filesystem binding");
+  if ((type & SOCK_DGRAM) == SOCK_DGRAM) {
+    return SocketPairKind{
+        description, AF_UNIX, type, 0,
+        FilesystemBidirectionalBindSocketPairCreator(AF_UNIX, type, 0)};
+  }
+  return SocketPairKind{
+      description, AF_UNIX, type, 0,
+      FilesystemAcceptBindSocketPairCreator(AF_UNIX, type, 0)};
+}
+
+SocketPairKind AbstractBoundUnixDomainSocketPair(int type) {
+  std::string description =
+      absl::StrCat(DescribeUnixDomainSocketType(type),
+                   " created with abstract namespace binding");
+  if ((type & SOCK_DGRAM) == SOCK_DGRAM) {
+    return SocketPairKind{
+        description, AF_UNIX, type, 0,
+        AbstractBidirectionalBindSocketPairCreator(AF_UNIX, type, 0)};
+  }
+  return SocketPairKind{description, AF_UNIX, type, 0,
+                        AbstractAcceptBindSocketPairCreator(AF_UNIX, type, 0)};
+}
+
+SocketPairKind SocketpairGoferUnixDomainSocketPair(int type) {
+  std::string description = absl::StrCat(DescribeUnixDomainSocketType(type),
+                                         " created with the socketpair gofer");
+  return SocketPairKind{description, AF_UNIX, type, 0,
+                        SocketpairGoferSocketPairCreator(AF_UNIX, type, 0)};
+}
+
+SocketPairKind SocketpairGoferFileSocketPair(int type) {
+  std::string description =
+      absl::StrCat(((type & O_NONBLOCK) != 0) ? "non-blocking " : "",
+                   ((type & O_CLOEXEC) != 0) ? "close-on-exec " : "",
+                   "file socket created with the socketpair gofer");
+  // The socketpair gofer always creates SOCK_STREAM sockets on open(2).
+  return SocketPairKind{description, AF_UNIX, SOCK_STREAM, 0,
+                        SocketpairGoferFileSocketPairCreator(type)};
+}
+
+SocketPairKind FilesystemUnboundUnixDomainSocketPair(int type) {
+  return SocketPairKind{absl::StrCat(DescribeUnixDomainSocketType(type),
+                                     " unbound with a filesystem address"),
+                        AF_UNIX, type, 0,
+                        FilesystemUnboundSocketPairCreator(AF_UNIX, type, 0)};
+}
+
+SocketPairKind AbstractUnboundUnixDomainSocketPair(int type) {
+  return SocketPairKind{
+      absl::StrCat(DescribeUnixDomainSocketType(type),
+                   " unbound with an abstract namespace address"),
+      AF_UNIX, type, 0, AbstractUnboundSocketPairCreator(AF_UNIX, type, 0)};
+}
+
+void SendSingleFD(int sock, int fd, char buf[], int buf_size) {
+  ASSERT_NO_FATAL_FAILURE(SendFDs(sock, &fd, 1, buf, buf_size));
+}
+
+void SendFDs(int sock, int fds[], int fds_size, char buf[], int buf_size) {
+  struct msghdr msg = {};
+  std::vector<char> control(CMSG_SPACE(fds_size * sizeof(int)));
+  msg.msg_control = &control[0];
+  msg.msg_controllen = control.size();
+
+  struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
+  cmsg->cmsg_len = CMSG_LEN(fds_size * sizeof(int));
+  cmsg->cmsg_level = SOL_SOCKET;
+  cmsg->cmsg_type = SCM_RIGHTS;
+  for (int i = 0; i < fds_size; i++) {
+    memcpy(CMSG_DATA(cmsg) + i * sizeof(int), &fds[i], sizeof(int));
+  }
+
+  ASSERT_THAT(SendMsg(sock, &msg, buf, buf_size),
+              IsPosixErrorOkAndHolds(buf_size));
+}
+
+void RecvSingleFD(int sock, int* fd, char buf[], int buf_size) {
+  ASSERT_NO_FATAL_FAILURE(RecvFDs(sock, fd, 1, buf, buf_size, buf_size));
+}
+
+void RecvSingleFD(int sock, int* fd, char buf[], int buf_size,
+                  int expected_size) {
+  ASSERT_NO_FATAL_FAILURE(RecvFDs(sock, fd, 1, buf, buf_size, expected_size));
+}
+
+void RecvFDs(int sock, int fds[], int fds_size, char buf[], int buf_size) {
+  ASSERT_NO_FATAL_FAILURE(
+      RecvFDs(sock, fds, fds_size, buf, buf_size, buf_size));
+}
+
+void RecvFDs(int sock, int fds[], int fds_size, char buf[], int buf_size,
+             int expected_size, bool peek) {
+  struct msghdr msg = {};
+  std::vector<char> control(CMSG_SPACE(fds_size * sizeof(int)));
+  msg.msg_control = &control[0];
+  msg.msg_controllen = control.size();
+
+  struct iovec iov;
+  iov.iov_base = buf;
+  iov.iov_len = buf_size;
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+
+  int flags = 0;
+  if (peek) {
+    flags |= MSG_PEEK;
+  }
+
+  ASSERT_THAT(RetryEINTR(recvmsg)(sock, &msg, flags),
+              SyscallSucceedsWithValue(expected_size));
+  struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
+  ASSERT_NE(cmsg, nullptr);
+  ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(fds_size * sizeof(int)));
+  ASSERT_EQ(cmsg->cmsg_level, SOL_SOCKET);
+  ASSERT_EQ(cmsg->cmsg_type, SCM_RIGHTS);
+
+  for (int i = 0; i < fds_size; i++) {
+    memcpy(&fds[i], CMSG_DATA(cmsg) + i * sizeof(int), sizeof(int));
+  }
+}
+
+void RecvFDs(int sock, int fds[], int fds_size, char buf[], int buf_size,
+             int expected_size) {
+  ASSERT_NO_FATAL_FAILURE(
+      RecvFDs(sock, fds, fds_size, buf, buf_size, expected_size, false));
+}
+
+void PeekSingleFD(int sock, int* fd, char buf[], int buf_size) {
+  ASSERT_NO_FATAL_FAILURE(RecvFDs(sock, fd, 1, buf, buf_size, buf_size, true));
+}
+
+void RecvNoCmsg(int sock, char buf[], int buf_size, int expected_size) {
+  struct msghdr msg = {};
+  char control[CMSG_SPACE(sizeof(int)) + CMSG_SPACE(sizeof(struct ucred))];
+  msg.msg_control = control;
+  msg.msg_controllen = sizeof(control);
+
+  struct iovec iov;
+  iov.iov_base = buf;
+  iov.iov_len = buf_size;
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+
+  ASSERT_THAT(RetryEINTR(recvmsg)(sock, &msg, 0),
+              SyscallSucceedsWithValue(expected_size));
+  struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
+  EXPECT_EQ(cmsg, nullptr);
+}
+
+void SendNullCmsg(int sock, char buf[], int buf_size) {
+  struct msghdr msg = {};
+  msg.msg_control = nullptr;
+  msg.msg_controllen = 0;
+
+  ASSERT_THAT(SendMsg(sock, &msg, buf, buf_size),
+              IsPosixErrorOkAndHolds(buf_size));
+}
+
+void SendCreds(int sock, ucred creds, char buf[], int buf_size) {
+  struct msghdr msg = {};
+
+  char control[CMSG_SPACE(sizeof(struct ucred))];
+  msg.msg_control = control;
+  msg.msg_controllen = sizeof(control);
+
+  struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
+  cmsg->cmsg_level = SOL_SOCKET;
+  cmsg->cmsg_type = SCM_CREDENTIALS;
+  cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred));
+  memcpy(CMSG_DATA(cmsg), &creds, sizeof(struct ucred));
+
+  ASSERT_THAT(SendMsg(sock, &msg, buf, buf_size),
+              IsPosixErrorOkAndHolds(buf_size));
+}
+
+void SendCredsAndFD(int sock, ucred creds, int fd, char buf[], int buf_size) {
+  struct msghdr msg = {};
+
+  char control[CMSG_SPACE(sizeof(struct ucred)) + CMSG_SPACE(sizeof(int))] = {};
+  msg.msg_control = control;
+  msg.msg_controllen = sizeof(control);
+
+  struct cmsghdr* cmsg1 = CMSG_FIRSTHDR(&msg);
+  cmsg1->cmsg_level = SOL_SOCKET;
+  cmsg1->cmsg_type = SCM_CREDENTIALS;
+  cmsg1->cmsg_len = CMSG_LEN(sizeof(struct ucred));
+  memcpy(CMSG_DATA(cmsg1), &creds, sizeof(struct ucred));
+
+  struct cmsghdr* cmsg2 = CMSG_NXTHDR(&msg, cmsg1);
+  cmsg2->cmsg_level = SOL_SOCKET;
+  cmsg2->cmsg_type = SCM_RIGHTS;
+  cmsg2->cmsg_len = CMSG_LEN(sizeof(int));
+  memcpy(CMSG_DATA(cmsg2), &fd, sizeof(int));
+
+  ASSERT_THAT(SendMsg(sock, &msg, buf, buf_size),
+              IsPosixErrorOkAndHolds(buf_size));
+}
+
+void RecvCreds(int sock, ucred* creds, char buf[], int buf_size) {
+  ASSERT_NO_FATAL_FAILURE(RecvCreds(sock, creds, buf, buf_size, buf_size));
+}
+
+void RecvCreds(int sock, ucred* creds, char buf[], int buf_size,
+               int expected_size) {
+  struct msghdr msg = {};
+  char control[CMSG_SPACE(sizeof(struct ucred))];
+  msg.msg_control = control;
+  msg.msg_controllen = sizeof(control);
+
+  struct iovec iov;
+  iov.iov_base = buf;
+  iov.iov_len = buf_size;
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+
+  ASSERT_THAT(RetryEINTR(recvmsg)(sock, &msg, 0),
+              SyscallSucceedsWithValue(expected_size));
+  struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
+  ASSERT_NE(cmsg, nullptr);
+  ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(struct ucred)));
+  ASSERT_EQ(cmsg->cmsg_level, SOL_SOCKET);
+  ASSERT_EQ(cmsg->cmsg_type, SCM_CREDENTIALS);
+
+  memcpy(creds, CMSG_DATA(cmsg), sizeof(struct ucred));
+}
+
+void RecvCredsAndFD(int sock, ucred* creds, int* fd, char buf[], int buf_size) {
+  struct msghdr msg = {};
+  char control[CMSG_SPACE(sizeof(struct ucred)) + CMSG_SPACE(sizeof(int))];
+  msg.msg_control = control;
+  msg.msg_controllen = sizeof(control);
+
+  struct iovec iov;
+  iov.iov_base = buf;
+  iov.iov_len = buf_size;
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+
+  ASSERT_THAT(RetryEINTR(recvmsg)(sock, &msg, 0),
+              SyscallSucceedsWithValue(buf_size));
+
+  struct cmsghdr* cmsg1 = CMSG_FIRSTHDR(&msg);
+  ASSERT_NE(cmsg1, nullptr);
+  ASSERT_EQ(cmsg1->cmsg_len, CMSG_LEN(sizeof(struct ucred)));
+  ASSERT_EQ(cmsg1->cmsg_level, SOL_SOCKET);
+  ASSERT_EQ(cmsg1->cmsg_type, SCM_CREDENTIALS);
+  memcpy(creds, CMSG_DATA(cmsg1), sizeof(struct ucred));
+
+  struct cmsghdr* cmsg2 = CMSG_NXTHDR(&msg, cmsg1);
+  ASSERT_NE(cmsg2, nullptr);
+  ASSERT_EQ(cmsg2->cmsg_len, CMSG_LEN(sizeof(int)));
+  ASSERT_EQ(cmsg2->cmsg_level, SOL_SOCKET);
+  ASSERT_EQ(cmsg2->cmsg_type, SCM_RIGHTS);
+  memcpy(fd, CMSG_DATA(cmsg2), sizeof(int));
+}
+
+void RecvSingleFDUnaligned(int sock, int* fd, char buf[], int buf_size) {
+  struct msghdr msg = {};
+  char control[CMSG_SPACE(sizeof(int)) - sizeof(int)];
+  msg.msg_control = control;
+  msg.msg_controllen = sizeof(control);
+
+  struct iovec iov;
+  iov.iov_base = buf;
+  iov.iov_len = buf_size;
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+
+  ASSERT_THAT(RetryEINTR(recvmsg)(sock, &msg, 0),
+              SyscallSucceedsWithValue(buf_size));
+
+  struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
+  ASSERT_NE(cmsg, nullptr);
+  ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(int)));
+  ASSERT_EQ(cmsg->cmsg_level, SOL_SOCKET);
+  ASSERT_EQ(cmsg->cmsg_type, SCM_RIGHTS);
+
+  memcpy(fd, CMSG_DATA(cmsg), sizeof(int));
+}
+
+void SetSoPassCred(int sock) {
+  int one = 1;
+  EXPECT_THAT(setsockopt(sock, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one)),
+              SyscallSucceeds());
+}
+
+void UnsetSoPassCred(int sock) {
+  int zero = 0;
+  EXPECT_THAT(setsockopt(sock, SOL_SOCKET, SO_PASSCRED, &zero, sizeof(zero)),
+              SyscallSucceeds());
+}
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/unix_domain_socket_test_util.h b/test/syscalls/linux/unix_domain_socket_test_util.h
new file mode 100644
index 000000000..b8073db17
--- /dev/null
+++ b/test/syscalls/linux/unix_domain_socket_test_util.h
@@ -0,0 +1,162 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_SYSCALLS_UNIX_DOMAIN_SOCKET_TEST_UTIL_H_
+#define GVISOR_TEST_SYSCALLS_UNIX_DOMAIN_SOCKET_TEST_UTIL_H_
+
+#include <string>
+
+#include "test/syscalls/linux/socket_test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// DescribeUnixDomainSocketType returns a human-readable string explaining the
+// given Unix domain socket type.
+std::string DescribeUnixDomainSocketType(int type);
+
+// UnixDomainSocketPair returns a SocketPairKind that represents SocketPairs
+// created by invoking the socketpair() syscall with AF_UNIX and the given type.
+SocketPairKind UnixDomainSocketPair(int type);
+
+// FilesystemBoundUnixDomainSocketPair returns a SocketPairKind that represents
+// SocketPairs created with bind() and accept() syscalls with a temp file path,
+// AF_UNIX and the given type.
+SocketPairKind FilesystemBoundUnixDomainSocketPair(int type);
+
+// AbstractBoundUnixDomainSocketPair returns a SocketPairKind that represents
+// SocketPairs created with bind() and accept() syscalls with a temp abstract
+// path, AF_UNIX and the given type.
+SocketPairKind AbstractBoundUnixDomainSocketPair(int type);
+
+// SocketpairGoferUnixDomainSocketPair returns a SocketPairKind that was created
+// with two sockets connected to the socketpair gofer.
+SocketPairKind SocketpairGoferUnixDomainSocketPair(int type);
+
+// SocketpairGoferFileSocketPair returns a SocketPairKind that was created with
+// two open() calls on paths backed by the socketpair gofer.
+SocketPairKind SocketpairGoferFileSocketPair(int type);
+
+// FilesystemUnboundUnixDomainSocketPair returns a SocketPairKind that
+// represents two unbound sockets and a filesystem path for binding.
+SocketPairKind FilesystemUnboundUnixDomainSocketPair(int type);
+
+// AbstractUnboundUnixDomainSocketPair returns a SocketPairKind that represents
+// two unbound sockets and an abstract namespace path for binding.
+SocketPairKind AbstractUnboundUnixDomainSocketPair(int type);
+
+// SendSingleFD sends both a single FD and some data over a unix domain socket
+// specified by an FD. Note that calls to this function must be wrapped in
+// ASSERT_NO_FATAL_FAILURE for internal assertions to halt the test.
+void SendSingleFD(int sock, int fd, char buf[], int buf_size);
+
+// SendFDs sends an arbitrary number of FDs and some data over a unix domain
+// socket specified by an FD. Note that calls to this function must be wrapped
+// in ASSERT_NO_FATAL_FAILURE for internal assertions to halt the test.
+void SendFDs(int sock, int fds[], int fds_size, char buf[], int buf_size);
+
+// RecvSingleFD receives both a single FD and some data over a unix domain
+// socket specified by an FD. Note that calls to this function must be wrapped
+// in ASSERT_NO_FATAL_FAILURE for internal assertions to halt the test.
+void RecvSingleFD(int sock, int* fd, char buf[], int buf_size);
+
+// RecvSingleFD receives both a single FD and some data over a unix domain
+// socket specified by an FD. This version allows the expected amount of data
+// received to be different than the buffer size. Note that calls to this
+// function must be wrapped in ASSERT_NO_FATAL_FAILURE for internal assertions
+// to halt the test.
+void RecvSingleFD(int sock, int* fd, char buf[], int buf_size,
+                  int expected_size);
+
+// PeekSingleFD peeks at both a single FD and some data over a unix domain
+// socket specified by an FD. Note that calls to this function must be wrapped
+// in ASSERT_NO_FATAL_FAILURE for internal assertions to halt the test.
+void PeekSingleFD(int sock, int* fd, char buf[], int buf_size);
+
+// RecvFDs receives both an arbitrary number of FDs and some data over a unix
+// domain socket specified by an FD. Note that calls to this function must be
+// wrapped in ASSERT_NO_FATAL_FAILURE for internal assertions to halt the test.
+void RecvFDs(int sock, int fds[], int fds_size, char buf[], int buf_size);
+
+// RecvFDs receives both an arbitrary number of FDs and some data over a unix
+// domain socket specified by an FD. This version allows the expected amount of
+// data received to be different than the buffer size. Note that calls to this
+// function must be wrapped in ASSERT_NO_FATAL_FAILURE for internal assertions
+// to halt the test.
+void RecvFDs(int sock, int fds[], int fds_size, char buf[], int buf_size,
+             int expected_size);
+
+// RecvNoCmsg receives some data over a unix domain socket specified by an FD
+// and asserts that no control messages are available for receiving. Note that
+// calls to this function must be wrapped in ASSERT_NO_FATAL_FAILURE for
+// internal assertions to halt the test.
+void RecvNoCmsg(int sock, char buf[], int buf_size, int expected_size);
+
+inline void RecvNoCmsg(int sock, char buf[], int buf_size) {
+  RecvNoCmsg(sock, buf, buf_size, buf_size);
+}
+
+// SendCreds sends the credentials of the current process and some data over a
+// unix domain socket specified by an FD. Note that calls to this function must
+// be wrapped in ASSERT_NO_FATAL_FAILURE for internal assertions to halt the
+// test.
+void SendCreds(int sock, ucred creds, char buf[], int buf_size);
+
+// SendCredsAndFD sends the credentials of the current process, a single FD, and
+// some data over a unix domain socket specified by an FD. Note that calls to
+// this function must be wrapped in ASSERT_NO_FATAL_FAILURE for internal
+// assertions to halt the test.
+void SendCredsAndFD(int sock, ucred creds, int fd, char buf[], int buf_size);
+
+// RecvCreds receives some credentials and some data over a unix domain socket
+// specified by an FD. Note that calls to this function must be wrapped in
+// ASSERT_NO_FATAL_FAILURE for internal assertions to halt the test.
+void RecvCreds(int sock, ucred* creds, char buf[], int buf_size);
+
+// RecvCreds receives some credentials and some data over a unix domain socket
+// specified by an FD. This version allows the expected amount of data received
+// to be different than the buffer size. Note that calls to this function must
+// be wrapped in ASSERT_NO_FATAL_FAILURE for internal assertions to halt the
+// test.
+void RecvCreds(int sock, ucred* creds, char buf[], int buf_size,
+               int expected_size);
+
+// RecvCredsAndFD receives some credentials, a single FD, and some data over a
+// unix domain socket specified by an FD. Note that calls to this function must
+// be wrapped in ASSERT_NO_FATAL_FAILURE for internal assertions to halt the
+// test.
+void RecvCredsAndFD(int sock, ucred* creds, int* fd, char buf[], int buf_size);
+
+// SendNullCmsg sends a null control message and some data over a unix domain
+// socket specified by an FD. Note that calls to this function must be wrapped
+// in ASSERT_NO_FATAL_FAILURE for internal assertions to halt the test.
+void SendNullCmsg(int sock, char buf[], int buf_size);
+
+// RecvSingleFDUnaligned sends both a single FD and some data over a unix domain
+// socket specified by an FD. This function does not obey the spec, but Linux
+// allows it and the apphosting code depends on this quirk. Note that calls to
+// this function must be wrapped in ASSERT_NO_FATAL_FAILURE for internal
+// assertions to halt the test.
+void RecvSingleFDUnaligned(int sock, int* fd, char buf[], int buf_size);
+
+// SetSoPassCred sets the SO_PASSCRED option on the specified socket.
+void SetSoPassCred(int sock);
+
+// UnsetSoPassCred clears the SO_PASSCRED option on the specified socket.
+void UnsetSoPassCred(int sock);
+
+}  // namespace testing
+}  // namespace gvisor
+
+#endif  // GVISOR_TEST_SYSCALLS_UNIX_DOMAIN_SOCKET_TEST_UTIL_H_
diff --git a/test/syscalls/linux/unlink.cc b/test/syscalls/linux/unlink.cc
new file mode 100644
index 000000000..2040375c9
--- /dev/null
+++ b/test/syscalls/linux/unlink.cc
@@ -0,0 +1,214 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "absl/strings/str_cat.h"
+#include "test/util/capability_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/fs_util.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(UnlinkTest, IsDir) {
+  auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+
+  EXPECT_THAT(unlink(dir.path().c_str()), SyscallFailsWithErrno(EISDIR));
+}
+
+TEST(UnlinkTest, DirNotEmpty) {
+  auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+
+  int fd;
+  std::string path = JoinPath(dir.path(), "ExistingFile");
+  EXPECT_THAT(fd = open(path.c_str(), O_RDWR | O_CREAT, 0666),
+              SyscallSucceeds());
+  EXPECT_THAT(close(fd), SyscallSucceeds());
+  EXPECT_THAT(rmdir(dir.path().c_str()), SyscallFailsWithErrno(ENOTEMPTY));
+}
+
+TEST(UnlinkTest, Rmdir) {
+  auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  EXPECT_THAT(rmdir(dir.path().c_str()), SyscallSucceeds());
+}
+
+TEST(UnlinkTest, AtDir) {
+  int dirfd;
+  auto tmpdir = GetAbsoluteTestTmpdir();
+  EXPECT_THAT(dirfd = open(tmpdir.c_str(), O_DIRECTORY, 0), SyscallSucceeds());
+
+  auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(tmpdir));
+  auto dir_relpath =
+      ASSERT_NO_ERRNO_AND_VALUE(GetRelativePath(tmpdir, dir.path()));
+  EXPECT_THAT(unlinkat(dirfd, dir_relpath.c_str(), AT_REMOVEDIR),
+              SyscallSucceeds());
+  ASSERT_THAT(close(dirfd), SyscallSucceeds());
+}
+
+TEST(UnlinkTest, AtDirDegradedPermissions_NoRandomSave) {
+  // Drop capabilities that allow us to override file and directory permissions.
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false));
+
+  auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+
+  int dirfd;
+  ASSERT_THAT(dirfd = open(dir.path().c_str(), O_DIRECTORY, 0),
+              SyscallSucceeds());
+
+  std::string sub_dir = JoinPath(dir.path(), "NewDir");
+  EXPECT_THAT(mkdir(sub_dir.c_str(), 0755), SyscallSucceeds());
+  EXPECT_THAT(fchmod(dirfd, 0444), SyscallSucceeds());
+  EXPECT_THAT(unlinkat(dirfd, "NewDir", AT_REMOVEDIR),
+              SyscallFailsWithErrno(EACCES));
+  ASSERT_THAT(close(dirfd), SyscallSucceeds());
+}
+
+// Files cannot be unlinked if the parent is not writable and executable.
+TEST(UnlinkTest, ParentDegradedPermissions) {
+  // Drop capabilities that allow us to override file and directory permissions.
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false));
+
+  auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir.path()));
+
+  ASSERT_THAT(chmod(dir.path().c_str(), 0000), SyscallSucceeds());
+
+  struct stat st;
+  ASSERT_THAT(stat(file.path().c_str(), &st), SyscallFailsWithErrno(EACCES));
+  ASSERT_THAT(unlinkat(AT_FDCWD, file.path().c_str(), 0),
+              SyscallFailsWithErrno(EACCES));
+
+  // Non-existent files also return EACCES.
+  const std::string nonexist = JoinPath(dir.path(), "doesnotexist");
+  ASSERT_THAT(stat(nonexist.c_str(), &st), SyscallFailsWithErrno(EACCES));
+  ASSERT_THAT(unlinkat(AT_FDCWD, nonexist.c_str(), 0),
+              SyscallFailsWithErrno(EACCES));
+}
+
+TEST(UnlinkTest, AtBad) {
+  int dirfd;
+  EXPECT_THAT(dirfd = open(GetAbsoluteTestTmpdir().c_str(), O_DIRECTORY, 0),
+              SyscallSucceeds());
+
+  // Try removing a directory as a file.
+  std::string path = JoinPath(GetAbsoluteTestTmpdir(), "NewDir");
+  EXPECT_THAT(mkdir(path.c_str(), 0755), SyscallSucceeds());
+  EXPECT_THAT(unlinkat(dirfd, "NewDir", 0), SyscallFailsWithErrno(EISDIR));
+  EXPECT_THAT(unlinkat(dirfd, "NewDir", AT_REMOVEDIR), SyscallSucceeds());
+
+  // Try removing a file as a directory.
+  int fd;
+  EXPECT_THAT(fd = openat(dirfd, "UnlinkAtFile", O_RDWR | O_CREAT, 0666),
+              SyscallSucceeds());
+  EXPECT_THAT(unlinkat(dirfd, "UnlinkAtFile", AT_REMOVEDIR),
+              SyscallFailsWithErrno(ENOTDIR));
+  EXPECT_THAT(unlinkat(dirfd, "UnlinkAtFile/", 0),
+              SyscallFailsWithErrno(ENOTDIR));
+  ASSERT_THAT(close(fd), SyscallSucceeds());
+  EXPECT_THAT(unlinkat(dirfd, "UnlinkAtFile", 0), SyscallSucceeds());
+
+  // Cleanup.
+  ASSERT_THAT(close(dirfd), SyscallSucceeds());
+}
+
+TEST(UnlinkTest, AbsTmpFile) {
+  int fd;
+  std::string path = JoinPath(GetAbsoluteTestTmpdir(), "ExistingFile");
+  EXPECT_THAT(fd = open(path.c_str(), O_RDWR | O_CREAT, 0666),
+              SyscallSucceeds());
+  EXPECT_THAT(close(fd), SyscallSucceeds());
+  EXPECT_THAT(unlink(path.c_str()), SyscallSucceeds());
+}
+
+TEST(UnlinkTest, TooLongName) {
+  EXPECT_THAT(unlink(std::vector<char>(16384, '0').data()),
+              SyscallFailsWithErrno(ENAMETOOLONG));
+}
+
+TEST(UnlinkTest, BadNamePtr) {
+  EXPECT_THAT(unlink(reinterpret_cast<char*>(1)),
+              SyscallFailsWithErrno(EFAULT));
+}
+
+TEST(UnlinkTest, AtFile) {
+  int dirfd;
+  EXPECT_THAT(dirfd = open(GetAbsoluteTestTmpdir().c_str(), O_DIRECTORY, 0666),
+              SyscallSucceeds());
+  int fd;
+  EXPECT_THAT(fd = openat(dirfd, "UnlinkAtFile", O_RDWR | O_CREAT, 0666),
+              SyscallSucceeds());
+  EXPECT_THAT(close(fd), SyscallSucceeds());
+  EXPECT_THAT(unlinkat(dirfd, "UnlinkAtFile", 0), SyscallSucceeds());
+}
+
+TEST(UnlinkTest, OpenFile_NoRandomSave) {
+  // We can't save unlinked file unless they are on tmpfs.
+  const DisableSave ds;
+  auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  int fd;
+  EXPECT_THAT(fd = open(file.path().c_str(), O_RDWR, 0666), SyscallSucceeds());
+  EXPECT_THAT(unlink(file.path().c_str()), SyscallSucceeds());
+  EXPECT_THAT(close(fd), SyscallSucceeds());
+}
+
+TEST(UnlinkTest, CannotRemoveDots) {
+  auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  const std::string self = JoinPath(file.path(), ".");
+  ASSERT_THAT(unlink(self.c_str()), SyscallFailsWithErrno(ENOTDIR));
+  const std::string parent = JoinPath(file.path(), "..");
+  ASSERT_THAT(unlink(parent.c_str()), SyscallFailsWithErrno(ENOTDIR));
+}
+
+TEST(UnlinkTest, CannotRemoveRoot) {
+  ASSERT_THAT(unlinkat(-1, "/", AT_REMOVEDIR), SyscallFailsWithErrno(EBUSY));
+}
+
+TEST(UnlinkTest, CannotRemoveRootWithAtDir) {
+  const FileDescriptor dirfd = ASSERT_NO_ERRNO_AND_VALUE(
+      Open(GetAbsoluteTestTmpdir(), O_DIRECTORY, 0666));
+  ASSERT_THAT(unlinkat(dirfd.get(), "/", AT_REMOVEDIR),
+              SyscallFailsWithErrno(EBUSY));
+}
+
+TEST(RmdirTest, CannotRemoveDots) {
+  auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const std::string self = JoinPath(dir.path(), ".");
+  ASSERT_THAT(rmdir(self.c_str()), SyscallFailsWithErrno(EINVAL));
+  const std::string parent = JoinPath(dir.path(), "..");
+  ASSERT_THAT(rmdir(parent.c_str()), SyscallFailsWithErrno(ENOTEMPTY));
+}
+
+TEST(RmdirTest, CanRemoveWithTrailingSlashes) {
+  auto dir1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const std::string slash = absl::StrCat(dir1.path(), "/");
+  ASSERT_THAT(rmdir(slash.c_str()), SyscallSucceeds());
+  auto dir2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const std::string slashslash = absl::StrCat(dir2.path(), "//");
+  ASSERT_THAT(rmdir(slashslash.c_str()), SyscallSucceeds());
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/unshare.cc b/test/syscalls/linux/unshare.cc
new file mode 100644
index 000000000..e32619efe
--- /dev/null
+++ b/test/syscalls/linux/unshare.cc
@@ -0,0 +1,50 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <sched.h>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/synchronization/mutex.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(UnshareTest, AllowsZeroFlags) {
+  ASSERT_THAT(unshare(0), SyscallSucceeds());
+}
+
+TEST(UnshareTest, ThreadFlagFailsIfMultithreaded) {
+  absl::Mutex mu;
+  bool finished = false;
+  ScopedThread t([&] {
+    mu.Lock();
+    mu.Await(absl::Condition(&finished));
+    mu.Unlock();
+  });
+  ASSERT_THAT(unshare(CLONE_THREAD), SyscallFailsWithErrno(EINVAL));
+  mu.Lock();
+  finished = true;
+  mu.Unlock();
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/utimes.cc b/test/syscalls/linux/utimes.cc
new file mode 100644
index 000000000..e647d2896
--- /dev/null
+++ b/test/syscalls/linux/utimes.cc
@@ -0,0 +1,319 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+#include <utime.h>
+
+#include <string>
+
+#include "absl/time/time.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/fs_util.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// TimeBoxed runs fn, setting before and after to (coarse realtime) times
+// guaranteed* to come before and after fn started and completed, respectively.
+//
+// fn may be called more than once if the clock is adjusted.
+void TimeBoxed(absl::Time* before, absl::Time* after,
+               std::function<void()> const& fn) {
+  do {
+    // N.B. utimes and friends use CLOCK_REALTIME_COARSE for setting time (i.e.,
+    // current_kernel_time()). See fs/attr.c:notify_change.
+    //
+    // notify_change truncates the time to a multiple of s_time_gran, but most
+    // filesystems set it to 1, so we don't do any truncation.
+    struct timespec ts;
+    EXPECT_THAT(clock_gettime(CLOCK_REALTIME_COARSE, &ts), SyscallSucceeds());
+    // FIXME(b/132819225): gVisor filesystem timestamps inconsistently use the
+    // internal or host clock, which may diverge slightly. Allow some slack on
+    // times to account for the difference.
+    *before = absl::TimeFromTimespec(ts) - absl::Seconds(1);
+
+    fn();
+
+    EXPECT_THAT(clock_gettime(CLOCK_REALTIME_COARSE, &ts), SyscallSucceeds());
+    *after = absl::TimeFromTimespec(ts) + absl::Seconds(1);
+
+    if (*after < *before) {
+      // Clock jumped backwards; retry.
+      //
+      // Technically this misses jumps small enough to keep after > before,
+      // which could lead to test failures, but that is very unlikely to happen.
+      continue;
+    }
+  } while (*after < *before);
+}
+
+void TestUtimesOnPath(std::string const& path) {
+  struct stat statbuf;
+
+  struct timeval times[2] = {{10, 0}, {20, 0}};
+  EXPECT_THAT(utimes(path.c_str(), times), SyscallSucceeds());
+  EXPECT_THAT(stat(path.c_str(), &statbuf), SyscallSucceeds());
+  EXPECT_EQ(10, statbuf.st_atime);
+  EXPECT_EQ(20, statbuf.st_mtime);
+
+  absl::Time before;
+  absl::Time after;
+  TimeBoxed(&before, &after, [&] {
+    EXPECT_THAT(utimes(path.c_str(), nullptr), SyscallSucceeds());
+  });
+
+  EXPECT_THAT(stat(path.c_str(), &statbuf), SyscallSucceeds());
+
+  absl::Time atime = absl::TimeFromTimespec(statbuf.st_atim);
+  EXPECT_GE(atime, before);
+  EXPECT_LE(atime, after);
+
+  absl::Time mtime = absl::TimeFromTimespec(statbuf.st_mtim);
+  EXPECT_GE(mtime, before);
+  EXPECT_LE(mtime, after);
+}
+
+TEST(UtimesTest, OnFile) {
+  auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  TestUtimesOnPath(f.path());
+}
+
+TEST(UtimesTest, OnDir) {
+  auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  TestUtimesOnPath(dir.path());
+}
+
+TEST(UtimesTest, MissingPath) {
+  auto path = NewTempAbsPath();
+  struct timeval times[2] = {{10, 0}, {20, 0}};
+  EXPECT_THAT(utimes(path.c_str(), times), SyscallFailsWithErrno(ENOENT));
+}
+
+void TestFutimesat(int dirFd, std::string const& path) {
+  struct stat statbuf;
+
+  struct timeval times[2] = {{10, 0}, {20, 0}};
+  EXPECT_THAT(futimesat(dirFd, path.c_str(), times), SyscallSucceeds());
+  EXPECT_THAT(fstatat(dirFd, path.c_str(), &statbuf, 0), SyscallSucceeds());
+  EXPECT_EQ(10, statbuf.st_atime);
+  EXPECT_EQ(20, statbuf.st_mtime);
+
+  absl::Time before;
+  absl::Time after;
+  TimeBoxed(&before, &after, [&] {
+    EXPECT_THAT(futimesat(dirFd, path.c_str(), nullptr), SyscallSucceeds());
+  });
+
+  EXPECT_THAT(fstatat(dirFd, path.c_str(), &statbuf, 0), SyscallSucceeds());
+
+  absl::Time atime = absl::TimeFromTimespec(statbuf.st_atim);
+  EXPECT_GE(atime, before);
+  EXPECT_LE(atime, after);
+
+  absl::Time mtime = absl::TimeFromTimespec(statbuf.st_mtim);
+  EXPECT_GE(mtime, before);
+  EXPECT_LE(mtime, after);
+}
+
+TEST(FutimesatTest, OnAbsPath) {
+  auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  TestFutimesat(0, f.path());
+}
+
+TEST(FutimesatTest, OnRelPath) {
+  auto d = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(d.path()));
+  auto basename = std::string(Basename(f.path()));
+  const FileDescriptor dirFd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(d.path(), O_RDONLY | O_DIRECTORY));
+  TestFutimesat(dirFd.get(), basename);
+}
+
+TEST(FutimesatTest, InvalidNsec) {
+  auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  struct timeval times[4][2] = {{
+                                    {0, 1},                         // Valid
+                                    {1, static_cast<int64_t>(1e7)}  // Invalid
+                                },
+                                {
+                                    {1, static_cast<int64_t>(1e7)},  // Invalid
+                                    {0, 1}                           // Valid
+                                },
+                                {
+                                    {0, 1},  // Valid
+                                    {1, -1}  // Invalid
+                                },
+                                {
+                                    {1, -1},  // Invalid
+                                    {0, 1}    // Valid
+                                }};
+
+  for (unsigned int i = 0; i < sizeof(times) / sizeof(times[0]); i++) {
+    std::cout << "test:" << i << "\n";
+    EXPECT_THAT(futimesat(0, f.path().c_str(), times[i]),
+                SyscallFailsWithErrno(EINVAL));
+  }
+}
+
+void TestUtimensat(int dirFd, std::string const& path) {
+  struct stat statbuf;
+  const struct timespec times[2] = {{10, 0}, {20, 0}};
+  EXPECT_THAT(utimensat(dirFd, path.c_str(), times, 0), SyscallSucceeds());
+  EXPECT_THAT(fstatat(dirFd, path.c_str(), &statbuf, 0), SyscallSucceeds());
+  EXPECT_EQ(10, statbuf.st_atime);
+  EXPECT_EQ(20, statbuf.st_mtime);
+
+  // Test setting with UTIME_NOW and UTIME_OMIT.
+  struct stat statbuf2;
+  const struct timespec times2[2] = {
+      {0, UTIME_NOW},  // Should set atime to now.
+      {0, UTIME_OMIT}  // Should not change mtime.
+  };
+
+  absl::Time before;
+  absl::Time after;
+  TimeBoxed(&before, &after, [&] {
+    EXPECT_THAT(utimensat(dirFd, path.c_str(), times2, 0), SyscallSucceeds());
+  });
+
+  EXPECT_THAT(fstatat(dirFd, path.c_str(), &statbuf2, 0), SyscallSucceeds());
+
+  absl::Time atime2 = absl::TimeFromTimespec(statbuf2.st_atim);
+  EXPECT_GE(atime2, before);
+  EXPECT_LE(atime2, after);
+
+  absl::Time mtime = absl::TimeFromTimespec(statbuf.st_mtim);
+  absl::Time mtime2 = absl::TimeFromTimespec(statbuf2.st_mtim);
+  // mtime should not be changed.
+  EXPECT_EQ(mtime, mtime2);
+
+  // Test setting with times = NULL. Should set both atime and mtime to the
+  // current system time.
+  struct stat statbuf3;
+  TimeBoxed(&before, &after, [&] {
+    EXPECT_THAT(utimensat(dirFd, path.c_str(), nullptr, 0), SyscallSucceeds());
+  });
+
+  EXPECT_THAT(fstatat(dirFd, path.c_str(), &statbuf3, 0), SyscallSucceeds());
+
+  absl::Time atime3 = absl::TimeFromTimespec(statbuf3.st_atim);
+  EXPECT_GE(atime3, before);
+  EXPECT_LE(atime3, after);
+
+  absl::Time mtime3 = absl::TimeFromTimespec(statbuf3.st_mtim);
+  EXPECT_GE(mtime3, before);
+  EXPECT_LE(mtime3, after);
+
+  EXPECT_EQ(atime3, mtime3);
+}
+
+TEST(UtimensatTest, OnAbsPath) {
+  auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  TestUtimensat(0, f.path());
+}
+
+TEST(UtimensatTest, OnRelPath) {
+  auto d = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(d.path()));
+  auto basename = std::string(Basename(f.path()));
+  const FileDescriptor dirFd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(d.path(), O_RDONLY | O_DIRECTORY));
+  TestUtimensat(dirFd.get(), basename);
+}
+
+TEST(UtimensatTest, OmitNoop) {
+  // Setting both timespecs to UTIME_OMIT on a nonexistant path should succeed.
+  auto path = NewTempAbsPath();
+  const struct timespec times[2] = {{0, UTIME_OMIT}, {0, UTIME_OMIT}};
+  EXPECT_THAT(utimensat(0, path.c_str(), times, 0), SyscallSucceeds());
+}
+
+// Verify that we can actually set atime and mtime to 0.
+TEST(UtimeTest, ZeroAtimeandMtime) {
+  const auto tmp_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const auto tmp_file =
+      ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(tmp_dir.path()));
+
+  // Stat the file before and after updating atime and mtime.
+  struct stat stat_before = {};
+  EXPECT_THAT(stat(tmp_file.path().c_str(), &stat_before), SyscallSucceeds());
+
+  ASSERT_NE(stat_before.st_atime, 0);
+  ASSERT_NE(stat_before.st_mtime, 0);
+
+  const struct utimbuf times = {};  // Zero for both atime and mtime.
+  EXPECT_THAT(utime(tmp_file.path().c_str(), &times), SyscallSucceeds());
+
+  struct stat stat_after = {};
+  EXPECT_THAT(stat(tmp_file.path().c_str(), &stat_after), SyscallSucceeds());
+
+  // We should see the atime and mtime changed when we set them to 0.
+  ASSERT_EQ(stat_after.st_atime, 0);
+  ASSERT_EQ(stat_after.st_mtime, 0);
+}
+
+TEST(UtimensatTest, InvalidNsec) {
+  auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  struct timespec times[2][2] = {
+      {
+          {0, UTIME_OMIT},                 // Valid
+          {2, static_cast<int64_t>(1e10)}  // Invalid
+      },
+      {
+          {2, static_cast<int64_t>(1e10)},  // Invalid
+          {0, UTIME_OMIT}                   // Valid
+      }};
+
+  for (unsigned int i = 0; i < sizeof(times) / sizeof(times[0]); i++) {
+    std::cout << "test:" << i << "\n";
+    EXPECT_THAT(utimensat(0, f.path().c_str(), times[i], 0),
+                SyscallFailsWithErrno(EINVAL));
+  }
+}
+
+TEST(Utimensat, NullPath) {
+  // From man utimensat(2):
+  // "the Linux utimensat() system call implements a nonstandard feature: if
+  // pathname is NULL, then the call modifies the timestamps of the file
+  // referred to by the file descriptor dirfd (which may refer to any type of
+  // file).
+  // Note, however, that the glibc wrapper for utimensat() disallows
+  // passing NULL as the value for file: the wrapper function returns the error
+  // EINVAL in this case."
+  auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+  const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDWR));
+  struct stat statbuf;
+  const struct timespec times[2] = {{10, 0}, {20, 0}};
+  // Call syscall directly.
+  EXPECT_THAT(syscall(SYS_utimensat, fd.get(), NULL, times, 0),
+              SyscallSucceeds());
+  EXPECT_THAT(fstatat(0, f.path().c_str(), &statbuf, 0), SyscallSucceeds());
+  EXPECT_EQ(10, statbuf.st_atime);
+  EXPECT_EQ(20, statbuf.st_mtime);
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/vdso.cc b/test/syscalls/linux/vdso.cc
new file mode 100644
index 000000000..19c80add8
--- /dev/null
+++ b/test/syscalls/linux/vdso.cc
@@ -0,0 +1,48 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <string.h>
+#include <sys/mman.h>
+
+#include <algorithm>
+
+#include "gtest/gtest.h"
+#include "test/util/fs_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/proc_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// Ensure that the vvar page cannot be made writable.
+TEST(VvarTest, WriteVvar) {
+  auto contents = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps"));
+  auto maps = ASSERT_NO_ERRNO_AND_VALUE(ParseProcMaps(contents));
+  auto it = std::find_if(maps.begin(), maps.end(), [](const ProcMapsEntry& e) {
+    return e.filename == "[vvar]";
+  });
+
+  SKIP_IF(it == maps.end());
+  EXPECT_THAT(mprotect(reinterpret_cast<void*>(it->start), kPageSize,
+                       PROT_READ | PROT_WRITE),
+              SyscallFailsWithErrno(EACCES));
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/vdso_clock_gettime.cc b/test/syscalls/linux/vdso_clock_gettime.cc
new file mode 100644
index 000000000..ce1899f45
--- /dev/null
+++ b/test/syscalls/linux/vdso_clock_gettime.cc
@@ -0,0 +1,108 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdint.h>
+#include <sys/time.h>
+#include <syscall.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <map>
+#include <string>
+#include <utility>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/strings/numbers.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+std::string PrintClockId(::testing::TestParamInfo<clockid_t> info) {
+  switch (info.param) {
+    case CLOCK_MONOTONIC:
+      return "CLOCK_MONOTONIC";
+    case CLOCK_REALTIME:
+      return "CLOCK_REALTIME";
+    case CLOCK_BOOTTIME:
+      return "CLOCK_BOOTTIME";
+    default:
+      return absl::StrCat(info.param);
+  }
+}
+
+class CorrectVDSOClockTest : public ::testing::TestWithParam<clockid_t> {};
+
+TEST_P(CorrectVDSOClockTest, IsCorrect) {
+  struct timespec tvdso, tsys;
+  absl::Time vdso_time, sys_time;
+  uint64_t total_calls = 0;
+
+  // It is expected that 82.5% of clock_gettime calls will be less than 100us
+  // skewed from the system time.
+  // Unfortunately this is not only influenced by the VDSO clock skew, but also
+  // by arbitrary scheduling delays and the like. The test is therefore
+  // regularly disabled.
+  std::map<absl::Duration, std::tuple<double, uint64_t, uint64_t>> confidence =
+      {
+          {absl::Microseconds(100), std::make_tuple(0.825, 0, 0)},
+          {absl::Microseconds(250), std::make_tuple(0.94, 0, 0)},
+          {absl::Milliseconds(1), std::make_tuple(0.999, 0, 0)},
+      };
+
+  absl::Time start = absl::Now();
+  while (absl::Now() < start + absl::Seconds(30)) {
+    EXPECT_THAT(clock_gettime(GetParam(), &tvdso), SyscallSucceeds());
+    EXPECT_THAT(syscall(__NR_clock_gettime, GetParam(), &tsys),
+                SyscallSucceeds());
+
+    vdso_time = absl::TimeFromTimespec(tvdso);
+
+    for (auto const& conf : confidence) {
+      std::get<1>(confidence[conf.first]) +=
+          (sys_time - vdso_time) < conf.first;
+    }
+
+    sys_time = absl::TimeFromTimespec(tsys);
+
+    for (auto const& conf : confidence) {
+      std::get<2>(confidence[conf.first]) +=
+          (vdso_time - sys_time) < conf.first;
+    }
+
+    ++total_calls;
+  }
+
+  for (auto const& conf : confidence) {
+    EXPECT_GE(std::get<1>(conf.second) / static_cast<double>(total_calls),
+              std::get<0>(conf.second));
+    EXPECT_GE(std::get<2>(conf.second) / static_cast<double>(total_calls),
+              std::get<0>(conf.second));
+  }
+}
+
+INSTANTIATE_TEST_SUITE_P(ClockGettime, CorrectVDSOClockTest,
+                         ::testing::Values(CLOCK_MONOTONIC, CLOCK_REALTIME,
+                                           CLOCK_BOOTTIME),
+                         PrintClockId);
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/vfork.cc b/test/syscalls/linux/vfork.cc
new file mode 100644
index 000000000..19d05998e
--- /dev/null
+++ b/test/syscalls/linux/vfork.cc
@@ -0,0 +1,195 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include <string>
+#include <utility>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/flags/flag.h"
+#include "absl/time/time.h"
+#include "test/util/logging.h"
+#include "test/util/multiprocess_util.h"
+#include "test/util/test_util.h"
+#include "test/util/time_util.h"
+
+ABSL_FLAG(bool, vfork_test_child, false,
+          "If true, run the VforkTest child workload.");
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// We don't test with raw CLONE_VFORK to avoid interacting with glibc's use of
+// TLS.
+//
+// Even with vfork(2), we must be careful to do little more in the child than
+// call execve(2). We use the simplest sleep function possible, though this is
+// still precarious, as we're officially only allowed to call execve(2) and
+// _exit(2).
+constexpr absl::Duration kChildDelay = absl::Seconds(10);
+
+// Exit code for successful child subprocesses. We don't want to use 0 since
+// it's too common, and an execve(2) failure causes the child to exit with the
+// errno, so kChildExitCode is chosen to be an unlikely errno:
+constexpr int kChildExitCode = 118;  // ENOTNAM: Not a XENIX named type file
+
+int64_t MonotonicNow() {
+  struct timespec now;
+  TEST_PCHECK(clock_gettime(CLOCK_MONOTONIC, &now) == 0);
+  return now.tv_sec * 1000000000ll + now.tv_nsec;
+}
+
+TEST(VforkTest, ParentStopsUntilChildExits) {
+  const auto test = [] {
+    // N.B. Run the test in a single-threaded subprocess because
+    // vfork is not safe in a multi-threaded process.
+
+    const int64_t start = MonotonicNow();
+
+    pid_t pid = vfork();
+    if (pid == 0) {
+      SleepSafe(kChildDelay);
+      _exit(kChildExitCode);
+    }
+    TEST_PCHECK_MSG(pid > 0, "vfork failed");
+    MaybeSave();
+
+    const int64_t end = MonotonicNow();
+
+    absl::Duration dur = absl::Nanoseconds(end - start);
+
+    TEST_CHECK(dur >= kChildDelay);
+
+    int status = 0;
+    TEST_PCHECK(RetryEINTR(waitpid)(pid, &status, 0));
+    TEST_CHECK(WIFEXITED(status));
+    TEST_CHECK(WEXITSTATUS(status) == kChildExitCode);
+  };
+
+  EXPECT_THAT(InForkedProcess(test), IsPosixErrorOkAndHolds(0));
+}
+
+TEST(VforkTest, ParentStopsUntilChildExecves_NoRandomSave) {
+  ExecveArray const owned_child_argv = {"/proc/self/exe", "--vfork_test_child"};
+  char* const* const child_argv = owned_child_argv.get();
+
+  const auto test = [&] {
+    const int64_t start = MonotonicNow();
+
+    pid_t pid = vfork();
+    if (pid == 0) {
+      SleepSafe(kChildDelay);
+      execve(child_argv[0], child_argv, /* envp = */ nullptr);
+      _exit(errno);
+    }
+    // Don't attempt save/restore until after recording end_time,
+    // since the test expects an upper bound on the time spent
+    // stopped.
+    int saved_errno = errno;
+    const int64_t end = MonotonicNow();
+    errno = saved_errno;
+    TEST_PCHECK_MSG(pid > 0, "vfork failed");
+    MaybeSave();
+
+    absl::Duration dur = absl::Nanoseconds(end - start);
+
+    // The parent should resume execution after execve, but before
+    // the post-execve test child exits.
+    TEST_CHECK(dur >= kChildDelay);
+    TEST_CHECK(dur <= 2 * kChildDelay);
+
+    int status = 0;
+    TEST_PCHECK(RetryEINTR(waitpid)(pid, &status, 0));
+    TEST_CHECK(WIFEXITED(status));
+    TEST_CHECK(WEXITSTATUS(status) == kChildExitCode);
+  };
+
+  EXPECT_THAT(InForkedProcess(test), IsPosixErrorOkAndHolds(0));
+}
+
+// A vfork child does not unstop the parent a second time when it exits after
+// exec.
+TEST(VforkTest, ExecedChildExitDoesntUnstopParent_NoRandomSave) {
+  ExecveArray const owned_child_argv = {"/proc/self/exe", "--vfork_test_child"};
+  char* const* const child_argv = owned_child_argv.get();
+
+  const auto test = [&] {
+    pid_t pid1 = vfork();
+    if (pid1 == 0) {
+      execve(child_argv[0], child_argv, /* envp = */ nullptr);
+      _exit(errno);
+    }
+    TEST_PCHECK_MSG(pid1 > 0, "vfork failed");
+    MaybeSave();
+
+    // pid1 exec'd and is now sleeping.
+    SleepSafe(kChildDelay / 2);
+
+    const int64_t start = MonotonicNow();
+
+    pid_t pid2 = vfork();
+    if (pid2 == 0) {
+      SleepSafe(kChildDelay);
+      _exit(kChildExitCode);
+    }
+    TEST_PCHECK_MSG(pid2 > 0, "vfork failed");
+    MaybeSave();
+
+    const int64_t end = MonotonicNow();
+
+    absl::Duration dur = absl::Nanoseconds(end - start);
+
+    // The parent should resume execution only after pid2 exits, not
+    // when pid1 exits.
+    TEST_CHECK(dur >= kChildDelay);
+
+    int status = 0;
+    TEST_PCHECK(RetryEINTR(waitpid)(pid1, &status, 0));
+    TEST_CHECK(WIFEXITED(status));
+    TEST_CHECK(WEXITSTATUS(status) == kChildExitCode);
+
+    TEST_PCHECK(RetryEINTR(waitpid)(pid2, &status, 0));
+    TEST_CHECK(WIFEXITED(status));
+    TEST_CHECK(WEXITSTATUS(status) == kChildExitCode);
+  };
+
+  EXPECT_THAT(InForkedProcess(test), IsPosixErrorOkAndHolds(0));
+}
+
+int RunChild() {
+  SleepSafe(kChildDelay);
+  return kChildExitCode;
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
+
+int main(int argc, char** argv) {
+  gvisor::testing::TestInit(&argc, &argv);
+
+  if (absl::GetFlag(FLAGS_vfork_test_child)) {
+    return gvisor::testing::RunChild();
+  }
+
+  return gvisor::testing::RunAllTests();
+}
diff --git a/test/syscalls/linux/vsyscall.cc b/test/syscalls/linux/vsyscall.cc
new file mode 100644
index 000000000..ae4377108
--- /dev/null
+++ b/test/syscalls/linux/vsyscall.cc
@@ -0,0 +1,46 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <time.h>
+
+#include "gtest/gtest.h"
+#include "test/util/proc_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+#if defined(__x86_64__) || defined(__i386__)
+time_t vsyscall_time(time_t* t) {
+  constexpr uint64_t kVsyscallTimeEntry = 0xffffffffff600400;
+  return reinterpret_cast<time_t (*)(time_t*)>(kVsyscallTimeEntry)(t);
+}
+
+TEST(VsyscallTest, VsyscallAlwaysAvailableOnGvisor) {
+  SKIP_IF(!IsRunningOnGvisor());
+  // Vsyscall is always advertised by gvisor.
+  EXPECT_TRUE(ASSERT_NO_ERRNO_AND_VALUE(IsVsyscallEnabled()));
+  // Vsyscall should always works on gvisor.
+  time_t t;
+  EXPECT_THAT(vsyscall_time(&t), SyscallSucceeds());
+}
+#endif
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/wait.cc b/test/syscalls/linux/wait.cc
new file mode 100644
index 000000000..944149d5e
--- /dev/null
+++ b/test/syscalls/linux/wait.cc
@@ -0,0 +1,913 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <signal.h>
+#include <sys/mman.h>
+#include <sys/ptrace.h>
+#include <sys/resource.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include <functional>
+#include <tuple>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/strings/str_cat.h"
+#include "absl/synchronization/mutex.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/util/cleanup.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/logging.h"
+#include "test/util/multiprocess_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/signal_util.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+#include "test/util/time_util.h"
+
+using ::testing::UnorderedElementsAre;
+
+// These unit tests focus on the wait4(2) system call, but include a basic
+// checks for the i386 waitpid(2) syscall, which is a subset of wait4(2).
+//
+// NOTE(b/22640830,b/27680907,b/29049891): Some functionality is not tested as
+// it is not currently supported by gVisor:
+// * Process groups.
+// * Core dump status (WCOREDUMP).
+//
+// Tests for waiting on stopped/continued children are in sigstop.cc.
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// The CloneChild function seems to need more than one page of stack space.
+static const size_t kStackSize = 2 * kPageSize;
+
+// The child thread created in CloneAndExit runs this function.
+// This child does not have the TLS setup, so it must not use glibc functions.
+int CloneChild(void* priv) {
+  int64_t sleep = reinterpret_cast<int64_t>(priv);
+  SleepSafe(absl::Seconds(sleep));
+
+  // glibc's _exit(2) function wrapper will helpfully call exit_group(2),
+  // exiting the entire process.
+  syscall(__NR_exit, 0);
+  return 1;
+}
+
+// ForkAndExit forks a child process which exits with exit_code, after
+// sleeping for the specified duration (seconds).
+pid_t ForkAndExit(int exit_code, int64_t sleep) {
+  pid_t child = fork();
+  if (child == 0) {
+    SleepSafe(absl::Seconds(sleep));
+    _exit(exit_code);
+  }
+  return child;
+}
+
+int64_t clock_gettime_nsecs(clockid_t id) {
+  struct timespec ts;
+  TEST_PCHECK(clock_gettime(id, &ts) == 0);
+  return (ts.tv_sec * 1000000000 + ts.tv_nsec);
+}
+
+void spin(int64_t sec) {
+  int64_t ns = sec * 1000000000;
+  int64_t start = clock_gettime_nsecs(CLOCK_THREAD_CPUTIME_ID);
+  int64_t end = start + ns;
+
+  do {
+    constexpr int kLoopCount = 1000000;  // large and arbitrary
+    // volatile to prevent the compiler from skipping this loop.
+    for (volatile int i = 0; i < kLoopCount; i++) {
+    }
+  } while (clock_gettime_nsecs(CLOCK_THREAD_CPUTIME_ID) < end);
+}
+
+// ForkSpinAndExit forks a child process which exits with exit_code, after
+// spinning for the specified duration (seconds).
+pid_t ForkSpinAndExit(int exit_code, int64_t spintime) {
+  pid_t child = fork();
+  if (child == 0) {
+    spin(spintime);
+    _exit(exit_code);
+  }
+  return child;
+}
+
+absl::Duration RusageCpuTime(const struct rusage& ru) {
+  return absl::DurationFromTimeval(ru.ru_utime) +
+         absl::DurationFromTimeval(ru.ru_stime);
+}
+
+// Returns the address of the top of the stack.
+// Free with FreeStack.
+uintptr_t AllocStack() {
+  void* addr = mmap(nullptr, kStackSize, PROT_READ | PROT_WRITE,
+                    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+  if (addr == MAP_FAILED) {
+    return reinterpret_cast<uintptr_t>(MAP_FAILED);
+  }
+
+  return reinterpret_cast<uintptr_t>(addr) + kStackSize;
+}
+
+// Frees a stack page allocated with AllocStack.
+int FreeStack(uintptr_t addr) {
+  addr -= kStackSize;
+  return munmap(reinterpret_cast<void*>(addr), kPageSize);
+}
+
+// CloneAndExit clones a child thread, which exits with 0 after sleeping for
+// the specified duration (must be in seconds). extra_flags are ORed against
+// the standard clone(2) flags.
+int CloneAndExit(int64_t sleep, uintptr_t stack, int extra_flags) {
+  return clone(CloneChild, reinterpret_cast<void*>(stack),
+               CLONE_FILES | CLONE_FS | CLONE_SIGHAND | CLONE_VM | extra_flags,
+               reinterpret_cast<void*>(sleep));
+}
+
+// Simple wrappers around wait4(2) and waitid(2) that ignore interrupts.
+constexpr auto Wait4 = RetryEINTR(wait4);
+constexpr auto Waitid = RetryEINTR(waitid);
+
+// Fixture for tests parameterized by a function that waits for any child to
+// exit with the given options, checks that it exited with the given code, and
+// then returns its PID.
+//
+// N.B. These tests run in a multi-threaded environment. We assume that
+// background threads do not create child processes and are not themselves
+// created with clone(... | SIGCHLD). Either may cause these tests to
+// erroneously wait on child processes/threads.
+class WaitAnyChildTest : public ::testing::TestWithParam<
+                             std::function<PosixErrorOr<pid_t>(int, int)>> {
+ protected:
+  PosixErrorOr<pid_t> WaitAny(int code) { return WaitAnyWithOptions(code, 0); }
+
+  PosixErrorOr<pid_t> WaitAnyWithOptions(int code, int options) {
+    return GetParam()(code, options);
+  }
+};
+
+// Wait for any child to exit.
+TEST_P(WaitAnyChildTest, Fork) {
+  pid_t child;
+  ASSERT_THAT(child = ForkAndExit(0, 0), SyscallSucceeds());
+
+  EXPECT_THAT(WaitAny(0), IsPosixErrorOkAndHolds(child));
+}
+
+// Call wait4 for any process after the child has already exited.
+TEST_P(WaitAnyChildTest, AfterExit) {
+  pid_t child;
+  ASSERT_THAT(child = ForkAndExit(0, 0), SyscallSucceeds());
+
+  absl::SleepFor(absl::Seconds(5));
+
+  EXPECT_THAT(WaitAny(0), IsPosixErrorOkAndHolds(child));
+}
+
+// Wait for multiple children to exit, waiting for either at a time.
+TEST_P(WaitAnyChildTest, MultipleFork) {
+  pid_t child1, child2;
+  ASSERT_THAT(child1 = ForkAndExit(0, 0), SyscallSucceeds());
+  ASSERT_THAT(child2 = ForkAndExit(0, 0), SyscallSucceeds());
+
+  std::vector<pid_t> pids;
+  pids.push_back(ASSERT_NO_ERRNO_AND_VALUE(WaitAny(0)));
+  pids.push_back(ASSERT_NO_ERRNO_AND_VALUE(WaitAny(0)));
+  EXPECT_THAT(pids, UnorderedElementsAre(child1, child2));
+}
+
+// Wait for any child to exit.
+// A non-CLONE_THREAD child which sends SIGCHLD upon exit behaves much like
+// a forked process.
+TEST_P(WaitAnyChildTest, CloneSIGCHLD) {
+  uintptr_t stack;
+  ASSERT_THAT(stack = AllocStack(), SyscallSucceeds());
+  auto free =
+      Cleanup([stack] { ASSERT_THAT(FreeStack(stack), SyscallSucceeds()); });
+
+  int child;
+  ASSERT_THAT(child = CloneAndExit(0, stack, SIGCHLD), SyscallSucceeds());
+
+  EXPECT_THAT(WaitAny(0), IsPosixErrorOkAndHolds(child));
+}
+
+// Wait for a child thread and process.
+TEST_P(WaitAnyChildTest, ForkAndClone) {
+  pid_t process;
+  ASSERT_THAT(process = ForkAndExit(0, 0), SyscallSucceeds());
+
+  uintptr_t stack;
+  ASSERT_THAT(stack = AllocStack(), SyscallSucceeds());
+  auto free =
+      Cleanup([stack] { ASSERT_THAT(FreeStack(stack), SyscallSucceeds()); });
+
+  int thread;
+  // Send SIGCHLD for normal wait semantics.
+  ASSERT_THAT(thread = CloneAndExit(0, stack, SIGCHLD), SyscallSucceeds());
+
+  std::vector<pid_t> pids;
+  pids.push_back(ASSERT_NO_ERRNO_AND_VALUE(WaitAny(0)));
+  pids.push_back(ASSERT_NO_ERRNO_AND_VALUE(WaitAny(0)));
+  EXPECT_THAT(pids, UnorderedElementsAre(process, thread));
+}
+
+// Return immediately if no child has exited.
+TEST_P(WaitAnyChildTest, WaitWNOHANG) {
+  EXPECT_THAT(WaitAnyWithOptions(0, WNOHANG),
+              PosixErrorIs(ECHILD, ::testing::_));
+}
+
+// Bad options passed
+TEST_P(WaitAnyChildTest, BadOption) {
+  EXPECT_THAT(WaitAnyWithOptions(0, 123456),
+              PosixErrorIs(EINVAL, ::testing::_));
+}
+
+TEST_P(WaitAnyChildTest, WaitedChildRusage) {
+  struct rusage before;
+  ASSERT_THAT(getrusage(RUSAGE_CHILDREN, &before), SyscallSucceeds());
+
+  pid_t child;
+  constexpr absl::Duration kSpin = absl::Seconds(3);
+  ASSERT_THAT(child = ForkSpinAndExit(0, absl::ToInt64Seconds(kSpin)),
+              SyscallSucceeds());
+  ASSERT_THAT(WaitAny(0), IsPosixErrorOkAndHolds(child));
+
+  struct rusage after;
+  ASSERT_THAT(getrusage(RUSAGE_CHILDREN, &after), SyscallSucceeds());
+
+  EXPECT_GE(RusageCpuTime(after) - RusageCpuTime(before), kSpin);
+}
+
+TEST_P(WaitAnyChildTest, IgnoredChildRusage) {
+  // "POSIX.1-2001 specifies that if the disposition of SIGCHLD is
+  // set to SIG_IGN or the SA_NOCLDWAIT flag is set for SIGCHLD (see
+  // sigaction(2)), then children that terminate do not become zombies and a
+  // call to wait() or waitpid() will block until all children have terminated,
+  // and then fail with errno set to ECHILD." - waitpid(2)
+  //
+  // "RUSAGE_CHILDREN: Return resource usage statistics for all children of the
+  // calling process that have terminated *and been waited for*." -
+  // getrusage(2), emphasis added
+
+  struct sigaction sa;
+  sa.sa_handler = SIG_IGN;
+  const auto cleanup_sigact =
+      ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGCHLD, sa));
+
+  struct rusage before;
+  ASSERT_THAT(getrusage(RUSAGE_CHILDREN, &before), SyscallSucceeds());
+
+  const absl::Duration start =
+      absl::Nanoseconds(clock_gettime_nsecs(CLOCK_MONOTONIC));
+
+  constexpr absl::Duration kSpin = absl::Seconds(3);
+
+  // ForkAndSpin uses CLOCK_THREAD_CPUTIME_ID, which is lower resolution than,
+  // and may diverge from, CLOCK_MONOTONIC, so we allow a small grace period but
+  // still check that we blocked for a while.
+  constexpr absl::Duration kSpinGrace = absl::Milliseconds(100);
+
+  pid_t child;
+  ASSERT_THAT(child = ForkSpinAndExit(0, absl::ToInt64Seconds(kSpin)),
+              SyscallSucceeds());
+  ASSERT_THAT(WaitAny(0), PosixErrorIs(ECHILD, ::testing::_));
+  const absl::Duration end =
+      absl::Nanoseconds(clock_gettime_nsecs(CLOCK_MONOTONIC));
+  EXPECT_GE(end - start, kSpin - kSpinGrace);
+
+  struct rusage after;
+  ASSERT_THAT(getrusage(RUSAGE_CHILDREN, &after), SyscallSucceeds());
+  EXPECT_EQ(before.ru_utime.tv_sec, after.ru_utime.tv_sec);
+  EXPECT_EQ(before.ru_utime.tv_usec, after.ru_utime.tv_usec);
+  EXPECT_EQ(before.ru_stime.tv_sec, after.ru_stime.tv_sec);
+  EXPECT_EQ(before.ru_stime.tv_usec, after.ru_stime.tv_usec);
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    Waiters, WaitAnyChildTest,
+    ::testing::Values(
+        [](int code, int options) -> PosixErrorOr<pid_t> {
+          int status;
+          auto const pid = Wait4(-1, &status, options, nullptr);
+          MaybeSave();
+          if (pid < 0) {
+            return PosixError(errno, "wait4");
+          }
+          if (!WIFEXITED(status) || WEXITSTATUS(status) != code) {
+            return PosixError(
+                EINVAL, absl::StrCat("unexpected wait status: got ", status,
+                                     ", wanted ", code));
+          }
+          return static_cast<pid_t>(pid);
+        },
+        [](int code, int options) -> PosixErrorOr<pid_t> {
+          siginfo_t si;
+          auto const rv = Waitid(P_ALL, 0, &si, WEXITED | options);
+          MaybeSave();
+          if (rv < 0) {
+            return PosixError(errno, "waitid");
+          }
+          if (si.si_signo != SIGCHLD) {
+            return PosixError(
+                EINVAL, absl::StrCat("unexpected signo: got ", si.si_signo,
+                                     ", wanted ", SIGCHLD));
+          }
+          if (si.si_status != code) {
+            return PosixError(
+                EINVAL, absl::StrCat("unexpected status: got ", si.si_status,
+                                     ", wanted ", code));
+          }
+          if (si.si_code != CLD_EXITED) {
+            return PosixError(EINVAL,
+                              absl::StrCat("unexpected code: got ", si.si_code,
+                                           ", wanted ", CLD_EXITED));
+          }
+          auto const uid = getuid();
+          if (si.si_uid != uid) {
+            return PosixError(EINVAL,
+                              absl::StrCat("unexpected uid: got ", si.si_uid,
+                                           ", wanted ", uid));
+          }
+          return static_cast<pid_t>(si.si_pid);
+        }));
+
+// Fixture for tests parameterized by a (sysno, function) tuple. The function
+// takes the PID of a specific child to wait for, waits for it to exit, and
+// checks that it exits with the given code.
+class WaitSpecificChildTest
+    : public ::testing::TestWithParam<
+          std::tuple<int, std::function<PosixError(pid_t, int, int)>>> {
+ protected:
+  int Sysno() { return std::get<0>(GetParam()); }
+
+  PosixError WaitForWithOptions(pid_t pid, int options, int code) {
+    return std::get<1>(GetParam())(pid, options, code);
+  }
+
+  PosixError WaitFor(pid_t pid, int code) {
+    return std::get<1>(GetParam())(pid, 0, code);
+  }
+};
+
+// Wait for specific child to exit.
+TEST_P(WaitSpecificChildTest, Fork) {
+  pid_t child;
+  ASSERT_THAT(child = ForkAndExit(0, 0), SyscallSucceeds());
+
+  EXPECT_NO_ERRNO(WaitFor(child, 0));
+}
+
+// Non-zero exit codes are correctly propagated.
+TEST_P(WaitSpecificChildTest, NormalExit) {
+  pid_t child;
+  ASSERT_THAT(child = ForkAndExit(42, 0), SyscallSucceeds());
+
+  EXPECT_NO_ERRNO(WaitFor(child, 42));
+}
+
+// Wait for multiple children to exit.
+TEST_P(WaitSpecificChildTest, MultipleFork) {
+  pid_t child1, child2;
+  ASSERT_THAT(child1 = ForkAndExit(0, 0), SyscallSucceeds());
+  ASSERT_THAT(child2 = ForkAndExit(0, 0), SyscallSucceeds());
+
+  EXPECT_NO_ERRNO(WaitFor(child1, 0));
+  EXPECT_NO_ERRNO(WaitFor(child2, 0));
+}
+
+// Wait for multiple children to exit, out of the order they were created.
+TEST_P(WaitSpecificChildTest, MultipleForkOutOfOrder) {
+  pid_t child1, child2;
+  ASSERT_THAT(child1 = ForkAndExit(0, 0), SyscallSucceeds());
+  ASSERT_THAT(child2 = ForkAndExit(0, 0), SyscallSucceeds());
+
+  EXPECT_NO_ERRNO(WaitFor(child2, 0));
+  EXPECT_NO_ERRNO(WaitFor(child1, 0));
+}
+
+// Wait for specific child to exit, entering wait4 before the exit occurs.
+TEST_P(WaitSpecificChildTest, ForkSleep) {
+  pid_t child;
+  ASSERT_THAT(child = ForkAndExit(0, 5), SyscallSucceeds());
+
+  EXPECT_NO_ERRNO(WaitFor(child, 0));
+}
+
+// Wait should block until the child exits.
+TEST_P(WaitSpecificChildTest, ForkBlock) {
+  pid_t child;
+
+  auto start = absl::Now();
+  ASSERT_THAT(child = ForkAndExit(0, 5), SyscallSucceeds());
+
+  EXPECT_NO_ERRNO(WaitFor(child, 0));
+
+  EXPECT_GE(absl::Now() - start, absl::Seconds(5));
+}
+
+// Waiting after the child has already exited returns immediately.
+TEST_P(WaitSpecificChildTest, AfterExit) {
+  pid_t child;
+  ASSERT_THAT(child = ForkAndExit(0, 0), SyscallSucceeds());
+
+  absl::SleepFor(absl::Seconds(5));
+
+  EXPECT_NO_ERRNO(WaitFor(child, 0));
+}
+
+// Wait for child of sibling thread.
+TEST_P(WaitSpecificChildTest, SiblingChildren) {
+  absl::Mutex mu;
+  pid_t child;
+  bool ready = false;
+  bool stop = false;
+
+  ScopedThread t([&] {
+    absl::MutexLock ml(&mu);
+    EXPECT_THAT(child = ForkAndExit(0, 0), SyscallSucceeds());
+    ready = true;
+    mu.Await(absl::Condition(&stop));
+  });
+
+  // N.B. This must be declared after ScopedThread, so it is destructed first,
+  // thus waking the thread.
+  absl::MutexLock ml(&mu);
+  mu.Await(absl::Condition(&ready));
+
+  EXPECT_NO_ERRNO(WaitFor(child, 0));
+
+  // Keep the sibling alive until after we've waited so the child isn't
+  // reparented.
+  stop = true;
+}
+
+// Waiting for child of sibling thread not allowed with WNOTHREAD.
+TEST_P(WaitSpecificChildTest, SiblingChildrenWNOTHREAD) {
+  // Linux added WNOTHREAD support to waitid(2) in
+  // 91c4e8ea8f05916df0c8a6f383508ac7c9e10dba ("wait: allow sys_waitid() to
+  // accept __WNOTHREAD/__WCLONE/__WALL"). i.e., Linux 4.7.
+  //
+  // Skip the test if it isn't supported yet.
+  if (Sysno() == SYS_waitid) {
+    int ret = waitid(P_ALL, 0, nullptr, WEXITED | WNOHANG | __WNOTHREAD);
+    SKIP_IF(ret < 0 && errno == EINVAL);
+  }
+
+  absl::Mutex mu;
+  pid_t child;
+  bool ready = false;
+  bool stop = false;
+
+  ScopedThread t([&] {
+    absl::MutexLock ml(&mu);
+    EXPECT_THAT(child = ForkAndExit(0, 0), SyscallSucceeds());
+    ready = true;
+    mu.Await(absl::Condition(&stop));
+
+    // This thread can wait on child.
+    EXPECT_NO_ERRNO(WaitForWithOptions(child, __WNOTHREAD, 0));
+  });
+
+  // N.B. This must be declared after ScopedThread, so it is destructed first,
+  // thus waking the thread.
+  absl::MutexLock ml(&mu);
+  mu.Await(absl::Condition(&ready));
+
+  // This thread can't wait on child.
+  EXPECT_THAT(WaitForWithOptions(child, __WNOTHREAD, 0),
+              PosixErrorIs(ECHILD, ::testing::_));
+
+  // Keep the sibling alive until after we've waited so the child isn't
+  // reparented.
+  stop = true;
+}
+
+// Wait for specific child to exit.
+// A non-CLONE_THREAD child which sends SIGCHLD upon exit behaves much like
+// a forked process.
+TEST_P(WaitSpecificChildTest, CloneSIGCHLD) {
+  uintptr_t stack;
+  ASSERT_THAT(stack = AllocStack(), SyscallSucceeds());
+  auto free =
+      Cleanup([stack] { ASSERT_THAT(FreeStack(stack), SyscallSucceeds()); });
+
+  int child;
+  ASSERT_THAT(child = CloneAndExit(0, stack, SIGCHLD), SyscallSucceeds());
+
+  EXPECT_NO_ERRNO(WaitFor(child, 0));
+}
+
+// Wait for specific child to exit.
+// A non-CLONE_THREAD child which does not send SIGCHLD upon exit can be waited
+// on, but returns ECHILD.
+TEST_P(WaitSpecificChildTest, CloneNoSIGCHLD) {
+  uintptr_t stack;
+  ASSERT_THAT(stack = AllocStack(), SyscallSucceeds());
+  auto free =
+      Cleanup([stack] { ASSERT_THAT(FreeStack(stack), SyscallSucceeds()); });
+
+  int child;
+  ASSERT_THAT(child = CloneAndExit(0, stack, 0), SyscallSucceeds());
+
+  EXPECT_THAT(WaitFor(child, 0), PosixErrorIs(ECHILD, ::testing::_));
+}
+
+// Waiting after the child has already exited returns immediately.
+TEST_P(WaitSpecificChildTest, CloneAfterExit) {
+  uintptr_t stack;
+  ASSERT_THAT(stack = AllocStack(), SyscallSucceeds());
+  auto free =
+      Cleanup([stack] { ASSERT_THAT(FreeStack(stack), SyscallSucceeds()); });
+
+  int child;
+  // Send SIGCHLD for normal wait semantics.
+  ASSERT_THAT(child = CloneAndExit(0, stack, SIGCHLD), SyscallSucceeds());
+
+  absl::SleepFor(absl::Seconds(5));
+
+  EXPECT_NO_ERRNO(WaitFor(child, 0));
+}
+
+// A CLONE_THREAD child cannot be waited on.
+TEST_P(WaitSpecificChildTest, CloneThread) {
+  uintptr_t stack;
+  ASSERT_THAT(stack = AllocStack(), SyscallSucceeds());
+  auto free =
+      Cleanup([stack] { ASSERT_THAT(FreeStack(stack), SyscallSucceeds()); });
+
+  int child;
+  ASSERT_THAT(child = CloneAndExit(15, stack, CLONE_THREAD), SyscallSucceeds());
+  auto start = absl::Now();
+
+  EXPECT_THAT(WaitFor(child, 0), PosixErrorIs(ECHILD, ::testing::_));
+
+  // Ensure wait4 didn't block.
+  EXPECT_LE(absl::Now() - start, absl::Seconds(10));
+
+  // Since we can't wait on the child, we sleep to try to avoid freeing its
+  // stack before it exits.
+  absl::SleepFor(absl::Seconds(5));
+}
+
+// A child that does not send a SIGCHLD on exit may be waited on with
+// the __WCLONE flag.
+TEST_P(WaitSpecificChildTest, CloneWCLONE) {
+  // Linux added WCLONE support to waitid(2) in
+  // 91c4e8ea8f05916df0c8a6f383508ac7c9e10dba ("wait: allow sys_waitid() to
+  // accept __WNOTHREAD/__WCLONE/__WALL"). i.e., Linux 4.7.
+  //
+  // Skip the test if it isn't supported yet.
+  if (Sysno() == SYS_waitid) {
+    int ret = waitid(P_ALL, 0, nullptr, WEXITED | WNOHANG | __WCLONE);
+    SKIP_IF(ret < 0 && errno == EINVAL);
+  }
+
+  uintptr_t stack;
+  ASSERT_THAT(stack = AllocStack(), SyscallSucceeds());
+  auto free =
+      Cleanup([stack] { ASSERT_THAT(FreeStack(stack), SyscallSucceeds()); });
+
+  int child;
+  ASSERT_THAT(child = CloneAndExit(0, stack, 0), SyscallSucceeds());
+
+  EXPECT_NO_ERRNO(WaitForWithOptions(child, __WCLONE, 0));
+}
+
+// A forked child cannot be waited on with WCLONE.
+TEST_P(WaitSpecificChildTest, ForkWCLONE) {
+  // Linux added WCLONE support to waitid(2) in
+  // 91c4e8ea8f05916df0c8a6f383508ac7c9e10dba ("wait: allow sys_waitid() to
+  // accept __WNOTHREAD/__WCLONE/__WALL"). i.e., Linux 4.7.
+  //
+  // Skip the test if it isn't supported yet.
+  if (Sysno() == SYS_waitid) {
+    int ret = waitid(P_ALL, 0, nullptr, WEXITED | WNOHANG | __WCLONE);
+    SKIP_IF(ret < 0 && errno == EINVAL);
+  }
+
+  pid_t child;
+  ASSERT_THAT(child = ForkAndExit(0, 0), SyscallSucceeds());
+
+  EXPECT_THAT(WaitForWithOptions(child, WNOHANG | __WCLONE, 0),
+              PosixErrorIs(ECHILD, ::testing::_));
+
+  EXPECT_NO_ERRNO(WaitFor(child, 0));
+}
+
+// Any type of child can be waited on with WALL.
+TEST_P(WaitSpecificChildTest, WALL) {
+  // Linux added WALL support to waitid(2) in
+  // 91c4e8ea8f05916df0c8a6f383508ac7c9e10dba ("wait: allow sys_waitid() to
+  // accept __WNOTHREAD/__WCLONE/__WALL"). i.e., Linux 4.7.
+  //
+  // Skip the test if it isn't supported yet.
+  if (Sysno() == SYS_waitid) {
+    int ret = waitid(P_ALL, 0, nullptr, WEXITED | WNOHANG | __WALL);
+    SKIP_IF(ret < 0 && errno == EINVAL);
+  }
+
+  pid_t child;
+  ASSERT_THAT(child = ForkAndExit(0, 0), SyscallSucceeds());
+
+  EXPECT_NO_ERRNO(WaitForWithOptions(child, __WALL, 0));
+
+  uintptr_t stack;
+  ASSERT_THAT(stack = AllocStack(), SyscallSucceeds());
+  auto free =
+      Cleanup([stack] { ASSERT_THAT(FreeStack(stack), SyscallSucceeds()); });
+
+  ASSERT_THAT(child = CloneAndExit(0, stack, 0), SyscallSucceeds());
+
+  EXPECT_NO_ERRNO(WaitForWithOptions(child, __WALL, 0));
+}
+
+// Return ECHILD for bad child.
+TEST_P(WaitSpecificChildTest, BadChild) {
+  EXPECT_THAT(WaitFor(42, 0), PosixErrorIs(ECHILD, ::testing::_));
+}
+
+// Wait for a child process that only exits after calling execve(2) from a
+// non-leader thread.
+TEST_P(WaitSpecificChildTest, AfterChildExecve) {
+  ExecveArray const owned_child_argv = {"/bin/true"};
+  char* const* const child_argv = owned_child_argv.get();
+
+  uintptr_t stack;
+  ASSERT_THAT(stack = AllocStack(), SyscallSucceeds());
+  auto free =
+      Cleanup([stack] { ASSERT_THAT(FreeStack(stack), SyscallSucceeds()); });
+
+  pid_t const child = fork();
+  if (child == 0) {
+    // Give the parent some time to start waiting.
+    SleepSafe(absl::Seconds(5));
+    // Pass CLONE_VFORK to block the original thread in the child process until
+    // the clone thread calls execve, annihilating them both. (This means that
+    // if clone returns at all, something went wrong.)
+    //
+    // N.B. clone(2) is not officially async-signal-safe, but at minimum glibc's
+    // x86_64 implementation is safe. See glibc
+    // sysdeps/unix/sysv/linux/x86_64/clone.S.
+    clone(
+        +[](void* arg) {
+          auto child_argv = static_cast<char* const*>(arg);
+          execve(child_argv[0], child_argv, /* envp = */ nullptr);
+          return errno;
+        },
+        reinterpret_cast<void*>(stack),
+        CLONE_FILES | CLONE_FS | CLONE_SIGHAND | CLONE_THREAD | CLONE_VM |
+            CLONE_VFORK,
+        const_cast<char**>(child_argv));
+    _exit(errno);
+  }
+  ASSERT_THAT(child, SyscallSucceeds());
+  EXPECT_NO_ERRNO(WaitFor(child, 0));
+}
+
+PosixError CheckWait4(pid_t pid, int options, int code) {
+  int status;
+  auto const rv = Wait4(pid, &status, options, nullptr);
+  MaybeSave();
+  if (rv < 0) {
+    return PosixError(errno, "wait4");
+  } else if (rv != pid) {
+    return PosixError(
+        EINVAL, absl::StrCat("unexpected pid: got ", rv, ", wanted ", pid));
+  }
+  if (!WIFEXITED(status) || WEXITSTATUS(status) != code) {
+    return PosixError(EINVAL, absl::StrCat("unexpected wait status: got ",
+                                           status, ", wanted ", code));
+  }
+  return NoError();
+};
+
+PosixError CheckWaitid(pid_t pid, int options, int code) {
+  siginfo_t si;
+  auto const rv = Waitid(P_PID, pid, &si, options | WEXITED);
+  MaybeSave();
+  if (rv < 0) {
+    return PosixError(errno, "waitid");
+  }
+  if (si.si_pid != pid) {
+    return PosixError(EINVAL, absl::StrCat("unexpected pid: got ", si.si_pid,
+                                           ", wanted ", pid));
+  }
+  if (si.si_signo != SIGCHLD) {
+    return PosixError(EINVAL, absl::StrCat("unexpected signo: got ",
+                                           si.si_signo, ", wanted ", SIGCHLD));
+  }
+  if (si.si_status != code) {
+    return PosixError(EINVAL, absl::StrCat("unexpected status: got ",
+                                           si.si_status, ", wanted ", code));
+  }
+  if (si.si_code != CLD_EXITED) {
+    return PosixError(EINVAL, absl::StrCat("unexpected code: got ", si.si_code,
+                                           ", wanted ", CLD_EXITED));
+  }
+  return NoError();
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    Waiters, WaitSpecificChildTest,
+    ::testing::Values(std::make_tuple(SYS_wait4, CheckWait4),
+                      std::make_tuple(SYS_waitid, CheckWaitid)));
+
+// WIFEXITED, WIFSIGNALED, WTERMSIG indicate signal exit.
+TEST(WaitTest, SignalExit) {
+  pid_t child;
+  ASSERT_THAT(child = ForkAndExit(0, 10), SyscallSucceeds());
+
+  EXPECT_THAT(kill(child, SIGKILL), SyscallSucceeds());
+
+  int status;
+  EXPECT_THAT(Wait4(child, &status, 0, nullptr),
+              SyscallSucceedsWithValue(child));
+
+  EXPECT_FALSE(WIFEXITED(status));
+  EXPECT_TRUE(WIFSIGNALED(status));
+  EXPECT_EQ(SIGKILL, WTERMSIG(status));
+}
+
+// waitid requires at least one option.
+TEST(WaitTest, WaitidOptions) {
+  EXPECT_THAT(Waitid(P_ALL, 0, nullptr, 0), SyscallFailsWithErrno(EINVAL));
+}
+
+// waitid does not wait for a child to exit if not passed WEXITED.
+TEST(WaitTest, WaitidNoWEXITED) {
+  pid_t child;
+  ASSERT_THAT(child = ForkAndExit(0, 0), SyscallSucceeds());
+  EXPECT_THAT(Waitid(P_ALL, 0, nullptr, WSTOPPED),
+              SyscallFailsWithErrno(ECHILD));
+  EXPECT_THAT(Waitid(P_ALL, 0, nullptr, WEXITED), SyscallSucceeds());
+}
+
+// WNOWAIT allows the same wait result to be returned again.
+TEST(WaitTest, WaitidWNOWAIT) {
+  pid_t child;
+  ASSERT_THAT(child = ForkAndExit(42, 0), SyscallSucceeds());
+
+  siginfo_t info;
+  ASSERT_THAT(Waitid(P_PID, child, &info, WEXITED | WNOWAIT),
+              SyscallSucceeds());
+  EXPECT_EQ(child, info.si_pid);
+  EXPECT_EQ(SIGCHLD, info.si_signo);
+  EXPECT_EQ(CLD_EXITED, info.si_code);
+  EXPECT_EQ(42, info.si_status);
+
+  ASSERT_THAT(Waitid(P_PID, child, &info, WEXITED), SyscallSucceeds());
+  EXPECT_EQ(child, info.si_pid);
+  EXPECT_EQ(SIGCHLD, info.si_signo);
+  EXPECT_EQ(CLD_EXITED, info.si_code);
+  EXPECT_EQ(42, info.si_status);
+
+  EXPECT_THAT(Waitid(P_PID, child, &info, WEXITED),
+              SyscallFailsWithErrno(ECHILD));
+}
+
+// waitpid(pid, status, options) is equivalent to
+// wait4(pid, status, options, nullptr).
+// This is a dedicated syscall on i386, glibc maps it to wait4 on amd64.
+TEST(WaitTest, WaitPid) {
+  pid_t child;
+  ASSERT_THAT(child = ForkAndExit(42, 0), SyscallSucceeds());
+
+  int status;
+  EXPECT_THAT(RetryEINTR(waitpid)(child, &status, 0),
+              SyscallSucceedsWithValue(child));
+
+  EXPECT_TRUE(WIFEXITED(status));
+  EXPECT_EQ(42, WEXITSTATUS(status));
+}
+
+// Test that signaling a zombie succeeds. This is a signals test that is in this
+// file for some reason.
+TEST(WaitTest, KillZombie) {
+  pid_t child;
+  ASSERT_THAT(child = ForkAndExit(42, 0), SyscallSucceeds());
+
+  // Sleep for three seconds to ensure the child has exited.
+  absl::SleepFor(absl::Seconds(3));
+
+  // The child is now a zombie. Check that killing it returns 0.
+  EXPECT_THAT(kill(child, SIGTERM), SyscallSucceeds());
+  EXPECT_THAT(kill(child, 0), SyscallSucceeds());
+
+  EXPECT_THAT(Wait4(child, nullptr, 0, nullptr),
+              SyscallSucceedsWithValue(child));
+}
+
+TEST(WaitTest, Wait4Rusage) {
+  pid_t child;
+  constexpr absl::Duration kSpin = absl::Seconds(3);
+  ASSERT_THAT(child = ForkSpinAndExit(21, absl::ToInt64Seconds(kSpin)),
+              SyscallSucceeds());
+
+  int status;
+  struct rusage rusage = {};
+  ASSERT_THAT(Wait4(child, &status, 0, &rusage),
+              SyscallSucceedsWithValue(child));
+
+  EXPECT_TRUE(WIFEXITED(status));
+  EXPECT_EQ(21, WEXITSTATUS(status));
+
+  EXPECT_GE(RusageCpuTime(rusage), kSpin);
+}
+
+TEST(WaitTest, WaitidRusage) {
+  pid_t child;
+  constexpr absl::Duration kSpin = absl::Seconds(3);
+  ASSERT_THAT(child = ForkSpinAndExit(27, absl::ToInt64Seconds(kSpin)),
+              SyscallSucceeds());
+
+  siginfo_t si = {};
+  struct rusage rusage = {};
+
+  // From waitid(2):
+  // The  raw  waitid()  system  call  takes a fifth argument, of type
+  // struct rusage *. If this argument is non-NULL, then  it  is  used
+  // to return resource  usage  information  about  the  child,  in the
+  // same manner as wait4(2).
+  EXPECT_THAT(
+      RetryEINTR(syscall)(SYS_waitid, P_PID, child, &si, WEXITED, &rusage),
+      SyscallSucceeds());
+  EXPECT_EQ(si.si_signo, SIGCHLD);
+  EXPECT_EQ(si.si_code, CLD_EXITED);
+  EXPECT_EQ(si.si_status, 27);
+  EXPECT_EQ(si.si_pid, child);
+
+  EXPECT_GE(RusageCpuTime(rusage), kSpin);
+}
+
+// After bf959931ddb88c4e4366e96dd22e68fa0db9527c ("wait/ptrace: assume __WALL
+// if the child is traced") (Linux 4.7), tracees are always eligible for
+// waiting, regardless of type.
+TEST(WaitTest, TraceeWALL) {
+  int fds[2];
+  ASSERT_THAT(pipe(fds), SyscallSucceeds());
+  FileDescriptor rfd(fds[0]);
+  FileDescriptor wfd(fds[1]);
+
+  pid_t child = fork();
+  if (child == 0) {
+    // Child.
+    rfd.reset();
+
+    TEST_PCHECK(ptrace(PTRACE_TRACEME, 0, nullptr, nullptr) == 0);
+
+    // Notify parent that we're now a tracee.
+    wfd.reset();
+
+    _exit(0);
+  }
+  ASSERT_THAT(child, SyscallSucceeds());
+
+  wfd.reset();
+
+  // Wait for child to become tracee.
+  char c;
+  EXPECT_THAT(ReadFd(rfd.get(), &c, sizeof(c)), SyscallSucceedsWithValue(0));
+
+  // We can wait on the fork child with WCLONE, as it is a tracee.
+  int status;
+  if (IsRunningOnGvisor()) {
+    ASSERT_THAT(Wait4(child, &status, __WCLONE, nullptr),
+                SyscallSucceedsWithValue(child));
+
+    EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) << status;
+  } else {
+    // On older versions of Linux, we may get ECHILD.
+    ASSERT_THAT(Wait4(child, &status, __WCLONE, nullptr),
+                ::testing::AnyOf(SyscallSucceedsWithValue(child),
+                                 SyscallFailsWithErrno(ECHILD)));
+  }
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/write.cc b/test/syscalls/linux/write.cc
new file mode 100644
index 000000000..39b5b2f56
--- /dev/null
+++ b/test/syscalls/linux/write.cc
@@ -0,0 +1,139 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <sys/resource.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "test/util/cleanup.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// TODO(gvisor.dev/issue/2370): This test is currently very rudimentary.
+class WriteTest : public ::testing::Test {
+ public:
+  ssize_t WriteBytes(int fd, int bytes) {
+    std::vector<char> buf(bytes);
+    std::fill(buf.begin(), buf.end(), 'a');
+    return WriteFd(fd, buf.data(), buf.size());
+  }
+};
+
+TEST_F(WriteTest, WriteNoExceedsRLimit) {
+  // Get the current rlimit and restore after test run.
+  struct rlimit initial_lim;
+  ASSERT_THAT(getrlimit(RLIMIT_FSIZE, &initial_lim), SyscallSucceeds());
+  auto cleanup = Cleanup([&initial_lim] {
+    EXPECT_THAT(setrlimit(RLIMIT_FSIZE, &initial_lim), SyscallSucceeds());
+  });
+
+  int fd;
+  struct rlimit setlim;
+  const int target_lim = 1024;
+  setlim.rlim_cur = target_lim;
+  setlim.rlim_max = RLIM_INFINITY;
+  const std::string pathname = NewTempAbsPath();
+  ASSERT_THAT(fd = open(pathname.c_str(), O_WRONLY | O_CREAT, S_IRWXU),
+              SyscallSucceeds());
+  ASSERT_THAT(setrlimit(RLIMIT_FSIZE, &setlim), SyscallSucceeds());
+
+  EXPECT_THAT(WriteBytes(fd, target_lim), SyscallSucceedsWithValue(target_lim));
+
+  std::vector<char> buf(target_lim + 1);
+  std::fill(buf.begin(), buf.end(), 'a');
+  EXPECT_THAT(pwrite(fd, buf.data(), target_lim, 1), SyscallSucceeds());
+  EXPECT_THAT(pwrite64(fd, buf.data(), target_lim, 1), SyscallSucceeds());
+
+  EXPECT_THAT(close(fd), SyscallSucceeds());
+}
+
+TEST_F(WriteTest, WriteExceedsRLimit) {
+  // Get the current rlimit and restore after test run.
+  struct rlimit initial_lim;
+  ASSERT_THAT(getrlimit(RLIMIT_FSIZE, &initial_lim), SyscallSucceeds());
+  auto cleanup = Cleanup([&initial_lim] {
+    EXPECT_THAT(setrlimit(RLIMIT_FSIZE, &initial_lim), SyscallSucceeds());
+  });
+
+  int fd;
+  sigset_t filesize_mask;
+  sigemptyset(&filesize_mask);
+  sigaddset(&filesize_mask, SIGXFSZ);
+
+  struct rlimit setlim;
+  const int target_lim = 1024;
+  setlim.rlim_cur = target_lim;
+  setlim.rlim_max = RLIM_INFINITY;
+
+  const std::string pathname = NewTempAbsPath();
+  ASSERT_THAT(fd = open(pathname.c_str(), O_WRONLY | O_CREAT, S_IRWXU),
+              SyscallSucceeds());
+  ASSERT_THAT(setrlimit(RLIMIT_FSIZE, &setlim), SyscallSucceeds());
+  ASSERT_THAT(sigprocmask(SIG_BLOCK, &filesize_mask, nullptr),
+              SyscallSucceeds());
+  std::vector<char> buf(target_lim + 2);
+  std::fill(buf.begin(), buf.end(), 'a');
+
+  EXPECT_THAT(write(fd, buf.data(), target_lim + 1),
+              SyscallSucceedsWithValue(target_lim));
+  EXPECT_THAT(write(fd, buf.data(), 1), SyscallFailsWithErrno(EFBIG));
+  siginfo_t info;
+  struct timespec timelimit = {0, 0};
+  ASSERT_THAT(RetryEINTR(sigtimedwait)(&filesize_mask, &info, &timelimit),
+              SyscallSucceedsWithValue(SIGXFSZ));
+  EXPECT_EQ(info.si_code, SI_USER);
+  EXPECT_EQ(info.si_pid, getpid());
+  EXPECT_EQ(info.si_uid, getuid());
+
+  EXPECT_THAT(pwrite(fd, buf.data(), target_lim + 1, 1),
+              SyscallSucceedsWithValue(target_lim - 1));
+  EXPECT_THAT(pwrite(fd, buf.data(), 1, target_lim),
+              SyscallFailsWithErrno(EFBIG));
+  ASSERT_THAT(RetryEINTR(sigtimedwait)(&filesize_mask, &info, &timelimit),
+              SyscallSucceedsWithValue(SIGXFSZ));
+  EXPECT_EQ(info.si_code, SI_USER);
+  EXPECT_EQ(info.si_pid, getpid());
+  EXPECT_EQ(info.si_uid, getuid());
+
+  EXPECT_THAT(pwrite64(fd, buf.data(), target_lim + 1, 1),
+              SyscallSucceedsWithValue(target_lim - 1));
+  EXPECT_THAT(pwrite64(fd, buf.data(), 1, target_lim),
+              SyscallFailsWithErrno(EFBIG));
+  ASSERT_THAT(RetryEINTR(sigtimedwait)(&filesize_mask, &info, &timelimit),
+              SyscallSucceedsWithValue(SIGXFSZ));
+  EXPECT_EQ(info.si_code, SI_USER);
+  EXPECT_EQ(info.si_pid, getpid());
+  EXPECT_EQ(info.si_uid, getuid());
+
+  ASSERT_THAT(sigprocmask(SIG_UNBLOCK, &filesize_mask, nullptr),
+              SyscallSucceeds());
+  EXPECT_THAT(close(fd), SyscallSucceeds());
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor
diff --git a/test/syscalls/linux/xattr.cc b/test/syscalls/linux/xattr.cc
new file mode 100644
index 000000000..cbcf08451
--- /dev/null
+++ b/test/syscalls/linux/xattr.cc
@@ -0,0 +1,610 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <sys/types.h>
+#include <sys/xattr.h>
+#include <unistd.h>
+
+#include <string>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/container/flat_hash_set.h"
+#include "test/syscalls/linux/file_base.h"
+#include "test/util/capability_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/posix_error.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+class XattrTest : public FileTest {};
+
+TEST_F(XattrTest, XattrNonexistentFile) {
+  const char* path = "/does/not/exist";
+  const char* name = "user.test";
+  EXPECT_THAT(setxattr(path, name, nullptr, 0, /*flags=*/0),
+              SyscallFailsWithErrno(ENOENT));
+  EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallFailsWithErrno(ENOENT));
+  EXPECT_THAT(listxattr(path, nullptr, 0), SyscallFailsWithErrno(ENOENT));
+  EXPECT_THAT(removexattr(path, name), SyscallFailsWithErrno(ENOENT));
+}
+
+TEST_F(XattrTest, XattrNullName) {
+  const char* path = test_file_name_.c_str();
+
+  EXPECT_THAT(setxattr(path, nullptr, nullptr, 0, /*flags=*/0),
+              SyscallFailsWithErrno(EFAULT));
+  EXPECT_THAT(getxattr(path, nullptr, nullptr, 0),
+              SyscallFailsWithErrno(EFAULT));
+  EXPECT_THAT(removexattr(path, nullptr), SyscallFailsWithErrno(EFAULT));
+}
+
+TEST_F(XattrTest, XattrEmptyName) {
+  const char* path = test_file_name_.c_str();
+
+  EXPECT_THAT(setxattr(path, "", nullptr, 0, /*flags=*/0),
+              SyscallFailsWithErrno(ERANGE));
+  EXPECT_THAT(getxattr(path, "", nullptr, 0), SyscallFailsWithErrno(ERANGE));
+  EXPECT_THAT(removexattr(path, ""), SyscallFailsWithErrno(ERANGE));
+}
+
+TEST_F(XattrTest, XattrLargeName) {
+  const char* path = test_file_name_.c_str();
+  std::string name = "user.";
+  name += std::string(XATTR_NAME_MAX - name.length(), 'a');
+
+  if (!IsRunningOnGvisor()) {
+    // In gVisor, access to xattrs is controlled with an explicit list of
+    // allowed names. This name isn't going to be configured to allow access, so
+    // don't test it.
+    EXPECT_THAT(setxattr(path, name.c_str(), nullptr, 0, /*flags=*/0),
+                SyscallSucceeds());
+    EXPECT_THAT(getxattr(path, name.c_str(), nullptr, 0),
+                SyscallSucceedsWithValue(0));
+  }
+
+  name += "a";
+  EXPECT_THAT(setxattr(path, name.c_str(), nullptr, 0, /*flags=*/0),
+              SyscallFailsWithErrno(ERANGE));
+  EXPECT_THAT(getxattr(path, name.c_str(), nullptr, 0),
+              SyscallFailsWithErrno(ERANGE));
+  EXPECT_THAT(removexattr(path, name.c_str()), SyscallFailsWithErrno(ERANGE));
+}
+
+TEST_F(XattrTest, XattrInvalidPrefix) {
+  const char* path = test_file_name_.c_str();
+  std::string name(XATTR_NAME_MAX, 'a');
+  EXPECT_THAT(setxattr(path, name.c_str(), nullptr, 0, /*flags=*/0),
+              SyscallFailsWithErrno(EOPNOTSUPP));
+  EXPECT_THAT(getxattr(path, name.c_str(), nullptr, 0),
+              SyscallFailsWithErrno(EOPNOTSUPP));
+  EXPECT_THAT(removexattr(path, name.c_str()),
+              SyscallFailsWithErrno(EOPNOTSUPP));
+}
+
+// Do not allow save/restore cycles after making the test file read-only, as
+// the restore will fail to open it with r/w permissions.
+TEST_F(XattrTest, XattrReadOnly_NoRandomSave) {
+  // Drop capabilities that allow us to override file and directory permissions.
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false));
+
+  const char* path = test_file_name_.c_str();
+  const char name[] = "user.test";
+  char val = 'a';
+  size_t size = sizeof(val);
+
+  EXPECT_THAT(setxattr(path, name, &val, size, /*flags=*/0), SyscallSucceeds());
+
+  DisableSave ds;
+  ASSERT_NO_ERRNO(testing::Chmod(test_file_name_, S_IRUSR));
+
+  EXPECT_THAT(setxattr(path, name, &val, size, /*flags=*/0),
+              SyscallFailsWithErrno(EACCES));
+  EXPECT_THAT(removexattr(path, name), SyscallFailsWithErrno(EACCES));
+
+  char buf = '-';
+  EXPECT_THAT(getxattr(path, name, &buf, size), SyscallSucceedsWithValue(size));
+  EXPECT_EQ(buf, val);
+
+  char list[sizeof(name)];
+  EXPECT_THAT(listxattr(path, list, sizeof(list)),
+              SyscallSucceedsWithValue(sizeof(name)));
+  EXPECT_STREQ(list, name);
+}
+
+// Do not allow save/restore cycles after making the test file write-only, as
+// the restore will fail to open it with r/w permissions.
+TEST_F(XattrTest, XattrWriteOnly_NoRandomSave) {
+  // Drop capabilities that allow us to override file and directory permissions.
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+  ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false));
+
+  DisableSave ds;
+  ASSERT_NO_ERRNO(testing::Chmod(test_file_name_, S_IWUSR));
+
+  const char* path = test_file_name_.c_str();
+  const char name[] = "user.test";
+  char val = 'a';
+  size_t size = sizeof(val);
+
+  EXPECT_THAT(setxattr(path, name, &val, size, /*flags=*/0), SyscallSucceeds());
+
+  EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallFailsWithErrno(EACCES));
+
+  // listxattr will succeed even without read permissions.
+  char list[sizeof(name)];
+  EXPECT_THAT(listxattr(path, list, sizeof(list)),
+              SyscallSucceedsWithValue(sizeof(name)));
+  EXPECT_STREQ(list, name);
+
+  EXPECT_THAT(removexattr(path, name), SyscallSucceeds());
+}
+
+TEST_F(XattrTest, XattrTrustedWithNonadmin) {
+  // TODO(b/148380782): Support setxattr and getxattr with "trusted" prefix.
+  SKIP_IF(IsRunningOnGvisor());
+  SKIP_IF(ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN)));
+
+  const char* path = test_file_name_.c_str();
+  const char name[] = "trusted.abc";
+  EXPECT_THAT(setxattr(path, name, nullptr, 0, /*flags=*/0),
+              SyscallFailsWithErrno(EPERM));
+  EXPECT_THAT(removexattr(path, name), SyscallFailsWithErrno(EPERM));
+  EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallFailsWithErrno(ENODATA));
+}
+
+TEST_F(XattrTest, XattrOnDirectory) {
+  TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  const char name[] = "user.test";
+  EXPECT_THAT(setxattr(dir.path().c_str(), name, nullptr, 0, /*flags=*/0),
+              SyscallSucceeds());
+  EXPECT_THAT(getxattr(dir.path().c_str(), name, nullptr, 0),
+              SyscallSucceedsWithValue(0));
+
+  char list[sizeof(name)];
+  EXPECT_THAT(listxattr(dir.path().c_str(), list, sizeof(list)),
+              SyscallSucceedsWithValue(sizeof(name)));
+  EXPECT_STREQ(list, name);
+
+  EXPECT_THAT(removexattr(dir.path().c_str(), name), SyscallSucceeds());
+}
+
+TEST_F(XattrTest, XattrOnSymlink) {
+  TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  TempPath link = ASSERT_NO_ERRNO_AND_VALUE(
+      TempPath::CreateSymlinkTo(dir.path(), test_file_name_));
+  const char name[] = "user.test";
+  EXPECT_THAT(setxattr(link.path().c_str(), name, nullptr, 0, /*flags=*/0),
+              SyscallSucceeds());
+  EXPECT_THAT(getxattr(link.path().c_str(), name, nullptr, 0),
+              SyscallSucceedsWithValue(0));
+
+  char list[sizeof(name)];
+  EXPECT_THAT(listxattr(link.path().c_str(), list, sizeof(list)),
+              SyscallSucceedsWithValue(sizeof(name)));
+  EXPECT_STREQ(list, name);
+
+  EXPECT_THAT(removexattr(link.path().c_str(), name), SyscallSucceeds());
+}
+
+TEST_F(XattrTest, XattrOnInvalidFileTypes) {
+  const char name[] = "user.test";
+
+  char char_device[] = "/dev/zero";
+  EXPECT_THAT(setxattr(char_device, name, nullptr, 0, /*flags=*/0),
+              SyscallFailsWithErrno(EPERM));
+  EXPECT_THAT(getxattr(char_device, name, nullptr, 0),
+              SyscallFailsWithErrno(ENODATA));
+  EXPECT_THAT(listxattr(char_device, nullptr, 0), SyscallSucceedsWithValue(0));
+
+  // Use tmpfs, where creation of named pipes is supported.
+  const std::string fifo = NewTempAbsPathInDir("/dev/shm");
+  const char* path = fifo.c_str();
+  EXPECT_THAT(mknod(path, S_IFIFO | S_IRUSR | S_IWUSR, 0), SyscallSucceeds());
+  EXPECT_THAT(setxattr(path, name, nullptr, 0, /*flags=*/0),
+              SyscallFailsWithErrno(EPERM));
+  EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallFailsWithErrno(ENODATA));
+  EXPECT_THAT(listxattr(path, nullptr, 0), SyscallSucceedsWithValue(0));
+  EXPECT_THAT(removexattr(path, name), SyscallFailsWithErrno(EPERM));
+}
+
+TEST_F(XattrTest, SetxattrSizeSmallerThanValue) {
+  const char* path = test_file_name_.c_str();
+  const char name[] = "user.test";
+  std::vector<char> val = {'a', 'a'};
+  size_t size = 1;
+  EXPECT_THAT(setxattr(path, name, val.data(), size, /*flags=*/0),
+              SyscallSucceeds());
+
+  std::vector<char> buf = {'-', '-'};
+  std::vector<char> expected_buf = {'a', '-'};
+  EXPECT_THAT(getxattr(path, name, buf.data(), buf.size()),
+              SyscallSucceedsWithValue(size));
+  EXPECT_EQ(buf, expected_buf);
+}
+
+TEST_F(XattrTest, SetxattrZeroSize) {
+  const char* path = test_file_name_.c_str();
+  const char name[] = "user.test";
+  char val = 'a';
+  EXPECT_THAT(setxattr(path, name, &val, 0, /*flags=*/0), SyscallSucceeds());
+
+  char buf = '-';
+  EXPECT_THAT(getxattr(path, name, &buf, XATTR_SIZE_MAX),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(buf, '-');
+}
+
+TEST_F(XattrTest, SetxattrSizeTooLarge) {
+  const char* path = test_file_name_.c_str();
+  const char name[] = "user.test";
+
+  // Note that each particular fs implementation may stipulate a lower size
+  // limit, in which case we actually may fail (e.g. error with ENOSPC) for
+  // some sizes under XATTR_SIZE_MAX.
+  size_t size = XATTR_SIZE_MAX + 1;
+  std::vector<char> val(size);
+  EXPECT_THAT(setxattr(path, name, val.data(), size, /*flags=*/0),
+              SyscallFailsWithErrno(E2BIG));
+
+  EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallFailsWithErrno(ENODATA));
+}
+
+TEST_F(XattrTest, SetxattrNullValueAndNonzeroSize) {
+  const char* path = test_file_name_.c_str();
+  const char name[] = "user.test";
+  EXPECT_THAT(setxattr(path, name, nullptr, 1, /*flags=*/0),
+              SyscallFailsWithErrno(EFAULT));
+
+  EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallFailsWithErrno(ENODATA));
+}
+
+TEST_F(XattrTest, SetxattrNullValueAndZeroSize) {
+  const char* path = test_file_name_.c_str();
+  const char name[] = "user.test";
+  EXPECT_THAT(setxattr(path, name, nullptr, 0, /*flags=*/0), SyscallSucceeds());
+
+  EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallSucceedsWithValue(0));
+}
+
+TEST_F(XattrTest, SetxattrValueTooLargeButOKSize) {
+  const char* path = test_file_name_.c_str();
+  const char name[] = "user.test";
+  std::vector<char> val(XATTR_SIZE_MAX + 1);
+  std::fill(val.begin(), val.end(), 'a');
+  size_t size = 1;
+  EXPECT_THAT(setxattr(path, name, val.data(), size, /*flags=*/0),
+              SyscallSucceeds());
+
+  std::vector<char> buf = {'-', '-'};
+  std::vector<char> expected_buf = {'a', '-'};
+  EXPECT_THAT(getxattr(path, name, buf.data(), size),
+              SyscallSucceedsWithValue(size));
+  EXPECT_EQ(buf, expected_buf);
+}
+
+TEST_F(XattrTest, SetxattrReplaceWithSmaller) {
+  const char* path = test_file_name_.c_str();
+  const char name[] = "user.test";
+  std::vector<char> val = {'a', 'a'};
+  EXPECT_THAT(setxattr(path, name, val.data(), 2, /*flags=*/0),
+              SyscallSucceeds());
+  EXPECT_THAT(setxattr(path, name, val.data(), 1, /*flags=*/0),
+              SyscallSucceeds());
+
+  std::vector<char> buf = {'-', '-'};
+  std::vector<char> expected_buf = {'a', '-'};
+  EXPECT_THAT(getxattr(path, name, buf.data(), 2), SyscallSucceedsWithValue(1));
+  EXPECT_EQ(buf, expected_buf);
+}
+
+TEST_F(XattrTest, SetxattrReplaceWithLarger) {
+  const char* path = test_file_name_.c_str();
+  const char name[] = "user.test";
+  std::vector<char> val = {'a', 'a'};
+  EXPECT_THAT(setxattr(path, name, val.data(), 1, /*flags=*/0),
+              SyscallSucceeds());
+  EXPECT_THAT(setxattr(path, name, val.data(), 2, /*flags=*/0),
+              SyscallSucceeds());
+
+  std::vector<char> buf = {'-', '-'};
+  EXPECT_THAT(getxattr(path, name, buf.data(), 2), SyscallSucceedsWithValue(2));
+  EXPECT_EQ(buf, val);
+}
+
+TEST_F(XattrTest, SetxattrCreateFlag) {
+  const char* path = test_file_name_.c_str();
+  const char name[] = "user.test";
+  EXPECT_THAT(setxattr(path, name, nullptr, 0, XATTR_CREATE),
+              SyscallSucceeds());
+  EXPECT_THAT(setxattr(path, name, nullptr, 0, XATTR_CREATE),
+              SyscallFailsWithErrno(EEXIST));
+
+  EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallSucceedsWithValue(0));
+}
+
+TEST_F(XattrTest, SetxattrReplaceFlag) {
+  const char* path = test_file_name_.c_str();
+  const char name[] = "user.test";
+  EXPECT_THAT(setxattr(path, name, nullptr, 0, XATTR_REPLACE),
+              SyscallFailsWithErrno(ENODATA));
+  EXPECT_THAT(setxattr(path, name, nullptr, 0, /*flags=*/0), SyscallSucceeds());
+  EXPECT_THAT(setxattr(path, name, nullptr, 0, XATTR_REPLACE),
+              SyscallSucceeds());
+
+  EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallSucceedsWithValue(0));
+}
+
+TEST_F(XattrTest, SetxattrInvalidFlags) {
+  const char* path = test_file_name_.c_str();
+  int invalid_flags = 0xff;
+  EXPECT_THAT(setxattr(path, nullptr, nullptr, 0, invalid_flags),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_F(XattrTest, Getxattr) {
+  const char* path = test_file_name_.c_str();
+  const char name[] = "user.test";
+  int val = 1234;
+  size_t size = sizeof(val);
+  EXPECT_THAT(setxattr(path, name, &val, size, /*flags=*/0), SyscallSucceeds());
+
+  int buf = 0;
+  EXPECT_THAT(getxattr(path, name, &buf, size), SyscallSucceedsWithValue(size));
+  EXPECT_EQ(buf, val);
+}
+
+TEST_F(XattrTest, GetxattrSizeSmallerThanValue) {
+  const char* path = test_file_name_.c_str();
+  const char name[] = "user.test";
+  std::vector<char> val = {'a', 'a'};
+  size_t size = val.size();
+  EXPECT_THAT(setxattr(path, name, &val, size, /*flags=*/0), SyscallSucceeds());
+
+  char buf = '-';
+  EXPECT_THAT(getxattr(path, name, &buf, 1), SyscallFailsWithErrno(ERANGE));
+  EXPECT_EQ(buf, '-');
+}
+
+TEST_F(XattrTest, GetxattrSizeLargerThanValue) {
+  const char* path = test_file_name_.c_str();
+  const char name[] = "user.test";
+  char val = 'a';
+  EXPECT_THAT(setxattr(path, name, &val, 1, /*flags=*/0), SyscallSucceeds());
+
+  std::vector<char> buf(XATTR_SIZE_MAX);
+  std::fill(buf.begin(), buf.end(), '-');
+  std::vector<char> expected_buf = buf;
+  expected_buf[0] = 'a';
+  EXPECT_THAT(getxattr(path, name, buf.data(), buf.size()),
+              SyscallSucceedsWithValue(1));
+  EXPECT_EQ(buf, expected_buf);
+}
+
+TEST_F(XattrTest, GetxattrZeroSize) {
+  const char* path = test_file_name_.c_str();
+  const char name[] = "user.test";
+  char val = 'a';
+  EXPECT_THAT(setxattr(path, name, &val, sizeof(val), /*flags=*/0),
+              SyscallSucceeds());
+
+  char buf = '-';
+  EXPECT_THAT(getxattr(path, name, &buf, 0),
+              SyscallSucceedsWithValue(sizeof(val)));
+  EXPECT_EQ(buf, '-');
+}
+
+TEST_F(XattrTest, GetxattrSizeTooLarge) {
+  const char* path = test_file_name_.c_str();
+  const char name[] = "user.test";
+  char val = 'a';
+  EXPECT_THAT(setxattr(path, name, &val, sizeof(val), /*flags=*/0),
+              SyscallSucceeds());
+
+  std::vector<char> buf(XATTR_SIZE_MAX + 1);
+  std::fill(buf.begin(), buf.end(), '-');
+  std::vector<char> expected_buf = buf;
+  expected_buf[0] = 'a';
+  EXPECT_THAT(getxattr(path, name, buf.data(), buf.size()),
+              SyscallSucceedsWithValue(sizeof(val)));
+  EXPECT_EQ(buf, expected_buf);
+}
+
+TEST_F(XattrTest, GetxattrNullValue) {
+  const char* path = test_file_name_.c_str();
+  const char name[] = "user.test";
+  char val = 'a';
+  size_t size = sizeof(val);
+  EXPECT_THAT(setxattr(path, name, &val, size, /*flags=*/0), SyscallSucceeds());
+
+  EXPECT_THAT(getxattr(path, name, nullptr, size),
+              SyscallFailsWithErrno(EFAULT));
+}
+
+TEST_F(XattrTest, GetxattrNullValueAndZeroSize) {
+  const char* path = test_file_name_.c_str();
+  const char name[] = "user.test";
+  char val = 'a';
+  size_t size = sizeof(val);
+  // Set value with zero size.
+  EXPECT_THAT(setxattr(path, name, &val, 0, /*flags=*/0), SyscallSucceeds());
+  // Get value with nonzero size.
+  EXPECT_THAT(getxattr(path, name, nullptr, size), SyscallSucceedsWithValue(0));
+
+  // Set value with nonzero size.
+  EXPECT_THAT(setxattr(path, name, &val, size, /*flags=*/0), SyscallSucceeds());
+  // Get value with zero size.
+  EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallSucceedsWithValue(size));
+}
+
+TEST_F(XattrTest, GetxattrNonexistentName) {
+  const char* path = test_file_name_.c_str();
+  const char name[] = "user.test";
+  EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallFailsWithErrno(ENODATA));
+}
+
+TEST_F(XattrTest, Listxattr) {
+  const char* path = test_file_name_.c_str();
+  const std::string name = "user.test";
+  const std::string name2 = "user.test2";
+  const std::string name3 = "user.test3";
+  EXPECT_THAT(setxattr(path, name.c_str(), nullptr, 0, /*flags=*/0),
+              SyscallSucceeds());
+  EXPECT_THAT(setxattr(path, name2.c_str(), nullptr, 0, /*flags=*/0),
+              SyscallSucceeds());
+  EXPECT_THAT(setxattr(path, name3.c_str(), nullptr, 0, /*flags=*/0),
+              SyscallSucceeds());
+
+  std::vector<char> list(name.size() + 1 + name2.size() + 1 + name3.size() + 1);
+  char* buf = list.data();
+  EXPECT_THAT(listxattr(path, buf, XATTR_SIZE_MAX),
+              SyscallSucceedsWithValue(list.size()));
+
+  absl::flat_hash_set<std::string> got = {};
+  for (char* p = buf; p < buf + list.size(); p += strlen(p) + 1) {
+    got.insert(std::string{p});
+  }
+
+  absl::flat_hash_set<std::string> expected = {name, name2, name3};
+  EXPECT_EQ(got, expected);
+}
+
+TEST_F(XattrTest, ListxattrNoXattrs) {
+  const char* path = test_file_name_.c_str();
+
+  std::vector<char> list, expected;
+  EXPECT_THAT(listxattr(path, list.data(), sizeof(list)),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(list, expected);
+
+  // Listxattr should succeed if there are no attributes, even if the buffer
+  // passed in is a nullptr.
+  EXPECT_THAT(listxattr(path, nullptr, sizeof(list)),
+              SyscallSucceedsWithValue(0));
+}
+
+TEST_F(XattrTest, ListxattrNullBuffer) {
+  const char* path = test_file_name_.c_str();
+  const char name[] = "user.test";
+  EXPECT_THAT(setxattr(path, name, nullptr, 0, /*flags=*/0), SyscallSucceeds());
+
+  EXPECT_THAT(listxattr(path, nullptr, sizeof(name)),
+              SyscallFailsWithErrno(EFAULT));
+}
+
+TEST_F(XattrTest, ListxattrSizeTooSmall) {
+  const char* path = test_file_name_.c_str();
+  const char name[] = "user.test";
+  EXPECT_THAT(setxattr(path, name, nullptr, 0, /*flags=*/0), SyscallSucceeds());
+
+  char list[sizeof(name) - 1];
+  EXPECT_THAT(listxattr(path, list, sizeof(list)),
+              SyscallFailsWithErrno(ERANGE));
+}
+
+TEST_F(XattrTest, ListxattrZeroSize) {
+  const char* path = test_file_name_.c_str();
+  const char name[] = "user.test";
+  EXPECT_THAT(setxattr(path, name, nullptr, 0, /*flags=*/0), SyscallSucceeds());
+  EXPECT_THAT(listxattr(path, nullptr, 0),
+              SyscallSucceedsWithValue(sizeof(name)));
+}
+
+TEST_F(XattrTest, RemoveXattr) {
+  const char* path = test_file_name_.c_str();
+  const char name[] = "user.test";
+  EXPECT_THAT(setxattr(path, name, nullptr, 0, /*flags=*/0), SyscallSucceeds());
+  EXPECT_THAT(removexattr(path, name), SyscallSucceeds());
+  EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallFailsWithErrno(ENODATA));
+}
+
+TEST_F(XattrTest, RemoveXattrNonexistentName) {
+  const char* path = test_file_name_.c_str();
+  const char name[] = "user.test";
+  EXPECT_THAT(removexattr(path, name), SyscallFailsWithErrno(ENODATA));
+}
+
+TEST_F(XattrTest, LXattrOnSymlink) {
+  const char name[] = "user.test";
+  TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  TempPath link = ASSERT_NO_ERRNO_AND_VALUE(
+      TempPath::CreateSymlinkTo(dir.path(), test_file_name_));
+
+  EXPECT_THAT(lsetxattr(link.path().c_str(), name, nullptr, 0, 0),
+              SyscallFailsWithErrno(EPERM));
+  EXPECT_THAT(lgetxattr(link.path().c_str(), name, nullptr, 0),
+              SyscallFailsWithErrno(ENODATA));
+  EXPECT_THAT(llistxattr(link.path().c_str(), nullptr, 0),
+              SyscallSucceedsWithValue(0));
+  EXPECT_THAT(lremovexattr(link.path().c_str(), name),
+              SyscallFailsWithErrno(EPERM));
+}
+
+TEST_F(XattrTest, LXattrOnNonsymlink) {
+  const char* path = test_file_name_.c_str();
+  const char name[] = "user.test";
+  int val = 1234;
+  size_t size = sizeof(val);
+  EXPECT_THAT(lsetxattr(path, name, &val, size, /*flags=*/0),
+              SyscallSucceeds());
+
+  int buf = 0;
+  EXPECT_THAT(lgetxattr(path, name, &buf, size),
+              SyscallSucceedsWithValue(size));
+  EXPECT_EQ(buf, val);
+
+  char list[sizeof(name)];
+  EXPECT_THAT(llistxattr(path, list, sizeof(list)),
+              SyscallSucceedsWithValue(sizeof(name)));
+  EXPECT_STREQ(list, name);
+
+  EXPECT_THAT(lremovexattr(path, name), SyscallSucceeds());
+}
+
+TEST_F(XattrTest, XattrWithFD) {
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_.c_str(), 0));
+  const char name[] = "user.test";
+  int val = 1234;
+  size_t size = sizeof(val);
+  EXPECT_THAT(fsetxattr(fd.get(), name, &val, size, /*flags=*/0),
+              SyscallSucceeds());
+
+  int buf = 0;
+  EXPECT_THAT(fgetxattr(fd.get(), name, &buf, size),
+              SyscallSucceedsWithValue(size));
+  EXPECT_EQ(buf, val);
+
+  char list[sizeof(name)];
+  EXPECT_THAT(flistxattr(fd.get(), list, sizeof(list)),
+              SyscallSucceedsWithValue(sizeof(name)));
+  EXPECT_STREQ(list, name);
+
+  EXPECT_THAT(fremovexattr(fd.get(), name), SyscallSucceeds());
+}
+
+}  // namespace
+
+}  // namespace testing
+}  // namespace gvisor